mice/0000755000176200001440000000000013624017522011171 5ustar liggesusersmice/NAMESPACE0000644000176200001440000001207213623760442012417 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(anova,mira) S3method(bwplot,mads) S3method(bwplot,mids) S3method(cc,data.frame) S3method(cc,default) S3method(cc,matrix) S3method(cc,mids) S3method(cci,default) S3method(cci,mids) S3method(complete,mids) S3method(densityplot,mids) S3method(ic,data.frame) S3method(ic,default) S3method(ic,matrix) S3method(ic,mids) S3method(ici,default) S3method(ici,mids) S3method(plot,mids) S3method(print,mads) S3method(print,mice.anova) S3method(print,mice.anova.summary) S3method(print,mids) S3method(print,mipo) S3method(print,mipo.summary) S3method(print,mira) S3method(stripplot,mids) S3method(summary,mads) S3method(summary,mice.anova) S3method(summary,mids) S3method(summary,mipo) S3method(summary,mira) S3method(with,mids) S3method(xyplot,mads) S3method(xyplot,mids) export(.norm.draw) export(.pmm.match) export(D1) export(D2) export(D3) export(ampute) export(ampute.continuous) export(ampute.default.freq) export(ampute.default.odds) export(ampute.default.patterns) export(ampute.default.type) export(ampute.default.weights) export(ampute.discrete) export(ampute.mcar) export(appendbreak) export(as.mids) export(as.mira) export(as.mitml.result) export(bwplot) export(cbind) export(cc) export(cci) export(complete) export(construct.blocks) export(densityplot) export(estimice) export(extractBS) export(fico) export(fix.coef) export(flux) export(fluxplot) export(getfit) export(getqbar) export(glm.mids) export(ibind) export(ic) export(ici) export(is.mads) export(is.mids) export(is.mipo) export(is.mira) export(is.mitml.result) export(lm.mids) export(make.blocks) export(make.blots) export(make.formulas) export(make.method) export(make.post) export(make.predictorMatrix) export(make.visitSequence) export(make.where) export(md.pairs) export(md.pattern) export(mdc) export(mice) 
export(mice.impute.2l.bin) export(mice.impute.2l.lmer) export(mice.impute.2l.norm) export(mice.impute.2l.pan) export(mice.impute.2lonly.mean) export(mice.impute.2lonly.norm) export(mice.impute.2lonly.pmm) export(mice.impute.cart) export(mice.impute.jomoImpute) export(mice.impute.lda) export(mice.impute.logreg) export(mice.impute.logreg.boot) export(mice.impute.mean) export(mice.impute.midastouch) export(mice.impute.mnar.logreg) export(mice.impute.mnar.norm) export(mice.impute.norm) export(mice.impute.norm.boot) export(mice.impute.norm.nob) export(mice.impute.norm.predict) export(mice.impute.panImpute) export(mice.impute.passive) export(mice.impute.pmm) export(mice.impute.polr) export(mice.impute.polyreg) export(mice.impute.quadratic) export(mice.impute.rf) export(mice.impute.ri) export(mice.impute.sample) export(mice.mids) export(mice.theme) export(mids2mplus) export(mids2spss) export(mipo) export(name.blocks) export(name.formulas) export(ncc) export(nelsonaalen) export(nic) export(nimp) export(norm.draw) export(parlmice) export(pool) export(pool.compare) export(pool.r.squared) export(pool.scalar) export(quickpred) export(rbind) export(squeeze) export(stripplot) export(supports.transparent) export(version) export(xyplot) exportClasses(mads) exportClasses(mira) import(methods) importFrom(broom,glance) importFrom(broom,tidy) importFrom(dplyr,"%>%") importFrom(dplyr,.data) importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,group_by) importFrom(dplyr,lead) importFrom(dplyr,mutate) importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(dplyr,syms) importFrom(graphics,abline) importFrom(graphics,axis) importFrom(graphics,box) importFrom(graphics,par) importFrom(graphics,plot) importFrom(graphics,plot.new) importFrom(graphics,plot.window) importFrom(graphics,points) importFrom(graphics,rect) importFrom(graphics,text) importFrom(lattice,bwplot) importFrom(lattice,densityplot) 
importFrom(lattice,stripplot) importFrom(lattice,xyplot) importFrom(stats,C) importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) importFrom(stats,complete.cases) importFrom(stats,confint) importFrom(stats,contr.treatment) importFrom(stats,cor) importFrom(stats,df.residual) importFrom(stats,fitted) importFrom(stats,formula) importFrom(stats,gaussian) importFrom(stats,getCall) importFrom(stats,glm) importFrom(stats,is.empty.model) importFrom(stats,lm) importFrom(stats,lm.fit) importFrom(stats,median) importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,na.exclude) importFrom(stats,na.omit) importFrom(stats,na.pass) importFrom(stats,pf) importFrom(stats,predict) importFrom(stats,pt) importFrom(stats,qt) importFrom(stats,quantile) importFrom(stats,rbinom) importFrom(stats,rchisq) importFrom(stats,reformulate) importFrom(stats,rgamma) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,summary.glm) importFrom(stats,terms) importFrom(stats,update) importFrom(stats,var) importFrom(stats,vcov) importFrom(tidyr,complete) importFrom(utils,flush.console) importFrom(utils,head) importFrom(utils,install.packages) importFrom(utils,methods) importFrom(utils,packageDescription) importFrom(utils,packageVersion) importFrom(utils,tail) importFrom(utils,write.table) useDynLib(mice) mice/data/0000755000176200001440000000000013620753353012107 5ustar liggesusersmice/data/pattern3.rda0000644000176200001440000000027413416657163014347 0ustar liggesusers]; 0KҊ YD?]Z'\?\.wtH:D$I AR! $AE؛ocmV"\B)RA0 jrI]anY+[vk}Q@aC>8}sRߌ5wq9Z[mice/data/toenail.rda0000755000176200001440000001252511672771330014242 0ustar liggesusersBZh91AY&SY?Nc$@ R@B 0hhh hhUIOT@~ cꪪꪧꑠd4i  M2d4hhhF4шL&FCL0 C#CFFED4mLSMOb(QOTy'&FɧG1 <zOIGhzGz5&O)ɩz6MN@~ 0I$$$da-,>HOnٍ"6rׅs9f "[l@d -%,5;4SacO@1zl`ƛqFLm&)xg7kN>$::bGCI$4zk3q8XeN܂C;ϕS9g1pi2c6Ml \i.b䤺Imθ3$Sft&/٦钳)$M5f]]MM~Dˮ}fq@?ñ|.ӫo q|1) ")Dew$ H &@ nHH#P*g! $*HH79J2 "lD$$C 6 H H 6% ")! 
Ho/N׶6kZִnh;:_Ʒ+ ٍ'p>ӝOd^2YnNjϺK;7^r /Z={4N{U#X~r9^Bh/!y '_>rOB=`^6z%cuk~}(垙}8>}J'_X_'/_I3֯_z}xw+ /GBY{ KW٧/Cm}=a}?-}`'WK1{KA:r}xOv}Z#z~___N~~e~W_ yCW/_!$`eK/~珌~~%/_U}d'OO|E@Q#H ᯆK_]~jU^R<5|#k/+')|Q'),%6r K>Z@/:(y+_^JW?9|W_||/_2߯_|׿^{/ I;U7E·QN"H#"(7uŀz(w CD#<P>doDQ8ϩynfŠb@8&bq /q=G?I t FD,-ي}[t!~RYȖBBB0$"tJVapTo@`DJ0A#PUR OvkZִ1s qs93Fs9b39@ 9rg9s8g9s#9s1s =~@mg9s 1Ю9Fz4b*1U-[Dm$nݻv v۷o%KDm%Kj1UEֵkZb*1T%Kh-[DmݻZ֮/7EUF"UQUTb*1UEֵkZb*-[Dm%Kh]kZֵ "UQUTb*1UEUF"ukZ֮opm%Kh-[DjUQֵkW`EUF"UDm%Kh8.۷nݻ/7EUF"UQUTb*1UEֵkZ"*fffffffwNkZU:N|I'pʪ2 j*ڪpʪ2 j*ڪpʪ7Zֵ] 11wwwwvU[UUUVUmUUeU[UUnUVU[UUUVUmUUeU[UUnUVU[BlwwwwuU]2 j*ڪpʪ2 j*ڪpʪ2j7cF뻻 ٙ*ڪpʪ2 j*ڪpʪ2 j*ڪpj 뻻 ٙUUUVUmUUeU[UUnUVU[UUUVUmUUeU[UUnUVU[UUUV<2U.#N9|L À G UmUUeU[UUnUVU[UUUVUmUUeU[UUnUVU[UUUVUmUULֵkW`{э뻻{333333322 j*ڪpʪ2 j*ڪpʪ2XZֵk` @6hlF#` 1mшm6Flb6` m6hn8X"Þx蔌#ab$ X$$ J,d,)ieH[@)d Z[BZe-Vd$B’@1lŖVږ${1 HXBR|31Kqf[d $ޏg>)a"bdDsf`! f!K $ \gRaddLX78\{gM$L8I& e&hgLrQ03 !!#Idd߸6b˽.I=.ٍ8 f<..g䩮B1:李nٷdٍaf)\˶$%$iV!2 -ud)bX\[d8HɋtŬιS`l˝v3u۳ag5u\mcٝÇijwͦ +ճtlPf16I$&!:M&Y4:\`)ɉ qԚoX!iss7qfmq vɿl2Ca$7ud!f`p] mMdtۃg΄[fI]4e lzCMuݤߍ Mdٜjmƶer6ݸcsRɧky}7 f'͛5ۧB1{N5`aS+**&82`D@8i8Q'i,C];Lv}:PO#3WG)Ȥ`@y ЬQ֬ V/p5z`bŋ \,XX{/h WEg͜}9Js-8ڨ\,1jAчyF,pgtkxnȹXj*9\r}Eѝo?Uf1!_{y,'q?VEN*Pd!FH@.}Qz]UAz躢(R #"葚XUU=BDh&TM*P@Hh'l"l&Tv: l#t #~a0D&%F cG '$|p;RFW.H7@FGmNFLrA*w1?,{̚`q?Sq21o Np7_ʚy s V'Œ֎6NbqQv|kLbYV qJB{lpJүO^<2.eAE%s(U!曎lmo::{eI2q)ኼUCJ*U"KaLSE.ǎSrCs|b524[RX6)css}ӚwJReo8xB;m~J8ꮓ2A+C4*A\N{zioh6NSdĥX6[G:=\8rjxc^ "w%8,{%-6;RC6+*"Zgi]*-98'isiژ'Gh\A1Gs^YXŒ\Ra3=^F,8D 4yʵ )T/I$qMf(ຟ,yƳ TN#p+x%%`q(MĩxؽՐqEזr"S|zf05b:VMaZ__]5/P3v>wj?jzv>S[cY[8~_SEJ [U7lgU?e+:1x;;ou~[j<_Q~+p*j Sr;l}p-W+?zY!6vY]~.@tS߭}Wokrm~iVv?T^S gϾn^-Ouʾf;w u섽}ֹk-M?>iwa$WF߱>vY<}r Ϯ"?w^]R4 EO>?Qun@%cm}=[5vEAoCrȧzw~wKʎD~Fz<_Ӡa__7}{a^P>k[c? 
z[WkJ_Y }- ћ67bvQTλێuw,^q?>o[b[bk̥7A'@TѸZVqX7a ydX=}Z|?m-[QZr f::ke)Y5hv}h=ؽ%[ކ:|D㽧'Ͱ'v~NIa x'Aw~m1mac4m 9NQG/q쟏_UB!|?}t+@(,#'͂Q~zUѧ9qۓs]~k0>Յ(U|\2 Y7z2e!Ǒ4q&C4 !7۩ 3ˆ::$c6u8Ǒ ~ຘL8xoD_GI%Xǟ8$%cOɢtԏ^։{X[+ݻו`NӚ1IPjZHTmice/data/walking.rda0000644000176200001440000000471313416657163014245 0ustar liggesusersBZh91AY&SY^:;kH`/@@<@X 8D JI  QOQ@ 40TS)(ީdM2MT{JPM@SJꪞTa0d4AD @ASj4 24ն,V%I%-,ē$2l0-,) V҈+V*In#5VV%2bk,ٖjZYYZn5٣+u1u;FIyq,#ayJ;9$p ɰr<\QI6gi \#^̓&y/[Gx2񉦱.9\0t2^ozr.Z#d3fOA1rndх l|B*[="Ia)Jy]ȫue%}mc lL02**7suV-ZLjT$ն2[&2da6fnl`$[m$lϏqRb)Zٕn_ D1-5`yBq$S3vcR,e%&+ws0"fa,bX0bd[iܔLljwe'Ż> Z!Fo m h"))bV-e))*11LŬжfjIJ1,)c j"Ɍ(L$ōZi S>&LĘPNV+U)QI#왛,F͈bd2RI$ba[-nEs5aH$%STC;8ICðu3,$Kfl b&Y1Qmjx P33M9AC0!e$HoWn *8A;K`&̪ũxE[ "#P qwwc{{ֵkZր~^V34ɞ*G9UC8-b* 5fUĊ PVD PKt[okgu]u%UUUUW]u39ͭbުꪩުꪩު*88c8Vs6cִ5l<}Źmkc&f@I$32I$I$L̀I"fdI3 $H~X1kZ}ߋs<1bk`Vsͭmw1[yֶ1c<ͬ$H$D $&f@I$32I$I$Ĺ$mk` vyI39kZ]uU$I$MYsUURI${wwww={;{xwwws{ $I'9xI$I9UUkZ:목${wwww={;{xwwws{/|Ǐ`0yʪI$I&s5UUk[뮪I$Ik9s+wwww={;{xwwws{ٕUU$I$s{<8{ު6mmcI$H @$/{zhq{-m@$I$H &g9m@hq1c3mm3X$H @${vc뮸׽q[mmg^$H @$i իiæ_nLRMmk Qf[ WK,I%))bK$RJ$IJTI%IbIK$%$KJXE QF%")XbKQ$0f&&d_KHD&%$CDXd&!Xc1BI!abIdcaJXa883?" aK )^J`;u5^NčAXmJKBQ:T5|AYeCc ,7z='|CQ,9=$cCE(c2AELq4p8>{G(WɹKQbZFJ/hĕ(h?#f7D"\TqSCc5иE9Y"Ĵ̗7==+\΅E#ދ^kPh)!:ţU&0<_~afP) ,aTt]BB{mice/data/leiden85.rda0000644000176200001440000000022713416657163014222 0ustar liggesusersM @D.)406 ~E#٘31'wş93T諸""K1d U1TR}1oy:5=#Oh8OAltQ:Hx4]bt!eÔ([zЩmice/data/nhanes2.rda0000644000176200001440000000070613416657163014145 0ustar liggesusersMN@S@1q! eJ 5cҕ+PJƕ\Wƅ@3B}A$p|V1ƙi s\, J(cvSrCR43}ʻ*/=_z5վ'Ε'熾+_!"MLSš:Wc굿u|!`.<ׂ+"Ȍt70yڟ߀9}p?􇹠 ~g<9H'x7dx\K*i?j̃O$}IX-[e'wR2^}q+Ѿul4M)W,~`ƭmice/data/nhanes.rda0000644000176200001440000000062113416657163014057 0ustar liggesusersN@ǧ-*p0&x "[Z@Tģ'O4"P"d_Oƃ/ <{dvwFY l'4Eۑǂ ]3p6r,T]|6Փ?7._VzvEg͋TT{U3=wݝ&uʿ}*?%zXf":kʫyyfܴ?t^K֜{-G[kh&/ c?J+ߕ=h׵2 ろ8N%a;ڃ`Bș? 
bw,5WV@e'T jSI0Le&g0UXC` 5kO~"Ȟmice/data/pattern1.rda0000644000176200001440000000027113416657163014342 0ustar liggesusers r0b```b`fdd`b2Y# '(H,)I-3d``A xA@!@,@@H관8XXWI= ' H0XsS  0ጦ(\f $$, r$$y@3Cmice/data/selfreport.rda0000644000176200001440000007561413416657163015006 0ustar liggesusers7zXZi"6!X {N])TW"nRʟ[^ nt̗UدG`ıTvo|ǍGk 'qI[3VZ.3Ɨz#$RG 'gGjR94fPi[НvEjht/syȿq_6_w:7bInՀ?FlZ59Kq3+713bqlkdN!̌u ;sbD"CG!(KN^8|G} u '0:5ycި7孲ʜmYtk8On) DP[LC.ޒ%Ahj ZDNEԟmUΰ?VB8ioQ-ҕ`C1&䞁S䀃/BQ ù1@A{r9%P= $5q9OK;8(m!%=ne>(l!Ǘtewp-_޴wU\ɯlm>,V2%孶7N^ 7~%WV~~DOؐE8_A|D=ٰ{*g0`*9ʩ]=`l @ڏ1na0" ҳ> sI|KYN3O eX=q'U "_|7E hCNŷ`c5 t41!sOo"Qm )]{Ac.k}#fd,5 G>ĊHق1il Lv;/Iѳ2Q|߲oT;83Vd2GLSqҁ%Ӫn*xW@Al s!bSJۚ`pob%?1k&DI|Bf/QѢouAJ~;[H'% /V{>[m>აTئٺbfR?E2Rv,̓OH=#-N_=h3:aeP&\okQE2{ TO 31C4Jex'F>fޒQ8ެf /.$I1mC}JMSY" ׌ N{Gm6DrRVD8}L>aNm|k{KKB>Q˟}v$y8  zَAAB۬6e Y故+U`EFnt?G)r-_vRdl9[CJ4TDn\ uz?G) +u0#L;V[@w:}ޫf 4Ն@Nd\#+㷕J]k}v+woD vNf>gg +DS>)Ar}j=8{"۸YTLҏ$6TBZt#t/Bmt.BDۊǴdFRC}jf1R hqˤݕZUT0[AwVbW$ SРۢHfO?&7ްv;4⵭wwx&Lr8j!yJ C.I76]Gִw5)ps羘iڼR[d3܀!n%AQ<؁OD[,D50G|ev-MYP9 >vu\CGcEO3).Z]v~kF["# H(kbp #؈ R3H0EKC%(ȡRV+gNP ȣՐo R﹘nٺ7:tk# Dҭ70pF>,CMRdDZ$Ɵgb9!6 R<l۰n:|'aj$rQMr*){Rf4@xSO]%S!U}G,co(ٚ\p..<Ҋ긂-[2Q;NXy>[~u)i5pH7Č:+Ruڛ;HPϨ__o崄FH+ 9n_v~p8emExFTGBXϣO<ш5q$+Eqv]Fjι7g9t;Pڼxٶ-%9Sˆ t16˲] jN Cl[?j{w5d+5:R&1cS \GGS(:5RDIh ipu!: 0*Oӹȳx\T` 37NXWviV{us9og\{ 9"G aq 5֔pg6x $ưo5ͬkH3jcRp#\\YxlT&KBXcjt[j`;<]}Wi%bVK %e^ voL;Tu`QNps,:M:Wސ$<&8" ƺnAUuL`k(Q#4ezGyF2"4&T[Cs,ь"zXg/ySw촇ާ󟒇@< 0€^b K^<7V|3RX{{'M:퉴 7ckӫD|ӤQB]]7yo(u%.wM onY e7ts,Rb|^54 '߳TA`bj6PGeTLͽCp k V33іKF'fZ@oWЯޟU^آq͆L| TĎq ޣ[ky7ш'1QjyCh i1Y#兆NW;NfZ*"AMGo\xТ]+3> Z)+*e':/m؍ 7*k=ʅZ+I3[6K%Cz)zhX_L^9S~zARWH1tֈYƭ=D̓A45x[b%SZR s~4^{ DlfY&i ۫|w =;ӀP+?`ng,tӄR.Pc}I.vqà C/6˓E Nz*R*4֠*sk)rN+/3=t4dA qVe9!EϐTMm l=c|jV>Ou:fj*dWTBǮquڕbکCww]X*A{j{Lwt+ R%K>KK1we}!rwƯu6F:^^S1΃g}A ͍P0goRZQ@HdKkD-%(0mf2P.[9(v牶hB_(6m :%SkfMeN|`~lJI<2I/qwLt T?+tҡh+9|w;7tZTZ4L&z*`qp6V3EOU7Rk60ֲW-_ ǭDN^ WbWW.-7%֨șQɭ,ϙm\]Qj 7qָ:Wņ7;S)ut#Ԧ7o$b rP\d`p7Rg,#{}E  R[s .# WȯDƠ{^k k3[te7q|dל`I~0/6婤0Xh/gX?.L2B(*u" 
7Q(px&9m~[&#KtS^r%Z-2P_YÜ=_!  '?.BEK3h0puO%˝]Ŷ,ޓ *DCi+W[Ͳ;u@XK٦/bT.UY=,c in|*h`҅GZ)Hbǥu#~<!?nl@N;G'?E|WH.oƼ;rU7g1C2]VL-x2!L0gTZҺH$eߜ$RI5Kt_BST-/++i 6߮C{J6W 2!: \vʏBq;syCFˌ Q'VN9ry ?WκyvB5++[PB# 8zuzA|Z*" !\5С+sF U|A pnᘓ %E<=$?GI.(O5 L3M'X*XϞ҅/ w@}_.2?<+FkwW3[dz7U+Re]4 %0M{Z?7c t1 /Jiv뫒3Whʏ,QA4þ}lEZo][8K V:a7 ʶJkM$h(HbMzs4 LSgDVv fJW`Mi<$v 0XtLcL$zxwm'VXR~H'ZH}٬4fOĤ%5vҞDw;97 v;?,rY'qxSΧRDCX4Y t"p[ THDq`c1z]( G4]? bμ Gk؞l7DV_nSٵNHfǰܨ0)^DSĻ,[O%9%P Ћ5QmL"Kz@b _٣-PR6%M=#IZi|@i thȘt5Q@F!ϊ/pvD3Rze1`Aq mfb2d͵= T7sHb85w83{iSpx5+[]*W}1~gaUGa2ٟy^+VļGNѧZ 9V% 8iq`Lbl1:Nы9X_qr+0k#k1Ϝ' \\0IM:SgW'wMﺰ.f].`AnCۙܢ!x32a8ߨī 8jge nI&ͧ".!UVBVtjͭVaqCU'@pUJ'MmM}WЙ:2/I6"?õ}W;qva'ͮ3/ DǞ4 h>Ew>!* A'O,-Q=<'sZtNEg!T"UOv$or{vk? ~فv*P=TmZ^Ҍ <Wl}jڇӍ0zܙ?\/e(ѹ| 63Bt% _r{N;lE3sr#sdvq2%uG el;@W'fbzIy@tS4)]&iPV/󌐟@(Q}>dnt3c$DZoAԳɔ{ڣ3ƈ6#pɌNߓ&mz:=-ZΚDϒL]f>vc?jWrbZT sd7w&-*9'\Y| |Ј Ç KGHy %Ouص/GyˇO [>k@~_-xڀB@/%ϔSeok%8 ;RE먖L9 %bT3|\QHMnͨ߭$9Mu<6*nѠ!q* LO&-~%̮ 3۰m Ha zV?h +FQ0f$qvac8ƝX-ip'^c_ʡBK]:͞㏻5`YESR)ݿ2[8*vŹKT^t"Bo'F]AJMp+5^U-1'bhOrE%I.@=[λ4-TMRDXM!>BPfb{>4@ ORA!e`$?zW<=V0بL1+B b@fau^c]cmLGVc%N"yoXp)絧!%%Aݚ0e@'KA%掑Rp֗H*$ZyHQH̴nb'&G~ΛFpBWsXg2 *bP<3G[rSH A߆dSHoqd{8F}\^+S넎=O.2"U*WǤÖ*;\?=&Z\x)厯ݢ3I5 l%UUV9}qĖp">*sQoMv x^(GY5BMh|L„ b}!mӄ>QYJέ~?h-EXfNlN)9e*vPF yyHbsZIna&VZI!W!"٩,)|IC .8w8;k.{}_AN! 8wI}smd3y;];-muT;}'KWv7M)mT*+j!9^ruTW)#կ\"=[dfBF!XJ^ F@Xwq7~3s_o~' etɆ$3̮[uNFu7m@=81OfA4C u(i6[ pgK.vO$ƖI.^Kۼd#CbDk% %[*PKsa\S)E&wG DQD(k+wأ KvPp͊ U&'ip:Iۛ&۹Nh>"wkRYϮwi?HV8E%VjIrvL $U ;4%6)A)N5vSFb]{ a rO8GHea7-}ݭtS`E!ANjw;D[lh7984f⟊"n:$P]U:g~ln(۲܀U (놃|n6dQbnu!-Bn=VE~DnV[x52v˶.7aBMZE hOL`krT!`zS{wHd-=H8$X|*XkJB 4`N6!%M jyIjTކ3(Vl$ދoU$6S8ǢrY5>, [1Ym}y4K?i0<ȷh^*@2潵X'`=6x&Qs=lq[XV $]W5CUk؎o/rӧmU+ yyo'k(:vN('dzm]Q*l/B( *1=Cvopž ^P鑹؄'xb9-L{i25vV.6GewlbZ|K@R _' NXxT+"Cy@5rڽ `3%I+Jxi P3]\ܲ>qW,uHeH;ڐ—o\]H.W<{w;\N \$rt4 ѩ\ z dLyMcҘq:bqE!bIj%_d,;cgV}Xo_~ۭr KwϚ# |p^hNXO{A{1iO1i? ߴU[([V̖2tK\V5Z*[3<AȣŰ& 7/D%$e zK7838KoFp6*4d´̊Ut TO$n>?y^ Zjf1ɿcNW2o1bмܹhWhU@:ZCv,Slao#-efov5dӀBZgLL[2ϝ 1@w2AT >nО="A҈ e%'5GSA". 
ʬulC9ptl@$ =>T$EQo J G\/i-\+Χgvf%VE/z1Ӟ{OQz x.m]òbEң;Trɫe;;bwt-waqY\ulDκN͊\{R?x'ߠ @`)}'%!PiNDiur7fp`1wutHOΪ0'0e)`1!ۆ-HN8a}wWeڛC>&;jyMѹIFGNG6 5@LEQ5in}zu;lҀ\(q-"$Q)%G[zE],ػ ]YBw@#°By%@P9l_B~ԑØ<*nzN.J_>&Ūa NJ\ A$̫-s,;;X!jN*?ZAcNy;Wh<:,\AAժB3Neey_=V[صل2^f 6s <8lIYk*;|uΛ*hwvR*m61=8]u˶CYաڳ +1u^MðY>5N*!>-#2T(XB-~꼮HynVE;kkS~Nӟ#蘑w8* +ԟ>R\?_{"3!`s {Lq G({~`+]vlg΢`ULEwySmO {: Gfawc_#FWFk5&:8DJX^4omS'}9O0 ?: V3{;/۷KAә9tX7~*1.J@|"1 \8axA@zMu@N܆ʉ Dx !8}\fЩ\J>TXܺqhtWTͮ|# K`nn3n?=+!JeK{^H JpRi]lfh%xBQa'DK>OrG'΃O'Jo8UTޏQ*e̚XD">Zd^t}k3&Lvy8wkJTTKj\ Ss}Ƭ+Ϗg*4 ~e7m$0qUr9>Q'mas@Ԭc&1e3 NщoNaK$Nm!:Ua@u* wZc #My!j̗{.[xSp"q D{+]K tby6@Wd=r]CkJ&8i}4_0 @`p: q 2=G fc;ΉyPzhuo^XJكy J%=Eixă 8o 0ӾK\7#UA!e2A+ 3/!K(#ᐥ0 /ܴs`}_|v55o9hA9a]S? i3:zQxz3Ɨ%_/t:6|vt;ٜet:Sܳv&p6Cg۞rf+Qfy^{]/YkJ{3XdW}\V 09U-r&ճFUԵ~-c9-ֱԣdlI#{"D~+WUUG6K+sXvCާZ(` u1 xK+lb9m+N vx8\ WKsSQ녭:o;,. m!*ӡ2~Uw D w?)QZBKfi/ wBAYn %UѶv򳂦tu Zĺ] un=7/‰lͪ'~8^(Ҭlp7o+p^ZVW`#^@ops/L<Fb< LΒL9)eZ3z  <\gTw|' / :!W@&[IxFG^ q(WSfA2=A$̒C}Bd2}u/Q@rZen`ԣI8,2 dWfST(=-p>JC qV!'< at>#DBp}d|< (h~m׺3"=D Ԙ|D!*X=BiIo3eZKJ('~ e.R hu+Pk\mXN4銍v/HoSk/:Mg~v|٬=6Oֳ{r~;D%=E8ӳО7ղkU9G.̣Xp$흢F#|)i0 ,E LjQ{B:k{OIAgG*h qjsqV~jS˅ DSS2ukPܶcz!h-b&${/Ǿk(6苭 m4l{ r-/ߋ|۫SSIԫs}Ԯܵ9+\&u^q;t誖 SeYY 쨎yW aճU}Ik:#l>ÍE5g0Y>w<_K2LZ뉓aQMC#btap@=>DW )0ukg1  x4)Ҫ2 .U d NW6Gq[1 w/V4ZNp>+.εݗvvUWB!kxi s9_&G҈&&4y"]l_m=t}8 zwE{yX,_v״L"G0rrC 5ih)5\ rd ;H ^ In`3g/=>tMYdL"v5Zp)n4IXLԱ15!JhBl(.*.Ao] >i ZRGX:䨂 hM$5p>"2ح^<)MM\DG]vU( /lbm4/,FjJz]JLi7ˑXSG Y~Si tp@:ߟC)a7(tI+7)烙5[W\s-idi>X>'՗պ[0K쇘>BNbUrD*qP8|5ofs-1-#uۦWT]TSΫ&>z&W"8)<줶6t&27*mǒ؋ m'ʡpOm"0 HG:Jajj*/ Sza}'hM=ZbinnbW2_k & WGoqׯ &Bϒj];:US*g,muLMa`jh 5nj1M";`r[.q`0]k_~<8Tggv̮4xDUWe>DoKNwV:SD'}LO5zhO 5]`\о]g5 3\%_݇.ʆVs2/j4,ea~ e$Р_"}4*I%;q{Gy:z|[<,-ϟڃ3U`> ̿DAyk<.t4qXBU5o$G貝w@o*YO2 XMDlRI?~Ja[F%j5'[OM~hpHSԩ_R PEbT# JV7CmIwDaŕwt`ʦڄ-Xgf^2` &`[% C'yY''7=6+[\؛:/m .$ͳj+"!@?Α짹&ReW2>!bkF z~bD8XS, AḎ6 P~8z>cw6${s_+_\ {`p-!0vd^rEVH xlͳEwzu&/r|т,Q{W̉i@IYD}aF|(Iwe,_^eava' W_sXF|wp,P[OI ~X92V'47 jPX"%$-_'=WDQ&{WbS 
8L"adǪK[΋w8|#DQ4M.P"Drݝu̳EoR^`3PUׅZ^8y0&٭~`bs:R)>6b\ I h q >pW)>hGZku+-A^)[_ܔ,Υ:EzsM<4KIx?4r ִI\8n0d Im.^PF?5gE'?cXnK+]X?A>x%6a-$GivCOb=w^J(Y'SHU R]s ^XʀPñ#ٟmMa+8n4 jE3+N'l`fK0[%>pw;Sk<^#"i8!( 013֜Kl±G,'. 5-\Wq$F; T³轟qxX{,טH<OѺ¾p0zRYOd' \ 4ˊi2o^%3Vj> NM,6J(\PsRz6ǹ7hĽ#'xA32RƗIUT^cL(TYcfg7|\&vRvE?|Yj#A=hs(5{?[='d=~D;ԏ8 nPUNUMԧ2u!Bs}r O2Ƃ$=inAoAҹO1'B`\̓M`2@IZ7:*͒S25+FBo4}j R\ sѺ%@ Oq^dQ 7a,?wŁ:!3*j ۟lD{ߋk]phJ`cջAI[Nh}*n 0lWSꮴaxi)JOWFYeckPRþ8vOc򛅣inӚ \77uΒh"#[(W[Qw[S\1Ҟc#fDB!+_hő~oND2!`LVù-x9ʣR?D3hJCy8&F#n8@os$>TBK˭@ NL^rruBܔy"-iL*>5h~+/CcPBc38 et).[ P&Rj\>8sO-sRwRAmm>uS(vkltV1m|M`MF[~+,clrG^r%2?^ PGfUYY4xΫ8蹌T♻0[jO9N-3pfפEi*"Χdi|jѱ\V #tw#] 9֦NZ$RihO47i&E &6~u3ڵM 8"g_~' yM5zV2wwW<WCA׺{2_ dq}kqw=U2udorz 3(Lw|7i ֵ$?#;) OjPCjQxtD#F(ULɓ}tm[x̑%,o٭wzg>" ,(@`k^T=uJ l}q_? RV(earM9􁥇Z\=)L^>Tu4+\ Y : 3bywMBtT"e5FY`BOŞ8ڕ1Ҳ,Y9GNT :!{'ߝ~ ÙeSHP,O;q_k%~ui:l8xC:-[|XѦ1K2{`f0yFlw;]A>cfBG,S#22\m REɍHm[8y }qôq𹻠 6y\ }[6֥2jI(=E!SI;đ(]D>fA]77@8?DZyH]qpMY+ mX㔢Y:!\u`q'ko3-cԽE+g"AbF<g-,[mN4&QkT"y#e?9&%TaQ9Ջ9n>VB$#.iB8A k12f;VYӈJlEWOlf܃7p ~mgiYj1AU:w\l,}޷1r-X@1ѸgX ROA}並t0KA:oR>x$SO4όsɊ=ɵѾ[Y3QY \[@/-&Iht Av`[\^5Xږ7ϝ)ǀKtg~l|"HbGE !a]hr).>|O^2$]ϻV(%5%CU늍e!C(bh޴ @d R^~ 귾Cx=LE0RIV( ⋙^1Tjyl!+; j5%=bdQɹj Һy$+GL4{18F}TYK`,iepFjc9.>(͋}glܬWebp6qC}j)Åʱ -_?[(7 ̖ vy'ײ&v;IVn>2 _5GE0=i&~ik~B/ꆡ؀A^Rt5` f'7؅z>ԓ!H3҅SKƔܠFl(~Fc"VkQ.Y:C46 \$zm+ Wi.!CkMxAw4[6f&4l1'-'?rRe(T AڇZ=\JƝO.)\ϯvNP߹4yo6nPLP{Y7$),/K ״!}̢ |3&'YŸ~"TŽA)5e\U'H1IIli_ 6&Z s\ MUw<-'+ ۔~,Q86%9=Ū=9YWir?_W 7UjqH ~YAcrac^jν n}byǒ,ROm⃕5X{!{aY-(AyX,k[4-rTˢjF`Sþx?L?k`.DkO҇nߞ`8 Q*Q(H8zʃYZfqEl%^s@ P[miƥƗOr&0Ꙧ^ҵNB[9p:Wx.JYo hk_iD/vU,ъpx3I San2_] =ܖdEw^L QPJp}=<_%z ]@BbR5n13n.!&a8`Z k8-Mמ.u^nHɨ~١|)$;MyWewf yn:MxMQ.zvweZP ~J2rTR{mmm#}E^)WWp]ևK l(!F "6H/D4vѵ:\ɸ&jɖ^T:œoqK[wo< TC-Rn g[$,|xHmxKtÑ_Gb6 )(Rf"w&TB *u$ƈ})N, 35-ƺ{c8ho6wBY# ;H#wg)3#R!XVpDq YQ )ߪ\(=fhQ6ҝ҅%}PG`ƾym詊9C@~1NŵMC~]# :);(#;! 
p_m I99a/VP}?2j6vR:}yA E.Gٺ㶳/>M^  ~*ԸfP*Zr/e,WJ5R{ftzVR>}ȢJi.~VZNgx!fRm&ˉ_:aoUHqiA*uM)1QYZ;g)(}mhdߊd:Ց1dtz^hW9]N)[vB aS0:BaΥzKw7>dOr*x1QBC14[zHRDpEcrP Lsyybgtσ[.\Ѥdr-6ouh4*ws TE E'lԫs}EP_訾ܖ9.-*F̎AQN?vmo쇵Е۸QYe}&T Lrd![4 cZwV_t %QmeB!5p@ΫZLSG5V*-c]| oY#j҉@/@Gpx*vRlrtrݸg7<7I$@HyEިbla5( !?hng;3T#uߑك=ƁG*EqD r\6ITkg1awkzw}p,糠SC=H?FI[ֻn]}u,k>Iھc))(Gn@mH`Y@3H`Jjmjg,ϳ Ec,^vޕ4'2~gBFaDŊ X_lC~1lΘ%РU,e%٭*TwLMգ vydLhMWK{ieK!hU `Cnn0mn: [BOe#ph#C{kFaQsgen՗g^f R*`;μm;&A&"cADgq֬oZq!Au,LߕD:&7XgލpHl ⑐F[=;kHMQ&1'X/F" $k01С8dntc/m&tI_?~lmo#U AW8x1VSĸ¹ʭ 9ϨVgwR[mlpZ&h[Kr_[1-̭/kU+QyZs9 }=1Gɫ#l+3iH5ڇU,\8vȸB[2X15B0{07xn]D'zk|Lݭ%(gbs7d"mCHPH R_reL+Yhl*Gܒ{B"~_ /UɎ$b\EAG?V={Ӛ+_+e:ɿ?0LY=w!USK~a|w1YXro9D: +6@"^c0ͽ|d d`gPՃEg7aWxfSt؟Sb~r,rDyoކ[q;8늻S#i,C6 7Y$*>ELh`G|jtK* 䕇1XIPUdCVN>yNl5^|h-q ; ƒP;7<︽mIIj$Mu>4bf5tvAH߳O@؄x|A wvSJhT.6[W8Õgp 4륄!}N֔rc ]Q;Է/;#nsSw ac?pk6@> 3tiSvMc3]2&T#/ʗ3]UŎ* >ZܩK+iEi ,yA Qy4<\ob mB4 `ԑ]U \T A) rZmX\@Rp^$G#+^&aU*:fE=䣤 c svV 8s}XZVpoW= V4 #i{y])gĞ}CMXJ53za yM숓Yl2tJ`%r[4D{Oe 3&ˬTǩiE >"4r<-h꟪4RiGuwޚHײ2V|ZJԥm$|>cW y:~ـ-8[J]{FUC $ͻ`;MG+#mAڽaIpCMW;>BXu }^RgvqٌqcQ(5|Vukwq$joH@ ?1 q65Pq-TV' =;mOAy)wܜ8^_]vW{@ˏwU ܙ=iH="&&qn "'C14)20^^ W/ b61uە٭CbVًژbb?W_Bl3H$A /1TR$}ˇt\w#g]ɦb]X%Wdy˞?|EZAu?p z6:]d3m&jmYy${cn8<椦?$Fm].?h+R}pQQ@SrgYog)j~›00A'~Mi)og4iTp%k0d1' }~|Kbg]S?!M.lmŔxx)^'< [Ye.&$P,`n*No*gL 3Ql!> qT]P'GUhR3@|=LftAuRWYK&Աr7}t#ꂈ `a,`Rj)ql13E+p3Yl+HU  -Cu#s~!8@B҈\uY}J #,T][ngwt .<}zf.̹>CVT4? (];Bj=oaV! nT8I* pՉHD0i~LY]VPO|$TORIkHinHs !0zI~`[2S~p!sA|>ܗf8֭hS8l`{DOngRQdgK)l9o5;@@˂1+3?ctoOKc Έw!>3}DTyq8oX6Hг쓚ш4F~%`1Hi;G^vc )zm*j {U׍8l}礩/oaJB1"6?U9Be/ R1!5CG lumG|}б{O=q52D=[Z%@05;1$П*z7еG/{uD!c`P mIPԝ hP .ncbN@V7W -)1nS5QUFsl1ah9eO iRB@6Dt3&Miő˼fMEQ$\)(R".2Tmjffv%]<τ-3(V<ŧ,3{B6@zzn~H<'JR8FB{bg$i3k,7 #\ҦSWMA#U˕0ᝨ:tNCs 3[ab"]S:&aYpP '22 zq+SX/}5й?KcHhgonA(ZGt}mL (Nj'[lZWW3+(1k$^! 0΄MWpPH5I.c g6eDǂOgW_$I٭ءA% s`{wNZ?/\t*-oԧ% T(=-ocEa6A]#+.#Qdq ig `p&`%OMi9Bn%<_g"k/jawq\)! 
N vggSÏNJ6<>!lԪ9C+?Kܰ5mi1.4OHDWWl+^?ϲ3 #ශ_/sU gXbWRq1GxZ )`%f 9^VZ+3\eu'l^ KVQ=(,`R75o|BC'M.XQ%Džamj:M|;J淰 y\&= DUruw]i3G6Z .,4wͅ *JWzqILZB$v-FR861U%]"V jAZF{Ax錦fW[5H *?uGЪX0V|ES,{ն!} F>0 YZmice/data/pops.pred.rda0000644000176200001440000000166713416657163014530 0ustar liggesusersBZh91AY&SYX tL  PP|pѓFhdd02dh2CFM4ii@xhTئQzM @4@ɧCAM U"hL"fz C h42xE^8D`"j(ThX(f"@a"򩹘 uyF]>>Y=)Q5€RH\?3𨷥3zk}XUzϡsq0IQdɝbm#;Irִ(I+F+z#3448M&vf֗RY40@=H%C D^|(~0͝RߢNwCQkmfp*`{µRTӢ8Gl*Qbr+\6lo=s@pWR'-Ye&Yddd+x \rGnT݀rGN@= G N K nίR.C Phb$7MM ?i; 5BoH-Ŋ¼vWT휓 E9  bq \AEy㍗:NN筘?ZIGR8̃|a;}Th!ìaVHZdTy$ѯڗ5BKݕkʛ[-hQ7[sk;RMmjLDMEk S\q ;'p4e6.qDm.II%myjY2kێT86ۛ˒0k"C'HdʼnMedf=r,q4P#z*xW/L7l0ME9IIt ŋۚ$cI,k"Ǎ'Ғ/4 @?w$S ̮=mice/data/fdgs.rda0000644000176200001440000032010013416657163013523 0ustar liggesusers7zXZi"6!X])TW"nRʟ[^ nʣ7]ɷqh⎠lp,;z{7kFh=7UlcB}p++jMx<'x< fcwQ&SUh-^5&ڛG ! OeoUr{+8*y5 gY鍩eiFԚ~22 +84`,*VwVCA >6(cͽx{;9NPf0C\:mMYC>6-갮%-4ϒPolxA&.Vroi:Ĕ^3MxoV2EKbF,<ֈ`՜562]/pYIq1O&Jgtz2q޷LPj왮'?Uh*o cnd|5uRG3Kq^)cҴ}XǶT&D_~!ѩتi&ja!} a K8j_7RXB#/ݜ}6PLyYx((2~r]ص9ո{m>9pF~,8߻/{4gXM7 ]{-}U,$q_?.՗Ĺ־y3qw/S"`x7{#j?r\Jl5o0@-h>#z$q%Ơa~} S 0Tµ`B9aD$ԝ+Ơہq7N2R]Pwie)6n~|eO`OmvyN<u6 w;΍; uOV鰨#w;&<$1RFiےL(e_v,H**^bË R7d+f..N[vWؑ!lҵ)Bw4h>+-i'Gi:lM`;YN8ͣ N"?I_L񰴫{w~+ ݥJMxڷfHk$u[p kX:(\Mқ?<~G"onuhݜvv!`ڐ}PRiR'z 7rNDGYn$ Q"JTP3N49G"Sc]9k0KBd}g<5-1`w1pSMe=K|6ٯY#4oXP/@NB/yfcc}73s_hEsL޽ pD2Ϝ>Z/'ڨ%窒D@*ɨ4WMl3ȥ3)N0N1% 9yo enjcMn=zTgܾgBE/kE"p.{#v"2ADe^~'V˗;rLk>/7Y!ͯO]'$Eqưxfe]YEQyȵ@7nd<ʙY)u L&b#ĭזtzj!JzvZhA!sUpN`"nf|e/x[j[5A;e!qU^e 頳aÁ:$({Qф peIţD*1.B_pnEqK8k);8TgRKF'ZHCI>F 1uV6rDw ]#D傔u՘,+&𞧖|RЩCH;!5й$5.y- Fj )RNsQ֓,ЦO]*4*= &oGASE\B' 6 gڵw7ha9])˴='.Mw&&kdERq1 hC{V/QۚNiv:|7[7ovUkIM>@pYVcn @Z?=qxg("LaZE ; ;"S32a!*tE/ўgJ/Bt%3ڽEK0I.ZeA1+L@lpj}=J旁`qSz@L4ެ΍pBa{>K# k¿a()|̎~J=ґ)b018FcQqd+50.t$8JaֱU zgp$e}Q'DЋuicе0^ F=RЯ3KTqtM7꜕g7v*8x.eg9v^mro຿.Cwʭ6THBT筠H &d׈Y'"| hS IOǤqlQVYE]EK!A4ѾdI!(Ͽ.cg]mZI M)I? 
62:3>Jy٘߰옊kp:lJlpm!ہ1ҵնW{P uHpBı]m=8&QdE2G]ȴ^6މlSu9ӵ"-#Gno75p&|lPa}X<00&-;_z`L犩T,'4H}T/k6i"UP).ϩsAX Xvgj#l4o+҄HR=/ig z$XT4ܬ,Lँ&GS~]'C_1L]m7/& >[{*HM`/o vlcBZusTq'PDqoz; D԰I jh D1* jln\VaXY9 0B^nRNÛ[]ܩN(?i?Bm6r; ߏ5C@RwP[:/_Kh_-9`m~FTmoʊcVǞaѽJjR̅ >Vlyb `\^j8Tiq{ =21ix.SϺD&z9hI tŘ0_aE)JƜ8)q=y\ aD<帵wC'X }| ' yA" t #QW=M.Lme % ʽ}zG/ѽu,~P!qcj.WAzB0t8cʦ!Bv+c@k ֭:j 5]Пej}jǍz K+qv5ػgRiKYH p2]%l$i}RM_}T~ޢ(K˿9YdFW x#}n،K_$/{ 7?8tS\SZ B,oi[[v iI\77SnPu'bQ"eBY]a|7>̆Ѐ O(ec}>תڌ2{x8]Wr#j(48ڻr u'GKn/CHA;]T^W7׫PDF~ ]^ {EmBimwf&V 0|@x`5a[4Q#2!]bҟxSh\scJv>u,D7ֶṘ0J{2o/~{0[(>VVa+DEFHW1 }>P?+?JӜ%z q g s[SqI ӖY~ v dH%#F<)e6E5b[3d@Sv*g9Y ]M{o_j"giQjYh< ҷ7=%p}_ u&5k&35R]M2t'I:6-:y|!t?˵36Er8ƠgSnx1 Mch`dt pcf.WI%>p&P%L0Pߨ(:| -5 W }5@(eV%O<)3^n/xJjHZ_1bZ/-W fkD9cF"d`7?Ϋha ֣ZL;T75 54A,1`5&!K|?MAۣ8*65ll@#a64! "PxX(So Lz<9;7O NyUǏփƣ^;RD xK\4PoMcBs\tE'0֥\I{>,ڒ5u|'^gG";E:[x5G-X&I-R \<tItm:4Mҭn "rΰea3ʩ{s*_0=c _VeѴO CZX79: l>2 ҭi!M| Ŵr os%69 "S])_!n ~c!8R͊0zG[|Zez^,YeyTԾE\Lv*ִovPܑ5Y<:Y>RO!Ej >,D]s0m2.4t\yyaw, OΊf>WsJq"{pNf}R t bAJ!etBSPВ^aiD<;?&/>EZcz?[HNai' 0>5X9d c^#eײ07Ǵ 5m< LmE!:W{ 4Eov_\E_wz{? B{ɏ\tW®_O)-- -^A%IR o |@Ȋw}Cw|KiXtV@Y ﮅS@zȪ*[Q.y\o) cjcTT|la󾵃[?$*b0iPȀDVb Z? 
a7.o(ÙYlޔ(L.qv')-T1!H9o<=Chs9ǖ&]ykmv  x#} Y m|C+tdVjsWMKd] NĐG\ݥfggb#ڝ c4)ΓJo?:+7"!_ؙa37~X:k6yv -j<ɬY2*~[Bv{zq<#PSX(Q2 B|` Xv}DOZ>Q]'>Ŀ"φd.aXgP3qKVB]w*%HCM y3EjP:_cq#8nѴE*rƎ˥Pjj GQ@rL0}Ԭ()dSzY#AV̈5 $GS9 GtH63_WdžGr2no]B!ʰ-o{!Z JB)s͓ lmS$S(e0;vXFljPX޸Ua2))?˱1 ސH QA0Z^Z%BGR/Fh8y@Ln>(ӇϋF>ՍpTv&YbɷZ +VfD-:Na4p v} sؘkXf^8f_Ekt-EU-o􅱑g5GQOO*Rixq傷4X(5鱥KݪQZˆ#;Ԣ}J8(ԹrfUx"o[Yew%;U{I\w$UqUvk܀4j-`_赁퐹T@"*ZMcSZPdӉ:ۛ$~tXj Mɴw}Z.ksj W+Ozs$$:xjAgC+'t^Rsc'+S#2qTu㒀|t[ Y} =nrcgA dveP<'ceUHqEi,d먝wR'ҳרT hx+̊ 62+#H4%W0>oX\8vEuIk^57o#EiM(D?rtnaeKvC6,:xF}ri8~Ҝ* jN/\Vvp8 ^DHȵD1npW=ίpl+2܌ {W粏r;6&exg:ĸy˱6xӔꎔ$Zhz,='zT 7Vr▽دڮ9y- y% !챒" `g/3c'jMVp5Cn\Jw(䔙-e-^Lwϩ?!+Ս` ~"DbCPhQ4_Ro=d鴡Lˢ1?1yYsCP+:60~3_`Ŷ(Kvo?}5K2KVn^.dϽKqÚ\#e(w>3$=b~b׉~2yN~/iS#{ mp G|U>Wfd6Xhdb@eΪ_ƠCs.1jhRAjjUU㴴 uTJ$El8 0^U *LnzY9LUT&%k+)Z]LJ#a,W35H'BdyJ<]}RM rE:%O ߈)"Hڪa~L jҙmÎaP&R*cJirgwJRa_ᗅ}i7r@Ҥ@9U{z݂_ڽx nH(6i*r<3UCDq^P XtcWTNҐ1@Mu_#]$Zn6 Yb%^Ϧ,K߳GnqJ7u9qCUc ~0Q\jSJ  O~ᜍy@M_6暪d_]2qk>;8ޙdbb靮0s=& Ҋb9gi=Ʌ_sr@7?ϣ"d"J5Iqz\Ӧͩ8Jv6]^yRe6l}8¸)ңA+7'|ְ!߯&SZka 0*.Tk>}+amG`z'F(>$vߔ^G8T4Ax7'Ýrlݕ:a\Bs f:*X5u1A{21tHc;NtsrRd{|1¯&\Q#lLoI°\h8~ N(R[dyŴjGeHyT!/X![S˒mt2)_ݒ;piA#dJٗ'ٳpi*Aךּĝ?cOS,ByRLh~_tgg(LGҬki:  m &us]̕yN@I?l R~59QDU=K&dP/Hc7_0$~wN@Xyn0Mθ; 2.:>mֱ~O7_x@ (!]]4Dw&6=RC\I;Rah4,EBIoU$z㑠+I.zt|3)ٻJJr9'WnXO٦u=ZB=aK*z[Mt$pkQ~ W !{q.*nn`#&ia6y"9 0pFWE+!c(;=@Ћ'}?Q/' L0 <~Y DTgC)ʿ.pwh%U*!z(/.ίI{U9d{u0E,nNVA߯!siVOBx*Ĥ œݭ}\>C?P:7U@8r)w9s*c lzLh&Q9ݫ~rbyq<ފ _oX)r 1MP"<:$$82BGX1~ˤ>e$?"]-NZ2hǫ4IT -gx T@З7_.3z?Ciekd+$#¾AfX6KVBb(~ȧmWl/F}\󥢠N8⣎,֥TK:~xHdlFŻ:HsSi}G*45|rﴉP$[ĸm^Np`'!օ?Xs>O{:Cx8^L8z_|Ƀ $b={D!`ִS~[0`<`>p!+v^"k*m:۰mU~BG2;6nUfEy ˗jy+;QmiPQ\&zƤbё1Hy健<, a]I'{Pve]R-#jZ0B-YV^YW2#:ٓю3տ@dZJuMƝj60W齁# c!v6VF UU\ Za 0 O: [g7yL,}?ivS0p=@[V]A\g>D$I/??jәr1^ P()Nn.lh^\YKs߽!ѥ"?v҈^ճhRAɅɷСc8>XJHZy 'GR ?o &r n)A96&Պtb,I|@kRD%䤬f?Up8:0t~GlXk9eV>(ur"#߆uJbbmMD5mso[30k/<,M1FP {7z}LR0o2 B-"ǮW% HJhKb WG$#)tާ3 բQHƃe+ӡIulZ)hox.&,Y !@T[$os _:~9ogDprt\je[+߁:ү5HIײQ̚9P0;8@ԤP+Q`q$ݡ$+l3?;ϫՃJUL\ۼMԁCxʵm8f H"7XU؋a˺ugJH㴼E0v_VH "ƨ?PsVɑ:X@"aGEǤ=3O{ a\?UivX%/`ka Dq]p" 
Eʺ8:DRoz>ƽV'`JDWRCG|H8^8n$纨t-",GE-(^5puA3O5Sh8N<AnF!5_IQ_Ɋ ؗ?>|sbmrWtySW-O#V>w0^e7CjQ&|χ9#gšLsSzn825zO+P4V^<eZ˹YᱴSpf;w1=̜Ȗ/v?[M*:[[?WoGOTZxܖMt-n5iC2X-> cqѻe6 N[?ZI\OdEL:SfkGBBt2b!U̒nZp<r/.ï#_/iU*,9ٱW1JVN0εi[w~Q<$XT#EךXc-)UsP3PuBjTeo$aA\ ^bt!Fs>D1(Ɗe_-@TDWMTEN<.5F.S @M)h,@ )wSMqY0mvѿ8d<@YάpU+xӬEp̲T4xQDPR^oAtw#+R9ɖ Q!HSt&PEq21~FWXjȟnâAp9B:M.E$A%`#G*g?qɗ+4v 6y¾ (JTXW2VM7l<&7Ct5jeA_h/S~DfTE79f?>#gԮ@[?-*S_5 iu,OOvmQ;ՠxv" xFr=q %#PTeo=!Co,=n$B)*ShɝCαDLb ^3-%7ѐ"BO]+w->NBI\Ys"뻟ԥruV o L4$@#VT/Ԗȿ/VMOtaw0/|I٣Ln27.(P- g2k%Q0MrbX ˦*adí,Vҹ'b &=ȼKz',V!:v)hAX1`:ǙSD>8\YfQ^1Ğ0S󼯏IK1`~oYNvΈ~Ƨ": `.sDKmn@8˓)R8 (/ڄ izEz ™~#Ifʘ KvP$ui)ͤRu{Zq*>/a@Q>Ëut ,}jϡF-Fjɵ-m߉!2*]L&i;`U?!R='%pᦼD{HCU9ꑞX ,`O)iW%" 5Cyw Fh8A)<hˆ^2eb|-2c-V8CcbҞ9c(*4lcU86H?lv6)=K'>4(%Z7p&'a2M9qX.xeoPor,E*<>UFvtth ^VϭF'!,HcQ/:lͷF#xz8ŜHE$FʸIw3An(*sș/l~~dO'fA®YN%נUx7k/ Tq,rG…y5R/jPvdSa1so@5m0p4L+@EF;A?0NLJ0O rxw42ncn񃅋[w~`ڥ bfnӭl.]{X0 W+5vk>"hߘ=.|2xxO <}6,ʹ0]C|A^ɫNS`(V8U>F慶OSL}{ %nZoԯ||*6@:Y8xӈX0FB.5 $aOA+#:$ٙN™u+sK?µ]ΓUL։z[Qǝ>fn^ 10S\q(NFd t>D֭_НTǿI?Ɗφ/]OƇR2Hxe䬽W#G}_ 2cؔca'Hlj cQIbѦԂ5:eSxme_["(CMT#3ր4{WK0bE fJ\J\kM wNvmAS17KnZ +ECGWqqz5jNmUF}dnV=Ӵ|Cf~U,xI B=a` O`b/Mv %Dbֵ$G>D0=Dܭm^Bdz:[(ohnk \f#V݈hD~4RO& 2%C=.١wc fa8?9h#5Õ/`dل\yl6$T|@խ U[]8:;,|,bnKCr '0צnG{yݜnIK#"˞ h@\09V!^ץ6Aaa(M{r+ P~Q𖆏Zv!JY9kcSp 莭w&z [gM@%m[)#ǰEB@A{m;],p50YV[Lcw8o__ gQ%( 7,߈R7m" o]; zhFH'Tx~E (|R?^m"t)ә,^I mAE}DÒFSjGVOf^>T<)٪cCzfŽ#jV4݆h`ak_ 5#.@H3)]mPs /-}S *5t'T8֗:VJyܦ~1_Ѓ*v>|vGLū} `DmTM+^ׯfw($.`LGS);UZaRdʹNJl+Hbφ̞deXoN-Xf Nu3{W& qW*`|]8gQd}(/r9cD7ϟ^cep}fF ݜGBj9n܎Fz5_^h }F(q YR/ZR3dt3O"U p,gqu~HbqvLe)4\@^NƟKrLPqac?ԿؾI?e9nsS<ɚ x{Yv'_y#CW@GI\=\a]֘> ּ^NE |3׳R8`&BrHP$ |L *(Kݕ_M< ?]/HUՄY=`H3g{kI*:Cgav߄f¬Hxlz0/ѻPFj \|O<$`Ttc|Xx3ըk+ y0G.C6n[lz6wy&S,5_Y> k!JDR/F|Q?!1O S3k7B/4?}bm|纳Bdnςy* KY:cBIzć@~u$\>bd~P.V37F64YiXHUx_y8x֫Co +I7%K HnDF&t>[vS- dBZ>_5'eUr:51f-$뱜7XcFi() əWwCF Ardhs"3Ѩ̚[i&u˲=wf =D~uҍ%fT~@I[.K-lΟ> 'i?$⥅;gCd~,y`GnS^AfHUsɖka괁eNpRyn8\[HEP4zxL)F!#r90$vRW` ^R #LJr3L~A(HUa>fƑr=7j8, [3#GrqP})ۘg:4{Muy{;0J/PP+J 5Gx9D6^;TIJFA$G.G%} \D$b~MT\ 
6(iKm*lzCի.dElƐ*9q}pCaJEcXɂ J$. ҴT:<@P&^whw5.:pOӓwغwK)e?|FG8;[|xW1*c57rYh|cҝx]vUI{E[HnAP3q80Z0Ffq^KhAQ=EcAẼ*(o4d1tL"K#%#,ӈx? W//_6l>-EYQc:Kao GZ[$é lJ qpwãn‰hed>78VIqLebEsm~}ϊ/p+(e\y?gFXqvqg7R**TTw.0]r`OFEOYn2WRLܨ]3? lWjaY|L]選ktet僞` <^BLf&/SXVs׀b=+˛}~l)O< s]zc:m!y. 8M.=0aFxvnm\uSGvk=" "q?^ c%>sWk~H2(eJ0(KCuEFOl=^i{^ |`dY 1u Q@[G2fי,Z?Q?p֝_=_Z+g| 0+Wm4"0Pz/_Tֆc&A1x~ ;Kbf*$VA!7.IeL>{ςͻMq5obD+8 0`^qWw 1,8˶( &xz{ dr9JWf&X@x"V H0aK}X =CD\پү e|̧ /!X+7£U{ BPPYݨyegA ,Z:I8Wy$X"M]0.١8_;::D[|'m8O$?ըTXo^Q13mnd>U0)zC$nnk֣գ&&:*43F`˝hR,Ɛ!{n<B,e=gcB! { = U@'Zⱖ6opJaJAfͤ`<\Puo~6^$-Gqb3OA*PS | zٵ.chj"}3LdM=k>NwqҲ(KKdZułR:>iЅ _мxZR\y2VPW_aʺaw^?7+LM;A^jE榔^GTş3k5/Hqzʏݦi aBx'0d`(oqρ=*N (~\Rp"d0jc׭n%q;)=y?I#ݜְƝ֧|bhM1i)A[R+B+w$ ;6UomG gž!2sw{P~޹0`tUUXGJVdC ?9PCCjV!c$$/.8bMdPQ oʴ(JTPdܑQXEHP*:Y4Ho̶+ ݡ*1$iwZjؿ8(@,9SNxUnЩrom^㡔 'T;PtN~ԋ <#V?:3~Ac)|yoԌׁ@Ɔ&ρN&~A-wLHqɅKm!6m&9SRhEʾWvi=A ,$!ubvs_wC] e"<2_kT('D20,[rP}Q RVjt{38f/:dt'>}< znn,&i)AӽMɈճ'Sk/`1N8bWްc%PO wHtžF;i QY.ZR8F#i&~gB^ͨ^xdt9IJ8GR?- Ҳ>yEʼn Ƴ N*.wҌ *SzF"CKf1Z1eo ÆL*pt!@z|L;JHzÁl@.EFh|{B;Kx8Ge:SnvznB>!nhb|Գ}(V }&]V7tquIE񗺙ec_$os[q & 29.1|k?o 6[zK8kdm'= .u'5;Ku<wk o?+`ܓ. ]2,degsl+^J1=j>i83Qh=ÞE%*-Ux"*oj!`eˬحح;ƚO\[Add;X82蟋gRq#/'+V^| 48Q@؂e#ǗB8zecQя({noMyC>1%{ q%!pE)2oEl,N1}li)*Z(]QҀ~|("p̫0˵1ZXV0=V:#Vyd/O(U z()6 H,#jH~? u$t} RAIL&1DM[k cY+oc10Tzo gAֽjA:+*K%c-Cwbt_@@xÉ/)L.$pZ݃.ԿgK!%ˎ՝C*/>&.k[zچTQp:7;0H+3DSpgUC \ Ĩ7y%6Bde'Yg.$YǞD$u*1M̍S6UbeKk u]Vٜp-H,(&Թi@Q¿b-d ~cJBھg0_O>jƥrcHU|iPcظ[4c]Hrm?$ eW_~o<gc}q yױr 7/%*fe.Pã-Җ9nLNjzAx'qv Ɍ"r~5h{żb< 7Ό` ty{)b !R?BCہL%!9㌳oz%LMΙ1yDn1O|wpc_dh,o%4|ћBuMY" hvrR! 
khȽFp¼- "<6*(J">~Y{"PWlzi>YV 3}*+{qe[7%}3)2KsGbLJy)k ѷJ 5Vrs/[]i C~X`q`X\=OʪwKؐ|d]g!L\\ʦ *>.#|UlPA68:ңKQ^O @ W;%F G5+,"ZYl!eQM^x^RSӕ3)>13+@EeO jw_ k A'Y.[5J Yw>m8ܙ{^hՍ&s9߶7QɅ@%mhv*q2S*a];8nj*&6v '^(=ER+r+ ,ڑ!>^ DiF[GU@@:"Rctyuq͒5N6Y_1f* y 7`da8 -UUsoYEɠvG؈JO己`]xGbpHΰrE}C#GӔ$HQoFLS=ȭRwO0PG"%7HX9[7z7x62('sugW@7B<{{^>ľNi;'E؇{߀t^Pu8刺RY &Hھ&|5}JGr[@RQ&NAr(>>2_Hi AK E~U\и2|o " tHnib$Pi~rsPqc&ΥA( wR ៽}j%Q>n$ h*^^ƜPM_%OrZ9sQĉd5 '|n0 'dt@ąٹr^cR}dsP[T-x3uvܡ])VFAcV?p_++koD X[|08Rf>Z7#%a^ PPTSnb_f~էɚ^`+~#y*"鮮e$YFF|Vtײ"a,r#Ӣk+tt]՛}<WK͊G(6ha?$M9:WZJSa\o)O@5(^JaGöks7y  lwdD3e0iN*Ḩn3Tל1=0M>5<~-!>IY-iBA6Gh!;~"=.Qb͒I9\Chѕm,&}" ~MN, &H?~=%{ jX`-pR^fC_*XE{xyR1 qrF>N,pntZ@C /Kͦa3keֽ݄<Kݿ\Fq"pKx,YCxjZp{hEյs̲lMx.yh*z:%^6P9Vu#}PSd'+1,M+ u HLC+V}QsD}T2-H6ӶzP[vA`~v$X(%@f=nE>=JwJW)SAuJI 0^[Ni?yBsRteMѼcY'{!]a夤W}t-pҏ?נ4]_Se4طЉ(!XA>ݡTLȅ}z{Xd!pj=hd!e:(-#R4'7 0) c?n~~Ї{[8UM$0zhb]v] OoaYvN`^Y7;C1[H.^YڟfÂ= ʓ CE%]Q6[-CBe6u4DP!xwd׏i_O[L۸FQMŢ k&ڞ 0ҽ; @8vU+Ժ>j-+oWv{osL1>09p0/8HU0iڧDqI?9Z/}Ѯ4&C"Zb߆iثuH39P⤳!E+C _(p^C#tjԡ)o-0G}cvФ+V95!Ti"k~︽# M?e2BqČ* ヂ y+^?c Y/pb`PSy>t> F^[ 4F|ća  HL8[5k1Fca\\GizGP]57HeγM/`/ t!#pN6ȴ0j;DhlFuFQ&QwlcJG8 ?y}QS86%lcß[m8w]`l2YvyM5j}ƿ^(rI4:2lnPR5_Y@) Qyт Zy_snG g$৩cߪ9kme;nyCYgl=`7syye凟9W~AX%nbo!"8]u_ǀT1-2;ݧT`&9#s1^YN񒊒rcDB.J;8evJB2ә5dHօޢy M|گM@<{_p7Sg>ȈH|SSY\I晦|Ԟ10bMC%XW)afg٥d:Jl\j۲&y\EAtNܠ9[|eHFAhp=!҉0 71O`F_&fD7϶&{x_,b$)*1=J[rXTyyPQk1%ƣ$,IXgr_7?eʗk,*NqfnPی)[Ip7EԟgpMQSP IA>ܛ Ub|;8`>t'|ԋSnN 벎J7ډ>O =Z-NlG,9gj/^scۖ \kDF`_@Lȹs*sON\p d? fbe\gQ?~Jzq/:8\.+E˄iېBY wM!Fw V;iլLnj w~%*ݼȨuqU\ãI;!jzoH-j#Xzev/v;{܌W" j+ФJp4 KS2;"T\NV[_vXWֽħ|8ay_ص&Qӕ^)d9svIvP5'@}7cU!'z?CwXޞozŁ6߂e% |tZ[S)PxE/b^%!|,;V-V@W~g*X9cc&v0 J˝_p?N]bn(D"{&ok(4gkj16`=c(]-9lCISa:ALj؋'SEeCko$jӴ1qj7p}kFCt*<,y" A (iQص2󾁭d=10QA]GJiD!\2fUt2},9,r(m~ޚyHt\CCi&b? qQ_蒏 Xaq_vġSX;vYؑ}"2.KG%@(5QvN;A9ij,l"+MsB 8{7*Ls 蚴2"62kW:3ʼnmٿPv3c3>Ys3v&lb+Ewˍg&~r6GcFUh2=kHt6b@ ! 
}TKRKKV Nb,F9hrCCߏM%8\֠(q2 VP-]Dk%Qc2zek-`}\O;`V2%OG_dRK[%I2߉)/ʦ՝ J 2¥c',ܻVL&Tp:+.Q_[တpD[nsx#%U- b 3TH*0n~ I (~00=ȸr?J=^HA$$C:7aQ?3i8iFC-iB&Nk2ݵID8>)69(+0,Jy>4qGTIeҭam$lNE~BOtTJ'SIB0 +zфV+(V1i+>#(c_*q]o-#Ϲ3BH)v\snSZbKnɶkPq3}vd*I6^{:K$´cN3>fzz<tU7DGUŸbJGX*(D&k;44#s"τ+TOY"tZg# =(~;>mï=pV,ɬ 7M)u2kKB-?0 ڍL(W;,cRNDodhk{Ε!Ђs(OL-)fӜl*OepZ rU~wHW1v=s; ޖy˴ ^{82dDʐx+tljpC?+]LI%K}GkMv1 T":'@s, dgN>#y%5̎sW"M`sv eeGQϹ5 d2,t^*^D/wmlN"4gCE$cPv쁤 }*|!F?샘xgQj$XW>/6D ckG5A+O[tBx*D i +cܙZWAtw45rxn-mn*_PDu᳾NNwb^_kՌ]>p;I/~nd5 [T*nAc chQeAKKdˍTŀl8aI%tslm=\HEINj,3hMc:6I1eN([km,z {Xv]p$1rj#\IyT',fq}'bD :ot$<-ȥOەU߯no`Fр*jEuD=:8_{p"FتSs~>a(tIQ̎bSP.7\9d!ߚƬ~,D׉]`DTz lߒyr5W0`=;Űz4ll_yq@ 4'MUTz,=U&WhQ849L:ybk[R?T?PF:bdYO>"N=Ͼk,B%X>lQRp?ڧ_YSR&=Vdc<,F ǟ)ytJ=WQZzmy2Q3d dfIZ'f+q{;jPg\)1TEQArvTկɣŸ 2:ܫ9AAƹ}PWMoeHÔQmȧV@aTSژ afu=`KPGFyrD/Qtֻ$IW< UoG&rA["]1^gʡ沊Zo.Mn|O*B+r].ҙ^eX\]N 81~xPyS v&6Ux6ELE0/s1Y1K]&Qv.5bu0tF.%civ H e ø#{o4r?V&aJG /RJ1 =d߆vX5 к^NpD0*&FD1wsXWߪ qb .4:BfI_P}Ҏ ^4(@X} U;:%F=hqLׯ.nDܶN|tl\*n"5Y^!q5X}&A,GeMUugXǺ!\io& 5'3/! )vI`ֲ-GLwhݤmyΛB @m3RKVx l3 z{`Z?j~]mD6( ~qj8M_(W2zn?[;ש; iΞ:+Co! TiPh#ʚ,&ӭ{-wY0"CZ Tr0X80"2O7t$R],;7~èPk a!.?$d"e F!]ϮcFDŽ;2aTۨqYlMEi9nܦM3sV|Gi<ތj@dU+{ !z|Vd>HG}w&Xw䣲!;~_UMW mE9Yr2t)Gan6ڮe > p^U)`61$6$mEw ]K2^߰DtsĴ s([]W`Ar"{%0E$&4 pV W9?#?w:YA|(1ʠob^:-TwcKڄϽ#مF?fh tC(9 }lɨw,0h [z3&_f?q8ۡ q8|źII(:F9L͚jL򜀅]HMXmNss=V nR ERߟHe)48}N;S=kmcfMe% B<97iEDVnirg _{V茎,՛Yډw$噾Su TXiM2Ɠ"=VE|?"CwjSJ vtlf ^HYV |M]Uɟ%%)@AobFM5$4iH,Q"9&u*&P\9`L 5a~7q9'gwqWMK/ vB3@@8qq:+.k̓gLw+_<{"/m+OL n-FS}!6_>1VB<y Z/5@|ƌ4"ࡄsiYe^*IlۯV+#uSTqIN`BftAq}^ ;\`=Ƨ l2d;ʼnI#\ldn = ݬ\|V9eߔvU\V̕ $5? 
Vϥ_Q C&NgϤq[ݲaq9lClZ{h5;Nm>Bc(-f]g2۝ҞHz(fD+N= sTJjmF`򙚑ؕa?d.y0POyQ(H)jJ."!k:vwւҨ+ CT(s1ʵsj `W m<|kb%dй3(:tF @TAdË=_ǰCD Dž zmˁ$lyf#hQj-zY]LצKBF@lbHNn܁lx`9sD'A_P4e%6"-$+B\4oJuD{-&Q*naUM`l|f_Jsʽ8`Q~g_4}f,!c"b*T9/ޗ )85v lH|tjEƁ'=N a9g\Մ J|( }?˺l̨-c|kz~ʎ]+ QI5k^9ҵ/~Jk0dOv b/1iYeThjpj$Sk|*b5K0paR1 6Q*t&*O^!s:"SŒͫFb]mB_$8AuFc|)SWB.JgM An8f=Y6]k)p ۏ+En5o Ykl=!l\$Ld0w@n_Av ۡK0^S7aLY4.qԡM[qBΰBn2]qs;DFvo\ ȏ2ᒎ!NwSGshy ԤHV Ը(3=}# "8j޵?rm [Fu>Vw.'IA\YBna&ՠ8W|1|ģγ$(Q^+?[}})8t}k8*XPd p:.@(YH\zQs;Q༖d=4M6*ȶDtB9Dvb>%b%rWlZ&P:6BwVN`u[u\'\sX:7e\ Q.P<Qbkh˧'CpS9O`_GڑF+˻UH?\!4 K GPl34`Q4n"UqO-Bi7?[Q"b] ZhW~J6OEa"R֙\:]CIL{,!o:Li6LF?)&[rI:o?}R[;GMɝhQeK_*Rقm 㶵\4$eiⴄYo$^_ 'hO JH&JڋߍRg.~LO}npmEy yHהCE==.@Do탱$ؘI{ . 0) sQEhpDJ`tA8)A X=zpIIMfiH*?e{u$3ɓ+Aׄ>?JCFmm=L h8$oG:`SZ^^e+D7)`i?ⶋU}+R&9>IrSՂqM \>Lȴgi8<@rӝC -f= 0UH_:J%Z)n w"djrL{yy}GGbS.7?3OvԚ(;ƴSF*#7CNWT VQLH?FC҅dY}BYeL[Uܡsjx\6tD.yAX)uUkEc~PεI*Uo%Zb,M5HjWc8#HC4Dl8]u^Gm/%S J6{8])܅3z>\s3dXizMsŕXUx A<[ >7YPşPW\N_ @_[38)G 7u^ Ed e@#΍j߅ZA~띡9KsnhhCWu;$nt"6oA(*|OY)r׵Or@Z:WLRvqɛ͍TɲRrA*PhAVr>dl Tq^)D㎵SiYCI`zAV_b6HQnRdNhcliQ6 =Bp.~]mKu(<,Λ_\̆#+e皆Jۓ0Pě8<_7mJe,188<_GN"xT(DrcdԠkVqmCHA8P 36g<\'K+QeغEGg{oĕր:1#~w(جyjOeD~IlJ K/"dl*e7y?h=*$B+ST_OAxl*`$(O" 'ݪp18?36#u1oӗ M͟yًRX3O0L_3~mO$AFWU,2~Cmq. ZAeQO2(9U\= +9 ae`zي %'?cTC CX2r Sϛ]ACl6Lz-sw8Zq `mqYH8j3$~Cgpב:΁pvw,2DT9L P^1[jkck\俓VЍ޵ YvPT`Y jQ#B-qW΂/:6%<6>+q9:ݘ:W})KrA~ē{>iB\LV>ǛmRac.WN{:*3x-W!.'{e?aҊԇ)?TR[]dm$]. -zeYhӞ~. 6XZRdd^yI9 ^tZ б-bU$& } ܎dRDy%j8 vO3ᔹ~^ M޳`"-ݰ{PSP%QYSdNŸ><%\D$lrXRxz CsG1(9v7Hw$ &>kNw`aL[]p MdS->'Wg%?k`lcr)mXSC~ִl> :\q{54%yPvJVz].OZ 6M q ɽ]dyh{=O%G,zO .Ĩ\u|#8IxqZ XR X9lS{ʗkvW4M*f!?VtIv&2w6G-o!/U-6*H?]D) fXsFX гy? 
VWkhmӶә(kT= 3i0x}`B۔({"t6Fya!>Mi񦪇]邚EjmhYyΎ&J ^ ^^VQm`) uhS1` ՌMg臁b0 ;S1\Z Nj 7kj( []D&c~B/NPbO&:j_#WI ,Y͌u 0T`x\ܫޔcZ_# 3v98MZ0ʲj #8w;˨΍\钂a-9 M {=DTtU%Mp8(8sx5ӝw˜ܾtF$ )V1ZE;ʴlfcIc0-_ǀǫ՚gBv}Os4\"MW ;T*)No= Xj$bm4}ӨPP&zL`ͦI/W(*6_b#\}OYQ/[Z|s2~XʼnJZ)t&@@huQ}cgg QJUaFtv}a A>L!;NW ͎&- Lu؎NȀɇ\^m) I!SWmuZkx7j\7mw;++]T[m놜)ՈSL̟+fsjW/x×4|5𝘬^s w|c /2.VNאTO(@Ӗ/m4|]Rzv p(-.`W.bu9o^i^Am*g˪yJsEіHтZ,EOA$«񟩓ꛬ__aAɦ>!Îe?qxV:mɞ4̮jv=^h,;0=T*meO1.i''+佅@v}ZnpxQ3oWU6xiv(@Gтg1*ö=vv=Znvf#,7 1J_4Ⱂ.  jig]SL+H$ZsKY&mߠ4W ]QIXOb`%Bf?1] #*oRx. ρ _Z-KrF1_@0X>VU4%L/S3] -qNa^= #vEK,iYQv D}m#![}Iʮ˹Š\y AӌdƒكTb PЛnj~!X4F&Wo6=8 rp ,AuE(7eR`Ba^fkzD^Iuͱ.'j!}n;pP#]×K,-OҊ=mý>.6Gy"‹ׇSnU70 M4g/F0}j;R݋RyHDV2הk :q)9hJ|" :8p5wXf V4OJ)6OrF0V }7cS{1$Z^=kPҦ^k޹|Te) CiRzZ^l  5>Gio-$&΃N<ֆXOYHhw5mS.?TSׅ][- 5_U6+;YbYO.; u $s}wrL:x0%N җιA4թWJv,juE&Y>/sZL0J2#Ɨ]Еy@H>xq'Z]6t/ŹޣqB+!!iBF_[1W),HkR /7kAe2PmX_Zr+wraiM}Cl3Cۀ]%MZoLktO*E3zr{ݜ7]2ŻKO=Ø_GiSG-"Fcada;"љ0Em⊗;?7prsF؏+?/XAs /^2+`yH˵ڶ1px\/B@Om4⨖XW0K'vNYR>e.f[`n K{^üjC܂,`6':'Ȩ);eG Uw&PiZt>I˷6wѵ U d?g  $F˃ U.6H̔)::Q|EG,=٪3?P~z*Oim^eeޏVAM_ü&D%߽?h{H?Q,q*չ5@Nvl,ITәÎ|fwX <0eZ>Id$cϣ&\r&A>~_%4KHFDot6;Xmjw`" Owlܖf:A/CRXbGmw)hHqma92ZL >>Bw'vM׉VE,5][MkYbԆG :.v9}mWBFi^yS,#WhA$ [/ҫ࿩`ׇץAG/s@SXL9YE-b ٙYfK:<sz3Q?S3)(kZAa/_Άw) kUA0,rq\Syb Ye;΃D*P`2I=.9E < +VKi<ެ{+9!y wVN|h#vx¾ )/Sبׇh<|)g,z)x9}]tq@/Gr~ `qZX%s zI̔qF} $l0@ԧ/0xqoO 8h1GCqMDګ=$cuSG]uSL)eLC XAQ/oӁ p'Pihk`D޷vb(&\>x\B9wr6Z]OVڬi8QpdG{aڊ.]ynoΆ;c̔V)9P#68D@rx Z~wۊNJ2O~չpǫsNOZkm aAaևeqEu4 &#&7n%\Z.wC Am Y*ᧀ| Umrt7j@/ +*t1 "ffrI؜4+uA>\f>apjcBǴ/i&bDc4R$  ߧpH*.vNN; 6gg[LgN-0r7ԫ:jʉCRÕ6./y3(_!t"$/ f:;L9$N]X0ft|u CCe5ȁSR4h=iq|=Ӧ$+H RUҩ:ra_jށ?rB]29O%bAAXIjM`hD5vH܃J?yr{1d|DtPă$7yWcp5<"/ أ\>!No N`VXwz~ y\%D38ک,Z?;, J.E .`CJ OQߤSryb]3(i.E(_u46:DᜠaUGΕj7ͦ~4if+턨@;@@nJ:/LDg9X)QׅlErP:\Gx=*'>rF0㏟=A^(df1\L&#%# 3=tIR *ø\RL$>#t_.G43_AxTx5!i &{"wvѹݐ]VG o|:u+V(qPKV`J|{_j4NlR,V^xpa2ٞ@j4^3r9t-kCÙL#րknf[j]>S1 "~^ 6)=dTsUoBVt 5~C$$lnϵ 3" :(g-ߗ2iTN aY(#/%bTpghl~߶\#ǐCY+a _ٹž&gKmy~Q[ZܐD]'mEʘ1:hVV2=( ЩQBRi'O~TAGib: zye #R![8?M,&W^ܺ{otv/A 
^w6[gOz`'[c?5Iԃ"D+RjEDV;{voD0=XƝqmgOxb]05r2Q*BjD׮1~BWB /Wߒzaw5\;9k']|:k+r/xJ*@qi"4>mu(!(spą*&Ɯ7*NIl8ulǨt)#ۙ}rti8cM63L6 Gҝo<9&NydG00Aa5+ 5 h!DX!Dq:^z)Qi&,Öտ /@_ue;=*hO (Ա&T{av~Ir t<, 7q% jf%́+h-H 5@j[VXۄPbY(%$\ dczi}5:2}g#ut@o:7qj{:Em-l%'yWwt_ x>ֽ/ ~cޑChފKE>OFب8 "5d$r4/u_Cm{S&?`{V\˾~08a.y2콿 \3R. U_S l( ӏZ(c:A9p2ŜgXTd{EPMKL0A8 l֖]ů \ 1C>+UC\HvِuNE~}WPG-ۉ[{ M6ya5j4;jN=Qy1B)XP%IAkg65$Q9"'g /T) oَ۵¾ B;[_ev^„[yO{Ӥ}x/n#v1 >3\;b:A`L=)1EJQLϡot1{\ySrUգ5jeyR[X.!Fey7)_`Iwe둎CB8N[f7E>+Q/>ę@%k`-S}`G5!;R*B9۳ԛ+;07{}b\9Q 9&+gq:cvdlxKBAO8ĴmJM5aUJY^QW>9: ki\'Mݑ_Z"u;-p/ Ρ ?(%?[0_[\.otԪa8Z_P@bd#it>&u"@/S*clj=H9ݬji^rohfg>SHAsqVջ52q̓C[q\f;XrN3\YnqD.W i<àLY΁o7a= Rk?=kҍ*($MY'rp$t&Eq60L x'cA /s! xۛ 1j+%9`,2 UJ,t}%0XÆbXdiw8+\4BKZ#+Aogٷĺu ȇScd 91& |eM/_}4B qcn/9Jb21s 3^ԍLJz*49.hi|IIVnдd3)TcBz :H76>J} b0բ ;gǦ ˍ]{c|d[)ep me4boxD}R%11Dgjqq :?&\B4:5[₀!pV\Af~!f3ugXrO-qP}{|5l ݂oŸX0pZorIənlq~"}qK}мD[~? G ӤY@&lg[&c[K"#Anc!r\';Qi#]#D5eD(C g2֮M7# D QL}# 1;d"(AשbŭxдLz(U@B!oW6)"9HF#-dCÊPX% 3K<$ܣ@MMr#rux6u*?k)hҺA<Ŭ ֈ, fdIob"mc0 %lt|YE#"XŮ(0<@93]Z4?R^N&E-e#Mw^Y0E٥6ț*!Urr3I'y{AtF"i&ueqNs)Ki c>7\B|6-FtMK۱.!I<۞d֑㷑k!E`_ ح- 9p-!~Cǣ6MPNY5wr*:#Q齎V-"4I~t͌*Gl! 8>7%/oTڧ"K*լ#S,\26IՐM|%uR6xb]\c˘#1vDU5zP 0%u EPLïϋƤ1 l!n$C4h$0bL٪%ew$~_Jp؁Dʙ1/{'ar#P7? [~R @Ne$zAՐP\+*Zk({ږYR畼}z6x4WJ`NⱞJ*]ZN$t]ޤ ֱJSQ9kLydgGdxT? IYy-5xqVt?q9([7s1?Փ!lLg$d PUSE%{r5 !?o+_K}Sj} gtf9B$?lŤJ}UЈ»ax]%V/$C o .qG2?!K ؝Qc8iW%MYh/xS*U$@kjYdƲEMw=w0×jPE87<*KP*Ky:@!H]hH1o"PWTW)<'(?SPg-4cz3AwY67cT= PGB;ljGnGoHڟ|>t?䐍Jlj"i5|ԋ 8}!7y/vd-l>:ԑOw:g 5ZoKH.=GvF Be)&Gx` N7J,vX4. 
[`^L璽prQ!M^~'qQEw™44{)OڶeN+gu嬭?̐-NXN]z"'ƹhUer>TBHeK ~~ޏR#/g\?bxߗla-pk1Dd.WoHޓ#o|oۙ+0óD|pUѶppBC=]/fdLX 90zMjm0Fa]G梲xޑ(v| ~4V_oMʠbV2sVc4Xu>eM7[-Muo|]bX\: -<$P7<Н1I$ ԋR:udNG^کvYq8{sd*ygi.v|{g\(f e*3UY F{昕#8ouwXUYPwKz۠(p8 /]u˵H8 )+ARh -m)9}5d*tl=֝gk>y\dE wL qI 2@o=$jL:lZI]fnp%(2Mȁ6/yU a !e9ua˕&^c;33mrswma}?|.|r6K(32B_4$51~azH(lZj-hniG0?('"?ݒwxaYZh#x<16m)DNp"koǣ%g!NP{]{rO?8w"8ur#Q$„j~4m8"0ިr_?+1)p\ 뚩J$sfU,!kﻧ5HB-9`i =Z Jdt6N!0k·4;vB||kY)Adm+R_XV_RPO| wI@?swN!Bb6ӻM~ er%P }TU(wÝoprKZ@&;w9*+rn ^S\E]a>_tc-oj[t2z~uFͶfgQ)g [xk3&Rn9,8yjmnKnTΤT_Vy9qc: = !GzƲ=9%׳r Л@3D2/Nf~KOW*0Ԃ헰Ҭ,`0N9`S.7' haxVTwn k(RcMne3E, =DF6j2pı b#D9pԌK.jY.N܌--S(ډ=]vC,};^ל%VKR8zA)Эg&/8A@.Դ:BF:1,HuI!zH:tȗf*> nŰߚ+݋p"IË\?vc=!)|Y=[?yk}ڃ\z‚O<W!=`0L2^-˫-x qiך4i:VW 5-5E>`yO1X s6Ugn )G"ED'-V%\n*Lz'kG 4 *']F_bvBٸ"]G4s%T'7bc s&%bW uwH֎ńuFkng-- dwbPC{яOxX6Gie\|YB 7Җ^ FyPQ'7lw(Ũ|Z(eTZe݆k^'ܪ[n8-;'rt9HGydAhV$[a7t(e\N,V( 9$Oc,:rܵ׉ œWSݧ {PtFG%硐iQu'ΰ84Yboke őyA{P!35XQʀVՄ9'yt#mx)hG`%'];}lj3$=w:^[$ tyؚpیo?lNA,'xS7;ӻ$gӺUsxӯM; 1=F @cXKc\plko̳/qûB$!h(e3郔;/Il{A$--Ҹuvh'K]_:Vm]D;D{.0]*XpzkG@DR~;M5Qkl}k[5& w+5#@yi-F/G=m|ȻG,X)^AWM \<щmFQYq/:uz)i6,L&qz;v.tK'h(/ 6O +[xùL- >i#cp:e9s2dgEՑa'c/F-#WïLDΞSb­4J5Y(8e[bwe` #HVHJ3w_F"Ɵ2+($w&|w=u3Anc) Ca$C㷭^ Qw4)Q\go]΋=!$Gjzʑ#.haVM"0yBk|xh|oCǀoh=7 ^k˟YŻqfާ̊ )\A5 /Gϻ[y/=ܖYX?[s0Q;j?s|}6 Jڧ }m>5)Wms-]5v?Z7rL{D4ɗ0O-I |xlAoL4Qb;hUpcOaۏ ~ (#d̢Y7*فF y0Bd6K.17QEة}j~焈BwCۢLhm7,޺Z8}kz=^%EI9smM(KqwcآN@'yN@_FakO]fA'V~58E0Di:)j&?h[í%NBxgޫ/¤V'tRdo`_v :V8"6/ܢs~P*-2l]I%!vD~p~Tړ|fHD`qh Pm 6q);vaQiHoN[`n$#[h<=skX{*j P(n&>Ql.&]Hf) 8'X䛿ΟXE:ճ \Wq;G!-I4WP`~hNJwDq. 
PA BD7 0M&I0]Eq\ xrkJU>Qmw}%8??L Kcv&b!>c;d\'q[: iN_O}cqyPK *!r#'M$T ͐D6X~V3k 3ԍgXkKRW\\?_ 3; F_]A޾ `mz|W b>__T["iWRt>6._g PBjEٙt7nS "~,y=4X9C#rlCtem& NT&\N̛x])E&zsO"E,Jmd/Csj8vM a8kxzucğf(p)74oq=P.بwy)y%?-&C#4ϖʶD4,z(Pi5xn<5`]v<',Oz-eC/QJ?$u Ey v5淘RHW2QkliWUYy~҅~gܚD29Y=v[.;x3 ("xW0c'(DҺz-`W+}3X!A3X5MPw@ZiyS3(ˡ],6v;aBb){rð.2xD^oYS T(uv<VY:IGyNvArsyxNd5D7澧UƧTn1b~\?jx"y+6+<neƠ("we tkq*{ܯya2]M/ry`$z_-­G[J]2CP=B~7[ }{W8|nAZ ưt vjoRnlO2.+\0{4V)s&J \6hkf  K̞ ^oc] ט*+ǜv4yϪ)C;h|[fkLL9CQA wdjNGi;qM4  a4f8jC L &,z(Q̬TDΚAcK&A=ƭ%2R ܪ)eD4@1d4JrkUV#o3pH"2V؈6m-$P7ghW~vk m*\*r3Ibꤽv`ޟ=)Eo|HTFj7W~c]\ էhSFr|jzfxm_ 92N2s'S-tCFr ts.{ٳ.N,+ (B!ci@hfIA3ouύl- }eդu_" Aɪl!Ls0Rr/ȯrik8@kJ|vʚb8WG L.Dc"ݢLkggr NDJ< vﲄηP]l?_ W3P#Lh?u!jWTB}O+ Wx"VcCZ˩81L6k/X(LL!eozYwCHwiG&h2DTÛ\;HfSp>]d"SB܅8%Uߢ? Ԉ[$W[o]nn#Ohޒ]\Q<`TO.#8T{]5@ط͚PAx0tʂ2j<`|-h&]ĥ,.8FN|WŅٹ VD`͊ߔvcty|WoOEK 9BpG)?ʽ*)zҹ*YDfB ={i3\Xl~H 4ҚZp[o1 *~$'iUb* +z=i^ceUqpN2>v{hl-'a+_ҲZU}]J' kudnXJR mWkO̶c0Oe vq-plk?, :FHJKs |<FŒ-z82x Zt<~ct߅s #E'Y"7ϕNlL"e.#˅ mA[P˂YNz:XpY[MhjN($m!X]G!MͰ~Zn輏6N?-Xyޱ*Sz3@8,i\lO 0@Q*P%ȵ\7aͯQPkHU,uq,}6S}hi/:&*s|% %qhuV7oˠK֓%bC}<$/GnfG=M]g/q,Lg6A„f 02;k\IUnhлoocz4ES0][!^`(?6ڊԌΜDlk[c>Ptui:AYF/I0 C uLU/lm%5$$OL}]Rgh]_YZ]6Rk%qծs|o `g=c:2țS@ӎY@3  Gw zB%\2=5;JG}#ܣ ky),ET3s+^2OP}8twk3j\۴:uٮ;YCA yoP J*'8JJY$0.nTڧf8RR15/|Ա.&o!C=;cy>!ZF`]9(,Fl!!#Ote4#Mu ]#AUA̓3@nM&3)ϣI-" k/i3K ;ujuVxkg披:a|Rǥ"+Ff2%Y6.i]S-f얬+*ґQb[eK Pn% vihqb"3* Kvy$h1Ȩ&B+lC^|x&AW||rD;PƩ]}0PGȑ"8/<@U<8}HIt Iٹ51CHD,@\#g3Q&x_ldiMkЫ;+ O(^Қ);WDI U4*bf5k{g xXqlƁ^'8NXe+ǻ~ G@E}S堯D9$O+yrU=( kzc値Vcl26Lyy9EK×㴈ŃiK_^jW"irG(/joBܠߛoSE9OoYv-,׎X$ؗ8sQ)XBHV]7EEj'kN~60“R.vh`U3y4΂$1ǔ|lOh3U; HtOm*y9V\u{ Gdwd;@xOz#OmVs/'ڨy…F! Ev ћo$ev}oN]ʾÌx˂_>cieةa qF下c:z[zCQӥ(x'Y$džB8mwqZ [r "%E㢘Τ--{gNU'cӽ%N^vsTԍSk[u1h1\:> @بg-ogLD2uזJߜ%BR S0Hv r~w6m|ThWbBw=8ÓX!#Ey?Qe?TS1ЄiS_Dm h55*UM^ >F~u*tlt`cY>|zg^YKn,VQiW^,E~=i>@[j쵦 uiysY'OL9A続SpFJ$~@|~aSZ8'ݤ1[VgFl1#YΊ~r- !-[8Hfq03nt~Udz!/Y gˠ צw}N:Y ycuXIF`0k>P iR >N4'@$/8! 
z t@Hx&.KQ#qE 6} ezyp>l }]6?z<)L{\mB4ha silC[k$-LiQn!SH8ڍI"^@{JCl`UW6zXݜ#wwBQo| var,^vʿoh|d>Asu%_omGX:PêK"p91xghQHek̮#5OD< S0Ngo}aםΰi {+Z45t!|YvS}|}ANU / }vQc(rF[SSff1N'Al V 5WVtĒչerŽDRM{<ӼXVY鋧1;m|0oI^ g\19q/a̼FxԧPwX{q6W稦r 1G[@){3w8wŨWVsuCi;^ȹwZ g'zq+{S)G'0RjN v1 ĵ{X:qy@4gJ=fX签$l8܍O`k 0,(1q%XWvlj>h^>ͩEIbtJ]/\,̲k_5n^ăp^h(i|W={J-Is~ sm>Fnf>%&D{J@5Zq5gQʅJʉ5=f׀)Va odTd(zTQ4:eJHznj-\X)ie$ jdZWUF1{B\t/Ԯ+A+zǬJ M!.nފgҩ `s~Qʯ3T OW!ǚ|iZ e"A3KJMخ{fF5=8/ٷ.2O)ZW6Ӓ&@HwxIU\Yn$[1*X9dhGV_%-&ϓ;cL&JXmYY;Lk%_GzH"*MO ]n!o?ET|x*K4RؓQ.E"e%88c1E&z@zBbTMFl6w<0ɫh+ S' *|Cg^/~G#T*U1 e ^j Qi b牄6B 6JduQTm T(!8}eGxI"ܛDɍ]uxlK1:,ha\({ʃcQ:ZxS5baZ| ,P)Ai3{M1A+9s<`:يUʌdf^[7'*衐FyUipZ,?<)Ӓ̂2 ȴYs1c]WU&~WBYp:4`'gfHYinUb fhAsmbO>ʒq-xz%Шf,$ݐGkpmKX78c%1H<́VMX͕tc"Dmq^[`!Yqv@F :-w((-vNE E^7Y`f nPe~K[X]j('Q [A E°=BaM[p"4|H nx.N9Md*Iԭ[$G7`n+(G`B|e_nu@3C2ró;SBK(S9")5u׉GSPH(J4\i2CC=3uGeLZ=ܓ_ԃ&mkIߠע7]veY$Pŏ>=}z7?b`oDr;I<ҟGBhq; +0Xѣѝ8|q?Pה"xJ1u'X}4x@K}JK+'x[ؑ6y4"RUEj "$ ưXp $O_QCxQ}6Q+8ڐU ULdn9iJW蘥]ml<9a2 {S:ooh.N|]Ì)͇1RmsQY/RԛGÄ|խlp};I piF&Y/e@~{?,`Y;P2a'CG~d`8o0BB&< 9dG]Cm4h_ M>opSzԞrmY`447EHC 06=JZ6p ;^\1-g7*NTنhLyڲYQo U9#\ñkHvY|+oBԙ1X/eɁB6ҳhEOF^`ru꿏?OdN|?<@k4RE{@چpZz yk1, ]yథ'/:S+v:Nfw(35.Qsoxr.=tH/dEM"ֻbC4?LIz@g8rM\3ΉSF ^Ehә#bCl]+;M-gz&N^T;P(jjR_륦a`0[&)gTM3hc@cWl62A#GEhRh4Og(^JWcŢ_y;=ZC|v=9a"Uy[=+b㱾<9ᢇ{kz"X$%X|] FSUhzEhV֠S3~) /w#FXc)܌럸!A2/qu4BrKvtt= Dxu j Yॖ$^B*2|WcT82?姭\bZ Kaǟ#3pW;1dzƸPیؤP}c" %IOHcQds5&?wt_;Df2Vs;K,R̐C32V~^Do&4nA[VYFf|h5"~r; g1u˃R42 ̥W $pb.OU A17QIJa=l|{"Pǥ[*ZәlMTZKmvk\ƐsW>*_7(+agA >u7Cr;o)rևpP]3\ŗiBk=#cTi"˾s=,LLxP (CCb=`#~N-ICFHZ&ˢYdO`[cH 8 |^U- CV˜:8'"?noD'S`%3`> fU}A MkCR fW~^x#Hf81^aMIMHSR䶘א랼oք|wVx.v'䯃 y]Jj&w%#]Yډ;r-L6(DX2}j,ש!*>j\zI>JdMYl-vK;o0T\(/[,7 Y%!ю*TЕٚ_.'*f,#@\-uѽmض`]SƐ}ϴ}.DO&19: S6?{~yBbUFHEbN)a͵7N5B,VHw ǎ l gzv Hr62t|=mwW_E̤-9 xQPnAZyLj*: :OENn<{tJy2J2hqCTX!4xܸOOx6=}c]7Bw?ɰpM[p]"pQ/3ƯÍ6mg^,qg0}j;PU aN &)P&)yd>; *][t8hOI]r@ O)I4w*q Cfrfy7vN$>%FInߝKZ(OtVq!٧JqAsׁ[jWf۸wNT\>$(:OyQT7ˌ%3%/5<~32./Ry=7{.v`axJhEwﰢu ʼA&SjNltS /-2Wn,se[`Yr??vIa{&+QZZwEc˚zVnE))?șM%VvE]fp.( 
xjyre>R_U,<7WW1D,NC*P0.pdgE++ ; 'z•gj,8$Œ;WǪҔd\ʊ}tOp1BM⪌W^3#o[P󡐨M\6!2:R'RhIl]`/n\ʥ$ ~w/E% 7NW6'nq;2F.LywC'$zTf؞e:P|&':;PQ%CÅgb4un6~x9:c"+MJƧQ[zH6z[P%2ڃE-7aȓ"7d'^'R//"$PZ KڊL3V^0u :9VȪ TK7rL>(`bl^plufڨo3zYZNtI(g*;t^Y%OUpRUZxG#ȑ K \FYJqݑ.c|{_WԜ}ͭKux*6IbqCn[P;(f)Loe%D #@KE:gн5,%żͣ܄w+}>BsKj ШgiXy;}7@~G ԃV=u$R1!`}8an ڌ(WIJ]IGEorPep𵟷Δ!Kҏ=.rQ̗\[DsXN`-&R +>rqp7orS1H^D:z;i^fH,zw$j] C\qs|3kwܾ<tX<'Tr˪/ksj!h %[Zq 5Ca"c* L:= K)^U%ݬ6i\;Q׺ JY̵d5*9!G~'dEl6+sIEZz E|H?TN܁1lM1T Ivwy5g9~W)"Va'5ˌKE$\5s Wxsl \kPBڢKI98p#o 䏥4@,1>wfdEe̋]=Q}8;/Qݜtjlhu˃n 0xtRM{F'H(a#gNؼb{hmdVins\qmuˎwńE5`!+-`lzwGN:%d'{uhv&t]NT{5W8q'I/V$0Ntfh{1%jXhl $lADBwcDpxT_L?Ižۭt㴻<ᯏt5s$JUP{Lu~'$ۥ ĜW{EnjVi_E|C(xo>~'^Yzb+Zb(6 XW0Ǻ1p8a)PDeÖ5 bp <6s_|"46-|I`R\!c\r.[3H*ԙpįyeVΉQi4L2}o-}9!KnXxe|iv_ ZE4ڪ;̧XDф/:x\%̕ OA!tD[,dK 3/[J&LFC<ƺ=>딟E@ǂDLAA P1 Eۣ2m MsXlz楦b }*$epO=@7RtTH9Y_|S*]њ2OHW nXB 8P%]D*s7ƶ{>f$)WqdBOX݊Ϡ4ݥYwwp1aI"FWv W)Aᚫe:6EԖ!F 7 <ޚXAϞrERp{ye13J)AQ)ǤI`0a-~І1'ؘffCSi osra>X-/e|DC-T[**ENJQ"4 M {' iU#@:iwwUQW7TD HW71"\1!5SFeNR𧐪gq rѬ:.fo Y#~;6:RKxmm yN~^؜AwpcuL)w&6[&H,yngP@{53n0DW=sxlnCOebao ><\A.5cXdmeBO@Pu#n`-Wq!򠑶$|j{R15x0hXblg 70S$k:-Q`Dȹ̚7/],Ueȩt"+W6+AXڵݷ=1U]mPH ~Z-9LR9Ƣ`tdk J1\g7y%G4^7 DjI{6J%J aY;_9wx&~ڵTZ67w*:U6eck@]J'>"p,!1%חl6*o)z8i| iOmqTeSUд`EJk$Ǧ!¾:]~4QCnl>%M@ |z2,Dp(?G݋kDŕ=Bp6n ,,f _TW*Tl* (߄U*si\Azr,؜Ȓoa,7"5*S۱Npm$J=^yRtˈmMd_qҡ?Hs2"yѨܽ=(LͪE:x`-<=^zٳ`\m%@b|Nʯ?[q8[Z9o8w-Z SU)N% !|xS1 o\rMuA| V;3W+Ǡr˺V)!/b]f~6_3C_Htr.boRwQə[Z->{=K5xOVG7UX1:<:,&&>nYz0Ru`/P0s*&Tń)u2DbC<<%z+ռ/jy _A7l{ѽdLh뉮t1fuIq? 9;sZWR4$To(߀># 𗧼O왮٩ENvG0 gϗOK0wyK4h M7&{&oa&=~J @: ]PK/Qw$Å,c:*1])30Lx6]%8Eځe|{2r~wIZTd!=ٚgM=%߷g7P@H}֨!"e1k$Cs IqD|*†q{ymGKںjY׵PN<׮_ݯ@ub&1"D ^NT m:26JGcb_ MXҙon ?XW푏!#aVjW>?-C-0UX(xp%6&H ncLX |&U9OG,FyB;Jku\r i^j3F礡U;GF|qO^5u4ZI{t/7VTB]V9$(*+j8/:3]U0NoP,|-Rø|bm]o*T=d*1[Lj<[:9)z5Apsiҳ6)-XRjo cZr (v"s9?҉1?+`'x!.~pR=|\nzG|R7G]* UuRBmG!ٕ[$j ';儨tp1HV |$>,2>à@ekBTT)_)1yPڱF} -/PІGIXa }PAd$+ '9/Da$+)cԺ/ti1;a%y怔L- mQ:o5S|d^or}KXgK64*ܡL[ ! 
z]68"52aLV!ovߵ*Ym<#rW c35ҵ,GeO?oL$z&q#s![d q@%Ad?֬ۗڍ(gjT@p 40oCޒ(CDKj9 )\UĘߔZ7fgd0ab -5 >;_@ybDW*.=w0U&ŬV.- *s6 ,&+H_^(H9z");FZƅ J<ƭ:TOJ&`g= ޶|kzؼ5<]x[<d1@Tˑ^8 Q?%ĎkZ_H oe"61ӭvJ$IOb7]!s0 HE$X/)Zp//%$^(!Ds~ Mt 6H%+/Hzxw~5LH,3>w%^w= :**зMV}&rh=˪`or& p=¥ͷw|=opV!Rw߄6IsC>,vAn *$=:Oy$w}piSiЁj-J'jyvWٕ^RIPE7#GPlGo\;Z&.  捭NذAdX1}>]iW V ˱i!F"w~>!vnbdk} >]P[aT&>c>|>@M,v"J TG$aQJ=ȧ-Ay?-|y͖nsI(|VwwC2zexoj"1y Ӣu "tFǀ" /&-ݣ+l ͙͜awPD @Sɺ? p0,ROlYBe%M%uQj;m'mvnD Tsǽob9@Mux80.Z=~\0 R?M%aGdlWve{c(00e%ʃ?#(%]IJl3ȕ5 F+(1@-8?1N$!p=YEB^8UQUAI=UCtZutjgDZpE@/^ mPv=f'hLv7 o%fmѷ EYO\!v_˿6 It^̻Bb|jt>vp&d=x4^dЩ7ggxqSiHl6j= e '%O$Ov$XGcGC0FzvO;h9({S4| jHnt:v[tw5$\W]p9p"ܳvJ%saЂ'Gnm"QDthaSL䝦1ft?K$!("M7C'9KO]{.ᐇUھ20OeM,a u'eaY,*a[V$34R HwF/N/IYSEt|庅,F n 7F0B۳6>X!CArE>LFb11ucbV ft)C7__}l<-.5= P#&dXbm: (9884 C:%|#%q~в1oXd ݢK4Z~@ъٳ"ɦe'9#&b^[-+Zgv[JV$dIvoYLpPM(9 `kjk[ 5DԊ8/gKzIq~Ĉ3z8""ܘ7rގ8Zގ;ȯ|(ޅۨ`]⫪C303+/x19ȁnh"X+v=x6p/5lO~z̤$ͷ1Xv^0S;hDž+V;.+'S&yrpP9{C&ll*ݪxZ#nNִC/JSx׎6W]᦭hZnZ_" 'Axo5뉤/u?29 )dU!E_{ o7}"֚^qY%}~Դܣ\0dՉqnb2:ֽBzvӤqˋNNQ)Eϧ\Fד F? ey(k%ބ 9EI}8d"$H )PU\`ai ;/`kß+O" #Md͇NV1/dv6ҏG@a]v AES6,8cЙTNЗjZqyNj䜐H:*ΉWhwMP #8mw#4\/4rfW0]`NO::ҲT$2:hpI'`aֶ:[vť$c ;"uv A#.}d-L<-ڹhiL0h'5p2T#ḓ(r2/78g.&3p/:Z )8:pKy*?‚WJѧB̯;AN{XZdf?'НFM}^B#ɜNʡxcf) M^ Yހ%%P0{-w{1tẙ eR}Sd;y`dƙueTsej!]$cS5vEW>VEGL_F rHN0mH#4Rwr訇b9x'4dT-U=y ѐ͊yq^U[N2` eF >8>-NehnS 'BeOq`_gkj[ oDA"qDO1;~$g0j# '0{aPj0yE{ۜm5>7<Ds²!%Zm#a($wK(O 1C_\|Q,iu~<[kY=I&~a')ћ+Zf @ OgF=mn$[wҀ ZM^>I4t@L{9avD5X"-9D|h YUB%"'Uoo&b[\ _@|#К]gN;;XAL$NV? 3QPN.JF6י^*"5Hƭmd@c8Ьk+MG}Jwӷa80UMZ9'\LC(`gw&e /[&wz?jg<3d 4=ښH"WbAs3jbY)pӹAm|0 Cn'0_ɰމ,"NM(HQ7> ?Qٓ#z2C]Ron Xԁ {#sj?ul{d0Rq3paпJ|پu2= Jv'k^O kDlt9jYzhuM|2B>vN'mN1caZrSo~PJl}壸w{ijKhUl.x:z+s8}n<`G\]; ;laylS6]?@P8hg? Н4CVueu⑯ȮV/ᠳdi;dՖ5>Z5<d 5&Gh⮔A(~0aQ>٩;3 u,@Bj4D|xFJ:xbM=ǘ!P6?vZ-Yђ{mOUv=7!E2h Gh8zSv⊭P>G@ET)<\aN<ܠsM= !U4NI<ȇr2ZL'7 !)mTKUG)s!X|Sd~ ´#,9hڰ!f c=hǣ5,$$.pAaz/.3E(+uhg[O/sn w q\ yl9'r-mBx&׍IپleRtz,{׼*'wJfW-x7TkV,%Omjw>7fT 1%r! 
#wӓ֐ҭ lsueLPїpx24cR]L xEXtzMdEr}hiLy]Oi}?n |@%#01RILY4`_lc%Gd̷@EzsF|B4ؤ9516?x^[&=9h!,_p(/# y@dQRd(0Ұ-3lA{33a J_Jl0 J{(r߫/H7D?_f- !*[*?jD%I]0Np9^# sLܡe3( n(`f\'y2@:OCf/vT;)#(&G&oni4/>bأ%~Ϋs.9v~P :nl|jE"@V> > e_L6Qǂ{,B o_# _:d~c |%*֘ƾCy;mcC5=o4e܇NDؽe|FDDW y_kilm R$$bKB7Q?~bÍՊnD:6wJ&vЙR*yee mc{lZZlߙϪoch澤\k<"P5-cQ7h:ׅaFo~ 璼Hc7oS:1O%I`@aLEOp?KWe-y}8d=Pt=\/ak{ؗ s?;@T>jЪ0450$>>ޱTY4sĈyguQBI/; ;s!%g+C<} h.rC!a {\BuQLxz~%){PZw*5sR޷m"-1''yhSLc;S[So6e:죵W)TsOfĮr\Rl₰(z7DVˋS"XJDwq | v_ꂩ\zO'@g"PkO>!@e6RCϜ$j2@`ޕ2 /p]CN/@LIg"yםd=$^l 4iT6T,#l^ M tc"e{8Fvh8zЋA 'lIyk$5ST.mmIn_ y* ]i:SkK?ׅYU\Bi'ʂvlT_B;$_7/mI93B{m?JN [5XhGD!+w75nZU pL?6+_!h՜p[\G8мw>R'l`E+s] v@آ.2s?w\Ǔo"^#8#&*cѐ Zynv3SU^ð4y7~#XէԬnhp>UȘ+hv<s)0ǰZ`j{~B;q;@ /iQM#K̞=HM,d* 0dCG_gx)6!k Q.u1 kdG1vqJHRئ^i웜L*̵R B1"&aj/ =xZU_R2뜾S['i`kuj['1T!7ǰ9 D'WAse Ն@ğDBjK1tzsjf>YX%XUh}>ee@fd v8EX v@AWHJΰmI06~pMbQnY!rGfTe5wLT~TYu"LؑWWi,oVIҝ Kz @_O\""dpWɌ4Z;~0)$/(YCuPqDC6P,g4,ҸYǿsHӁts$†uFVodZhS3(mKG#rzPyZ%&Xk6Aw*7\c=X '9$# x]G4էH ª: ϝkS}5f3VMɇ; R]NYGޝaPfUCb#%t[F(k,w E}苹wx=ym&ׂYN^xΩIܣDRCu#AkTF3.T+ws̖dAJiw*&݈J0Aj*3A347|򐠨,nSJ%#3L{ueS;` VQJ^㫥v!R|<ϺeTy0W h޺5xlj -=6/\ uPdBE5UsbVcDi^89 & :i6";2ŐT%'SeVNWB+mg. 
LgɅkjmHc{O;c(\qlCkhR_slՌԹW'RDjU̞ӕBm7P~oON4Նn/j07PPkZ<(-[G"֠P֒wu@.'E`_[wf4sA,A)qʁh^.ZVdz\C7hkqP*^6G_"[5]3bK9Tsq-IJ ۥVڿ gI|<9^uٻ/wɵ26LGG [t@;"8MȐ5kV9-p]y.&wǦAyG{ֱM)A--Nq {Ta8(ôv}y;g9U qZr%yMXP aFXi45j=SbtR{1U5Nl )Oj&@(yDh3Flw` "8 1Ȑmax3cd7tVxY]Hex a}$Ha-?;atLTpkl:\sKZ o9CƎĹ$R 5z㔪u՗Jgd]V1EQd8,o5 & G(_~q) hf^MaL)r@ʅy2Yᆆ@S"ҚmD[`G\ UhMǾ0cI61`-j*"3DQ*̥6RAB iA[%8]Wwh0$7Cz]nz3"W,+UMp.k~ʞ/c\)CXm0:+FasVH[2|3~ Eެ0 x-CHW[6Ʊ^0#5[zlv+\.۝YXփ'w`nqi UK588RFed ϋ1dWf;R!)cs ?2QpV!c1& WN74{t7ʟA[/6ݱzufm {Z^KyF!!QgUccbBDt5 _MGJZ5[Z@&+ Uttz($_ F2Yj VBeX=ThjꗦRR:R'p+׏<+ \Mm~[0DOr^vg17*J]~PLG,ʝI$Ճ?ъ[W+F!jN6[VK_W05z )eu-ic[ QFqz \@h1BOlW,hpSwFfbۭ'òWAY <87ca?a|y8 ''>Rk X@:j8]6(/^$3);''uTWL(uw&v=bL;In <GeO8Hu286KW N+$(Ǯ$_8[,Q >OtW=sJ9P)qZ[IOG䂐vHHRZrEήcAM ;׿xa#7.YX`WC畗t&<јpю@"v}./JS=sy5P蒆 pe@6NP+O ƂNM&T}R` $p f4}dqۃ梬f#+}ieBـ'R/} X#*C<>I/W282hw8_x%0Onz͎?aK9ݪ|RˡG_D5%0z{::LGI1Gp$=IgU Wb=wz[31a UA5ߔκ  1g8JCO5W3a)6^bɋy*UOZ $!kmOpܱy^eGq]}  ׫ZI*Fl x2 %Ma5x!?/?<įy6Qũzd~W򂢏fS<j70ȧm #C $Ӈo,?DtXfZs7ԥl?ZCXtrxAjo=GW9OmSg>GPBCRw/Fс裰5E$P"¯AD{VM:խ"N=k=FpyDhGNh!KNb ʏ/{cHw =iؔPn;̞ڗNVID^Yon/. 
Ib'˷)LXɆlcɌR+jϖJg8hzbE$h2#ӵ؀ڶ=njdLvҚ˯Gć]xU}~sPz)D9xՍTr%ɢHd<>=#}Sr?>&keӼ]M*W[ \%-}S70aT.*,[lعL-tG1nB,_4"5Qq׌ ";RqNDEJ`^8w=x"XiHJ3sݚOR~&y\kozٷ72ә!E ՃW$3Pp2eVh}7R77U6K"sǑ]rPm9MhLȔrlro{hw|+mHJa"xPViW)r"Zy#$9V4ůȐ/t}1U՜sǏ:X%QBxE= I?7= Ef5)%a m|X1@S+'⪏C3ZE;X;myVb R eԳiN=Er @h툃]Đ챸~liNevf 3/A [yXRJ,BԢ>$ *Kϟe\?cld\b8goYLg)q7LI l{cSYW@lLt Vl]&P|ubP Oak|p!uYݰ#=<$Z_xo0[ "M3HXJ;5:#Rnbqbc'L#|ĆV_9dtk[6aܟQv4Y\ FeP /v/*NFx{1w' ##6Mv'+NDz;u"˭YUϵ/AWֈgF̛`*̣aq򔕐NwuߒO7lG-^1#({ 0I0kle*M[AZ 3s)LS{@䆸@ү}N Mnz5~O~$v[)ʉ^F ތ`B7ȸRښfZn>C>m_ke`Nh"L#ritҶJ^ʟנw@Ґn+t៘`Vi"IXG^J^l{)(+6i }ėBU^C z%8+dqeB_y3jӀV>]Zδ*;f2zٚѻ ϷY/t*-* T[,#a>ܕsj2o׹d3uV1(f oSzKd.zPр+OtUyq ݘ#wD.Z5gTaT4?tG&}macpCDaAa7M{${\8Fl+!|/hÝSpor\V~(gD70pއ{I"o ^埔bn:ùwMnA%]-45f+Q&9)\*ԝle^򙎛h_Kl&1Z& S"QA!un,X{IԷjl%rr;I)i(tfOp: gQip%# Lc LR)-AK*|ߺ&v!P25Mo2{k>#)S!јP&Bg֥w&qs,ퟂ}m0> kMxL _{LPDWן뾍òfA'9xӵ?C̬ȣY6WUo#eOHze~W5!rIx ӵ I!-"zd%$6PZ}(Āv,_8-{;f֒D`fЃ8}2#{f]gw2!/D <]hhMQL"V^Њ jL؋p82u #*T1po 5DKԆu%UȌzoγCVvk^@#ka_:MMO+I7jxً!)ߙsqoIY; F8<!{bklB>I TJKr kHn[j9i4~t9PRn!J dG$=:oedo4Y9_|y[ZYtJXe5hh|'[*=O2x9!c({KqwTA8$R^M>fgZbO'HS;..=!U-^y[ޫ%a3Wͺk6em!]M˶\ .ϡ}HzO1a^?`,7~#0 EgH5JIʣQxy# |.^ N'xd$J0VύaIxo4$'( $.ڣbr릜W0quH(M8@dA]~c ڡ5 /K5 &"TgJ]z\aY(I??Xt ABx&kØʱl2?rM}Nbͺ )w^2-*9C 4I6`_!c̊#Þ"ȴJ pö&ٺ?sY'^*,eޜq]Z"3bVk?KȐ"ӷ Jn[wCh 7#oT0F0H% qE 3&\܈k: [E5:=6eNdYE*>w2DہҾ{؉!5y] 0̣D|kP7=!})D/}ȣd;F;!VPg,rjө4pPX\ =ot=CnA^#ZU& 6?'JǪ-$uUXNwɍjq z׫gmCYT0gCV,9]㥅:$\\PxSkM^Ԟ*|vBh pu~F剋ǎb>prN"׈( 42eCWv;(4d ) S@#Kp_ boHDi}J7RC@iz察GP2聋L2q%STB0*Z=Y(A;/rgX;/A"wG7Џp"s}5g]k{ -P#ZصlxI@!ҀVY[.'UgIGҧ#,}7uaѮrԤ^<\='"w!wl. nq<@R"qD '|"_^҉U1.IZ?LelsҺ aGu@pdmŸ%`b4/n >S1;D*@,|UfS}Sr.\1Got9 US+^e:bmŤ/p/p2jWW/YpjzLx z%Ⱥ+>"A N(?He.q5nu85wBO]dѐ I' ϣ<.ۻ=c$t Ew܏*<+@:Y:S^`-/,itgݿv-8;sEzKoj wĨ*2_/PԐ 4K?ؖҤ+A`T)?jOK-j5s *б#R}pJN.sIsscQ}F.T$_r{چaWqẲz Zv|gg G-/k= Uv;Ł3IRev)3G ,V#@p|j^{{i/M.^;cIp khR%iN~")$O$X=G$Cu, J\; Jq0R=]lOtĘxn֍zjVY]xd? 
&+@@l!h3nlF~뚳RV\icN>zƹvТ$~(ۈyt"v fh)M,<$mTOY3%"-FYcG1d&vOQ%6xڅl5ץW; ߥO^ +{I@#c'Voj<Tn@̂j9Ws"eGZ`1U&a\ܬ4heh(6))Q  r~*=i?ߙEIg`hz Ǿ SM_[/1]=xj'R}qc,KÙԔ|`L,-ʼnJvgrXwUgQt]T5>P FU;ey҄Cxڏ lPDގc&S I6x#f98 OJ_;B%Hp^Rh~z;z!S4o IG߃ aTj?>YZ]GMWsC}|3  x(ŏ[.@_sS5,512C Ylorؼ4W'd|NfQfӾo,즪:dt禱)w}^:sA(6HZւD ΅BV?[iG7tru9+ح|x:P&w΂{?|&٦>.:e,a$=gɪP.{Q,H6z'YRr)n)ߍim'krKz+4̌=I6JpIT@ fVIClDj#I5\ZaPJt>JRFe~Y!9:ڒ\鮜s_Je7ZܱNɗze9e!yd/ (WLFd}Z7#=F1:j XYo@Sd'AP=/W~aAݿME5^@tJqd6[*GޡY+[Rl;3?WorȾL×^!gtv3pX \ItO>sqJXUw5JN1ֶbOaDÁ%  ⁽]5DU҆wT"]|XYqZ2b̦1>+ 4iguY|@kX{l1LEJy֑;b4-\p0/*ϑE% DYLJl66skӞ}kj3KŨ*(vI,ƛuQ٧Pՠ$pN_i-GO]hPig%t@K/,;_/ƑDl O>s ?Ԫ^)k?dř 0 jC6a?vEwqF#)3 `/~0|3 -n'@Dj0jL&s-j i*OD 4= ?ꍒ r[Kt*Gr{_;}cs6YCZ9/ߺMJ:|ir]b̆c +:Ŕ5 R!k`T$?B" % ³[)wra.Izu[ ;fi(XWÅ3q-x?IcK=]iʉCI͈:@Ȋ 6}3^PV^BX. lq@7}fWv+~^(NYL Q~"0Z̻ր0pF0HPF;g TeV"n(3zNI53B]&"/r P8#Vtܘ 'ClƉdkmW^@ ~Q G(V8Uaΰ7;QI/Es \3(3gO ȑ ۛkұ8eie7M vYuɍKѯ!+c@wBebvtzNocx/2]oJx`it_գ #TP%@ ލP&n\Mż"$Xj}(|󁜻`ʁK"חaG~ӏvQ;:iUluW ؊R2Ğ!?&8,XUd99= L x=9ɒi AZăE`Хk(,n%!v`S7dڋr3”n8'۶=n_QJVvla `J%w;lP;vW<`&  .jH1 P}}dy}$*(xrpDÑ]{ u4^. xWֶ<> SxΘ:/] " J}-? W><VÃPn&x S+semY芻Wq0-̤nj9jW~ia<*8+Mcq= e<3h+-`Qi0bY(NDs?_80ҶEcwޜUs H9 \kژ-Yp?ߚ$1_goA2Nj{L,nG2ANmH՜XCP+$ׄ(PT<5 pf^qk P¨[K|4o+u^]t,?l|"{T̥j>lJ{_Ԙ3E h^k(5 >e3U_cD+yaP'=CDM=pZ嶐C8KɵA|J5W`(>f&<Á0f1M[$l_*%[2}/>*WRV4HvN,W|Ip(ևg)Ě7x Dlɋy4 H: ΍nVL ܷ>U,䀦ݠ&vk*oav6%,h8/cRgģr{D.Wc&C>qtx꾝siT:mN_hEhut/bO2١Bv*4s1+,,'|vD ,;_[FvBX18-먲1etS%k}k[$5w8*#Huz}u)6(D"/RTW{4Zqׁ@kWFh҅&L$icQVb9"zҿ&{iX'\E] ViY˹Ssh\IGi$z6듍,QT:-5t9$bٵ  U7O܎ XKr]p_8:lD u-J|]`6q$-VN)Ru])R5C PzD 1Ul&\l:tp +)8;OUg$ F Tra& ]ZR$+̅9?@;\! `l?_52h8@WH"9sҀvNm@z蹧 .LoqE1F%!t 3{eRAAv*ӂQYP C%FQ/dMƸ"+> QX\@XmBaitv̤ Bi_5g$^v9BZŝ5$ KRԜZ2x]H[?"l1] q>fS2:Nâ"Lͽ䨬l#kԅ\LYlsiZ1+7-t!|g}Ɓ3@Z8J-^Fwmv%aV7x*'ۯ=aP~Y//Z;'S0.qB{>-wt4P'}x:>w.`9NǷhu9{u+4"|3}m6{MP (f]Bp0)ِ Ap46" \Ue[߅?̲kLE_'"SWdcؙ Xb ob0=?r>xQ|Τ"ߙ/ I1`8 5|3Ab}֨ }rif!Z[APNC Q膢Nh >{WE 2 Efqq5X_ϴ7]yՊRR/T6KLȿU)$ o%"*Hk5HUy,\R5ߤ? 
C + m"a߮qd:o8+U9Þ]ΘƃZ!OEU/:I,84#p݄u2o#M'sbC6GP Uruj"hA.㱠>w5ؖm鯹ZgQl6C亞~sJ{M7_s }_IS!!Ƣp+4* 껌F{[0֟*sbYӍ莠':9N#ܥ3q,aFu|KV".t?3 .fqy(&cʚ )h6@vf$S/2/z%#˗uĩ!WYW8%;U)S-Up35#s}X8CNS)E4/QhgM<v :XLv0xȃMU(؋0/0nH7FhGyH@ʎ ]tR~ofo,qpUK,l*3h kޛJGe \*k6OGK>| 2e Ѳ̢:% ok 8[]bN=s"ZQ+ scDYlʁ-XE*A# (VHGL/2\ \ rzaX!ic޿ЀG-噻#"Ǣp +ˉѳzߋ'{ 9іZ66r˴Eaa@ITn_EnhxNW~V96 <ܰ ]xk{9m?S\%ZhrB7YA zlt_FV;K,}Bѝ]"g<>0 YZmice/data/fdd.rda0000644000176200001440000000757713416657163013361 0ustar liggesusersBZh91AY&SY g(w~Rg+MOKĉ1D)wz%uhTM1dъzShh4@ EO& $'==M)MF4@hߩ(  @b44hhd4i@h$QM#HS@hMhFM@ѡPI'S@рCF D4jzOPdidd =FOPz2'zhD=HSi@d<ɦz hh ;EI\kڠ$RjPZ,^}՘ VC&ep c6OݿkXD f\)k,vR|!{a0/$k.H ^ S:;kx޲ub،!ozjk sq$y0+`lԲ@z͕2]\ Bf7Td[L2cA m<궤u|J|Z\) 1 @1Ǯ1t1dZv`4ۙ#c*\e /;%6Z༴:c3rÆY%܉.S@<]5mXyŵs\*Tn7~UF9oLb\0X^NPb8ⴘ$쬱s3iiY5|2>lMdͭXœ`ݽeRjaMm&hmRo8 4,7hLY0+NU֘Y%yOB'e _-aNzύEY!#S)[(Mw\9*OT1FNiɆ|u]lhSyUYVȞ]Oja]b"E4)*I ³&Xn י5F! pNb:|kkH6U")GJ~p(`뗞\IPˋ=7KHƏn?>nY q !jj&HF֒Fr10<"^evծeB jzgrP-jWXƙ!RԐ`l㡐ȨƊ Z`Kkt{*pGt/(j>CW{1ҸU:n5zx9"kA0PǂƊeh+k76w+2)TFiD]Ν k{RQy s/}NBRG<3o HU+m^&Wz-USQoMP-W>rL"\s^Eҧy:ex&"F>8d;N;N<mws$ ,#Y˳`AqMO .-͇ؖ+w!ÆOADx&7'3&^up}gwemL;¡lWUU`X#b#DXXj.# dXQE1V(J"U""ί`kd&*4F`D FF+H FEY ܦ_r aw HqRC.@HHUS$PROPu+.nURUJETYN4sYU6^14j*P,HBP(1m]#34[;cm۷ʪ8l٦4ļŢq=yjK)iYS5=08q xy@ !'7kX @7Q2`"47$M$$U2 H% 6$PzaPAƊ(NvE\zg^Mh35C\pgIR9g̭RF "Ј=!zcUd!JX@a|εu>]DJR)Mp9x|wnDMkZֵ0 kDD;FDC)>9Fi2wI:{*%`E!@' q`,'$SFcM´~0sE@=&͛%]tDDDq&P@6aX>""%6@fΰ*Ք= 'k::@ Ds%q6-k0i%WçCM` TxqˊUhmeC,)Hy ӣrl< .,ޏ @0ALLl 4*(5(ʜn ̽zh8%t(fbֆF Nha:M2R=5V^Ebٮ1<֘o{mv4 F EPXmJĢk.]#v[+;GqSKWPPX**IJ} ,PaMym#e6ШaEDq˱2n`S[(Ij6T9g&c&Í1ZviQT7`vSUF+{*78 Yu`*T<(?ũϿpd~ڎ̫}F{a[*Hd!RTPօ"3T4P*Dm"(] +DuVTV.F [(JMYҩsTV ]ChhPmR eZoRiu U[mUUvFPqu-4+&GbD8m(-km3*%d(MiZ`Lҳ:ct9دbI5pY`W# e鰀v) 3/V_#v.Ӗ0WSq<^Bp q1mj`(i2}̍.*osb+].Kw 7DA[@o O˫Yj?wݞ9?!jv!g{KCB{Zs)@Q.@@jxnYM43J`Nx"(HTmice/data/popmis.rda0000644000176200001440000000507013416657163014115 0ustar liggesusersBZh91AY&SYJhdog@@A[+Q $(-|0 .{ᔧ 4L422C&M I"!hhIꔥSɠhщ0L AF U%SCG4b 4j0'a HF#SSHh= \? )J*+Bgn\NĔA! 
d .* WVfsÌBD::ihQꂍ3:k'+pcaEZYfϝU٠p0\^Ai=o ;H! 'μ.ɠKBUX(@8R2,@,&)2f$),,fgp"u;"- 1d`&ٹp]<+< BUr)RX$qG:I<':03EOOO/G"$3 K(*\^]rםъ] N\W5tG9E)^.Ix3 -IJ:y:+,y琗v.$AT}JnwF.phQéć1>q}>9}fQG(s$GOOt$IJ4u(꣬N;2vQIS&mN3wQއ|>O x@RqعҊ?Jsqy0UPgV UI*͹/sÈ ET ҈3Ņz6FG$ub]Mve#pbHEV!!-|@1'};MD$aXkk+񑄰 `NJME&/la?o&{^PΌʒN6\XQXJDN]p'FZ$kgM v g6ZTHԂYmeVi4e-$Be18[˩ӈ@`e'+;ɽuN1'TaJN cD4RJ3g>4G|FgzkvЃXCma!kPT0kmnArNjTMs*P"PJ#S[6aV1E%%mlUfURmd&mFFZf54E)64QLf bJiKCMXVka* bHIV6V-Rm#m6,JEE&jLYf&@%C$c10B Ĥ%XF*ԐUEh"5cdf1S-mZ(d,[+kY"BHɑ$FIeH`Ri$J`PIi0QAjؙ5f%4ȥIj `4^@ P8`UI{qMFªi>&ˮi[)i0-YHd9jn!V{pf(f sF֦ \FUF,k.i|zTtR%2qaKfpQSw۶.wI7e,F\*t*H! J% FܪHBmIּ׎RhM tĐ' 6A#UzVr EpF/}.,AbM2 ښ hiv[!%VÄ@HI7,[,)%dԸM֝(k`7n[)L(n٤C`]ݢt M>'3l.p mice/data/toenail2.rda0000644000176200001440000001015213617305523014311 0ustar liggesusersBZh91AY&SYUR\PƳkImlHzz|6ǐ$TiM5zSj 4hGhh4 d)<2hM CM@4 фd 42ih14hd 1")Cڞ(hP 4)DJ h Ph&%*i  &@44iɓFALd44 4F4hFM2UA"Q4m4i64LL& F2&`&&&d2`0@gP??hp%PQM0g@ǝ+l@Қht"&z 6"{ %tl!ܵAGIA $kL9 0bd.P2"2콰k zМQFzC8=4=IVz0ǯ='='Ƿ'=6{ӎ=π| |Ciǟ!O|>aO|DGӟP֜ϰ}>Ϲ>yrs/¹%~INPNX~s9NdG?a~s͟8sk?NrϝSOƳ:ib'EEgjjjjjjMkSSSUSSUz$ IJ0M$ %Q!j%L8uY%6Lly^P $L̐EH&]UX*FdvdYdYUٲI$I$dI& dd*cfM*.fwx$b 4MݒI&6LnIi\ *+Ā(C4"bH`P¬!l˜M*i2b]&*$r!Nѥ`J % z3 ,d݊`*2g',1 VLbw*r Z. UU4źr QEPEw=8w4su`]}%A-[kdZ3L̙z72`3C3&eYdt,SCC*ɓda@h:.N`$M%UiZe 5{LLy'%*AP*8AT,Z&Vlˆoh2jM"*8TATKY.pL\1V*o$2a\ZX¡d0 YX.*q`ӈDe Qc|wА|& "^)ٙݙ[5]鐪$/7dDYXDDIUyDDHXDWk`jVƕZX-uD01*33(10m^存֪תƽ$UXZȉfUb"&feQ""&"R]bfQ&b"tTCYhN@UUG:wfwhhfUr$ՑUUb͐*y^1 Q3U҉LFImax@) h vv3g|l*bG`ljwxhxwvhmUSEYgwyEUVuv*.ffvww|tdyݐUX77ZofffffPݣƪKU$N]bf (ƒ02k&202Ƞ37iXAcfFa6XfqyEdaPRiiXbdSح" 2"3΅~^m5rD I"-3LFDurriCI>q2(ߣpm5LnYeĤ(ɂpD @&,%X7\$(@щ4cAV!(nmxvg 6yG,L2a2sWj%!@S".CF*K ݖ0RY+L40eXFI[eFe(50s \F #6f;g#ZASU *KJ (3YfU2TVUL $gIUGXHt8}peGkâI5Z6F)P֙H26I$(!PjdPMıT$`2":'a_.!Y1 UQK EA$0-fDK 30ÂD(EMW ( p(+xDKKX,إ2dŠa1HbaaٳF. G2c EO0m1ƒ-25[*wADҝQ2RMxQԢЭKҠa!)1-PBj. 
F {+-D"Ux G"(2(ITX;p]`Wb:@LH/[&&tXP!lTSjh1cimìNSA6}{J$n&Lu0Dp%)ZYȈ ]%*b}4Bq$77 $> ULN% h¬Z'k+TGz6ʦVNi)- A ܑN$9Ubmice/data/pops.rda0000644000176200001440000000022113416657163013560 0ustar liggesusersM10 Eݴ 8aa`d1tC#P?}۵D䨮*rQQC9q 0Qn8+gDpz ?/%ı,O˽5}?I^mice/data/tbc.target.rda0000644000176200001440000001342013416657163014641 0ustar liggesusers7zXZi"6!X])TW"nRʟ[^ ntEB*u͓a@Yw3g0x: A6!u ewmaEqD?CN^0w|OZT5M'i;LZ<+A_:o$ֈfKC Iny<t<xOd@,?˜ eXr%_UF"Wv)rcՎG#'$ Z6lvJ \Gj[xC^b5W^L|ȳCIȚe]eIuh.V7ʑFHwMOu"q4 лl,Uib iyXǒk[[H;oVUkY RNLȺ.-]T q z_rf &Aa# zeAO#?ypa5{!ד GXb&9 t O">Z6vE !~,}$ Y$=tz$454{mƣ6,9x  g;)`Oy ۭ\߁"Ht FH7H|A>8;IVsfL?#}rR Z lb#oY|k82sp LW>(bdmdDk"pM++)09/=EdžZkC}a,䴰 RZ#⠖Eܫ))cCRVe}ǚ0/|X1ut4G=1ƉmTMmH ݔsEeg, M]`ָ$TfNnRRl3&"/Ew?>ʦA4'ɬ|f(/ YR܇`rH1aJl`%`AM柼_'i7 QeЎD#HЩ8^f[BM_B#!(2 f."&65~D_yzCě0Q'3Q2'δl\hY_ xaZ}ȕOV]*bS+N,=B!QD&gšJ1!c!. "*vr<]7/A_΀aUwg,Y^*g)miF A@BVR06$5[#A?{/bQMG'Tlb >?2`ѹ.ra4;c/hForTkBN'JS"7ct,;5΢`5`4N7Dyrb)sf]6{4!~"sIAEZ{tB/gCPlԨyC~:4y&n|jDDŽ0ts߼tGK1ˋCm/+B[ʺ !vf㪅a!NWh{p^n.ۆK]:9uC^#K?V(j#[SaWG˄"0oW!sET=#EFQu$5Eo9qiϰ>yng'nXk+⧎ wImgPxy6/qq`hwQKT;YONJ^4I긴q3&2@2A09T[LqҤr>wFtI=A(^{fIӕ@L}6+7D OTɼL(H'} s\" GڐA JKA%@|uqz*l^DBuQo5襶Y]"&6$tA9WBn9r9AzTm77I!rMi/c. ޸NyC 2f" 2ٺ%I$&'-ٖv 6Ryig0^nq]r*.:SAB\r2J֘Ցygxe&hW奸:>r r[9su+jG4/Qi nV([PLi<+%ADBg""5niih)~>"$P uFa,h,F6#.o֏%>DX]58V;Zrj¤:Vt%B3fZxn'@M $]@7Y?m޹\Q`O܇MrorGo Z0IGUPo  g p2쀓de9l\*vx/FGdޙd XqJm gJMї?h%w^̓'z( & dz!&{YZA4 ~mDO D*)O4E+CL^`ۛQ@ۜ}iWx?iBOZ_WW}]k1T9R٦WuDV*YSY BUt+) {2|#U:wM4J͘%qK}V:Sb W5/|J c3Xw_2`Cy}Қ.J1<\LEJϾA=Y\A]܄*X' ɶI50[| C5$qE>8/qCbUȎMGÁ@_BɓVL2mqNcVK $6dj5V`qғ%˓'krZa_=x ێ~wUҽi%HU ^x-I>0 hC 45t\ḮkSʡڿDe\FKgcy Oy-k@r%+P\baW)tg35JF4AgdkeSk_v`89~ .. 
m7hh.@Tر]j7J7[@`ݜ (kFkȷ$!c@6} _ux1Cb ^ZH2cfCdqά:HjU]\Di]HS[LC]k_=jqb|}tEt l'`PfpٽL1lC{^m6 ǻck^IHW e0މEU95 L#\LN/Q͏Ql0" %C3Ε ?}* fE\IY[1'$)q2͠RI|,W[6Q3~B:f_>_lu>&f 'TrV^ d{tӧ{V&>#!+OyF^VTBy nǯ=OzDAm8I"7Q #;ܳaZ #F 7 s =+OKrEgZK@> 3bx& l_F] vj}S D:ɨN}6B#WnB,lz`8;e݁񨤊-cU᛿T1eCK!y(A| dZ+/&78?4J^)fS_ _ThD=bNIj *2ݙxȈMt?2 xJ /^GT{g#5{$wiNUW¥~Ӽ07NYC@)ifGf()_ĺsdHx=ԎͰj`"z<+]6B[:%qqt29QYջ7&/)UDCNlVReVژ#k_AW2b8u}i OwqK̠HHq|ERuF4%@Q>{l6qlJR|hoilAÎ'`i =w,Ů3j˶̧QgZqD5 ;RKyj?.:+VykHVS|27fyVwS#`UΩ _i>;1fd q1UrqI98m*Ȅu 騺>w! *4 U7)}A V67uvNPϙ5A ՋLݒ,}HBaWqE6nPa:<|a돁>U-ݲUC? ILPQ20 YZmice/data/brandsma.rda0000644000176200001440000007261413416657163014405 0ustar liggesusersBZh91AY&SY651N:/D@@R˻p$XڵJͶNu <׀ EG2!ZQA[BP$(c {yl}π}U/@PB)UBg|+L>>`= GE@AH @H{w:ĝՆ8G`ʥ$T*Q4h7 4h44 AhP7jTUUB)& y{*h=#iH44h 4h2SBHddmy'zGdz@&h4@ 4 44zJ'a 12fL&F M42`mLL#dѡhd2HbaLj&&)TQ{?kC%߱S,eڕU.H?*d 9ga1PU ۅByLBeu lZ?OHD΃1=ۉ>/+fY2;v&ro<;^- kRZYy[>JLa%VF>FvRڮ"^OiA\mwe_e|+'/AA Am1sRUhYj LXCgHZf8P;%^jR[."⵮4^;ȹU.qz IM7tJ;Y7ЄB4.6ІV'2'*o.bևL!Џ1A$t0 3 Ici$R ܤ]1$VlyXLyljJcC!^7E+ H%􊨪ZURU֤I66.Dg<ϔ5Y߂-m@N4)aV DHNf(CQg $ B5]{wkzLbIhz/{ch 0͉(7ЂmuW=łEPa0.I. ZRaPQQC2[h6ݤ:4$,)J!r,Φmtv{2:e=9NR]o(:8BϼmƜViO\9<Qc[hKBYj+uJrr P!3@$l"6\nχ|wܥ4h4 u8E LÄО2O$R,\ BRX)'AQ9!;i)擗8p*Tk| IB& a&ַ(uYByu5jj֩^x}9Ng彭k{r&=]y^|R '𸕈.FxHe3s֣)\@DI]miv1c!ɬH4*YdsSw'U!0R&J_a?@.9Ɨ@QtH\ k V`&@%c>5 aإ=-|ZFRUƘ#56j$cf"@.&+TdvNv9YT*煨7 L(C9NB+ g Gr+4S{^sBrz>"!Kt˹˟y].M/q#C~:εPwu02Y?4,Dޔ<B3: V(kF֪pNHE\S KPӁDz<}yN޾ͻ{zt@r)&a%LSr:8溱I$D^[z|gC~gWH@PC-f"6:(KiP8mYj3 r8aQ:")!& W[C'!aK- LQtЧW=hB׷(IvƘ`c(qʭYQLjLb. I@q&jJ#G3^.$BHȁzI1j6'=t׳:i3C{S!+8`2+FMwYvgM xb ־i_=eLU[HBkceC6dь^Xa1L(M]|mYNjELVjzk.e$eK+Jp:9+ӎ(qeaʠZX`Dtcm,@ xN dΖ.V}eU!.|̃EfvA|DjUlp9CJRIA)\k|Pf5]gYd;! OOpTiWC@a,@ qvWe,0;Sfg];MVj ! K"!ܲ1& =%^d\ZS! DҢ ~FIu}ڡS9_@7Ѐhr*YFQd~5LaD}l1,i9:tMgm4xR3nBs UyZı"bP6Β' (6 /J}"5ݞʩe}ug{ %;f 98S@I`2gV~iWߣn*pǍ.ҾI:0\( j##(4>{8khAePeMݙS&ZXwZA2 ܀Iִ#~'ws^5[Zb 1ܐ. 
lAҝSXJU:x/HшsTѣ0LJb[~=xYм]_N!IiI=;RT.[Z@D,ֹ&RH4bs j֝3mQГ]-jI@ù$A\A4u^ltOl'ۯ cSmVZ*D!/WOZ|bWs#`!JklҖHFrMu>/9c-iEmdrpn=u iXg%-sʴD[Nw c/m'Kܹn: j!2\G*lnaF# FVӭ|=u=#r ޛyby/[8V'Ĺ^<%iʸ7.WX(- B,!edWDewǴ.Rg,>E:HtIIt'%99ZG.H 01> dtJ†kdmF |^(rdkZM릵0/ើ ߷Y\=5Uzi,-0%1-^:'놖ܴ 2r60$>%=-Kim={|@{|r\@<A~n?oKX;O>z4y.o,c2Ǜ$2m/9~-[9k`O]!4I),!2 @2)ÄpLk ,_0j a7/# ozCAky ؏9;iNi094N{kdڏl'"%XԄG_t4Yg徔: 6ۈŹi]췕ߩ.7O,Z}⽭s B7Ba)P듏VVlZד]N;v>y'BcEI t S!qwf:dt޷Ӭ(@e*NM7^H<7' W׎Z=zw%iTAܼ*Jpoy꾤㶙z.g^r9y)pq <+J{z|UNSnn֗ayPi=> IOzz8PYL񋍽#S=uW'y1&/Ǜ[N?1϶yz,@(GҔzztX͛X6徯ÿsќMM69CxCy8î=Ygd<χ_/4BXז8֬54yq(bPLaK䃱Q,,ah=OSϯ~t~޲+8>^!lxifmkqڽ[E k=4iqԽ %>Ҽ]哀eA4v@q:/$cqӳCo3Ru5mn,8fyJCi .~]Z3^l H_-_A։=X3 U |J뽽o-?WS5f_gʪu>vs\o[8.Ӓ/e6oҙ9)oH:h{L-="ӑB;g$CAtT˿Ezet44nwiMLDw0A eerDQ9R溷\xK]mK]5 !_ǡd4^D?_笎 V?hӔX[ܮ6`c*8qV-a[maSgS}Crjo6æ$6@"tʭ!(JlUp%RGҒUƎ8"7m$܌~1_&r q1Ϡ= w%MGKg}qw*{vzǎOjU7Q>[k,cQ}?~TVYsڹr0Q-o([-uUݛTLtlNR;eYYqP"&Y6|I˼Plՙ\rs Ev8LYoKړgCbD[ !Rn_/k_4$qXEsoWYzj"}Yׯ+Z$8xԈ'8_#~hՠ]-UAMZu|մԮ-^$)ah"n-t-̹x\|2A*`G("qIQ u8P&%NjSL)ek 䗞Mg(JבbC"D>o~v"u= EMib&m{^|E3ڟP d%H bCJP?z.BseNG9=جNOȦI 0-PzoFUS~IH,jǜM\uD`!dLC,b!W橵2OR&'3gklZLLmSl{{"VuiQ#thf 'uz`+ A-˩?(_b*)c_E M-FHe޹OO-ATPڔ=}V{|S9PBcp yg\71A[kpԔ@(oz U8)J|K64ijQm9h:CxޛY'Y|:W#6}jq81^ [/0jgbڴ{ :g#,hd" p(>o;ށNo' 1|F}#شϦ-Z1m^F.}8Q#nB1czi3k-&gk O3EZE+MfJRfz'ڙf[D]bo0tmzb IGQ};"ƝvEQы5_MKlB^#;%ґs:ʻGNt-J0|5hu LiGI NqKi[yX¥4]C6 / .C9f贕0񤛶n84ԟC0D9bDZbmSG;Jϣכc}zznj!oAk&k33Sd:vUj:GqcsR A[_[)QjflcfPͱR)`T὘4x/!I3 ҐyAR@s@["kgMs \'Ew`C88̫R0pqLgMn;J8诮L rPׇv%>[wijT X^,)zI.;Tl׿'%\0Lk,(&EBTnvKfT@SoaE&ԂKEJ]HAL8zFz^d)+0̲PBtxTd5|yׇ/O>>X4#ѾּDT}ѰOmo=SMjM~-(Y~_O :\z{([Q:knN_/_2N:v% ~=Ro϶<}>^|~^?do[}y>.JUJ;Fw@Q0 :JH@lZB i*R@VW:ە^Ǐ1 T8B($ErDLo4ѵ_kW5*RdJ  @+C9"9.J *:B@ ʘ@E$tE"2)T !  
)C$@L@2G$M$iP\( 2D#IT !rTdAR%"/-mArܪP@d@d(9d(pD4 'IT (  BNKZWH+{T 8+@$ pM'-s/S+ۀYopgo`"h>HثX^#B=M `5`-s,3(YqT+ jgƂWʍz^[|hkkB69ٞZ@ Uk`k|D `WǪQ7p_"q>@B5(VQM3k݂Q/t^J7ϕTRP v{=hAKC>ꍉvQޑ>!2)|?|zp4ϝT;G&^m{m[~M/ ~$Ot^B3iO"}H=|+|6)|N|UT|]QA@"alϩ @ ~e4_J!j}:??}B'_R+_Q*]f~om9Ry.ݣmzc|[GcGj"Ds>S3 >\ࣙBvހW_Z+_[魷>?ӴTTo_?Xuh}j>c655o8c{chYk[]V>l}}X-h}}1kkV[G?>Z>+G+"akbE+p)͙}O? 3T"tp<='ws?3^lUxz:#̺oz{{{{{{{4I6{{z^W߿~߿=;~߿~߿~_O߿~}|I66 SGFm`VM1c1c1c1c1c1c1c1c1c1c1c1c1c1c1$MtiX1c1o?wc1c1c1c1c1c7c1c1c1c1<1]ct1<1c1{H&fffffffffffffffffffffffgmڞ3L fffffffffffffffffffffffffffffffffffffffffff 333333333333?(xxxxxxxxxxxxxxxxxxxxxx{{{{{G{&G{{^{{{{{{{{{{{{ٽ{{{{{{զѰZֵkZֵi6KZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵk_i6oZֵkZֵkZֵ۷nݻv۷nݻv۷nݻv۷nݻv۷nݻv۷`9yyy矐\<<<<<<<<<<<<<<<<<<<<7{׽{{{W{{{{{{{{{{{{{{~5{{{{{{{{{{{ 9s9s9s9s9s9s9~],K9s9s9s9s9s9s9s9s9s9s9s9s99s9s9s9s9s9s9s9s9s9s9s94gDmGD=l{tILSѦu4t` =X-SU>3z]{ޯ/:|uA1L0AQ~-:;;tpn8뻜;]\u]7LxJxvutaA iws9ӮK]ۘ.4w]:Kx .Nn:3!#4A| 2ԪcCչfʍʁ)Z4kVHѡ 5fbG M~~;3{ _f*ozI $aG)giO( y(8]*lH#B{t%_m &/`6;4ሂ%ɨndLnp tHXP: XAۿOmIEB@¤@,+;S誦ؗ+! .ϖBmOU_"fEl h[LM!u6nި#sPQV  /Prr#PJXDn[DMbC?B L-0hSD)ʮ'ŷ, ۑCUbsj؈YKff*A1r3gRSŴۤ븽e LH{%Q&~$Q" Q%*(})"NoڈD>BרYA aEPSAF[uwľgYuG麾R_U".lp6L܎Z3_O__Z]j f7d58i؜z^ѡPUJ*o!aFiOC@T 4B^Gv1`R8a0T'P{ptfQE q 9FV{lDLLE? jB5a~44h}=jHL\bC[(X܇R?Ag'"~J{J7G a#J¦<_oy?}ki86h5t 𐀐B4P8VA? ..4fZ@۟&PxJ޷qhOej!jv:Õ)W)3^OƄD A٨i rr #~ ci=011AفêێB")T+;|LytPv? 
o_S{E>)U DP>+(( () R* o< s ṔQhQZU AD|_/|^@򠤰@si'qqqq"x燞/0x"sϟ>|ϟ>zѧJR)JR) mymյZmmB!U8:88qs [mm4qOo N*ֵkZֵkXM4DDDqqqJ%m|<>U O} T4Q2Q￘⪅-mxh-mnzva`a8baaAMFɒd$(FhY $aS)3IB1JHbf  Tw}}}}dPz@mPQV[ukZֵkZְiDDDDDDDDE !TTNd mmޟQ2t9-@m"m(I8)!4䉃ph!6 %Q (6BG!Ln8b#n7"c 䄒iI"`MC@qM6)QDɡII4ऑhB&LH$bPM5!"H@B>~s9Rڡzvޭ[z^@vmVkZֵkZ֧dOӵkZֵkZ֣IpZmב4ZֵkZֵi ^r6ֽ dDaɣ2*Fa6UQ8 㧏gmY]i4NֵkZֵk[@mmh(0-moXDt# /+IyRi6M8888MP6mmmmEDmmAAn<DDCM$$DDDDDDDBLY"yW-pti4ݓjֵkZֵkE>m$DDDDGFIVikZֵkZֵM4tҳM$ߕ&kZֵkZEmm-tII%kZֵkZֵj'I5Vg_?з?+{ae(:o\}n3 ᜩAu.nNnl4rorڽw]ڝ|3a]fE{f fM,W"θқ<|s;mr"\YT{Vo-bg'{0|ߕ:B5s!ȥa/7!6,wݫ|jG_ *o77;ud¡s2乸Q&Js_3g)UU^85.ü7Ǖ]h*̊x gq۝}Oe_'E '0_)y+|VE3t{n+y魻Tdg+mr:&l7hG2\+J Ӽٽ̿^d":bsUO#PcrsI LǕ躇=%rX9ڗofF v<GfQ=B9ˉ,ti%a&{H0n^ 1"YK2>RUȻ9@Q]1Tr:[۝9VP;iӮN^\u;E T% mwH \EI;PykyAe>'If_O~AOd cXRaȝj,@yeύ yVLcapcLH)x/#NX36D!~qv$ %iy#_%_\P@C_=t1HcHmՇ'3Dm\[ .؁eԭO{HAFٲasKIy.Qd,G hV޾1u|GOC\u30"DBB݄uCAg2ʢHRP%4(P\;{pkJ).|+" q>: 8ZִZ֐i%ZDi@^Y3. `)hFkZ%~V6܈/+q{e[^T{c.@@ǓzDOv [yT5k ҂RPУ$^-]YVTo$$T ("鞞sWDQ Q(F$^xq΢;-0NKSVGB Ӧ懄U8kCvhBIU5n)q<^ qq鶥3M8iӮJ2"%$Ż_wuՋwؗ6f ܊ߧ7OV\I{ĴKHZ@2 E A)h riUn{ ^^y[1˞qULWUxn4!Fn*J%൭j5UcVTZ;d( C#^4٦ ( 4ŤbzkjĪF6f]uUC%-Fut띺kH @ PvJPdBvt=}ӆNzpW:(C^I - %<8!9۷2ML& ԲC)SdBd,E-MӝyS+B-@R4E9CKEP9uG"tвuͷE8{%̵١rÎPDZrmU+]nW !h56}IꮏT Js&_qTQF Lesv] n1'nmt浭AkZ^ --5TTUWwmF\dJ&#w]QCR"@ &fenTkQQ2jƪ[֋EkLF\7:ѵHEJ@4&BUElcQ`F)6FpX9sknVQTU-(*҅*&FIM.dїw,k$,n\ܫEl1B!iJQiRQ s0a.\0!M]ݫV#k`tnVknE14F2j$D3!I\]5nk+kmNv͢1((QID`mrAti6Cddn](IAF2I&(M"$f!!e)qp+Hq Y#b(F5Th1&-F0 4bIB$דjyT2cF&`זhbDƈf4e1y2ddfH:d۝6HQd5ʂ!2ʌ`1hPMBd e\]b(,J!& Fi ѡ4ȣF0  cEF"biEAˌHI`9̊"$e1b5ˈRlZ,\Bl0I$Ѳ&61Rd6d\eHI`҃!I !%u0E ,$Y Dq2PcEE3$F(a;S4%9s\$M"I L%ˬ IdNĬClIa*"9bƀb42L5$ "d&16d\d1&J5˔F4!@RQ%HRDW7401&$T!"#FD]2S,&l$DW9 IQ1IQ"B̰EFȗuԤ%\Kp1&r @,A DAf3I#Iuݰ`&HCK$L@ QFl2FRA$QaH HQ #%d#؉ 1fR S1&4Q(6heL0L 3 \bΆQ`0d`Bg5 m2DJi"Pad- 0IL]shDE2 &Q"bH$LP@$R) ,#FFD(L(RLE@Dbs2IQ#(Rd: M0M%&;Ѓ!wME'6FLFr(eHI3H#&39DQI.qE$spD nfN])[#a)2A0ΑH I1$ &d 9]&3$Fw`ABF6 4YLhJ$2 ɀ1;LBfb`XλdQ,0Q%`cwnO-'zCD#["(Em4Ĉ0F #H3(Ѵ$ 4 
T@ŶO?~mMGm6"""""""($I:IkZֵkZֵ6I(M(iJm$""""""")6N${Myi䵭kZֵkZ+"!J$ǘv%㻓..ws.pnCpܹnܖr;Jk$ˮ蓝.w]%t]9p\qvw;ƈ 2yϟ>|ϟ>|vm&DzFᦓQѾI8I8/״}qk"mm@f bf.#$I$zR I$I$JУqDDDDyIѦJR)JRym&{I>M@@SW"׵s;{x;Sr!MSZ 8m HAFB5IF0mFJ) 8ۃzjM8&!٠J8L61d#N8B'(_-Л9JN[L}GvnӴ$1S$I$H$I$O~cff`9B@ P $JHLL C1D# 0P3(0CL$C#&iAK$D44I %4$cLE+^Qe&ڼkbٔP[}}}CC{zֵk$lz@{߳JR)4Fҥ)JR)JRiҔ)JR)JQ/i$Zֵm?' 8)Do$3 A-Cit|qqq|Q|M5{^zׯ^ziIU'Zֵ}kZֵ~H^"""""=qkZֵZֵ<ЬVkZֵkZ; (ۈMu?t^~j+"_ Di mv;6Tѩ("m(^5hK?S2di !dhޙdSnݻv۷nݻgԌ '" ; mmmmO~s2&2N]r3wWvtw]9.$r9̻9ݎsnn.v:jtpTnt.wl[L*q&]7vt..Kwu]uua?<xU@C>|9ϟ>|ϟ? o }}}뽇w=7}}"qqqq@u888㏰/>+k8888Z=[]N:4FlRF!H&lE!Q%ƂI%&F#0lw%YeqqiX_x47}}GqHmDDDDDDDG]{{AtBmm 6""""""#M:$)JR)M{ {{2I$I$M4J)JR)J|bm4$2ub[mmPQI$hBlK2Q&d2Fɻ.Nvِn.Gw';\+;7wyx&4HVMbDt]vww99(B (!C5bh(6ŴZbmՍbڋZ5ckEnX[6آ\ih"hgG}tQ+߂ZOulا%eeIF5-1ZkehTحB]T zZx\PIDLd *@?PGk8ӥۺ3ssKTT%P~mS/ S6Qmo|7wQeW)ƥfRo}{IQU_"WOWߞlSVs&[TiG nuotKی(-xnPV*v e{Q!2Q=rzԝsU1Ky>]u=h9jEǁUjd;VNԲnd1g!u:;\ ?& >Jt]Nfz>>k&2F12ĄBfb 3D@(dVסk%DiuNR)JRZֵZֵk^K$I$HI$I=(I$I'!$I$k $I$ˢBI$I$ E ;Ҕ)JRo7yo7y)JR)JRI$I'`f9`80M I0MB%fffffffI:rI$ qqqq\d6]dI4QIFڥ)JR)NUDG_!|NQf8fcn&pЕ.nu;Wsw.]R]w'bㅮu(s]M)u\];N3wtN9G^bI+ZֵkZֵi }iDڦ{dI$I|f{>#5MJMSTh j U(1:/eqqq` 1c1c,I$I$$I$AI$I$h8$I&ukZֵkZ {{ kZֵkZֵ~QWÂyFmmG@b(AI$I&kZֵkZPB:ֵkZB $I$(?>QW888 dI$r˗.\r˗.^xC""f ,IٲLbD(LI3131(i "LR2DbHR&jf -mn88rU\r˗.\r*#]k^$I'#1\@VEdѵkQZ6MX|m(06,l,&26"R1#Bشkyӛv6,Ax"ׅ3%("wwt$r4 "5"#MF!F޴$$"ōItr+CsxB'ۮE9GEǎٚ4!E:HaLiRK0wtƼn@EExqX(,bwsWWAO+t\+Ɠ軮xp:Xaڿ?RRVJ 4ti/T6knFVdlF0v&6T$vH"=6K.ti~@l1YA!Qi:p٥)+ɩVݼ"hF¥*ҡGE[5T#JM MVT2ޱiJ2=[Ǐ1StЬjBfL%MD5 SQEQ\J#m\T\W4=* TYiԂjBԻW,gd:yϟ>zͲ,,{CR`@h_ b b#2I$I'DI$I'EdI$MZֵkZֵ >8xDCя9`HCLK *Vm{Q2I$I<$z ;C{{"$I$I$I$ {~W aɱ5..]tn;\ۅ].swnwtQۗf]1dHeq˹\ԓNӜ\]wlvT3t8XXkw:c]wsˉ_ફ-Zߦ/Gz-kZֵ$iQ6JR)JRֵkZֵkZ5kZֵkZ.kZֵkZGZֵkZֵ(kZֵkZֵMkZֵkZּ8 J";88Hrd}6Gyx$Gk48("KNy NH'1V?,+6Wu2:[HOT$iPD;̘I eY^`d(EC!֦z59o/̀JLl `wS19Y_{qw:ۜoNmV)V*˧T[͐าnv1r[O7j;o9sjcv!e̳|LJ ͬUB86\jB8%18YeYrnwujMero9)6.  
$9)Jk#d JyvQg>Tΐ/O30 4,˾iGܯ[Cw/|DĂD%ˬDi) )%iFĊ7BƂ39[B1Br׎y|uyJO~/vGZ)j/)a-LPMU{])B4Ws ) 5yvMywܻr|Kw#%yuk⵩A,e;&s0c|%N1B `{1)wjr\su/Mjyߏ%\i9ݜG4+}+G}>gBC8_PR.R:3 s^ӣ$Q:)zwYy4SC\! tB2*zxUW66$ Ѥ##+$ZD\\y$s{U^pjg+Z2G;5e_>R[4^MW|.yWknfWN=Ͻ<̭^[W<8nWng3v nW7I.:&=wfg)HJJ2?{?D> }]ZG1%D6;|՜_Ya8Ҧ}}aDcMgk/'SқL>s qTs,ɅlhyL@!U8"+y7S}*ծUExg|˘廷sNY\y\*9DY3Bc3q\1Ea6Sz]@:wɸ6@#0z5|jrرy ťRPK90LhI8W@bC DNM>@~ 01i%E XysUۋmd,R% (XLj{~9y "}䥱Ш]T;oAt.Y$5( l kSiɟOm>l+;dlB32WJ)# k*Tۨ-kw_{ 4imk&\w%^L/gk fD:Paz!CPEKQ }ab[8& zk澍"b$FbP }_5Mnc&i6@|ys`_A$A<ŋ ߏ}>eBH!H RDfC%P20XWQnFcPϴ0'"o9EztP)8‰ړt;mxs>9n`/pliIQB>nJTorr܋|R3 ' n*=wze>뇆2C uݼTl^biKuOXwyV Ddyh{`-ɜG}~ U$ ?"gI .Znz)&F;g3{|$> ?~ϊ۟`uB.@y!'^~=J'OmdI$I I$I$I$I?HE6.{{:S E0 4l֔efSUSVim,ڔS 9I$6{{D:$tI:N_A]6mmmo]a4SHWNNɫZֵkZֵO &_Sߠ{j)JR)JpI67I$I$|qo{$I$N@I$I($I$NA$I$v`I$I$>iQPxuxmmmT IFKIlR!,E@`9qmdI$I E@}imm R)JR!4PQi4ItIi:ֵkm ?(mmh<uBmmUOUQmoETIQ ]:6t_m4ӵkZֵkZ4N'JR)JRD8+lH=}8oWCF:dBJd_96*Ddj\Wf &&: rOףt Sm%3?W<"T !죝#'E01:?e抭<"-4Rddp=ul̙7PU2\eL+,ug"48 !㤲>Am9Z+ߚS<9w|qK 2D2s=El%y j 2U6Rvf0v1淏[z6A:!0q$Әcۢn,vHKe`h (-9X ^\BM(^~ #HtX\ahXgJiLW('UF^^̌,Xx%'~!@@%+P3Y-/Pk>k%-NMܞKջM"b6ūYEWx\@+?fDx81Pf7bgN잰M$ىAjtf}*QCwcY$vh/J|AfP@uAU_{^ Of.cker1`11*FEi_@&-8B~Qln 'qEͧ6P$Ͷho iJX8Kuy`exU1 Rdp3 eA6O %4aiS^9## L|\[E,N7ɮC_aoX>8>54Ct)3YcoLJ=ǚ #XDv={|}!Q=AR͆kv>/ep 3>TZ4ЎΊrxr,ϓJ2N(ؔ[rkDW@@Pjhy1t?x>F<'h$0=>Z-mxT)m[!n}tL'[V:ᐢ3SsM.pՅXiDc'q褱E "~F=R^H3H`LO9@7eks";*z0SR5 EA(3_g}X(m5eS 325bdMgy>C$QJaLP29Ϊj8 dwr85YP0-(_p QWjMk}%ҿMbJD&$]vZ;d>.V&8c[&` w$DpD'|33׻-1_TwH* US/_~6o07gcz#p}'HSU #XE)kI :.4' q{͇TꂞADk%[uOj+f mPgm!Q$ wWl_ G ٠Y c,4? 
x j^FЬa/דZ >nb0$&" YM+rr{`[h^]='l3]M%F#[O &|%2(7 A5,!uaVJo"\O':kf>)"si:3ai'!qf.o>Xẘ`(6O&@vr,\jx`9PZK 4Khں'^g؇,8P^ 0A*/ ojM@`'' 7gpsm<'MNe<Ȥ Z |-ޡaA +cIxcu ǾK{k bo~y3NuOJblϪy'mT@x([Ui%׺p۾NW>f<宵4v,hL ?tS6I0J,JH`ɖ+G?nkgtN;b~p}D&<  uG؏ (7 MIyfF/W=Z0s*@ ʙ`sxU4Fay}dRUGa?,3S.P>zRx u;;km ݽ r%pQּmVZbNῆ(Y)Ñn$SB~ $0tf4ol>߮ɫ 67$lz&5шL$촤<qm֙cD0{,@Aldƕ("@??%8ʟs ht3~qY;2]ɯǺ0iwy45Ϣ|U7wc=u),#2uu* .yc*9uӘ^ K+s7FwzO/(-:q ٿ!*+_ ?a+x9v?#b;|_ŐC~qL"tv{'Vb_ NV}ý\I#ֹ?pķ e)FV5R ohHLY~ MsòNJ!>췹 Q]?G9{.B:8WL?JgB/5=vjbˤ 3HZ^pd$n@@o ;O\vgPyajѢbݢ Ȫ'a'4*#"R6#yYO!Iĩ\ST % qy)TUy3\*-&bv&g Z2"Φz,;3qA:^2Ӕ5|hXygFbl8ʲ+vD# M!'{hQZ7/ֆi+"3BĤ3EpzIR/q< xrִ<]D4`٦dXc=1X#镾DzEVBfj4z{]U`I|x[4IOl)Sin3A>>ꭼ{[*hĉ{Z(M?̻e7̘i/ _h >rBH9̟[=X=DM:;Hаj Z~e`YOw<堳ʭ[x˙X- F"Bq~L $B7<DF@亠 Эz K{ >נ Qդ#ò۪_:NlZ+A+5' [?J܃ 97r_ZQtoQ1y=%'wj͙8Eҥ&-y 4$\Ȁߜxxb _V4w V̻\M{"הjZ|D&- _sYb3\} u+]y;j?@Rq:<ٮH|J9cr,zK'acj,H4ݗ,DK1\6Kcd\4|xGGg[Y76PB`)*c?Hd t8>KJDZ_^Ug\Yc+#LLGP IFx7ShRCث`Fn:XԷ=bk <M- "t=;i8=2/naH#siC/½RK+4 wi49Z̗s nf}ybR\yhOus'HŹЮ`3g `Ұ^f 7 dr@agn0I{ݫƹ )E}jio0y1k.ʹ1= O0VN&4w(4+$^A 5%o:*y,l`VYC";=ArYrJ; Ud#LjECGd _30ˆsDgYZ_O h PF,`:Y/'4ho c8Ĕ'oYzPz#ײ7 7'[Ef4OI:nj䏱+>K9FDž^9= hv[#livIOvIiKTH#: :,ǥac7` 򬥐LPgG{J9qf&|QfkHnGuB* 65qìjTK80 xL֗5{ɋ7w :>\gl2s+eVL@;ߏ]{@]>c֡w wo3HO \m\zoaL[_,Vy7h̒j:t0ՓL6X>ks ?nh#!>˚Y/Ձy-~J|dǗVs8.Y`\qcwaGg6XdB%'Ukg(:,ZU ;QfJ[|"*־Y5u~qv8ᘂ%DTX^vX lwyS>ʤi0 fwyܕ'rUxX7I0@au *SsďNb Qg0 JD%1䢧nQs7#i,`^J<7Q]q"y..-X(ͮ0 YZmice/data/tbc.rda0000644000176200001440000010366013416657163013362 0ustar liggesusers7zXZi"6!XHs])TW"nRʟ[^ ntXb=7l([T2دn |~3+DXjcݻu)ZڹVs 3joueQM }.W 5A,VvoIwN0u0|kb σxaTh#KS tBU\IAdb|PDe"ڻa 2Xrq>Peȯ+'^ ,E8ZIJ=9Ȱ1bG)R|L0S8DQQV4b5ZoJ@c.ϷpH'F$C ˷%S"}ab^򇳳V-۰L0` ]^W{q gC+xho,T7ayڳ*-l2#T6]Jsw`Cbx.B t-TmM ̑=Y%d(/MV 00J:=8 p] wѽ!; 2\2=\FҌzTi>,\K[wPȑ D܄#OS#6T5۱}GΩF?!1f4sC1"I3ܔ>;lc߭x Ge6 qH" P]e2zcCK#>7W+rWh0 2Mte:7S($)mzD: Hctȃ^F޶Nѻjf 2Ԅ# |NZ0{"» nSu}R늃v+fC{ݚ=0Ԓ8{KצMgL#UaKlE q*1k*TùJKڲfɊZ(+r6Y)tʜ ĩ~g*ˏ}.yl0G)VfZzf"o EDbjH' '[Sx.iQ$h;` BKR)܄IYK L3%<-[g %r *W.]afGBZJ?U @Xx;<ֿb0ܲcd+C=AWNO`' UHUjWGءkQ\ 
94y?S3K#sj̀"ڥ\B}|m^#3Wɯ8Z) qHurKRBv"qGPA|ԓ!I#<.ﵮgh&Ϳ["D l"GDo@G j"$m{Aݮ1ޕo`kxILv*(8pF8u4Rz6d6"͘ʍY|#KHݣla.n6/[G+д\Ngaje_Y+߸D9JnVP:ŃN/0[Ef]b4 㲚5yKH`U@P }>Usعk>p ( }~(PL3'CF?DZYˊX?Kny5IQ`}]Rmnn?⼯hft*WèVu/) e-a6@?mhi_>Qzמ{mMc`(X*h\jX}Äj`5dO^NX9G4G m װ1AkvmQ:MjХL4#d?jyAuծ5a6׈-{XfE?|R%]b%W.R7y0"d·Ce@%JŰGrG!>?O_KNj=Z ߙV*h1`' |(e5byPPjcM@'4 e;} NxKL$PO/biÚMDt7XS+@E7N&W'4V]:ڑYyCԭLuG7]Zʙ3_M' fԬ8[qy7KοxLQ;HPgKg]{J;'?7|vt8)gMxcəXdcY_|7;~mkP- |LVm B "GaYgEl߻_%*@naͽX!7D,k"o!Z<+2eSh{* d9)ӀBr[E#σ2@ҭ$q?HSU\2fq?OkkPUlr t*T8 /R*,9fM@tEbzz3̔A!H-XٍТrj,d[qZe;T6"!~!,a#ˆ13:WUOOʚ?[ܜХS0%h!v+.Ibr*;L579#|b1f/S"(R[I6dˉ/ńٵѱ]Ηks_? 857`b/+2N̓"[]ɦ%H sx2zrOڪҥޟR)8.mv쮢'JDn7!14q? ?g v :l]d&Q0lKSD ^L=,r^H.lfո) Z+wToYBBcdn 8A+"Hn Q4e4m+\(Ȱ4=4VtOak魹6~}~b)]~ @DRX+QKNAh\0<\Ʌ73$n Cx#=aN!ήWM ifqwkEj#o31&\C 4͘ !gFo`.[OڋǯPY_u*֨2cFNʘ*r{$ȼ]էqOGpl$9{<q"[PNGy1#VϜ/ZK8>(mdڧl sA飧Įm.SIs :Smhɕ,Ke4HA^@|0 (1Yt9~R.a1r%ɩ52k)H5qY TT2L$f8Z:ƾY_)T3henu\z=BM =trfPT>neIkV&{Fs )&v(\WPU0*FPBblksåȓb(+F.m?LMrL"' J'۪E$ǯ;1_kS^CdL,m,x2Ky)^O؃|췴w.dCˆCg<2,{,~QC$i:T{yxݨ؄A*zΓ9/7oV =mZ|1 Oo5|:yY^ӔE/[3Doo)GVϫP^#|ñ[ PļvDZ 5n"gLEK>5Aћv̏3 iF 4Ĵ6Qς.4Bis?u(ꌑ1bMp#i1:V>T&Eo,n_(!$ *EyTFwgvuȲXt_ܰb'ճ?2OZ>K=3H?rURs'AF| ?1 W=0sœO!GGئoo+Ztdx?&69 i3\ґ8]Q-^7{O! ]9#uP$ri.@兖-ﱻ\]@hƓ@6U3GǰCQ?`HXyV &:ۥi7(C/Gx?N }y-Gdjܱ[ QxBaf!䒹]+u"\ڈZYEFqi HnϱS\Hߘ?r'?֊ߠfo]' H&9t{ u&4eMY~jW/kb?s:tf{ y A%4~kPN_hT-6g,0Ђ % ο!#&igb?L_c,58h#75S@̟hs r&I"m\\2t4!ҹ!~7N&rmV~ -L`dC$./0 }є]xs|_uDS:.f 0a#uutXL{MmrDBb\14R̶`b%6 գr%)z$ aMIv1(1?܇94v䩰jn,}od * .A⎉O.'3l{4PƏ@,I2I|7>pCMw|=PxIqh#5CYIER╭=|4]]iO?E|cBNWp1%!r+\.5'胸{.?gy0"~1c uHGVseQ- ^TB"fS@y:2.p#,Įl^6{ZȚvfH?IMu}bVL۴)$-^K@qo7u79u IA/THmW{֠àeTsǩ٦+ATg 6 ڦ=r':6pA:;q[KvWpsKhnC͉|oG>c~ޣF _@/k3E:5ׄNU-ró,6A@P0p`_ 8EɂqT9XvGb A^񺬊J V2{BiΰEl8mGQiF\Z1Ⱥ]/ ]XӒ ?DލE_˲41jY2TP?JNO҉zA?*s֥ί!<;oS ʴ0{cX#]촕AUR<ʆzI:0֌.%ƦU 4OCҖd, b+쁨$r{.\t~hscV;B-|i}/DCB+y7X)W ua-\F\vW:t{hSU7I;2Uyؤ!Z3r4c*MIe{, n IG_h6vjѻ7.hGZ8gSGR`t؅ a};) UۏSA6md"a9&΁8o~H r=>܃P*۶PbW]=/qiOUՇ>b,WZҝ7{vao+aC̲? 
D΢ey ^62LЦ@e7ۅ%'\T'nn_0X:BFU2u/<v5⡂ !19Hf'\6PsN8bIê<$YsL5"( n~+>l 5pk>0ۂSE>cb47v3iѿ0z5 JkKVnh?AYTOL[x} c}:.5|Hm ,AjF{(%3L;dFS*o@T*GE_X Դ)om>:=,Փ_UeޥTa>^v a#vm%2kr|9ΫqYK@54ād] "x?k CTYk5rlekHPlU9LTsmX4#Dm4^m}QX,T?/޷g{\sN$-+&X{\p F~ łoZKe Hjwȶ`fx*NÜ7{5/f:i>s8A~S(:pq8tvݾʘN1̖ O5~ FM{ sֈE HUanxq\Ǽ7HU k {| &۴_~rXPjUj?O\`J=~ș!,BLh3sba$M ,C$9{Z;ȁ1m )_B G@uZPX$rF$"_qyYyu'IH^VDuT̰M:c mN(J&/ Pn֮wYeW77q8)8[ϝd#?=b[΀բ$>X *I@j vl91qC{TsAO{:NQB8۾`Zi(d^2=5e-KTAwNI@nw0= s vfY0+uXk#PY}\WIQ͈Qy%7./(,KV8 KKJ6Q K^^_fJ3:Lܢ@YtX쐰?6t88#w];UJk,^S=vÆ$ݿEX7G+=~MgON68^S,/)t3#/^ZpS봆k=ty~wPBPZdRřnl0~%NF9{zt>bYs4$>a>R0JWQ!Xxfx ~ x^5”+Kgr%O5W<+F3h_hB  Su'l'|YqkIIN;[bAφ߲,&s`$5**Rܫ*kQ%7l &q(1]Edu412PLaX\7*GU@˕ѽ9V[Z}4yh)hvҹ9/[B F?9l }6`GJ0LkUFZ,w 4˞; J\"HUE0j0zKSlI4pG}8[nr0ƉYaUA죍k[fs/A4Q  ,'N~;:;kt ^"Z`ҋ j qm&]G8L_Kyg8j#B \qg BݱTmG5*Z-tj_/qTmLsRl,9!rI,$OZ>hVkUδ"*uL\}b) X@h/ϡ0)Ce6ilN^A "fڪ1f"6a[ePtc(y=#} n;1#@D!á[oʇ+IftuT˥M鎖`;&2eS+T\FO!" BÈ@hQIv홹d,liEJ jC6y9~1+@^m=@M ӌIѤ:ѲQzl]z!mb@G[B-v܋' C_B/d>,%p+U 8 FC WP4S e Wcqta$ S%w&L}F@0VrڸthI,HأSKr;(a[?n` pMꒆJDxzh6?}B*Q{zeՌ/rd8="w֗['*_XSJJA2 ׽~P5M\ZZ-|&e,l2~a~tTԔ(1Gl| rT8I-oI.ԅzdT8XydeGP._=g֓O*+ö?]GZ3UZ|RX֕aYrN/…!ݨ?*o|˂/ZUZGV֔ HؒӽTK^ɪ>D*߹xfTB6[C5O<vnw.=zldBs\Cn% k*FQ/;iPg #yE!Bsr 0~ jy@g}[1{0Br&/kpXʟCȓ /]ےS2m0VǤu>LO K#OyzH 2b*x|k@T{s뻩v?N12Qցi4BZg=,eXi.:zVWoK@bX71krKYIRbQ82F\Vej,d4i=8؂ 61MnfP?ZDs'Uk!(ɉ^L!i8+ Rz~K0g~5N XX.Ɓ0x鬏36G;h5  Cz"rBMO;ݗ*?kQ0?<ǵ7Ckx]36 qhi@MǴqreyq<rtݲv>Iǥ>pݝOB % `t(b>VGϨV&eI߾P0*PפM)dxKb4C%7iߡWfY[^a2N_N.~,JUqg]`d΃bRJT[^:yM!&ѩ-@QAuʼnٳfq̫?߭bGs\SΙEYg hcO{b}Y;))2K"d4!C:=eԸ(?;xn6O+JI.*EyJTþ ]WuNkdbv1# O1ϻNU: k CpY_DyI )Ď>Y͉4Sfÿf*l: kX 3k01}G˅ ZK HdP"<[{P//?W2-.|hY^"AsN\w_@di y8GbWSԼF*:n a`+աo7QteHQ&ua.ZM+e'yW=r'=LoevSj 냺E4^;e-ŸuБ߼qխ{ :Xd@urTݛ‹[x%V sK.F `a!{5l);y5V8nK#yqh`hVwփče?$siQ ž63# "흗06ǀ?(as [Z:6߁F^@6$ЈCTbU/S}w3d]0&*=a}x39,0vX>6ȸt =m>@s9pCr? T<-)긡y`KJ?s^BDH>(?UNwf9§]ⰁpRND9IhpaW`s5tŷ/E Z ! ޿+ժxveU+ лXOڽ?, VկE^#B)"aදq 5eb.O,U~ gyhydͤxdz ԫwd*Zp8m9g M+.[PSCӳܦF˨#TثY3' N,e0jĒX^DA ~l- TvD4<4l. 
wuOJWl@fm8\?8Sp_~CTB7z|<&^%!gR[JlXuf7;پ̍S2ڠ`ỹru2X#~M,P{vlqjx:A^l_h_`|i,6O] [P$yaٯ`}{>68J .|2zϙ# Txߝ+;">hO$.K\gfg}J"׊Cy>OݟW↠ଽs[%_5bXcIcrOZQH0VDߩ!zrZKcy_׵R&6 3E !j${x9ru5w]+$i< .-QIO&HaS(+&¤NxFj^bk3!ih5b}Vs 1c$s(qWE#6KӝAS `AcKs<ɛy S Cdpñ]N#A8kv=1)91%ji@xbXsO*&'G{*!z~A{OL| fh?cq LWͳ7n%/M@ǔpA iEd։'Z(\i3*VnSʩ}]pZBVdSdĔ /H-c;z Zi(GW*&*Pu; lvLXdr X#{p$lJzyPשMޓD9SgMЕϹdž^୵]Bcxi<Xf H38~xj0#|{\-]3޷!|š2+Ԣ(VZϠI֯ &x<6`#z$h$u.Aa"X|aW'Ўf{>̦C2YMˏPTkk1jNCo.OI>Gg5@wY5_ˤSIMϼ*QEO9u0@  _)H<OFg¢]*ɼHS ˢܜ'CaVA(Z!L*̉CD-ᑛj >Rj+㝽\=\ѯ&}+l0*, TH $#&PcFp5!1&ʕ띅~HvDԽn(w~G_RI+s@jrQh_ QSSpDTF~-ݲq FRZN!ʙÑQ^[PJŢ+Y|AZj8YR̽"&!;c$ЮtxK03kN~Ze-4&z6dnC)TGdZכzU({MuV6}kLۚy#FʆO惼`j$\Xk,O‰6-U{QK3^{Q(e~m%uRwH&GW5pѣ?ykI#4'SdF!|mԬ[NjmUdo4"{bs#t|\VĀ~li}8񰂡2\peLGKzVRɎI |X=3$a}&?(xvEeJ LW-=6uACw׏cFVfSl ;ZQJԶDo"p*c7\ xVϿ1nEǶXzHK(P85<*wKq EA]s3qNϩT,/g^ ;>minEr/#+ʻR 2"A[+i m$_(+UYB@5j^](d̚7PfJI ,FhO1צkt[xۈ$^yPra#QVNWY hl g2 _ҽ([BGC6/x OHhXJ ({Zb1p sx>g[8RΫm}2 an7Zms1iǫo_ 8G\+BblMFHj։gpf/v+&`JNBo6fmx'bX.zXuk+ yB:r$9W!CQ]}Qn'_ m#a6(Ef<7[9&Hu0͔U׫jQ4U[Ϸ,ԉ#HLr-dk%9$5֋db&K#_Dr6mtE klO3,XpbD'{ڑoDB&ڄEt4M;[~+GmA%*8X" ֪ax8PdA[u=!同8/ԴGΔwe74-o"TWe`O_@K{l~L6 n#-Y36ŜPrn10ƳN ;4c/y&!j2 1|dm`lW'3 >T ؟v|gsdR#䑪-ψZYF`fev>54 ^߂7t8Ôeƀ' ϙ=9$N9ˣT~(p@ܫ/QQLz#4SId)iSf# <@hT2dMC Ns[ ~ ޭʅ;NE{ } 8GAޱS"NQvCO?-WH-)W","SP@i:4D'#/~1YmwXF̩~=owv5ε~ 1dyB%OptZ0I[48F\,=HHI9qe{ˁf>M d [Ze GqJ88 @قCȋ WbR 24Y7k8dl?rsNl8 5 +>a)yNeÒ\ m3uxFH65=`< Z$nD dx.ۀ:k%C7+scẎb͚!o2X҈Ͳo~IүU_碪zz1ڐK/hWpg?9#=I4Cڸ{|[/aZq4!^KYuQW';i%zٸةPZ7YYn4 Y 79(1Xm-9D( "HaRG^9ͧM)rX8fGFϰ_[Cԡz 7Pd/Sɥ9&.*I8򆹈EƘK% [H"oUu-sNoXzsتT1tE1P؛jGӾ3M7ƎQaܺVaJ=&멾_*j* $eoBx!bQ/!'?jtU^T-rf,_[H`D4t cU}۴I"9]?ۀdC%&]yˆڟHgPSFg{E>B*fg"hhS&G"6PnO5C߸U$C/([7r-xǣeP/B&x<#OI6JӐLxmdfW*\#Td` *6տDXY Q >PTbQ;s)puW_MIF-G3Sݰg~|T XGDRrc$:?z6-?MΑ970|DHy7tx#FҞxhNO}}ͯ)V^+^:8e 8xDmITʿ»+gCΰn\A2^- ;GU>{M/ Ŋ;lޖPN 9~c7*aXIf34ߘ*G =$ :}^#,|>y71%9rܾid!lA]4LZUao7r12 H)p ?xvihl:.v}N{6[O|9s"TB(6W:O͠TdYoEb7zrk r-٢F9gN& F#UпWtݠ WъNYiqw$۷"T($à𰻖P"S{Ê!IJB4o1Rv a ƺ-+IIX2TuH"ﶢ"w7>l)dAjV: 6,y_j L ssS:ƒ;?LV~ն: 4]>è=^Hu8fz)m.=e|ez#ZS>zdav)r[uHYoU , 
=zsB?<İod-,AЪJknf9Cpi..+EK[qeC 'pPΊMdrp`]/{S6E$:"k;+20yV6Ћr0˹=m~2IbO1NC#2\ݶ{u%@$䌋'm3nYVCz$S5ă)}ǹDOhG})n)]L -g˼t# lWE\ofY@T-Q-:Z(-|ςIg!QP8f,>cmF`}z? n 0|yB^N\3PmI!"ul0]\OVL!-{ g HKn7ѫgA_];q)x@k{o+ 8OEzBJ=|8Z$ f MHf LopЁf/Cs$<\^L^ 1 3`u%,MNfUo|E{iwegenM&R*t[:PP;Fiݠ+x#kuď׾!9VN-p|=@H)K#qW^J`nw>˦1汃E>P>Q8xlPtLQ0=SI1M=ushe5H Qy")˪]:yncN_6@!5)H" &4N_V ӑ]_$A!HsAmĔ;;  *X&&"aL`B8vw- *cU5k ew (htusMUTXb[ɍ󵐤嵚HYG(9ycaޟItCed~?4QM6&ɮ^|DB.$K/c)KC@XI\D^=Fr*sWNn$7n,ABEҟ:fz㋄u$ኦKN1Aqʠ28v.!]#8r/|xԵގ~D77?6{G/(3q9Ҽb@%ԫNЇ@! *MVc0+M;sx]nԹu_}1*')b%XUNgz N*@R0;3tT5h똜[0ɞR%r@&rhw"%(qO[IfO|7ofbMdqV7@j:5#q&J$9!9S76}.~4Kfrg|P /tRr#Wg_^v,6@}x91tm83\sAOdٙeBοAZG;c6퐆ڃmkr euM Ajܡ+] wK-ԘcrYMuԟ^i<]}RV f_x|5?k}Ȕ> LG a([pRg 9]XLiRh:9g\U%0nn+u_zP b")FvtLdUMuA`:-V3 :bulI;=8ATHR%mq:W tCWfvwئ1W/EO}6]\sVNh?b*V✔-Qޛ5ZoH֪ƌ8&bp_JwP ȗhk N:L[VgCO3)Ut$'kF,!e/u|lw;'mAqZgyvF1Z7{E5h˲UA0 (@ܘ? 6qLIHvI7ESS iCZ] 8Jr28gNFUe3[jB*'][ ;Yӓ ZՑX =V3)^?ͼ:vZ|D9/uGϞ j;y Džmo+8‡Q-Xs8^(eK[C7/_վi,~2Qg4u$@bS)<lߒt&ZJ )M^fge( '<^uffV6VBwJPu|:nn9X^KjВ+J%7\ꇎWy3[νCڣh #Ufhta:P*z@${m9p㽫BumS7}C;b8Cxe SdݕB$ҕ \C&3 DSuͧy]IGk!1ܒrF9LA1Ńbq߯9ېy'mP!>fZƑ.jgQ49AlbJY BX'9.;=X GҘRL3uMS-}@ ̰`(H+/J{O\7=e;!.Zd6ODG*eEz̲0 L]P҉j9Jmi.+||.hyWVO^*dwWAOH9́O#􍢶MO9i/ۉum@JObZR{:Tz;Cݏ.NJ }Du@n=o=i^} rۊ,3$fUlm[*g+8@HGڔ'8m܀Fncb~UB=!Lk D'(wިc1[8I 3-v6Un$:bFc4PI,h+hy," S]ؑc8'$vGwV=IT`^(H^:S.meS[s+#Ą>|#UCM#S,-d+me$O dlĐ.ixG"fa)jK]j~$ޘV7@fG}oPdHvB@^^{C9:Bfc}#W?*?N\YB6\:U̓l#*EVԒ2 @m}= j;xmASϩF$\?/>VcA X׽M2)C F2Q֬sct9CQOyYglWs3tbbx HnR yܕIţj75p夷FWO)VMN C5(bbeAuϕPR0G?5IJ&+KIhY=mT ,nkf#Cҳ~n9# aP ^ϴIӬqK`l:Gg4}v^)ਘl.cy2NhWwэ-$=,׃ <ѥ$"]1 䎷]R=}!*v'Mi^`@W|L6vvN}Ibg|H;ʾH d_S^64eRZ4I]mD@KQ%jҌ$KS]!5?{(a J' C N'ztX3Y83' )~MώeCvSnE(,I?pI(Lt$ԔU~n #\Bֽ H$gJKEp܍8ݦ$vz)n؁զlJ] CʡƿE WA@ߵ0N0 S"\X|JS\kNh1xW XNҀAZ#&{<Q7A?'7fŽ0)ݳzC rpċU/\lC{zz"w2YLmgX ^CgsꫛO+@cߑÿ/A9 }4Mv'D6R۲zn7pW >Y&BaY!ȸIGo2+94Urv#%ܼkð6UCD[@K +O(XrV:#.w"MhP `,H/5} ۶lf!4|YA`rXK|4]uҘygoM酹f먢o`p+g`CmqRo hm @j8YgT6#eʬ&kT+ *zQ޺r2j#<PdO8k7cJhwi!Kpb^T)6lf ei,3dn^}@=#VOMMN$Aš+_WA)7B|5n2JAKK!6[n($K;lDwmϋ6: fGkTFRD;$'g.=?Ul=hM~Kb'QG>`# 
ȏ{BLv$ҧlbuwQu djM}٤i|edvކo^QmW|!4?BmE/w {}rrdP3Sb "j C])wt%|<>o RC/iJ1EףZ޵AM% bmt$ךE݈ߊXJWp|Y49'i҇SJ#-M}v'q% XBV#Awل{H+8{ dS_ai987OQ)_;`Ƅ R'{x;c%W*fufH+OG:qP.z̟f=~h᮲_I3KJOsJ"qJLR'.p(7"WP}r`K@|E4(Kl0]?Rճ n7m#,zKk9U(T8$2f٘6 ,ڢKFm!:j x6:CG.jVY~$K8&& ;vAjEӒ|gNj}L:ef `^Nke'0]zT IQWt^nxgTQc2I3?tce7@_IԜ~7-} bG8zVk esh{x|jxlyI|d8eڅE!UT b-.Џ~)ltabkW;F*{6ws)yջU72dsH:e޴1᫋&(fÝ1VbĻs O&1gC%I9!Kw~EVʗA?GRh<0#pcRlf[5]_r#ۅ٩= oJHmQwu~q\A`ߍ3W@:E ,V]|5 0Lffm)]DZ7Mx"RpS_Ѽ] ͕g,򉫏&WH)d)Mw2|9R/ޣĉwx A.'/#*NPR(zt#::7"e6\ o2;9R)6Ī[ՐM#O];!16m ˼V,?0@ֻ酶z뵬@PwOuqz3X3".n] -"xa$ɨrlQ ęɒl 4IJ04wTpU#haʐ'Elz[&S?q6o8g93i^o{Ju#r byPp#͈MWʄ:XThB Kbmm1m)pF` &TYe1iȟҳYBLpEf '-|^V%Q_˿hͽˎ'P ,B,tP00Vsho"/ˑڅߌDvT0&N}u\at['|aPaZPUFt?^&@&%HI@c s81N}K&O0eYkNSU<re)egP)[cwm Meb杤v V`*b,чMRlJ!T.rˆ&+`*] k.WɥfEx %OExj8// ^U-b8jG6n})S~ Y(L[D;~ח%UU#TtOf3.0)ۿG^t,-45nP0a]ip-f~.;xɝbƈ q^N!id\E% xFZUlAo0kUl70^Bm=t'gO6+NR;|O,, Dܓ6\;_uB~žApAc.s|_^ r fWTwu|pa=@5*:2hO߁)P- |d|dOBRo<%W i1]XC?@Qnľ:vahΪ MB0`:PO8mkïTy},DnF,4$%36Y^_xh,5xG,#7rᢕ8 ?sYcEv|vZ?)1Tپ$a\d imo@u}c{Ecr`d|~dN* ,)%Tފׯ)''Ѽ̉>%s]&V}8Ykgr/I!-Mξ*Lph1e"GKd1M:ҁER42[rD"&e4|C Ev pnУ܁$.~Kplq^[p D'hM[bп .tOO3H2 W$6+)5!$B`*Hcm4u_"蠘BM|:ǣ/WƾHB@Ct#~q0>8E/x!FXV o =a@MHpB!@X9xn݉-ȳq F so{yMK$PbEpHp9Ϯ_DKR=Ͷ+9ib| ZO#JjaKTA<#t9:k^rugKmB( 9N&;`>y` eNVmSR;EseLߔ&SSvֲ5 C6-7p_ze#-5*{횕ksfsv wYZT]2 OkJwHV0$䝩ڹPvxF-P:-io!,"{D)>()U {]*?])D}.t~;kqěZ! 
2B7NGu!-BIM7%Ւu!"|aT!mw({ɱAu.֗Gڍ)~Q<]?P7;?Ja; j-+x7dqXRGU"7\c f]!p[$zMG0ČRu-JYMt7hye?]iL,{ Yхv[ZXV4&-~Xvx޼S}67["VbI] ױn>uAt.a,jjȴnr07nr:JpM@"XsUЋr4vKYymm^Q&_ [̈́Ŷ[6py23ѣГ6L@E ĝzNewc(ajwu֏@eE6$E 1i[=pTۨds2D^$2:mND~5cjaOE&u5>,H1ׅt x5'?yX)%ks,ɳWMN,U,?(FnTkR&%-f)]XN%/f=(T2$iC':PtlH\pa-T,5*<*+ Zx+NGff+譊Qjy~Kk?OR1n`iHYƽm&a MI1ⲙPs y[GbV !.bb>SkʬUoQ_jq~A>^ctӰYsk4374<$TwEr?}DK|~BRA& ]xqe 4z//+LP7"$#i v" Ƀk&{yu:6Z0QG0&‌%KvTǬCSdntԲRBlB8gJSFǩނqt;uaB9XL활!4颥 dH{:0ҫ%4v\@b?XhR8uru.(6 kU<J[4rOvKdӆQ,RXB8!#t?3jIY/\MxtSW'`~0u}bG# >Q*чd[vA5D32-xkq|ra'y9 + Yh{汬6q ^=99PNB]23N_ٜ#esyY9KB:6045;4|4^,t)uf}=: 1!lNh50oYS# }(tiUV'3Saj5:i41YlE 3nWAlaFYpޞ}~E*c Bn`p4%Z=/V3mԆF, <˸EpHȗ7>Nn@*XzPT495W㯌F)w'Upp+<,K*~\V#5hXQᱬϛn JCv2[8׮6u|l>OTJpsR3_(9k=>0٧G 7pȮD4:slWH` ?$ԄʞI>X2׉$9w?cHJ ǿͯ7_kɤQR`>0 YZmice/data/employee.rda0000644000176200001440000000043713416657163014427 0ustar liggesusersBZh91AY&SYVsDLDUPD ;ܠ@@ hA4`F `"RM 04s2`0&`0L`DI@h`0L`Q$fSh 4 1=J)Ql $_EKƢ&HYlR_O $ C@RI8XKwݎ11cfiUc c1cc1c $$TywlQIn`RGԺ8m0`[Q%ZSX4aQB1lA"5gk6j]VIU o>Mk5Ιe%HЂ .쁧[aZ5r_CUU1;j'-\mInwmcF)sS-^Nj!lFMLJ2QV*)iXQFa3)z5c ^iIT,z]N,~VB RI#لMT?i{zŎ...cM4ѲohnppcfN}6Nn ꪯܪ%cM4M4/wkgC䪪y-rujrNM4ьiBtwwƝUJМiR)LLIyZ_H 4`mice/data/pattern4.rda0000644000176200001440000000027713416657163014353 0ustar liggesusers]= 0KRAVt n(Hu*m_. 
Oz^r&q@D$BOb䑏oItAۂCf[ۄ WCK8ŽkSؙ[#*bA9gggq.C:Ɯ ^@K[mice/data/pattern2.rda0000644000176200001440000000027213416657163014344 0ustar liggesusers]; 0oҊ6>Q'wu IWxBݜ{r_}T@d(tUHr[0>XsCSm,Ax2p6]B?bR՘bbbE8׻.y1*ӕos}-?V[mice/data/windspeed.rda0000644000176200001440000001061313416657163014567 0ustar liggesusersBZh91AY&SYn?oB,I*rX$ n_}3xݾ]pws"2L LCMih4z@"(z=Fz45IHid 2$D2P ~zO'==AAQ4=F@ O%T@ 4 @4iFɐB hFbmOSF ѦF@44y BSIaцFf wwj p# 0_njr8m X0^o;WrԈ*pB.)P-D9`י i{zj۹ ؇yM6ͅv\Ъojuq^j-jE&˙ TDRM!Q ]%΍j*vjIyhae,ڻsf<0C6 ,v+)0ݯM[ S\/`E"Oz-,jmyht lT'7{cv.XD0^QU@F 39ԑ##MS;iT)C2#ʊR-i1!JPn+$HQA;>˗D]/Wm6g[oA0OpQ 3$J# Db0#Q$0# L/ɘ.n1lnڔnY=5UHeXUYIP[o{DʿN> {MJ3mB ߭N8kw~MKuz^] /CWL"(,( #,,0QAGoXTP; Y lledd@,¾ʀc Pٯ$`^?7q-dk(7~(|+JP#n4AVʇȢgK PL1RS2"U m+cuL;ҾkUW)P맪KIM*vb%GuDqx}Cɛe.+bF}em㒐dQ #o @ͼoB%ZKi8Ԁ X[yP3 TJ 6YGTUB:lRLJںFQۢ}6LDl$$e2LD"C$a6!9x\ <'I(rV6Z=r{A4OGd"uV8$FD u"=lz,[٫z^SLYeF |sлĘ1RSs(v,ean%1Ƽ!c[; 7D,Hv䒆re g6D\<`- ($6v7  آ0mQlu PWʟq8Ek#PX2폅wvftn8)u󰴟}jqQK{:"&ViC^Knj8@FdcAp4xgu'30 ($jh`b#|M:JGj׉DS210 #ѽCaΣ+xM."c2;t7y -rր4[ڵ At'&ayros#wfYwFCvXXHFMA^An ]RYa1r&Ǝg8E,ta4eN5SIr*0pKBMߛIeqE·70| #(n,JR R0P!l"g[b1C e`acdspw(l9"XC"0:rXHeʠ:"}t8["8:U6Y(vI(< WD$jydlGKn!h 8IK Tf5-l0Nןfl|ǎEwyAxjoG\ MeܒZK,{_5vtBwq`WS^~y!bnϾt;3ldRB ȢQr5ukGr pHh\ron7f"9iEM5n(v|` kk Z@N6ׁ1qgǀO, "q<FDpQ*_fE.ϱc5Vt+TPhq?};p&tepu!&.)]< ̴o5m' MLDQkT$J =:$IUռco cɇ`0)l#qk;`M 7m\ğlSBƕN3~ͫ$z{@ ^O]{+$9(6c t~X)JFjUX#HI4m^D͛͝ˍ&IARS7h[0"3[&2Ej&bX\3wx@a0"`(38&feeVLۿ]BBamice/data/mnar_demo_data.rda0000644000176200001440000001016113620753345015531 0ustar liggesusersBZh91AY&SY: - }}»]]tiMLhO(ڦGf)Mkrqj"0xhI'`|>d@ _ᇺ@PAH 0I ADA b) E) DGs|3dà 1Ad A*ȡ ,RXI_\Jb2j !Pؐ25?E㐘ץICADL%8wO!k~$Zx)]kp `vwU*€&P9B!y7n3"n2&m7trZSygHRKQ7la \@{D'a05S`F=v~pgY{ +dw!akW0aZF.DPcmAl(E hTQ~CL{y~A.8}w 3XPxh 8]8ܤ_i yFsO6#g[^ʷ3&u`(9ɯsǞqaN $/>w#PIq 9&-q"mq)"nԙu\mŠBTWb{w5/^Ķ4 ѯ]=kt' ے\KtcdHBR>`hL:R52DEY' ݄E@9>)imnzG!Dcgp~!?jaX !x4ǤCIG~NR8({:(@j|?42^͵[;?VC~=9TqOS?iτN'bVmV4'(>y흁 dS"}gQ~R5Yj3+!-le)Pe §moOfH[4t;7?/~<"jb3Gkb+B#T~ Л mC$9cb ehs:(@ɐ?V}B wX0aA-4')E5g-C!PtU_zm 
լbFska3p H Mʗ6kc 5$/hP`uUjژuJm!תeaңZ5 }ɼQG@G$%M}>e {J6AfŘXǬn>XE& SXU?g?||cx4SY7㥼Zi=Ҫ?/^{;[v܈?+wDp@(<*پZ!~C=ۧ艆MZY"q߄41f ; &H'|$d*ёY.fℎP5@}Wf0Q/Kpx\d6?HQEV5Y2R>[]]oym9]RRŨx.|kFʕ^ё*Ʉ]_=Q_]T 'L4uZʱxGXV(U暷;q%dW/] ;2F9GUf}Ŗ:9FA7 SYAk/0*B扢'C/I$^UeJX%Wك EIB8ъѣ["C.p tnZmice/man/0000755000176200001440000000000013621214614011742 5ustar liggesusersmice/man/cc.Rd0000644000176200001440000000162213416657163012633 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cc.R \name{cc} \alias{cc} \title{Select complete cases} \usage{ cc(x) } \arguments{ \item{x}{An \code{R} object. Methods are available for classes \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} could be a vector.} } \value{ A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the complete cases. } \description{ Extracts the complete cases, also known as \emph{listwise deletion}. \code{cc(x)} is similar to \code{na.omit(x)}, but returns an object of the same class as the input data. Dimensions are not dropped. For extracting incomplete cases, use \code{\link{ici}}. } \examples{ # cc(nhanes) # get the 13 complete cases # cc(nhanes$bmi) # extract complete bmi } \seealso{ \code{\link{na.omit}}, \code{\link{cci}}, \code{\link{ici}} } \author{ Stef van Buuren, 2017. } \keyword{univar} mice/man/as.mitml.result.Rd0000644000176200001440000000116313416657163015307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mitml.result} \alias{as.mitml.result} \title{Converts into a \code{mitml.result} object} \usage{ as.mitml.result(x) } \arguments{ \item{x}{An object of class \code{mira}} } \value{ An S3 object of class \code{mitml.result}, a list containing $m$ fitted analysis objects. } \description{ The \code{as.mitml.result()} function takes the results of repeated complete-data analysis stored as a list, and turns it into an object of class \code{mitml.result}. 
} \seealso{ \code{\link[mitml]{with.mitml.list}} } \author{ Stef van Buuren } mice/man/nimp.Rd0000644000176200001440000000322313416657163013210 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nimp.R \name{nimp} \alias{nimp} \title{Number of imputations per block} \usage{ nimp(where, blocks = make.blocks(where)) } \arguments{ \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} } \value{ A numeric vector of length \code{length(blocks)} containing the number of cells that need to be imputed within a block. } \description{ Calculates the number of cells within a block for which imputation is requested. 
} \examples{ where <- is.na(nhanes) # standard FCS nimp(where) # user-defined blocks nimp(where, blocks = name.blocks(list(c("bmi", "hyp"), "age", "chl"))) } \seealso{ \code{\link{mice}} } mice/man/md.pattern.Rd0000644000176200001440000000363513416664706014331 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/md.pattern.R \name{md.pattern} \alias{md.pattern} \title{Missing data pattern} \usage{ md.pattern(x, plot = TRUE, rotate.names = FALSE) } \arguments{ \item{x}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{plot}{Should the missing data pattern be made into a plot. Default is `plot = TRUE`.} \item{rotate.names}{Whether the variable names in the plot should be placed horizontally or vertically. Default is `rotate.names = FALSE`.} } \value{ A matrix with \code{ncol(x)+1} columns, in which each row corresponds to a missing data pattern (1=observed, 0=missing). Rows and columns are sorted in increasing amounts of missing information. The last column and row contain row and column counts, respectively. } \description{ Display missing-data patterns. } \details{ This function is useful for investigating any structure of missing observations in the data. In specific case, the missing data pattern could be (nearly) monotone. Monotonicity can be used to simplify the imputation model. See Schafer (1997) for details. Also, the missing pattern could suggest which variables could potentially be useful for imputation of missing entries. } \examples{ md.pattern(nhanes) # age hyp bmi chl # 13 1 1 1 1 0 # 1 1 1 0 1 1 # 3 1 1 1 0 1 # 1 1 0 0 1 2 # 7 1 0 0 0 3 # 0 8 9 10 27 } \references{ Schafer, J.L. (1997), Analysis of multivariate incomplete data. London: Chapman&Hall. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \author{ Gerko Vink, 2018, based on an earlier version of the same function by Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{univar} mice/man/mice.mids.Rd0000644000176200001440000000412413416657163014116 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.mids.R \name{mice.mids} \alias{mice.mids} \title{Multivariate Imputation by Chained Equations (Iteration Step)} \usage{ mice.mids(obj, maxit = 1, printFlag = TRUE, ...) } \arguments{ \item{obj}{An object of class \code{mids}, typically produces by a previous call to \code{mice()} or \code{mice.mids()}} \item{maxit}{The number of additional Gibbs sampling iterations.} \item{printFlag}{A Boolean flag. If \code{TRUE}, diagnostic information during the Gibbs sampling iterations will be written to the command window. The default is \code{TRUE}.} \item{...}{Named arguments that are passed down to the univariate imputation functions.} } \description{ Takes a \code{mids} object, and produces a new object of class \code{mids}. } \details{ This function enables the user to split up the computations of the Gibbs sampler into smaller parts. This is useful for the following reasons: \itemize{ \item RAM memory may become easily exhausted if the number of iterations is large. Returning to prompt/session level may alleviate these problems. \item The user can compute customized convergence statistics at specific points, e.g. after each iteration, for monitoring convergence. - For computing a 'few extra iterations'. } Note: The imputation model itself is specified in the \code{mice()} function and cannot be changed with \code{mice.mids}. The state of the random generator is saved with the \code{mids} object. 
} \examples{ imp1 <- mice(nhanes, maxit=1, seed = 123) imp2 <- mice.mids(imp1) # yields the same result as imp <- mice(nhanes, maxit=2, seed = 123) # verification identical(imp$imp, imp2$imp) # } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{complete}}, \code{\link{mice}}, \code{\link{set.seed}}, \code{\link[=mids-class]{mids}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{iteration} mice/man/estimice.Rd0000644000176200001440000000341213416657163014047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{estimice} \alias{estimice} \title{Computes least squares parameters} \usage{ estimice(x, y, ls.meth = "qr", ridge = 1e-05, ...) } \arguments{ \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{y}{Incomplete data vector of length \code{n}} \item{ls.meth}{the method to use for obtaining the least squares estimates. By default parameters are drawn by means of QR decomposition.} \item{ridge}{A small numerical value specifying the size of the ridge used. The default value \code{ridge = 1e-05} represents a compromise between stability and unbiasedness. Decrease \code{ridge} if the data contain many junk variables. Increase \code{ridge} for highly collinear data.} \item{...}{Other named arguments.} } \value{ A \code{list} containing components \code{c} (least squares estimate), \code{r} (residuals), \code{v} (variance/covariance matrix) and \code{df} (degrees of freedom). } \description{ This function computes least squares estimates, variance/covariance matrices, residuals and degrees of freedom according to ridge regression, QR decomposition or Singular Value Decomposition. 
This function is internally called by .norm.draw(), but can be called by any user-specified imputation function. } \details{ When calculating the inverse of the crossproduct of the predictor matrix, problems may arise. For example, taking the inverse is not possible when the predictor matrix is rank deficient, or when the estimation problem is computationally singular. This function detects such error cases and automatically falls back to adding a ridge penalty to the diagonal of the crossproduct to allow for proper calculation of the inverse. } \author{ Gerko Vink, 2018 } mice/man/fix.coef.Rd0000644000176200001440000000360513416657163013752 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fix.coef.R \name{fix.coef} \alias{fix.coef} \title{Fix coefficients and update model} \usage{ fix.coef(model, beta = NULL) } \arguments{ \item{model}{An R model, e.g., produced by \code{lm} or \code{glm}} \item{beta}{A numeric vector with \code{length(coef)} model coefficients. If the vector is not named, the coefficients should be given in the same order as in \code{coef(model)}. If the vector is named, the procedure attempts to match on names.} } \value{ An updated R model object } \description{ Refits a model with a specified set of coefficients. } \details{ The function calculates the linear predictor using the new coefficients, and reformulates the model using the \code{offset} argument. The linear predictor is called \code{offset}, and its coefficient will be \code{1} by definition. The new model only fits the intercept, which should be \code{0} if we set \code{beta = coef(model)}. 
} \examples{ model0 <- lm(Volume ~ Girth + Height, data = trees) formula(model0) coef(model0) deviance(model0) # refit same model model1 <- fix.coef(model0) formula(model1) coef(model1) deviance(model1) # change the beta's model2 <- fix.coef(model0, beta = c(-50, 5, 1)) coef(model2) deviance(model2) # compare predictions plot(predict(model0), predict(model1)); abline(0,1) plot(predict(model0), predict(model2)); abline(0,1) # compare proportion explained variance cor(predict(model0), predict(model0) + residuals(model0))^2 cor(predict(model1), predict(model1) + residuals(model1))^2 cor(predict(model2), predict(model2) + residuals(model2))^2 # extract offset from constrained model summary(model2$model$offset) # it also works with factors and missing data model0 <- lm(bmi ~ age + hyp + chl, data = nhanes2) model1 <- fix.coef(model0) model2 <- fix.coef(model0, beta = c(15, -8, -8, 2, 0.2)) } \author{ Stef van Buuren, 2018 } mice/man/mice.impute.2l.norm.Rd0000644000176200001440000000600313574715125015747 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.norm.R \name{mice.impute.2l.norm} \alias{mice.impute.2l.norm} \title{Imputation by a two-level normal model} \usage{ mice.impute.2l.norm(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. 
Random variables also include the fixed effect.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using a two-level normal model } \details{ Implements the Gibbs sampler for the linear multilevel model with heterogeneous with-class variance (Kasim and Raudenbush, 1998). Imputations are drawn as an extra step to the algorithm. For simulation work see Van Buuren (2011). The random intercept is automatically added in \code{mice.impute.2L.norm()}. A model within a random intercept can be specified by \code{mice(..., intercept = FALSE)}. } \note{ Added June 25, 2012: The currently implemented algorithm does not handle predictors that are specified as fixed effects (type=1). When using \code{mice.impute.2l.norm()}, the current advice is to specify all predictors as random effects (type=2). Warning: The assumption of heterogeneous variances requires that in every class at least one observation has a response in \code{y}. } \references{ Kasim RM, Raudenbush SW. (1998). Application of Gibbs sampling to nested variance components models with heterogeneous within-group variance. Journal of Educational and Behavioral Statistics, 23(2), 93--116. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. and and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. 
} \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Roel de Jong, 2008 } \concept{univariate-2l} \keyword{datagen} mice/man/toenail2.Rd0000644000176200001440000000412613617306761013763 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/toenail2.R \docType{data} \name{toenail2} \alias{toenail2} \title{Toenail data} \format{A data frame with 1908 observations on the following 5 variables: \describe{ \item{\code{patientID}}{a numeric vector giving the ID of patient} \item{\code{outcome}}{a factor with 2 levels giving the response} \item{\code{treatment}}{a factor with 2 levels giving the treatment group} \item{\code{time}}{a numeric vector giving the time of the visit (not exactly monthly intervals hence not round numbers)} \item{\code{visit}}{an integer giving the number of the visit} }} \source{ De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De Keyser, P. (1998). Twelve weeks of continuous oral therapy for toenail onychomycosis caused by dermatophytes: A double-blind comparative trial of terbinafine 250 mg/day versus itraconazole 200 mg/day. Journal of the American Academy of Dermatology, 38, 57-63. } \description{ The toenail data come from a Multicenter study comparing two oral treatments for toenail infection. Patients were evaluated for the degree of separation of the nail. Patients were randomized into two treatments and were followed over seven visits - four in the first year and yearly thereafter. The patients have not been treated prior to the first visit so this should be regarded as the baseline. } \details{ Apart from formatting, this dataset is identical to \code{toenail}. The formatting is taken identical to \code{data("toenail", package = "HSAUR3")}. } \references{ Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of quadrature points in a logistic random-effects model: An example. 
Journal of the Royal Statistical Society, Series C, 50, 325-335. G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, Wiley and Sons, New York, USA. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{toenail}} } \keyword{datasets} mice/man/appendbreak.Rd0000644000176200001440000000210313574715125014513 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{appendbreak} \alias{appendbreak} \title{Appends specified break to the data} \usage{ appendbreak(data, brk, warp.model = warp.model, id = NULL, typ = "pred") } \arguments{ \item{data}{A data frame in the long long format} \item{brk}{A vector of break ages} \item{warp.model}{A time warping model} \item{id}{The subject identifier} \item{typ}{Label to signal that this is a newly added observation} } \value{ A long data frame with additional rows for the break ages } \description{ A custom function to insert rows in long data with new pseudo-observations that are being done on the specified break ages. There should be a column called \code{first} in \code{data} with logical data that codes whether the current row is the first for subject \code{id}. Furthermore, the function assumes that columns \code{age}, \code{occ}, \code{hgt.z}, \code{wgt.z} and \code{bmi.z} are available. This function is used on the \code{tbc} data in FIMD chapter 9. Check that out to see it in action. 
} mice/man/make.visitSequence.Rd0000644000176200001440000000253413416657163016014 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visitSequence.R \name{make.visitSequence} \alias{make.visitSequence} \title{Creates a \code{visitSequence} argument} \usage{ make.visitSequence(data = NULL, blocks = NULL) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} } \value{ Vector containing block names } \description{ This helper function creates a valid \code{visitSequence}. The \code{visitSequence} is an argument to the \code{mice} function that specifies the sequence in which blocks are imputed. } \examples{ make.visitSequence(nhanes) } \seealso{ \code{\link{mice}} } mice/man/print.mads.Rd0000644000176200001440000000065213416657163014327 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.R \name{print.mads} \alias{print.mads} \title{Print a \code{mads} object} \usage{ \method{print}{mads}(x, ...) 
} \arguments{ \item{x}{Object of class \code{mads}} \item{...}{Other parameters passed down to \code{print.default()}} } \value{ \code{NULL} } \description{ Print a \code{mads} object } \seealso{ \code{\link[=mads-class]{mads}} } mice/man/as.mids.Rd0000644000176200001440000000712713460401111013565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mids} \alias{as.mids} \title{Converts an multiply imputed dataset (long format) into a \code{mids} object} \usage{ as.mids(long, where = NULL, .imp = ".imp", .id = ".id") } \arguments{ \item{long}{A multiply imputed data set in long format, for example produced by a call to \code{complete(..., action = 'long', include = TRUE)}, or by other software.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values.} \item{.imp}{An optional column number or column name in \code{long}, indicating the imputation index. The values are assumed to be consecutive integers between 0 and \code{m}. Values \code{1} through \code{m} correspond to the imputation index, value \code{0} indicates the original data (with missings). By default, the procedure will search for a variable named \code{".imp"}.} \item{.id}{An optional column number or column name in \code{long}, indicating the subject identification. If not specified, then the function searches for a variable named \code{".id"}. If this variable is found, the values in the column will define the row names in the \code{data} element of the resulting \code{mids} object.} } \value{ An object of class \code{mids} } \description{ This function converts imputed data stored in long format into an object of class \code{mids}. 
The original incomplete dataset needs to be available so that we know where the missing data are. The function is useful to convert the result of operations applied to the imputed data back into a \code{mids} object. It may also be used to store multiply imputed data sets from other software into the format used by \code{mice}. } \note{ The function expects the input data \code{long} to be sorted by imputation number (variable \code{".imp"} by default), and in the same sequence within each imputation block. } \examples{ # impute the nhanes dataset imp <- mice(nhanes, print = FALSE) # extract the data in long format X <- complete(imp, action = "long", include = TRUE) # create dataset with .imp variable as numeric X2 <- X # nhanes example without .id test1 <- as.mids(X) is.mids(test1) identical(complete(test1, action = "long", include = TRUE), X) # nhanes example without .id where .imp is numeric test2 <- as.mids(X2) is.mids(test2) identical(complete(test2, action = "long", include = TRUE), X) # nhanes example, where we explicitly specify .id as column 2 test3 <- as.mids(X, .id = ".id") is.mids(test3) identical(complete(test3, action = "long", include = TRUE), X) # nhanes example with .id where .imp is numeric test4 <- as.mids(X2, .id = 2) is.mids(test4) identical(complete(test4, action = "long", include = TRUE), X) # example without an .id variable # variable .id not preserved X3 <- X[, -2] test5 <- as.mids(X3) is.mids(test5) identical(complete(test5, action = "long", include = TRUE)[, -2], X[, -2]) # as() syntax has fewer options test7 <- as(X, "mids") test8 <- as(X2, "mids") test9 <- as(X2[, -2], "mids") rev <- ncol(X):1 test10 <- as(X[, rev], "mids") # where argument copies also observed data into $imp element where <- matrix(TRUE, nrow = nrow(nhanes), ncol = ncol(nhanes)) colnames(where) <- colnames(nhanes) test11 <- as.mids(X, where = where) identical(complete(test11, action = "long", include = TRUE), X) } \author{ Gerko Vink } \keyword{mids} 
mice/man/anova.Rd0000644000176200001440000000116313416657163013352 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/anova.R \name{anova.mira} \alias{anova.mira} \title{Compare several nested models} \usage{ \method{anova}{mira}(object, ..., method = "D1", use = "wald") } \arguments{ \item{object}{Two or more objects of class \code{mira}} \item{...}{Other parameters passed down to \code{D1()}, \code{D2()}, \code{D3()} and \code{mitml::testModels}.} \item{method}{Either \code{"D1"}, \code{"D2"} or \code{"D3"}} \item{use}{A character string indicating the test statistic} } \value{ Object of class \code{mice.anova} } \description{ Compare several nested models } mice/man/pops.Rd0000644000176200001440000000461113416661213013217 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pops.R \docType{data} \name{pops} \alias{pops} \alias{pops.pred} \title{Project on preterm and small for gestational age infants (POPS)} \format{\code{pops} is a data frame with 959 rows and 86 columns. \code{pops.pred} is the 86 by 86 binary predictor matrix used for specifying the multiple imputation model.} \source{ Hille, E. T. M., Elbertse, L., Bennebroek Gravenhorst, J., Brand, R., Verloove-Vanhorick, S. P. (2005). Nonresponse bias in a follow-up study of 19-year-old adolescents born as preterm infants. Pediatrics, 116(5):662--666. Hille, E. T. M., Weisglas-Kuperus, N., Van Goudoever, J. B., Jacobusse, G. W., Ens-Dokkum, M. H., De Groot, L., Wit, J. M., Geven, W. B., Kok, J. H., De Kleine, M. J. K., Kollee, L. A. A., Mulder, A. L. M., Van Straaten, H. L. M., De Vries, L. S., Van Weissenbruch, M. M., Verloove-Vanhorick, S. P. (2007). Functional outcomes and participation in young adulthood for very preterm and very low birth weight infants: The Dutch project on preterm and small for gestational age infants at 19 years of age. Pediatrics, 120(3):587--595. Van Buuren, S. (2018). 
\href{https://stefvanbuuren.name/fimd/sec-selective.html#pops-study-19-years-follow-up}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Subset of data from the POPS study, a national, prospective study on preterm children, including all liveborn infants <32 weeks gestational age and/or <1500 g from 1983 (n = 1338). } \details{ The data set concerns a subset of 959 children that survived up to the age of 19 years. Hille et al (2005) divided the 959 survivors into three groups: Full responders (examined at an outpatient clinic and completed the questionnaires, n = 596), postal responders (only completed the mailed questionnaires, n = 109), non-responders (did not respond to any of the mailed requests or telephone calls, or could not be traced, n = 254). Compared to the postal and non-responders, the full response group consists of more girls, contains more Dutch children, has higher educational and social economic levels and has fewer handicaps. The responders form a highly selective subgroup in the total cohort. Multiple imputation of this data set has been described in Hille et al (2007) and Van Buuren (2012), chapter 8. } \examples{ pops <- data(pops) } \keyword{datasets} mice/man/make.method.Rd0000644000176200001440000000447113574715125014445 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/method.R \name{make.method} \alias{make.method} \title{Creates a \code{method} argument} \usage{ make.method( data, where = make.where(data), blocks = make.blocks(data), defaultMethod = c("pmm", "logreg", "polyreg", "polr") ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. 
The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} for variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{defaultMethod}{A vector of length 4 containing the default imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) factor data with > 2 unordered levels, and 4) factor data with > 2 ordered levels. By default, the method uses \code{pmm}, predictive mean matching (numeric data) \code{logreg}, logistic regression imputation (binary data, factor with 2 levels) \code{polyreg}, polytomous regression imputation for unordered categorical data (factor > 2 levels) \code{polr}, proportional odds model for (ordered, > 2 levels).} } \value{ Vector of \code{length(blocks)} elements with method names } \description{ This helper function creates a valid \code{method} vector. The \code{method} vector is an argument to the \code{mice} function that specifies the method for each block. 
} \examples{ make.method(nhanes2) } \seealso{ \code{\link{mice}} } mice/man/rbind.mids.Rd0000644000176200001440000000650413416657163014303 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rbind.R \name{rbind.mids} \alias{rbind.mids} \title{Combine \code{mids} objects by rows} \usage{ rbind.mids(x, y = NULL, ...) } \arguments{ \item{x}{A \code{mids} object.} \item{y}{A \code{mids} object, or a \code{data.frame}, \code{matrix}, \code{factor} or \code{vector}.} \item{\dots}{Additional \code{data.frame}, \code{matrix}, \code{vector} or \code{factor}. These can be given as named arguments.} } \value{ An S3 object of class \code{mids} } \description{ This function combines two \code{mids} objects rowwise into a single \code{mids} object, or combines a \code{mids} object with a vector, matrix, factor or dataframe rowwise into a \code{mids} object. } \details{ If \code{y} is a \code{mids} object, then \code{rbind} requires that the number of multiple imputations in \code{x} and \code{y} is identical. Also, columns of \code{x$data} and \code{y$data} should match. If \code{y} is not a \code{mids} object, the columns of \code{x$data} and \code{y} should match. The \code{where} matrix for \code{y} is set to \code{FALSE}, signaling that any missing values in \code{y} were not imputed. 
} \note{ The function constructs the elements of the new \code{mids} object as follows: \tabular{ll}{ \code{data} \tab Rowwise combination of the (incomplete) data in \code{x} and \code{y}\cr \code{imp} \tab Equals \code{rbind(x$imp[[j]], y$imp[[j]])} if \code{y} is \code{mids} object; otherwise the data of \code{y} will be copied\cr \code{m} \tab Equals \code{x$m}\cr \code{where} \tab Rowwise combination of \code{where} arguments\cr \code{blocks} \tab Equals \code{x$blocks}\cr \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{rbind.mids}\cr \code{nmis} \tab \code{x$nmis} + \code{y$nmis}\cr \code{method} \tab Taken from \code{x$method}\cr \code{predictorMatrix} \tab Taken from \code{x$predictorMatrix}\cr \code{visitSequence} \tab Taken from \code{x$visitSequence}\cr \code{formulas} \tab Taken from \code{x$formulas}\cr \code{post} \tab Taken from \code{x$post}\cr \code{blots} \tab Taken from \code{x$blots}\cr \code{seed} \tab Taken from \code{x$seed}\cr \code{iteration} \tab Taken from \code{x$iteration}\cr \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr \code{chainMean} \tab Set to \code{NA}\cr \code{chainVar} \tab Set to \code{NA}\cr \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr \code{version} \tab Taken from \code{x$version}\cr \code{date} \tab Taken from \code{x$date} } } \examples{ imp1 <- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE) imp5 <- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE) mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) nrow(complete(rbind(imp1, imp5))) nrow(complete(rbind(imp1, mylist))) nrow(complete(rbind(imp1, data.frame(mylist)))) nrow(complete(rbind(imp1, complete(imp5)))) } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{cbind.mids}}, \code{\link{ibind}}, \code{\link[=mids-class]{mids}} } \author{ Karin Groothuis-Oudshoorn, Stef van Buuren } \keyword{manip} mice/man/make.blocks.Rd0000644000176200001440000000554113574715125014441 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{make.blocks} \alias{make.blocks} \title{Creates a \code{blocks} argument} \usage{ make.blocks( data, partition = c("scatter", "collect", "void"), calltype = "type" ) } \arguments{ \item{data}{A \code{data.frame}, character vector with variable names, or \code{list} with variable names.} \item{partition}{A character vector of length 1 used to assign variables to blocks when \code{data} is a \code{data.frame}. Value \code{"scatter"} (default) will assign each column to it own block. Value \code{"collect"} assigns all variables to one block, whereas \code{"void"} produces an empty list.} \item{calltype}{A character vector of \code{length(block)} elements that indicates how the imputation model is specified. If \code{calltype = "type"} (the default), the underlying imputation model is called by means of the \code{type} argument. The \code{type} argument for block \code{h} is equivalent to row \code{h} in the \code{predictorMatrix}. The alternative is \code{calltype = "formula"}. This will pass \code{formulas[[h]]} to the underlying imputation function for block \code{h}, together with the current data. The \code{calltype} of a block is set automatically during initialization. Where a choice is possible, calltype \code{"formula"} is preferred over \code{"type"} since this is more flexible and extendable. However, what precisely happens depends also on the capabilities of the imputation function that is called.} } \value{ A named list of character vectors with variables names. 
} \description{ This helper function generates a list of the type needed for the \code{blocks} argument in the \code{\link{mice}} function. } \details{ Choices \code{"scatter"} and \code{"collect"} represent two extreme scenarios for assigning variables to imputation blocks. Use \code{"scatter"} to create an imputation model based on \emph{fully conditional specification} (FCS). Use \code{"collect"} to gather all variables to be imputed by a \emph{joint model} (JM). Scenarios in-between these two extremes represent \emph{hybrid} imputation models that combine FCS and JM. Any variable not listed in a block will not be imputed. Specification \code{"void"} represents the extreme scenario that skips imputation of all variables. A variable may be a member of multiple blocks. The variable will be re-imputed in each block, so the final imputations for the variable will come from the last block that was executed. This scenario may be useful where the same complete background factors appear in multiple imputation blocks. A variable may appear multiple times within a given block. If a univariate imputation model is applied to such a block, then the variable is re-imputed each time it appears in the block. } \examples{ make.blocks(nhanes) make.blocks(c("age", "sex", "edu")) } mice/man/mice.Rd0000644000176200001440000004567013574715125013174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice-package.R, R/mice.R \docType{package} \name{mice} \alias{mice} \title{\pkg{mice}: Multivariate Imputation by Chained Equations} \usage{ mice( data, m = 5, method = NULL, predictorMatrix, where = NULL, blocks, visitSequence = NULL, formulas, blots = NULL, post = NULL, defaultMethod = c("pmm", "logreg", "polyreg", "polr"), maxit = 5, printFlag = TRUE, seed = NA, data.init = NULL, ... ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{m}{Number of multiple imputations. 
The default is \code{m=5}.} \item{method}{Can be either a single string, or a vector of strings with length \code{length(blocks)}, specifying the imputation method to be used for each column in data. If specified as a single string, the same method will be used for all blocks. The default imputation method (when no argument is specified) depends on the measurement level of the target column, as regulated by the \code{defaultMethod} argument. Columns that need not be imputed have the empty method \code{""}. See details.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g, \code{2} or \code{-2}) are also allowed.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. 
The relevant columns in the \code{where} matrix are set to \code{FALSE} for variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{visitSequence}{A vector of block names of arbitrary length, specifying the sequence of blocks that are imputed during one iteration of the Gibbs sampler. A block is a collection of variables. All variables that are members of the same block are imputed when the block is visited. A variable that is a member of multiple blocks is re-imputed within the same iteration. The default \code{visitSequence = "roman"} visits the blocks (left to right) in the order in which they appear in \code{blocks}. One may also use one of the following keywords: \code{"arabic"} (right to left), \code{"monotone"} (ordered low to high proportion of missing data) and \code{"revmonotone"} (reverse of monotone).} \item{formulas}{A named list of formulas, or expressions that can be converted into formulas by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{blots}{A named \code{list} of \code{alist}'s that can be used to pass down arguments to lower level imputation functions. The entries of element \code{blots[[blockname]]} are passed down to the function called for block \code{blockname}.} \item{post}{A vector of strings with length \code{ncol(data)} specifying expressions as strings. Each string is parsed and executed within the \code{sampler()} function to post-process imputed values during the iterations. 
The default is a vector of empty strings, indicating no post-processing.} \item{defaultMethod}{A vector of length 4 containing the default imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) factor data with > 2 unordered levels, and 4) factor data with > 2 ordered levels. By default, the method uses \code{pmm}, predictive mean matching (numeric data) \code{logreg}, logistic regression imputation (binary data, factor with 2 levels) \code{polyreg}, polytomous regression imputation for unordered categorical data (factor > 2 levels) \code{polr}, proportional odds model for (ordered, > 2 levels).} \item{maxit}{A scalar giving the number of iterations. The default is 5.} \item{printFlag}{If \code{TRUE}, \code{mice} will print history on console. Use \code{print=FALSE} for silent computation.} \item{seed}{An integer that is used as argument by \code{set.seed()} for offsetting the random number generator. Default is to leave the random number generator alone.} \item{data.init}{A data frame of the same size and type as \code{data}, without missing data, used to initialize imputations before the start of the iterative process. The default \code{NULL} implies that starting imputations are created by a simple random draw from the data. Note that specification of \code{data.init} will start all \code{m} Gibbs sampling streams from the same imputation.} \item{...}{Named arguments that are passed down to the univariate imputation functions.} } \value{ Returns an S3 object of class \code{\link[=mids-class]{mids}} (multiply imputed data set) } \description{ The \pkg{mice} package implements a method to deal with missing data. The package creates multiple imputations (replacement values) for multivariate missing data. The method is based on Fully Conditional Specification, where each incomplete variable is imputed by a separate model. The MICE algorithm can impute mixes of continuous, binary, unordered categorical and ordered categorical data. 
In addition, MICE can impute continuous two-level data, and maintain consistency between imputations by means of passive imputation. Many diagnostic plots are implemented to inspect the quality of the imputations. Generates Multivariate Imputations by Chained Equations (MICE) } \details{ The \pkg{mice} package contains functions to \itemize{ \item Inspect the missing data pattern \item Impute the missing data \emph{m} times, resulting in \emph{m} completed data sets \item Diagnose the quality of the imputed values \item Analyze each completed data set \item Pool the results of the repeated analyses \item Store and export the imputed data in various formats \item Generate simulated incomplete data \item Incorporate custom imputation methods } Generates multiple imputations for incomplete multivariate data by Gibbs sampling. Missing data can occur anywhere in the data. The algorithm imputes an incomplete column (the target column) by generating 'plausible' synthetic values given other columns in the data. Each incomplete column must act as a target column, and has its own specific set of predictors. The default set of predictors for a given target consists of all other columns in the data. For predictors that are incomplete themselves, the most recently generated imputations are used to complete the predictors prior to imputation of the target column. A separate univariate imputation model can be specified for each column. The default imputation method depends on the measurement level of the target column. In addition to these, several other methods are provided. You can also write your own imputation functions, and call these from within the algorithm. The data may contain categorical variables that are used in regressions on other variables. The algorithm creates dummy variables for the categories of these variables, and imputes these from the corresponding categorical variable. 
Built-in univariate imputation methods are: \tabular{lll}{ \code{pmm} \tab any \tab Predictive mean matching\cr \code{midastouch} \tab any \tab Weighted predictive mean matching\cr \code{sample} \tab any \tab Random sample from observed values\cr \code{cart} \tab any \tab Classification and regression trees\cr \code{rf} \tab any \tab Random forest imputations\cr \code{mean} \tab numeric \tab Unconditional mean imputation\cr \code{norm} \tab numeric \tab Bayesian linear regression\cr \code{norm.nob} \tab numeric \tab Linear regression ignoring model error\cr \code{norm.boot} \tab numeric \tab Linear regression using bootstrap\cr \code{norm.predict} \tab numeric \tab Linear regression, predicted values\cr \code{quadratic} \tab numeric \tab Imputation of quadratic terms\cr \code{ri} \tab numeric \tab Random indicator for nonignorable data\cr \code{logreg} \tab binary \tab Logistic regression\cr \code{logreg.boot} \tab binary \tab Logistic regression with bootstrap\cr \code{polr} \tab ordered \tab Proportional odds model\cr \code{polyreg} \tab unordered\tab Polytomous logistic regression\cr \code{lda} \tab unordered\tab Linear discriminant analysis\cr \code{2l.norm} \tab numeric \tab Level-1 normal heteroscedastic\cr \code{2l.lmer} \tab numeric \tab Level-1 normal homoscedastic, lmer\cr \code{2l.pan} \tab numeric \tab Level-1 normal homoscedastic, pan\cr \code{2l.bin} \tab binary \tab Level-1 logistic, glmer\cr \code{2lonly.mean} \tab numeric \tab Level-2 class mean\cr \code{2lonly.norm} \tab numeric \tab Level-2 class normal\cr \code{2lonly.pmm} \tab any \tab Level-2 class predictive mean matching } These corresponding functions are coded in the \code{mice} library under names \code{mice.impute.method}, where \code{method} is a string with the name of the univariate imputation method name, for example \code{norm}. The \code{method} argument specifies the methods to be used. 
For the \code{j}'th column, \code{mice()} calls the first occurrence of \code{paste('mice.impute.', method[j], sep = '')} in the search path. The mechanism allows users to write customized imputation functions, \code{mice.impute.myfunc}. To call it for all columns specify \code{method='myfunc'}. To call it only for, say, column 2 specify \code{method=c('norm','myfunc','logreg',\dots{})}. \emph{Passive imputation:} \code{mice()} supports a special built-in method, called passive imputation. This method can be used to ensure that a data transform always depends on the most recently generated imputations. In some cases, an imputation model may need transformed data in addition to the original data (e.g. log, quadratic, recodes, interaction, sum scores, and so on). Passive imputation maintains consistency among different transformations of the same data. Passive imputation is invoked if \code{~} is specified as the first character of the string that specifies the univariate method. \code{mice()} interprets the entire string, including the \code{~} character, as the formula argument in a call to \code{model.frame(formula, data[!r[,j],])}. This provides a simple mechanism for specifying deterministic dependencies among the columns. For example, suppose that the missing entries in variables \code{data$height} and \code{data$weight} are imputed. The body mass index (BMI) can be calculated within \code{mice} by specifying the string \code{'~I(weight/height^2)'} as the univariate imputation method for the target column \code{data$bmi}. Note that the \code{~} mechanism works only on those entries which have missing values in the target column. You should make sure that the combined observed and imputed parts of the target column make sense. An easy way to create consistency is by coding all entries in the target as \code{NA}, but for large data sets, this could be inefficient. 
Note that you may also need to adapt the default \code{predictorMatrix} to evade linear dependencies among the predictors that could cause errors like \code{Error in solve.default()} or \code{Error: system is exactly singular}. Though not strictly needed, it is often useful to specify \code{visitSequence} such that the column that is imputed by the \code{~} mechanism is visited each time after one of its predictors was visited. In that way, deterministic relations between columns will always be synchronized. A new argument \code{ls.meth} can be passed to the lower level \code{.norm.draw} to specify the method for generating the least squares estimates and any subsequently derived estimates. Argument \code{ls.meth} takes one of three inputs: \code{"qr"} for QR-decomposition, \code{"svd"} for singular value decomposition and \code{"ridge"} for ridge regression. \code{ls.meth} defaults to \code{ls.meth = "qr"}. \emph{Auxiliary predictors in formulas specification: } For a given block, the \code{formulas} specification takes precedence over the corresponding row in the \code{predictorMatrix} argument. This precedence is, however, restricted to the subset of variables specified in the terms of the block formula. Any variables not specified by \code{formulas} are imputed according to the \code{predictorMatrix} specification. Variables with non-zero \code{type} values in the \code{predictorMatrix} will be added as main effects to the \code{formulas}, which will act as supplementary covariates in the imputation model. It is possible to turn off this behavior by specifying the argument \code{auxiliary = FALSE}. 
} \section{Functions}{ The main functions are: \tabular{ll}{ \code{mice()} \tab Impute the missing data *m* times\cr \code{with()} \tab Analyze completed data sets\cr \code{pool()} \tab Combine parameter estimates\cr \code{complete()} \tab Export imputed data\cr \code{ampute()} \tab Generate missing data\cr} } \section{Vignettes}{ There is a detailed series of six online vignettes that walk you through solving realistic inference problems with mice. We suggest going through these vignettes in the following order \enumerate{ \item \href{https://gerkovink.github.io/miceVignettes/Ad_hoc_and_mice/Ad_hoc_methods.html}{Ad hoc methods and the MICE algorithm} \item \href{https://gerkovink.github.io/miceVignettes/Convergence_pooling/Convergence_and_pooling.html}{Convergence and pooling} \item \href{https://gerkovink.github.io/miceVignettes/Missingness_inspection/Missingness_inspection.html}{Inspecting how the observed data and missingness are related} \item \href{https://gerkovink.github.io/miceVignettes/Passive_Post_processing/Passive_imputation_post_processing.html}{Passive imputation and post-processing} \item \href{https://gerkovink.github.io/miceVignettes/Multi_level/Multi_level_data.html}{Imputing multilevel data} \item \href{https://gerkovink.github.io/miceVignettes/Sensitivity_analysis/Sensitivity_analysis.html}{Sensitivity analysis with \pkg{mice}} } Van Buuren, S. (2018). Boca Raton, FL.: Chapman & Hall/CRC Press. The book \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} contains a lot of \href{https://github.com/stefvanbuuren/fimdbook/tree/master/R}{example code}. } \section{Methodology}{ The \pkg{mice} software was published in the \href{https://www.jstatsoft.org/article/view/v045i03}{Journal of Statistical Software} (Van Buuren and Groothuis-Oudshoorn, 2011). The first application of the method concerned missing blood pressure data (Van Buuren et al., 1999). 
The term \emph{Fully Conditional Specification} was introduced in 2006 to describe a general class of methods that specify imputation models for multivariate data as a set of conditional distributions (Van Buuren et al., 2006). Further details on mixes of variables and applications can be found in the book \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \examples{ # do default multiple imputation on a numeric matrix imp <- mice(nhanes) imp # list the actual imputations for BMI imp$imp$bmi # first completed data matrix complete(imp) # imputation on mixed data with a different method per column mice(nhanes2, meth=c('sample','pmm','logreg','norm')) } \references{ van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. van Buuren, S., Groothuis-Oudshoorn, K. (2011). \href{https://www.jstatsoft.org/v45/i03/}{\code{mice}: Multivariate Imputation by Chained Equations in \code{R}}. \emph{Journal of Statistical Software}, \bold{45}(3), 1--67. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. Van Buuren, S. (2007) Multiple imputation of discrete and continuous data by fully conditional specification. \emph{Statistical Methods in Medical Research}, \bold{16}, 3, 219--242. Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. } \seealso{ \code{\link{mice}}, \code{\link{with.mids}}, \code{\link{pool}}, \code{\link{complete}}, \code{\link{ampute}} \code{\link[=mids-class]{mids}}, \code{\link{with.mids}}, \code{\link{set.seed}}, \code{\link{complete}} } \author{ Stef van Buuren \email{stef.vanbuuren@tno.nl}, Karin Groothuis-Oudshoorn \email{c.g.m.oudshoorn@utwente.nl}, 2000-2010, with contributions of Alexander Robitzsch, Gerko Vink, Shahab Jolani, Roel de Jong, Jason Turner, Lisa Doove, John Fox, Frank E. Harrell, and Peter Malewski. } \keyword{iteration} mice/man/ampute.default.patterns.Rd0000644000176200001440000000116613416657163017026 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.patterns} \alias{ampute.default.patterns} \title{Default \code{patterns} in \code{ampute}} \usage{ ampute.default.patterns(n) } \arguments{ \item{n}{A scalar specifying the #variables in the data.} } \value{ A square matrix of size #variables where \code{0} indicates a variable } \description{ This function creates a default pattern matrix for the multivariate amputation function \code{ampute()}. 
} \seealso{ \code{\link{ampute}}, \code{\link{md.pattern}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/is.mids.Rd0000644000176200001440000000051513416657163013614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mids} \alias{is.mids} \title{Check for \code{mids} object} \usage{ is.mids(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mids} } \description{ Check for \code{mids} object } mice/man/mads-class.Rd0000644000176200001440000000662213416657163014302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Mads.R \docType{class} \name{mads-class} \alias{mads-class} \title{Multivariate Amputed Data Set (\code{mads})} \description{ The \code{mads} object contains an amputed data set. The \code{mads} object is generated by the \code{ampute} function. The \code{mads} class of objects has methods for the following generic functions: \code{print}, \code{summary}, \code{bwplot}and \code{xyplot}. } \note{ Many of the functions of the \code{mice} package do not use the S4 class definitions, and instead rely on the S3 list equivalent \code{oldClass(obj) <- "mads"}. } \section{Contents}{ \describe{ \item{\code{call}:}{The function call.} \item{\code{prop}:}{Proportion of cases with missing values. Note: even when the proportion is entered as the proportion of missing cells (when \code{bycases == TRUE}), this object contains the proportion of missing cases.} \item{\code{patterns}:}{A data frame of size #patterns by #variables where \code{0} indicates a variable has missing values and \code{1} indicates a variable remains complete.} \item{\code{freq}:}{A vector of length #patterns containing the relative frequency with which the patterns occur. 
For example, if the vector is \code{c(0.4, 0.4, 0.2)}, this means that of all cases with missing values, 40 percent is candidate for pattern 1, 40 percent for pattern 2 and 20 percent for pattern 3. The vector sums to 1.} \item{\code{mech}:}{A string specifying the missingness mechanism, either \code{"MCAR"}, \code{"MAR"} or \code{"MNAR"}.} \item{\code{weights}:}{A data frame of size #patterns by #variables. It contains the weights that were used to calculate the weighted sum scores. The weights may differ between patterns and between variables.} \item{\code{cont}:}{Logical, whether probabilities are based on continuous logit functions or on discrete odds distributions.} \item{\code{type}:}{A vector of strings containing the type of missingness for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or \code{"RIGHT"}. The first type refers to the first pattern, the second type to the second pattern, etc.} \item{\code{odds}:}{A matrix where #patterns defines the #rows. Each row contains the odds of being missing for the corresponding pattern. The amount of odds values defines in how many quantiles the sum scores were divided. The values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The #quantiles may differ between patterns, NA is used for cells remaining empty.} \item{\code{amp}:}{A data frame containing the input data with NAs for the amputed values.} \item{\code{cand}:}{A vector that contains the pattern number for each case. A value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{\code{scores}:}{A list containing vectors with weighted sum scores of the candidates. The first vector refers to the candidates of the first pattern, the second vector refers to the candidates of the second pattern, etc. 
The length of the vectors differ because the number of candidates is different for each pattern.} \item{\code{data}:}{The complete data set that was entered in \code{ampute}.} } } \seealso{ \code{\link{ampute}}, Vignette titled "Multivariate Amputation using Ampute". } \author{ Rianne Schouten, 2016 } mice/man/stripplot.mids.Rd0000644000176200001440000002134713617553123015241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stripplot.R \name{stripplot.mids} \alias{stripplot.mids} \alias{stripplot} \title{Stripplot of observed and imputed data} \usage{ \method{stripplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.stripplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.stripplot"), jitter.data = TRUE, horizontal = FALSE, ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. 
This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odd labeling of the vertical axis. For convenience, in \code{stripplot()} and \code{bwplot} the formula \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single \code{y}, and does not (yet) work for \code{y1+y2~.imp}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated is the response indicator \code{is.na(x$data)}. The default \code{na.groups = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} argument in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. 
The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{panel}{See \code{\link{xyplot}}.} \item{default.prepanel}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{jitter.data}{See \code{\link[lattice:panel.xyplot]{panel.xyplot}}.} \item{horizontal}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{stripplot} produces one-dimensional scatterplots. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. 
The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification is used to define the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. 
} \examples{ imp <- mice(boys, maxit=1) ### stripplot, all numerical variables \dontrun{stripplot(imp)} ### same, but with improved display \dontrun{stripplot(imp, col=c("grey",mdc(2)),pch=c(1,20))} ### distribution per imputation of height, weight and bmi ### labeled by their own missingness \dontrun{stripplot(imp, hgt+wgt+bmi~.imp, cex=c(2,4), pch=c(1,20),jitter=FALSE, layout=c(3,1))} ### same, but labeled with the missingness of wgt (just four cases) \dontrun{stripplot(imp, hgt+wgt+bmi~.imp, na=wgt, cex=c(2,4), pch=c(1,20),jitter=FALSE, layout=c(3,1))} ### distribution of age and height, labeled by missingness in height ### most height values are missing for those around ### the age of two years ### some additional missings occur in region WEST \dontrun{stripplot(imp, age + hgt ~ .imp | reg, hgt, col = c(grDevices::hcl(0, 0, 40, 0.2), mdc(2)), pch = c(1, 20))} ### heavily jitted relation between two categorical variables ### labeled by missingness of gen ### aggregated over all imputed data sets \dontrun{stripplot(imp, gen~phb, factor=2, cex=c(8,1), hor=TRUE)} ### circle fun stripplot(imp, gen~.imp, na = wgt, factor = 2, cex = c(8.6), hor = FALSE, outer = TRUE, scales = "free", pch = c(1,19)) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:stripplot]{stripplot}}, \code{\link[lattice:panel.stripplot]{panel.stripplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.set]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/parlmice.Rd0000644000176200001440000000750713574715125014050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parlmice.R \name{parlmice} \alias{parlmice} \title{Wrapper function that runs MICE in parallel} \usage{ parlmice( data, m = 5, seed = NA, cluster.seed = NA, n.core = NULL, n.imp.core = NULL, cl.type = "PSOCK", ... ) } \arguments{ \item{data}{A data frame or matrix containing the incomplete data. Similar to the first argument of \code{\link{mice}}.} \item{m}{The number of desired imputated datasets. By default $m=5$ as with \code{mice}} \item{seed}{A scalar to be used as the seed value for the mice algorithm within each parallel stream. Please note that the imputations will be the same for all streams and, hence, this should be used if and only if \code{n.core = 1} and if it is desired to obtain the same output as under \code{mice}.} \item{cluster.seed}{A scalar to be used as the seed value. It is recommended to put the seed value here and not outside this function, as otherwise the parallel processes will be performed with separate, random seeds.} \item{n.core}{A scalar indicating the number of cores that should be used.} \item{n.imp.core}{A scalar indicating the number of imputations per core.} \item{cl.type}{The cluster type. Default value is \code{"PSOCK"}. 
Posix machines (linux, Mac) generally benefit from much faster cluster computation if \code{cl.type} is set to \code{cl.type = "FORK"}.} \item{...}{Named arguments that are passed down to function \code{\link{mice}} or \code{\link{makeCluster}}.} } \value{ A mids object as defined by \code{\link{mids-class}} } \description{ This is a wrapper function for \code{\link{mice}}, using multiple cores to execute \code{\link{mice}} in parallel. As a result, the imputation procedure can be sped up, which may be useful in general. } \details{ This function relies on package \code{\link{parallel}}, which is a base package for R versions 2.14.0 and later. We have chosen to use parallel function \code{parLapply} to allow the use of \code{parlmice} on Mac, Linux and Windows systems. For the same reason, we use the Parallel Socket Cluster (PSOCK) type by default. On systems other than Windows, it can be hugely beneficial to change the cluster type to \code{FORK}, as it generally results in improved memory handling. When memory issues arise on a Windows system, we advise to store the multiply imputed datasets, clean the memory by using \code{\link{rm}} and \code{\link{gc}} and make another run using the same settings. This wrapper function combines the output of \code{\link{parLapply}} with function \code{\link{ibind}} in \code{\link{mice}}. A \code{mids} object is returned and can be used for further analyses. Note that if a seed value is desired, the seed should be entered to this function with argument \code{seed}. Seed values outside the wrapper function (in an R-script or passed to \code{\link{mice}}) will not lead to reproducible results. We refer to the manual of \code{\link{parallel}} for an explanation on this matter. } \examples{ # 150 imputations in dataset nhanes, performed by 3 cores \dontrun{ imp1 <- parlmice(data = nhanes, n.core = 3, n.imp.core = 50) # Making use of arguments in mice. 
imp2 <- parlmice(data = nhanes, method = "norm.nob", m = 100) imp2$method fit <- with(imp2, lm(bmi ~ hyp)) pool(fit) } } \references{ Schouten, R. and Vink, G. (2017). parlmice: faster, paraleller, micer. \url{https://gerkovink.github.io/parlMICE/Vignette_parlMICE.html} #'Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{parallel}}, \code{\link{parLapply}}, \code{\link{makeCluster}}, \code{\link{mice}}, \code{\link{mids-class}} } \author{ Gerko Vink, 2018, based on an earlier version by Rianne Schouten and Gerko Vink, 2017. } mice/man/mice.impute.norm.boot.Rd0000644000176200001440000000424513620753345016401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.boot.R \name{mice.impute.norm.boot} \alias{mice.impute.norm.boot} \alias{norm.boot} \title{Imputation by linear regression, bootstrap method} \usage{ mice.impute.norm.boot(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear regression with bootstrap } \details{ Draws a bootstrap sample from \code{x[ry,]} and \code{y[ry]}, calculates regression weights and imputes with normal residuals. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/D1.Rd0000644000176200001440000000320313621066000012465 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D1.R \name{D1} \alias{D1} \title{Compare two nested models using D1-statistic} \usage{ D1(fit1, fit0 = NULL, df.com = NULL, ...) } \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is a nested within \code{fit1}. 
The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{df.com}{A single number or a numeric vector denoting the complete-data degrees of freedom for the hypothesis test. If not specified, it is set equal to \code{df.residual} of model \code{fit1}.} \item{\dots}{Not used.} } \description{ The D1-statistics is the multivariate Wald test. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D1(mi1, mi0) # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D1(fit1, fit0) } \references{ Li, K. H., T. E. Raghunathan, and D. B. Rubin. 1991. Large-Sample Significance Levels from Multiply Imputed Data Using Moment-Based Statistics and an F Reference Distribution. \emph{Journal of the American Statistical Association}, 86(416): 1065–73. \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:wald} } \seealso{ \code{\link[mitml]{testModels}} } mice/man/pool.scalar.Rd0000644000176200001440000000501513416661213014452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.scalar.R \name{pool.scalar} \alias{pool.scalar} \title{Multiple imputation pooling: univariate version} \usage{ pool.scalar(Q, U, n = Inf, k = 1) } \arguments{ \item{Q}{A vector of univariate estimates of \code{m} repeated complete data analyses.} \item{U}{A vector containing the corresponding \code{m} variances of the univariate estimates.} \item{n}{A number providing the sample size. If nothing is specified, an infinite sample \code{n = Inf} is assumed.} \item{k}{A number indicating the number of parameters to be estimated. 
By default, \code{k = 1} is assumed.} } \value{ Returns a list with components. Component \code{m} is the number of imputations. Component \code{qhat} contains the \code{m} univariate estimates of repeated complete data analyses. Component \code{u} contains the corresponding \code{m} variances of the univariate estimates. Component \code{qbar} is the pooled univariate estimate, formula (3.1.2) Rubin (1987). Component \code{ubar} is the mean of the variances (i.e. the pooled within-imputation variance), formula (3.1.3) Rubin (1987). Component \code{b} is the between-imputation variance, formula (3.1.4) Rubin (1987). Component \code{t} is the total variance of the pooled estimate, formula (3.1.5) Rubin (1987). Component \code{r} is the relative increase in variance due to nonresponse, formula (3.1.7) Rubin (1987). Component \code{df} is the degrees of freedom for the t reference distribution, formula (3.1.6) Rubin (1987) or method of Barnard-Rubin (1999) (if \code{method = "smallsample"}). Component \code{fmi} is the fraction missing information due to nonresponse, formula (3.1.10) Rubin (1987). } \description{ Pools univariate estimates of m repeated complete data analyses } \details{ The function averages the univariate estimates of the complete data model, computes the total variance over the repeated analyses, and computes the relative increase in variance due to nonresponse and the fraction of missing information. } \examples{ imp <- mice(nhanes) m <- imp$m Q <- rep(NA, m) U <- rep(NA, m) for (i in 1:m) { Q[i] <- mean(complete(imp, i)$bmi) U[i] <- var(complete(imp, i)$bmi) / nrow(nhanes) # (standard error of estimate)^2 } pool.scalar(Q, U, n = nrow(nhanes), k = 1) # Barnard-Rubin 1999 } \references{ Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. 
} \seealso{ \code{\link{pool}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 } \keyword{htest} mice/man/name.blocks.Rd0000644000176200001440000000324313416657163014443 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{name.blocks} \alias{name.blocks} \title{Name imputation blocks} \usage{ name.blocks(blocks, prefix = "B") } \arguments{ \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{prefix}{A character vector of length 1 with the prefix to be using for naming any unnamed blocks with two or more variables.} } \value{ A named list of character vectors with variables names. } \description{ This helper function names any unnamed elements in the \code{blocks} specification. This is a convenience function. } \details{ This function will name any unnamed list elements specified in the optional argument \code{blocks}. Unnamed blocks consisting of just one variable will be named after this variable. Unnamed blocks containing more than one variables will be named by the \code{prefix} argument, padded by an integer sequence stating at 1. 
} \examples{ blocks <- list(c("hyp", "chl"), AGE = "age", c("bmi", "hyp"), "edu") name.blocks(blocks) } \seealso{ \code{\link{mice}} } mice/man/quickpred.Rd0000644000176200001440000001044013574715125014231 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quickpred.R \name{quickpred} \alias{quickpred} \title{Quick selection of predictors from the data} \usage{ quickpred( data, mincor = 0.1, minpuc = 0, include = "", exclude = "", method = "pearson" ) } \arguments{ \item{data}{Matrix or data frame with incomplete data.} \item{mincor}{A scalar, numeric vector (of size \code{ncol(data))} or numeric matrix (square, of size \code{ncol(data)} specifying the minimum threshold(s) against which the absolute correlation in the data is compared.} \item{minpuc}{A scalar, vector (of size \code{ncol(data))} or matrix (square, of size \code{ncol(data)} specifying the minimum threshold(s) for the proportion of usable cases.} \item{include}{A string or a vector of strings containing one or more variable names from \code{names(data)}. Variables specified are always included as a predictor.} \item{exclude}{A string or a vector of strings containing one or more variable names from \code{names(data)}. Variables specified are always excluded as a predictor.} \item{method}{A string specifying the type of correlation. Use \code{'pearson'} (default), \code{'kendall'} or \code{'spearman'}. Can be abbreviated.} } \value{ A square binary matrix of size \code{ncol(data)}. } \description{ Selects predictors according to simple statistics } \details{ This function creates a predictor matrix using the variable selection procedure described in Van Buuren et al.~(1999, p.~687--688). The function is designed to aid in setting up a good imputation model for data with many variables. Basic workings: The procedure calculates for each variable pair (i.e. target-predictor pair) two correlations using all available cases per pair. 
The first correlation uses the values of the target and the predictor directly. The second correlation uses the (binary) response indicator of the target and the values of the predictor. If the largest (in absolute value) of these correlations exceeds \code{mincor}, the predictor will be added to the imputation set. The default value for \code{mincor} is 0.1. In addition, the procedure eliminates predictors whose proportion of usable cases fails to meet the minimum specified by \code{minpuc}. The default value is 0, so predictors are retained even if they have no usable case. Finally, the procedure includes any predictors named in the \code{include} argument (which is useful for background variables like age and sex) and eliminates any predictor named in the \code{exclude} argument. If a variable is listed in both \code{include} and \code{exclude} arguments, the \code{include} argument takes precedence. Advanced topic: \code{mincor} and \code{minpuc} are typically specified as scalars, but vectors and square matrices of appropriate size will also work. Each element of the vector corresponds to a row of the predictor matrix, so the procedure can effectively differentiate between different target variables. Setting a high value can be useful for auxiliary, less important, variables. The set of predictors for those variables can remain relatively small. Using a square matrix extends the idea to the columns, so that one can also apply cellwise thresholds. 
} \examples{ # default: include all predictors with absolute correlation over 0.1 quickpred(nhanes) # all predictors with absolute correlation over 0.4 quickpred(nhanes, mincor=0.4) # include age and bmi, exclude chl quickpred(nhanes, mincor=0.4, inc=c('age','bmi'), exc='chl') # only include predictors with at least 30\% usable cases quickpred(nhanes, minpuc=0.3) # use low threshold for bmi, and high thresholds for hyp and chl pred <- quickpred(nhanes, mincor=c(0,0.1,0.5,0.5)) pred # use it directly from mice imp <- mice(nhanes, pred=quickpred(nhanes, minpuc=0.25, include='age')) } \references{ van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. van Buuren, S. and Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}} } \author{ Stef van Buuren, Aug 2009 } \keyword{misc} mice/man/cci.Rd0000644000176200001440000000160713416657163013007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cci.R \name{cci} \alias{cci} \title{Complete case indicator} \usage{ cci(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}.} } \value{ Logical vector indicating the complete cases. } \description{ The complete case indicator is useful for extracting the subset of complete cases. The function \code{cci(x)} calls \code{complete.cases(x)}. The companion function \code{ici()} selects the incomplete cases. 
} \examples{ cci(nhanes) # indicator for 13 complete cases cci(mice(nhanes, maxit = 0)) f <- cci(nhanes[,c("bmi","hyp")]) # complete data for bmi and hyp nhanes[f,] # obtain all data from those with complete bmi and hyp } \seealso{ \code{\link{complete.cases}}, \code{\link{ici}}, \code{\link{cc}} } \author{ Stef van Buuren, 2017. } \keyword{univar} mice/man/ampute.default.weights.Rd0000644000176200001440000000233113416657163016633 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.weights} \alias{ampute.default.weights} \title{Default \code{weights} in \code{ampute}} \usage{ ampute.default.weights(patterns, mech) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} \item{mech}{A string specifying the missingness mechanism.} } \value{ A matrix of size #patterns by #variables containing the weights that will be used to calculate the weighted sum scores. Equal weights are given to all variables. When mechanism is MAR, variables that will be amputed will be weighted with \code{0}. If it is MNAR, variables that will be observed will be weighted with \code{0}. If mechanism is MCAR, the weights matrix will not be used. A default MAR matrix will be returned. } \description{ Defines the default weights matrix for the multivariate amputation function \code{ampute}. 
} \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/leiden85.Rd0000644000176200001440000000323113416661213013650 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/leiden85.R \docType{data} \name{leiden85} \alias{leiden85} \title{Leiden 85+ study} \format{\code{leiden85} is a data frame with 956 rows and 336 columns.} \source{ Lagaay, A. M., van der Meij, J. C., Hijmans, W. (1992). Validation of medical history taking as part of a population based survey in subjects aged 85 and over. \emph{Brit. Med. J.}, \emph{304}(6834), 1091-1092. Izaks, G. J., van Houwelingen, H. C., Schreuder, G. M., Ligthart, G. J. (1997). The association between human leucocyte antigens (HLA) and mortality in community residents aged 85 and older. \emph{Journal of the American Geriatrics Society}, \emph{45}(1), 56-60. Boshuizen, H. C., Izaks, G. J., van Buuren, S., Ligthart, G. J. (1998). Blood pressure and mortality in elderly people aged 85 and older: Community based study. \emph{Brit. Med. J.}, \emph{316}(7147), 1780-1784. Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-toomany.html#sec:leiden85cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Subset of data from the Leiden 85+ study } \details{ The data set concerns of subset of 956 members of a very old (85+) cohort in Leiden. Multiple imputation of this data set has been described in Boshuizen et al (1998), Van Buuren et al (1999) and Van Buuren (2012), chapter 7. The data set is not available as part of \code{mice}. 
} \keyword{datasets} mice/man/is.mitml.result.Rd0000644000176200001440000000057513416657163015325 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mitml.result} \alias{is.mitml.result} \title{Check for \code{mitml.result} object} \usage{ is.mitml.result(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mitml.result} } \description{ Check for \code{mitml.result} object } mice/man/ampute.default.type.Rd0000644000176200001440000000152013416657163016141 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.type} \alias{ampute.default.type} \title{Default \code{type} in \code{ampute()}} \usage{ ampute.default.type(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where 0 indicates a variable should have missing values and 1 indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A string vector of length #patterns containing the missingness types. Each pattern will be amputed with a "RIGHT" missingness. } \description{ Defines the default type vector for the multivariate amputation function \code{ampute}. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/tbc.Rd0000644000176200001440000000460313416661213013007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbc.R \docType{data} \name{tbc} \alias{tbc} \alias{tbc.target} \alias{terneuzen} \title{Terneuzen birth cohort} \format{\code{tbs} is a data frame with 3951 rows and 11 columns: \describe{ \item{id}{Person number} \item{occ}{Occasion number} \item{nocc}{Number of occasions} \item{first}{Is this the first record for this person? 
(TRUE/FALSE)} \item{typ}{Type of data (all observed)} \item{age}{Age (years)} \item{sex}{Sex 1=M, 2=F} \item{hgt.z}{Height Z-score} \item{wgt.z}{Weight Z-score} \item{bmi.z}{BMI Z-score} \item{ao}{Adult overweight (0=no, 1=yes)} } \code{tbc.target} is a data frame with 2612 rows and 3 columns: \describe{ \item{id}{Person number} \item{ao}{Adult overweight (0=no, 1=yes)} \item{bmi.z.jv}{BMI Z-score as young adult (18-29 years)} }} \source{ De Kroon, M. L. A., Renders, C. M., Kuipers, E. C., van Wouwe, J. P., van Buuren, S., de Jonge, G. A., Hirasing, R. A. (2008). Identifying metabolic syndrome without blood tests in young adults - The Terneuzen birth cohort. \emph{European Journal of Public Health}, \emph{18}(6), 656-660. De Kroon, M. L. A., Renders, C. M., Van Wouwe, J. P., Van Buuren, S., Hirasing, R. A. (2010). The Terneuzen birth cohort: BMI changes between 2 and 6 years correlate strongest with adult overweight. \emph{PLoS ONE}, \emph{5}(2), e9155. De Kroon, M. L. A. (2011). \emph{The Terneuzen Birth Cohort. Detection and Prevention of Overweight and Cardiometabolic Risk from Infancy Onward.} Dissertation, Vrije Universiteit, Amsterdam. \url{http://dare.ubvu.vu.nl/handle/1871/23806} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-rastering.html#terneuzen-birth-cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Data of a subset of the Terneuzen Birth Cohort data on child growth. } \details{ This \code{tbc} data set is a random subset of persons from a much larger collection of data from the Terneuzen Birth Cohort. The total cohort comprises 2604 unique persons, whereas the subset in \code{tbc} covers 306 persons. The \code{tbc.target} is an auxiliary data set containing two outcomes at adult age. For more details, see De Kroon et al (2008, 2010, 2011). The imputation methodology is explained in Chapter 9 of Van Buuren (2012). 
} \examples{ data <- tbc md.pattern(data) } \keyword{datasets} mice/man/toenail.Rd0000644000176200001440000000413213617306761013676 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/toenail.R \docType{data} \name{toenail} \alias{toenail} \title{Toenail data} \format{A data frame with 1908 observations on the following 5 variables: \describe{ \item{\code{ID}}{a numeric vector giving the ID of patient} \item{\code{outcome}}{a numeric vector giving the response (0=none or mild seperation, 1=moderate or severe)} \item{\code{treatment}}{a numeric vector giving the treatment group} \item{\code{month}}{a numeric vector giving the time of the visit (not exactly monthly intervals hence not round numbers)} \item{\code{visit}}{a numeric vector giving the number of the visit} }} \source{ De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De Keyser, P. (1998). Twelve weeks of continuous oral therapy for toenail onychomycosis caused by dermatophytes: A double-blind comparative trial of terbinafine 250 mg/day versus itraconazole 200 mg/day. Journal of the American Academy of Dermatology, 38, 57-63. } \description{ The toenail data come from a Multicenter study comparing two oral treatments for toenail infection. Patients were evaluated for the degree of separation of the nail. Patients were randomized into two treatments and were followed over seven visits - four in the first year and yearly thereafter. The patients have not been treated prior to the first visit so this should be regarded as the baseline. } \details{ This dataset was copied from the \code{DPpackage}, which is scheduled to be discontinued from CRAN in August 2019. } \references{ Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of quadrature points in a logistic random-effects model: An example. Journal of the Royal Statistical Society, Series C, 50, 325-335. G. Fitzmaurice, N. Laird and J. 
Ware (2004) Applied Longitudinal Analysis, Wiley and Sons, New York, USA. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{toenail2}} } \keyword{datasets} mice/man/ici.Rd0000644000176200001440000000133713416657163013015 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cci.R \name{ici} \alias{ici} \alias{ici,data.frame-method} \alias{ici,matrix-method} \alias{ici,mids-method} \title{Incomplete case indicator} \usage{ ici(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}.} } \value{ Logical vector indicating the incomplete cases, } \description{ This array is useful for extracting the subset of incomplete cases. The companion function \code{cci()} selects the complete cases. } \examples{ ici(nhanes) # indicator for 12 rows with incomplete cases } \seealso{ \code{\link{cci}}, \code{\link{ic}} } \author{ Stef van Buuren, 2017. } \keyword{univar} mice/man/mice.impute.rf.Rd0000644000176200001440000000700513620753345015070 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.rf.R \name{mice.impute.rf} \alias{mice.impute.rf} \title{Imputation by random forests} \usage{ mice.impute.rf(y, ry, x, wy = NULL, ntree = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ntree}{The number of trees to grow. The default is 10.} \item{\dots}{Other named arguments passed down to \code{mice:::install.on.demand()}, \code{randomForest::randomForest()} and \code{randomForest:::randomForest.default()}.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using random forests. } \details{ Imputation of \code{y} by random forests. The method calls \code{randomForest()} which implements Breiman's random forest algorithm (based on Breiman and Cutler's original Fortran code) for classification and regression. See Appendix A.1 of Doove et al. (2014) for the definition of the algorithm used. } \note{ An alternative implementation was independently developed by Shah et al (2014). These were available as functions \code{CALIBERrfimpute::mice.impute.rfcat} and \code{CALIBERrfimpute::mice.impute.rfcont} (now archived). Simulations by Shah (Feb 13, 2014) suggested that the quality of the imputation for 10 and 100 trees was identical, so mice 2.22 changed the default number of trees from \code{ntree = 100} to \code{ntree = 10}. } \examples{ library("lattice") imp <- mice(nhanes2, meth = "rf", ntree = 3) plot(imp) } \references{ Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning for missing data imputation in the presence of interaction Effects. Computational Statistics \& Data Analysis, 72, 92-104. Shah, A.D., Bartlett, J.W., Carpenter, J., Nicholas, O., Hemingway, H. (2014), Comparison of random forest and parametric imputation models for imputing missing data using MICE: A CALIBER study. American Journal of Epidemiology, doi: 10.1093/aje/kwt312. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.cart}}, \code{\link[randomForest]{randomForest}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.ri}()} } \author{ Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.logreg.boot.Rd0000644000176200001440000000466513620753345016713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.logreg.R \name{mice.impute.logreg.boot} \alias{mice.impute.logreg.boot} \title{Imputation by logistic regression using the bootstrap} \usage{ mice.impute.logreg.boot(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using logistic regression by a bootstrapped logistic regression model. The bootstrap method draws a simple bootstrap sample with replacement from the observed data \code{y[ry]} and \code{x[ry, ]}. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-categorical.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2011 } \concept{univariate imputation functions} \keyword{datagen} mice/man/D3.Rd0000644000176200001440000000276713621065624012520 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D3.R \name{D3} \alias{D3} \title{Compare two nested models using D3-statistic} \usage{ D3(fit1, fit0 = NULL, df.com = Inf, ...) 
} \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is a nested within \code{fit1}. The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{df.com}{A single number or a numeric vector denoting the complete-data degrees of freedom for the hypothesis test. If not specified, it is set equal to \code{df.residual} of model \code{fit1}.} \item{...}{Not used.} } \description{ The D3-statistics is a likelihood-ratio test statistic. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D3(mi1, mi0) # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D3(fit1, fit0) } \references{ Meng, X. L., and D. B. Rubin. 1992. Performing Likelihood Ratio Tests with Multiply-Imputed Data Sets. \emph{Biometrika}, 79 (1): 103–11. \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:likelihoodratio} } mice/man/ibind.Rd0000644000176200001440000000223413416661213013322 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ibind.R \name{ibind} \alias{ibind} \title{Enlarge number of imputations by combining \code{mids} objects} \usage{ ibind(x, y) } \arguments{ \item{x}{A \code{mids} object.} \item{y}{A \code{mids} object.} } \value{ An S3 object of class \code{mids} } \description{ This function combines two \code{mids} objects \code{x} and \code{y} into a single \code{mids} object, with the objective of increasing the number of imputed data sets. 
If the number of imputations in \code{x} and \code{y} are \code{m(x)} and \code{m(y)}, then the combined object will have \code{m(x)+m(y)} imputations. } \details{ The two \code{mids} objects are required to have the same underlying multiple imputation model and should be fitted on the same data. } \examples{ data(nhanes) imp1 <- mice(nhanes, m = 1, maxit = 2, print = FALSE) imp1$m imp2 <- mice(nhanes, m = 3, maxit = 3, print = FALSE) imp2$m imp12 <- ibind(imp1, imp2) imp12$m plot(imp12) } \seealso{ \code{\link[=mids-class]{mids}}, \code{\link{rbind.mids}}, \code{\link{cbind.mids}} } \author{ Karin Groothuis-Oudshoorn, Stef van Buuren } \keyword{manip} mice/man/mice.impute.mnar.Rd0000644000176200001440000002016313620753345015416 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.mnar.logreg.R, % R/mice.impute.mnar.norm.R \name{mice.impute.mnar.logreg} \alias{mice.impute.mnar.logreg} \alias{mice.impute.mnar.norm} \alias{mnar.norm} \alias{mnar.logreg} \title{Imputation under MNAR mechanism by NARFCS} \usage{ mice.impute.mnar.logreg(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) mice.impute.mnar.norm(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ums}{A string containing the specification of the unidentifiable part of the imputation model (the "unidentifiable model specification"), that is, the desired \eqn{\delta}-adjustment (offset) as a function of other variables and values for the corresponding deltas (sensitivity parameters). See details.} \item{umx}{An auxiliary data matrix containing variables that do not appear in the identifiable part of the imputation procedure but that have been specified via \code{ums} as being predictors in the unidentifiable part of the imputation model. See details.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate data under a user-specified MNAR mechanism by linear or logistic regression and NARFCS. Sensitivity analysis under different model specifications may shed light on the impact of different MNAR assumptions on the conclusions. } \details{ This function imputes data that are thought to be Missing Not at Random (MNAR) by the NARFCS method. The NARFCS procedure (Tompsett et al, 2018) generalises the so-called \eqn{\delta}-adjustment sensitivity analysis method of Van Buuren, Boshuizen & Knook (1999) to the case with multiple incomplete variables within the FCS framework. In practical terms, the NARFCS procedure shifts the imputations drawn at each iteration of \code{mice} by a user-specified quantity that can vary across subjects, to reflect systematic departures of the missing data from the data distribution imputed under MAR. Specification of the NARFCS model is done by the \code{blots} argument of \code{mice()}. The \code{blots} parameter is a named list. 
For each variable to be imputed by \code{mice.impute.mnar.norm()} or \code{mice.impute.mnar.logreg()} the corresponding element in \code{blots} is a list with at least one argument \code{ums} and, optionally, a second argument \code{umx}. For example, the high-level call might look something like \code{mice(nhanes[, c(2, 4)], method = c("pmm", "mnar.norm"), blots = list(chl = list(ums = "-3+2*bmi")))}. The \code{ums} parameter is required, and might look like this: \code{"-4+1*Y"}. The \code{ums} specification must have the following characteristics: \enumerate{ \item{A single term corresponding to the intercept (constant) term, not multiplied by any variable name, must be included in the expression;} \item{Each term in the expression (corresponding to the intercept or a predictor variable) must be separated by either a \code{"+"} or \code{"-"} sign, depending on the sign of the sensitivity parameter;} \item{Within each non-intercept term, the sensitivity parameter value comes first and the predictor variable comes second, and these must be separated by a \code{"*"} sign;} \item{For categorical predictors, for example a variable \code{Z} with K + 1 categories \code{("Cat0","Cat1", ...,"CatK")}, K category-specific terms are needed, and those not in \code{umx} (see below) must be specified by concatenating the variable name with the name of the category (e.g. \code{ZCat1}) as this is how they are named in the design matrix (argument \code{x}) passed to the univariate imputation function. An example is \code{"2+1*ZCat1-3*ZCat2"}.} } If given, the \code{umx} specification must have the following characteristics: \enumerate{ \item{It contains only complete variables, with no missing values;} \item{It is a numeric matrix. 
In particular, categorical variables must be represented as dummy indicators with names corresponding to what is used in \code{ums} to refer to the category-specific terms (see above);} \item{It has the same number of rows as the \code{data} argument passed on to the main \code{mice} function;} \item{It does not contain variables that were already predictors in the identifiable part of the model for the variable under imputation.} } Limitation: The present implementation can only condition on variables that appear in the identifiable part of the imputation model (\code{x}) or in complete auxiliary variables passed on via the \code{umx} argument. It is not possible to specify models where the offset depends on incomplete auxiliary variables. For an MNAR alternative see also \code{\link{mice.impute.ri}}. } \examples{ # 1: Example with no auxiliary data: only pass unidentifiable model specification (ums) # Specify argument to pass on to mnar imputation functions via "blots" argument mnar.blot <- list(X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2")) # Run NARFCS by using mnar imputation methods and passing argument via blots impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), blots = mnar.blot, seed = 234235, print = FALSE) # Obtain MI results: Note they coincide with those from old version at # https://github.com/moreno-betancur/NARFCS pool(with(impNARFCS,lm(Y ~ X + Z)))$pooled$estimate # 2: Example passing also auxiliary data to MNAR procedure (umx) # Assumptions: # - Auxiliary data are complete, no missing values # - Auxiliary data are a numeric matrix # - Auxiliary data have same number of rows as x # - Auxiliary data have no overlapping variable names with x # Specify argument to pass on to mnar imputation functions via "blots" argument aux <- matrix(0:1, nrow = nrow(mnar_demo_data)) dimnames(aux) <- list(NULL, "even") mnar.blot <- list(X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2+0.5*even", umx = aux)) # Run NARFCS 
by using mnar imputation methods and passing argument via blots impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), blots = mnar.blot, seed = 234235, print = FALSE) # Obtain MI results: As expected they differ (slightly) from those # from old version at https://github.com/moreno-betancur/NARFCS pool(with(impNARFCS,lm(Y ~ X + Z)))$pooled$estimate } \references{ Tompsett, D. M., Leacy, F., Moreno-Betancur, M., Heron, J., & White, I. R. (2018). On the use of the not-at-random fully conditional specification (NARFCS) procedure in practice. \emph{Statistics in Medicine}, \bold{37}(15), 2338-2353. \url{https://doi.org/10.1002/sim.7643}. Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Margarita Moreno-Betancur, Stef van Buuren, Ian R. White, 2020. 
} \concept{univariate imputation functions} \keyword{datagen} mice/man/ampute.default.freq.Rd0000644000176200001440000000160013416657163016114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.freq} \alias{ampute.default.freq} \title{Default \code{freq} in \code{ampute}} \usage{ ampute.default.freq(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A vector of length #patterns containing the relative frequencies with which the patterns should occur. An equal probability is given to each pattern. } \description{ Defines the default relative frequency vector for the multivariate amputation function \code{ampute}. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/nhanes2.Rd0000644000176200001440000000175213416657163013610 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nhanes2.R \docType{data} \name{nhanes2} \alias{nhanes2} \title{NHANES example - mixed numerical and discrete variables} \format{A data frame with 25 observations on the following 4 variables. \describe{ \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} \item{bmi}{Body mass index (kg/m**2)} \item{hyp}{Hypertensive (1=no,2=yes)} \item{chl}{Total serum cholesterol (mg/dL)} }} \source{ Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate Data.} London: Chapman & Hall. Table 6.14. } \description{ A small data set with non-monotone missing values. } \details{ A small data set with missing data and mixed numerical and discrete variables. The data set \code{nhanes} is the same data set, but with all data treated as numerical. 
} \examples{ imp <- mice(nhanes2) # create 5 imputed data sets complete(imp) # print the first imputed data set } \seealso{ \code{\link{nhanes}} } \keyword{datasets} mice/man/supports.transparent.Rd0000644000176200001440000000134513416657163016507 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/supports.transparent.R \name{supports.transparent} \alias{supports.transparent} \alias{transparent} \title{Supports semi-transparent foreground colors?} \usage{ supports.transparent() } \value{ \code{TRUE} or \code{FALSE} } \description{ This function is used by \code{mdc()} to find out whether the current device supports semi-transparent foreground colors. } \details{ The function calls the function \code{dev.capabilities()} from the package \code{grDevices}. The function returns \code{FALSE} if the status of the current device is unknown. } \examples{ supports.transparent() } \seealso{ \code{\link{mdc}} \code{\link{dev.capabilities}} } \keyword{hplot} mice/man/summary.Rd0000644000176200001440000000242013574715125013736 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summary.R \name{summary.mira} \alias{summary.mira} \alias{summary.mids} \alias{summary.mads} \alias{summary.mice.anova} \title{Summary of a \code{mira} object} \usage{ \method{summary}{mira}(object, type = c("tidy", "glance", "summary"), ...) \method{summary}{mids}(object, ...) \method{summary}{mads}(object, ...) \method{summary}{mice.anova}(object, ...) } \arguments{ \item{object}{A \code{mira} object} \item{type}{A length-1 character vector indicating the type of summary. There are three choices: \code{type = "tidy"} returns the parameter estimates of each analysis as a data frame. \code{type = "glance"} returns the fit statistics of each analysis as a data frame. \code{type = "summary"} returns a list of length \code{m} with the analysis results. 
The default is \code{"tidy"}.} \item{...}{Other parameters passed down to \code{print()} and \code{summary()}} } \value{ \code{NULL} \code{NULL} \code{NULL} \code{NULL} } \description{ Summary of a \code{mira} object Summary of a \code{mids} object Summary of a \code{mads} object Print a \code{mice.anova} object } \seealso{ \code{\link[=mira-class]{mira}} \code{\link[=mids-class]{mids}} \code{\link[=mads-class]{mads}} \code{\link{mipo}} } mice/man/pattern.Rd0000644000176200001440000000332413416661213013713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pattern1.R \docType{data} \name{pattern} \alias{pattern} \alias{pattern1} \alias{pattern2} \alias{pattern3} \alias{pattern4} \title{Datasets with various missing data patterns} \format{\describe{ \item{list("pattern1")}{Data with a univariate missing data pattern} \item{list("pattern2")}{Data with a monotone missing data pattern} \item{list("pattern3")}{Data with a file matching missing data pattern} \item{list("pattern4")}{Data with a general missing data pattern} } Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL.} \description{ Four simple datasets with various missing data patterns } \details{ Van Buuren (2012) uses these four artificial datasets to illustrate various missing data patterns. 
} \examples{ require(lattice) require(MASS) pattern4 data <- rbind(pattern1, pattern2, pattern3, pattern4) mdpat <- cbind(expand.grid(rec = 8:1, pat = 1:4, var = 1:3), r=as.numeric(as.vector(is.na(data)))) types <- c("Univariate","Monotone","File matching","General") tp41 <- levelplot(r~var+rec|as.factor(pat), data=mdpat, as.table=TRUE, aspect="iso", shrink=c(0.9), col.regions = mdc(1:2), colorkey=FALSE, scales=list(draw=FALSE), xlab="", ylab="", between = list(x=1,y=0), strip = strip.custom(bg = "grey95", style = 1, factor.levels = types)) print(tp41) md.pattern(pattern4) p <- md.pairs(pattern4) p ### proportion of usable cases p$mr/(p$mr+p$mm) ### outbound statistics p$rm/(p$rm+p$rr) fluxplot(pattern2) } \keyword{datasets} mice/man/cbind.Rd0000644000176200001440000000271713574715125013331 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R \name{cbind} \alias{cbind} \alias{rbind} \title{Combine R Objects by Rows and Columns} \usage{ cbind(...) rbind(...) } \arguments{ \item{...}{(generalized) vectors or matrices. These can be given as named arguments. Other \R objects may be coerced as appropriate, or S4 methods may be used: see sections \sQuote{Details} and \sQuote{Value}. (For the \code{"data.frame"} method of \code{cbind} these can be further arguments to \code{\link[base]{data.frame}} such as \code{stringsAsFactors}.)} } \description{ Functions \code{cbind()} and \code{rbind()} are defined in the \code{mice} package in order to enable dispatch to \code{cbind.mids()} and \code{rbind.mids()} when one of the arguments is a \code{data.frame}. } \details{ The standard \code{base::cbind()} and \code{base::rbind()} always dispatch to \code{base::cbind.data.frame()} or \code{base::rbind.data.frame()} if one of the arguments is a \code{data.frame}. The versions defined in the \code{mice} package intercept the user command and test whether the first argument has class \code{"mids"}. 
If so, the function calls \code{cbind.mids()}, respectively \code{rbind.mids()}. In all other cases, the call is forwarded to standard functions in the \code{base} package. } \seealso{ \code{\link[base]{cbind}}, \code{\link[base]{rbind}}, \code{\link{cbind.mids}}, \code{\link{rbind.mids}} } \keyword{internal} mice/man/cbind.mids.Rd0000644000176200001440000000727713416661213014263 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cbind.R \name{cbind.mids} \alias{cbind.mids} \title{Combine \code{mids} objects by columns} \usage{ cbind.mids(x, y = NULL, ...) } \arguments{ \item{x}{A \code{mids} object.} \item{y}{A \code{mids} object, or a \code{data.frame}, \code{matrix}, \code{factor} or \code{vector}.} \item{\dots}{Additional \code{data.frame}, \code{matrix}, \code{vector} or \code{factor}. These can be given as named arguments.} } \value{ An S3 object of class \code{mids} } \description{ This function combines two \code{mids} objects columnwise into a single object of class \code{mids}, or combines a single \code{mids} object with a \code{vector}, \code{matrix}, \code{factor} or \code{data.frame} columnwise into a \code{mids} object. } \details{ \emph{Pre-requisites:} If \code{y} is a \code{mids}-object, the rows of \code{x$data} and \code{y$data} should match, as well as the number of imputations (\code{m}). Other \code{y} are transformed into a \code{data.frame} whose rows should match with \code{x$data}. The function renames any duplicated variable or block names by appending \code{".1"}, \code{".2"} to duplicated names. 
} \note{ The function constructs the elements of the new \code{mids} object as follows: \tabular{ll}{ \code{data} \tab Columnwise combination of the data in \code{x} and \code{y}\cr \code{imp} \tab Combines the imputed values from \code{x} and \code{y}\cr \code{m} \tab Taken from \code{x$m}\cr \code{where} \tab Columnwise combination of \code{x$where} and \code{y$where}\cr \code{blocks} \tab Combines \code{x$blocks} and \code{y$blocks}\cr \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{cbind.mids}\cr \code{nmis} \tab Equals \code{c(x$nmis, y$nmis)}\cr \code{method} \tab Combines \code{x$method} and \code{y$method}\cr \code{predictorMatrix} \tab Combination with zeroes on the off-diagonal blocks\cr \code{visitSequence} \tab Combined as \code{c(x$visitSequence, y$visitSequence)}\cr \code{formulas} \tab Combined as \code{c(x$formulas, y$formulas)}\cr \code{post} \tab Combined as \code{c(x$post, y$post)}\cr \code{blots} \tab Combined as \code{c(x$blots, y$blots)}\cr \code{seed} \tab Taken from \code{x$seed}\cr \code{iteration} \tab Taken from \code{x$iteration}\cr \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr \code{chainMean} \tab Combined from \code{x$chainMean} and \code{y$chainMean}\cr \code{chainVar} \tab Combined from \code{x$chainVar} and \code{y$chainVar}\cr \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr \code{version} \tab Current package version\cr \code{date} \tab Current date\cr } } \examples{ # impute four variables at once (default) imp <- mice(nhanes, m = 1, maxit = 1, print = FALSE) imp$predictorMatrix # impute two by two data1 <- nhanes[, c("age", "bmi")] data2 <- nhanes[, c("hyp", "chl")] imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE) # Append two solutions imp12 <- cbind(imp1, imp2) # This is a different imputation model imp12$predictorMatrix # Append the other way around imp21 <- cbind(imp2, imp1) imp21$predictorMatrix # 
Append 'forgotten' variable chl data3 <- nhanes[, 1:3] imp3 <- mice(data3, maxit = 1,m = 2, print = FALSE) imp4 <- cbind(imp3, chl = nhanes$chl) # Of course, chl was not imputed head(complete(imp4)) # Combine mids object with data frame imp5 <- cbind(imp3, nhanes2) head(complete(imp5)) } \seealso{ \code{\link{cbind}}, \code{\link{rbind.mids}}, \code{\link{ibind}}, \code{\link[=mids-class]{mids}} } \author{ Karin Groothuis-Oudshoorn, Stef van Buuren } \keyword{manip} mice/man/make.formulas.Rd0000644000176200001440000000226313416657163015014 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{make.formulas} \alias{make.formulas} \title{Creates a \code{formulas} argument} \usage{ make.formulas(data, blocks = make.blocks(data), predictorMatrix = NULL) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. The default assigns each variable in its own block.} \item{predictorMatrix}{A \code{predictorMatrix} specified by the user.} } \value{ A list of formula's. } \description{ This helper function creates a valid \code{formulas} object. The \code{formulas} object is an argument to the \code{mice} function. It is a list of formula's that specifies the target variables and the predictors by means of the standard \code{~} operator. 
} \examples{ f1 <- make.formulas(nhanes) f1 f2 <- make.formulas(nhanes, blocks = make.blocks(nhanes, "collect")) f2 # for editing, it may be easier to work with the character vector c1 <- as.character(f1) c1 # fold it back into a formula list f3 <- name.formulas(lapply(c1, as.formula)) f3 } \seealso{ \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} } mice/man/extend.formula.Rd0000644000176200001440000000150213574715125015174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{extend.formula} \alias{extend.formula} \title{Extends a formula with predictors} \usage{ extend.formula( formula = ~0, predictors = NULL, auxiliary = TRUE, include.intercept = FALSE, ... ) } \arguments{ \item{formula}{A formula. If it is not a formula, the formula is internally reset to \code{~0}.} \item{predictors}{A character vector of variable names.} \item{auxiliary}{A logical that indicates whether the variables listed in \code{predictors} should be added to the formula as main effects. The default is \code{TRUE}.} \item{include.intercept}{A logical that indicates whether the intercept should be included in the result.} } \value{ A formula } \description{ Extends a formula with predictors } \keyword{internal} mice/man/name.formulas.Rd0000644000176200001440000000466413416657163015026 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{name.formulas} \alias{name.formulas} \title{Name formula list elements} \usage{ name.formulas(formulas, prefix = "F") } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. 
The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{prefix}{A character vector of length 1 with the prefix to be used for naming any unnamed blocks with two or more variables.} } \value{ Named list of formulas } \description{ This helper function names any unnamed elements in the \code{formulas} list. This is a convenience function. } \details{ This function will name any unnamed list elements specified in the optional argument \code{formulas}. Unnamed formula's consisting of just one response variable will be named after this variable. Unnamed formula's containing more than one variable will be named by the \code{prefix} argument, padded by an integer sequence starting at 1. } \examples{ # fully conditionally specified main effects model form1 <- list(bmi ~ age + chl + hyp, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp) form1 <- name.formulas(form1) imp1 <- mice(nhanes, formulas = form1, print = FALSE, m = 1, seed = 12199) # same model using dot notation form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) 
form2 <- name.formulas(form2) imp2 <- mice(nhanes, formulas = form2, print = FALSE, m = 1, seed = 12199) identical(complete(imp1), complete(imp2)) # same model using repeated multivariate imputation form3 <- name.blocks(list(all = bmi + hyp + chl ~ .)) imp3 <- mice(nhanes, formulas = form3, print = FALSE, m = 1, seed = 12199) cmp3 <- complete(imp3) identical(complete(imp1), complete(imp3)) # same model using predictorMatrix imp4 <- mice(nhanes, print = FALSE, m = 1, seed = 12199, auxiliary = TRUE) identical(complete(imp1), complete(imp4)) # different model: multivariate imputation for chl and bmi form5 <- list(chl + bmi ~ ., hyp ~ bmi + age) form5 <- name.formulas(form5) imp5 <- mice(nhanes, formulas = form5, print = FALSE, m = 1, seed = 71712) } \seealso{ \code{\link{mice}} } mice/man/fico.Rd0000644000176200001440000000205513416657163013167 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{fico} \alias{fico} \title{Fraction of incomplete cases among cases with observed} \usage{ fico(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} } \value{ A vector of length \code{ncol(data)} of FICO statistics. } \description{ FICO is an outbound statistic defined by the fraction of incomplete cases among cases with \code{Yj} observed (White and Carlin, 2010). } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. 
} \seealso{ \code{\link{fluxplot}}, \code{\link{flux}}, \code{\link{md.pattern}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/lm.mids.Rd0000644000176200001440000000276313416657163013620 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lm.R \name{lm.mids} \alias{lm.mids} \title{Linear regression for \code{mids} object} \usage{ lm.mids(formula, data, ...) } \arguments{ \item{formula}{a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right. See the documentation of \code{\link{lm}} and \code{\link{formula}} for details.} \item{data}{An object of type 'mids', which stands for 'multiply imputed data set', typically created by a call to function \code{mice()}.} \item{\dots}{Additional parameters passed to \code{\link{lm}}} } \value{ An object of class \code{mira}, which stands for 'multiply imputed repeated analysis'. This object contains \code{data$m} distinct \code{lm.objects}, plus some descriptive information. } \description{ Applies \code{lm()} to a multiply imputed data set } \details{ This function is included for backward compatibility with V1.0. The function is superseded by \code{\link{with.mids}}. } \examples{ imp <- mice(nhanes) fit <- lm.mids(bmi~hyp+chl, data = imp) fit } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{lm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{multivariate} mice/man/with.mids.Rd0000644000176200001440000000273113621065624014147 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/with.R \name{with.mids} \alias{with.mids} \title{Evaluate an expression in multiply imputed datasets} \usage{ \method{with}{mids}(data, expr, ...) } \arguments{ \item{data}{An object of type \code{mids}, which stands for 'multiply imputed data set', typically created by a call to function \code{mice()}.} \item{expr}{An expression with a formula object, with the response on the left of a \code{~} operator, and the terms, separated by \code{+} operators, on the right. See the documentation of \code{\link{lm}} and \code{\link{formula}} for details.} \item{\dots}{Additional parameters passed to \code{expr}} } \value{ A list object of S3 class \code{mira} } \description{ Performs a computation on each of the imputed datasets in \code{data}. } \examples{ imp <- mice(nhanes2) fit1 <- with(data=imp,exp=lm(bmi~age+hyp+chl)) fit2 <- with(data=imp,exp=glm(hyp~age+bmi+chl,family=binomial)) anova.imp <- with(data=imp,exp=anova(lm(bmi~age+hyp+chl))) } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}}, \code{\link{pool}}, \code{\link{D1}}, \code{\link{D3}}, \code{\link{pool.r.squared}} } \author{ Karin Oudshoorn, Stef van Buuren 2009-2012 } \keyword{multivariate} mice/man/ampute.Rd0000644000176200001440000002716313617562135013546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Ampute.R \name{ampute} \alias{ampute} \title{Generate Missing Data for Simulation Purposes} \usage{ ampute( data, prop = 0.5, patterns = NULL, freq = NULL, mech = "MAR", weights = NULL, std = TRUE, cont = TRUE, type = NULL, odds = NULL, bycases = TRUE, run = TRUE ) } \arguments{ \item{data}{A complete data matrix or dataframe. Values should be numeric. Categorical variables should have been transformed into dummies.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{patterns}{A matrix or data frame of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. The user may specify as many patterns as desired. One pattern (a vector) or double patterns are possible as well. Default is a square matrix of size #variables where each pattern has missingness on one variable only (created with \code{\link{ampute.default.patterns}}). After the amputation procedure, \code{\link{md.pattern}} can be used to investigate the missing data patterns in the data.} \item{freq}{A vector of length #patterns containing the relative frequency with which the patterns should occur. For example, for three missing data patterns, the vector could be \code{c(0.4, 0.4, 0.2)}, meaning that of all cases with missing values, 40 percent should have pattern 1, 40 percent pattern 2 and 20 percent pattern 3. The vector should sum to 1. 
Default is an equal probability for each pattern, created with \code{\link{ampute.default.freq}}.} \item{mech}{A string specifying the missingness mechanism, either MCAR (Missing Completely At Random), MAR (Missing At Random) or MNAR (Missing Not At Random). Default is a MAR missingness mechanism.} \item{weights}{A matrix or data frame of size #patterns by #variables. The matrix contains the weights that will be used to calculate the weighted sum scores. For a MAR mechanism, weights of the variables that will be made incomplete, should be zero. For a MNAR mechanism, these weights might have any possible value. Furthermore, the weights may differ between patterns and between variables. They may be negative as well. Within each pattern, the relative size of the values are of importance. The default weights matrix is made with \code{\link{ampute.default.weights}} and returns a matrix with equal weights for all variables. In case of MAR, variables that will be amputed will be weighted with \code{0}. If it is MNAR, variables that will be observed will be weighted with \code{0}. If mechanism is MCAR, the weights matrix will not be used.} \item{std}{Logical. Whether the weighted sum scores should be calculated with standardized data or with non-standardized data. The latter is advised when making use of train and testsets in order to prevent leakage.} \item{cont}{Logical. Whether the probabilities should be based on a continuous or discrete distribution. If TRUE, the probabilities of being missing are based on a continuous logistic distribution function. \code{\link{ampute.continuous}} will be used to calculate and assign the probabilities. These will be based on argument \code{type}. If FALSE, the probabilities of being missing are based on a discrete distribution (\code{\link{ampute.discrete}}) based on the \code{odds} argument. Default is TRUE.} \item{type}{A vector of strings containing the type of missingness for each pattern. 
Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or \code{"RIGHT"}. If a single missingness type is entered, all patterns will be created by the same type. If missingness types should differ over patterns, a vector of missingness types should be entered. Default is RIGHT for all patterns and is the result of \code{\link{ampute.default.type}}.} \item{odds}{A matrix where #patterns defines the #rows. Each row should contain the odds of being missing for the corresponding pattern. The amount of odds values defines in how many quantiles the sum scores will be divided. The values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The #quantiles may differ between the patterns, specify NA for cells remaining empty. Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of \code{\link{ampute.default.odds}}.} \item{bycases}{Logical. If TRUE, the proportion of missingness is defined in terms of cases. If FALSE, the proportion of missingness is defined in terms of cells. Default is TRUE.} \item{run}{Logical. If TRUE, the amputations are implemented. If FALSE, the return object will contain everything but the amputed data set.} } \value{ Returns an S3 object of class \code{\link{mads-class}} (multivariate amputed data set) } \description{ This function generates multivariate missing data in a MCAR, MAR or MNAR manner. Imputation of data sets containing missing values can be performed with \code{\link{mice}}. } \details{ When new multiple imputation techniques are tested, missing values need to be generated in simulated data sets. The generation of missing values is what we call: amputation. The function \code{ampute} is developed to perform any kind of amputation desired by the researcher. An extensive example and more explanation of the function can be found in the vignette \emph{Generate missing values with ampute}, available in \pkg{mice} as well. 
For imputation, the function \code{\link{mice}} is advised. Until recently, univariate amputation procedures were used to generate missing data in complete, simulated data sets. With this approach, variables are made incomplete one variable at a time. When several variables need to be amputed, the procedure is repeated multiple times. With this univariate approach, it is difficult to relate the missingness on one variable to the missingness on another variable. A multivariate amputation procedure solves this issue and moreover, it does justice to the multivariate nature of data sets. Hence, \code{ampute} is developed to perform the amputation according the researcher's desires. The idea behind the function is the specification of several missingness patterns. Each pattern is a combination of variables with and without missing values (denoted by \code{0} and \code{1} respectively). For example, one might want to create two missingness patterns on a data set with four variables. The patterns could be something like: \code{0, 0, 1, 1} and \code{1, 0, 1, 0}. Each combination of zeros and ones may occur. Furthermore, the researcher specifies the proportion of missingness, either the proportion of missing cases or the proportion of missing cells, and the relative frequency each pattern occurs. Consequently, the data is divided over the patterns with these probabilities. Now, each case is candidate for a certain missingness pattern, but whether the case will have missing values eventually, depends on other specifications. The first of these specifications is the missing mechanism. There are three possible mechanisms: the missingness depends completely on chance (MCAR), the missingness depends on the values of the observed variables (i.e. the variables that remain complete) (MAR) or on the values of the variables that will be made incomplete (MNAR). For a more thorough explanation of these definitions, I refer to Van Buuren (2012). 
When the user sets the missingness mechanism to \code{"MCAR"}, the candidates have an equal probability of having missing values. No other specifications have to be made. For a \code{"MAR"} or \code{"MNAR"} mechanism, weighted sum scores are calculated. These scores are a linear combination of the variables. In order to calculate the weighted sum scores, the data is standardized. That is the reason the data has to be numeric. Second, for each case, the values in the data set are multiplied with the weights, specified by argument \code{weights}. These weighted scores will be summed, resulting in a weighted sum score for each case. The weights may differ between patterns and they may be negative or zero as well. Naturally, in case of a \code{MAR} mechanism, the weights corresponding to the variables that will be made incomplete, have a \code{0}. Note that this might be different for each pattern. In case of \code{MNAR} missingness, especially the weights of the variables that will be made incomplete are of importance. However, the other variables might be weighted as well. It is the relative difference between the weights that will result in an effect in the sum scores. For example, for the first missing data pattern mentioned above, the weights for the third and fourth variables might be set to 2 and 4. However, weight values of 0.2 and 0.4 will have the exact same effect on the weighted sum score: the fourth variable is weighted twice as much as variable 3. Based on the weighted sum scores, either a discrete or continuous distribution of probabilities is used to calculate whether a candidate will have missing values. For a discrete distribution of probabilities, the weighted sum scores are divided into subgroups of equal size (quantiles). Thereafter, the user specifies for each subgroup the odds of being missing. Both the number of subgroups and the odds values are important for the generation of missing data. 
For example, for a RIGHT-like mechanism, scoring in one of the higher quantiles should have high missingness odds, whereas for a MID-like mechanism, the central groups should have higher odds. Again, not the size of the odds values are of importance, but the relative distance between the values. The continuous distributions of probabilities are based on the logit function, as described by Van Buuren (2012). The user can specify the type of missingness, which, again, may differ between patterns. For an extensive example of the working of the function, I gladly refer to the vignette \emph{Generate missing values with ampute}. } \examples{ # Simulate data set with \code{mvrnorm} from package \code{\pkg{MASS}}. sigma <- matrix(data = c(1, 0.2, 0.2, 0.2, 1, 0.2, 0.2, 0.2, 1), nrow = 3) complete.data <- MASS::mvrnorm(n = 100, mu = c(5, 5, 5), Sigma = sigma) # Perform quick amputation result1 <- ampute(data = complete.data) # Change default matrices as desired patterns <- result1$patterns patterns[1:3, 2] <- 0 odds <- result1$odds odds[2,3:4] <- c(2, 4) odds[3,] <- c(3, 1, NA, NA) # Rerun amputation result2 <- ampute(data = complete.data, patterns = patterns, freq = c(0.3, 0.3, 0.4), cont = FALSE, odds = odds) # Run an amputation procedure with continuous probabilities result3 <- ampute(data = complete.data, type = c("RIGHT", "TAIL", "LEFT")) } \references{ Brand, J.P.L. (1999). \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets} (pp. 110-113). Dissertation. Rotterdam: Erasmus University. Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn, C.G.M., Rubin, D.B. (2006). Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, 76\emph{(12)}, Appendix B. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html#sec:generateuni}{\emph{Flexible Imputation of Missing Data. 
Second Edition.}} Boca Raton, FL.: Chapman & Hall/CRC Press. Vink, G. (2016). Towards a standardized evaluation of multiple imputation routines. } \seealso{ \code{\link{mads-class}}, \code{\link{bwplot}}, \code{\link{xyplot}}, \code{\link{mice}} } \author{ Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 } mice/man/mice.impute.2lonly.mean.Rd0000644000176200001440000000560013574715125016620 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.mean.R \name{mice.impute.2lonly.mean} \alias{mice.impute.2lonly.mean} \alias{2lonly.mean} \title{Imputation of most likely value within the class} \usage{ mice.impute.2lonly.mean(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. The class variable (only one is allowed) is coded as \code{-2}.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Method \code{2lonly.mean} replicates the most likely value within a class of a second-level variable. It works for numeric and factor data. The function is primarily useful as a quick fixup for data in which the second-level variable is inconsistent. } \details{ Observed values in \code{y} are averaged within the class, and replicated to the missing \code{y} within that class. 
This function is primarily useful for repairing incomplete data that are constant within the class, but vary over classes. For numeric variables, \code{mice.impute.2lonly.mean()} imputes the class mean of \code{y}. If \code{y} is a second-level variable, then conventionally all observed \code{y} will be identical within the class, and the function just provides a quick fix for any missing \code{y} by filling in the class mean. For factor variables, \code{mice.impute.2lonly.mean()} imputes the most frequently occurring category within the class. If there are no observed \code{y} in the class, all entries of the class are set to \code{NA}. Note that this may produce problems later on in \code{mice} if imputation routines are called that expect predictor data to be complete. Methods designed for imputing this type of second-level variables include \code{\link{mice.impute.2lonly.norm}} and \code{\link{mice.impute.2lonly.pmm}}. } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Boca Raton, FL.: Chapman & Hall/CRC Press. 
} \seealso{ Other univariate-2lonly: \code{\link{mice.impute.2lonly.norm}()}, \code{\link{mice.impute.2lonly.pmm}()} } \author{ Gerko Vink, Stef van Buuren, 2019 } \concept{univariate-2lonly} \keyword{datagen} mice/man/densityplot.mids.Rd0000644000176200001440000002117113617553123015552 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/densityplot.R \name{densityplot.mids} \alias{densityplot.mids} \alias{densityplot} \title{Density plot of observed and imputed data} \usage{ \method{densityplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, plot.points = FALSE, theme = mice.theme(), mayreplicate = TRUE, thicker = 2.5, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.densityplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.densityplot"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. 
This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odd labeling of the vertical axis. The function \code{densityplot} does not use the \code{y} terms in the formula. Density plots for \code{x1} and \code{x2} are requested as \code{~ x1 + x2}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated is the response indicator \code{is.na(x$data)}. The default \code{na.groups = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} argument in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{plot.points}{A logical used in \code{densityplot} that signals whether the points should be plotted.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. 
Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{mayreplicate}{A logical indicating whether color, line widths, and so on, may be replicated. The graphical functions attempt to choose "intelligent" graphical parameters. For example, the same color can be replicated for different elements, e.g. use all reds for the imputed data. Replication may be switched off by setting the flag to \code{FALSE}, in order to allow the user to gain full control.} \item{thicker}{Used in \code{densityplot}. Multiplication factor of the line width of the observed density. \code{thicker=1} uses the same thickness for the observed and imputed data.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{panel}{See \code{\link{xyplot}}.} \item{default.prepanel}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. 
} \description{ Plotting methods for imputed data using \pkg{lattice}. \code{densityplot} produces plots of the densities. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. 
\code{densityplot} errs on empty groups, which occurs if all observations in the subgroup contain \code{NA}. The relevant error message is: \code{Error in density.default: ... need at least 2 points to select a bandwidth automatically}. There is yet no workaround for this problem. Use the more robust \code{bwplot} or \code{stripplot} as a replacement. } \examples{ imp <- mice(boys, maxit=1) ### density plot of head circumference per imputation ### blue is observed, red is imputed densityplot(imp, ~hc|.imp) ### All combined in one panel. densityplot(imp, ~hc) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{stripplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:densityplot]{densityplot}}, \code{\link[lattice:panel.densityplot]{panel.densityplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.set]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/mice.impute.2l.lmer.Rd0000644000176200001440000000540013574715125015733 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.lmer.R \name{mice.impute.2l.lmer} \alias{mice.impute.2l.lmer} \title{Imputation by a two-level normal model using \code{lmer}} \usage{ mice.impute.2l.lmer(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. 
The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. Fixed effects are indicated by a '1'.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{\dots}{Arguments passed down to \code{lmer}} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate systematically and sporadically missing data using a two-level normal model using \code{lme4::lmer()} } \details{ Data are missing systematically if they have not been measured, e.g., in the case where we combine data from different sources. Data are missing sporadically if they have been partially observed. While the method is fully Bayesian, it may fix parameters of the variance-covariance matrix or the random effects to their estimated value in cases where creating draws from the posterior is not possible. The procedure throws a warning when this happens. } \references{ Jolani S. (2017) Hierarchical imputation of systematically and sporadically missing data: An approximate Bayesian approach using chained equations. Forthcoming. Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). Imputation of systematically missing predictors in an individual participant data meta-analysis: a generalized approach using MICE. \emph{Statistics in Medicine}, 34:1841-1863. Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. and Roberts, J.K. 
(Eds.), \emph{The Handbook of Advanced Multilevel Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.norm}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Shahab Jolani, 2017 } \concept{univariate-2l} \keyword{datagen} mice/man/mice.impute.polr.Rd0000644000176200001440000001055513620753345015441 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.polr.R \name{mice.impute.polr} \alias{mice.impute.polr} \title{Imputation of ordered data by polytomous regression} \usage{ mice.impute.polr( y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, polr.to.loggedEvents = FALSE, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nnet.maxit}{Tuning parameter for \code{nnet()}.} \item{nnet.trace}{Tuning parameter for \code{nnet()}.} \item{nnet.MaxNWts}{Tuning parameter for \code{nnet()}.} \item{polr.to.loggedEvents}{A logical indicating whether each fallback to the \code{multinom()} function should be written to \code{loggedEvents}. 
The default is \code{FALSE}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes missing data in a categorical variable using polytomous regression } \details{ The function \code{mice.impute.polr()} imputes for ordered categorical response variables by the proportional odds logistic regression (polr) model. The function repeatedly applies logistic regression on the successive splits. The model is also known as the cumulative link model. By default, ordered factors with more than two levels are imputed by \code{mice.impute.polr}. The algorithm of \code{mice.impute.polr} uses the function \code{polr()} from the \code{MASS} package. In order to avoid bias due to perfect prediction, the algorithm augments the data according to the method of White, Daniel and Royston (2010). The call to \code{polr} might fail, usually because the data are very sparse. In that case, \code{multinom} is tried as a fallback. If the local flag \code{polr.to.loggedEvents} is set to TRUE, a record is written to the \code{loggedEvents} component of the \code{\link{mids}} object. Use \code{mice(data, polr.to.loggedEvents = TRUE)} to set the flag. } \note{ In December 2019 Simon White reported that \code{polr} could always fail silently. I can confirm this behaviour for versions \code{mice 3.0.0 - mice 3.6.6}, so any method requests for \code{polr} in these versions were in fact handled by \code{multinom}. See \url{https://github.com/stefvanbuuren/mice/issues/206} for details. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. 
Rotterdam: Erasmus University. White, I.R., Daniel, R. Royston, P. (2010). Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with S-Plus (4th ed)}. Springer, Berlin. } \seealso{ \code{\link{mice}}, \code{\link[nnet]{multinom}}, \code{\link[MASS]{polr}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mammalsleep.Rd0000644000176200001440000000364713416661213014543 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mammalsleep.R \docType{data} \name{mammalsleep} \alias{mammalsleep} \alias{sleep} \title{Mammal sleep data} \format{\code{mammalsleep} is a data frame with 62 rows and 11 columns: \describe{ \item{species}{Species of animal} \item{bw}{Body weight (kg)} \item{brw}{Brain weight (g)} \item{sws}{Slow wave ("nondreaming") sleep (hrs/day)} \item{ps}{Paradoxical ("dreaming") sleep (hrs/day)} \item{ts}{Total sleep (hrs/day) (sum of slow wave and paradoxical sleep)} \item{mls}{Maximum life span (years)} \item{gt}{Gestation time (days)} \item{pi}{Predation index (1-5), 1 = least likely to be preyed upon} \item{sei}{Sleep exposure index (1-5), 
1 = least exposed (e.g. animal sleeps in a well-protected den), 5 = most exposed} \item{odi}{Overall danger index (1-5) based on the above two indices and other information, 1 = least danger (from other animals), 5 = most danger (from other animals)} }} \source{ Allison, T., Cicchetti, D.V. (1976). Sleep in Mammals: Ecological and Constitutional Correlates. Science, 194(4266), 732-734. } \description{ Dataset from Allison and Cicchetti (1976) of 62 mammal species on the interrelationship between sleep, ecological, and constitutional variables. The dataset contains missing values on five variables. } \details{ Allison and Cicchetti (1976) investigated the interrelationship between sleep, ecological, and constitutional variables. They assessed these variables for 39 mammalian species. The authors concluded that slow-wave sleep is negatively associated with a factor related to body size. This suggests that large amounts of this sleep phase are disadvantageous in large species. Also, paradoxical sleep (REM sleep) was associated with a factor related to predatory danger, suggesting that large amounts of this sleep phase are disadvantageous in prey species. } \examples{ sleep <- data(mammalsleep) } \keyword{datasets} mice/man/D2.Rd0000644000176200001440000000313513621066000012472 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D2.R \name{D2} \alias{D2} \title{Compare two nested models using D2-statistic} \usage{ D2(fit1, fit0 = NULL, use = "wald", ...) } \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is a nested within \code{fit1}. The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{use}{A character string denoting Wald- or likelihood-based based tests. Can be either \code{"wald"} or \code{"likelihood"}. 
Only used if \code{method="D2"}.} \item{...}{Not used.} } \description{ The D2-statistic pools test statistics from the repeated analyses. The method is less powerful than the D1- and D3-statistics. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D2(mi1, mi0) # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D2(fit1, fit0) } \references{ Li, K. H., X. L. Meng, T. E. Raghunathan, and D. B. Rubin. 1991. Significance Levels from Repeated p-Values with Multiply-Imputed Data. \emph{Statistica Sinica} 1 (1): 65–92. \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:chi} } \seealso{ \code{\link[mitml]{testModels}} } mice/man/is.mipo.Rd0000644000176200001440000000051513416657163013624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mipo} \alias{is.mipo} \title{Check for \code{mipo} object} \usage{ is.mipo(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mipo} } \description{ Check for \code{mipo} object } mice/man/xyplot.mids.Rd0000644000176200001440000001636113617553123014540 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/xyplot.R \name{xyplot.mids} \alias{xyplot.mids} \alias{xyplot} \title{Scatterplot of observed and imputed data} \usage{ \method{xyplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created 
by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odd labeling of the vertical axis.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated is the response indicator \code{is.na(x$data)}. The default \code{na.groups = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} argument in \pkg{lattice}. 
It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. 
\code{xyplot()} produces conditional scatterplots. The function automatically separates the observed (blue) and imputed (red) data. The function extends the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. 
} \examples{ imp <- mice(boys, maxit=1) ### xyplot: scatterplot by imputation number ### observe the erroneous outlying imputed values ### (caused by imputing hgt from bmi) xyplot(imp, hgt~age|.imp, pch=c(1,20),cex=c(1,1.5)) ### same, but label with missingness of wgt (four cases) xyplot(imp, hgt~age|.imp, na.group=wgt, pch=c(1,20),cex=c(1,1.5)) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link{stripplot}}, \code{\link{densityplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:xyplot]{xyplot}}, \code{\link[lattice:panel.xyplot]{panel.xyplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.set]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/complete.mids.Rd0000644000176200001440000000657013620062074015004 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{complete.mids} \alias{complete.mids} \alias{complete} \title{Extracts the completed data from a \code{mids} object} \usage{ \method{complete}{mids}(data, action = 1L, include = FALSE, mild = FALSE, ...) } \arguments{ \item{data}{An object of class \code{mids} as created by the function \code{mice()}.} \item{action}{A numeric vector or a keyword. Numeric values between 1 and \code{data$m} return the data with imputation number \code{action} filled in. The value of \code{action = 0} return the original data, with missing values. \code{action} can also be one of the following keywords: \code{"all"}, \code{"long"}, \code{"broad"} and \code{"repeated"}. See the Details section for the interpretation. 
The default \code{action = 1L} returns the first imputed data set.} \item{include}{A logical to indicate whether the original data with the missing values should be included.} \item{mild}{A logical indicating whether the return value should always be an object of class \code{mild}. Setting \code{mild = TRUE} overrides \code{action} keywords \code{"long"}, \code{"broad"} and \code{"repeated"}. The default is \code{FALSE}.} \item{\dots}{Additional arguments. Not used.} } \value{ Complete data set with missing values replaced by imputations. A \code{data.frame}, or a list of data frames of class \code{mild}. } \description{ Takes an object of class \code{mids}, fills in the missing data, and returns the completed data in a specified format. } \details{ The argument \code{action} can be a length-1 character, which is matched to one of the following keywords: \describe{ \item{\code{"all"}}{produces a \code{mild} object of imputed data sets. When \code{include = TRUE}, then the original data are appended as the first list element;} \item{\code{"long"}}{ produces a data set where imputed data sets are stacked vertically. Two columns are added: 1) \code{.imp}, integer, referring to the imputation number, and 2) \code{.id}, character, the row names of \code{data$data};} \item{\code{"stacked"}}{ same as \code{"long"} but without the two additional columns;} \item{\code{"broad"}}{ produces a data set where imputed data sets are stacked horizontally. Columns are ordered as in the original data. The imputation number is appended to each column name;} \item{\code{"repeated"}}{ same as \code{"broad"}, but with columns in a different order.} } } \note{ Technical note: \code{mice 3.7.5} renamed the \code{complete()} function to \code{complete.mids()} and exported it as an S3 method of the generic \code{tidyr::complete()}. Name clashes between \code{mice::complete()} and \code{tidyr::complete()} should no longer occur. 
} \examples{ # obtain first imputed data set sum(is.na(nhanes2)) imp <- mice(nhanes2, print = FALSE, maxit = 1) dat <- complete(imp) sum(is.na(dat)) # obtain stacked third and fifth imputation dat <- complete(imp, c(3, 5)) # obtain all datasets, with additional identifiers head(complete(imp, "long")) # same, but now as list, mild object dslist <- complete(imp, "all") length(dslist) # same, but also include the original data dslist <- complete(imp, "all", include = TRUE) length(dslist) # select original + 3 + 5, store as mild dslist <- complete(imp, c(0, 3, 5), mild = TRUE) names(dslist) } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}} } \keyword{manip} mice/man/construct.blocks.Rd0000644000176200001440000000441213416657163015546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{construct.blocks} \alias{construct.blocks} \title{Construct blocks from \code{formulas} and \code{predictorMatrix}} \usage{ construct.blocks(formulas = NULL, predictorMatrix = NULL) } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). 
By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g., \code{2} or \code{-2}) are also allowed.} } \value{ A \code{blocks} object. } \description{ This helper function attempts to find blocks of variables in the specification of the \code{formulas} and/or \code{predictorMatrix} objects. Blocks specified by \code{formulas} may consist of multiple variables. Blocks specified by \code{predictorMatrix} are assumed to consist of single variables. Any duplicates in names are removed, and the formula specification is preferred. When both \code{predictorMatrix} and \code{formulas} specify models for the same block, the model for the \code{predictorMatrix} is removed, and priority is given to the specification given in \code{formulas}. } \examples{ form <- name.formulas(list(bmi + hyp ~ chl + age, chl ~ bmi)) pred <- make.predictorMatrix(nhanes[, c("age", "chl")]) construct.blocks(formulas = form, pred = pred) } \seealso{ \code{\link{make.blocks}}, \code{\link{name.blocks}} } mice/man/nhanes.Rd0000644000176200001440000000171113416657163013521 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nhanes.R \docType{data} \name{nhanes} \alias{nhanes} \title{NHANES example - all variables numerical} \format{A data frame with 25 observations on the following 4 variables. \describe{ \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} \item{bmi}{Body mass index (kg/m**2)} \item{hyp}{Hypertensive (1=no,2=yes)} \item{chl}{Total serum cholesterol (mg/dL)} }} \source{ Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate Data.} London: Chapman & Hall. Table 6.14. } \description{ A small data set with non-monotone missing values. } \details{ A small data set with all numerical variables.
The data set \code{nhanes2} is the same data set, but with \code{age} and \code{hyp} treated as factors. } \examples{ imp <- mice(nhanes) # create 5 imputed data sets complete(imp) # print the first imputed data set } \seealso{ \code{\link{nhanes2}} } \keyword{datasets} mice/man/mids-class.Rd0000644000176200001440000001103013416657163014277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids.R \name{mids-class} \alias{mids-class} \alias{mids} \title{Multiply imputed data set (\code{mids})} \description{ The \code{mids} object contains a multiply imputed data set. The \code{mids} object is generated by functions \code{mice()}, \code{mice.mids()}, \code{cbind.mids()}, \code{rbind.mids()} and \code{ibind.mids()}. } \details{ The \code{mids} class of objects has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}. The \code{loggedEvents} entry is a matrix with five columns containing a record of automatic removal actions. It is \code{NULL} if no action was taken.
At initialization the program does the following three actions: \describe{ \item{1}{A variable that contains missing values, that is not imputed and that is used as a predictor is removed} \item{2}{A constant variable is removed} \item{3}{A collinear variable is removed.} } During iteration, the program does the following actions: \describe{ \item{1}{One or more variables that are linearly dependent are removed (for categorical data, a 'variable' corresponds to a dummy variable)} \item{2}{Proportional odds regression imputation that does not converge and is replaced by \code{polyreg}.} } Explanation of elements in \code{loggedEvents}: \describe{ \item{\code{it}}{iteration number at which the record was added,} \item{\code{im}}{imputation number,} \item{\code{dep}}{name of the dependent variable,} \item{\code{meth}}{imputation method used,} \item{\code{out}}{a (possibly long) character vector with the names of the altered or removed predictors.} } } \note{ The \code{mice} package does not use the S4 class definitions, and instead relies on the S3 list equivalent \code{oldClass(obj) <- "mids"}. } \section{Slots}{ \describe{ \item{\code{.Data}:}{Object of class \code{"list"} containing the following slots:} \item{\code{data}:}{Original (incomplete) data set.} \item{\code{imp}:}{A list of \code{ncol(data)} components with the generated multiple imputations. 
Each list component is a \code{data.frame} (\code{nmis[j]} by \code{m}) of imputed values for variable \code{j}.} \item{\code{m}:}{Number of imputations.} \item{\code{where}:}{The \code{where} argument of the \code{mice()} function.} \item{\code{blocks}:}{The \code{blocks} argument of the \code{mice()} function.} \item{\code{call}:}{Call that created the object.} \item{\code{nmis}:}{An array containing the number of missing observations per column.} \item{\code{method}:}{A vector of strings of length \code{length(blocks)} specifying the imputation method per block.} \item{\code{predictorMatrix}:}{A numerical matrix containing integers specifying the predictor set.} \item{\code{visitSequence}:}{The sequence in which columns are visited.} \item{\code{formulas}:}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names.} \item{\code{post}:}{A vector of strings of length \code{length(blocks)} with commands for post-processing.} \item{\code{seed}:}{The seed value of the solution.} \item{\code{iteration}:}{Last Gibbs sampling iteration number.} \item{\code{lastSeedValue}:}{The most recent seed value.} \item{\code{chainMean}:}{A list of \code{m} components. Each component is a \code{length(visitSequence)} by \code{maxit} matrix containing the mean of the generated multiple imputations. The array can be used for monitoring convergence.
Note that observed data are not present in this mean.} \item{\code{chainVar}:}{A list with similar structure of \code{chainMean}, containing the covariances of the imputed values.} \item{\code{loggedEvents}:}{A \code{data.frame} with five columns containing warnings, corrective actions, and other inside info.} \item{\code{version}:}{Version number of \code{mice} package that created the object.} \item{\code{date}:}{Date at which the object was created.} } } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link[=mira-class]{mira}}, \code{\link{mipo}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{classes} mice/man/nelsonaalen.Rd0000644000176200001440000000352413416657163014550 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nelsonaalen.R \name{nelsonaalen} \alias{nelsonaalen} \alias{hazard} \title{Cumulative hazard rate or Nelson-Aalen estimator} \usage{ nelsonaalen(data, timevar, statusvar) } \arguments{ \item{data}{A data frame containing the data.} \item{timevar}{The name of the time variable in \code{data}.} \item{statusvar}{The name of the event variable, e.g. death in \code{data}.} } \value{ A vector with \code{nrow(data)} elements containing the Nelson-Aalen estimates of the cumulative hazard function. } \description{ Calculates the cumulative hazard rate (Nelson-Aalen estimator) } \details{ This function is useful for imputing variables that depend on survival time. White and Royston (2009) suggested using the cumulative hazard to the survival time H0(T) rather than T or log(T) as a predictor in imputation models. See section 7.1 of Van Buuren (2012) for an example. 
} \examples{ require(MASS) leuk$status <- 1 ## no censoring occurs in leuk data (MASS) ch <- nelsonaalen(leuk, time, status) plot(x = leuk$time, y = ch, ylab='Cumulative hazard', xlab='Time') ### See example on http://www.engineeredsoftware.com/lmar/pe_cum_hazard_function.htm time <- c(43, 67, 92, 94, 149, rep(149,7)) status <- c(rep(1,5),rep(0,7)) eng <- data.frame(time, status) ch <- nelsonaalen(eng, time, status) plot(x = time, y = ch, ylab='Cumulative hazard', xlab='Time') } \references{ White, I. R., Royston, P. (2009). Imputing missing covariate values for the Cox model. \emph{Statistics in Medicine}, \emph{28}(15), 1982-1998. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-toomany.html#a-further-improvement-survival-as-predictor-variable}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/mdc.Rd0000644000176200001440000000524013617544647013016 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mdc.R \name{mdc} \alias{mdc} \title{Graphical parameter for missing data plots.} \usage{ mdc( r = "observed", s = "symbol", transparent = TRUE, cso = grDevices::hcl(240, 100, 40, 0.7), csi = grDevices::hcl(0, 100, 40, 0.7), csc = "gray50", clo = grDevices::hcl(240, 100, 40, 0.8), cli = grDevices::hcl(0, 100, 40, 0.8), clc = "gray50" ) } \arguments{ \item{r}{A numerical or character vector. The numbers 1-6 request colors as follows: 1=\code{cso}, 2=\code{csi}, 3=\code{csc}, 4=\code{clo}, 5=\code{cli} and 6=\code{clc}. Alternatively, \code{r} may contain the strings '\code{observed}', '\code{missing}', or '\code{both}', or abbreviations thereof.} \item{s}{A character vector containing the strings '\code{symbol}' or '\code{line}', or abbreviations thereof.} \item{transparent}{A logical indicating whether alpha-transparency is allowed. 
The default is \code{TRUE}.} \item{cso}{The symbol color for the observed data. The default is a transparent blue.} \item{csi}{The symbol color for the missing or imputed data. The default is a transparent red.} \item{csc}{The symbol color for the combined observed and imputed data. The default is a grey color.} \item{clo}{The line color for the observed data. The default is a slightly darker transparent blue.} \item{cli}{The line color for the missing or imputed data. The default is a slightly darker transparent red.} \item{clc}{The line color for the combined observed and imputed data. The default is a grey color.} } \value{ \code{mdc()} returns a vector containing color definitions. The length of the output vector is calculated from the lengths of \code{r} and \code{s}. Elements of the input vectors are repeated if needed. } \description{ \code{mdc} returns colors used to distinguish observed, missing and combined data in plotting. \code{mice.theme} returns a partial list of named objects that can be used as a theme in \code{stripplot}, \code{bwplot}, \code{densityplot} and \code{xyplot}. } \details{ This function eases consistent use of colors in plots. The default follows the Abayomi convention, which uses blue for observed data, red for missing or imputed data, and black for combined data. } \examples{ # all six colors mdc(1:6) # line colors for observed and missing data mdc(c('obs','mis'), 'lin') } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. } \seealso{ \code{\link{hcl}}, \code{\link{rgb}}, \code{\link[mice:xyplot]{xyplot.mids}}, \code{\link[lattice:xyplot]{xyplot}}, \code{\link[lattice:trellis.par.set]{trellis.par.set}} } \author{ Stef van Buuren, sept 2012.
} \keyword{hplot} mice/man/pool.Rd0000644000176200001440000000710013621065624013205 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.R \name{pool} \alias{pool} \title{Combine estimates by Rubin's rules} \usage{ pool(object, dfcom = NULL) } \arguments{ \item{object}{An object of class \code{mira} (produced by \code{with.mids()} or \code{as.mira()}), or a \code{list} with model fits.} \item{dfcom}{A positive number representing the degrees of freedom in the complete-data analysis. The default (\code{dfcom = NULL}) is to extract this information from the first fitted model. When that fails the warning \code{"Large sample assumed"} is printed, and the parameter is set to \code{dfcom = 999999}. Use the \code{dfcom} parameter to specify the correct degrees of freedom.} } \value{ An object of class \code{mipo}, which stands for 'multiple imputation pooled outcome'. } \description{ The \code{pool()} function combines the estimates from \code{m} repeated complete data analyses. The typical sequence of steps to do a multiple imputation analysis is: \enumerate{ \item Impute the missing data by the \code{mice} function, resulting in a multiply imputed data set (class \code{mids}); \item Fit the model of interest (scientific model) on each imputed data set by the \code{with()} function, resulting in an object of class \code{mira}; \item Pool the estimates from each model into a single set of estimates and standard errors, resulting in an object of class \code{mipo}; \item Optionally, compare pooled estimates from different scientific models by the \code{D1()} or \code{D3()} functions. } A common error is to reverse steps 2 and 3, i.e., to pool the multiply-imputed data instead of the estimates. Doing so may severely bias the estimates of scientific interest and yield incorrect statistical intervals and p-values. The \code{pool()} function will detect this case.
} \details{ The \code{pool()} function averages the estimates of the complete data model, computes the total variance over the repeated analyses by Rubin's rules (Rubin, 1987, p. 76), and computes the following diagnostic statistics per estimate: \enumerate{ \item Relative increase in variance due to nonresponse {\code{r}}; \item Residual degrees of freedom for hypothesis testing {\code{df}}; \item Proportion of total variance due to missingness {\code{lambda}}; \item Fraction of missing information {\code{fmi}}. } The function requires the following input from each fitted model: \enumerate{ \item the estimates of the model, usually obtainable by \code{coef()} \item the standard error of each estimate; \item the residual degrees of freedom of the model. } The \code{pool()} function relies on the \code{broom::tidy} and \code{broom::glance} function for extracting this information from a list of fitted models. The degrees of freedom calculation uses the Barnard-Rubin adjustment for small samples (Barnard and Rubin, 1999). } \examples{ # pool using the classic MICE workflow imp <- mice(nhanes, maxit = 2, m = 2) fit <- with(data = imp, exp = lm(bmi ~ hyp + chl)) summary(pool(fit)) } \references{ Barnard, J. and Rubin, D.B. (1999). Small sample degrees of freedom with multiple imputation. \emph{Biometrika}, 86, 948-955. Rubin, D.B. (1987). \emph{Multiple Imputation for Nonresponse in Surveys}. New York: John Wiley and Sons. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{with.mids}}, \code{\link{as.mira}}, \code{\link[broom]{glance}}, \code{\link[broom]{tidy}} } \keyword{htest} mice/man/nic.Rd0000644000176200001440000000124613416657163013021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ncc.R \name{nic} \alias{nic} \title{Number of incomplete cases} \usage{ nic(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} can be a vector.} } \value{ Number of elements in \code{x} with incomplete data. } \description{ Calculates the number of incomplete cases. } \examples{ nic(nhanes) # the 12 incomplete rows nic(nhanes[,c("bmi","hyp")]) # number of cases with incomplete bmi and hyp } \seealso{ \code{\link{ncc}}, \code{\link{cci}} } \author{ Stef van Buuren, 2017 } mice/man/ic.Rd0000644000176200001440000000154313416657163012643 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cc.R \name{ic} \alias{ic} \title{Select incomplete cases} \usage{ ic(x) } \arguments{ \item{x}{An \code{R} object. Methods are available for classes \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} could be a vector.} } \value{ A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the incomplete cases. } \description{ Extracts incomplete cases from a data set. The companion function for selecting the complete cases is \code{\link{cc}}. } \examples{ ic(nhanes) # get the 12 rows with incomplete cases ic(nhanes[1:10,]) # incomplete cases within the first ten rows ic(nhanes[, c("bmi", "hyp")]) # restrict extraction to variables bmi and hyp } \seealso{ \code{\link{cc}}, \code{\link{ici}} } \author{ Stef van Buuren, 2017.
} \keyword{univar} mice/man/as.mira.Rd0000644000176200001440000000107213416657163013577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mira} \alias{as.mira} \title{Create a \code{mira} object from repeated analyses} \usage{ as.mira(fitlist) } \arguments{ \item{fitlist}{A list containing $m$ fitted analysis objects} } \value{ An S3 object of class \code{mira}. } \description{ The \code{as.mira()} function takes the results of repeated complete-data analysis stored as a list, and turns it into a \code{mira} object that can be pooled. } \seealso{ \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren } mice/man/selfreport.Rd0000644000176200001440000000676613416661213014440 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/selfreport.R \docType{data} \name{selfreport} \alias{selfreport} \alias{mgg} \title{Self-reported and measured BMI} \format{A data frame with 2060 rows and 15 variables: \describe{ \item{src}{Study, either \code{krul} or \code{mgg} (factor)} \item{id}{Person identification number} \item{pop}{Population, all \code{NL} (factor)} \item{age}{Age of respondent in years} \item{sex}{Sex of respondent (factor)} \item{hm}{Height measured (cm)} \item{wm}{Weight measured (kg)} \item{hr}{Height reported (cm)} \item{wr}{Weight reported (kg)} \item{prg}{Pregnancy (factor), all \code{Not pregnant}} \item{edu}{Educational level (factor)} \item{etn}{Ethnicity (factor)} \item{web}{Obtained through web survey (factor)} \item{bm}{BMI measured (kg/m2)} \item{br}{BMI reported (kg/m2)} }} \source{ Krul, A., Daanen, H. A. M., Choi, H. (2010). Self-reported and measured weight, height and body mass index (BMI) in Italy, The Netherlands and North America. \emph{European Journal of Public Health}, \emph{21}(4), 414-419. Van Keulen, H.M.,, Chorus, A.M.J., Verheijden, M.W. (2011). 
\emph{Monitor Convenant Gezond Gewicht Nulmeting (determinanten van) beweeg- en eetgedrag van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar)}. TNO/LS 2011.016. Leiden: TNO. Van der Klauw, M., Van Keulen, H.M., Verheijden, M.W. (2011). \emph{Monitor Convenant Gezond Gewicht Beweeg- en eetgedrag van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar) in 2010 en 2011.} TNO/LS 2011.055. Leiden: TNO. (in Dutch) Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-prevalence.html#sec:srcdata}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Dataset containing height and weight data (measured, self-reported) from two studies. } \details{ This dataset combines two datasets: the \code{krul} data (Krul, 2010) (1257 persons) and the \code{mgg} data (Van Keulen 2011; Van der Klauw 2011) (803 persons). The \code{krul} dataset contains height and weight (both measured and self-reported) from 1257 Dutch adults, whereas the \code{mgg} dataset contains self-reported height and weight for 803 Dutch adults. Section 7.3 in Van Buuren (2012) shows how the missing measured data can be imputed in the \code{mgg} data, so corrected prevalence estimates can be calculated.
} \examples{ md.pattern(selfreport[,c("age","sex","hm","hr","wm","wr")]) ### FIMD Section 7.3.5 Application bmi <- function(h,w){return(w/(h/100)^2)} init <- mice(selfreport,maxit=0) meth <- init$meth meth["bm"] <- "~bmi(hm,wm)" pred <- init$pred pred[,c("src","id","web","bm","br")] <- 0 imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=2, m=1) ## imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=20, m=10) ### Like FIMD Figure 7.6 cd <- complete(imp, 1) xy <- xy.coords(cd$bm, cd$br-cd$bm) plot(xy,col=mdc(2),xlab="Measured BMI",ylab="Reported - Measured BMI", xlim=c(17,45),ylim=c(-5,5), type="n",lwd=0.7) polygon(x=c(30,20,30),y=c(0,10,10),col="grey95",border=NA) polygon(x=c(30,40,30),y=c(0,-10,-10),col="grey95",border=NA) abline(0,0,lty=2,lwd=0.7) idx <- cd$src=="krul" xyc <- xy; xyc$x <- xy$x[idx]; xyc$y <- xy$y[idx] xys <- xy; xys$x <- xy$x[!idx]; xys$y <- xy$y[!idx] points(xyc,col=mdc(1), cex=0.7) points(xys,col=mdc(2), cex=0.7) lines(lowess(xyc),col=mdc(4),lwd=2) lines(lowess(xys),col=mdc(5),lwd=2) text(1:4,x=c(40,28,20,32),y=c(4,4,-4,-4),cex=3) box(lwd=1) } \keyword{datasets} mice/man/mice.impute.2lonly.norm.Rd0000644000176200001440000001340213621214607016642 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.norm.R \name{mice.impute.2lonly.norm} \alias{mice.impute.2lonly.norm} \alias{2lonly.norm} \title{Imputation at level 2 by Bayesian linear regression} \usage{ mice.impute.2lonly.norm(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. 
Matrix \code{x} must not contain missing values.} \item{type}{Group identifier must be specified by '-2'. Predictors must be specified by '1'.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data at level 2 using Bayesian linear regression analysis. Variables at level 1 are aggregated at level 2. The group identifier at level 2 must be indicated by \code{type = -2} in the \code{predictorMatrix}. } \details{ In combination with \code{\link{mice.impute.2l.pan}}, this function allows switching regression imputation between level 1 and level 2 as described in Yucel (2008) or Gelman and Hill (2007, p. 541). The function checks for partially missing level-2 data. Level-2 data are assumed to be constant within the same cluster. If one or more entries are missing, then the procedure aborts with an error message that identifies the cluster with incomplete level-2 data. In such cases, one may first fill in the cluster mean (or mode) by the \code{2lonly.mean} method to remove inconsistencies. } \note{ For a more general approach, see \code{miceadds::mice.impute.2lonly.function()}. } \examples{ ################################################## # simulate some data # x,y ... level 1 variables # v,w ...
level 2 variables G <- 250 # number of groups n <- 20 # number of persons beta <- .3 # regression coefficient rho <- .30 # residual intraclass correlation rho.miss <- .10 # correlation with missing response missrate <- .50 # missing proportion y1 <- rep( rnorm( G , sd = sqrt( rho ) ) , each=n ) + rnorm(G*n , sd = sqrt( 1 - rho )) w <- rep( round( rnorm(G ) , 2 ) , each=n ) v <- rep( round( runif( G , 0 , 3 ) ) , each=n ) x <- rnorm( G*n ) y <- y1 + beta * x + .2 * w + .1 * v dfr0 <- dfr <- data.frame( "group" = rep(1:G , each=n ) , "x" = x , "y" = y , "w" = w , "v" = v ) dfr[ rho.miss * x + rnorm( G*n , sd = sqrt( 1 - rho.miss ) ) < qnorm( missrate ) , "y" ] <- NA dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "w" ] <- NA dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "v" ] <- NA #.... # empty mice imputation imp0 <- mice( as.matrix(dfr) , maxit=0 ) predM <- imp0$predictorMatrix impM <- imp0$method #... # multilevel imputation predM1 <- predM predM1[c("w","y","v"),"group"] <- -2 predM1["y","x"] <- 1 # fixed x effects imputation impM1 <- impM impM1[c("y","w","v")] <- c("2l.pan" , "2lonly.norm" , "2lonly.pmm" ) # y ... imputation using pan # w ... imputation at level 2 using norm # v ... imputation at level 2 using pmm imp1 <- mice( as.matrix( dfr ) , m = 1 , predictorMatrix = predM1 , method = impM1 , maxit=1 , paniter=500) # # Demonstration that 2lonly.norm aborts for partial missing data. # Better use 2lonly.mean for repair. 
data <- data.frame(patid = rep(1:4, each = 5), sex = rep(c(1, 2, 1, 2), each = 5), crp = c(68, 78, 93, NA, 143, 5, 7, 9, 13, NA, 97, NA, 56, 52, 34, 22, 30, NA, NA, 45)) pred <- make.predictorMatrix(data) pred[, "patid"] <- -2 # only missing value (out of five) for patid == 1 data[3, "sex"] <- NA \dontrun{ # The following fails because 2lonly.norm found partially missing # level-2 data # imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"), # predictorMatrix = pred, maxit = 1, m = 2) # > iter imp variable # > 1 1 sex crpError in .imputation.level2(y = y, ... : # > Method 2lonly.norm found the following clusters with partially missing #> level-2 data: 1 #> Method 2lonly.mean can fix such inconsistencies. } # In contrast, if all sex values are missing for patid == 1, it runs fine, # except on r-patched-solaris-x86. I used dontrun to evade CRAN errors. \dontrun{ data[1:5, "sex"] <- NA imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"), predictorMatrix = pred, maxit = 1, m = 2) } } \references{ Gelman, A. and Hill, J. (2007). \emph{Data analysis using regression and multilevel/hierarchical models}. Cambridge, Cambridge University Press. Yucel, RM (2008). Multiple imputation inference for multivariate multilevel continuous data with ignorable non-response. \emph{Philosophical Transactions of the Royal Society A}, \bold{366}, 2389-2404. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{mice.impute.norm}}, \code{\link{mice.impute.2lonly.pmm}}, \code{\link{mice.impute.2l.pan}}, \code{\link{mice.impute.2lonly.mean}} Other univariate-2lonly: \code{\link{mice.impute.2lonly.mean}()}, \code{\link{mice.impute.2lonly.pmm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de} plus some tweaks by Stef van Buuren } \concept{univariate-2lonly} mice/man/glm.mids.Rd0000644000176200001440000000307213416657163013761 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lm.R \name{glm.mids} \alias{glm.mids} \title{Generalized linear model for \code{mids} object} \usage{ glm.mids(formula, family = gaussian, data, ...) } \arguments{ \item{formula}{a formula expression as for other regression models, of the form response ~ predictors. See the documentation of \code{\link{lm}} and \code{\link{formula}} for details.} \item{family}{The family of the glm model} \item{data}{An object of type \code{mids}, which stands for 'multiply imputed data set', typically created by function \code{mice()}.} \item{\dots}{Additional parameters passed to \code{\link{glm}}.} } \value{ An objects of class \code{mira}, which stands for 'multiply imputed repeated analysis'. This object contains \code{data$m} distinct \code{glm.objects}, plus some descriptive information. } \description{ Applies \code{glm()} to a multiply imputed data set } \details{ This function is included for backward compatibility with V1.0. The function is superseded by \code{\link{with.mids}}. } \examples{ imp <- mice(nhanes) # logistic regression on the imputed data fit <- glm.mids((hyp==2)~bmi+chl, data=imp, family = binomial) fit } \references{ Van Buuren, S., Groothuis-Oudshoorn, C.G.M. (2000) \emph{Multivariate Imputation by Chained Equations: MICE V1.0 User's manual.} Leiden: TNO Quality of Life. 
} \seealso{ \code{\link{with.mids}}, \code{\link{glm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{multivariate} mice/man/is.mads.Rd0000644000176200001440000000051513416657163013604 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mads} \alias{is.mads} \title{Check for \code{mads} object} \usage{ is.mads(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mads} } \description{ Check for \code{mads} object } mice/man/extend.formulas.Rd0000644000176200001440000000527613574715125015373 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{extend.formulas} \alias{extend.formulas} \title{Extends formula's with predictor matrix settings} \usage{ extend.formulas( formulas, data, blocks, predictorMatrix = NULL, auxiliary = TRUE, include.intercept = FALSE, ... ) } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). 
By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g, \code{2} or \code{-2}) are also allowed.} \item{auxiliary}{A logical that indicates whether the variables listed in \code{predictors} should be added to the formula as main effects. The default is \code{TRUE}.} \item{include.intercept}{A logical that indicated whether the intercept should be included in the result.} \item{...}{Named arguments that are passed down to the univariate imputation functions.} } \value{ A list of formula's } \description{ Extends formula's with predictor matrix settings } \keyword{internal} mice/man/md.pairs.Rd0000644000176200001440000000276213416657163013771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/md.pairs.R \name{md.pairs} \alias{md.pairs} \title{Missing data pattern by variable pairs} \usage{ md.pairs(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. 
Missing values are coded as \code{NA}.} } \value{ A list of four components named \code{rr}, \code{rm}, \code{mr} and \code{mm}. Each component is a square numerical matrix containing the number of observations within one of the four missing data patterns. } \description{ Number of observations per variable pair. } \details{ The four components in the output value have the following interpretation: \describe{ \item{\code{rr}}{response-response, both variables are observed} \item{\code{rm}}{response-missing, row observed, column missing} \item{\code{mr}}{missing-response, row missing, column observed} \item{\code{mm}}{missing-missing, both variables are missing} } } \examples{ pat <- md.pairs(nhanes) pat # show that these four matrices decompose the total sample size # for each pair pat$rr + pat$rm + pat$mr + pat$mm # percentage of usable cases to impute row variable from column variable round(100*pat$mr/(pat$mr+pat$mm)) } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2009 } \keyword{univar} mice/man/pool.r.squared.Rd0000644000176200001440000000407313416657163015125 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.r.squared.R \name{pool.r.squared} \alias{pool.r.squared} \title{Pooling: R squared} \usage{ pool.r.squared(object, adjusted = FALSE) } \arguments{ \item{object}{An object of class 'mira', produced by \code{lm.mids} or \code{with.mids} with \code{lm} as modeling function.} \item{adjusted}{A logical value. If adjusted=TRUE then the adjusted R^2 is calculated. The default value is FALSE.} } \value{ Returns a 1x4 table with components. Component \code{est} is the pooled R^2 estimate. Component \code{lo95} is the 95 \% lower bound of the pooled R^2.
Component \code{hi95} is the 95 \% upper bound of the pooled R^2. Component \code{fmi} is the fraction of missing information due to nonresponse. } \description{ Pools R^2 of m repeated complete data models. } \details{ The function pools the coefficients of determination R^2 or the adjusted coefficients of determination (R^2_a) obtained with the \code{lm} modeling function. For pooling it uses the Fisher \emph{z}-transformation. } \examples{ imp<-mice(nhanes) fit<-lm.mids(chl~age+hyp+bmi,imp) pool.r.squared(fit) pool.r.squared(fit,adjusted=TRUE) #fit<-lm.mids(chl~age+hyp+bmi,imp) # #> pool.r.squared(fit) # est lo 95 hi 95 fmi #R^2 0.5108041 0.1479687 0.7791927 0.3024413 # #> pool.r.squared(fit,adjusted=TRUE) # est lo 95 hi 95 fmi #adj R^2 0.4398066 0.08251427 0.743172 0.3404165 # } \references{ Harel, O (2009). The estimation of R^2 and adjusted R^2 in incomplete data sets using multiple imputation, Journal of Applied Statistics, 36:1109-1118. Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{pool}},\code{\link{pool.scalar}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 } \keyword{htest} mice/man/bwplot.mids.Rd0000644000176200001440000001743113617553603014512 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bwplot.R \name{bwplot.mids} \alias{bwplot.mids} \alias{bwplot} \title{Box-and-whisker plot of observed and imputed data} \usage{ \method{bwplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), mayreplicate = TRUE, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odds labeling of vertical axis. 
For convenience, in \code{stripplot()} and \code{bwplot} the formula \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single \code{y}, and does not (yet) work for \code{y1+y2~.imp}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated is the response indicator \code{is.na(x$data)}. The default \code{na.groups = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} argument in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data.
The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{mayreplicate}{A logical indicating whether color, line widths, and so on, may be replicated. The graphical functions attempt to choose "intelligent" graphical parameters. For example, the same color can be replicated for different elements, e.g. use all reds for the imputed data. Replication may be switched off by setting the flag to \code{FALSE}, in order to allow the user to gain full control.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{bwplot} produces box-and-whisker plots. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}.
Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. } \examples{ imp <- mice(boys, maxit=1) ### box-and-whisker plot per imputation of all numerical variables bwplot(imp) ### tv (testicular volume), conditional on region bwplot(imp, tv~.imp|reg) ### same data, organized in a different way bwplot(imp, tv~reg|.imp, theme=list()) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67.
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, \code{\link{stripplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:bwplot]{bwplot}}, \code{\link[lattice:panel.bwplot]{panel.bwplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.set]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/mice.impute.norm.Rd0000644000176200001440000000605713620753345015442 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{mice.impute.norm} \alias{mice.impute.norm} \alias{norm} \title{Imputation by Bayesian linear regression} \usage{ mice.impute.norm(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Calculates imputations for univariate missing data by Bayesian linear regression, also known as the normal model. } \details{ Imputation of \code{y} by the normal model by the method defined by Rubin (1987, p. 167). 
The procedure is as follows: \enumerate{ \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge parameter \eqn{\kappa}.} \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}} \item{Draw a random variable \eqn{\dot g \sim \chi^2_\nu} with \eqn{\nu=n_1 - q}.} \item{Calculate \eqn{\dot\sigma^2 = (y_{obs} - X_{obs}\hat\beta)'(y_{obs} - X_{obs}\hat\beta)/\dot g.}} \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} \item{Draw \eqn{n_0} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_2}.} \item{Calculate the \eqn{n_0} values \eqn{y_{imp} = X_{mis}\dot\beta + \dot z_2\dot\sigma}.} } Using \code{mice.impute.norm} for all columns emulates Schafer's NORM method (Schafer, 1997). } \references{ Rubin, D.B (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley & Sons. Schafer, J.L. (1997). Analysis of incomplete multivariate data. London: Chapman & Hall. 
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/employee.Rd0000644000176200001440000000253013416657163014064 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/employee.R \docType{data} \name{employee} \alias{employee} \title{Employee selection data} \format{A data frame with 20 rows and 3 variables: \describe{ \item{IQ}{candidate IQ score} \item{wbeing}{candidate well-being score} \item{jobperf}{candidate job performance score} }} \source{ Enders (2010), Applied Missing Data Analysis, p. 218 } \usage{ employee } \description{ A toy example from Craig Enders. } \details{ Enders describes these data as follows: I designed these data to mimic an employee selection scenario in which prospective employees complete an IQ test and a psychological well-being questionnaire during their interview. The company subsequently hires the applications that score in the upper half of the IQ distribution, and a supervisor rates their job performance following a 6-month probationary period. Note that the job performance scores are missing at random (MAR) (i.e. individuals in the lower half of the IQ distribution were never hired, and thus have no performance rating). 
In addition, I randomly deleted three of the well-being scores in order to mimic a situation where the applicant's well-being questionnaire is inadvertently lost. A larger version of this data set is present as \code{\link[miceadds]{data.enders.employee}}. } \keyword{datasets} mice/man/mice.impute.2lonly.pmm.Rd0000644000176200001440000001145613574715125016477 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.pmm.R \name{mice.impute.2lonly.pmm} \alias{mice.impute.2lonly.pmm} \alias{2lonly.pmm} \title{Imputation at level 2 by predictive mean matching} \usage{ mice.impute.2lonly.pmm(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Group identifier must be specified by '-2'. Predictors must be specified by '1'.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data at level 2 using predictive mean matching. Variables at level 1 are aggregated at level 2. The group identifier at level 2 must be indicated by \code{type = -2} in the \code{predictorMatrix}. } \details{ In combination with \code{\link{mice.impute.2l.pan}}, this function allows switching regression imputation between level 1 and level 2 as described in Yucel (2008) or Gelman and Hill (2007, p. 541). The function checks for partially missing level-2 data.
Level-2 data are assumed to be constant within the same cluster. If one or more entries are missing, then the procedure aborts with an error message that identifies the cluster with incomplete level-2 data. In such cases, one may first fill in the cluster mean (or mode) by the \code{2lonly.mean} method to remove inconsistencies. } \note{ The extension to categorical variables transform a dependent factor variable by means of the \code{as.integer()} function. This may make sense for categories that are approximately ordered, but less so for pure nominal measures. For a more general approach, see \code{miceadds::mice.impute.2lonly.function()}. } \examples{ ################################################## # simulate some data # x,y ... level 1 variables # v,w ... level 2 variables G <- 250 # number of groups n <- 20 # number of persons beta <- .3 # regression coefficient rho <- .30 # residual intraclass correlation rho.miss <- .10 # correlation with missing response missrate <- .50 # missing proportion y1 <- rep( rnorm( G , sd = sqrt( rho ) ) , each=n ) + rnorm(G*n , sd = sqrt( 1 - rho )) w <- rep( round( rnorm(G ) , 2 ) , each=n ) v <- rep( round( runif( G , 0 , 3 ) ) , each=n ) x <- rnorm( G*n ) y <- y1 + beta * x + .2 * w + .1 * v dfr0 <- dfr <- data.frame( "group" = rep(1:G , each=n ) , "x" = x , "y" = y , "w" = w , "v" = v ) dfr[ rho.miss * x + rnorm( G*n , sd = sqrt( 1 - rho.miss ) ) < qnorm( missrate ) , "y" ] <- NA dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "w" ] <- NA dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "v" ] <- NA #.... # empty mice imputation imp0 <- mice( as.matrix(dfr) , maxit=0 ) predM <- imp0$predictorMatrix impM <- imp0$method #... 
# multilevel imputation predM1 <- predM predM1[c("w","y","v"),"group"] <- -2 predM1["y","x"] <- 1 # fixed x effects imputation impM1 <- impM impM1[c("y","w","v")] <- c("2l.pan" , "2lonly.norm" , "2lonly.pmm" ) # turn v into a categorical variable dfr$v <- as.factor(dfr$v) levels(dfr$v) <- LETTERS[1:4] # y ... imputation using pan # w ... imputation at level 2 using norm # v ... imputation at level 2 using pmm imp <- mice(dfr, m = 1, predictorMatrix = predM1 , method = impM1, maxit = 1, paniter = 500) } \references{ Gelman, A. and Hill, J. (2007). \emph{Data analysis using regression and multilevel/hierarchical models}. Cambridge, Cambridge University Press. Yucel, RM (2008). Multiple imputation inference for multivariate multilevel continuous data with ignorable non-response. \emph{Philosophical Transactions of the Royal Society A}, \bold{366}, 2389-2404. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{mice.impute.pmm}}, \code{\link{mice.impute.2lonly.norm}}, \code{\link{mice.impute.2l.pan}}, \code{\link{mice.impute.2lonly.mean}} Other univariate-2lonly: \code{\link{mice.impute.2lonly.mean}()}, \code{\link{mice.impute.2lonly.norm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de}, plus some tweaks by Stef van Buuren } \concept{univariate-2lonly} mice/man/mice.impute.logreg.Rd0000644000176200001440000000621313620753345015740 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.logreg.R \name{mice.impute.logreg} \alias{mice.impute.logreg} \title{Imputation by logistic regression} \usage{ mice.impute.logreg(y, ry, x, wy = NULL, ...) 
} \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using logistic regression. } \details{ Imputation for binary response variables by the Bayesian logistic regression model (Rubin 1987, p. 169-170). The Bayesian method consists of the following steps: \enumerate{ \item Fit a logit, and find (bhat, V(bhat)) \item Draw BETA from N(bhat, V(bhat)) \item Compute predicted scores for m.d., i.e. logit-1(X BETA) \item Compare the score to a random (0,1) deviate, and impute. } The method relies on the standard \code{glm.fit} function. Warnings from \code{glm.fit} are suppressed. Perfect prediction is handled by the data augmentation method. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN 90-74479-08-1. Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-Plus (2nd ed). Springer, Berlin. White, I., Daniel, R. and Royston, P (2010). 
Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. Computational Statistics and Data Analysis, 54:2267-2275. } \seealso{ \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/pmm.match.Rd0000644000176200001440000000340713416657163014135 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.pmm.R \name{.pmm.match} \alias{.pmm.match} \title{Finds an imputed value from matches in the predictive metric (deprecated)} \usage{ .pmm.match(z, yhat = yhat, y = y, donors = 5, ...) } \arguments{ \item{z}{A scalar containing the predicted value for the current case to be imputed.} \item{yhat}{A vector containing the predicted values for all cases with an observed outcome.} \item{y}{A vector of \code{length(yhat)} elements containing the observed outcome} \item{donors}{The size of the donor pool among which a draw is made. The default is \code{donors = 5}. Setting \code{donors = 1} always selects the closest match. Values between 3 and 10 provide the best results.
Note: This setting was changed from 3 to 5 in version 2.19, based on simulation work by Tim Morris (UCL).} \item{\dots}{Other parameters (not used).} } \value{ A scalar containing the observed value of the selected donor. } \description{ This function finds matches among the observed data in the predictive mean metric. It selects the \code{donors} closest matches, randomly samples one of the donors, and returns the observed value of the match. } \details{ This function is included for backward compatibility. It was used up to \code{mice 2.21}. The current \code{mice.impute.pmm()} function calls the faster \code{C} function \code{matcher} instead of \code{.pmm.match()}. } \references{ Schenker N \& Taylor JMG (1996) Partially parametric techniques for multiple imputation. \emph{Computational Statistics and Data Analysis}, 22, 425-446. Little RJA (1988) Missing-data adjustments in large surveys (with discussion). \emph{Journal of Business Economics and Statistics}, 6, 287-301. } \author{ Stef van Buuren } mice/man/xyplot.mads.Rd0000644000176200001440000000441313617553123014523 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/xyplot.mads.R \name{xyplot.mads} \alias{xyplot.mads} \title{Scatterplot of amputed and non-amputed data against weighted sum scores} \usage{ \method{xyplot}{mads}( x, data, which.pat = NULL, standardized = TRUE, layout = NULL, colors = mdc(1:2), ... ) } \arguments{ \item{x}{A \code{mads} object, typically created by \code{\link{ampute}}.} \item{data}{A string or vector of variable names that needs to be plotted. As a default, all variables will be plotted.} \item{which.pat}{A scalar or vector indicating which patterns need to be plotted. As a default, all patterns are plotted.} \item{standardized}{Logical. Whether the scatterplots need to be created from standardized data or not. 
Default is TRUE.} \item{layout}{A vector of two values indicating how the scatterplots of one pattern should be divided over the plot. For example, \code{c(2, 3)} indicates that the scatterplots of six variables need to be placed on 3 rows and 2 columns. There are several defaults for different #variables. Note that for more than 9 variables, multiple plots will be created automatically.} \item{colors}{A vector of two RGB values defining the colors of the non-amputed and amputed data respectively. RGB values can be obtained with \code{\link{hcl}}.} \item{\dots}{Not used, but for consistency with generic} } \value{ A list containing the scatterplots. Note that a new pattern will always be shown in a new plot. } \description{ Plotting method to investigate relation between amputed data and the weighted sum scores. Based on \code{\link{lattice}}. \code{xyplot} produces scatterplots. The function plots the variables against the weighted sum scores. The function automatically separates the amputed and non-amputed data to see the relation between the amputation and the weighted sum scores. } \note{ The \code{mads} object contains all the information you need to make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate Amputation using Ampute} to understand the contents of class object \code{mads}. } \seealso{ \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for an overview of the package, \code{\link{mads-class}} } \author{ Rianne Schouten, 2016 } mice/man/mira-class.Rd0000644000176200001440000000474013574247310014277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mira.R \docType{class} \name{mira-class} \alias{mira-class} \alias{mira} \title{Multiply imputed repeated analyses (\code{mira})} \description{ The \code{mira} object is generated by the \code{with.mids()} function. 
The \code{as.mira()} function takes the results of repeated complete-data analysis stored as a list, and turns it into a \code{mira} object that can be pooled. } \details{ In versions prior to \code{mice 3.0} pooling required only that \code{coef()} and \code{vcov()} methods were available for fitted objects. \emph{This feature is no longer supported}. The reason is that \code{vcov()} methods are inconsistent across packages, leading to buggy behaviour of the \code{pool()} function. Since \code{mice 3.0+}, the \code{broom} package takes care of filtering out the relevant parts of the complete-data analysis. It may happen that you'll see messages like \code{No method for tidying an S3 object of class ...} or \code{Error: No glance method for objects of class ...}. The recommended way to solve this problem is to write your own \code{glance()} and \code{tidy()} methods and add these to \code{broom} according to the specifications given in \url{https://broom.tidyverse.org/articles/adding-tidiers.html}. The \code{mira} class of objects has methods for the following generic functions: \code{print}, \code{summary}. Many of the functions of the \code{mice} package do not use the S4 class definitions, and instead rely on the S3 list equivalent \code{oldClass(obj) <- "mira"}. } \section{Slots}{ \describe{ \item{\code{.Data}:}{Object of class \code{"list"} containing the following slots:} \item{\code{call}:}{The call that created the object.} \item{\code{call1}:}{The call that created the \code{mids} object that was used in \code{call}.} \item{\code{nmis}:}{An array containing the number of missing observations per column.} \item{\code{analyses}:}{A list of \code{m} components containing the individual fit objects from each of the \code{m} complete data analyses.} } } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67.
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{with.mids}}, \code{\link[=mids-class]{mids}}, \code{\link{mipo}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{classes} mice/man/fdgs.Rd0000644000176200001440000000366513416661213013171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fdgs.R \docType{data} \name{fdgs} \alias{fdgs} \title{Fifth Dutch growth study 2009} \format{\code{fdgs} is a data frame with 10030 rows and 8 columns: \describe{ \item{id}{Person number} \item{reg}{Region (factor, 5 levels)} \item{age}{Age (years)} \item{sex}{Sex (boy, girl)} \item{hgt}{Height (cm)} \item{wgt}{Weight (kg)} \item{hgt.z}{Height Z-score} \item{wgt.z}{Weight Z-score} }} \source{ Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., Hirasing, R. A., van Buuren, S. (2011). Increase in prevalence of overweight in Dutch children and adolescents: A comparison of nationwide growth studies in 1980, 1997 and 2009. \emph{PLoS ONE}, \emph{6}(11), e27608. Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., Hirasing, R. A., \& van Buuren, S. (2013). The world's tallest nation has stopped growing taller: the height of Dutch children from 1955 to 2009. \emph{Pediatric Research}, \emph{73}(3), 371-377. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-nonresponse.html#fifth-dutch-growth-study}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Boca Raton, FL.: Chapman & Hall/CRC Press. } \description{ Age, height, weight and region of 10030 children measured within the Fifth Dutch Growth Study 2009 } \details{ The data set contains data from children of Dutch descent (biological parents are born in the Netherlands). Children with growth-related diseases were excluded. The data were used to construct new growth charts of children of Dutch descent (Schonbeck 2013), and to calculate overweight and obesity prevalence (Schonbeck 2011). 
Some groups were underrepresented. Multiple imputation was used to create synthetic cases that were used to correct for the nonresponse. See Van Buuren (2012), chapter 8 for details. } \examples{ data <- data(fdgs) summary(data) } \keyword{datasets} mice/man/mice.impute.sample.Rd0000644000176200001440000000273113416657163015747 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.sample.R \name{mice.impute.sample} \alias{mice.impute.sample} \title{Imputation by simple random sampling} \usage{ mice.impute.sample(y, ry, x = NULL, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes a random sample from the observed \code{y} data } \details{ This function takes a simple random sample from the observed values in \code{y}, and returns these as imputations. } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2017 } \keyword{datagen} mice/man/mice.impute.ri.Rd0000644000176200001440000000455613620753345015103 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.ri.R \name{mice.impute.ri} \alias{mice.impute.ri} \alias{ri} \title{Imputation by the random indicator method for nonignorable data} \usage{ mice.impute.ri(y, ry, x, wy = NULL, ri.maxit = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ri.maxit}{Number of inner iterations} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes nonignorable missing data by the random indicator method. } \details{ The random indicator method estimates an offset between the distribution of the observed and missing data using an algorithm that iterates over the response and imputation models. This routine assumes that the response model and imputation model have same predictors. For an MNAR alternative see also \code{\link{mice.impute.mnar.logreg}}. } \references{ Jolani, S. (2012). \emph{Dual Imputation Strategies for Analyzing Incomplete Data}. Dissertation. University of Utrecht, Dec 7 2012. 
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()} } \author{ Shahab Jolani (University of Utrecht) \email{s.jolani@uu.nl} } \concept{univariate imputation functions} \keyword{datagen} mice/man/ncc.Rd0000644000176200001440000000112013416657163013002 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ncc.R \name{ncc} \alias{ncc} \title{Number of complete cases} \usage{ ncc(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} can be a vector.} } \value{ Number of elements in \code{x} with complete data. } \description{ Calculates the number of complete cases. } \examples{ ncc(nhanes) # 13 complete cases } \seealso{ \code{\link{nic}}, \code{\link{cci}} } \author{ Stef van Buuren, 2017 } mice/man/version.Rd0000644000176200001440000000076213416657163013737 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/zzz.R \name{version} \alias{version} \title{Echoes the package version number} \usage{ version(pkg = "mice") } \arguments{ \item{pkg}{A character vector with the package name.} } \value{ A character vector containing the package name, version number and installed directory. 
} \description{ Echoes the package version number } \examples{ version() version("base") } \author{ Stef van Buuren, Oct 2010 } \keyword{misc} mice/man/popmis.Rd0000644000176200001440000000167613416661213013555 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/popmis.R \docType{data} \name{popmis} \alias{popmis} \title{Hox pupil popularity data with missing popularity scores} \format{A data frame with 2000 rows and 7 columns: \describe{ \item{pupil}{Pupil number within school} \item{school}{School number} \item{popular}{Pupil popularity with 848 missing entries} \item{sex}{Pupil gender} \item{texp}{Teacher experience (years)} \item{const}{Constant intercept term} \item{teachpop}{Teacher popularity} }} \source{ Hox, J. J. (2002) \emph{Multilevel analysis. Techniques and applications.} Mahwah, NJ: Lawrence Erlbaum. } \description{ Hox pupil popularity data with some missing popularity scores } \details{ The original, complete dataset was generated by Joop Hox as an example of well-behaved multilevel data set. The distributed data contains missing data in pupil popularity. } \examples{ popmis[1:3,] } \keyword{datasets} mice/man/plot.mids.Rd0000644000176200001440000000330413574715125014154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R \name{plot.mids} \alias{plot.mids} \title{Plot the trace lines of the MICE algorithm} \usage{ \method{plot}{mids}( x, y = NULL, theme = mice.theme(), layout = c(2, 3), type = "l", col = 1:10, lty = 1, ... ) } \arguments{ \item{x}{An object of class \code{mids}} \item{y}{A formula that specifies which variables, stream and iterations are plotted. If omitted, all streams, variables and iterations are plotted.} \item{theme}{The trellis theme to applied to the graphs. The default is \code{mice.theme()}.} \item{layout}{A vector of length 2 given the number of columns and rows in the plot. 
The default is \code{c(2, 3)}.} \item{type}{Parameter \code{type} of \code{\link{panel.xyplot}}.} \item{col}{Parameter \code{col} of \code{\link{panel.xyplot}}.} \item{lty}{Parameter \code{lty} of \code{\link{panel.xyplot}}.} \item{...}{Extra arguments for \code{\link{xyplot}}.} } \value{ An object of class \code{"trellis"}. } \description{ Trace line plots portray the value of an estimate against the iteration number. The estimate can be anything that you can calculate, but typically are chosen as parameter of scientific interest. The \code{plot} method for a \code{mids} object plots the mean and standard deviation of the imputed (not observed) values against the iteration number for each of the $m$ replications. By default, the function plot the development of the mean and standard deviation for each incomplete variable. On convergence, the streams should intermingle and be free of any trend. } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}}, \code{\link{xyplot}} } \author{ Stef van Buuren 2011 } mice/man/mnar_demo_data.Rd0000644000176200001440000000100213620753345015164 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nmar_demo_data.R \docType{data} \name{mnar_demo_data} \alias{mnar_demo_data} \title{MNAR demo data} \format{An object of class \code{data.frame} with 500 rows and 3 columns.} \source{ \url{https://github.com/moreno-betancur/NARFCS/blob/master/datmis.csv} } \usage{ mnar_demo_data } \description{ A toy example from Margarita Moreno-Betancur for checking NARFCS. } \details{ A small dataset with just three columns. 
} \keyword{datasets} mice/man/squeeze.Rd0000644000176200001440000000161013574715125013722 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/squeeze.R \name{squeeze} \alias{squeeze} \title{Squeeze the imputed values to be within specified boundaries.} \usage{ squeeze(x, bounds = c(min(x[r]), max(x[r])), r = rep.int(TRUE, length(x))) } \arguments{ \item{x}{A numerical vector with values} \item{bounds}{A numerical vector of length 2 containing the lower and upper bounds. By default, the bounds are to the minimum and maximum values in \code{x}.} \item{r}{A logical vector of length \code{length(x)} that is used to select a subset in \code{x} before calculating automatic bounds.} } \value{ A vector of length \code{length(x)}. } \description{ This function replaces any values in \code{x} that are lower than \code{bounds[1]} by \code{bounds[1]}, and replaces any values higher than \code{bounds[2]} by \code{bounds[2]}. } \author{ Stef van Buuren, 2011. } mice/man/extractBS.Rd0000644000176200001440000000065713416657163014154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{extractBS} \alias{extractBS} \title{Extract broken stick estimates from a \code{lmer} object} \usage{ extractBS(fit) } \arguments{ \item{fit}{An object of class \code{lmer}} } \value{ A matrix containing broken stick estimates } \description{ Extract broken stick estimates from a \code{lmer} object } \author{ Stef van Buuren, 2012 } mice/man/mipo.Rd0000644000176200001440000000537713574715125013223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mipo.R \name{mipo} \alias{mipo} \alias{summary.mipo} \alias{print.mipo} \alias{print.mipo.summary} \alias{process_mipo} \title{\code{mipo}: Multiple imputation pooled object} \usage{ mipo(mira.obj, ...) 
\method{summary}{mipo}( object, type = c("tests", "all"), conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ... ) \method{print}{mipo}(x, ...) \method{print}{mipo.summary}(x, ...) process_mipo(z, x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE) } \arguments{ \item{mira.obj}{An object of class \code{mira}} \item{\dots}{Arguments passed down} \item{object}{An object of class \code{mipo}} \item{conf.int}{Logical indicating whether to include a confidence interval. The default is \code{FALSE}.} \item{conf.level}{Confidence level of the interval, used only if \code{conf.int = TRUE}. Number between 0 and 1.} \item{exponentiate}{Flag indicating whether to exponentiate the coefficient estimates and confidence intervals (typical for logistic regression).} \item{x}{An object of class \code{mipo}} \item{z}{Data frame with a tidied version of a coefficient matrix} } \value{ The \code{summary} method returns a data frame with summary statistics of the pooled analysis. } \description{ The \code{mipo} object contains the results of the pooling step. The function \code{\link{pool}} generates an object of class \code{mipo}. } \details{ An object of class \code{mipo} is a \code{list} with three elements: \code{call}, \code{m} and \code{pooled}. The \code{pooled} element is a data frame with columns: \tabular{ll}{ \code{estimate}\tab Pooled complete data estimate\cr \code{ubar} \tab Within-imputation variance of \code{estimate}\cr \code{b} \tab Between-imputation variance of \code{estimate}\cr \code{t} \tab Total variance of \code{estimate}\cr \code{dfcom} \tab Degrees of freedom in complete data\cr \code{df} \tab Degrees of freedom of \eqn{t}-statistic\cr \code{riv} \tab Relative increase in variance\cr \code{lambda} \tab Proportion of total variance attributable to the missingness\cr \code{fmi} \tab Fraction of missing information\cr } The names of the terms are stored as \code{row.names(pooled)}. 
The \code{process_mipo} is a helper function to process a tidied mipo object, and is normally not called directly. It adds a confidence interval, and optionally exponentiates, the result. } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{pool}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \keyword{classes} \keyword{internal} mice/man/fdd.Rd0000644000176200001440000000775713416661213013011 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fdd.R \docType{data} \name{fdd} \alias{fdd} \alias{fdd.pred} \title{SE Fireworks disaster data} \format{\code{fdd} is a data frame with 52 rows and 65 columns: \describe{ \item{id}{Client number} \item{trt}{Treatment (E=EMDR, C=CBT)} \item{pp}{Per protocol (Y/N)} \item{trtp}{Number of parental treatments} \item{sex}{Sex: M/F} \item{etn}{Ethnicity: NL/OTHER} \item{age}{Age (years)} \item{trauma}{Trauma count (1-5)} \item{prop1}{PROPS total score T1} \item{prop2}{PROPS total score T2} \item{prop3}{PROPS total score T3} \item{crop1}{CROPS total score T1} \item{crop2}{CROPS total score T2} \item{crop3}{CROPS total score T3} \item{masc1}{MASC score T1} \item{masc2}{MASC score T2} \item{masc3}{MASC score T3} \item{cbcl1}{CBCL T1} \item{cbcl3}{CBCL T3} \item{prs1}{PRS total score T1} \item{prs2}{PRS total score T2} \item{prs3}{PRS total score T3} \item{ypa1}{PTSD-RI B intrusive recollection parent T1} \item{ypb1}{PTSD-RI C avoidant/numbing parent T1} \item{ypc1}{PTSD-RI D hyper-arousal parent T1} \item{yp1}{PTSD-RI B+C+D parent T1} \item{ypa2}{PTSD-RI B intrusive recollection parent T2} \item{ypb2}{PTSD-RI C avoidant/numbing parent T2} \item{ypc2}{PTSD-RI D hyper-arousal parent T2} \item{yp2}{PTSD-RI B+C+D parent T1} \item{ypa3}{PTSD-RI B intrusive recollection 
parent T3} \item{ypb3}{PTSD-RI C avoidant/numbing parent T3} \item{ypc3}{PTSD-RI D hyper-arousal parent T3} \item{yp3}{PTSD-RI B+C+D parent T3} \item{yca1}{PTSD-RI B intrusive recollection child T1} \item{ycb1}{PTSD-RI C avoidant/numbing child T1} \item{ycc1}{PTSD-RI D hyper-arousal child T1} \item{yc1}{PTSD-RI B+C+D child T1} \item{yca2}{PTSD-RI B intrusive recollection child T2} \item{ycb2}{PTSD-RI C avoidant/numbing child T2} \item{ycc2}{PTSD-RI D hyper-arousal child T2} \item{yc2}{PTSD-RI B+C+D child T2} \item{yca3}{PTSD-RI B intrusive recollection child T3} \item{ycb3}{PTSD-RI C avoidant/numbing child T3} \item{ycc3}{PTSD-RI D hyper-arousal child T3} \item{yc3}{PTSD-RI B+C+D child T3} \item{ypf1}{PTSD-RI parent full T1} \item{ypf2}{PTSD-RI parent full T2} \item{ypf3}{PTSD-RI parent full T3} \item{ypp1}{PTSD parent partial T1} \item{ypp2}{PTSD parent partial T2} \item{ypp3}{PTSD parent partial T3} \item{ycf1}{PTSD child full T1} \item{ycf2}{PTSD child full T2} \item{ycf3}{PTSD child full T3} \item{ycp1}{PTSD child partial T1} \item{ycp2}{PTSD child partial T2} \item{ycp3}{PTSD child partial T3} \item{cbin1}{CBCL Internalizing T1} \item{cbin3}{CBCL Internalizing T3} \item{cbex1}{CBCL Externalizing T1} \item{cbex3}{CBCL Externalizing T3} \item{bir1}{Birlison T1} \item{bir2}{Birlison T2} \item{bir3}{Birlison T3} } \code{fdd.pred} is the 65 by 65 binary predictor matrix used to impute \code{fdd}.} \source{ de Roos, C., Greenwald, R., den Hollander-Gijsman, M., Noorthoorn, E., van Buuren, S., de Jong, A. (2011). A Randomised Comparison of Cognitive Behavioral Therapy (CBT) and Eye Movement Desensitisation and Reprocessing (EMDR) in disaster-exposed children. \emph{European Journal of Psychotraumatology}, \emph{2}, 5694. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-fdd.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Boca Raton, FL: Chapman & Hall/CRC Press. 
} \description{ Multiple outcomes of a randomized study to reduce post-traumatic stress. } \details{ Data from a randomized experiment to reduce post-traumatic stress by two treatments: Eye Movement Desensitization and Reprocessing (EMDR) (experimental treatment), and cognitive behavioral therapy (CBT) (control treatment). 52 children were randomized to one of these two treatments. Outcomes were measured at three time points: at baseline (pre-treatment, T1), post-treatment (T2, 4-8 weeks), and at follow-up (T3, 3 months). For more details, see de Roos et al (2011). Some person covariates were reshuffled. The imputation methodology is explained in Chapter 9 of van Buuren (2012). } \examples{ data <- fdd md.pattern(fdd) } \keyword{datasets} mice/man/mids2spss.Rd0000644000176200001440000000604413574715125014176 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids2spss.R \name{mids2spss} \alias{mids2spss} \title{Export \code{mids} object to SPSS} \usage{ mids2spss( imp, filedat = "midsdata.txt", filesps = "readmids.sps", path = getwd(), sep = "\\t", dec = ".", silent = FALSE ) } \arguments{ \item{imp}{The \code{imp} argument is an object of class \code{mids}, typically produced by the \code{mice()} function.} \item{filedat}{A character string describing the name of the output data file.} \item{filesps}{A character string describing the name of the output syntax file.} \item{path}{A character string containing the path of the output file. The value in \code{path} is appended to \code{filedat} and \code{filesps}. By default, files are written to the current \code{R} working directory. If \code{path=NULL} then no file path appending is done.} \item{sep}{The separator between the data fields.} \item{dec}{The decimal separator for numerical data.} \item{silent}{A logical flag stating whether the names of the files should be printed.} } \value{ The return value is \code{NULL}. 
} \description{ Converts a \code{mids} object into a format recognized by SPSS, and writes the data and the SPSS syntax files. } \details{ This function automates most of the work needed to export a \code{mids} object to SPSS. It uses a modified version of \code{writeForeignSPSS()} from the \code{foreign} package. The modified version allows for a choice of the field and decimal separators, and makes some improvements to the formatting, so that the generated syntax file is amenable to the \code{INCLUDE} statement in SPSS. Below are some things to pay attention to. The \code{SPSS} syntax file has the proper file names and separators set, so in principle it should run and read the data without alteration. \code{SPSS} is more strict than \code{R} with respect to the paths. Always use the full path, otherwise \code{SPSS} may not be able to find the data file. Factors in \code{R} translate into categorical variables in \code{SPSS}. The internal coding of factor levels used in \code{R} is exported. This is generally acceptable for \code{SPSS}. However, when the data are to be combined with existing \code{SPSS} data, watch out for any changes in the factor level codes. The \code{read.spss()} function in package \code{foreign} for reading \code{.sav} files uses its own internal numbering scheme \code{1,2,3,...} for the levels of a factor. Consequently, changes in factor code can cause discrepancies in factor level when re-imported to \code{SPSS}. The solution is to manually recode the factor level in \code{SPSS}. \code{SPSS} will recognize the data set as a multiply imputed data set, and do automatic pooling in procedures where that is supported. Note however that pooling is an extra option only available to those who license the \code{MISSING VALUES} module. Without this license, \code{SPSS} will still recognize the structure of the data, but not do any pooling. } \seealso{ \code{\link[=mids-class]{mids}} } \author{ Stef van Buuren, Dec 2010. 
} \keyword{manip} mice/man/mice.impute.mean.Rd0000644000176200001440000000463313620753345015405 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.mean.R \name{mice.impute.mean} \alias{mice.impute.mean} \title{Imputation by the mean} \usage{ mice.impute.mean(y, ry, x = NULL, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes the arithmetic mean of the observed data } \section{Warning}{ Imputing the mean of a variable is almost never appropriate. See Little and Rubin (2002, p. 61-62) or Van Buuren (2012, p. 10-11) } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing Data. New York: John Wiley and Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-simplesolutions.html#sec:meanimp}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{mice}}, \code{\link{mean}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.cart.Rd0000644000176200001440000000654513620753345015422 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.cart.R \name{mice.impute.cart} \alias{mice.impute.cart} \alias{cart} \title{Imputation by classification and regression trees} \usage{ mice.impute.cart(y, ry, x, wy = NULL, minbucket = 5, cp = 1e-04, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{minbucket}{The minimum number of observations in any terminal node used. See \code{\link{rpart.control}} for details.} \item{cp}{Complexity parameter. Any split that does not decrease the overall lack of fit by a factor of cp is not attempted. 
See \code{\link{rpart.control}} for details.} \item{...}{Other named arguments passed down to \code{rpart()}.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} (which equals \code{sum(!ry)} when \code{wy} is left at its default) } \description{ Imputes univariate missing data using classification and regression trees. } \details{ Imputation of \code{y} by classification and regression trees. The procedure is as follows: \enumerate{ \item Fit a classification or regression tree by recursive partitioning; \item For each \code{ymis}, find the terminal node it ends up in according to the fitted tree; \item Make a random draw among the members in the node, and take the observed value from that draw as the imputation. } } \examples{ require(rpart) imp <- mice(nhanes2, meth = 'cart', minbucket = 4) plot(imp) } \references{ Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning for missing data imputation in the presence of interaction effects. Computational Statistics \& Data Analysis, 72, 92-104. Breiman, L., Friedman, J. H., Olshen, R. A., and Stone, C. J. (1984), Classification and regression trees, Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.rf}}, \code{\link[rpart]{rpart}}, \code{\link[rpart]{rpart.control}} Other univariate imputation functions: \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012 } \concept{univariate imputation functions} \keyword{datagen} mice/man/flux.Rd0000644000176200001440000000536513416657163013234 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{flux} \alias{flux} \title{Influx and outflux of multivariate missing data patterns} \usage{ flux(data, local = names(data)) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{local}{A vector of names of columns of \code{data}. The default is to include all columns in the calculations.} } \value{ A data frame with \code{ncol(data)} rows and six columns: pobs = Proportion observed, influx = Influx outflux = Outflux ainb = Average inbound statistic aout = Average outbound statistic fico = Fraction of incomplete cases among cases with \code{Yj} observed } \description{ Influx and outflux are statistics of the missing data pattern. These statistics are useful in selecting predictors that should go into the imputation model. } \details{ Infux and outflux have been proposed by Van Buuren (2012), chapter 4. 
Influx is equal to the number of variable pairs \code{(Yj , Yk)} with \code{Yj} missing and \code{Yk} observed, divided by the total number of observed data cells. Influx depends on the proportion of missing data of the variable. Influx of a completely observed variable is equal to 0, whereas for completely missing variables we have influx = 1. For two variables with the same proportion of missing data, the variable with higher influx is better connected to the observed data, and might thus be easier to impute. Outflux is equal to the number of variable pairs with \code{Yj} observed and \code{Yk} missing, divided by the total number of incomplete data cells. Outflux is an indicator of the potential usefulness of \code{Yj} for imputing other variables. Outflux depends on the proportion of missing data of the variable. Outflux of a completely observed variable is equal to 1, whereas outflux of a completely missing variable is equal to 0. For two variables having the same proportion of missing data, the variable with higher outflux is better connected to the missing data, and thus potentially more useful for imputing other variables. FICO is an outbound statistic defined by the fraction of incomplete cases among cases with \code{Yj} observed (White and Carlin, 2010). } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. 
} \seealso{ \code{\link{fluxplot}}, \code{\link{md.pattern}}, \code{\link{fico}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/make.predictorMatrix.Rd0000644000176200001440000000200613416657163016337 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predictorMatrix.R \name{make.predictorMatrix} \alias{make.predictorMatrix} \title{Creates a \code{predictorMatrix} argument} \usage{ make.predictorMatrix(data, blocks = make.blocks(data)) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. The default assigns each variable in its own block.} } \value{ A matrix } \description{ This helper function creates a valid \code{predictMatrix}. The \code{predictorMatrix} is an argument to the \code{mice} function. It specifies the target variable or block in the rows, and the predictor variables on the columns. An entry of \code{0} means that the column variable is NOT used to impute the row variable or block. A nonzero value indicates that it is used. } \examples{ make.predictorMatrix(nhanes) make.predictorMatrix(nhanes, blocks = make.blocks(nhanes, "collect")) } \seealso{ \code{\link{make.blocks}} } mice/man/norm.draw.Rd0000644000176200001440000000261613416664706014162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{norm.draw} \alias{norm.draw} \alias{.norm.draw} \title{Draws values of beta and sigma by Bayesian linear regression} \usage{ norm.draw(y, ry, x, rank.adjust = TRUE, ...) .norm.draw(y, ry, x, rank.adjust = TRUE, ...) } \arguments{ \item{y}{Incomplete data vector of length \code{n}} \item{ry}{Vector of missing data pattern (\code{FALSE}=missing, \code{TRUE}=observed)} \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{rank.adjust}{Argument that specifies whether \code{NA}'s in the coefficients need to be set to zero. 
Only relevant when \code{ls.meth = "qr"} AND the predictor matrix is rank-deficient.} \item{...}{Other named arguments.} } \value{ A \code{list} containing components \code{coef} (least squares estimate), \code{beta} (drawn regression weights) and \code{sigma} (drawn value of the residual standard deviation). } \description{ This function draws random values of beta and sigma under the Bayesian linear regression model as described in Rubin (1987, p. 167). This function can be called by user-specified imputation functions. } \references{ Rubin, D.B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York: Wiley. } \author{ Gerko Vink, 2018, for this version, based on earlier versions written by Stef van Buuren, Karin Groothuis-Oudshoorn, 2017 } mice/man/fluxplot.Rd0000644000176200001440000000652513574715125014130 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{fluxplot} \alias{fluxplot} \title{Fluxplot of the missing data pattern} \usage{ fluxplot( data, local = names(data), plot = TRUE, labels = TRUE, xlim = c(0, 1), ylim = c(0, 1), las = 1, xlab = "Influx", ylab = "Outflux", main = paste("Influx-outflux pattern for", deparse(substitute(data))), eqscplot = TRUE, pty = "s", lwd = 1, ... ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{local}{A vector of names of columns of \code{data}. The default is to include all columns in the calculations.} \item{plot}{Should a graph be produced?} \item{labels}{Should the points be labeled?} \item{xlim}{See \code{par}.} \item{ylim}{See \code{par}.} \item{las}{See \code{par}.} \item{xlab}{See \code{par}.} \item{ylab}{See \code{par}.} \item{main}{See \code{par}.} \item{eqscplot}{Should a square plot be produced?} \item{pty}{See \code{par}.} \item{lwd}{See \code{par}. 
Controls axis line thickness and diagonal} \item{\dots}{Further arguments passed to \code{plot()} or \code{eqscplot()}.} } \value{ An invisible data frame with \code{ncol(data)} rows and six columns: pobs = Proportion observed, influx = Influx, outflux = Outflux, ainb = Average inbound statistic, aout = Average outbound statistic, fico = Fraction of incomplete cases among cases with \code{Yj} observed } \description{ Influx and outflux are statistics of the missing data pattern. These statistics are useful in selecting predictors that should go into the imputation model. } \details{ Influx and outflux have been proposed by Van Buuren (2012), chapter 4. Influx is equal to the number of variable pairs \code{(Yj , Yk)} with \code{Yj} missing and \code{Yk} observed, divided by the total number of observed data cells. Influx depends on the proportion of missing data of the variable. Influx of a completely observed variable is equal to 0, whereas for completely missing variables we have influx = 1. For two variables with the same proportion of missing data, the variable with higher influx is better connected to the observed data, and might thus be easier to impute. Outflux is equal to the number of variable pairs with \code{Yj} observed and \code{Yk} missing, divided by the total number of incomplete data cells. Outflux is an indicator of the potential usefulness of \code{Yj} for imputing other variables. Outflux depends on the proportion of missing data of the variable. Outflux of a completely observed variable is equal to 1, whereas outflux of a completely missing variable is equal to 0. For two variables having the same proportion of missing data, the variable with higher outflux is better connected to the missing data, and thus potentially more useful for imputing other variables. } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC.
Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. } \seealso{ \code{\link{flux}}, \code{\link{md.pattern}}, \code{\link{fico}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/mice.impute.2l.bin.Rd0000644000176200001440000000510413617311060015531 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.bin.R \name{mice.impute.2l.bin} \alias{mice.impute.2l.bin} \title{Imputation by a two-level logistic model using \code{glmer}} \usage{ mice.impute.2l.bin(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. Fixed effects are indicated by a '1'.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{\dots}{Arguments passed down to \code{glmer}} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate systematically and sporadically missing data using a two-level logistic model using \code{lme4::glmer()} } \details{ Data are missing systematically if they have not been measured, e.g., in the case where we combine data from different sources. Data are missing sporadically if they have been partially observed. } \examples{ library(tidyr) library(dplyr) data("toenail2") data <- tidyr::complete(toenail2, patientID, visit) \%>\% tidyr::fill(treatment) \%>\% dplyr::select(-time) \%>\% dplyr::mutate(patientID = as.integer(patientID)) \dontrun{ pred <- mice(data, print = FALSE, maxit = 0, seed = 1)$pred pred["outcome", "patientID"] <- -2 imp <- mice(data, method = "2l.bin", pred = pred, maxit = 1, m = 1, seed = 1) } } \references{ Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). Imputation of systematically missing predictors in an individual participant data meta-analysis: a generalized approach using MICE. \emph{Statistics in Medicine}, 34:1841-1863. 
} \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.norm}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Shahab Jolani, 2015; adapted to mice, SvB, 2018 } \concept{univariate-2l} \keyword{datagen} mice/man/is.mira.Rd0000644000176200001440000000051513416657163013610 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mira} \alias{is.mira} \title{Check for \code{mira} object} \usage{ is.mira(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mira} } \description{ Check for \code{mira} object } mice/man/getfit.Rd0000644000176200001440000000204313416657163013526 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getfit.R \name{getfit} \alias{getfit} \title{Extract list of fitted model} \usage{ getfit(x, i = -1L, simplify = FALSE) } \arguments{ \item{x}{An object of class \code{mira} or \code{mitml.result}, typically produced by a call to \code{with()}.} \item{i}{An integer between 1 and \code{x$m} signaling the number of the repeated analysis. The default \code{i= -1} return a list with all analyses.} \item{simplify}{Should the return value be unlisted?} } \value{ If \code{i = -1} an object of class \code{mitml.result} containing all analyses, otherwise it returns the fitted object of the i'th repeated analysis. } \description{ \code{getfit} returns the list of objects containing the repeated analysis results, or optionally, one of these fit objects. } \examples{ imp <- mice(nhanes) fit <- with(imp, lm(bmi~chl+hyp)) getfit(fit) getfit(fit, 2) } \seealso{ \code{\link[=mira-class]{mira}}, \code{\link{with.mids}} } \author{ Stef van Buuren, March 2012. 
} \keyword{manip} mice/man/ampute.continuous.Rd0000644000176200001440000000411313416657163015744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.continuous.R \name{ampute.continuous} \alias{ampute.continuous} \title{Multivariate Amputation Based On Continuous Probability Functions} \usage{ ampute.continuous(P, scores, prop, type) } \arguments{ \item{P}{A vector containing the pattern numbers of the cases's candidacies. For each case, a value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{scores}{A list containing vectors with the candidates's weighted sum scores, the result of an underlying function in \code{\link{ampute}}.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{type}{A vector of strings containing the type of missingness for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. If a single missingness type is entered, all patterns will be created by the same type. If missingness types should differ over patterns, a vector of missingness types should be entered. Default is RIGHT for all patterns and is the result of \code{\link{ampute.default.type}}.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern. The continuous probability distributions (Van Buuren, 2012, pp. 63, 64) will be induced on the weighted sum scores, calculated earlier in the multivariate amputation function \code{\link{ampute}}. } \references{ #'Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html#sec:generateuni}{\emph{Flexible Imputation of Missing Data. 
Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.type}} } \author{ Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 } \keyword{internal} mice/man/mice.impute.pmm.Rd0000644000176200001440000001303413620753345015251 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.pmm.R \name{mice.impute.pmm} \alias{mice.impute.pmm} \alias{pmm} \title{Imputation by predictive mean matching} \usage{ mice.impute.pmm( y, ry, x, wy = NULL, donors = 5L, matchtype = 1L, ridge = 1e-05, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{donors}{The size of the donor pool among which a draw is made. The default is \code{donors = 5L}. Setting \code{donors = 1L} always selects the closest match, but is not recommended. Values between 3L and 10L provide the best results in most cases (Morris et al, 2015).} \item{matchtype}{Type of matching distance. The default choice (\code{matchtype = 1L}) calculates the distance between the \emph{predicted} value of \code{yobs} and the \emph{drawn} values of \code{ymis} (called type-1 matching). Other choices are \code{matchtype = 0L} (distance between predicted values) and \code{matchtype = 2L} (distance between drawn values).} \item{ridge}{The ridge penalty used in \code{.norm.draw()} to prevent problems with multicollinearity. 
The default is \code{ridge = 1e-05}, which means that 0.001 percent of the diagonal is added to the cross-product. Larger ridges may result in more biased estimates. For highly noisy data (e.g. many junk variables), set \code{ridge = 1e-06} or even lower to reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher.} \item{\dots}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Calculates imputations for univariate missing data by predictive mean matching. } \details{ Imputation of \code{y} by predictive mean matching, based on van Buuren (2012, p. 73). The procedure is as follows: \enumerate{ \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge parameter \eqn{\kappa}.} \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}}.} \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} \item{Calculate \eqn{\dot\eta(i,j)=|X_{{obs},[i]}\hat\beta-X_{{mis},[j]}\dot\beta|} with \eqn{i=1,\dots,n_1} and \eqn{j=1,\dots,n_0}.} \item{Construct \eqn{n_0} sets \eqn{Z_j}, each containing \eqn{d} candidate donors, from \eqn{Y_{obs}} such that \eqn{\sum_d\dot\eta(i,j)} is minimum for all \eqn{j=1,\dots,n_0}. Break ties randomly.} \item{Draw one donor \eqn{i_j} from \eqn{Z_j} randomly for \eqn{j=1,\dots,n_0}.} \item{Calculate imputations \eqn{\dot y_j = y_{i_j}} for \eqn{j=1,\dots,n_0}.} } The name \emph{predictive mean matching} was proposed by Little (1988).
} \examples{ # We normally call mice.impute.pmm() from within mice() # But we may call it directly as follows (not recommended) set.seed(53177) xname <- c('age', 'hgt', 'wgt') r <- stats::complete.cases(boys[, xname]) x <- boys[r, xname] y <- boys[r, 'tv'] ry <- !is.na(y) table(ry) # percentage of missing data in tv sum(!ry) / length(ry) # Impute missing tv data yimp <- mice.impute.pmm(y, ry, x) length(yimp) hist(yimp, xlab = 'Imputed missing tv') # Impute all tv data yimp <- mice.impute.pmm(y, ry, x, wy = rep(TRUE, length(y))) length(yimp) hist(yimp, xlab = 'Imputed missing and observed tv') plot(jitter(y), jitter(yimp), main = 'Predictive mean matching on age, height and weight', xlab = 'Observed tv (n = 224)', ylab = 'Imputed tv (n = 224)') abline(0, 1) cor(y, yimp, use = 'pair') } \references{ Little, R.J.A. (1988), Missing data adjustments in large surveys (with discussion), Journal of Business Economics and Statistics, 6, 287--301. Morris TP, White IR, Royston P (2015). Tuning multiple imputation by predictive mean matching and local residual draws. BMC Med Res Methodol. ;14:75. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-pmm.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/getqbar.Rd0000644000176200001440000000047613416657163013701 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getfit.R \name{getqbar} \alias{getqbar} \title{Extract estimate from \code{mipo} object} \usage{ getqbar(x) } \arguments{ \item{x}{An object of class \code{mipo}} } \description{ \code{getqbar} returns a named vector of pooled estimates. } mice/man/mice.impute.norm.nob.Rd0000644000176200001440000000616213620753345016214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.nob.R \name{mice.impute.norm.nob} \alias{mice.impute.norm.nob} \alias{norm.nob} \title{Imputation by linear regression without parameter uncertainty} \usage{ mice.impute.norm.nob(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. 
Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear regression analysis without accounting for the uncertainty of the model parameters. } \details{ This function creates imputations using the spread around the fitted linear regression line of \code{y} given \code{x}, as fitted on the observed data. This function is provided mainly to allow comparison between proper (e.g., as implemented in \code{mice.impute.norm}) and improper (this function) normal imputation methods. For large data, having many rows, differences between proper and improper methods are small, and in those cases one may opt for speed by using \code{mice.impute.norm.nob}. } \section{Warning}{ The function does not incorporate the variability of the regression weights, so it is not 'proper' in the sense of Rubin. For small samples, variability of the imputed data is therefore underestimated. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam.
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.norm}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/make.post.Rd0000644000176200001440000000117613416657163014153 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/post.R \name{make.post} \alias{make.post} \title{Creates a \code{post} argument} \usage{ make.post(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} } \value{ Character vector of \code{ncol(data)} element } \description{ This helper function creates a valid \code{post} vector. The \code{post} vector is an argument to the \code{mice} function that specifies post-processing for a variable just after imputation. } \examples{ make.post(nhanes2) } \seealso{ \code{\link{mice}} } mice/man/windspeed.Rd0000644000176200001440000000262713416661213014225 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/windspeed.R \docType{data} \name{windspeed} \alias{windspeed} \title{Subset of Irish wind speed data} \format{A data frame with 433 rows and 6 columns containing the daily average wind speeds within the period 1961-1978 at meteorological stations in the Republic of Ireland. 
The data are a random sample from a larger data set. \describe{ \item{RochePt}{Roche Point} \item{Rosslare}{Rosslare} \item{Shannon}{Shannon} \item{Dublin}{Dublin} \item{Clones}{Clones} \item{MalinHead}{Malin Head} }} \description{ Subset of Irish wind speed data } \details{ The original data set is much larger and was analyzed in detail by Haslett and Raftery (1989). Van Buuren et al (2006) used this subset to investigate the influence of extreme MAR mechanisms on the quality of imputation. } \examples{ windspeed[1:3,] } \references{ Haslett, J. and Raftery, A. E. (1989). \emph{Space-time Modeling with Long-memory Dependence: Assessing Ireland's Wind Power Resource (with Discussion)}. Applied Statistics 38, 1-50. \url{http://lib.stat.cmu.edu/datasets/wind.desc} and \url{http://lib.stat.cmu.edu/datasets/wind.data} van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. 
} \keyword{datasets} mice/man/brandsma.Rd0000644000176200001440000000430313416657163014034 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/brandsma.R \docType{data} \name{brandsma} \alias{brandsma} \title{Brandsma school data used Snijders and Bosker (2012)} \format{\code{brandsma} is a data frame with 4106 rows and 14 columns: \describe{ \item{\code{sch}}{School number} \item{\code{pup}}{Pupil ID} \item{\code{iqv}}{IQ verbal} \item{\code{iqp}}{IQ performal} \item{\code{sex}}{Sex of pupil} \item{\code{ses}}{SES score of pupil} \item{\code{min}}{Minority member 0/1} \item{\code{rpg}}{Number of repeated groups, 0, 1, 2} \item{\code{lpr}}{language score PRE} \item{\code{lpo}}{language score POST} \item{\code{apr}}{Arithmetic score PRE} \item{\code{apo}}{Arithmetic score POST} \item{\code{den}}{Denomination classification 1-4 - at school level} \item{\code{ssi}}{School SES indicator - at school level} }} \source{ Constructed from \code{MLbook_2nded_total_4106-99.sav} from \url{https://www.stats.ox.ac.uk/~snijders/mlbook.htm} by function \code{data-raw/R/brandsma.R} } \description{ Dataset with raw data from Snijders and Bosker (2012) containing data from 4106 pupils attending 216 schools. This dataset includes all pupils and schools with missing data. } \note{ This dataset is constructed from the raw data. There are a few differences with the data set used in Chapter 4 and 5 of Snijders and Bosker: \enumerate{ \item All schools are included, including the five school with missing values on \code{langpost}. \item Missing \code{denomina} codes are left as missing. \item Aggregates are undefined in the presence of missing data in the underlying values. Variables \code{ses}, \code{iqv} and \code{iqp} are in their original scale, and not globally centered. No aggregate variables at the school level are included. \item There is a wider selection of original variables. 
Note however that the source data contain an even wider set of variables. } } \references{ Brandsma, HP and Knuver, JWM (1989), Effects of school and classroom characteristics on pupil progress in language and arithmetic. International Journal of Educational Research, 13(7), 777 - 788. Snijders, TAB and Bosker RJ (2012). Multilevel Analysis, 2nd Ed. Sage, Los Angeles, 2012. } \keyword{datasets} mice/man/ampute.discrete.Rd0000644000176200001440000000422313416657163015342 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.discrete.R \name{ampute.discrete} \alias{ampute.discrete} \title{Multivariate Amputation Based On Discrete Probability Functions} \usage{ ampute.discrete(P, scores, prop, odds) } \arguments{ \item{P}{A vector containing the pattern numbers of the cases's candidacies. For each case, a value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{scores}{A list containing vectors with the candidates's weighted sum scores, the result of an underlying function in \code{\link{ampute}}.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{odds}{A matrix where #patterns defines the #rows. Each row should contain the odds of being missing for the corresponding pattern. The amount of odds values defines in how many quantiles the sum scores will be divided. The values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The #quantiles may differ between the patterns, specify NA for cells remaining empty. Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of \code{\link{ampute.default.odds}}.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. 
The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern. Odds probabilities (Brand, 1999, pp. 110-113) will be induced on the weighted sum scores, calculated earlier in the multivariate amputation function \code{\link{ampute}}. } \references{ Brand, J.P.L. (1999). \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.odds}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/mice.theme.Rd0000644000176200001440000000156713416657163014274 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.theme.R \name{mice.theme} \alias{mice.theme} \title{Set the theme for the plotting Trellis functions} \usage{ mice.theme(transparent = TRUE, alpha.fill = 0.3) } \arguments{ \item{transparent}{A logical indicating whether alpha-transparency is allowed. The default is \code{TRUE}.} \item{alpha.fill}{A numerical values between 0 and 1 that indicates the default alpha value for fills.} } \value{ \code{mice.theme()} returns a named list that can be used as a theme in the functions in \pkg{lattice}. By default, the \code{mice.theme()} function sets \code{transparent <- TRUE} if the current device \code{.Device} supports semi-transparent colors. } \description{ The \code{mice.theme()} function sets default choices for Trellis plots that are built into \pkg{mice}. 
} \author{ Stef van Buuren 2011 } mice/man/walking.Rd0000644000176200001440000000445613416661213013701 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/walking.R \docType{data} \name{walking} \alias{walking} \title{Walking disability data} \format{A data frame with 890 rows on the following 5 variables: \describe{ \item{sex}{Sex of respondent (factor)} \item{age}{Age of respondent} \item{YA}{Item administered in samples A and E (factor)} \item{YB}{Item administered in samples B and E (factor)} \item{src}{Source: Sample A, B or E (factor)} }} \description{ Two items YA and YB measuring walking disability in samples A, B and E. } \details{ Example dataset to demonstrate imputation of two items (YA and YB). Item YA is administered to sample A and sample E, item YB is administered to sample B and sample E, so sample E acts as a bridge study. Imputation using a bridge study is better than simple equating or than imputation under independence. Item YA corresponds to the HAQ8 item, and item YB corresponds to the GAR9 items from Van Buuren et al (2005). Sample E (as well as sample B) is the Euridiss study (n=292), sample A is the ERGOPLUS study (n=306). See Van Buuren (2012) chapter 7 for more details on the imputation methodology. 
} \examples{ md.pattern(walking) micemill <- function(n) { for (i in 1:n) { imp <<- mice.mids(imp) # global assignment cors <- with(imp, cor(as.numeric(YA), as.numeric(YB), method="kendall")) tau <<- rbind(tau, getfit(cors, s=TRUE)) # global assignment } } plotit <- function() matplot(x=1:nrow(tau),y=tau, ylab=expression(paste("Kendall's ",tau)), xlab="Iteration", type="l", lwd=1, lty=1:10,col="black") tau <- NULL imp <- mice(walking, max=0, m=10, seed=92786) pred <- imp$pred pred[,c("src","age","sex")] <- 0 imp <- mice(walking, max=0, m=3, seed=92786, pred=pred) micemill(5) plotit() ### to get figure 7.8 van Buuren (2012) use m=10 and micemill(20) } \references{ van Buuren, S., Eyres, S., Tennant, A., Hopman-Rock, M. (2005). Improving comparability of existing data by Response Conversion. \emph{Journal of Official Statistics}, \bold{21}(1), 53-72. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-codingsystems.html#sec:impbridge}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \keyword{datasets} mice/man/mice.impute.midastouch.Rd0000644000176200001440000001347713620753345016633 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.midastouch.R \name{mice.impute.midastouch} \alias{mice.impute.midastouch} \title{Imputation by predictive mean matching with distance aided donor selection} \usage{ mice.impute.midastouch( y, ry, x, wy = NULL, ridge = 1e-05, midas.kappa = NULL, outout = TRUE, neff = NULL, debug = NULL, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. 
Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ridge}{The ridge penalty used in \code{.norm.draw()} to prevent problems with multicollinearity. The default is \code{ridge = 1e-05}, which means that 0.001 percent of the diagonal is added to the cross-product. Larger ridges may result in more biased estimates. For highly noisy data (e.g. many junk variables), set \code{ridge = 1e-06} or even lower to reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher.} \item{midas.kappa}{Scalar. If \code{NULL} (default) then the optimal \code{kappa} gets selected automatically. Alternatively, the user may specify a scalar. Siddique and Belin (2008) find \code{midas.kappa = 3} to be sensible.} \item{outout}{Logical. If \code{TRUE} (default) one model is estimated for each donor (leave-one-out principle). For speedup choose \code{outout = FALSE}, which estimates one model for all observations leading to in-sample predictions for the donors and out-of-sample predictions for the recipients. Mind the inappropriateness, though.} \item{neff}{FOR EXPERTS. \code{NULL} or character string. The name of an existing environment in which the effective sample size of the donors for each loop (CE iterations times multiple imputations) is supposed to be written. The effective sample size is necessary to compute the correction for the total variance as originally suggested by Parzen, Lipsitz and Fitzmaurice (2005). The object name is \code{midastouch.neff}.} \item{debug}{FOR EXPERTS. \code{NULL} or character string. The name of an existing environment in which the input is supposed to be written. The object name is \code{midastouch.inputlist}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using predictive mean matching.
} \details{ Imputation of \code{y} by predictive mean matching, based on Rubin (1987, p. 168, formulas a and b) and Siddique and Belin (2008). The procedure is as follows: \enumerate{ \item Draw a bootstrap sample from the donor pool. \item Estimate a beta matrix on the bootstrap sample by the leave-one-out principle. \item Compute type II predicted values for \code{yobs} (nobs x 1) and \code{ymis} (nmis x nobs). \item Calculate the distance between all \code{yobs} and the corresponding \code{ymis}. \item Convert the distances into drawing probabilities. \item For each recipient draw a donor from the entire pool while considering the probabilities from the model. \item Take its observed value in \code{y} as the imputation. } } \examples{ # do default multiple imputation on a numeric matrix imp <- mice(nhanes, method = 'midastouch') imp # list the actual imputations for BMI imp$imp$bmi # first completed data matrix complete(imp) # imputation on mixed data with a different method per column mice(nhanes2, method = c('sample', 'midastouch', 'logreg', 'norm')) } \references{ Gaffert, P., Meinfelder, F., Bosch, V. (2015) Towards an MI-proper Predictive Mean Matching, Discussion Paper. \url{https://www.uni-bamberg.de/fileadmin/uni/fakultaeten/sowi_lehrstuehle/statistik/Personen/Dateien_Florian/properPMM.pdf} Little, R.J.A. (1988), Missing data adjustments in large surveys (with discussion), Journal of Business & Economic Statistics, 6, 287--301. Parzen, M., Lipsitz, S. R., Fitzmaurice, G. M. (2005), A note on reducing the bias of the approximate Bayesian bootstrap imputation variance estimator. Biometrika \bold{92}, 4, 971--974. Rubin, D.B. (1987), Multiple imputation for nonresponse in surveys. New York: Wiley. Siddique, J., Belin, T.R. (2008), Multiple imputation using an iterative hot-deck with distance-based donor selection. Statistics in Medicine, \bold{27}, 1, 83--102. Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B.
(2006), Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. Van Buuren, S., Groothuis-Oudshoorn, K. (2011), \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}, 3, 1--67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Philipp Gaffert, Florian Meinfelder, Volker Bosch 2015 } \concept{univariate imputation functions} \keyword{datagen} mice/man/boys.Rd0000644000176200001440000000524713617562135013226 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/boys.R \docType{data} \name{boys} \alias{boys} \title{Growth of Dutch boys} \format{A data frame with 748 rows on the following 9 variables: \describe{ \item{age}{Decimal age (0-21 years)} \item{hgt}{Height (cm)} \item{wgt}{Weight (kg)} \item{bmi}{Body mass index} \item{hc}{Head circumference (cm)} \item{gen}{Genital Tanner stage (G1-G5)} \item{phb}{Pubic hair (Tanner P1-P6)} \item{tv}{Testicular volume (ml)} \item{reg}{Region (north, east, west, south, city)} }} \source{ Fredriks, A.M,, van Buuren, S., Burgmeijer, R.J., Meulmeester JF, Beuker, R.J., Brugman, E., Roede, M.J., Verloove-Vanhorick, S.P., Wit, J.M. (2000) Continuing positive secular growth change in The Netherlands 1955-1997. 
\emph{Pediatric Research}, \bold{47}, 316-323. Fredriks, A.M., van Buuren, S., Wit, J.M., Verloove-Vanhorick, S.P. (2000). Body index measurements in 1996-7 compared with 1980. \emph{Archives of Disease in Childhood}, \bold{82}, 107-112. } \description{ Height, weight, head circumference and puberty of 748 Dutch boys. } \details{ Random sample of 10\% from the cross-sectional data used to construct the Dutch growth references 1997. Variables \code{gen} and \code{phb} are ordered factors. \code{reg} is a factor. } \examples{ # create two imputed data sets imp <- mice(boys, m=1, maxit=2) z <- complete(imp, 1) # create imputations for age <8yrs plot(z$age, z$gen, col=mdc(1:2)[1+is.na(boys$gen)], xlab = "Age (years)", ylab = "Tanner Stage Genital") # figure to show that the default imputation method does not impute BMI # consistently plot(z$bmi,z$wgt/(z$hgt/100)^2, col=mdc(1:2)[1+is.na(boys$bmi)], xlab = "Imputed BMI", ylab="Calculated BMI") # also, BMI distributions are somewhat different oldpar <- par(mfrow=c(1,2)) MASS::truehist(z$bmi[!is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25, col=mdc(1),xlab="BMI observed") MASS::truehist(z$bmi[is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25, col=mdc(2),xlab="BMI imputed") par(oldpar) # repair the inconsistency problem by passive imputation meth <- imp$meth meth["bmi"] <- "~I(wgt/(hgt/100)^2)" pred <- imp$predictorMatrix pred["hgt","bmi"] <- 0 pred["wgt","bmi"] <- 0 imp2 <- mice(boys, m=1, maxit=2, meth=meth, pred=pred) z2 <- complete(imp2, 1) # show that new imputations are consistent plot(z2$bmi,z2$wgt/(z2$hgt/100)^2, col=mdc(1:2)[1+is.na(boys$bmi)], ylab="Calculated BMI") # and compare distributions oldpar <- par(mfrow=c(1,2)) MASS::truehist(z2$bmi[!is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25,col=mdc(1), xlab="BMI observed") MASS::truehist(z2$bmi[is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25,col=mdc(2), xlab="BMI imputed") par(oldpar) } \keyword{datasets} 
mice/man/mice.impute.norm.predict.Rd0000644000176200001440000000605313620753345017067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.predict.R \name{mice.impute.norm.predict} \alias{mice.impute.norm.predict} \alias{norm.predict} \title{Imputation by linear regression through prediction} \usage{ mice.impute.norm.predict(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes the "best value" according to the linear regression model, also known as \emph{regression imputation}. } \details{ Calculates regression weights from the observed data and returns predicted values as imputations. This method is known as \emph{regression imputation}. } \section{Warning}{ THIS METHOD SHOULD NOT BE USED FOR DATA ANALYSIS. This method is seductive because it imputes the most likely value according to the model. However, it ignores the uncertainty of the missing values and artificially amplifies the relations between the columns of the data. Application of richer models having more parameters does not help to evade these issues. Stochastic regression methods, like \code{\link{mice.impute.pmm}} or \code{\link{mice.impute.norm}}, are generally preferred. 
At best, prediction can give reasonable estimates of the mean, especially if normality assumptions are plausible. See Little and Rubin (2002, p. 62-64) or Van Buuren (2012, p. 11-13, p. 45-46) for a discussion of this method. } \references{ Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing Data. New York: John Wiley and Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/make.blots.Rd0000644000176200001440000000163713416657163014313 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blots.R \name{make.blots} \alias{make.blots} \title{Creates a \code{blots} argument} \usage{ make.blots(data, blocks = make.blocks(data)) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. The default assigns each variable in its own block.} } \value{ A matrix } \description{ This helper function creates a valid \code{blots} object. The \code{blots} object is an argument to the \code{mice} function. The name \code{blots} is a contraction of blocks-dots. 
Through \code{blots}, the user can specify any additional arguments that are specifically passed down to the lowest level imputation function. } \examples{ make.predictorMatrix(nhanes) make.blots(nhanes, blocks = name.blocks(c("age", "hyp"), "xxx")) } \seealso{ \code{\link{make.blocks}} } mice/man/potthoffroy.Rd0000644000176200001440000000404713416661213014624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/potthoffroy.R \docType{data} \name{potthoffroy} \alias{potthoffroy} \title{Potthoff-Roy data} \format{\code{potthoffroy} is a data frame with 27 rows and 6 columns: \describe{ \item{id}{Person number} \item{sex}{Sex M/F} \item{d8}{Distance at age 8 years} \item{d10}{Distance at age 10 years} \item{d12}{Distance at age 12 years} \item{d14}{Distance at age 14 years} }} \source{ Potthoff, R. F., Roy, S. N. (1964). A generalized multivariate analysis of variance model useful especially for growth curve problems. \emph{Biometrika}, \emph{51}(3), 313-326. Little, R. J. A., Rubin, D. B. (1987). \emph{Statistical Analysis with Missing Data.} New York: John Wiley & Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/ex-ch-longitudinal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Data from Potthoff-Roy (1964) with repeated measures on dental fissures. } \details{ This data set is the famous Potthoff-Roy data, used to demonstrate MANOVA on repeated measure data. Potthoff and Roy (1964) published classic data on a study in 16 boys and 11 girls, who at ages 8, 10, 12, and 14 had the distance (mm) from the center of the pituitary gland to the pterygomaxillary fissure measured. Changes in pituitary-pterygomaxillary distances during growth are important in orthodontic therapy. The goals of the study were to describe the distance in boys and girls as simple functions of age, and then to compare the functions for boys and girls. 
The data have been reanalyzed by many authors including Jennrich and Schluchter (1986), Little and Rubin (1987), Pinheiro and Bates (2000), Verbeke and Molenberghs (2000) and Molenberghs and Kenward (2007). See Chapter 9 of Van Buuren (2012) for a challenging exercise using these data. } \examples{ ### create missing values at age 10 as in Little and Rubin (1987) phr <- potthoffroy idmis <- c(3,6,9,10,13,16,23,24,27) phr[idmis, 4] <- NA phr md.pattern(phr) } \keyword{datasets} mice/man/mice.impute.quadratic.Rd0000644000176200001440000001013013620753345016427 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.quadratic.R \name{mice.impute.quadratic} \alias{mice.impute.quadratic} \alias{quadratic} \title{Imputation of quadratic terms} \usage{ mice.impute.quadratic(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes an incomplete variable that appears as both a main effect and a quadratic effect in the complete-data model. } \details{ This function implements the "polynomial combination" method. First, the polynomial combination \eqn{Z = Y \beta_1 + Y^2 \beta_2} is formed. \eqn{Z} is imputed by predictive mean matching, followed by a decomposition of the imputed data \eqn{Z} into components \eqn{Y} and \eqn{Y^2}. 
See Van Buuren (2012, pp. 139-141) and Vink and Van Buuren (2013) for more details. The method ensures that 1) the imputed data for \eqn{Y} and \eqn{Y^2} are mutually consistent, and 2) it provides unbiased estimates of the regression weights in a complete-data linear regression that uses both \eqn{Y} and \eqn{Y^2}. } \note{ There are two situations to consider. If only the linear term \code{Y} is present in the data, calculate the quadratic term \code{YY} after imputation. If both the linear term \code{Y} and the quadratic term \code{YY} are variables in the data, then first impute \code{Y} by calling \code{mice.impute.quadratic()} on \code{Y}, and then impute \code{YY} by passive imputation as \code{meth["YY"] <- "~I(Y^2)"}. See example section for details. Generally, we would like \code{YY} to be present in the data if we need to preserve quadratic relations between \code{YY} and any third variables in the multivariate incomplete data that we might wish to impute. } \examples{ require(lattice) # Create Data B1 = .5 B2 = .5 X <- rnorm(1000) XX <- X^2 e <- rnorm(1000, 0, 1) Y <- B1 * X + B2 * XX + e dat <- data.frame(x = X, xx = XX, y = Y) # Impose 25 percent MCAR Missingness dat[0 == rbinom(1000, 1, 1 -.25), 1:2] <- NA # Prepare data for imputation ini <- mice(dat, maxit = 0) meth <- c("quadratic", "~I(x^2)", "") pred <- ini$pred pred[, "xx"] <- 0 # Impute data imp <- mice(dat, meth = meth, pred = pred) # Pool results pool(with(imp, lm(y ~ x + xx))) # Plot results stripplot(imp) plot(dat$x, dat$xx, col = mdc(1), xlab = "x", ylab = "xx") cmp <- complete(imp) points(cmp$x[is.na(dat$x)], cmp$xx[is.na(dat$x)], col = mdc(2)) } \seealso{ \code{\link{mice.impute.pmm}} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Vink, G., van Buuren, S. (2013). Multiple Imputation of Squared Terms. 
\emph{Sociological Methods & Research}, 42:598-607. Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink (University of Utrecht), \email{g.vink@uu.nl} } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.jomoImpute.Rd0000644000176200001440000000636113574715125016617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.jomoImpute.R \name{mice.impute.jomoImpute} \alias{mice.impute.jomoImpute} \title{Multivariate multilevel imputation using \code{jomo}} \usage{ mice.impute.jomoImpute( data, formula, type, m = 1, silent = TRUE, format = "imputes", ... ) } \arguments{ \item{data}{A data frame containing incomplete and auxiliary variables, the cluster indicator variable, and any other variables that should be present in the imputed datasets.} \item{formula}{A formula specifying the role of each variable in the imputation model. The basic model is constructed by \code{model.matrix}, thus allowing to include derived variables in the imputation model using \code{I()}. See \code{\link[mitml]{jomoImpute}}.} \item{type}{An integer vector specifying the role of each variable in the imputation model (see \code{\link[mitml]{jomoImpute}})} \item{m}{The number of imputed data sets to generate. Default is 1.} \item{silent}{(optional) Logical flag indicating if console output should be suppressed. 
 Default is \code{TRUE}.} \item{format}{A character vector specifying the type of object that should be returned. The default is \code{format = "imputes"}. No other formats are currently supported.} \item{...}{Other named arguments: \code{n.burn}, \code{n.iter}, \code{group}, \code{prior}, \code{silent} and others.} } \value{ A list of imputations for all incomplete variables in the model, that can be stored in the \code{imp} component of the \code{mids} object. } \description{ This function is a wrapper around the \code{jomoImpute} function from the \code{mitml} package so that it can be called to impute blocks of variables in \code{mice}. The \code{mitml::jomoImpute} function provides an interface to the \code{jomo} package for multiple imputation of multilevel data \url{https://CRAN.R-project.org/package=jomo}. Imputations can be generated using \code{type} or \code{formula}, which offer different options for model specification. } \note{ The number of imputations \code{m} is set to 1, and the function is called \code{m} times so that it fits within the \code{mice} iteration scheme. This is a multivariate imputation function using a joint model. } \examples{ \donttest{ # Note: Requires mitml 0.3-5.7 blocks <- list(c("bmi", "chl", "hyp"), "age") method <- c("jomoImpute", "pmm") ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) pred <- ini$pred pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) } } \references{ Grund S, Luedtke O, Robitzsch A (2016). Multiple Imputation of Multilevel Missing Data: An Introduction to the R Package \code{pan}. SAGE Open. Quartagno M and Carpenter JR (2015). Multiple imputation for IPD meta-analysis: allowing for heterogeneity and studies with missing covariates. Statistics in Medicine, 35:2938-2954, 2015. 
} \seealso{ \code{\link[mitml]{jomoImpute}} Other multivariate-2l: \code{\link{mice.impute.panImpute}()} } \author{ Stef van Buuren, 2018, building on work of Simon Grund, Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) and Quartagno and Carpenter (authors of \code{jomo} package). } \concept{multivariate-2l} \keyword{datagen} mice/man/ifdo.Rd0000644000176200001440000000065213416657163013171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{ifdo} \alias{ifdo} \title{Conditional imputation helper} \usage{ ifdo(cond, action) } \arguments{ \item{cond}{a condition} \item{action}{the action to do} } \value{ Currently returns an error message. } \description{ Sorry, the \code{ifdo()} function is not yet implemented. } \author{ Stef van Buuren, 2012 } \keyword{internal} mice/man/ampute.default.odds.Rd0000644000176200001440000000154713416657163016122 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.odds} \alias{ampute.default.odds} \title{Default \code{odds} in \code{ampute()}} \usage{ ampute.default.odds(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where 0 indicates a variable should have missing values and 1 indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A matrix where #rows equals #patterns. Default is 4 quantiles with odds values 1, 2, 3 and 4, for each pattern, imitating a RIGHT type of missingness. } \description{ Defines the default odds matrix for the multivariate amputation function \code{ampute}. 
} \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/print.Rd0000644000176200001440000000153113416657163013401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.R \name{print.mids} \alias{print.mids} \alias{print.mira} \alias{print.mice.anova} \alias{print.mice.anova.summary} \title{Print a \code{mids} object} \usage{ \method{print}{mids}(x, ...) \method{print}{mira}(x, ...) \method{print}{mice.anova}(x, ...) \method{print}{mice.anova.summary}(x, ...) } \arguments{ \item{x}{Object of class \code{mids}, \code{mira} or \code{mipo}} \item{...}{Other parameters passed down to \code{print.default()}} } \value{ \code{NULL} \code{NULL} \code{NULL} \code{NULL} } \description{ Print a \code{mids} object Print a \code{mira} object Print a \code{mice.anova} object Print a \code{summary.mice.anova} object } \seealso{ \code{\link[=mids-class]{mids}} \code{\link[=mira-class]{mira}} \code{\link{mipo}} \code{\link{mipo}} } mice/man/mids2mplus.Rd0000644000176200001440000000317613574715125014351 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids2mplus.R \name{mids2mplus} \alias{mids2mplus} \title{Export \code{mids} object to Mplus} \usage{ mids2mplus( imp, file.prefix = "imp", path = getwd(), sep = "\\t", dec = ".", silent = FALSE ) } \arguments{ \item{imp}{The \code{imp} argument is an object of class \code{mids}, typically produced by the \code{mice()} function.} \item{file.prefix}{A character string describing the prefix of the output data files.} \item{path}{A character string containing the path of the output file. 
By default, files are written to the current \code{R} working directory.} \item{sep}{The separator between the data fields.} \item{dec}{The decimal separator for numerical data.} \item{silent}{A logical flag stating whether the names of the files should be printed.} } \value{ The return value is \code{NULL}. } \description{ Converts a \code{mids} object into a format recognized by Mplus, and writes the data and the Mplus input files } \details{ This function automates most of the work needed to export a \code{mids} object to \code{Mplus}. The function writes the multiple imputation datasets, the file that contains the names of the multiple imputation data sets and an \code{Mplus} input file. The \code{Mplus} input file has the proper file names, so in principle it should run and read the data without alteration. \code{Mplus} will recognize the data set as a multiply imputed data set, and do automatic pooling in procedures where that is supported. } \seealso{ \code{\link[=mids-class]{mids}}, \code{\link{mids2spss}} } \author{ Gerko Vink, 2011. } \keyword{manip} mice/man/ampute.mcar.Rd0000644000176200001440000000302413416657163014460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.mcar.R \name{ampute.mcar} \alias{ampute.mcar} \title{Multivariate Amputation In A MCAR Manner} \usage{ ampute.mcar(P, patterns, prop) } \arguments{ \item{P}{A vector containing the pattern numbers of the cases's candidacies. For each case, a value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. The user may specify as many patterns as desired. One pattern (a vector) is also possible. 
Could be the result of \code{\link{ampute.default.patterns}}, default will be a square matrix of size #variables where each pattern has missingness on one variable only.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern, based on a MCAR missingness mechanism. The function is used in the multivariate amputation function \code{\link{ampute}}. } \seealso{ \code{\link{ampute}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/make.where.Rd0000644000176200001440000000164713416657163014303 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/where.R \name{make.where} \alias{make.where} \title{Creates a \code{where} argument} \usage{ make.where(data, keyword = c("missing", "all", "none", "observed")) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{keyword}{An optional keyword, one of \code{"missing"} (missing values are imputed), \code{"observed"} (observed values are imputed), \code{"all"} and \code{"none"}. The default is \code{keyword = "missing"}} } \value{ A logical matrix } \description{ This helper function creates a valid \code{where} matrix. The \code{where} matrix is an argument to the \code{mice} function. It has the same size as \code{data} and specifies which values are to be imputed (\code{TRUE}) or not (\code{FALSE}). 
} \examples{ head(make.where(nhanes), 3) } \seealso{ \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} } mice/man/bwplot.mads.Rd0000644000176200001440000000431013617553123014467 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bwplot.mads.R \name{bwplot.mads} \alias{bwplot.mads} \title{Box-and-whisker plot of amputed and non-amputed data} \usage{ \method{bwplot}{mads}( x, data, which.pat = NULL, standardized = TRUE, descriptives = TRUE, layout = NULL, ... ) } \arguments{ \item{x}{A \code{mads} (\code{\link{mads-class}}) object, typically created by \code{\link{ampute}}.} \item{data}{A string or vector of variable names that needs to be plotted. As a default, all variables will be plotted.} \item{which.pat}{A scalar or vector indicating which patterns need to be plotted. As a default, all patterns are plotted.} \item{standardized}{Logical. Whether the box-and-whisker plots need to be created from standardized data or not. Default is TRUE.} \item{descriptives}{Logical. Whether the mean, variance and n of the variables need to be printed. This is useful to examine the effect of the amputation. Default is TRUE.} \item{layout}{A vector of two values indicating how the boxplots of one pattern should be divided over the plot. For example, \code{c(2, 3)} indicates that the boxplots of six variables need to be placed on 3 rows and 2 columns. Default is 1 row and a number of columns equal to #variables. Note that for more than 6 variables, multiple plots will be created automatically.} \item{\dots}{Not used, but for consistency with generic} } \value{ A list containing the box-and-whisker plots. Note that a new pattern will always be shown in a new plot. } \description{ Plotting method to investigate the result of function \code{\link{ampute}}. It shows the relation between the data variables and the amputed data. The function does not show which data is amputed. 
It does show how the amputed values are related to the variable values. } \note{ The \code{mads} object contains all the information you need to make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate Amputation using Ampute} to understand the contents of class object \code{mads}. } \seealso{ \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for an overview of the package, \code{\link{mads-class}} } \author{ Rianne Schouten, 2016 } mice/man/mice.impute.lda.Rd0000644000176200001440000000651213620753345015223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lda.R \name{mice.impute.lda} \alias{mice.impute.lda} \title{Imputation by linear discriminant analysis} \usage{ mice.impute.lda(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments. Not used.} } \value{ Vector with imputed data, of type factor, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear discriminant analysis } \details{ Imputation of categorical response variables by linear discriminant analysis. This function uses the Venables/Ripley functions \code{lda()} and \code{predict.lda()} to compute posterior probabilities for each incomplete case, and draws the imputations from this posterior. 
This function can be called from within the Gibbs sampler by specifying \code{"lda"} in the \code{method} argument of \code{mice()}. This method is usually faster and uses fewer resources than calling the function \code{\link{mice.impute.polyreg}}, but the statistical properties may not be as good (Brand, 1999). } \section{Warning}{ The function does not incorporate the variability of the discriminant weights, so it is not 'proper' in the sense of Rubin. For small samples and rare categories in the \code{y}, variability of the imputed data could therefore be underestimated. Added: SvB June 2009 to include bootstrap - disabled since } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN 90-74479-08-1. Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-PLUS (2nd ed). Springer, Berlin. 
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.polyreg}}, \code{\link[MASS]{lda}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.passive.Rd0000644000176200001440000000252213416657163016136 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.passive.R \name{mice.impute.passive} \alias{mice.impute.passive} \title{Passive imputation} \usage{ mice.impute.passive(data, func) } \arguments{ \item{data}{A data frame} \item{func}{A \code{formula} specifying the transformations on data} } \value{ The result of applying \code{formula} } \description{ Calculate a new variable during imputation } \details{ Passive imputation is a special internal imputation function. Using this facility, the user can specify, at any point in the \code{mice} Gibbs sampling algorithm, a function on the imputed data. This is useful, for example, to compute a cubic version of a variable, a transformation like \code{Q = W/H^2} based on two variables, or a mean variable like \code{(x_1+x_2+x_3)/3}. The so derived variables might be used in other places in the imputation model. The function allows the user to dynamically derive virtually any function of the imputed data at virtually any time. 
} \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{mice}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{datagen} mice/man/pool.compare.Rd0000644000176200001440000000740313621065624014640 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.compare.R \name{pool.compare} \alias{pool.compare} \title{Compare two nested models fitted to imputed data} \usage{ pool.compare(fit1, fit0, method = c("wald", "likelihood"), data = NULL) } \arguments{ \item{fit1}{An object of class 'mira', produced by \code{with.mids()}.} \item{fit0}{An object of class 'mira', produced by \code{with.mids()}. The model in \code{fit0} is nested within the model in \code{fit1}.} \item{method}{Either \code{"wald"} or \code{"likelihood"} specifying the type of comparison. The default is \code{"wald"}.} \item{data}{No longer used.} } \value{ A list containing several components. Component \code{call} is the call to the \code{pool.compare} function. Component \code{call11} is the call that created \code{fit1}. Component \code{call12} is the call that created the imputations. Component \code{call01} is the call that created \code{fit0}. Component \code{call02} is the call that created the imputations. Component \code{method} is the method used to compare two models: 'Wald' or 'likelihood'. Component \code{nmis} is the number of missing entries for each variable. Component \code{m} is the number of imputations. Component \code{qhat1} is a matrix, containing the estimated coefficients of the \emph{m} repeated complete data analyses from \code{fit1}. Component \code{qhat0} is a matrix, containing the estimated coefficients of the \emph{m} repeated complete data analyses from \code{fit0}. 
Component \code{ubar1} is the mean of the variances of \code{fit1}, formula (3.1.3), Rubin (1987). Component \code{ubar0} is the mean of the variances of \code{fit0}, formula (3.1.3), Rubin (1987). Component \code{qbar1} is the pooled estimate of \code{fit1}, formula (3.1.2) Rubin (1987). Component \code{qbar0} is the pooled estimate of \code{fit0}, formula (3.1.2) Rubin (1987). Component \code{Dm} is the test statistic. Component \code{rm} is the relative increase in variance due to nonresponse, formula (3.1.7), Rubin (1987). Component \code{df1}: df1 = under the null hypothesis it is assumed that \code{Dm} has an F distribution with (df1,df2) degrees of freedom. Component \code{df2}: df2. Component \code{pvalue} is the P-value of testing whether the model \code{fit1} is statistically different from the smaller \code{fit0}. } \description{ This function is deprecated in V3. Use \code{\link{D1}} or \code{\link{D3}} instead. } \details{ Compares two nested models after m repeated complete data analysis The function is based on the article of Meng and Rubin (1992). The Wald-method can be found in paragraph 2.2 and the likelihood method can be found in paragraph 3. One could use the Wald method for comparison of linear models obtained with e.g. \code{lm} (in \code{with.mids()}). The likelihood method should be used in case of logistic regression models obtained with \code{glm()} in \code{with.mids()}. The function assumes that \code{fit1} is the larger model, and that model \code{fit0} is fully contained in \code{fit1}. In case of \code{method='wald'}, the null hypothesis is tested that the extra parameters are all zero. } \references{ Li, K.H., Meng, X.L., Raghunathan, T.E. and Rubin, D. B. (1991). Significance levels from repeated p-values with multiply-imputed data. Statistica Sinica, 1, 65-92. Meng, X.L. and Rubin, D.B. (1992). Performing likelihood ratio tests with multiple-imputed data sets. Biometrika, 79, 103-111. van Buuren S and Groothuis-Oudshoorn K (2011). 
\code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ \code{\link{lm.mids}}, \code{\link{glm.mids}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 } \keyword{htest} mice/man/mice.impute.panImpute.Rd0000644000176200001440000000632613574715125016432 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.panImpute.R \name{mice.impute.panImpute} \alias{mice.impute.panImpute} \title{Impute multilevel missing data using \code{pan}} \usage{ mice.impute.panImpute( data, formula, type, m = 1, silent = TRUE, format = "imputes", ... ) } \arguments{ \item{data}{A data frame containing incomplete and auxiliary variables, the cluster indicator variable, and any other variables that should be present in the imputed datasets.} \item{formula}{A formula specifying the role of each variable in the imputation model. The basic model is constructed by \code{model.matrix}, thus allowing derived variables to be included in the imputation model using \code{I()}. See \code{\link[mitml]{panImpute}}.} \item{type}{An integer vector specifying the role of each variable in the imputation model (see \code{\link[mitml]{panImpute}})} \item{m}{The number of imputed data sets to generate.} \item{silent}{(optional) Logical flag indicating if console output should be suppressed. Default is \code{TRUE}.} \item{format}{A character vector specifying the type of object that should be returned. The default is \code{format = "imputes"}. No other formats are currently supported.} \item{...}{Other named arguments: \code{n.burn}, \code{n.iter}, \code{group}, \code{prior}, \code{silent} and others.} } \value{ A list of imputations for all incomplete variables in the model, that can be stored in the \code{imp} component of the \code{mids} object. 
} \description{ This function is a wrapper around the \code{panImpute} function from the \code{mitml} package so that it can be called to impute blocks of variables in \code{mice}. The \code{mitml::panImpute} function provides an interface to the \code{pan} package for multiple imputation of multilevel data (Schafer & Yucel, 2002). Imputations can be generated using \code{type} or \code{formula}, which offer different options for model specification. } \note{ The number of imputations \code{m} is set to 1, and the function is called \code{m} times so that it fits within the \code{mice} iteration scheme. This is a multivariate imputation function using a joint model. } \examples{ blocks <- list(c("bmi", "chl", "hyp"), "age") method <- c("panImpute", "pmm") ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) pred <- ini$pred pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) } \references{ Grund S, Luedtke O, Robitzsch A (2016). Multiple Imputation of Multilevel Missing Data: An Introduction to the R Package \code{pan}. SAGE Open. Schafer JL (1997). Analysis of Incomplete Multivariate Data. London: Chapman & Hall. Schafer JL, and Yucel RM (2002). Computational strategies for multivariate linear mixed-effects models with missing values. Journal of Computational and Graphical Statistics, 11, 437-457. } \seealso{ \code{\link[mitml]{panImpute}} Other multivariate-2l: \code{\link{mice.impute.jomoImpute}()} } \author{ Stef van Buuren, 2018, building on work of Simon Grund, Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) and Joe Schafer (author of \code{pan} package). 
} \concept{multivariate-2l} \keyword{datagen} mice/man/mice.impute.2l.pan.Rd0000644000176200001440000001073113574715125015555 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.pan.R \name{mice.impute.2l.pan} \alias{mice.impute.2l.pan} \alias{2l.pan} \title{Imputation by a two-level normal model using \code{pan}} \usage{ mice.impute.2l.pan( y, ry, x, type, intercept = TRUE, paniter = 500, groupcenter.slope = FALSE, ... ) } \arguments{ \item{y}{Incomplete data vector of length \code{n}} \item{ry}{Vector of missing data pattern (\code{FALSE}=missing, \code{TRUE}=observed)} \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random effects are identified by a '2'. The group variable (only one is allowed) is coded as '-2'. Random effects also include the fixed effect. If group means of a covariate X1 shall be calculated and included as further fixed effects, choose '3'. In addition to the effects in '3', specification '4' also includes random effects of X1.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{paniter}{Number of iterations in \code{pan}. Default is 500.} \item{groupcenter.slope}{If \code{TRUE}, in case of group means (\code{type} is '3' or '4') group mean centering for these predictors is conducted before doing imputations. Default is \code{FALSE}.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data using a two-level normal model with homogeneous within group variances. Aggregated group effects (i.e. group means) can be automatically created and included as predictors in the two-level regression (see argument \code{type}). This function needs the \code{pan} package. 
} \details{ Implements the Gibbs sampler for the linear two-level model with homogeneous within group variances which is a special case of a multivariate linear mixed effects model (Schafer & Yucel, 2002). For a two-level imputation with heterogeneous within-group variances see \code{\link{mice.impute.2l.norm}}. % The random intercept is automatically added in % \code{mice.impute.2l.norm()}. } \note{ This function does not implement the \code{where} functionality. It always produces \code{nmis} imputations, irrespective of the \code{where} argument of the \code{mice} function. } \examples{ ################################### # simulate some data # two-level regression model with fixed slope # number of groups G <- 250 # number of persons n <- 20 # regression parameter beta <- .3 # intraclass correlation rho <- .30 # correlation with missing response rho.miss <- .10 # missing proportion missrate <- .50 y1 <- rep( rnorm( G , sd = sqrt( rho ) ) , each=n ) + rnorm(G*n , sd = sqrt( 1 - rho )) x <- rnorm( G*n ) y <- y1 + beta * x dfr0 <- dfr <- data.frame( "group" = rep(1:G , each=n ) , "x" = x , "y" = y ) dfr[ rho.miss * x + rnorm( G*n , sd = sqrt( 1 - rho.miss ) ) < qnorm( missrate ) , "y" ] <- NA #..... # empty imputation in mice imp0 <- mice( as.matrix(dfr) , maxit=0 ) predM <- imp0$predictorMatrix impM <- imp0$method #... 
# specify predictor matrix and method predM1 <- predM predM1["y","group"] <- -2 predM1["y","x"] <- 1 # fixed x effects imputation impM1 <- impM impM1["y"] <- "2l.pan" # multilevel imputation imp1 <- mice( as.matrix( dfr ) , m = 1 , predictorMatrix = predM1 , method = impM1 , maxit=1 ) # multilevel analysis library(lme4) mod <- lmer( y ~ ( 1 + x | group) + x , data = complete(imp1) ) summary(mod) ############################################ # Examples of predictorMatrix specification # random x effects # predM1["y","x"] <- 2 # fixed x effects and group mean of x # predM1["y","x"] <- 3 # random x effects and group mean of x # predM1["y","x"] <- 4 } \references{ Schafer J L, Yucel RM (2002). Computational strategies for multivariate linear mixed-effects models with missing values. \emph{Journal of Computational and Graphical Statistics}. \bold{11}, 437-457. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.norm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de}. } \concept{univariate-2l} mice/man/mice.impute.polyreg.Rd0000644000176200001440000000712313620753345016143 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.polyreg.R \name{mice.impute.polyreg} \alias{mice.impute.polyreg} \title{Imputation of unordered data by polytomous regression} \usage{ mice.impute.polyreg( y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, ... 
) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nnet.maxit}{Tuning parameter for \code{nnet()}.} \item{nnet.trace}{Tuning parameter for \code{nnet()}.} \item{nnet.MaxNWts}{Tuning parameter for \code{nnet()}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes missing data in a categorical variable using polytomous regression } \details{ The function \code{mice.impute.polyreg()} imputes categorical response variables by the Bayesian polytomous regression model. See J.P.L. Brand (1999), Chapter 4, Appendix B. By default, unordered factors with more than two levels are imputed by \code{mice.impute.polyreg()}. The method consists of the following steps: \enumerate{ \item Fit categorical response as a multinomial model \item Compute predicted categories \item Add appropriate noise to predictions } The algorithm of \code{mice.impute.polyreg} uses the function \code{multinom()} from the \code{nnet} package. In order to avoid bias due to perfect prediction, the algorithm augments the data according to the method of White, Daniel and Royston (2010). } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} Brand, J.P.L. 
(1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. White, I.R., Daniel, R., Royston, P. (2010). Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with S-Plus (4th ed)}. Springer, Berlin. } \seealso{ \code{\link{mice}}, \code{\link[nnet]{multinom}}, \code{\link[MASS]{polr}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 } \concept{univariate imputation functions} \keyword{datagen} mice/DESCRIPTION0000644000176200001440000000556213624017522012707 0ustar liggesusersPackage: mice Type: Package Version: 3.8.0 Title: Multivariate Imputation by Chained Equations Date: 2020-02-12 Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), email = "stef.vanbuuren@tno.nl"), person("Karin", "Groothuis-Oudshoorn", role = "aut", email = "c.g.m.oudshoorn@utwente.nl"), person("Gerko","Vink", role = "ctb", email = "g.vink@uu.nl"), person("Rianne","Schouten", role = "ctb", email = "R.M.Schouten@uu.nl"), person("Alexander", "Robitzsch", role = "ctb", email = "robitzsch@ipn.uni-kiel.de"), person("Lisa","Doove", role = "ctb", email = 
"lisa.doove@ppw.kuleuven.be"), person("Shahab","Jolani", role = "ctb", email = "s.jolani@maastrichtuniversity.nl"), person("Margarita","Moreno-Betancur", role="ctb", email = "margarita.moreno@mcri.edu.au"), person("Ian", "White", role="ctb", email = "ian.white@ucl.ac.uk"), person("Philipp","Gaffert", role = "ctb", email = "philipp.gaffert@gfk.com"), person("Florian","Meinfelder", role = "ctb", email = "florian.meinfelder@uni-bamberg.de"), person("Bernie","Gray", role = "ctb", email = "bfgray3@gmail.com")) Maintainer: Stef van Buuren Depends: R (>= 2.10.0) Imports: broom, dplyr, graphics, lattice, methods, stats, tidyr, utils Suggests: knitr, lme4, MASS, mitml, miceadds, nnet, pan, randomForest, rmarkdown, rpart, survival, testthat LinkingTo: Rcpp Description: Multiple imputation using Fully Conditional Specification (FCS) implemented by the MICE algorithm as described in Van Buuren and Groothuis-Oudshoorn (2011) . Each variable has its own imputation model. Built-in imputation models are provided for continuous data (predictive mean matching, normal), binary data (logistic regression), unordered categorical data (polytomous logistic regression) and ordered categorical data (proportional odds). MICE can also impute continuous two-level data (normal model, pan, second-level variables). Passive imputation can be used to maintain consistency between variables. Various diagnostic plots are available to inspect the quality of the imputations. 
Encoding: UTF-8 License: GPL-2 | GPL-3 LazyLoad: yes LazyData: yes URL: https://github.com/stefvanbuuren/mice, https://stefvanbuuren.name/mice/, https://stefvanbuuren.name/fimd/ BugReports: https://github.com/stefvanbuuren/mice/issues RoxygenNote: 7.0.2 NeedsCompilation: yes Packaged: 2020-02-21 13:57:45 UTC; buurensv Author: Stef van Buuren [aut, cre], Karin Groothuis-Oudshoorn [aut], Gerko Vink [ctb], Rianne Schouten [ctb], Alexander Robitzsch [ctb], Lisa Doove [ctb], Shahab Jolani [ctb], Margarita Moreno-Betancur [ctb], Ian White [ctb], Philipp Gaffert [ctb], Florian Meinfelder [ctb], Bernie Gray [ctb] Repository: CRAN Date/Publication: 2020-02-21 18:20:02 UTC mice/tests/0000755000176200001440000000000013553050056012333 5ustar liggesusersmice/tests/testthat/0000755000176200001440000000000013624017522014173 5ustar liggesusersmice/tests/testthat/test-mice.R0000644000176200001440000001434013466315061016215 0ustar liggesuserscontext("mice: complete") nhanes_mids <- mice(nhanes, m = 2, print = FALSE) nhanes_complete <- complete(nhanes_mids) test_that("No missing values remain in imputed nhanes data set", { expect_gt(sum(is.na(nhanes)), 0) expect_equal(sum(is.na(nhanes_complete)), 0) }) test_that("Data set in returned mids object is identical to nhanes data set", { expect_identical(nhanes_mids$data, nhanes) }) context("mice: blocks") test_that("blocks run as expected", { expect_silent(imp1b <<- mice(nhanes, blocks = list(c("age", "hyp"), chl = "chl", "bmi"), print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp2b <<- mice(nhanes2, blocks = list(c("age", "hyp", "bmi"), "chl", "bmi"), print = FALSE, m = 1, maxit = 1, seed = 1)) # expect_silent(imp3b <<- mice(nhanes2, # blocks = list(c("hyp", "hyp", "hyp"), "chl", "bmi"), # print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp4b <<- mice(boys, blocks = list(c("gen", "phb"), "tv"), print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp5b <<- mice(nhanes, blocks = list(c("age", "hyp")), print = 
FALSE, m = 1, maxit = 1, seed = 1)) }) test_that("Block names are generated automatically", { expect_identical(names(imp1b$blocks), c("B1", "chl", "bmi")) }) test_that("Method `pmm` is used for mixed variable types", { expect_identical(unname(imp2b$method[1]), "pmm") }) # test_that("Method `logreg` if all are binary", { # expect_identical(unname(imp3b$method[1]), "logreg") # }) test_that("Method `polr` if all are ordered", { expect_identical(unname(imp4b$method[1]), "polr") }) test_that("Method `polr` works with one block", { expect_identical(unname(imp5b$method[1]), "pmm") }) # check for equality of `scatter` and `collect` for univariate models # the following models yield the same imputations imp1 <- mice(nhanes, blocks = make.blocks(nhanes, "scatter"), print = FALSE, m = 1, maxit = 1, seed = 123) imp2 <- mice(nhanes, blocks = make.blocks(nhanes, "collect"), print = FALSE, m = 1, maxit = 1, seed = 123) imp3 <- mice(nhanes, blocks = list("age", c("bmi", "hyp", "chl")), print = FALSE, m = 1, maxit = 1, seed = 123) imp4 <- mice(nhanes, blocks = list(c("bmi", "hyp", "chl"), "age"), print = FALSE, m = 1, maxit = 1, seed = 123) test_that("Univariate yield same imputes for `scatter` and `collect`", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp1), complete(imp3)) expect_identical(complete(imp1), complete(imp4)) }) # potentially, we may also change the visitSequence, but mice # is quite persistent in overwriting a user-specified # visitSequence for complete columns, so this not # currently not an option. Defer optimizing this to later. # another trick is to specify where for age by hand, so it forces # mice to impute age by pmm, but then, this would need to be # done in both imp1 and imp2 models. 
context("mice: formulas") test_that("formulas run as expected", { expect_silent(imp1f <<- mice(nhanes, formulas = list(age + hyp ~ chl + bmi, chl ~ age + hyp + bmi, bmi ~ age + hyp + chl), print = FALSE, m = 1, maxit = 1, seed = 1)) expect_warning(imp2f <<- mice(nhanes2, formulas = list(age + hyp + bmi ~ chl + bmi, chl ~ age + hyp + bmi + bmi, bmi ~ age + hyp + bmi + chl), print = FALSE, m = 1, maxit = 1, seed = 1)) # expect_silent(imp3f <<- mice(nhanes2, # formulas = list( hyp + hyp + hyp ~ chl + bmi, # chl ~ hyp + hyp + hyp + bmi, # bmi ~ hyp + hyp + hyp + chl), # print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp4f <<- mice(boys, formulas = list(gen + phb ~ tv, tv ~ gen + phb), print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp5f <<- mice(nhanes, formulas = list(age + hyp ~ 1), print = FALSE, m = 1, maxit = 1, seed = 1)) }) test_that("Formula names are generated automatically", { expect_identical(names(imp1f$blocks), c("F1", "chl", "bmi")) }) test_that("Method `pmm` is used for mixed variable types", { expect_identical(unname(imp2f$method[1]), "pmm") }) # test_that("Method `logreg` if all are binary", { # expect_identical(unname(imp3f$method[1]), "logreg") # }) test_that("Method `polr` if all are ordered", { expect_identical(unname(imp4f$method[1]), "polr") }) test_that("Method `polr` works with one block", { expect_identical(unname(imp5f$method[1]), "pmm") }) context("mice: where") # # all TRUE imp1 <- mice(nhanes, where = matrix(TRUE, nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE) # # all FALSE imp2 <- mice(nhanes, where = matrix(FALSE, nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE) # # alternate imp3 <- mice(nhanes, where = matrix(c(FALSE, TRUE), nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE) # # whacky situation where we expect no imputes for the incomplete cases imp4 <- mice(nhanes2, where = matrix(TRUE, nrow = 25, ncol = 4), maxit = 1, meth = c("pmm", "", "", ""), m = 1, print = FALSE) test_that("`where` 
produces correct number of imputes", { expect_identical(nrow(imp1$imp$age), 25L) expect_identical(nrow(imp2$imp$age), 0L) expect_identical(nrow(imp3$imp$age), 12L) expect_identical(sum(is.na(imp4$imp$age)), nrow(nhanes2) - sum(complete.cases(nhanes2))) }) mice/tests/testthat/test-make.predictorMatrix.R0000644000176200001440000000041513416657163021401 0ustar liggesuserscontext("make.predictorMatrix") blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) test_that("errors on invalid data arguments", { expect_error(make.predictorMatrix(data, blocks = blocks1), "Data should be a matrix or data frame") }) mice/tests/testthat/test-mice.impute.norm.R0000644000176200001440000001141113416657163020474 0ustar liggesuserscontext("mice.impute.norm") ######################### #TEST 1: Simple problem # ######################### set.seed(123) #generate data y <- rnorm(10) x <- y * .3 + rnorm(10, 0, .25) x2 <- x + rnorm(10, 2, 3) x <- cbind(1, x, x2) #make missingness y[5:6] <- NA ry <- !is.na(y) set.seed(123) svd <- .norm.draw(y, ry, x, ls.meth = "svd") set.seed(123) ridge <- .norm.draw(y, ry, x, ls.meth = "ridge") set.seed(123) qr <- .norm.draw(y, ry, x, ls.meth = "qr") #tests for test1 test_that("Estimates are equal", { expect_equal(svd$coef, matrix(qr$coef)) expect_equal(svd$beta, matrix(qr$beta)) expect_equal(svd$sigma, qr$sigma) }) test_that("Correct estimation method used", { expect_equal(svd$estimation, "svd") expect_equal(qr$estimation, "qr") expect_equal(ridge$estimation, "ridge") }) #svd and qr deliver same estimates; ridge should be different! 
##################################### #TEST 2: extremely high correlation # ##################################### x <- matrix(c(1:1000, seq(from = 2, to = 2000, by=2)) + rnorm(1000), nrow = 1000, ncol = 2) y <- t(c(5, 3) %*% t(x)) y[5:6] <- NA ry <- !is.na(y) svd <- .norm.draw(y, ry, x, ls.meth = "svd") ridge <- .norm.draw(y, ry, x, ls.meth = "ridge") qr <- .norm.draw(y, ry, x, ls.meth = "qr") #tests for test2 test_that("Estimates are equal", { expect_equal(svd$coef, matrix(qr$coef)) expect_equal(svd$beta, matrix(qr$beta)) expect_equal(svd$sigma, qr$sigma) }) test_that("Correct estimation method used", { expect_equal(svd$estimation, "svd") expect_equal(qr$estimation, "qr") expect_equal(ridge$estimation, "ridge") }) #svd and qr deliver same estimates; ridge should be different! ##################################### #TEST 3: correct imputation model # ##################################### expect_warning(imp.qr <- mice(mammalsleep[, -1], ls.meth = "qr", seed = 123, print = FALSE)) expect_warning(imp.svd <- mice(mammalsleep[, -1], ls.meth = "svd", seed = 123, print = FALSE)) expect_warning(imp.ridge <- mice(mammalsleep[, -1], ls.meth = "ridge", seed = 123, print = FALSE)) test_that("Imputations are equal", { expect_equal(imp.qr$imp, imp.svd$imp) expect_false(identical(imp.qr$imp, imp.ridge$imp)) }) ##################################### #TEST 4: exactly singular system # ##################################### # test on faulty imputation model (exactly singular system) expect_warning(imp.qr <- mice(mammalsleep, ls.meth = "qr", seed = 123, print = FALSE)) expect_warning(imp.svd <- mice(mammalsleep, ls.meth = "svd", seed = 123, print = FALSE)) expect_warning(imp.ridge <- mice(mammalsleep, ls.meth = "ridge", seed = 123, print = FALSE)) test_that("Imputations are not equal", { expect_false(identical(imp.qr$imp, imp.svd$imp)) expect_false(identical(imp.qr$imp, imp.ridge$imp)) }) #difference stems from added ridge penalty when necessary (when and where depends #on starting 
state of algorithm). ##################################### #TEST 4: returns requested length # ##################################### xname <- c("age", "hgt", "wgt") br <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] r <- stats::complete.cases(br[, xname]) x <- br[r, xname] y <- br[r, "tv"] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) test_that("Returns requested length", { expect_equal(length(mice.impute.norm(y, ry, x)), sum(!ry)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy1)), sum(wy1)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy2)), sum(wy2)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy3)), sum(wy3)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy4)), sum(wy4)) }) ### x <- airquality[, c("Wind", "Temp", "Month")] y <- airquality[, "Ozone"] ry <- !is.na(y) # do imputations depend on x column order? x1 <- x[, c(1, 2, 3)] x2 <- x[, c(1, 3, 2)] set.seed(123); pmm1 <- mice.impute.pmm(y, ry, x1) set.seed(123); pmm2 <- mice.impute.pmm(y, ry, x2) set.seed(123); norm1 <- mice.impute.norm(y, ry, x1) set.seed(123); norm2 <- mice.impute.norm(y, ry, x2) set.seed(123); norm.nob1 <- mice.impute.norm.nob(y, ry, x1) set.seed(123); norm.nob2 <- mice.impute.norm.nob(y, ry, x2) set.seed(123); norm.predict1 <- mice.impute.norm.predict(y, ry, x1) set.seed(123); norm.predict2 <- mice.impute.norm.predict(y, ry, x2) set.seed(123); norm.boot1 <- mice.impute.norm.boot(y, ry, x1) set.seed(123); norm.boot2 <- mice.impute.norm.boot(y, ry, x2) test_that("Imputations are invariant to column order", { # expect_equal(pmm1, pmm2) # expect_equal(norm1, norm2) expect_equal(norm.nob1, norm.nob2) expect_equal(norm.predict1, norm.predict2) expect_equal(norm.boot1, norm.boot2) }) mice/tests/testthat/test-complete.R0000644000176200001440000000170113617511603017103 0ustar liggesuserscontext("complete") imp <- mice(nhanes, maxit = 1, m = 2, seed = 123, print = FALSE) 
lng <- subset(complete(imp, "long"), .imp == 1, select = c(age, bmi, hyp, chl)) all <- complete(imp, "all")[[1]] test_that("long and all produce same data", { expect_equal(lng, all) }) # mids workflow using saved objects imp <- mice(nhanes, seed = 123, print = FALSE) fit <- with(imp, lm(chl ~ age + bmi + hyp)) est <- pool(fit) est.mice <- est # mild workflow using saved objects and base::lapply idl <- complete(imp, "all") fit <- lapply(idl, lm, formula = chl ~ age + bmi + hyp) est <- pool(fit) est.mild <- est # long workflow using base::by cmp <- complete(imp, "long") fit <- by(cmp, as.factor(cmp$.imp), lm, formula = chl ~ age + bmi + hyp) est <- pool(fit) est.long <- est test_that("workflow mids, mild and long produce same estimates", { expect_identical(getqbar(est.mice), getqbar(est.mild)) expect_identical(getqbar(est.mice), getqbar(est.long)) }) mice/tests/testthat/test-mice.impute.2l.lmer.R0000644000176200001440000000126113416657163020776 0ustar liggesuserscontext("mice.impute.2l.lmer") d <- brandsma[1:200, c("sch", "lpo")] pred <- make.predictorMatrix(d) pred["lpo", "sch"] <- -2 test_that("mice::mice.impute.2l.lmer() runs empty model", { expect_silent(imp <- mice(d, method = "2l.lmer", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) d <- brandsma[1:200, c("sch", "lpo", "iqv")] d[c(1, 11, 21), "iqv"] <- NA pred <- make.predictorMatrix(d) pred[c("lpo", "iqv"), "sch"] <- -2 test_that("2l.lmer() runs random intercept, one predictor", { expect_silent(imp <- mice(d, method = "2l.lmer", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-mice.impute.2l.bin.R0000644000176200001440000000227413617511631020604 0ustar liggesuserscontext("mice.impute.2l.bin") # toenail: outcome is factor data("toenail2") data <- tidyr::complete(toenail2, patientID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-time) %>% dplyr::mutate(patientID = as.integer(patientID)) summary(data) # fit1 
<- glm(outcome ~ treatment * month, data = toenail2, family = binomial) # fit2 <- glm(outcome ~ treatment * visit, data = toenail2, family = binomial) # fit3 <- lme4::glmer(outcome ~ treatment * visit + (1 | ID), data = data, family = binomial) pred <- make.predictorMatrix(data) pred["outcome", "patientID"] <- -2 test_that("mice::mice.impute.2l.bin() accepts factor outcome", { expect_silent(imp <- mice(data, method = "2l.bin", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) # toenail: outcome is 0/1 data("toenail") data <- tidyr::complete(toenail, ID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-month) summary(data) pred <- make.predictorMatrix(data) pred["outcome", "ID"] <- -2 test_that("mice::mice.impute.2l.bin() accepts 0/1 outcome", { expect_silent(imp <- mice(data, method = "2l.bin", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-mice.impute.panImpute.R0000644000176200001440000000135413416657163021470 0ustar liggesuserscontext("mice.impute.panImpute") data <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] type <- c(2, 0, 0, 0, -2, 0, 1, 1, 0) names(type) <- names(data) z1 <- mice.impute.panImpute(data = data, type = type, format = "native") test_that("panImpute returns native class", { expect_is(z1, "mitml") }) blocks <- make.blocks(list(c("bmi", "chl", "hyp"), "age")) method <- c("panImpute", "pmm") pred <- make.predictorMatrix(nhanes, blocks) pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1, seed = 1, print = FALSE) z <- complete(imp) test_that("mice can call panImpute", { expect_equal(sum(is.na(z$bmi)), 0) expect_equal(sum(is.na(z$chl)), 0) }) mice/tests/testthat/test-blocks.R0000644000176200001440000000036113416657163016562 0ustar liggesuserscontext("blocks") imp <- mice(nhanes, blocks = make.blocks(list(c("bmi", "chl"), "bmi", "age")), m = 10, print = FALSE) # plot(imp) test_that("removes 
variables from 'where'", { expect_identical(sum(imp$where[, "hyp"]), 0L) }) mice/tests/testthat/test-mice.impute.polr.R0000644000176200001440000000030313416657163020473 0ustar liggesuserscontext("polr") # following halts with # "Error in apply(draws, 2, sum) : dim(X) must have a positive length" # imp1 <- mice(boys, blocks = list("gen"), print = FALSE, m = 1, maxit = 1) mice/tests/testthat/test-rbind.R0000644000176200001440000000661213540673625016407 0ustar liggesuserscontext("rbind.mids") expect_warning(imp1 <<- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE)) test_that("Constant variables are not imputed by default", { expect_equal(sum(is.na(complete(imp1))), 6L) }) expect_warning(imp1b <<- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE, remove.constant = FALSE)) test_that("Constant variables are imputed for remove.constant = FALSE", { expect_equal(sum(is.na(complete(imp1b))), 0L) }) imp2 <- mice(nhanes[14:25, ], m = 2, maxit = 1, print = FALSE) imp3 <- mice(nhanes2, m = 2, maxit = 1, print = FALSE) imp4 <- mice(nhanes2, m = 1, maxit = 1, print = FALSE) expect_warning(imp5 <<- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE)) expect_error(imp6 <<- mice(nhanes[1:13, 2:3], m = 2, maxit = 2, print = FALSE), "`mice` detected constant and/or collinear variables. 
No predictors were left after their removal.") nh3 <- nhanes colnames(nh3) <- c("AGE", "bmi", "hyp", "chl") imp7 <- mice(nh3[14:25, ], m = 2, maxit = 2, print = FALSE) expect_warning(imp8 <<- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE)) mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) nhalf <- nhanes[13:25, ] test_that("Expands number of rows and imputes", { expect_equal(nrow(complete(rbind(imp1, imp2))), 25L) expect_equal(nrow(rbind(imp1, imp2)$imp$bmi), 9L) }) test_that("throws error", { expect_error(rbind(imp1, imp3), "datasets have different factor variables") expect_error(rbind(imp3, imp4), "number of imputations differ") expect_error(rbind(imp1, imp7), "datasets have different variable names") }) test_that("throws warning", { expect_warning(rbind(imp1, imp5), "iterations differ, so no convergence diagnostics calculated") }) r1 <- rbind(imp8, imp5) r2 <- rbind(imp1, mylist) r3 <- rbind(imp1, nhalf) r4 <- rbind(imp1, imp2) test_that("Produces longer imputed data", { expect_identical(nrow(complete(r1)), 26L) expect_identical(nrow(complete(r2)), 14L) }) test_that("Constant variables are not imputed", { expect_equal(sum(is.na(complete(r3))), 15L) expect_equal(sum(is.na(complete(r4))), 6L) }) # r11 <- mice.mids(rbind(imp1, imp5), print = FALSE) # test_that("plot throws error on convergence diagnostics", { # expect_error(plot(r11), "no convergence diagnostics found") # }) r21 <- mice.mids(r2, print = FALSE) r31 <- mice.mids(r3, print = FALSE) # issue #59 set.seed <- 818 x <- rnorm(10) D <- data.frame(x=x, y=2*x+rnorm(10)) D[c(2:4, 7), 1] <- NA expect_error(D_mids <<- mice(D[1:5,], print = FALSE), "`mice` detected constant and/or collinear variables. 
No predictors were left after their removal.") expect_warning(D_mids <<- mice(D[1:5,], print = FALSE, remove.collinear = FALSE)) D_rbind <- mice:::rbind.mids(D_mids, D[6:10,]) cmp <- complete(D_rbind, 1) test_that("Solves issue #59, rbind", expect_identical(cmp[6:10, ], D[6:10, ])) test_that("rbind does not throw a warning (#114)", { expect_silent(rbind(ordered(c(1,2)))) }) # calculate chainMean and chainVar # imp1 <- mice(nhanes[1:13, ], m = 5, maxit = 25, print = FALSE, seed = 123) # imp2 <- mice(nhanes[14:25, ], m = 5, maxit = 25, print = FALSE, seed = 456) # z <- rbind(imp1, imp2) # plot(z) # # imp3 <- mice(nhanes, m = 5, maxit = 25, print = FALSE, seed = 123) # plot(imp3) # # An interesting observation is that the SD(hyp, a) < SD(hyp, imp3). This is # because SD(hyp, imp1) = 0. mice/tests/testthat/test-ampute.R0000644000176200001440000002337713617561077016615 0ustar liggesuserscontext("ampute") # make objects for testfunctions sigma <- matrix(data = c(1, 0.2, 0.2, 0.2, 1, 0.2, 0.2, 0.2, 1), nrow = 3) complete.data <- MASS::mvrnorm(n = 100, mu = c(5, 5, 5), Sigma = sigma) test_that("all examples work", { expect_error(ampute(data = complete.data), NA) result1 <- ampute(data = complete.data) patterns <- result1$patterns patterns[1:3, 2] <- 0 odds <- result1$odds odds[2,3:4] <- c(2, 4) odds[3,] <- c(3, 1, NA, NA) expect_error(ampute(data = complete.data, patterns = patterns, freq = c(0.3, 0.3, 0.4), cont = FALSE, odds = odds), NA) expect_error(ampute(data = complete.data, type = c("RIGHT", "TAIL", "LEFT")), NA) }) test_that("all arguments work", { # empty run expect_error(ampute(data = complete.data, run = FALSE), NA) # missingness by cells expect_error(ampute(data = complete.data, prop = 0.1, bycases = FALSE), NA) # prop with 3 dec, weigths with negative values, unequal odds matrix expect_error(ampute(data = complete.data, prop = 0.314, freq = c(0.25, 0.4, 0.35), patterns = matrix(data = c(1, 0, 1, 0, 1, 0, 0, 1, 1), nrow = 3, byrow = TRUE), weights = matrix(data 
= c(-1, 1, 0, -4, -4, 1, 0, 0, -1), nrow = 3, byrow = TRUE), odds = matrix(data = c(1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4), nrow = 3, byrow = TRUE), cont = FALSE), NA) # 1 pattern with vector for patterns and weights expect_error(ampute(data = complete.data, freq = 1, patterns = c(1, 0, 1), weights = c(3, 3, 0)), NA) # multiple patterns given in vectors expect_error(ampute(data = complete.data, patterns = c(1, 0, 1, 1, 0, 0), cont = TRUE, weights = c(1, 4, -2, 0, 1, 2), type = c("LEFT","TAIL")), NA) # one pattern with odds vector expect_error(ampute(data = complete.data, patterns = c(1, 0, 1), weights = c(4, 1, 0), odds = c(2, 1), cont = FALSE), NA) # argument standardized expect_error(ampute(data = complete.data, std = FALSE), NA) # sum scores cannot be NaN dich.data <- matrix(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), ncol = 2, byrow = FALSE) wss <- ampute(data = dich.data, mech = "MNAR")$scores check_na <- function(x){return(any(is.na(x)))} expect_false(any(unlist(lapply(wss, check_na)))) }) test_that("function works around unusual arguments", { # data nasty.data <- complete.data nasty.data[, 1] <- rep(c("one", "two"), 50) # when data is categorical and mech != mcar, warning is expected expect_warning(ampute(data = nasty.data), "Data is made numeric because the calculation of weights requires numeric data") # when data is categorical and mech = mcar, function can continue expect_warning(ampute(data = nasty.data, mech = "MCAR"), NA) # patterns expect_error(ampute(data = complete.data, patterns = c(0, 0, 0), mech = "MCAR"), NA) expect_error(ampute(data = complete.data, patterns = c(0, 0, 1, 0, 0, 0), mech = "MNAR"), NA) expect_warning(ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0))) # freq expect_warning(ampute(data = complete.data, freq = c(0.8, 0.4))) # prop expect_warning(ampute(data = complete.data, prop = 1)) expect_error(ampute(data = complete.data, prop = 48.5), NA) # mech, type and weights expect_warning(ampute(data = 
complete.data, mech = c("MCAR", "MAR")), "Mechanism should contain merely MCAR, MAR or MNAR. First element is used") expect_warning(ampute(data = complete.data, type = c("LEFT", "RIGHT")), "Type should either have length 1 or length equal to #patterns, first element is used for all patterns") expect_warning(ampute(data = complete.data, mech = "MCAR", odds = matrix(data = c(1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4), nrow = 3, byrow = TRUE), cont = FALSE), "Odds matrix is not used when mechanism is MCAR") expect_warning(ampute(data = complete.data, mech = "MCAR", weights = c(1, 3, 4)), "Weights matrix is not used when mechanism is MCAR") expect_warning(ampute(data = complete.data, odds = matrix(data = c(1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4), nrow = 3, byrow = TRUE))) expect_warning(ampute(data = complete.data, cont = FALSE, type = "LEFT")) }) test_that("error messages work properly", { # data expect_error(ampute(data = as.list(complete.data)), "Data should be a matrix or data frame") nasty.data <- complete.data nasty.data[1:10, 1] <- NA expect_error(ampute(data = nasty.data), "Data cannot contain NAs") expect_error(ampute(data = as.data.frame(complete.data[, 1])), "Data should contain at least two columns") # prop expect_error(ampute(data = complete.data, prop = 104)) expect_error(ampute(data = complete.data, prop = 0.9, bycases = FALSE), "Proportion of missing cells is too large in combination with the desired number of missing variables") # patterns expect_error(ampute(data = complete.data, patterns = c(1, 1, 1)), "One pattern with merely ones results to no amputation at all, the procedure is therefore stopped") expect_error(ampute(data = complete.data, patterns = c(0, 0, 0), mech = "MAR"), "Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR") expect_error(ampute(data = complete.data, patterns = c(1, 0, 1, 1)), "Length of pattern vector does not match #variables") expect_error(ampute(data = complete.data, patterns = 
c(1, 0, 2)), "Argument patterns can only contain 0 and 1, pattern 1 contains another element") expect_error(ampute(data = complete.data, mech = "MAR", patterns = c(0, 0, 1, 0, 0, 0)), "Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR") # mech, type, weights and odds expect_error(ampute(data = complete.data, mech = "MAAR"), "Mechanism should be either MCAR, MAR or MNAR") expect_error(ampute(data = complete.data, type = "MARLEFT"), "Type should contain LEFT, MID, TAIL or RIGHT") expect_error(ampute(data = complete.data, weights = c(1, 2, 1, 4)), "Length of weight vector does not match #variables") expect_error(ampute(data = complete.data, odds = matrix(c(1, 4, -3, 2, 1, 1), nrow = 3), cont = FALSE), "Odds matrix can only have positive values") expect_error(ampute(data = complete.data, patterns = matrix(data = c(1, 0, 1, 0, 1, 0, 0, 1, 1), nrow = 3, byrow = TRUE), weights = matrix(data = c(-1, 1, 0, -4, -4, 1, 0, 0, -1, 1, 1, 0), nrow = 4, byrow = TRUE)), "The objects patterns and weights are not matching") expect_error(ampute(data = complete.data, patterns = matrix(data = c(1, 0, 1, 0, 1, 0, 0, 1, 1), nrow = 3, byrow = TRUE), odds = matrix(data = c(1, 4, NA, NA, 0, 3, 3, 0), nrow = 2, byrow = TRUE), cont = FALSE), "The objects patterns and odds are not matching") }) # The following test was contributed by Shangzhi-hong (#216) Dec 2019 context("ampute robust version") set.seed(1) # Set-up # Dataset NUM_OBS_DF <- 25 NUM_VAR_DF <- 10 data <- replicate( n = NUM_VAR_DF, expr = { rnorm(n = NUM_OBS_DF, mean = 1, sd = 1) }, simplify = "matrix") # Ampute pattern covNum <- NUM_VAR_DF - 1 misPatCov1 <- t(combn(x = covNum, m = 1, FUN = function(x) replace(rep(1, covNum), x, 0))) misPat1 <- cbind(rep(1, choose(covNum, 1)), misPatCov1) misPatCov2 <- t(combn(x = covNum, m = 2, FUN = function(x) replace(rep(1, covNum), x, 0))) misPat2 <- cbind(rep(1, choose(covNum, 2)), misPatCov2) patterns <- rbind(misPat1, misPat2) weights <- 
matrix(0, nrow = nrow(patterns), ncol = ncol(patterns)) weights[,1] <- 1 prop <- 0.5 mech <- "MAR" type <- "RIGHT" bycases <- TRUE # Other params # freq <- NULL # std <- TRUE # cont <- TRUE # type <- NULL # odds <- NULL # run <- TRUE test_that("ampute() works under extreme condition", { ampDf <- ampute( data = data, prop = prop, mech = mech, type = type, bycases = bycases, patterns = patterns, weights = weights )$amp outProp <- sum(complete.cases(ampDf)) / NUM_OBS_DF expect_true(outProp > 0.3 & outProp < 0.7) }) # --- end test Shangzhi-hong (#216) Dec 2019mice/tests/testthat/test-as.mids.R0000644000176200001440000000415313416657163016646 0ustar liggesuserscontext("as.mids") nhanes3 <- nhanes rownames(nhanes3) <- LETTERS[1:nrow(nhanes3)] imp <- mice(nhanes3, m = 2, maxit = 1, print = FALSE) X <- complete(imp, action = "long", include = TRUE) # create dataset with .imp variable as numeric X2 <- X # nhanes example test1 <- as.mids(X) # nhanes example test2 <- as.mids(X2) # nhanes example, where we explicitly specify .id as column 2 test3 <- as.mids(X, .id = 2) # nhanes example with .id where .imp is numeric test4 <- as.mids(X2, .id = 2) #'# example without an .id variable #'# variable .id not preserved test5 <- as.mids(X[, -2]) #'# reverse data order rev <- ncol(X):1 test6 <- as.mids(X[, rev]) # as() syntax has fewer options test7 <- as(X, "mids") test8 <- as(X2, "mids") test9 <- as(X2[, -2], "mids") test10 <- as(X[, rev], "mids") test_that("as.mids() produces a `mids` object", { expect_is(test1, "mids") expect_is(test2, "mids") expect_is(test3, "mids") expect_is(test4, "mids") expect_is(test5, "mids") expect_is(test7, "mids") expect_is(test8, "mids") expect_is(test9, "mids") expect_is(test10, "mids") expect_error(as(X[-(1:10), ], "mids"), "Unequal group sizes in imputation index `.imp`") expect_error(as(X[, -(1:2)], "mids"), "Imputation index `.imp` not found") }) test_that("complete() reproduces the original data", { expect_true(all(complete(test1, action = "long", 
include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test2, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test3, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test4, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test5, action = "long", include = TRUE)[, -2] == X[, -2], na.rm = TRUE)) expect_true(all(complete(test6, action = "long", include = TRUE)[,-(1:2)] == X[, rev][, -(5:6)], na.rm = TRUE)) }) # works with dplyr library(dplyr) X3 <- X %>% group_by(hyp) %>% mutate(chlm = mean(chl, na.rm = TRUE)) test_that("handles grouped_df", { expect_silent(as.mids(X3)) }) mice/tests/testthat/test-mice.impute.jomoImpute.R0000644000176200001440000000140113416657163021647 0ustar liggesuserscontext("mice.impute.jomoImpute") data <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] type <- c(2, 0, 0, 0, -2, 0, 1, 1, 0) names(type) <- names(data) z1 <- mice.impute.jomoImpute(data = data, type = type, format = "native") test_that("jomoImpute returns native class", { expect_is(z1, "mitml") }) blocks <- make.blocks(list(c("bmi", "chl", "hyp"), "age")) method <- c("jomoImpute", "pmm") pred <- make.predictorMatrix(nhanes, blocks) pred["B1", "hyp"] <- -2 # imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, # maxit = 1, seed = 1, print = FALSE) # z <- complete(imp) # # test_that("mice can call jomoImpute", { # expect_equal(sum(is.na(z$bmi)), 0) # expect_equal(sum(is.na(z$chl)), 0) # }) mice/tests/testthat/test-mice.impute.pmm.R0000644000176200001440000000134313416657163020315 0ustar liggesuserscontext("mice.impute.pmm") xname <- c("age", "hgt", "wgt") br <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] r <- stats::complete.cases(br[, xname]) x <- br[r, xname] y <- br[r, "tv"] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) test_that("Returns requested length", { 
expect_equal(length(mice.impute.pmm(y, ry, x)), sum(!ry)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy1)), sum(wy1)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy2)), sum(wy2)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy3)), sum(wy3)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy4)), sum(wy4)) }) mice/tests/testthat/test-mice.impute.2lonly.mean.R0000644000176200001440000000351713556276477021701 0ustar liggesuserscontext("mice.impute.2lonly.mean") set.seed(66322) y <- popmis$texp y[rbinom(length(y), size = 1, prob = 0.5) == 1] <- NA x <- popmis[, c("pupil", "school", "sex")] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) type <- c(1, -2, 1) yn <- y test_that("Returns requested length, for numeric", { expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy1)), sum(wy1)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy2)), sum(wy2)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy3)), sum(wy3)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy4)), sum(wy4)) }) # test extension to factors set.seed(66322) y <- popmis$texp y <- cut(y, breaks = c(0, 5, 10, 20, 30)) y[rbinom(length(y), size = 1, prob = 0.5) == 1] <- NA test_that("Returns requested length, for factor", { expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy1)), sum(wy1)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy2)), sum(wy2)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy3)), sum(wy3)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy4)), sum(wy4)) }) # check whether imputes for numeric and factor are identical # tn <- mice.impute.2lonly.mean(yn, ry, x, type, wy1) # tf <- mice.impute.2lonly.mean(y, ry, x, type, wy1) # check what happens if all values within a class are missing yn[1:100] <- NA imn <- mice.impute.2lonly.mean(yn, ry, x, type, wy1) zn <- table(imn, useNA = "al") 
y[1:100] <- NA imf <- mice.impute.2lonly.mean(y, ry, x, type, wy1) zf <- table(imf, useNA = "al") test_that("Return NA for classes without values", { expect_equal(as.numeric(zn[length(zn)]), 39) expect_equal(as.numeric(zf[length(zf)]), 39) }) mice/tests/testthat/test-mice-initialize.R0000644000176200001440000002116413416657163020365 0ustar liggesuserscontext("mice-initialize") data <- nhanes # case A: no predictorMatrix, blocks or formulas arguments imp1 <- mice(data, print = FALSE, m = 1, maxit = 1) pred <- imp1$predictorMatrix form <- imp1$formulas test_that("Case A finds blocks", { expect_identical(names(imp1$blocks), colnames(data)) }) test_that("Case A finds formulas", { expect_identical(attr(terms(form[["bmi"]]), "term.labels"), names(pred["bmi", ])[pred["bmi", ] == 1]) }) # case B: only predictorMatrix argument pred1 <- matrix(1, nrow = 4, ncol = 4) pred2 <- matrix(1, nrow = 2, ncol = 2) pred3 <- matrix(1, nrow = 2, ncol = 2, dimnames = list(c("bmi", "hyp"), c("bmi", "hyp"))) pred4 <- matrix(1, nrow = 2, ncol = 3, dimnames = list(c("bmi", "hyp"), c("bmi", "hyp", "chl"))) imp1 <- mice(data, predictorMatrix = pred1, print = FALSE, m = 1, maxit = 1) imp3 <- mice(data, predictorMatrix = pred3, print = FALSE, m = 1, maxit = 1) test_that("Case B tests the predictorMatrix", { expect_equal(nrow(imp1$predictorMatrix), 4L) expect_error(mice(data, predictorMatrix = pred2, "Missing row/column names in `predictorMatrix`.")) expect_equal(nrow(imp3$predictorMatrix), 2L) expect_error(mice(data, predictorMatrix = pred4)) }) pred <- imp3$predictorMatrix blocks <- imp3$blocks test_that("Case B finds blocks", { expect_identical(names(blocks), c("bmi", "hyp")) }) form <- imp3$formulas test_that("Case B finds formulas", { expect_identical(attr(terms(form[["bmi"]]), "term.labels"), names(pred["bmi", ])[pred["bmi", ] == 1]) }) # Case C: Only blocks argument imp1.0 <- mice(data, blocks = list("bmi", "chl", "hyp"), m = 1, maxit = 0, seed = 11) imp2.0 <- mice(data, blocks = 
list(c("bmi", "chl"), "hyp"), m = 1, maxit = 0, seed = 11) imp3.0 <- mice(data, blocks = list(all = c("bmi", "chl", "hyp")), m = 1, maxit = 0, seed = 11) test_that("Case C imputations are identical after initialization", { expect_identical(complete(imp1.0), complete(imp2.0)) expect_identical(complete(imp1.0), complete(imp3.0)) }) imp1 <- mice(data, blocks = list("bmi", "chl", "hyp"), print = FALSE, m = 1, maxit = 1, seed = 11) imp2 <- mice(data, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, m = 1, maxit = 1, seed = 11) imp3 <- mice(data, blocks = list(all = c("bmi", "chl", "hyp")), print = FALSE, m = 1, maxit = 1, seed = 11) test_that("Case C finds blocks", { expect_identical(names(imp2$blocks), c("B1", "hyp")) expect_identical(names(imp3$blocks), c("all")) }) test_that("Case C finds predictorMatrix", { expect_identical(imp2$predictorMatrix["hyp", "hyp"], 0) expect_identical(dim(imp3$predictorMatrix), c(1L, 4L)) }) test_that("Case C finds formulas", { expect_identical(sort(all.vars(imp2$formulas[["B1"]])), sort(colnames(data))) }) test_that("Case C yields same imputations for FCS and multivariate", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp1), complete(imp3)) }) # Case D: Only formulas argument # univariate models form1 <- list(bmi ~ age + hyp + chl, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp) imp1 <- mice(data, formulas = form1, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199) # same model using dot notation form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) imp2 <- mice(data, formulas = form2, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199) # multivariate models (= repeated univariate) form3 <- list(bmi + hyp ~ age + chl, chl ~ age + bmi + hyp) imp3 <- mice(data, formulas = form3, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199) # same model using dot notation form4 <- list(bmi + hyp ~ ., chl ~ .) 
imp4 <- mice(data, formulas = form4, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199) test_that("Case D yields same imputations for dot notation", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp3), complete(imp4)) }) test_that("Case D yields same imputations for FCS and multivariate", { expect_equal(complete(imp1), complete(imp3)) expect_equal(complete(imp2), complete(imp4)) }) # Case E: predictMatrix and blocks blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) blocks2 <- make.blocks(list(c("bmi", "chl"), "hyp")) blocks3 <- make.blocks(list(all = c("bmi", "chl", "hyp"))) pred1 <- make.predictorMatrix(data, blocks = blocks1) pred2 <- make.predictorMatrix(data, blocks = blocks2) pred3 <- make.predictorMatrix(data, blocks = blocks3) imp1 <- mice(data, blocks = blocks1, pred = pred1, m = 1, maxit = 1, print = FALSE) imp1a <- mice(data, blocks = blocks1, pred = matrix(1, nr=4, nc=4), m = 1, maxit = 1, print = FALSE) imp2 <- mice(data, blocks = blocks2, pred = pred2, m = 1, maxit = 1, print = FALSE) imp2a <- mice(data, blocks = blocks2, pred = matrix(1, nr=2, nc=4), m = 1, maxit = 1, print = FALSE) imp3 <- mice(data, blocks = blocks3, pred = pred3, m = 1, maxit = 1, print = FALSE) imp3a <- mice(data, blocks = blocks3, pred = matrix(1, nr=1, nc=4), m = 1, maxit = 1, print = FALSE) test_that("Case E borrows rownames from blocks", { expect_identical(rownames(imp1a$predictorMatrix), names(blocks1)) expect_identical(rownames(imp2a$predictorMatrix), names(blocks2)) expect_identical(rownames(imp3a$predictorMatrix), names(blocks3)) }) test_that("Case E borrows colnames from data", { expect_identical(colnames(imp1a$predictorMatrix), names(data)) expect_identical(colnames(imp2a$predictorMatrix), names(data)) expect_identical(colnames(imp3a$predictorMatrix), names(data)) }) test_that("Case E name setting fails on incompatible sizes", { expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr=2, nc=2)), "Unable to set 
column names of predictorMatrix") expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr=1, nc=4)), "Unable to set row names of predictorMatrix") expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr=4, nc=4))) }) colnames(pred1) <- c("A", "B", "chl", "bmi") pred2a <- pred2[, -(1:4), drop = FALSE] test_that("Case E detects incompatible arguments", { expect_error(mice(data, blocks = blocks1, pred = pred1), "Names not found in data: A, B") expect_error(mice(data, blocks = blocks1, pred = pred2), "Names not found in blocks: B1") expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr=1, nc=4)), "Unable to set row names of predictorMatrix") expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr=4, nc=4))) expect_error(mice(data, blocks = blocks2, pred = pred2a), "predictorMatrix has no rows or columns") }) # Case F: predictMatrix and formulas blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) blocks2 <- make.blocks(list(c("bmi", "hyp"), "hyp")) pred1 <- make.predictorMatrix(data, blocks = blocks1) pred2 <- make.predictorMatrix(data, blocks = blocks2) form1 <- list(bmi ~ age + hyp + chl, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp) form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) form3 <- list(bmi + hyp ~ age + chl, chl ~ age + bmi + hyp) form4 <- list(bmi + hyp ~ ., chl ~ .) 
# blocks1 and form1 are compatible imp1 <- mice(data, formulas = form1, pred = matrix(1, nr = 4, nc = 4), m = 1, maxit = 1, print = FALSE, seed = 3) test_that("Case F combines forms and pred in blocks", { expect_identical(unname(attr(imp1$blocks, "calltype")), c(rep("formula", 3), "type")) }) # dots and unnamed predictorMatrix imp2 <- mice(data, formulas = form2, pred = matrix(1, nr = 4, nc = 4), m = 1, maxit = 1, print = FALSE, seed = 3) test_that("Case F dots and specified form produce same imputes", { expect_identical(complete(imp1), complete(imp2)) }) # error test_that("Case F generates error if it cannot handle non-square predictor", { expect_error(mice(data, formulas = form2, pred = pred2), "If no blocks are specified, predictorMatrix must have same number of rows and columns") }) ## Error in formulas[[h]] : subscript out of bounds imp3 <- mice(data, formulas = form3, pred = pred1, m = 1, maxit = 0, print = FALSE, seed = 3) imp3a <- mice(data, formulas = form3, pred = pred1, m = 1, maxit = 1, print = FALSE, seed = 3) # err on matrix columns nh <- nhanes nh$hyp <- as.matrix(nh$hyp) test_that("MICE does not accept data.frames with embedded matrix ", { expect_error(mice(nh), "Cannot handle columns with class matrix: hyp") }) mice/tests/testthat/test-check.visitSequence.R0000644000176200001440000000044613416657163021214 0ustar liggesuserscontext("check.visitSequence") data <- nhanes test_that("mice() takes numerical and character visitSequence", { expect_silent(imp <- mice(data, visitSequence = 4:1, m = 1, print = FALSE)) expect_silent(imp <- mice(data, visitSequence = rev(names(data)), m = 1, print = FALSE)) }) mice/tests/testthat/test-md.pattern.R0000644000176200001440000000114313416657163017360 0ustar liggesuserscontext("md.pattern") test_that("patterns run as expected", { #boys expect_silent(md.pattern(boys, plot = FALSE)) #nhanes expect_silent(md.pattern(nhanes, plot = FALSE)) #one whole column missing (single pattern) - should not produce output 
expect_silent(md.pattern(cbind(na.omit(nhanes), NA), plot = FALSE)) #no missings (no pattern) - should produce output expect_output(md.pattern(na.omit(nhanes), plot = FALSE)) #feed single column - expect error expect_error(md.pattern(nhanes$bmi)) #feed other than dataframe/matrix - expect error expect_error(md.pattern(as.list(nhanes))) # }) mice/tests/testthat/test-D1.R0000644000176200001440000000321613416664706015554 0ustar liggesuserscontext("D1") imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ age + chl, family = binomial)) fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) empty <- with(data = imp, expr = glm(hyp == "yes" ~ 0, family = binomial)) # stat1 <- pool.compare(fit1, fit0, method = "wald") # deprecated because it relies on full vcov, which is not present anymore # in the mipo object # the next tests were remove because they failed on many # systems, not yet clear what the cause is (#128) # This is solved in #132 # three new ways to compare fit1 to the intercept-only model z1 <- D1(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), df.com = 21) z3 <- D1(fit1) test_that("compares fit1 to the intercept-only model", { expect_identical(z1$result, z2$test) expect_identical(z1$test, z3$test) }) # two ways to compare fit1 to the empty model z4 <- D1(fit1, empty) z5 <- mitml::testModels(as.mitml.result(fit1), NULL, df.com = 21) test_that("compares fit1 to empty model", { expect_identical(z4$result, z5$test) }) context("D2") z1 <- D2(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D2") z3 <- D2(fit1) test_that("compares fit1 to the intercept-only model", { expect_identical(z1$result, z2$test) expect_identical(z1$test, z3$test) }) # two ways to compare fit1 to the empty model z4 <- D2(fit1, empty) z5 <- mitml::testModels(as.mitml.result(fit1), NULL, method = "D2") test_that("compares fit1 to empty model", { 
expect_identical(z4$result, z5$test) }) mice/tests/testthat/test-loggedEvents.R0000644000176200001440000000060313416657163017732 0ustar liggesuserscontext("mice: loggedEvents") imp1 <- mice(nhanes, m = 2, print = FALSE) # copy of data, different names data2 <- cbind(nhanes, nhanes) colnames(data2)[5:8] <- c("age2", "bmi2", "hyp2", "chl2") imp2 <- suppressWarnings(mice(data2, m = 2, print = FALSE)) test_that("loggedEvents is NULL", { expect_null(imp1$loggedEvents) expect_type(imp2$loggedEvents, "list") }) mice/tests/testthat/test-blots.R0000644000176200001440000000117213416657163016431 0ustar liggesuserscontext("blots") # global change of donors argument blocks1 <- name.blocks(list(c("bmi", "chl"), "hyp")) imp0 <- mice(nhanes, blocks = blocks1, donors = 10, m = 1, maxit = 1, print = FALSE) # vary donors, depending on block blots1 <- list(B1 = list(donors = 10), hyp = list(donors = 1)) imp1 <- mice(nhanes, blocks = blocks1, blots = blots1, m = 1, maxit = 1, print = FALSE) test_that("errors when mixing same global and local argument", { expect_error(mice(nhanes, blocks = blocks1, blots = blots1, donors = 7, print = FALSE), 'formal argument "donors" matched by multiple actual arguments') }) mice/tests/testthat/test-anova.R0000644000176200001440000000103113621214305016365 0ustar liggesuserscontext("anova") imp <- mice(nhanes2, m = 10, print = FALSE, seed = 71242) m2 <- with(imp, lm(chl ~ age + bmi)) m1 <- with(imp, lm(chl ~ bmi)) m0 <- with(imp, lm(chl ~ 1)) # anova methods test_that("anova.mira() produces silent D1 and D3", { expect_silent(z1 <- anova(m2, m1, m0)) expect_silent(z3 <- anova(m2, m1, m0, method = "D3")) }) test_that("anova.mira() produces warnings on D2", { expect_warning(z2a <- anova(m2, m1, m0, method = "D2")) expect_warning(z2b <- anova(m2, m1, m0, method = "D2", use = "likelihood")) }) mice/tests/testthat/test-check.formula.R0000644000176200001440000000466013416657163020034 0ustar liggesuserscontext("check.formulas") data <- nhanes where <- 
is.na(data) # blocks <- name.blocks(list("bmi", "age", "chl")) # ini <- mice(data, blocks = blocks, maxit = 0) # # # classic type specification # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = NULL) # # v1 <- mice:::check.formulas(setup, data) # # # using a formula # #formulas <- v1$formulas # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = formulas) # #v2 <- mice:::check.formulas(setup, data) # #v2$formulas # # test_that("updates `mode.formula` attribute", { # # expect_false(identical(v2$formulas, v2$formulas.arg)) # # expect_identical(v2$formulas[[1]], v2$formulas.arg[[1]]) # }) # # # try dot in formula # formulas <- list(bmi ~ ., age ~ ., chl ~ .) # formulas <- name.formulas(formulas) # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = formulas) # #v3 <- mice:::check.formulas(setup, data) # #v3$formulas # # # classic specification using predictorMatrix # imp1 <- mice(nhanes, seed = 51212, print = FALSE, m = 1) # cmp1 <- complete(imp1) # # # formula specification # form <- list(age ~ ., bmi ~ ., hyp ~., chl ~ .) 
# imp2 <- mice(nhanes, formulas = form, seed = 51212, print = FALSE, m = 1) # cmp2 <- complete(imp2) # # test_that("predictorMatrix and formula yield same imputations", { # expect_identical(cmp1, cmp2) # expect_identical(imp1$imp, imp2$imp) # }) # formula specification form <- name.blocks(list(bmi ~ ., hyp ~., chl ~ .)) imp3 <- mice(nhanes, formulas = form, seed = 51212, print = FALSE, m = 1) cmp3 <- complete(imp3) # old.form <- c("", "bmi ~ chl + hyp", "hyp ~ bmi + chl", "chl ~ bmi + hyp") # imp <- mice(nhanes, formula = old.form, m = 1, maxit = 2, print = FALSE) # # form1 <- list(bmi = ~ 1, chl = ~ 1, hyp = ~ 1) # # impute given predictors # imp1 <- mice(nhanes, formula = form1, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # # impute the mean # imp2 <- mice(nhanes, formula = form1, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # # form2 <- list(bmi = "hyp ~ 1", chl = "hyp ~ 1", hyp = "hyp ~ 1") # imp3 <- mice(nhanes, formula = form2, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # mice/tests/testthat/test-remove.lindep.R0000644000176200001440000000342213416657163020055 0ustar liggesuserscontext("remove.lindep") set.seed(1) td <- matrix(rnorm(20), nrow = 5, ncol = 4) dimnames(td) <- list(1:5, LETTERS[1:4]) y <- td[, 1] ry <- rep(TRUE, 5) # data frame for storing the event log state <- list(it = 0, im = 0, dep = "y", meth = "test", log = FALSE) loggedEvents <- data.frame(it = 0, im = 0, dep = "", meth = "", out = "") fr <- 2 state$meth <- "k1" x <- td[, 2:4] k1 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k2" x[, 2] <- x[, 1] k2 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k3" x[, 3] <- 2 * x[, 1] k3 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k4" x <- td[, 2:4] y <- x[, 2] k4 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k5" x <- td[, 2:4] y <- x[, 2] x[, 3] <- x[, 1] <- x[, 2] k5 <- mice:::remove.lindep(x, y, ry, frame = fr) # 
one column x, same as y --> FALSE state$meth <- "k6" x <- td[, 2, drop = FALSE] y <- x[, 1] k6 <- mice:::remove.lindep(x, y, ry, frame = fr) # one column x, different from u --> TRUE state$meth <- "k7" x <- td[, 2, drop = FALSE] y <- td[, 1] k7 <- mice:::remove.lindep(x, y, ry, frame = fr) # two columns, same x and y --> FALSE, FALSE state$meth <- "k8" x <- td[, 2:3, drop = FALSE] x[, 2] <- x[, 1] y <- x[, 1] k8 <- mice:::remove.lindep(x, y, ry, frame = fr) loggedEvents test_that("removes copies", { expect_identical(unname(k1), c(TRUE, TRUE, TRUE)) expect_identical(unname(k2), c(FALSE, TRUE, TRUE)) # expect_identical(unname(k3), c(FALSE, FALSE, TRUE)) expect_identical(unname(k4), c(TRUE, FALSE, TRUE)) expect_identical(unname(k5), c(FALSE, FALSE, FALSE)) expect_identical(unname(k6), c(FALSE)) expect_identical(unname(k7), c(TRUE)) expect_identical(unname(k8), c(FALSE, FALSE)) }) mice/tests/testthat/test-mira.R0000644000176200001440000000030013416657163016226 0ustar liggesuserscontext("mira") imp <- mice(nhanes, print = FALSE, maxit = 1, seed = 121, m = 1) fit <- with(imp, sd(bmi)) test_that("list prints without an error", { expect_output(print(fit)) }) mice/tests/testthat/test-update.design.R0000644000176200001440000000263013416657163020040 0ustar liggesuserscontext("update.design") data <- nhanes2 design <- mice:::obtain.design(data) d1 <- mice:::update.design(design, data, varname = "age") d2 <- mice:::update.design(design, data, varname = "bmi") d3 <- mice:::update.design(design, data, varname = "hyp") d4 <- mice:::update.design(design, data, varname = ".") d5 <- mice:::update.design(design, data, varname = "xxxx") d6 <- mice:::update.design(design, data, varname = "") d7 <- mice:::update.design(design, data, varname = c("xxx", "age")) test_that("updates itself", { expect_identical(design, d1) expect_identical(design, d2) expect_identical(design, d3) expect_identical(design, d4) expect_identical(design, d5) expect_identical(design, d6) expect_identical(design, d7) 
}) data <- nhanes2 design <- mice:::obtain.design(data) data$age[1:4] <- data$age[4:1] data$bmi[1:4] <- data$bmi[4:1] data$hyp[1:4] <- data$hyp[4:1] d1 <- mice:::update.design(design, data, varname = "age") d2 <- mice:::update.design(design, data, varname = "bmi") d3 <- mice:::update.design(design, data, varname = ".") test_that("caries through selected row reversals", { expect_identical(as.vector(design[1:4, 2:3]), as.vector(d1[4:1, 2:3])) expect_identical(as.vector(design[1:4, 4]), as.vector(d2[4:1, 4])) expect_identical(as.vector(design[1:4, 2:5]), as.vector(d3[4:1, 2:5])) }) mice/tests/testthat/test-D3.R0000644000176200001440000000461713621065117015551 0ustar liggesuserscontext("D3") imp <- mice(nhanes, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = lm(hyp ~ age + chl)) fit0 <- with(data = imp, expr = lm(hyp ~ 1)) empty <- with(data = imp, expr = lm(hyp ~ 0)) # stat1 <- pool.compare(fit1, fit0, method = "likelihood") z1 <- D3(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # This test fails # FIXME # test_that("lm: mice and mitml calculate same F", { # expect_equal(z1$Dm, unname(z2$test[1, "F.value"])) # }) # using lmer suppressPackageStartupMessages(library(mitml, quietly = TRUE)) library(lme4, quietly = TRUE) data(studentratings) fml <- ReadDis + SES ~ ReadAchiev + (1|ID) set.seed(26262) imp <- mitml::panImpute(studentratings, formula=fml, n.burn=1000, n.iter=100, m=5, silent = TRUE) implist <- mitml::mitmlComplete(imp, print=1:5) fit0 <- with(implist, lmer(ReadAchiev ~ (1|ID), REML=FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1|ID), REML=FALSE)) # likelihood test z3 <- D3(fit1, fit0) z4 <- mitml::testModels(fit1, fit0, method = "D3") # This test fails. 
# FIXME # test_that("lmer: mice and mitml calculate same F", { # expect_equal(z3$Dm, unname(z4$test[1, "F.value"])) # }) # glm # imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) # # fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ age + chl, family = binomial)) # fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) # empty <- with(data = imp, expr = glm(hyp == "yes" ~ 0, family = binomial)) # # model dev1.L does not look right, negative Dm, convergence problems # FIXME # z5 <- D3(fit1, fit0) # mitml can't do this case # z6 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # crashes on terms # FIXME # z5a <- D3(fit1, empty) # This test fails. # FIXME # test_that("glm: mice and mitml calculate same F", { # expect_equal(z5$Dm, unname(z6$test[1, "F.value"])) # }) # data with factors imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = lm(bmi ~ age + chl + hyp)) fit0 <- with(data = imp, expr = lm(bmi ~ age)) empty <- with(data = imp, expr = lm(bmi ~ 0)) z7 <- D3(fit1, fit0) z8 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # This test fails. 
# FIXME # test_that("factors: mice and mitml calculate same F", { # expect_equal(z7$Dm, unname(z8$test[1, "F.value"])) # }) mice/tests/testthat/test-mice.impute.2l.norm.R0000644000176200001440000000052313416657163021012 0ustar liggesuserscontext("mice.impute.2l.norm") d1 <- brandsma[1:200, c("sch", "lpo")] pred <- make.predictorMatrix(d1) pred["lpo", "sch"] <- -2 test_that("mice::mice.impute.2l.norm() runs empty model", { expect_silent(imp <- mice(d1, method = "2l.norm", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-cbind.R0000644000176200001440000001030113416664706016360 0ustar liggesuserscontext("cbind.mids") data <- nhanes data1 <- data[, c("age", "bmi")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) test_that("combines imputations", { expect_identical(ncol(complete(imp)), 4L) expect_identical(complete(imp1), complete(imp)[, c("age", "bmi")]) }) # when using blocks data <- nhanes data1 <- data[, c("age", "bmi")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 1, maxit = 1, print = FALSE) imp2 <- mice(data2, blocks = list(c("hyp", "chl")), m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) test_that("combines imputations with blocks", { expect_identical(ncol(complete(imp)), 4L) expect_identical(complete(imp1), complete(imp)[, c("age", "bmi")]) }) # handling of duplicate variable names data <- nhanes data1 <- data[, c("age", "bmi", "hyp")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 1, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) impc <- mice.mids(imp, max = 2, print = FALSE) test_that("duplicate variable adds a column", { expect_identical(ncol(complete(impc)), 5L) }) # handling of duplicate blocks imp1 <- mice(data1, blocks = list(c("age", "bmi"), "hyp"), m = 1, maxit = 1, print = FALSE) imp2 <- 
mice(data2, blocks = list(c("hyp", "chl")), m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) impc <- mice.mids(imp, max = 2, print = FALSE) test_that("duplicate blocks names renames block", { expect_identical(names(impc$blocks)[3], "B1.1") }) # cbind - no second argument imp1 <- mice(nhanes, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) imp2 <- cbind(imp1) imp3 <- cbind(imp1, NULL) imp4 <- cbind(imp1, character(0)) test_that("returns imp1 object if there is nothing to bind", { expect_identical(imp2, imp1) expect_identical(imp3, imp1) expect_identical(imp4, imp1) }) # cbind - unnamed constant imp2 <- cbind(imp1, 1) imp3 <- cbind(imp1, NA) imp4 <- cbind(imp1, "male") test_that("replicates unnamed constant", { expect_identical(ncol(complete(imp2)), 5L) expect_identical(ncol(complete(imp3)), 5L) expect_identical(ncol(complete(imp4)), 5L) }) imp6 <- cbind(imp1, int = 51:75, out = 15, NA) test_that("appends names vectors and constants", { expect_identical(ncol(complete(imp6)), 7L) expect_error(cbind(imp1, c(NA, 9)), "arguments imply differing number of rows: 25, 2") }) # matrix, factor, data.frame # NOTE: cbind() dispatches to wrong function if there is a data.frame # so use cbind.mids() imp8 <- mice:::cbind.mids(imp1, ma = matrix(1:50, nrow = 25, ncol = 2), age = nhanes2$age, df = nhanes2[, c("hyp", "chl")]) test_that("appends matrix, factor and data.frame", { expect_identical(ncol(complete(imp8)), 9L) }) # impc <- mice.mids(imp8, max = 2, print = FALSE) # NOTE: now using own version of cbind() imp9 <- cbind(imp1, ma = matrix(1:50, nrow = 25, ncol = 2), age = nhanes2$age, df = nhanes2[, c("hyp", "chl")]) test_that("appends matrix, factor and data.frame", { expect_identical(ncol(complete(imp9)), 9L) }) impc <- mice.mids(imp9, max = 2, print = FALSE) test_that("combined object works as input to mice.mids", { expect_true(is.mids(impc)) }) test_that("cbind does not throw a warning (#114)", { expect_silent(cbind(ordered(c(1,2)))) }) # # 
cbind data.frame (rename to age.1) # imp1 <- mice(nhanes, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) # agevar <- nhanes$age # agevar[1:5] <- NA # imp2 <- mice:::cbind.mids(imp1, data.frame(age = agevar, hyp = "test")) # imp3 <- mice.mids(imp2, max = 2, print = FALSE) # complete(imp3) # # # cbind data.frame (use quoted name) # imp1 <- mice(nhanes, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) # agevar <- nhanes$age # agevar[1:5] <- NA # imp2 <- mice:::cbind.mids(imp1, age = agevar, hyp = "test") # imp3 <- mice.mids(imp2, max = 2, print = FALSE) # complete(imp3) # mice/tests/testthat/test-mice.impute.2lonly.norm.R0000644000176200001440000000274413556602652021721 0ustar liggesuserscontext("mice.impute.2lonly.norm") ## https://stackoverflow.com/questions/58266785/mice-2l-pan-multilevel-multiple-imputation-error-missing-values-in-pred-not-all?sem=2 library("pan") #Not multilevel to illustrate need set.seed(100) data <- data.frame(patid = rep(1:4, each = 5), sex = rep(c(1, 2, 1, 2), each = 5), crp = c(68, 78, 93, NA, 143, 5, 7, 9, 13, NA, 97, NA, 56, 52, 34, 22, 30, NA, NA, 45)) pred <- make.predictorMatrix(data) pred[, "patid"] <- -2 # only missing value (out of five) for patid == 1 data[3, "sex"] <- NA test_that("2lonly.norm stops with partially missing level-2 data", { expect_error(mice(data, method = c("","2lonly.norm", "2l.pan"), predictorMatrix = pred, maxit = 1, m = 2, print = FALSE), "Method 2lonly.norm found the following clusters with partially missing\n level-2 data: 1\n Method 2lonly.mean can fix such inconsistencies.") }) set.seed(66322) y <- popmis[1:200, "texp"] x <- popmis[1:200, c("pupil", "school", "sex")] y[x$school %in% 1:3] <- NA ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) type <- c(1, -2, 1) yn <- y y <- as.numeric(y) set.seed(1) z1 <- mice.impute.2lonly.norm(y, ry = ry, x, type) z2 <- 
mice.impute.2lonly.pmm(y, ry = ry, x, type) mice/tests/testthat/test-formulas.R0000644000176200001440000000126313416657163017137 0ustar liggesuserscontext("formulas") data <- nhanes test_that("model.matrix() deletes incomplete cases", { expect_identical(dim(model.matrix(~ age, data)), c(25L, 2L)) expect_identical(dim(model.matrix(~ chl, data)), c(15L, 2L)) expect_identical(dim(model.matrix(~ poly(age), data)), c(25L, 2L)) expect_error(model.matrix(~ poly(chl), data), "missing values are not allowed in 'poly'") expect_identical(dim(model.matrix(~ poly(chl, raw = TRUE), data)), c(15L, 2L)) }) # in MICE we can now use poly() form <- list(bmi ~ poly(chl, 2) + age + hyp) pred <- make.predictorMatrix(nhanes) imp1 <- mice(data, form = form, pred = pred, m = 1, maxit = 2, print = FALSE) mice/tests/testthat/test-parlmice.R0000644000176200001440000000500113416664706017076 0ustar liggesusers#Same seed - multiple cores - #Result: Imputations not equal between mice and parlmice test_that("Warning and Imputations between mice and parlmice are unequal", { skip_if_not(parallel::detectCores() > 2) expect_warning(A <- parlmice(nhanes, m = 2, seed = 123)) B <- mice(nhanes, m = 2, print = FALSE, seed = 123) expect_false(all(complete(A, "long") == complete(B, "long"))) }) #Same seed - single core - #Result: Imputations equal between mice and parlmice test_that("Imputations are equal between mice and parlmice", { C <- parlmice(nhanes, n.core = 1, n.imp.core = 5, seed = 123) D <- mice(nhanes, m = 5, print = FALSE, seed = 123) expect_identical(complete(C, "long"), complete(D, "long")) }) #Should return m = 8 I <- parlmice(nhanes, n.core = 2, n.imp.core = 4) test_that("Cores and n.imp.core specified. 
Override m", { expect_identical(I$m, 2*4) }) #Should return m = 2x5=10 test_that("n.imp.core not specified", { expect_warning(J <- parlmice(nhanes, n.core = 2)) expect_identical(J$m, 2 * 5) }) #Should return m = 2x7=14 test_that("n.imp.core not specified", { expect_warning(K <- parlmice(nhanes, n.core = 2, m = 7)) expect_identical(K$m, 2 * 7) }) #Should return error test_that("n.core larger than logical CPU cores", { expect_error(parlmice(nhanes, n.core = parallel::detectCores() + 1)) }) # # NOT RUN ON R CMD CHECK AND CRAN CHECK - TOO MANY PARALLEL PROCESSES SPAWNED # # Should return m = n.imp.core * (parallel::detectCores() - 1) # test_that("Warning because n.core not specified", { # expect_warning(H <- parlmice(nhanes, n.imp.core = 3)) # expect_identical(H$m, 3 * (parallel::detectCores() - 1)) # }) # # #Same cluster.seed - multiple cores # #Result: Imputations equal between parlmice instances # imp1 <- parlmice(nhanes, m=2, cluster.seed = 123) # imp2 <- parlmice(nhanes, m=2, cluster.seed = 123) # test_that("cluster.seed", { # expect_equal(imp1, imp2) # }) # # #Should run without failure # df <- boys # meth <- make.method(df) # pred <- make.predictorMatrix(df) # visit <- 9:1 # imp3 <- parlmice(df, method = meth, # predictorMatrix = pred, # visitSequence = visit, # n.core = 2, # n.imp.core = 4, # maxit = 3, # cluster.seed = 123) # test_that("Runs when overriding defaults", { # expect_identical(imp3$pred, pred) # expect_identical(imp3$iteration, 3) # expect_identical(imp3$method, meth) # expect_identical(imp3$visitSequence, names(df)[visit]) # expect_identical(imp3$m, 2*4) # }) mice/tests/testthat/test-pool.R0000644000176200001440000001347413621065377016265 0ustar liggesuserscontext("pool") # set the random generator to V3.5.0 to ensure that this test # passes in V3.6.0 and later # see mail Kurt Hornik, dated 06mar19 # FIXME: consider using the new generator once V3.6.0 is out, # at the expense of breaking reproducibility of the examples in #
https://stefvanbuuren.name/fimd/ suppressWarnings(RNGversion("3.5.0")) imp <- mice(nhanes2, print = FALSE, maxit = 2, seed = 121) fit <- with(imp, lm(bmi ~ chl + age + hyp)) est <- pool(fit) #fitlist <- fit$analyses #est <- mice:::pool.fitlist(fitlist) mn <- c(18.76175, 0.05359003, -4.573652, -6.635969, 2.163629) se <- c(4.002796, 0.02235067, 2.033986, 2.459769, 2.02898) test_that("retains same numerical result", { expect_equal(unname(getqbar(est)), mn, tolerance = 0.00001) expect_equal(unname(summary(est)[, "std.error"]), se, tolerance = 0.00001) }) imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ chl + bmi, family = binomial)) D1(fit1, fit0) D3(fit1, fit0) # test_that("retains same numerical result", { # expect_equal(round(as.vector(stat1$pvalue), 3), 0.188) # expect_equal(stat2$pvalue, 0) # }) # # Venables & Ripley, 2nd, p 235 birthwt <- MASS::birthwt bwt <- with(birthwt, data.frame( low = factor(low), age = age, lwt = lwt, race = factor(race, labels = c("white", "black", "other")), smoke = smoke > 0, ptd = factor(ptl > 0), ht = ht > 0, ui = ui > 0, ftv = factor(ftv))) levels(bwt$ftv)[-(1:2)] <- "2" birthwt.glm <- glm(low ~ ., family = binomial, data = bwt) summary(birthwt.glm) birthwt.step <- step(birthwt.glm, trace = FALSE) LLlogistic <- function(formula, data, coefs) { ### Calculates -2 loglikelihood of a model. logistic <- function(mu) exp(mu)/(1 + exp(mu)) Xb <- model.matrix(formula, data) %*% coefs y <- model.frame(formula, data)[1][, 1] if (is.factor(y)) y <- (0:1)[y] p <- logistic(Xb) ## in case values of categorical var are other than 0 and 1. 
y <- (y - min(y))/(max(y) - min(y)) term1 <- term2 <- rep(0, length(y)) term1[y != 0] <- y[y != 0] * log(y[y != 0]/p[y != 0]) term2[y == 0] <- (1 - y[y == 0]) * log((1 - y[y == 0])/(1 - p[y == 0])) 2 * sum(term1 + term2) } model1 <- glm(low ~ ., family = binomial, data = bwt) model0 <- update(model1, formula = . ~ . -age - ftv) model.null <- update(model1, formula = . ~ 1 ) ll1 <- LLlogistic(formula = formula(model1), data = bwt, coefs = coef(model1)) ll0 <- LLlogistic(formula = formula(model0), data = bwt, coefs = coef(model0)) llnull <- LLlogistic(formula = formula(model.null), data = bwt, coefs = coef(model.null)) identical(deviance(model1), ll1, num.eq = FALSE) identical(deviance(model0), ll0, num.eq = FALSE) identical(deviance(model.null), llnull, num.eq = FALSE) # try out coef.fix for binary data f1 <- fix.coef(model1, beta = coef(model1)) broom::glance(model1) broom::glance(f1) identical(broom::glance(f1)$deviance, broom::glance(model1)$deviance) beta <- coef(model1) beta["age"] <- 0 beta["smokeTRUE"] <- 0 f2 <- fix.coef(model1, beta) broom::glance(f2)$deviance set.seed(123) bwt.mis <- bwt bwt.mis$smoke[runif(nrow(bwt)) < 0.001] <- NA bwt.mis$lwt[runif(nrow(bwt)) < 0.01] <- NA imp <- mice(bwt.mis, print = FALSE, m = 10) fit1 <- with(data = imp, expr = glm(low ~ age + lwt + race + smoke + ptd + ht + ui + ftv, family = binomial)) fit0 <- with(data = imp, glm(low ~ lwt + race + smoke + ptd + ht + ui, family = binomial)) D1(fit1, fit0) D3(fit1, fit0) # --- test restriction of parameters # all parameters estimated fit <- lm(bmi ~ age + hyp + chl, data = nhanes) coef(fit) formula(fit) newformula <- bmi ~ 0 + I(18.26966503 - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) newformula <- . 
~ 0 + I(18.26966503 * 1L - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) fit2 <- update(fit, formula = newformula) coef(fit2) summary(fit) summary(fit2) cor(predict(fit), predict(fit) + residuals(fit))^2 cor(predict(fit2), predict(fit2) + residuals(fit2))^2 newformula <- bmi ~ 0 + offset(18.26966503 - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) fit3 <- update(fit, formula = newformula) coef(fit3) summary(fit3) cor(predict(fit3), predict(fit3) + residuals(fit3))^2 # compare to mitml::anova.mitml.result suppressPackageStartupMessages(library(mitml, quietly = TRUE)) library(lme4, quietly = TRUE) data(studentratings) fml <- ReadDis + SES ~ ReadAchiev + (1|ID) imp <- mitml::panImpute(studentratings, formula=fml, n.burn=1000, n.iter=100, m=5, silent = TRUE) implist <- mitml::mitmlComplete(imp, print=1:5) # * Example 1: multiparameter hypothesis test for 'ReadDis' and 'SES' # This tests the hypothesis that both effects are zero. fit0 <- with(implist, lmer(ReadAchiev ~ (1|ID), REML=FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + (1|ID), REML=FALSE)) # apply Rubin's rules testEstimates(fit1) # Wald test # multiparameter hypothesis test using D1 (default) mitml::testModels(fit1, fit0) # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "wald") # Is the same, but probably consequence of single parameter differerence # Wald test - multiparameter difference - incorrect because now our # ubar is vector, not a matrix anymore fit0 <- with(implist, lmer(ReadAchiev ~ (1|ID), REML=FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1|ID), REML=FALSE)) mitml::testModels(fit1, fit0) # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "wald") # Is the same, but probably consequence of single parameter differerence # likelihood test mitml::testModels(fit1, fit0, method = "D3") # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "likelihood") # --- fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1|ID), 
REML=FALSE)) mice/tests/testthat.R0000755000176200001440000000007013416657163014330 0ustar liggesuserslibrary(testthat) library(mice) test_check("mice") mice/src/0000755000176200001440000000000013623760731011766 5ustar liggesusersmice/src/match.cpp0000644000176200001440000000376113416657163013601 0ustar liggesusers#include #include using namespace std; using namespace Rcpp; // [[Rcpp::export]] IntegerVector matcher(NumericVector obs, NumericVector mis, int k) { // fast predictive mean matching algorithm // for each of the n0 elements in mis // 1) calculate the difference with obs // 2) add small noise to break ties // 3) find the k indices of the k closest predictors // 4) randomly draw one index // and return the vector of n0 matched positions // SvB 26/01/2014 // declarations int jj; int n1 = obs.size(); int n0 = mis.size(); double dk = 0; int count = 0; int goal = 0; NumericVector d(n1); NumericVector d2(n1); IntegerVector matched(n0); // restrict 1 <= k <= n1 k = (k <= n1) ? k : n1; k = (k >= 1) ? 
k : 1; // in advance, uniform sample from k potential donors NumericVector which = floor(runif(n0, 1, k + 1)); NumericVector mm = range(obs); double small = (mm[1] - mm[0]) / 65536; // loop over the missing values for(int i = 0; i < n0; i++) { // calculate the distance and add noise to break ties d2 = runif(n1, 0, small); dk = mis[i]; for (int j = 0; j < n1; j++) d[j] = std::abs(obs[j] - dk) + d2[j]; // find the k'th lowest value in d for (int j = 0; j < n1; j++) d2[j] = d[j]; std::nth_element (d2.begin(), d2.begin() + k - 1, d2.end()); // find index of donor which[i] dk = d2[k-1]; count = 0; goal = (int) which[i]; for (jj = 0; jj < n1; jj++) { if (d[jj] <= dk) count++; if (count == goal) break; } // and store the result matched[i] = jj; } // increase index to offset 1 return matched + 1; } static R_CallMethodDef callMethods[] = { {"matcher", (DL_FUNC) &matcher, 3}, {NULL, NULL, 0} }; void attribute_visible R_init_mice(DllInfo *dll) { R_registerRoutines(dll, NULL, callMethods, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } mice/src/Makevars0000644000176200001440000000166613416657163013477 0ustar liggesusers## Use the R_HOME indirection to support installations of multiple R version PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` ## As an alternative, one can also add this code in a file 'configure' ## ## PKG_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"` ## ## sed -e "s|@PKG_LIBS@|${PKG_LIBS}|" \ ## src/Makevars.in > src/Makevars ## ## which together with the following file 'src/Makevars.in' ## ## PKG_LIBS = @PKG_LIBS@ ## ## can be used to create src/Makevars dynamically. This scheme is more ## powerful and can be expanded to also check for and link with other ## libraries. It should be complemented by a file 'cleanup' ## ## rm src/Makevars ## ## which removes the autogenerated file src/Makevars. ## ## Of course, autoconf can also be used to write configure files. 
This is ## done by a number of packages, but recommended only for more advanced users ## comfortable with autoconf and its related tools. mice/src/Makevars.win0000644000176200001440000000024113416657163014257 0ustar liggesusers ## Use the R_HOME indirection to support installations of multiple R version PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()") mice/src/RcppExports.cpp0000644000176200001440000000127413416657163014773 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; // matcher IntegerVector matcher(NumericVector obs, NumericVector mis, int k); RcppExport SEXP _mice_matcher(SEXP obsSEXP, SEXP misSEXP, SEXP kSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< NumericVector >::type mis(misSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); rcpp_result_gen = Rcpp::wrap(matcher(obs, mis, k)); return rcpp_result_gen; END_RCPP } mice/R/0000755000176200001440000000000013623760433011377 5ustar liggesusersmice/R/with.R0000644000176200001440000000577413621065562012511 0ustar liggesusers# ------------------------------with.mids---------------------------- #'Evaluate an expression in multiple imputed datasets #' #'Performs a computation of each of imputed datasets in data. #' #' #'@param data An object of type \code{mids}, which stands for 'multiply imputed #'data set', typically created by a call to function \code{mice()}. #'@param expr An expression with a formula object, with the response on the #'left of a \code{~} operator, and the terms, separated by \code{+} operators, #'on the right. See the documentation of \code{\link{lm}} and #'\code{\link{formula}} for details. 
#'@param \dots Additional parameters passed to \code{expr}
#'@return A list object of S3 class \code{mira}
# '@returnItem call The call that created the \code{mira} object.
# '@returnItem call1 The call that created the \code{mids} object that was used
# 'in \code{call}.
# '@returnItem nmis An array containing the number of missing observations per
# 'column.
# '@returnItem analyses A list of \code{m} components containing the individual
# 'fit objects from each of the \code{m} complete data analyses.
#'@author Karin Oudshoorn, Stef van Buuren 2009-2012
#'@seealso \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}}, \code{\link{pool}},
#'\code{\link{D1}}, \code{\link{D3}}, \code{\link{pool.r.squared}}
#'@references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords multivariate
#'@examples
#'
#'
#'imp <- mice(nhanes2)
#'fit1 <- with(data=imp,expr=lm(bmi~age+hyp+chl))
#'fit2 <- with(data=imp,expr=glm(hyp~age+bmi+chl,family=binomial))
#'anova.imp <- with(data=imp,expr=anova(lm(bmi~age+hyp+chl)))
#'@method with mids
#'@export
with.mids <- function(data, expr, ...) {
  # General function to do repeated analyses.
  # Generalisation of lm.mids and glm.mids.
  # KO, 2009.
  #
  # repeated complete data regression on a mids data set.
  # Depending on 'expr' different types of regressions are performed.
  # for 'expr' can be used: lm, lme, glm, etc.
  # SvB formula deleted, 13Aug09: expr can contain any executable expression
  # SvB: now works for both calls and expressions
  call <- match.call()
  if (!is.mids(data))
    stop("The data must have class mids")

  # The unevaluated expression and the caller's environment are the same for
  # every imputation, so capture them once.
  expr.call <- substitute(expr)
  caller <- parent.frame()

  analyses <- vector("list", data$m)

  # do the repeated analysis, store the result.
  for (i in seq_along(analyses)) {
    data.i <- complete(data, i)
    fit <- eval(expr = expr.call, envir = data.i, enclos = caller)
    # expr may itself evaluate to an expression object; evaluate that too
    if (is.expression(fit))
      fit <- eval(expr = fit, envir = data.i, enclos = caller)
    # analyses[[i]] <- NULL would delete element i and shift the rest;
    # single-bracket assignment of a one-element list stores NULL safely
    analyses[i] <- list(fit)
  }

  # return the complete data analyses as a list of length nimp
  object <- list(call = call, call1 = data$call, nmis = data$nmis,
                 analyses = analyses)
  # formula=formula(analyses[[1]]$terms))
  oldClass(object) <- c("mira", "matrix")
  return(object)
}
mice/R/md.pattern.R0000644000176200001440000001041413511163460013567 0ustar liggesusers
# ------------------------------MD.PATTERN-------------------------------
#'Missing data pattern
#'
#'Display missing-data patterns.
#'
#'This function is useful for investigating any structure of missing
#'observations in the data. In specific case, the missing data pattern could be
#'(nearly) monotone. Monotonicity can be used to simplify the imputation model.
#'See Schafer (1997) for details. Also, the missing pattern could suggest which
#'variables could potentially be useful for imputation of missing entries.
#'
#'@param x A data frame or a matrix containing the incomplete data. Missing
#'values are coded as NA's.
#'@param plot Should the missing data pattern be made into a plot. Default is
#'`plot = TRUE`.
#'@param rotate.names Whether the variable names in the plot should be placed
#'horizontally or vertically. Default is `rotate.names = FALSE`.
#'@return A matrix with \code{ncol(x)+1} columns, in which each row corresponds
#'to a missing data pattern (1=observed, 0=missing). Rows and columns are
#'sorted in increasing amounts of missing information. The last column and row
#'contain row and column counts, respectively.
#'@author Gerko Vink, 2018, based on an earlier version of the same function by
#'Stef van Buuren, Karin Groothuis-Oudshoorn, 2000
#'@references Schafer, J.L. (1997), Analysis of multivariate incomplete data.
#'London: Chapman&Hall.
#'
#'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'@keywords univar
#'@examples
#'
#'
#'md.pattern(nhanes)
#'# age hyp bmi chl
#'# 13 1 1 1 1 0
#'# 1 1 1 0 1 1
#'# 3 1 1 1 0 1
#'# 1 1 0 0 1 2
#'# 7 1 0 0 0 3
#'# 0 8 9 10 27
#'
#'
#'@export
md.pattern <- function(x, plot = TRUE, rotate.names = FALSE){
  if (!(is.matrix(x) || is.data.frame(x)))
    stop("Data should be a matrix or dataframe")
  if (ncol(x) < 2)
    stop("Data should have at least two columns")

  R <- is.na(x)
  nmis <- colSums(R)
  col.order <- order(nmis)

  # sort columnwise: variables with the fewest missing values come first
  R <- matrix(R[, col.order], nrow = nrow(x))
  pat <- apply(R, 1, function(x) paste(as.numeric(x), collapse=''))
  # sort rowwise, by pattern string
  sortR <- matrix(R[order(pat), ], nrow = nrow(x))

  # Unique patterns. drop = FALSE keeps a matrix when only one pattern (or one
  # row) is present, and it always uses the column-sorted indicator, so the
  # pattern rows line up with the sorted column counts added below.
  mpat <- sortR[!duplicated(sortR), , drop = FALSE]

  if (all(!is.na(x))){
    cat(" /\\ /\\\n{ `---' }\n{ O O }\n==> V <==")
    cat(" No need for mice. This data set is completely observed.\n")
    cat(" \\ \\|/ /\n `-----'\n\n")
  }

  #update row and column margins
  # row names: number of cases per pattern (table() sorts by pattern string,
  # the same key that ordered sortR)
  rownames(mpat) <- table(pat)
  # flip to 1 = observed, 0 = missing; last column counts missing per pattern
  r <- cbind(abs(mpat - 1), rowSums(mpat))
  # last row: missing count per variable and the grand total; the names of
  # this vector supply the column names of the result
  r <- rbind(r, c(nmis[col.order], sum(nmis)))

  if (plot){ #add plot
    plot.new()
    # pattern part of r, without the margin row and margin column
    R <- r[seq_len(nrow(r) - 1), seq_len(ncol(r) - 1), drop = FALSE]
    op <- par(mar = rep(0, 4))
    on.exit(par(op))
    if (rotate.names) {
      adj <- c(0, 0.5)
      srt <- 90
      # extra head room for vertically written variable names
      length_of_longest_colname <- max(nchar(colnames(r)))/2.6
      plot.window(xlim = c(-1, ncol(R) + 1),
                  ylim = c(-1, nrow(R) + length_of_longest_colname), asp = 1)
    } else {
      adj <- c(0.5, 0)
      srt <- 0
      plot.window(xlim = c(-1, ncol(R) + 1), ylim = c(-1, nrow(R) + 1), asp = 1)
    }
    # lower-left corner of every cell; rows are drawn bottom-up, hence the
    # reversed row order when choosing the fill colour
    M <- cbind(c(row(R)), c(col(R))) - 1
    shade <- ifelse(R[nrow(R):1, ], mdc(1), mdc(2))
    rect(M[, 2], M[, 1], M[, 2] + 1, M[, 1] + 1, col = shade)
    # variable names on top, missing counts per variable at the bottom
    for(i in seq_len(ncol(R))) {
      text(i - .5, nrow(R) + .3, colnames(r)[i], adj = adj, srt = srt)
      text(i - .5, -.3, nmis[col.order][i])
    }
    # missing counts per pattern on the right, pattern frequencies on the left
    for(i in seq_len(nrow(R))){
      text(ncol(R) + .3, i - .5, r[(nrow(r)-1):1, ncol(r)][i], adj = 0)
      text(-.3, i - .5, rownames(r)[(nrow(r)-1):1][i], adj = 1)
    }
    text(ncol(R) + .3, -.3, r[nrow(r), ncol(r)])
  }
  return(r)
}
mice/R/leiden85.R0000644000176200001440000000320113416657163013140 0ustar liggesusers
#'Leiden 85+ study
#'
#'Subset of data from the Leiden 85+ study
#'
#'The data set concerns of subset of 956 members of a very old (85+) cohort in
#'Leiden.
#'
#'Multiple imputation of this data set has been described in Boshuizen et al
#'(1998), Van Buuren et al (1999) and Van Buuren (2012), chapter 7.
#'
#'The data set is not available as part of \code{mice}.
#'
#'@name leiden85
#'@docType data
#'@format \code{leiden85} is a data frame with 956 rows and 336 columns.
#'@source
#'
#'Lagaay, A. M., van der Meij, J.
C., Hijmans, W. (1992). Validation of #'medical history taking as part of a population based survey in subjects aged #'85 and over. \emph{Brit. Med. J.}, \emph{304}(6834), 1091-1092. #' #'Izaks, G. J., van Houwelingen, H. C., Schreuder, G. M., Ligthart, G. J. #'(1997). The association between human leucocyte antigens (HLA) and mortality #'in community residents aged 85 and older. \emph{Journal of the American #'Geriatrics Society}, \emph{45}(1), 56-60. #' #'Boshuizen, H. C., Izaks, G. J., van Buuren, S., Ligthart, G. J. (1998). #'Blood pressure and mortality in elderly people aged 85 and older: Community #'based study. \emph{Brit. Med. J.}, \emph{316}(7147), 1780-1784. #' #'Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of #'missing blood pressure covariates in survival analysis. \emph{Statistics in #'Medicine}, \bold{18}, 681--694. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-toomany.html#sec:leiden85cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #'@keywords datasets NULL mice/R/ampute.mcar.R0000644000176200001440000000470113416657163013745 0ustar liggesusers#'Multivariate Amputation In A MCAR Manner #' #'This function creates a missing data indicator for each pattern, based on a MCAR #'missingness mechanism. The function is used in the multivariate amputation function #'\code{\link{ampute}}. #' #'@param P A vector containing the pattern numbers of the cases's candidacies. #'For each case, a value between 1 and #patterns is given. For example, a #'case with value 2 is candidate for missing data pattern 2. #'@param patterns A matrix of size #patterns by #variables where \code{0} indicates #'a variable should have missing values and \code{1} indicates a variable should #'remain complete. The user may specify as many patterns as desired. One pattern #'(a vector) is also possible. 
ampute.mcar <- function(P, patterns, prop) {
  # Multivariate Amputation In A MCAR Manner
  #
  # Creates one missing data indicator per pattern, based on a MCAR
  # missingness mechanism. Used by the multivariate amputation function
  # ampute().
  #
  # Args:
  #   P        Vector of candidacy codes, one per case. A case is treated as
  #            a candidate for pattern i when its code equals i + 1.
  #   patterns Pattern matrix (one row per pattern). A single pattern given
  #            as a plain vector is accepted and treated as one row.
  #   prop     Probability that a candidate is made missing.
  #
  # Returns:
  #   A list with one element per pattern: either the scalar 0 (no
  #   candidates for that pattern) or a vector as long as P holding 0 for
  #   cases to be made missing and 1 for cases that stay complete.

  # nrow() of a bare vector is NULL, which would make seq_len() fail; the
  # documentation allows a single pattern to be passed as a vector.
  if (is.null(dim(patterns))) {
    patterns <- matrix(patterns, nrow = 1)
  }

  indicator.for.pattern <- function(i) {
    candidate <- P == (i + 1)
    nf <- length(P[candidate])
    # If there are no candidates in a certain pattern, the list receives a 0
    if (nf == 0) {
      return(0)
    }
    # Each candidate is made missing (0) with probability prop and stays
    # complete (1) otherwise.
    drawn <- 1 - rbinom(n = nf, size = 1, prob = prop)
    # Put the draws at the candidates' positions; every other case is
    # complete by definition.
    R.temp <- replace(P, candidate, drawn)
    replace(R.temp, P != (i + 1), 1)
  }

  lapply(seq_len(nrow(patterns)), indicator.for.pattern)
}
print.mids <- function(x, ...) {
  # Summarise a mids object: number of imputations, methods, the head of
  # the predictor matrix and, when present, the head of the event log.
  # Returns x invisibly.
  cat("Class: mids\n")
  cat("Number of multiple imputations: ", x$m, "\n")
  cat("Imputation methods:\n")
  print(x$method, ...)
  cat("PredictorMatrix:\n")
  print(head(x$predictorMatrix), ...)

  events <- x$loggedEvents
  if (is.null(events)) {
    return(invisible(x))
  }
  cat("Number of logged events: ", nrow(events), "\n")
  print(head(events), ...)
  invisible(x)
}

print.mira <- function(x, ...) {
  # Print the stored analyses as a list of fits; anything that is not a
  # mira object is handed to the generic print(). Returns NULL invisibly.
  if (!is.mira(x)) {
    print(x, ...)
    return(invisible())
  }
  print.listof(x, ...)
  invisible()
}

print.mice.anova <- function(x, ...) {
  # Show only the comparison table of the summary. Returns x invisibly.
  print(summary(x, ...)$comparisons, row.names = FALSE)
  invisible(x)
}

print.mice.anova.summary <- function(x, ...) {
  # Show the models, the comparisons, and a footer with the number of
  # imputations and the pooling method. Returns x invisibly.
  cat("\nModels:\n")
  print(x$models, row.names = FALSE)
  cat("\nComparisons:\n")
  print(x$comparisons, row.names = FALSE)
  cat("\nNumber of imputations: ", x$m, " Method", x$method)
  # Only the D2 method carries a `use` qualifier
  if (x$method == "D2") {
    cat(" (", x$use, ")", sep = "")
  }
  cat("\n")
  invisible(x)
}

print.mads <- function(x, ...) {
  # Print the components of a multivariate amputed data set. Anything that
  # is not a mads object is handed to the generic print(). Returns NULL
  # invisibly.
  if (!is.mads(x)) {
    print(x, ...)
    return(invisible())
  }

  # write a label, then print the matching component
  show <- function(label, value) {
    cat(label)
    print(value)
  }

  cat("Multivariate Amputed Data Set")
  show("\nCall: ", x$call)
  cat("Class:", class(x))
  cat("\nProportion of Missingness: ", x$prop)
  cat("\nFrequency of Patterns: ", x$freq)
  show("\nPattern Matrix:\n", x$patterns)
  show("Mechanism:", x$mech)
  show("Weight Matrix:\n", x$weights)
  show("Type Vector:\n", x$type)
  show("Odds Matrix:\n", x$odds)
  show("Head of Amputed Data Set\n", head(x$amp))
  invisible()
}
If one or more #' entries are missing, then the procedure aborts with an error #' message that identifies the cluster with incomplete level-2 data. #' In such cases, one may first fill in the cluster mean (or mode) by #' the \code{2lonly.mean} method to remove inconsistencies. #' @references Gelman, A. and Hill, J. (2007). \emph{Data analysis using #' regression and multilevel/hierarchical models}. Cambridge, Cambridge #' University Press. #' #' Yucel, RM (2008). Multiple imputation inference for multivariate multilevel #' continuous data with ignorable non-response. \emph{Philosophical #' Transactions of the Royal Society A}, \bold{366}, 2389-2404. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' @note The extension to categorical variables transform #' a dependent factor variable by means of the \code{as.integer()} #' function. This may make sense for categories that are #' approximately ordered, but less so for pure nominal measures. #' #' For a more general approach, see #' \code{miceadds::mice.impute.2lonly.function()}. #' @family univariate-2lonly #' @examples #' #'################################################## #'# simulate some data #'# x,y ... level 1 variables #'# v,w ... 
mice.impute.2lonly.pmm <- function (y, ry, x, type, wy = NULL, ...) {
  # Thin wrapper: forwards all arguments to the shared level-2 engine and
  # selects predictive mean matching via method = "pmm".
  .imputation.level2(y = y, ry = ry, x = x, type = type, wy = wy,
                     method = "pmm", ... )
}

#******************************************
# imputation function at level 2
# can be done with norm and pmm
#
# Aggregates the data to one row per cluster, imputes the cluster-level
# target with mice.impute.norm() or mice.impute.pmm(), and copies the
# cluster-level imputations back to the rows flagged in wy.
#
# Arguments:
#   y      target variable (a factor is replaced by its integer codes)
#   ry     logical, TRUE where y is observed
#   x      predictor matrix; must contain the cluster identifier column
#   type   per-column codes of x: -2 marks the cluster identifier, 1 and 2
#          mark the columns used as predictors
#   wy     logical, TRUE where an imputation is wanted; NULL means !ry
#   method "norm" or "pmm"
#   ...    passed on to the univariate imputation function
# Returns the imputations for the rows with wy TRUE.
# Stops when there is not exactly one cluster column, or when a cluster has
# both observed and missing values of y.
.imputation.level2 <- function(y, ry, x, type, wy, method, ... ){
  # exactly one column must be flagged as the cluster identifier; note that
  # the same message is also issued when more than one column is flagged
  if (sum(type == -2L) != 1L) stop( "No class variable")
  if (is.null(wy)) wy <- !ry

  # handle categorical data
  if (is.factor(y)) y <- as.integer(y)

  # extract cluster index
  clusterx <- x[, type == -2L]

  # clusters with one or more missing y's
  cm <- unique(clusterx[!ry])

  # clusters with one or more observed y's
  co <- unique(clusterx[ry])

  # cluster where all y's are observed
  cobs <- setdiff(co, cm)

  # clusters where some y's are missing
  csom <- intersect(co, cm)

  # a level-2 variable must be either fully observed or fully missing
  # within a cluster
  if (length(csom) > 0L)
    stop(paste0("Method 2lonly.", method, " found the following clusters with partially missing\n",
                " level-2 data: ", paste(csom, collapse = ", "),
                "\n",
                " Method 2lonly.mean can fix such inconsistencies."))

  # calculate aggregated values
  # x becomes: a column of ones followed by the predictors of type 1 or 2
  x <- cbind(1, as.matrix(x[, type %in% c(1L, 2L)]))
  # per-cluster sums (missing values skipped) ...
  a2 <- rowsum(cbind(x, y), clusterx, na.rm = TRUE)
  # ... divided by the per-cluster counts of non-missing x and observed y,
  # which gives cluster means; a cluster without observed y yields 0 / 0
  a2 <- a2 / rowsum(1 * cbind(!is.na(x), ry), clusterx)
  # cluster ids are recovered from the row names of the aggregate
  # NOTE(review): as.numeric() presumably requires numeric cluster ids -
  # confirm against callers
  clusterx0 <- as.numeric(paste0(rownames(a2)))
  # a1 columns: cluster id, column of ones, predictors, y
  a1 <- cbind(clusterx0, a2)
  # cluster-level response indicator: clusters with all y's observed
  ry2 <- a1[, 1L] %in% cobs
  # cluster-level 'where' indicator: clusters that hold no row with wy FALSE
  wy2 <- !(a1[, 1L] %in% unique(clusterx[!wy]))
  # target is the last column
  y2 <- a1[, ncol(a1)]
  # predictors: drop the cluster id, the column of ones and the target
  x2 <- as.matrix(a1[, -c(1L:2L, ncol(a1))])

  # norm imputation at level 2
  if (method == "norm")
    ximp2 <- mice.impute.norm(y = y2, ry = ry2, x = x2, wy = wy2, ...)

  # pmm imputation at level 2
  if (method == "pmm")
    ximp2 <- mice.impute.pmm(y = y2, ry = ry2, x = x2, wy = wy2, ...)

  # expand to full matrix
  # ids of the clusters that received an imputation
  # NOTE(review): presumably ximp2 holds one value per TRUE in wy2, in the
  # same order - confirm against mice.impute.norm / mice.impute.pmm
  cly2 <- a1[wy2, 1L]
  # position of each row's cluster within the imputed clusters
  i1 <- match(clusterx, cly2)
  # keep only the rows for which an imputation was requested
  ximp <- (ximp2[i1])[wy]
  ximp
}
mice.impute.cart <- function(y, ry, x, wy = NULL, minbucket = 5, cp = 1e-04,
                             ...) {
  # Imputation by classification and regression trees.
  #
  # Args:
  #   y         target variable (numeric or factor)
  #   ry        logical, TRUE where y is observed
  #   x         predictor matrix
  #   wy        logical, TRUE where an imputation is wanted; NULL means !ry
  #   minbucket minimum number of observations in a terminal node
  #             (values below 1 are raised to 1)
  #   cp        complexity parameter handed to rpart.control()
  #   ...       passed on to install.on.demand() and rpart.control()
  #
  # Returns:
  #   A vector of length sum(wy): values drawn from the observed y for a
  #   numeric target, level labels (character) for a factor target.
  install.on.demand("rpart", ...)
  if (is.null(wy)) wy <- !ry
  minbucket <- max(1, minbucket)

  # without predictors, fit on a constant column so rpart has a right-hand side
  if (dim(x)[2] == 0) {
    x <- cbind(x, 1)
    dimnames(x) <- list(NULL, "int")
  }

  xobs <- data.frame(x[ry, , drop = FALSE])
  xmis <- data.frame(x[wy, , drop = FALSE])
  yobs <- y[ry]

  if (!is.factor(yobs)) {
    fit <- rpart::rpart(yobs ~ ., data = cbind(yobs, xobs), method = "anova",
                        control = rpart::rpart.control(minbucket = minbucket,
                                                       cp = cp, ...))
    # node number of the leaf each observed case falls into; repeated row
    # indexing appends ".1", ".2", ... to the row names, floor() strips that
    leafnr <- floor(as.numeric(row.names(fit$frame[fit$where, ])))
    # make predict() return the node number instead of the fitted mean
    fit$frame$yval <- as.numeric(row.names(fit$frame))
    nodes <- predict(object = fit, newdata = xmis)
    donor <- lapply(nodes, function(s) yobs[leafnr == s])
    # Draw by index rather than with sample(values, 1): for a leaf holding
    # exactly one donor value v >= 1, sample(v, 1) would draw from
    # 1:floor(v) instead of returning v.
    draw.one <- function(values) values[sample.int(length(values), 1)]
    impute <- vapply(seq_along(donor),
                     function(s) draw.one(donor[[s]]),
                     numeric(1))
  } else {
    # escape with same impute if the dependent does not vary
    cat.has.all.obs <- table(yobs) == sum(ry)
    if (any(cat.has.all.obs))
      return(rep(levels(yobs)[cat.has.all.obs], sum(wy)))

    xy <- cbind(yobs, xobs)
    xy <- droplevels(xy)
    # FIXME: rpart fails to run on empty categories in yobs,
    # droplevels() removes empty levels, and this is
    # likely to present problems further down the road
    # potential problem case: table(yobs): 0 10 15, then
    # droplevels may forget about category 1
    fit <- rpart::rpart(yobs ~ ., data = xy, method = "class",
                        control = rpart::rpart.control(minbucket = minbucket,
                                                       cp = cp, ...))
    # one row of class probabilities per case to impute
    nodes <- predict(object = fit, newdata = xmis)
    impute <- apply(nodes, MARGIN = 1,
                    FUN = function(s) sample(colnames(nodes),
                                             size = 1, prob = s))
  }
  return(impute)
}
check.data <- function(data, method) {
  # Validate the form of the data and return it as a data frame.
  # `method` is accepted but not inspected here.
  check.dataform(data)
}

check.dataform <- function(data) {
  # Check that data is a matrix or data frame with at least two uniquely
  # named columns, none of which is itself a matrix or data frame.
  # Returns the data as a data frame; stops with a message otherwise.
  if (!(is.matrix(data) || is.data.frame(data)))
    stop("Data should be a matrix or data frame", call. = FALSE)
  if (ncol(data) < 2)
    stop("Data should contain at least two columns", call. = FALSE)
  data <- as.data.frame(data)

  mat <- vapply(data, is.matrix, logical(1))
  df <- vapply(data, is.data.frame, logical(1))
  # join the offending names with ", " so that several of them remain
  # readable (same format as the duplicate-name message below)
  if (any(mat)) stop("Cannot handle columns with class matrix: ",
                     paste(colnames(data)[mat], collapse = ", "))
  if (any(df)) stop("Cannot handle columns with class data.frame: ",
                    paste(colnames(data)[df], collapse = ", "))

  dup <- duplicated(colnames(data))
  if (any(dup)) stop("Duplicate names found: ",
                     paste(colnames(data)[dup], collapse = ", "))
  data
}

check.m <- function(m) {
  # Validate the number of imputations. Only the first element is used and
  # it is rounded down. Returns the validated value; stops when it is not
  # numeric, missing, or lower than 1.
  m <- m[1L]
  if (!is.numeric(m))
    stop("Argument m not numeric", call. = FALSE)
  # without this check an NA would fail in the comparison below with an
  # uninformative "missing value where TRUE/FALSE needed"
  if (is.na(m))
    stop("Argument m is missing (NA)", call. = FALSE)
  m <- floor(m)
  if (m < 1L)
    stop("Number of imputations (m) lower than 1.", call. = FALSE)
  m
}

check.cluster <- function(data, predictorMatrix) {
  # stop if the cluster variable is a factor
  # A column of predictorMatrix that contains a -2 anywhere marks a cluster
  # variable. Returns TRUE when none of these is a factor.
  isclassvar <- apply(predictorMatrix == -2, 2, any)
  # computed once instead of once per column
  isfactor <- lapply(data, is.factor)
  for (j in colnames(predictorMatrix)) {
    if (isclassvar[j] && isfactor[[j]])
      stop("Convert cluster variable ", j, " to integer by as.integer()")
  }
  TRUE
}
mice.mids <- function(obj, maxit = 1, printFlag = TRUE, ...) {
  # Continue the iterations of an existing mids object.
  #
  # Args:
  #   obj       object of class mids
  #   maxit     number of additional iterations; obj is returned unchanged
  #             when maxit < 1
  #   printFlag passed on to sampler()
  #   ...       passed on to sampler()
  #
  # Returns:
  #   A new mids object whose iteration count is obj$iteration + maxit and
  #   whose chainMean / chainVar hold the old iterations followed by the
  #   new ones.
  if (!is.mids(obj))
    stop("Object should be of type mids.")
  if (maxit < 1)
    return(obj)

  # NOTE(review): `state` and `loggedEvents` are not passed to sampler()
  # explicitly, yet are inspected again after it returns; presumably the
  # logging helpers reach them by name in this frame. Do not rename them
  # without checking.
  loggedEvents <- obj$loggedEvents
  state <- list(it = 0, im = 0, co = 0, dep = "", meth = "",
                log = !is.null(loggedEvents))
  if (is.null(loggedEvents))
    loggedEvents <- data.frame(it = 0, im = 0, co = 0, dep = "",
                               meth = "", out = "")

  # Initialize local variables
  call <- match.call()
  imp <- obj$imp
  # fallbacks for objects that lack `where` or `blocks`
  where <- obj$where
  if (is.null(where)) where <- is.na(obj$data)
  blocks <- obj$blocks
  if (is.null(blocks)) blocks <- make.blocks(obj$data)

  # Resume the random number stream where the previous run stopped. An
  # object without a stored seed must not overwrite the current state of
  # the generator with NULL.
  if (!is.null(obj$lastSeedValue))
    assign(".Random.seed", obj$lastSeedValue, pos = 1)

  ## OK. Iterate.
  sumIt <- obj$iteration + maxit
  from <- obj$iteration + 1
  to <- from + maxit - 1
  q <- sampler(obj$data, obj$m, where, imp, blocks, obj$method,
               obj$visitSequence, obj$predictorMatrix,
               obj$formulas, obj$blots, obj$post,
               c(from, to), printFlag, ...)

  imp <- q$imp

  ## combine with previous chainMean and chainVar
  # use the same blocks that were handed to the sampler, so that the
  # fallback above is honoured here as well
  vnames <- unique(unlist(blocks))
  nvis <- length(vnames)
  if (!is.null(obj$chainMean)) {
    chainMean <- chainVar <- array(0, dim = c(nvis, to, obj$m),
                                   dimnames = list(vnames,
                                                   seq_len(to),
                                                   paste("Chain", seq_len(obj$m))))
    for (j in seq_len(nvis)) {
      if (obj$iteration == 0) {
        # nothing to prepend: the new iterations are the whole chain
        chainMean[j, , ] <- q$chainMean[j, , ]
        chainVar[j, , ] <- q$chainVar[j, , ]
      } else {
        # old iterations first, new iterations in positions from:to
        chainMean[j, seq_len(obj$iteration), ] <- obj$chainMean[j, , ]
        chainVar[j, seq_len(obj$iteration), ] <- obj$chainVar[j, , ]
        chainMean[j, from:to, ] <- q$chainMean[j, , ]
        chainVar[j, from:to, ] <- q$chainVar[j, , ]
      }
    }
  } else {
    chainMean <- chainVar <- NULL
  }

  # drop the placeholder log when nothing was logged, else renumber rows
  if (!state$log)
    loggedEvents <- NULL
  if (state$log)
    row.names(loggedEvents) <- seq_len(nrow(loggedEvents))

  ## save, and return
  midsobj <- list(data = obj$data, imp = imp, m = obj$m,
                  where = where, blocks = blocks,
                  call = call, nmis = obj$nmis,
                  method = obj$method,
                  predictorMatrix = obj$predictorMatrix,
                  visitSequence = obj$visitSequence,
                  formulas = obj$formulas, post = obj$post,
                  blots = obj$blots,
                  seed = obj$seed,
                  iteration = sumIt,
                  lastSeedValue = .Random.seed,
                  chainMean = chainMean,
                  chainVar = chainVar,
                  loggedEvents = loggedEvents,
                  version = packageVersion("mice"),
                  date = Sys.Date())
  oldClass(midsobj) <- "mids"
  return(midsobj)
}
The \code{krul} dataset contains height and weight (both measures #'and self-reported) from 1257 Dutch adults, whereas the \code{mgg} dataset #'contains self-reported height and weight for 803 Dutch adults. Section 7.3 in #'Van Buuren (2012) shows how the missing measured data can be imputed in the #'\code{mgg} data, so corrected prevalence estimates can be calculated. #' #'@name selfreport #'@aliases selfreport mgg #'@docType data #'@format A data frame with 2060 rows and 15 variables: #'\describe{ #'\item{src}{Study, either \code{krul} or \code{mgg} (factor)} #'\item{id}{Person identification number} #'\item{pop}{Population, all \code{NL} (factor)} #'\item{age}{Age of respondent in years} #'\item{sex}{Sex of respondent (factor)} #'\item{hm}{Height measured (cm)} #'\item{wm}{Weight measured (kg)} #'\item{hr}{Height reported (cm)} #'\item{wr}{Weight reported (kg)} #'\item{prg}{Pregnancy (factor), all \code{Not pregnant}} #'\item{edu}{Educational level (factor)} #'\item{etn}{Ethnicity (factor)} #'\item{web}{Obtained through web survey (factor)} #'\item{bm}{BMI measured (kg/m2)} #'\item{br}{BMI reported (kg/m2)} #'} #'@source Krul, A., Daanen, H. A. M., Choi, H. (2010). Self-reported and #'measured weight, height and body mass index (BMI) in Italy, The Netherlands #'and North America. \emph{European Journal of Public Health}, \emph{21}(4), #'414-419. #' #'Van Keulen, H.M.,, Chorus, A.M.J., Verheijden, M.W. (2011). \emph{Monitor #'Convenant Gezond Gewicht Nulmeting (determinanten van) beweeg- en eetgedrag #'van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar)}. #'TNO/LS 2011.016. Leiden: TNO. #' #'Van der Klauw, M., Van Keulen, H.M., Verheijden, M.W. (2011). \emph{Monitor #'Convenant Gezond Gewicht Beweeg- en eetgedrag van kinderen (4-11 jaar), #'jongeren (12-17 jaar) en volwassenen (18+ jaar) in 2010 en 2011.} TNO/LS #'2011.055. Leiden: TNO. (in Dutch) #' #'Van Buuren, S. (2018). 
#'\href{https://stefvanbuuren.name/fimd/sec-prevalence.html#sec:srcdata}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #'@keywords datasets #'@examples #' #' #'md.pattern(selfreport[,c("age","sex","hm","hr","wm","wr")]) #' #'### FIMD Section 7.3.5 Application #' #'bmi <- function(h,w){return(w/(h/100)^2)} #'init <- mice(selfreport,maxit=0) #'meth <- init$meth #'meth["bm"] <- "~bmi(hm,wm)" #'pred <- init$pred #'pred[,c("src","id","web","bm","br")] <- 0 #'imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=2, m=1) #'## imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=20, m=10) #' #'### Like FIMD Figure 7.6 #' #'cd <- complete(imp, 1) #'xy <- xy.coords(cd$bm, cd$br-cd$bm) #'plot(xy,col=mdc(2),xlab="Measured BMI",ylab="Reported - Measured BMI", #' xlim=c(17,45),ylim=c(-5,5), type="n",lwd=0.7) #'polygon(x=c(30,20,30),y=c(0,10,10),col="grey95",border=NA) #'polygon(x=c(30,40,30),y=c(0,-10,-10),col="grey95",border=NA) #'abline(0,0,lty=2,lwd=0.7) #' #'idx <- cd$src=="krul" #'xyc <- xy; xyc$x <- xy$x[idx]; xyc$y <- xy$y[idx] #'xys <- xy; xys$x <- xy$x[!idx]; xys$y <- xy$y[!idx] #'points(xyc,col=mdc(1), cex=0.7) #'points(xys,col=mdc(2), cex=0.7) #'lines(lowess(xyc),col=mdc(4),lwd=2) #'lines(lowess(xys),col=mdc(5),lwd=2) #'text(1:4,x=c(40,28,20,32),y=c(4,4,-4,-4),cex=3) #'box(lwd=1) #' #' NULL mice/R/stripplot.R0000644000176200001440000003133413617545240013566 0ustar liggesusers#'Stripplot of observed and imputed data #' #'Plotting methods for imputed data using \pkg{lattice}. #'\code{stripplot} produces one-dimensional #'scatterplots. The function #'automatically separates the observed and imputed data. The #'functions extend the usual features of \pkg{lattice}. #' #'The argument \code{na.groups} may be used to specify (combinations of) #'missingness in any of the variables. The argument \code{groups} can be used #'to specify groups based on the variable values themselves. 
Only one of the two #'may be active at the same time. When both are specified, \code{na.groups} #'takes precedence over \code{groups}. #' #'Use the \code{subset} and \code{na.groups} together to plot parts of the #'data. For example, select the first imputed data set by #'\code{subset=.imp==1}. #' #'Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #'specified in the arguments list to alter the plotting symbols. If #'\code{length(col)==2}, the color specification defines the observed and #'missing groups. \code{col[1]} is the color of the 'observed' data, #'\code{col[2]} is the color of the missing or imputed data. A convenient color #'choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #'data, and a transparent red color for the imputed data. A good choice is #'\code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the #'duration of the session by running \code{mice.theme()}. #' #'@aliases stripplot #'@param x A \code{mids} object, typically created by \code{mice()} or #'\code{mice.mids()}. #'@param data Formula that selects the data to be plotted. This argument #'follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #'variables (used for the per-panel display) and the optional conditioning #'variables (which define the subsets plotted in different panels) to be used #'in the plot. #' #'The formula is evaluated on the complete data set in the \code{long} form. #'Legal variable names for the formula include \code{names(x$data)} plus the #'two administrative factors \code{.imp} and \code{.id}. #' #'\bold{Extended formula interface:} The primary variable terms (both the LHS #'\code{y} and RHS \code{x}) may consist of multiple terms separated by a #'\sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}.
This formula would be #'taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #'\code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #'\emph{separate panels}. This behavior differs from standard \pkg{lattice}. #'\emph{Only combine terms of the same type}, i.e. only factors or only #'numerical variables. Mixing numerical and categorical data occasionally #'produces odd labeling of the vertical axis. #' #'For convenience, in \code{stripplot()} and \code{bwplot} the formula #'\code{y~.imp} may be abbreviated as \code{y}. This applies only to a single #'\code{y}, and does not (yet) work for \code{y1+y2~.imp}. #' #'@param na.groups An expression evaluating to a logical vector indicating #'which two groups are distinguished (e.g. using different colors) in the #'display. The environment in which this expression is evaluated is the #'response indicator \code{is.na(x$data)}. #' #'The default \code{na.groups = NULL} contrasts the observed and missing data #'in the LHS \code{y} variable of the display, i.e. groups created by #'\code{is.na(y)}. The expression \code{y} creates the groups according to #'\code{is.na(y)}. The expression \code{y1 & y2} creates groups by #'\code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #'\code{is.na(y1) | is.na(y2)}, and so on. #'@param groups This is the usual \code{groups} argument in \pkg{lattice}. It #'differs from \code{na.groups} because it evaluates in the completed data #'\code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #'\code{na.groups} evaluates in the response indicator. See #'\code{\link{xyplot}} for more details. When both \code{na.groups} and #'\code{groups} are specified, \code{na.groups} takes precedence, and #'\code{groups} is ignored. #'@param theme A named list containing the graphical parameters. The default #'function \code{mice.theme} produces a short list of default colors, line #'width, and so on.
The extensive list may be obtained from #'\code{trellis.par.get()}. Global graphical parameters like \code{col} or #'\code{cex} in high-level calls are still honored, so first experiment with #'the global parameters. Many setting consists of a pair. For example, #'\code{mice.theme} defines two symbol colors. The first is for the observed #'data, the second for the imputed data. The theme settings only exist during #'the call, and do not affect the trellis graphical parameters. #'@param jitter.data See \code{\link[lattice:panel.xyplot]{panel.xyplot}}. #'@param horizontal See \code{\link[lattice:xyplot]{xyplot}}. #'@param as.table See \code{\link[lattice:xyplot]{xyplot}}. #'@param panel See \code{\link{xyplot}}. #'@param default.prepanel See \code{\link[lattice:xyplot]{xyplot}}. #'@param outer See \code{\link[lattice:xyplot]{xyplot}}. #'@param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #'@param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #'@param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #'@param subset See \code{\link[lattice:xyplot]{xyplot}}. #'@param \dots Further arguments, usually not directly processed by the #'high-level functions documented here, but instead passed on to other #'functions. #'@return The high-level functions documented here, as well as other high-level #'Lattice functions, return an object of class \code{"trellis"}. The #'\code{\link[lattice:update.trellis]{update}} method can be used to #'subsequently update components of the object, and the #'\code{\link[lattice:print.trellis]{print}} method (usually called by default) #'will plot it on an appropriate plotting device. #'@note The first two arguments (\code{x} and \code{data}) are reversed #'compared to the standard Trellis syntax implemented in \pkg{lattice}. This #'reversal was necessary in order to benefit from automatic method dispatch. 
#' #'In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #'in \pkg{lattice} the argument \code{x} is always a formula. #' #'In \pkg{mice} the argument \code{data} is always a formula object, whereas in #'\pkg{lattice} the argument \code{data} is usually a data frame. #' #'All other arguments have identical interpretation. #' #'@author Stef van Buuren #'@seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, #'\code{\link{bwplot}}, \code{\link{lattice}} for an overview of the #'package, as well as \code{\link[lattice:stripplot]{stripplot}}, #'\code{\link[lattice:panel.stripplot]{panel.stripplot}}, #'\code{\link[lattice:print.trellis]{print.trellis}}, #'\code{\link[lattice:trellis.par.set]{trellis.par.set}} #'@references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #'Visualization with R}, Springer. #' #'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. 
stripplot.mids <- function(x, data,
                           na.groups = NULL, groups = NULL,
                           as.table = TRUE,
                           theme = mice.theme(),
                           allow.multiple = TRUE,
                           outer = TRUE,
                           drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"),
                           panel = lattice::lattice.getOption("panel.stripplot"),
                           default.prepanel = lattice::lattice.getOption("prepanel.default.stripplot"),
                           jitter.data = TRUE,
                           horizontal = FALSE,
                           ...,
                           subscripts = TRUE,
                           subset = TRUE) {
  ## Strip plot of a 'mids' object: builds the long-format data, a grouping
  ## vector and a formula, forwards everything to the stripplot() generic
  ## through do.call(), and applies `theme` to the resulting trellis object.
  ##
  ## Note that `data` carries the formula here (see `formula <- data` below),
  ## while `x` is the mids object.

  ## keep the unevaluated call: several arguments are inspected as typed
  call <- match.call()
  if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object")

  ## unpack data and response indicator
  ## cd: result of complete(x, "long", include=TRUE); its first two columns
  ##     are skipped when variable names are collected below
  ## r : TRUE where the original data x$data are missing
  cd <- data.frame(complete(x, "long", include=TRUE))
  r <- as.data.frame(is.na(x$data))

  ## evaluate na.group in response indicator
  ## (non-standard evaluation: names in `na.groups` are looked up in `r`
  ## first, then in the caller's frame)
  nagp <- eval(expr=substitute(na.groups), envir=r, enclos=parent.frame())
  if (is.expression(nagp)) nagp <- eval(expr=nagp, envir=r, enclos=parent.frame())

  ## evaluate groups in imputed data
  ngp <- eval(expr=substitute(groups), envir=cd, enclos=parent.frame())
  if (is.expression(ngp)) ngp <- eval(expr=ngp, envir=cd, enclos=parent.frame())
  groups <- ngp

  ## evaluate subset in imputed data
  ss <- eval(expr=substitute(subset), envir=cd, enclos=parent.frame())
  if (is.expression(ss)) ss <- eval(expr=ss, envir=cd, enclos=parent.frame())
  subset <- ss

  ## evaluate further arguments before parsing
  dots <- list(...)
  args <- list(panel = panel,
               default.prepanel = default.prepanel,
               allow.multiple = allow.multiple,
               outer = outer,
               drop.unused.levels = drop.unused.levels,
               subscripts = subscripts,
               as.table = as.table,
               jitter.data = jitter.data,
               horizontal = horizontal)

  ## create formula if not given (in call$data !)
  vnames <- names(cd)[-seq_len(2)]
  allfactors <- unlist(lapply(cd,is.factor))[-seq_len(2)]
  if (missing(data)) {
    ## default: all non-factor variables against the imputation number
    vnames <- vnames[!allfactors]
    formula <- as.formula(paste0(paste0(vnames,collapse="+"),"~ as.factor(.imp)"))
  } else {
    ## pad abbreviated formula
    ## (an argument typed without "~" gets "~ as.factor(.imp)" appended)
    abbrev <- ! any(grepl("~", call$data))
    if (abbrev) {
      if (length(call$data)>1) stop("Cannot pad extended formula.")
      else formula <- as.formula(paste(call$data,"~ as.factor(.imp)",sep=""))
    } else {
      formula <- data
    }
  }

  ## determine the y-variables
  form <- lattice::latticeParseFormula(model=formula, data=cd, subset = subset,
                                       groups = groups, multiple = allow.multiple,
                                       outer = outer, subscripts = TRUE,
                                       drop = drop.unused.levels)
  ## split the parsed left/right-hand names on " + " and trim each term
  ynames <- unlist(lapply(strsplit(form$left.name," \\+ "), rm.whitespace)) ## Jul2011
  xnames <- unlist(lapply(strsplit(form$right.name," \\+ "), rm.whitespace)) ## Jul2011

  ## calculate selection vector gp
  ## precedence: an explicit `na.groups` wins; otherwise an explicit `groups`
  ## is passed on unevaluated (as typed in the call); otherwise the
  ## missingness indicator of each y-variable is repeated x$m+1 times
  nona <- is.null(call$na.groups)
  if (!is.null(call$groups) && nona) gp <- call$groups
  else {
    if (nona) {
      na.df <- r[, ynames, drop=FALSE]
      gp <- unlist(lapply(na.df, rep, x$m+1))
    } else {
      gp <- rep(nagp, length(ynames)*(x$m+1))
    }
  }

  ## change axis defaults of extended formula interface
  ## (only when the user did not supply xlab / ylab / scales in the call)
  if (is.null(call$xlab) && !is.na(match(".imp",xnames))) {
    dots$xlab <- ""
    if (length(xnames)==1) dots$xlab <- "Imputation number"
  }
  if (is.null(call$ylab)) {
    args$ylab <- ""
    if (length(ynames)==1) args$ylab <- ynames
  }
  if (is.null(call$scales)) {
    args$scales <- list()
    if (length(ynames)>1) args$scales <- list(x=list(relation="free"), y=list(relation="free"))
  }

  ## ready
  ## NOTE(review): `subset` is forwarded as typed in the call (call$subset),
  ## not as the value `ss` evaluated above -- confirm this is intended.
  args <- c(x=formula, data=list(cd), groups=list(gp), args, dots, subset=call$subset)

  ## go
  tp <- do.call("stripplot", args)
  tp <- update(tp, par.settings = theme)
  return(tp)
}
ampute.continuous <- function(P, scores, prop, type) {
  # Multivariate Amputation Based On Continuous Probability Functions
  #
  # Creates a missing data indicator for each pattern. Logistic probability
  # curves (Van Buuren, 2012, pp. 63, 64) are applied to the weighted sum
  # scores computed earlier in ampute().
  #
  # Arguments:
  #   P      vector of pattern candidacies; the candidates of pattern i are
  #          the cases with P == i + 1 (see the replace() calls at the end)
  #   scores list with, per pattern, the weighted sum scores of its candidates
  #   prop   desired proportion of missingness
  #   type   "LEFT", "MID", "TAIL" or anything else (treated as "RIGHT");
  #          a single value is recycled over all patterns
  # Returns a list with one element per pattern: 0 = make missing,
  # 1 = remain complete.
  #
  # ------------------------ bin.search --------------------------------------
  #
  bin.search <- function (fun, range = c(-8, 8), ..., target = 0,
                          lower = ceiling(min(range)),
                          upper = floor(max(range)),
                          maxiter = 100, showiter = FALSE) {
    # This is a custom adaptation of function binsearch from package gtools
    # (version 3.5.0) that returns the adjustment of the probability curves
    # used in the function ampute.continuous in ampute.
    # All function values are rounded to 3 decimals, and the search stops
    # once the interval is no wider than 0.001.
    lo <- lower
    hi <- upper
    counter <- 0
    val.lo <- round(fun(lo, ...), 3)
    val.hi <- round(fun(hi, ...), 3)
    # sign = -1 for a decreasing function, so the comparisons below work
    # for both directions
    sign <- if (val.lo > val.hi) -1 else 1
    if (target * sign < val.lo * sign)
      outside.range <- TRUE
    else if (target * sign > val.hi * sign)
      outside.range <- TRUE
    else
      outside.range <- FALSE
    while (counter < maxiter && !outside.range) {
      counter <- counter + 1
      if (hi - lo <= (1 / (10 ^ 3)) || lo < lower || hi > upper)
        break
      center <- round((hi - lo)/2 + lo, 3)
      val <- round(fun(center, ...), 3)
      if (showiter) {
        cat("--------------\n")
        cat("Iteration #", counter, "\n")
        cat("lo=", lo, "\n")
        cat("hi=", hi, "\n")
        cat("center=", center, "\n")
        cat("fun(lo)=", val.lo, "\n")
        cat("fun(hi)=", val.hi, "\n")
        cat("fun(center)=", val, "\n")
      }
      if (val == target) {
        val.lo <- val.hi <- val
        lo <- hi <- center
        break
      } else if (sign * val < sign * target) {
        lo <- center
        val.lo <- val
      } else {
        hi <- center
        val.hi <- val
      }
      if (showiter) {
        cat("new lo=", lo, "\n")
        cat("new hi=", hi, "\n")
        cat("--------------\n")
      }
    }
    retval <- list(call = match.call(), numiter = counter)
    if (outside.range) {
      if (target * sign < val.lo * sign) {
        warning("The desired proportion of ", target, " is too small; ",
                val.lo, " is used instead.")
        retval$flag = "Lower Boundary"
        retval$where = lo
        retval$value = val.lo
      } else {
        warning("The desired proportion of ", target, " is too large; ",
                val.hi, " is used instead.")
        retval$flag = "Upper Boundary"
        retval$where = hi
        retval$value = val.hi
      }
    } else if (counter >= maxiter) {
      retval$flag = "Maximum number of iterations reached"
      retval$where = (lo + hi) / 2
      retval$value = (val.lo + val.hi) / 2
    } else if (val.lo == target) {
      retval$flag = "Found"
      retval$where = lo
      retval$value = val.lo
    } else if (val.hi == target) {
      retval$flag = "Found"
      retval$where = hi
      retval$value = val.hi
    } else {
      retval$flag = "Between Elements"
      retval$where = (lo + hi) / 2
      retval$value = (val.lo + val.hi) / 2
    }
    return(retval)
  }
  #
  # ----------------------- ampute.continuous --------------------------------
  #
  # For a test data set, the shift of the logit function is calculated
  # in order to obtain the right proportion of missingness (area beneath the
  # curve). The set-up for this is created in subsequent lines, it is executed
  # within the for loop over i.
  testset <- scale(rnorm(n = 10000, mean = 0, sd = 1))
  # Logistic curve. stats::plogis() is used instead of exp(x) / (1 + exp(x))
  # because the latter evaluates to Inf / Inf = NaN for large positive x,
  # which would hand NaN probabilities to rbinom() below.
  logit <- function(x) stats::plogis(x)
  # An empty list is created, type argument is given the right length
  R <- vector(mode = "list", length = length(scores))
  if (length(type) == 1) {
    type <- rep.int(type, length(scores))
  }
  for (i in seq_along(scores)) {
    # The desired function is chosen; any type other than LEFT, MID or TAIL
    # falls through to the last (RIGHT) curve
    formula <- switch(type[i],
                      LEFT = function(x, b) logit(mean(x) - x + b),
                      MID = function(x, b) logit(-abs(x - mean(x)) + 0.75 + b),
                      TAIL = function(x, b) logit(abs(x - mean(x)) - 0.75 + b),
                      function(x, b) logit(-mean(x) + x + b))
    # Find the shift b for which the mean probability on the test set
    # equals the desired proportion
    shift <- bin.search(fun = function(shift)
      sum(formula(x = testset, b = shift)) / length(testset),
      target = prop)$where
    if (length(shift) > 1) {
      shift <- shift[1]
    }
    scores.temp <- scores[[i]]
    if (length(scores.temp) == 1 && scores.temp == 0) {
      # a single score of 0 yields indicator 0 for this pattern
      R[[i]] <- 0
    } else {
      if (length(scores.temp) == 1) {
        # a single candidate: the curve cannot be centred, use prop directly
        probs <- prop
      } else if (length(unique(scores.temp)) == 1) {
        # all scores equal: every candidate gets probability prop
        probs <- prop
      } else {
        probs <- formula(x = scores.temp, b = shift)
      }
      # Based on the probabilities, each candidate will receive a missing data
      # indicator 0, meaning he will be made missing or missing data indicator
      # 1, meaning the candidate will remain complete.
      R.temp <- 1 - rbinom(n = length(scores.temp), size = 1, prob = probs)
      R[[i]] <- replace(P, P == (i + 1), R.temp)
      R[[i]] <- replace(R[[i]], P != (i + 1), 1)
    }
  }
  return(R)
}

make.blots <- function(data, blocks = make.blocks(data)) {
  # Builds the default `blots` object: a list with one empty list per block,
  # named after the blocks (note: a named list, not a matrix).
  data <- check.dataform(data)
  blots <- vector("list", length(blocks))
  for (i in seq_along(blots)) blots[[i]] <- alist()
  names(blots) <- names(blocks)
  blots
}

# Validates a user-supplied `blots`: NULL is replaced by the default from
# make.blots(); otherwise every element is coerced to a list, and block names
# are copied over when `blots` is unnamed and has one element per block.
check.blots <- function(blots, data, blocks = NULL) {
  data <- check.dataform(data)

  if (is.null(blots)) return(make.blots(data, blocks))

  blots <- as.list(blots)
  for (i in seq_along(blots)) blots[[i]] <- as.list(blots[[i]])

  if (length(blots) == length(blocks) && is.null(names(blots)))
    names(blots) <- names(blocks)
  blots
}
#' @param partition A character vector of length 1 used to assign #' variables to blocks when \code{data} is a \code{data.frame}. Value #' \code{"scatter"} (default) will assign each column to its own #' block. Value \code{"collect"} assigns all variables to one block, #' whereas \code{"void"} produces an empty list. #' @param calltype A character vector of \code{length(block)} elements #' that indicates how the imputation model is specified. If #' \code{calltype = "type"} (the default), the underlying imputation #' model is called by means of the \code{type} argument. The #' \code{type} argument for block \code{h} is equivalent to #' row \code{h} in the \code{predictorMatrix}. #' The alternative is \code{calltype = "formula"}. This will pass #' \code{formulas[[h]]} to the underlying imputation #' function for block \code{h}, together with the current data. #' The \code{calltype} of a block is set automatically during #' initialization. Where a choice is possible, calltype #' \code{"formula"} is preferred over \code{"type"} since this is #' more flexible and extendable. However, what precisely happens #' depends also on the capabilities of the imputation #' function that is called. #' @return A named list of character vectors with variable names. #' @details Choices \code{"scatter"} and \code{"collect"} represent two #' extreme scenarios for assigning variables to imputation blocks. #' Use \code{"scatter"} to create an imputation model based on #' \emph{fully conditional specification} (FCS). Use \code{"collect"} to #' gather all variables to be imputed by a \emph{joint model} (JM). #' Scenarios in between these two extremes represent #' \emph{hybrid} imputation models that combine FCS and JM. #' #' Any variable not listed in a block will not be imputed. #' Specification \code{"void"} represents the extreme scenario that #' skips imputation of all variables. #' #' A variable may be a member of multiple blocks. 
make.blocks <- function(data,
                        partition = c("scatter", "collect", "void"),
                        calltype = "type") {
  # Builds a `blocks` list (named list of character vectors of variable
  # names) with a "calltype" attribute holding one entry per block.
  #
  #   data       character vector of variable names, a list of variable
  #              names, or something as.data.frame() can convert
  #   partition  only used for data frame input: "scatter" gives one block
  #              per column, "collect" one block named "collect" holding all
  #              columns, "void" an empty list
  #   calltype   a single value (recycled over all blocks) or a vector with
  #              one entry per block

  # Attach the calltype attribute. Shared by all three input kinds so that a
  # per-block `calltype` vector is handled the same way everywhere;
  # previously the character-vector branch always recycled it with rep(),
  # which produced an attribute of the wrong length.
  set.calltype <- function(v) {
    ct <- if (length(calltype) == 1L) rep(calltype, length(v)) else calltype
    names(ct) <- names(v)
    attr(v, "calltype") <- ct
    v
  }

  # character (atomic) vector: every name becomes its own block
  if (is.vector(data) && !is.list(data)) {
    v <- as.list(as.character(data))
    names(v) <- as.character(data)
    return(set.calltype(v))
  }

  # plain list: the blocks are given, only missing names need filling in
  if (is.list(data) && !is.data.frame(data)) {
    return(set.calltype(name.blocks(data)))
  }

  # data frame (or coercible): derive the blocks from the column names
  data <- as.data.frame(data)
  partition <- match.arg(partition)
  vars <- names(data)
  v <- switch(partition,
              scatter = {
                b <- as.list(vars)
                names(b) <- vars
                b
              },
              collect = {
                b <- list(vars)
                names(b) <- "collect"
                b
              },
              void = list())
  set.calltype(v)
}
name.blocks <- function(blocks, prefix = "B") {
  # Gives a name to every unnamed element of `blocks`.
  #   - a non-list input is handed to make.blocks()
  #   - an unnamed block with exactly one variable is named after it
  #   - any other unnamed block is named <prefix><k>, k = 1, 2, ...
  # A name counts as missing when it is "" or NA. NA names used to abort
  # the loop with "missing value where TRUE/FALSE needed".
  if (!is.list(blocks)) return(make.blocks(blocks))
  if (is.null(names(blocks))) names(blocks) <- rep("", length(blocks))
  inc <- 1
  for (i in seq_along(blocks)) {
    current <- names(blocks)[i]
    if (!is.na(current) && current != "") next
    if (length(blocks[[i]]) == 1) names(blocks)[i] <- blocks[[i]][1]
    else {
      names(blocks)[i] <- paste0(prefix, inc)
      inc <- inc + 1
    }
  }
  blocks
}

# Validates `blocks` against `data`: names unnamed blocks, stops when a
# listed variable is not a column of `data`, and attaches the "calltype"
# attribute (a single value is recycled over all blocks).
check.blocks <- function(blocks, data, calltype = "type") {

  data <- check.dataform(data)
  blocks <- name.blocks(blocks)

  # check that all variable names exists in data
  bv <- unique(unlist(blocks))
  notFound <- !bv %in% colnames(data)
  if (any(notFound))
    stop(paste("The following names were not found in `data`:",
               paste(bv[notFound], collapse = ", ")))

  if (length(calltype) == 1L) {
    ct <- rep(calltype, length(blocks))
    names(ct) <- names(blocks)
    attr(blocks, "calltype") <- ct
  }
  else {
    ct <- calltype
    names(ct) <- names(blocks)
    attr(blocks, "calltype") <- ct
  }

  blocks
}
construct.blocks <- function(formulas = NULL, predictorMatrix = NULL) {
  # Derives a `blocks` object from `formulas` and/or `predictorMatrix`.
  # Formula blocks come from the left-hand sides and get calltype "formula";
  # predictorMatrix blocks come from its row names and get calltype "type".
  # When both are given, a predictorMatrix block is kept only if no formula
  # block has the same name.

  blocks.f <- blocks.p <- NULL
  if (!is.null(formulas)) {
    # anything that is not a list of formulas yields NULL
    if (!all(sapply(formulas, is.formula))) return(NULL)
    blocks.f <- name.blocks(lapply(name.formulas(formulas), lhs))
    ct <- rep("formula", length(blocks.f))
    names(ct) <- names(blocks.f)
    attr(blocks.f, "calltype") <- ct
    if (is.null(predictorMatrix)) return(blocks.f)
  }

  if (!is.null(predictorMatrix)) {
    if (is.null(row.names(predictorMatrix)))
      stop("No row names in predictorMatrix", call. = FALSE)
    blocks.p <- name.blocks(row.names(predictorMatrix))
    ct <- rep("type", length(blocks.p))
    names(ct) <- names(blocks.p)
    attr(blocks.p, "calltype") <- ct
    if (is.null(formulas)) return(blocks.p)
  }

  # combine into unique blocks
  blocknames <- unique(c(names(blocks.f), names(blocks.p)))
  keep <- setdiff(blocknames, names(blocks.f))
  blocks <- c(blocks.f, blocks.p[keep])
  ct <- c(rep("formula", length(formulas)), rep("type", length(keep)))
  names(ct) <- names(blocks)
  attr(blocks, "calltype") <- ct
  blocks
}

## Authors: Gerko Vink, Stef van Buuren

# Strips whitespace from the left, the right, or both ends of each element
# of `string`.
# gsub() is required here: with side = "both" the pattern is an alternation
# that can match twice (once at each end), and sub() would only remove the
# first match, leaving trailing whitespace in place.
rm.whitespace <- function(string, side = "both") {
  side <- match.arg(side, c("left", "right", "both"))
  pattern <- switch(side,
                    left = "^\\s+",
                    right = "\\s+$",
                    both = "^\\s+|\\s+$")
  return(gsub(pattern, '', string))
}
# Residual degrees of freedom for objects that lack a suitable default.

# mira: residual df of the first of the repeated analyses
df.residual.mira <- function(object, ...) {
  first.fit <- object$analyses[[1]]
  df.residual(first.fit)
}

# lme: first entry of the "X" component of fixDF
df.residual.lme <- function(object, ...) {
  object$fixDF[["X"]][1]
}

# mer: dims[2] - dims[3] - dims[4] + 1, taken from the `dims` slot
df.residual.mer <- function(object, ...) {
  signed.dims <- object@dims[2:4] * c(1, -1, -1)
  sum(signed.dims) + 1
}

# multinom: number of residual rows minus the effective df
df.residual.multinom <- function(object, ...) {
  n.rows <- nrow(object$residuals)
  n.rows - object$edf
}
The block to which the list element applies is #' identified by its name, so list names must correspond to block names.} #' \item{\code{post}:}{A vector of strings of length \code{length(blocks)} #' with commands for post-processing.} #' \item{\code{seed}:}{The seed value of the solution.} #' \item{\code{iteration}:}{Last Gibbs sampling iteration number.} #' \item{\code{lastSeedValue}:}{The most recent seed value.} #' \item{\code{chainMean}:}{A list of \code{m} components. Each #' component is a \code{length(visitSequence)} by \code{maxit} matrix #' containing the mean of the generated multiple imputations. #' The array can be used for monitoring convergence. #' Note that observed data are not present in this mean.} #' \item{\code{chainVar}:}{A list with a structure similar to \code{chainMean}, #' containing the covariances of the imputed values.} #' \item{\code{loggedEvents}:}{A \code{data.frame} with five columns #' containing warnings, corrective actions, and other inside info.} #' \item{\code{version}:}{Version number of \code{mice} package that #' created the object.} #' \item{\code{date}:}{Date at which the object was created.} #'} #' #'@details #'The \code{loggedEvents} entry is a matrix with five columns containing a #'record of automatic removal actions. It is \code{NULL} if no action was #'made. 
At initialization the program does the following three actions: #'\describe{ #'\item{1}{A variable that contains missing values, that is not imputed #'and that is used as a predictor is removed} #'\item{2}{A constant variable is removed} #'\item{3}{A collinear variable is removed.} #'} #'During iteration, the program does the following #'actions: #'\describe{ #'\item{1}{One or more variables that are linearly dependent are removed #'(for categorical data, a 'variable' corresponds to a dummy variable)} #'\item{2}{Proportional odds regression imputation that does not converge #'and is replaced by \code{polyreg}.} #'} #' #'Explanation of elements in \code{loggedEvents}: #'\describe{ #'\item{\code{it}}{iteration number at which the record was added,} #'\item{\code{im}}{imputation number,} #'\item{\code{dep}}{name of the dependent variable,} #'\item{\code{meth}}{imputation method used,} #'\item{\code{out}}{a (possibly long) character vector with the #'names of the altered or removed predictors.} #'} #' #' @note The \code{mice} package does not use #' the S4 class definitions, and instead relies on the S3 list #' equivalent \code{oldClass(obj) <- "mids"}. #' #'@name mids-class #'@rdname mids-class #'@aliases mids-class mids #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #'@seealso \code{\link{mice}}, \code{\link[=mira-class]{mira}}, #'\code{\link{mipo}} #'@references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #'@keywords classes NULLmice/R/mice.impute.jomoImpute.R0000644000176200001440000000655413617734724016111 0ustar liggesusers#'Multivariate multilevel imputation using \code{jomo} #' #'This function is a wrapper around the \code{jomoImpute} function #'from the \code{mitml} package so that it can be called to #'impute blocks of variables in \code{mice}. 
The \code{mitml::jomoImpute} #'function provides an interface to the \code{jomo} package for #'multiple imputation of multilevel data #'\url{https://CRAN.R-project.org/package=jomo}. #'Imputations can be generated using \code{type} or \code{formula}, #'which offer different options for model specification. #' #'@name mice.impute.jomoImpute #'@inheritParams mitml::jomoImpute #'@param data A data frame containing incomplete and auxiliary variables, #'the cluster indicator variable, and any other variables that should be #'present in the imputed datasets. #'@param type An integer vector specifying the role of each variable #'in the imputation model (see \code{\link[mitml]{jomoImpute}}) #'@param formula A formula specifying the role of each variable #'in the imputation model. The basic model is constructed #'by \code{model.matrix}, which allows derived variables to be included #'in the imputation model using \code{I()}. See #'\code{\link[mitml]{jomoImpute}}. #'@param format A character vector specifying the type of object that should #'be returned. The default \code{format = "imputes"} returns the imputations #'in the form used by \code{mice}; \code{"native"} returns the object created by #'\code{mitml::jomoImpute}, and \code{"complete"} returns the completed data #'restricted to the columns of \code{data}. #'@param ... Other named arguments: \code{n.burn}, \code{n.iter}, #'\code{group}, \code{prior}, \code{silent} and others. #'@return A list of imputations for all incomplete variables in the model, #'that can be stored in the \code{imp} component of the \code{mids} #'object. #'@seealso \code{\link[mitml]{jomoImpute}} #'@note The number of imputations \code{m} is set to 1, and the function #'is called \code{m} times so that it fits within the \code{mice} #'iteration scheme. #' #'This is a multivariate imputation function using a joint model. #'@author Stef van Buuren, 2018, building on work of Simon Grund, #'Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) #'and Quartagno and Carpenter (authors of \code{jomo} package). #'@references #' Grund S, Luedtke O, Robitzsch A (2016). 
mice.impute.jomoImpute <- function(data, formula, type, m = 1, silent = TRUE,
                                   format = "imputes", ...) {
  # Wrapper around mitml::jomoImpute(). What is returned depends on `format`:
  #   "native"   the object returned by mitml::jomoImpute()
  #   "complete" the completed data, restricted to the columns of `data`
  #   "imputes"  the completed data passed through single2imputes()
  # Any other value gives NULL.
  install.on.demand("mitml", ...)

  native <- mitml::jomoImpute(data = data, formula = formula, type = type,
                              m = m, silent = silent, ...)
  if (format == "native") {
    return(native)
  }

  # keep only the columns that are present in the input data
  completed <- mitml::mitmlComplete(native, print = 1)[, names(data)]
  if (format == "complete") {
    return(completed)
  }

  if (format != "imputes") {
    return(NULL)
  }
  single2imputes(completed, is.na(data))
}
#' #'@name fdgs #'@aliases fdgs #'@docType data #'@format \code{fdgs} is a data frame with 10030 rows and 8 columns: #'\describe{ #'\item{id}{Person number} #'\item{reg}{Region (factor, 5 levels)} #'\item{age}{Age (years)} #'\item{sex}{Sex (boy, girl)} #'\item{hgt}{Height (cm)} #'\item{wgt}{Weight (kg)} #'\item{hgt.z}{Height Z-score} #'\item{wgt.z}{Weight Z-score} #'} #'@source Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, #'S. E., Hirasing, R. A., van Buuren, S. (2011). Increase in prevalence of #'overweight in Dutch children and adolescents: A comparison of nationwide #'growth studies in 1980, 1997 and 2009. \emph{PLoS ONE}, \emph{6}(11), #'e27608. #' #'Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., #'Hirasing, R. A., \& van Buuren, S. (2013). The world's tallest nation has #'stopped growing taller: the height of Dutch children from 1955 to 2009. #'\emph{Pediatric Research}, \emph{73}(3), 371-377. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-nonresponse.html#fifth-dutch-growth-study}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Boca Raton, FL.: Chapman & Hall/CRC Press. #'@keywords datasets #'@examples #' #' #'data(fdgs) #'summary(fdgs) #' NULL mice/R/visitSequence.R0000644000176200001440000000350713416657163014363 0ustar liggesusers#' Creates a \code{visitSequence} argument #' #' This helper function creates a valid \code{visitSequence}. The #' \code{visitSequence} is an argument to the \code{mice} function that #' specifies the sequence in which blocks are imputed. 
#' @inheritParams mice
#' @return Vector containing block names
#' @seealso \code{\link{mice}}
#' @examples
#' make.visitSequence(nhanes)
#' @export
make.visitSequence <- function(data = NULL, blocks = NULL) {
  # explicitly supplied blocks take precedence over blocks derived from `data`
  if (!is.null(blocks)) {
    blocks <- name.blocks(blocks)
    return(names(blocks))
  }
  data <- check.dataform(data)
  blocks <- make.blocks(data)
  names(blocks)
}

# Validates a user-supplied `visitSequence` against the names of `blocks`.
# Accepts NULL (default sequence from make.visitSequence()), one of the
# keywords "roman", "arabic", "monotone", "revmonotone", a numeric vector of
# column positions in `data` (legacy), or a character vector of block names.
# Returns a character vector.
check.visitSequence <- function(visitSequence = NULL,
                                data, where = NULL, blocks) {

  if (is.null(names(blocks)) || any(is.na(names(blocks))))
    stop("Missing names in `blocks`.")

  if (is.null(visitSequence)) return(make.visitSequence(data, blocks))

  if (is.null(where)) where <- is.na(data)
  # the local `nimp` shadows the function nimp(); R still finds the function
  # when the name is used in call position
  nimp <- nimp(where, blocks)
  if (length(nimp) == 0) visitSequence <- nimp

  # a single string is interpreted as a keyword; match.arg() raises an error
  # for any other single string
  if (length(visitSequence) == 1 && is.character(visitSequence)) {
    code <- match.arg(visitSequence,
                      c("roman", "arabic", "monotone", "revmonotone"))
    visitSequence <- switch(
      code,
      roman = names(blocks)[nimp > 0],
      arabic = rev(names(blocks)[nimp > 0]),
      monotone = names(blocks)[order(nimp)],
      revmonotone = rev(names(blocks)[order(nimp)])
    )
  }

  # legacy handling
  if (is.numeric(visitSequence))
    visitSequence <- colnames(data)[visitSequence]

  # check against names(blocks)
  visitSequence <- visitSequence[is.element(visitSequence, names(blocks))]

  # remove any blocks without missing data
  # NOTE(review): as written this indexes the logical vector by name and then
  # takes the names again, so entries whose value is FALSE are kept as well --
  # confirm whether filtering on `nimp > 0` was intended before changing it.
  visitSequence <- names((nimp > 0L)[visitSequence])
  visitSequence
}
mice/R/parse.ums.R0000644000176200001440000000250013620753345013435 0ustar liggesusers
# Parses an "unidentifiable model specification" string such as "-3+2*bmi".
#   x   : design matrix whose columns are referenced by name in `ums`
#   ums : specification string; exactly one constant (intercept) term plus
#         any number of `coefficient*variable` terms joined by "+" or "-"
#   umx : optional extra columns, appended to `x` before the lookup
# Returns list(delta = numeric coefficients with the intercept first,
#              x = matrix with a leading column of ones followed by the
#                  referenced columns, in the same order as `delta`).
# Blanks around terms, coefficients and variable names are ignored, so
# "-3 + 2 * bmi" is parsed like "-3+2*bmi".
parse.ums <- function(x, ums = NULL, umx = NULL, ...) {
  if (is.null(ums)) stop("Unidentifiable model specification (ums) not found.")
  if (!is.null(umx)) x <- base::cbind(x, umx)

  # trims leading/trailing whitespace (regex form, no dependency on trimws())
  strip <- function(s) gsub("^[[:space:]]+|[[:space:]]+$", "", s)

  ## Unidentifiable part
  # e.g. specified in blots as list(X = list(ums = "-3+2*bmi"))
  # turn every "-" into "+-" so that all terms can be split on "+"
  mnar0 <- gsub("-", "+-", ums)
  mnar0 <- strip(unlist(strsplit(mnar0, "+", fixed = TRUE)))
  # a leading sign leaves an empty first term
  if (mnar0[1L] == "") mnar0 <- mnar0[-1L]

  # terms without "*" are constants; exactly one is required
  is.const <- !grepl("*", mnar0, fixed = TRUE)
  n.const <- sum(is.const)
  if (n.const == 0L)
    stop("An intercept (constant) term must be included in the expression")
  if (n.const > 1L) stop("Only one intercept term allowed")
  mnar0[is.const] <- paste(mnar0[is.const], "*intercept", sep = "")

  mnar <- strsplit(mnar0, "*", fixed = TRUE)
  # coefficients, e.g. c("-3","2"); inner blanks ("- 3") are dropped
  mnar.parm <- unlist(lapply(mnar, function(term) term[1L]))
  mnar.parm <- as.numeric(gsub("[[:space:]]+", "", mnar.parm))
  # variables, e.g. c("intercept","bmi")
  mnar.vars <- strip(unlist(lapply(mnar, function(term) term[2L])))

  # put the intercept first, keep the other terms in their written order
  ord <- c(which(mnar.vars == "intercept"), which(mnar.vars != "intercept"))
  mnar.parm <- mnar.parm[ord]
  mnar.vars <- mnar.vars[ord]

  xmnar <- as.matrix(cbind(1, as.matrix(x[, mnar.vars[!mnar.vars == "intercept"]])))
  list(delta = mnar.parm, x = xmnar)
}
mice/R/mice.impute.midastouch.R0000644000176200001440000002102313416657163016103 0ustar liggesusers
#' Imputation by predictive mean matching with distance aided donor selection
#'
#' Imputes univariate missing data using predictive mean matching.
#' @aliases mice.impute.midastouch
#' @inheritParams mice.impute.pmm
#' @param midas.kappa Scalar. If \code{NULL} (default) then the
#' optimal \code{kappa} gets selected automatically. Alternatively, the user
#' may specify a scalar. Siddique and Belin 2008 find \code{midas.kappa = 3}
#' to be sensible.
#' @param outout Logical. If \code{TRUE} (default) one model is estimated
#' for each donor (leave-one-out principle). For speedup choose
#' \code{outout = FALSE}, which estimates one model for all observations
#' leading to in-sample predictions for the donors and out-of-sample
#' predictions for the recipients. Mind the inappropriateness, though.
#' @param neff FOR EXPERTS. Null or character string. The name of an existing #' environment in which the effective sample size of the donors for each #' loop (CE iterations times multiple imputations) is supposed to be written. #' The effective sample size is necessary to compute the correction for the #' total variance as originally suggested by Parzen, Lipsitz and #' Fitzmaurice 2005. The objectname is \code{midastouch.neff}. #' @param debug FOR EXPERTS. Null or character string. The name of an existing #' environment in which the input is supposed to be written. The objectname #' is \code{midastouch.inputlist}. #' @return Vector with imputed data, same type as \code{y}, and of #' length \code{sum(wy)} #' @details Imputation of \code{y} by predictive mean matching, based on #' Rubin (1987, p. 168, formulas a and b) and Siddique and Belin 2008. #' The procedure is as follows: #' \enumerate{ #' \item Draw a bootstrap sample from the donor pool. #' \item Estimate a beta matrix on the bootstrap sample by the leave one out principle. #' \item Compute type II predicted values for \code{yobs} (nobs x 1) and \code{ymis} (nmis x nobs). #' \item Calculate the distance between all \code{yobs} and the corresponding \code{ymis}. #' \item Convert the distances in drawing probabilities. #' \item For each recipient draw a donor from the entire pool while considering the probabilities from the model. #' \item Take its observed value in \code{y} as the imputation. #' } #' @examples #' # do default multiple imputation on a numeric matrix #' imp <- mice(nhanes, method = 'midastouch') #' imp #' #' # list the actual imputations for BMI #' imp$imp$bmi #' #' # first completed data matrix #' complete(imp) #' #' # imputation on mixed data with a different method per column #' mice(nhanes2, method = c('sample', 'midastouch', 'logreg', 'norm')) #' @author Philipp Gaffert, Florian Meinfelder, Volker Bosch 2015 #' @references #' Gaffert, P., Meinfelder, F., Bosch V. 
(2015) Towards an MI-proper #' Predictive Mean Matching, Discussion Paper. #' \url{https://www.uni-bamberg.de/fileadmin/uni/fakultaeten/sowi_lehrstuehle/statistik/Personen/Dateien_Florian/properPMM.pdf} #' #' Little, R.J.A. (1988), Missing data adjustments in large #' surveys (with discussion), Journal of Business Economics and #' Statistics, 6, 287--301. #' #' Parzen, M., Lipsitz, S. R., Fitzmaurice, G. M. (2005), A note on reducing #' the bias of the approximate Bayesian bootstrap imputation variance estimator. #' Biometrika \bold{92}, 4, 971--974. #' #' Rubin, D.B. (1987), Multiple imputation for nonresponse in surveys. New York: Wiley. #' #' Siddique, J., Belin, T.R. (2008), Multiple imputation using an iterative #' hot-deck with distance-based donor selection. Statistics in medicine, #' \bold{27}, 1, 83--102 #' #' Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006), #' Fully conditional specification in multivariate imputation. #' \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, #' 1049--1064. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011), \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}, 3, 1--67. \url{https://www.jstatsoft.org/v45/i03/} #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.midastouch <- function(y, ry, x, wy = NULL, ridge = 1e-05, midas.kappa = NULL, outout = TRUE, neff = NULL, debug = NULL, ...) 
{ if (is.null(wy)) wy <- !ry #+ auxiliaries +# if (!is.null(debug)) { midastouch.inputlist <- list(y = y, ry = ry, x = x, omega = NULL) } sminx <- .Machine$double.eps^(1/4) #+ ensure data format +# x <- data.matrix(x) storage.mode(x) <- "numeric" X <- cbind(1, x) y <- as.numeric(y) #+ get data dimensions +# nobs <- sum(ry) nmis <- sum(wy) n <- length(ry) obsind <- ry misind <- wy m <- ncol(X) yobs <- y[obsind] Xobs <- X[obsind, , drop = FALSE] Xmis <- X[misind, , drop = FALSE] #+ P-Step +# ##++++ bootstrap omega <- bootfunc.plain(nobs) if (!is.null(debug)) { midastouch.inputlist$omega <- omega assign(x = "midastouch.inputlist", value = midastouch.inputlist, envir = get(debug)) } ##++++ beta estimation CX <- omega * Xobs XCX <- crossprod(Xobs, CX) if (ridge > 0) { diag(XCX) <- diag(XCX) * (1 + c(0, rep(ridge, m - 1))) } # = check if any diagonal element is exactly zero ===========# diag0 <- diag(XCX) == 0 #==# if (any(diag0)) { diag(XCX)[diag0] <- max(sminx, ridge) } #==# # ============================================================# Xy <- crossprod(CX, yobs) beta <- solve(XCX, Xy) yhat.obs <- c(Xobs %*% beta) ##++++ kappa estimation if (is.null(midas.kappa)) { mean.y <- as.vector(crossprod(yobs, omega) / nobs) eps <- yobs - yhat.obs r2 <- 1 - c(crossprod(omega, eps^2)/crossprod(omega, (yobs - mean.y)^2)) ## slight deviation from the paper to ensure real results paper: a tiny ## delta is added to the denominator R Code: min function is used, note ## that this correction gets active for r2>.999 only midas.kappa <- min((50 * r2/(1 - r2))^(3/8), 100) ## if r2 cannot be determined (eg zero variance in yhat), use 3 as ## suggested by Siddique / Belin if (is.na(midas.kappa)) { midas.kappa <- 3 } } #+ I-Step +# if (outout) { ##++++ P-step if out of sample predictions for donors ## estimate one model per donor by leave-one-out XXarray_pre <- t(t(apply(X = Xobs, MARGIN = 1, FUN = tcrossprod)) * omega) ridgeind <- c(1:(m - 1)) * (m + 1) + 1 if (ridge > 0) { 
XXarray_pre[ridgeind, ] <- XXarray_pre[ridgeind, ] * (1 + ridge) } XXarray <- c(XCX) - XXarray_pre # = check if any diagonal element is exactly zero # =======================# diag0 <- XXarray[ridgeind, ] == 0 #==# if (any(diag0)) { XXarray[ridgeind, ][diag0] <- max(sminx, ridge) } #==# # =======================================================================# Xyarray <- c(Xy) - t(Xobs * yobs * omega) BETAarray <- apply(rbind(XXarray, Xyarray), 2, function(x, m) { solve(a = matrix(head(x, m^2), m), b = tail(x, m)) }, m = m) YHATdon <- rowSums(Xobs * t(BETAarray)) ## each recipient has nobs different yhats YHATrec <- Xmis %*% BETAarray ##++++ distance calculations dist.mat <- YHATdon - t(YHATrec) } else { yhat.mis <- c(Xmis %*% beta) dist.mat <- yhat.obs - matrix(data = yhat.mis, nrow = nobs, ncol = nmis, byrow = TRUE) } ##++++ convert distances to drawing probs // ensure real results delta.mat <- 1/((abs(dist.mat))^midas.kappa) delta.mat <- minmax(delta.mat) probs <- delta.mat * omega csums <- minmax(colSums(probs, na.rm = TRUE)) probs <- t(t(probs)/csums) #+ calculate neff +# if (!is.null(neff)) { if (!exists("midastouch.neff", envir = get(neff))) { assign(x = "midastouch.neff", value = list(), envir = get(neff)) } midastouch.neff <- get("midastouch.neff", envir = get(neff)) midastouch.neff[[length(midastouch.neff) + 1]] <- mean(1/rowSums((t(delta.mat)/csums)^2)) assign(x = "midastouch.neff", value = midastouch.neff, envir = get(neff)) } #+ return result +# index <- apply(probs, 2, sample, x = nobs, size = 1, replace = FALSE) yimp <- y[obsind][index] return(yimp) } mice/R/xyplot.mads.R0000644000176200001440000001124513416657163014014 0ustar liggesusers# # -------------------------- xyplot.mads ------------------------------------- # #'Scatterplot of amputed and non-amputed data against weighted sum scores #' #'Plotting method to investigate relation between amputed data and the weighted sum #'scores. Based on \code{\link{lattice}}. 
\code{xyplot} produces scatterplots.
#'The function plots the variables against the weighted sum scores. The function
#'automatically separates the amputed and non-amputed data to see the relation between
#'the amputation and the weighted sum scores.
#'
#'@param x A \code{mads} object, typically created by \code{\link{ampute}}.
#'@param data A string or vector of variable names that needs to be plotted. As
#'a default, all variables will be plotted.
#'@param which.pat A scalar or vector indicating which patterns need to be plotted.
#'As a default, all patterns are plotted.
#'@param standardized Logical. Whether the scatterplots need to be created
#'from standardized data or not. Default is TRUE.
#'@param layout A vector of two values indicating how the scatterplots of one
#'pattern should be divided over the plot. For example, \code{c(2, 3)} indicates
#'that the scatterplots of six variables need to be placed on 3 rows and 2 columns.
#'There are several defaults for different numbers of variables. Note that for more than
#'9 variables, multiple plots will be created automatically.
#'@param colors A vector of two RGB values defining the colors of the non-amputed and
#'amputed data respectively. RGB values can be obtained with \code{\link{hcl}}.
#'@param \dots Not used, but for consistency with generic
#'@return A list containing the scatterplots. Note that a new pattern
#'will always be shown in a new plot.
#'@note The \code{mads} object contains all the information you need to
#'make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate
#'Amputation using Ampute} to understand the contents of class object \code{mads}.
#'@author Rianne Schouten, 2016
#'@seealso \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for
#'an overview of the package, \code{\link{mads-class}}
#'@export
xyplot.mads <- function(x, data, which.pat = NULL,
                        standardized = TRUE, layout = NULL,
                        colors = mdc(1:2), ...) {
  if (!is.mads(x)) {
    stop("Object is not of class mads")
  }
  # `data` carries the names of the variables to plot (see @param data)
  if (missing(data)) data <- NULL
  yvar <- data
  if (is.null(yvar)) {
    varlist <- colnames(x$amp)
  } else {
    varlist <- yvar
  }
  if (is.null(which.pat)) {
    pat <- nrow(x$patterns)
    which.pat <- seq_len(pat)
  } else {
    pat <- length(which.pat)
  }
  if (standardized) {
    dat <- data.frame(scale(x$data))
    xlab <- "Standardized values in pattern"
  } else {
    dat <- x$data
    xlab <- "Data values in pattern"
  }

  # From here on `data` is reused for the stacked plotting frame: one row per
  # candidate of each requested pattern, with the amputation indicator (.amp),
  # the pattern number (.pat) and the weighted sum score in front.
  data <- NULL
  for (i in seq_len(pat)) {
    can <- which(x$cand == which.pat[i])
    mis <- matrix(NA, nrow = length(can), ncol = 3)
    # columns that the pattern sets to missing (coded 0 in x$patterns)
    nc <- which(x$patterns[which.pat[i], ] == 0)
    if (length(nc) > 1) {
      # amputed = all of the pattern's variables are NA in x$amp
      mis[apply(is.na(x$amp[can, nc]), 1, all), 1] <- 1
      mis[is.na(mis[, 1]), 1] <- 0
    } else if (length(nc) == 1) {
      mis[is.na(x$amp[can, nc]), 1] <- 1
      mis[is.na(mis[, 1]), 1] <- 0
    }
    mis[, 2] <- rep.int(which.pat[i], length(can))
    mis[, 3] <- unname(x$scores[[which.pat[i]]])
    data <- rbind(data, cbind(mis, dat[can, ]))
  }
  colnames(data) <- c(".amp", ".pat", "scores", names(x$data))
  data$.amp <- factor(data$.amp, levels = c(0, 1))

  formula <- as.formula(paste0("scores ~ ", paste0(varlist, collapse = "+")))

  if (is.null(layout)) {
    if (length(varlist) > 6) {
      layout <- c(3, 3)
    } else if (length(varlist) > 4) {
      layout <- c(3, 2)
    } else if (length(varlist) > 2) {
      layout <- c(2, 2)
    } else if (length(varlist) > 1) {
      layout <- c(2, 1)
    }
  }

  theme <- list(superpose.symbol = list(col = colors, pch = 1),
                plot.symbol = list(col = colors, pch = 1),
                strip.background = list(col = "grey95"))
  key <- list(columns = 2,
              points = list(col = colors, pch = 1),
              text = list(c("Non-Amputed Data", "Amputed Data")))

  p <- stats::setNames(vector(mode = "list", length = pat),
                       paste("Scatterplot Pattern", which.pat))
  for (i in seq_len(pat)) {
    # Take the grouping factor from the same rows that are plotted. The
    # previous `groups = data$.amp` used the indicator of ALL patterns stacked
    # together, which does not line up with the rows of a single pattern when
    # more than one pattern is present.
    pat.data <- data[data$.pat == which.pat[i], ]
    p[[paste("Scatterplot Pattern", which.pat[i])]] <-
      xyplot(x = formula, data = pat.data,
             groups = pat.data$.amp, par.settings = theme,
             multiple = TRUE, outer = TRUE, layout = layout, key = key,
             ylab = "Weighted sum scores",
             xlab = paste(xlab, which.pat[i]))
  }
  return(p)
}
mice/R/barnard.rubin.R0000644000176200001440000000037613416664706014265 0ustar liggesusers
# Degrees-of-freedom adjustment (presumably Barnard & Rubin, 1999 -- verify).
#   m     : number of imputations
#   b, t  : variance components; judging by the formula, between-imputation
#           and total variance -- confirm against the callers
#   dfcom : complete-data degrees of freedom
barnard.rubin <- function(m, b, t, dfcom = 999999) {
  lambda <- (1 + 1 / m) * b / t
  # floor lambda so that dfold below stays finite
  lambda[lambda < 1e-04] <- 1e-04
  dfold <- (m - 1) / lambda ^ 2
  dfobs <- (dfcom + 1) / (dfcom + 3) * dfcom * (1 - lambda)
  dfold * dfobs / (dfold + dfobs)
}
mice/R/zzz.R0000644000176200001440000000127213416657163012366 0ustar liggesusers
# zzz.R
#
# R package MICE: Multivariate Imputation by Chained Equations
#
# This file is part of the R package MICE.
#
# System functions for the MICE library

#'Echoes the package version number
#'
#'Echoes the package version number
#'
#'@param pkg A character vector with the package name.
#'@return A character vector containing the package name, version number and
#'installed directory.
#'@author Stef van Buuren, Oct 2010
#'@keywords misc
#'@examples
#'
#'version()
#'version("base")
#'
#'@export
version <- function(pkg="mice"){
  # library directory = parent of the package's installation directory
  lib <- dirname(system.file(package = pkg))
  d <- packageDescription(pkg)
  return(paste(d$Package,d$Version,d$Date,lib))
}
mice/R/pool.compare.R0000644000176200001440000001604413621064766014131 0ustar liggesusers
#'Compare two nested models fitted to imputed data
#'
#'This function is deprecated in V3. Use \code{\link{D1}} or
#'\code{\link{D3}} instead.
#'
#'Compares two nested models after m repeated complete data analysis
#'
#'The function is based on the article of Meng and Rubin (1992). The
#'Wald-method can be found in paragraph 2.2 and the likelihood method can be
#'found in paragraph 3. One could use the Wald method for comparison of linear
#'models obtained with e.g. \code{lm} (in \code{with.mids()}). The likelihood
#'method should be used in case of logistic regression models obtained with
#'\code{glm()} in \code{with.mids()}.
#'
#'The function assumes that \code{fit1} is the
#'larger model, and that model \code{fit0} is fully contained in \code{fit1}.
#'In case of \code{method='wald'}, the null hypothesis is tested that the extra #'parameters are all zero. #' #'@param fit1 An object of class 'mira', produced by \code{with.mids()}. #'@param fit0 An object of class 'mira', produced by \code{with.mids()}. The #'model in \code{fit0} is a nested fit0 of \code{fit1}. #'@param method Either \code{"wald"} or \code{"likelihood"} specifying #'the type of comparison. The default is \code{"wald"}. #'@param data No longer used. #'@return A list containing several components. Component \code{call} is #'the call to the \code{pool.compare} function. Component \code{call11} is #'the call that created \code{fit1}. Component \code{call12} is the #'call that created the imputations. Component \code{call01} is the #'call that created \code{fit0}. Component \code{call02} is the #'call that created the imputations. Components \code{method} is the #'method used to compare two models: 'Wald' or 'likelihood'. Component #'\code{nmis} is the number of missing entries for each variable. #'Component \code{m} is the number of imputations. #'Component \code{qhat1} is a matrix, containing the estimated coefficients of the #'\emph{m} repeated complete data analyses from \code{fit1}. #'Component \code{qhat0} is a matrix, containing the estimated coefficients of the #'\emph{m} repeated complete data analyses from \code{fit0}. #'Component \code{ubar1} is the mean of the variances of \code{fit1}, #'formula (3.1.3), Rubin (1987). #'Component \code{ubar0} is the mean of the variances of \code{fit0}, #'formula (3.1.3), Rubin (1987). #'Component \code{qbar1} is the pooled estimate of \code{fit1}, formula (3.1.2) Rubin #'(1987). #'Component \code{qbar0} is the pooled estimate of \code{fit0}, formula (3.1.2) Rubin #'(1987). #'Component \code{Dm} is the test statistic. #'Component \code{rm} is the relative increase in variance due to nonresponse, formula #'(3.1.7), Rubin (1987). 
#'Component \code{df1}: df1 = under the null hypothesis it is assumed that \code{Dm} has an F #'distribution with (df1,df2) degrees of freedom. #'Component \code{df2}: df2. #'Component \code{pvalue} is the P-value of testing whether the model \code{fit1} is #'statistically different from the smaller \code{fit0}. #'@author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 #'@seealso \code{\link{lm.mids}}, \code{\link{glm.mids}} #'@references Li, K.H., Meng, X.L., Raghunathan, T.E. and Rubin, D. B. (1991). #'Significance levels from repeated p-values with multiply-imputed data. #'Statistica Sinica, 1, 65-92. #' #'Meng, X.L. and Rubin, D.B. (1992). Performing likelihood ratio tests with #'multiple-imputed data sets. Biometrika, 79, 103-111. #' #'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} #'@keywords htest #'@export pool.compare <- function(fit1, fit0, method = c("wald", "likelihood"), data = NULL) { .Deprecated("D1") # Check the arguments call <- match.call() method <- match.arg(method) fits1 <- getfit(fit1) fits0 <- getfit(fit0) if (length(fits1) != length(fits0)) stop("unequal number of imputations for 'fit1' and 'fit0'", call. = FALSE) if (length(fits1) < 2L) stop("at least two imputations are needed", call. = FALSE) m <- length(fits1) est1 <- pool(fit1) est0 <- pool(fit0) dimQ1 <- length(getqbar(est1)) dimQ2 <- dimQ1 - length(getqbar(est0)) # Check: Only need the lm or lmer object formula1 <- formula(getfit(fit1, 1L)) formula0 <- formula(getfit(fit0, 1L)) vars1 <- est1$pooled$term vars0 <- est0$pooled$term if (is.null(vars1) || is.null(vars0)) stop("coefficients do not have names", call. = FALSE) if (dimQ2 < 1L) stop("Model 'fit1' not larger than 'fit0'", call. = FALSE) if (!setequal(vars0, intersect(vars0, vars1))) stop("Model 'fit0' not contained in 'fit1'", call. 
= FALSE) if (method == "wald") { # Reference: paragraph 2.2, Article Meng & Rubin, # Biometrika, 1992. When two objects are to be compared # we need to calculate matrix Q Q <- diag(dimQ1) where_new_vars = which(!(vars1 %in% vars0)) Q <- Q[where_new_vars, , drop = FALSE] qbar <- Q %*% getqbar(est1) Ubar <- Q %*% diag(est1$pooled$ubar) %*% (t(Q)) Bm <- Q %*% diag(est1$pooled$b) %*% (t(Q)) rm <- (1 + 1/m) * sum(diag(Bm %*% (solve(Ubar))))/dimQ2 Dm <- (t(qbar)) %*% (solve(Ubar)) %*% qbar/(dimQ2 * (1 + rm)) deviances <- NULL } if (method == "likelihood") { # Calculate for each imputed dataset the deviance between the two # models with its estimated coefficients dev1.M <- lapply(fits1, glance) %>% bind_rows() %>% pull(.data$deviance) dev0.M <- lapply(fits0, glance) %>% bind_rows() %>% pull(.data$deviance) # Calculate for each imputed dataset the deviance between the two # models with the pooled coefficients qbar1 <- getqbar(pool(fits1)) mds1 <- lapply(fits1, fix.coef, beta = qbar1) dev1.L <- lapply(mds1, glance) %>% bind_rows() %>% pull(.data$deviance) qbar0 <- getqbar(pool(fits0)) mds0 <- lapply(fits0, fix.coef, beta = qbar0) dev0.L <- lapply(mds0, glance) %>% bind_rows() %>% pull(.data$deviance) deviances <- list(dev1.M = dev1.M, dev0.M = dev0.M, dev1.L = dev1.L, dev0.L = dev0.L) dev.M <- mean(dev0.M - dev1.M) dev.L <- mean(dev0.L - dev1.L) rm <- ((m + 1)/(dimQ2 * (m - 1))) * (dev.M - dev.L) Dm <- dev.L / (dimQ2 * (1 + rm)) } # Degrees of freedom for F distribution, same for both methods v <- dimQ2 * (m - 1) if (v > 4) # according to Li 1991 w <- 4 + (v - 4) * ((1 + (1 - 2 / v) * (1 / rm))^2) else w <- v * (1 + 1 / dimQ2) * ((1 + 1 / rm)^2) / 2 statistic <- list(call = call, call11 = fit1$call, call12 = fit1$call1, call01 = fit0$call, call02 = fit0$call1, method = method, nmis = fit1$nmis, m = m, qbar1 = getqbar(est1), qbar0 = getqbar(est0), ubar1 = est1$pooled$ubar, ubar0 = est0$pooled$ubar, deviances = deviances, Dm = Dm, rm = rm, df1 = dimQ2, df2 = w, pvalue = 1 - 
pf(Dm, dimQ2, w)) statistic } mice/R/mice.impute.2l.bin.R0000644000176200001440000001152213617734707015035 0ustar liggesusers#'Imputation by a two-level logistic model using \code{glmer} #' #'Imputes univariate systematically and sporadically missing data #'using a two-level logistic model using \code{lme4::glmer()} #' #'Data are missing systematically if they have not been measured, e.g., in the #'case where we combine data from different sources. Data are missing sporadically #'if they have been partially observed. #' #'@inheritParams mice.impute.2l.lmer #'@param intercept Logical determining whether the intercept is automatically #'added. #'@param \dots Arguments passed down to \code{glmer} #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@author Shahab Jolani, 2015; adapted to mice, SvB, 2018 #'@references #'Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). #'Imputation of systematically missing predictors in an individual #'participant data meta-analysis: a generalized approach using MICE. #'\emph{Statistics in Medicine}, 34:1841-1863. #'@family univariate-2l #'@keywords datagen #'@examples #'library(tidyr) #'library(dplyr) #'data("toenail2") #'data <- tidyr::complete(toenail2, patientID, visit) %>% #' tidyr::fill(treatment) %>% #' dplyr::select(-time) %>% #' dplyr::mutate(patientID = as.integer(patientID)) #' #'\dontrun{ #'pred <- mice(data, print = FALSE, maxit = 0, seed = 1)$pred #'pred["outcome", "patientID"] <- -2 #'imp <- mice(data, method = "2l.bin", pred = pred, maxit = 1, m = 1, seed = 1) #'} #'@export mice.impute.2l.bin <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) { install.on.demand("MASS", ...) install.on.demand("lme4", ...) 
if (is.null(wy)) wy <- !ry if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) names(type)[1] <- colnames(x)[1] <- "(Intercept)" } clust <- names(type[type == -2]) rande <- names(type[type == 2]) fixe <- names(type[type > 0]) X <- x[, fixe, drop = FALSE] Z <- x[, rande, drop = FALSE] xobs <- x[ry, , drop = FALSE] yobs <- y[ry] # create formula, use [-1] to remove intercept fr <- ifelse(length(rande) > 1, paste("+ ( 1 +", paste(rande[-1L], collapse = "+")), "+ ( 1 ") randmodel <- paste("yobs ~ ", paste(fixe[-1L], collapse = "+"), fr, "|", clust, ")") suppressWarnings(fit <- try(lme4::glmer(formula(randmodel), data = data.frame(yobs, xobs), family = binomial, ... ), silent = TRUE)) if(!is.null(attr(fit, "class"))) { if(attr(fit, "class") == "try-error") { warning("glmer does not run. Simplify imputation model") return(y[wy]) } } # draw beta* beta <- lme4::fixef(fit) rv <- t(chol(vcov(fit))) beta.star <- beta + rv %*% rnorm(ncol(rv)) # calculate psi* psi.hat <- matrix(lme4::VarCorr(fit)[[1L]], nrow = dim(lme4::VarCorr(fit)[[1L]])[1L]) s <- nrow(psi.hat) * psi.hat rancoef <- as.matrix(lme4::ranef(fit)[[1L]]) lambda <- t(rancoef) %*% rancoef temp <- lambda + s if (attr(suppressWarnings(chol(temp, pivot = TRUE)), "rank") != nrow(temp)) warning("The cov matrix is not full rank") temp <- MASS::ginv(temp) ev <- eigen(temp) if (mode(ev$values) == "complex") { ev$values <- suppressWarnings(as.numeric(ev$values)) ev$vectors <- suppressWarnings(matrix(as.numeric(ev$vectors), nrow = length(ev$values))) warning("The cov matrix is complex") } if(sum(ev$values < 0) > 0) { ev$values[ev$values < 0] <- 0 temp <- ev$vectors %*% diag(ev$values, nrow = length(ev$values)) %*% t(ev$vectors) } deco <- ev$vectors %*% diag(sqrt(ev$values), nrow = length(ev$values)) temp.psi.star <- stats::rWishart(1, nrow(rancoef) + nrow(psi.hat), diag(nrow(psi.hat)))[, , 1L] psi.star <- MASS::ginv(deco %*% temp.psi.star%*%t(deco)) #### psi.star positive definite? 
if (!isSymmetric(psi.star)) psi.star <- (psi.star + t(psi.star)) / 2 valprop <- eigen(psi.star) if(sum(valprop$values < 0) > 0) { valprop$values[valprop$values < 0] <- 0 psi.star <- valprop$vectors %*% diag(valprop$values) %*% t(valprop$vectors) } # find clusters for which we need imputes clmis <- x[wy, clust] # the main imputation task for (i in clmis) { bi.star <- t(MASS::mvrnorm(n = 1L, mu = rep(0, nrow(psi.star)), Sigma = psi.star)) idx <- wy & (x[, clust] == i) logit <- X[idx, , drop = FALSE] %*% beta.star + Z[idx, , drop = FALSE] %*% matrix(bi.star, ncol = 1) vec <- rbinom(nrow(logit), 1, as.vector(1/(1 + exp(-logit)))) if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } y[idx] <- vec } return(y[wy]) } mice/R/brandsma.R0000644000176200001440000000435713416657163013327 0ustar liggesusers#'Brandsma school data used Snijders and Bosker (2012) #' #'Dataset with raw data from Snijders and Bosker (2012) containing #'data from 4106 pupils attending 216 schools. This dataset #'includes all pupils and schools with missing data. #' #'@name brandsma #'@docType data #'@format \code{brandsma} is a data frame with 4106 rows and 14 columns: #'\describe{ #'\item{\code{sch}}{School number} #'\item{\code{pup}}{Pupil ID} #'\item{\code{iqv}}{IQ verbal} #'\item{\code{iqp}}{IQ performal} #'\item{\code{sex}}{Sex of pupil} #'\item{\code{ses}}{SES score of pupil} #'\item{\code{min}}{Minority member 0/1} #'\item{\code{rpg}}{Number of repeated groups, 0, 1, 2} #'\item{\code{lpr}}{language score PRE} #'\item{\code{lpo}}{language score POST} #'\item{\code{apr}}{Arithmetic score PRE} #'\item{\code{apo}}{Arithmetic score POST} #'\item{\code{den}}{Denomination classification 1-4 - at school level} #'\item{\code{ssi}}{School SES indicator - at school level} #'} #' #'@note This dataset is constructed from the raw data. 
There are
#'a few differences with the data set used in Chapter 4 and 5
#'of Snijders and Bosker:
#'\enumerate{
#'\item All schools are included, including the five schools with
#'missing values on \code{langpost}.
#'\item Missing \code{denomina} codes are left as missing.
#'\item Aggregates are undefined in the presence of missing data
#'in the underlying values.
#'Variables \code{ses}, \code{iqv} and \code{iqp} are in their
#'original scale, and not globally centered.
#'No aggregate variables at the school level are included.
#'\item There is a wider selection of original variables. Note
#'however that the source data contain an even wider set of
#'variables.
#'}
#'
#'@source Constructed from \code{MLbook_2nded_total_4106-99.sav} from
#' \url{https://www.stats.ox.ac.uk/~snijders/mlbook.htm} by function
#' \code{data-raw/R/brandsma.R}
#'
#'@references
#' Brandsma, HP and Knuver, JWM (1989), Effects of school and
#' classroom characteristics on pupil progress in language and arithmetic.
#' International Journal of Educational Research, 13(7), 777 - 788.
#'
#' Snijders, TAB and Bosker RJ (2012). Multilevel Analysis, 2nd Ed. Sage,
#' Los Angeles, 2012.
#'@keywords datasets
NULL
mice/R/check.deprecated.R0000644000176200001440000000117713416664706014712 0ustar liggesusers
# contributed by Simon Grund, #137
# Emits one warning for every deprecated argument name found among the names
# of `...`, pointing to its replacement. Returns NULL invisibly.
check.deprecated <- function(...) {
  supplied <- names(list(...))
  # deprecated name -> current name
  renamed <- list(imputationMethod = "method",
                  defaultImputationMethod = "defaultMethod",
                  form = "formulas")
  # intersect() keeps the order of `renamed`, so warnings appear in table order
  for (old in intersect(names(renamed), supplied)) {
    msg <- paste0("The '", old,
                  "' argument is no longer supported. Please use '",
                  renamed[[old]], "' instead.")
    warning(msg)
  }
  invisible(NULL)
}
mice/R/mice.impute.mnar.norm.R0000644000176200001440000001672713620753345015665 0ustar liggesusers
#' Imputation under MNAR mechanism by NARFCS
#'
#' Imputes univariate data under a user-specified MNAR mechanism by
#' linear or logistic regression and NARFCS. Sensitivity analysis under
#' different model specifications may shed light on the impact of
#' different MNAR assumptions on the conclusions.
#'
#' @rdname mice.impute.mnar
#' @aliases mice.impute.mnar.norm mnar.norm
#' mice.impute.mnar.logreg mnar.logreg
#' @inheritParams mice.impute.pmm
#' @param ums A string containing the specification of the
#' unidentifiable part of the imputation model (the "unidentifiable
#' model specification"), that is, the desired \eqn{\delta}-adjustment
#' (offset) as a function of other variables and values for the
#' corresponding deltas (sensitivity parameters). See details.
#' @param umx An auxiliary data matrix containing variables that do
#' not appear in the identifiable part of the imputation procedure
#' but that have been specified via \code{ums} as being predictors
#' in the unidentifiable part of the imputation model. See details.
#' @return Vector with imputed data, same type as \code{y}, and of length
#' \code{sum(wy)}
#' @details
#' This function imputes data that are thought to be Missing Not at
#' Random (MNAR) by the NARFCS method. The NARFCS procedure
#' (Tompsett et al, 2018) generalises the so-called
#' \eqn{\delta}-adjustment sensitivity analysis method of Van Buuren,
#' Boshuizen & Knook (1999) to the case with multiple incomplete
#' variables within the FCS framework. In practical terms, the
#' NARFCS procedure shifts the imputations drawn at each
#' iteration of \code{mice} by a user-specified quantity that can
#' vary across subjects, to reflect systematic departures of the
#' missing data from the data distribution imputed under MAR.
#' #' Specification of the NARFCS model is done by the \code{blots} #' argument of \code{mice()}. The \code{blots} parameter is a named #' list. For each variable to be imputed by #' \code{mice.impute.mnar.norm()} or \code{mice.impute.mnar.logreg()} #' the corresponding element in \code{blots} is a list with #' at least one argument \code{ums} and, optionally, a second #' argument \code{umx}. #' For example, the high-level call might like something like #' \code{mice(nhanes[, c(2, 4)], method = c("pmm", "mnar.norm"), blots = list(chl = list(ums = "-3+2*bmi")))}. #' #' The \code{ums} parameter is required, and might look like this: #' \code{"-4+1*Y"}. The \code{ums} specifcation must have the #' following characteristics: #' \enumerate{ #' \item{A single term corresponding to the intercept (constant) term, #' not multiplied by any variable name, must be included in the #' expression;} #' \item{Each term in the expression (corresponding to the intercept #' or a predictor variable) must be separated by either a \code{"+"} #' or \code{"-"} sign, depending on the sign of the sensitivity #' parameter;} #' \item{Within each non-intercept term, the sensitivity parameter #' value comes first and the predictor variable comes second, and these #' must be separated by a \code{"*"} sign;} #' \item{For categorical predictors, for example a variable \code{Z} #' with K + 1 categories \code{("Cat0","Cat1", ...,"CatK")}, K #' category-specific terms are needed, and those not in \code{umx} #' (see below) must be specified by concatenating the variable name #' with the name of the category (e.g. \code{ZCat1}) as this is how #' they are named in the design matrix (argument \code{x}) passed #' to the univariate imputation function. An example is #' \code{"2+1*ZCat1-3*ZCat2"}.} #' } #' #' If given, the \code{umx} specification must have the following #' characteristics: #' \enumerate{ #' \item{It contains only complete variables, with no missing values;} #' \item{It is a numeric matrix. 
In particular, categorical variables #' must be represented as dummy indicators with names corresponding #' to what is used in \code{ums} to refer to the category-specific terms #' (see above);} #' \item{It has the same number of rows as the \code{data} argument #' passed on to the main \code{mice} function;} #' \item{It does not contain variables that were already predictors #' in the identifiable part of the model for the variable under #' imputation.} #' } #' #' Limitation: The present implementation can only condition on variables #' that appear in the identifiable part of the imputation model (\code{x}) or #' in complete auxiliary variables passed on via the \code{umx} argument. #' It is not possible to specify models where the offset depends on #' incomplete auxiliary variables. #' #' For an MNAR alternative see also \code{\link{mice.impute.ri}}. #' #' @author Margarita Moreno-Betancur, Stef van Buuren, Ian R. White, 2020. #' @references #' Tompsett, D. M., Leacy, F., Moreno-Betancur, M., Heron, J., & #' White, I. R. (2018). On the use of the not-at-random fully #' conditional specification (NARFCS) procedure in practice. #' \emph{Statistics in Medicine}, \bold{37}(15), 2338-2353. #' \url{https://doi.org/10.1002/sim.7643}. #' #' Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple #' imputation of missing blood pressure covariates in survival analysis. #' \emph{Statistics in Medicine}, \bold{18}, 681--694. 
#' #' @family univariate imputation functions #' @keywords datagen #' @examples #' # 1: Example with no auxiliary data: only pass unidentifiable model specification (ums) #' #' # Specify argument to pass on to mnar imputation functions via "blots" argument #' mnar.blot <- list(X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2")) #' #' # Run NARFCS by using mnar imputation methods and passing argument via blots #' impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), #' blots = mnar.blot, seed = 234235, print = FALSE) #' #' # Obtain MI results: Note they coincide with those from old version at #' # https://github.com/moreno-betancur/NARFCS #' pool(with(impNARFCS,lm(Y ~ X + Z)))$pooled$estimate #' #' # 2: Example passing also auxiliary data to MNAR procedure (umx) #' # Assumptions: #' # - Auxiliary data are complete, no missing values #' # - Auxiliary data are a numeric matrix #' # - Auxiliary data have same number of rows as x #' # - Auxiliary data have no overlapping variable names with x #' #' # Specify argument to pass on to mnar imputation functions via "blots" argument #' aux <- matrix(0:1, nrow = nrow(mnar_demo_data)) #' dimnames(aux) <- list(NULL, "even") #' mnar.blot <- list(X = list(ums = "-4"), #' Y = list(ums = "2+1*ZCat1-3*ZCat2+0.5*even", umx = aux)) #' #' # Run NARFCS by using mnar imputation methods and passing argument via blots #' impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), #' blots = mnar.blot, seed = 234235, print = FALSE) #' #' # Obtain MI results: As expected they differ (slightly) from those #' # from old version at https://github.com/moreno-betancur/NARFCS #' pool(with(impNARFCS,lm(Y ~ X + Z)))$pooled$estimate #' @export mice.impute.mnar.norm <- function(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) { ## Undentifiable part: u <- parse.ums(x, ums = ums, umx = umx, ...) 
## Identifiable part: exactly the same as mice.impute.norm if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) parm <- .norm.draw(y, ry, x, ...) ## Draw imputations return(x[wy, ] %*% parm$beta + u$x[wy, ] %*% u$delta + rnorm(sum(wy)) * parm$sigma) } mice/R/mammalsleep.R0000644000176200001440000000356613416657163014036 0ustar liggesusers#'Mammal sleep data #' #'Dataset from Allison and Cicchetti (1976) of 62 mammal species on the #'interrelationship between sleep, ecological, and constitutional variables. #'The dataset contains missing values on five variables. #' #'Allison and Cicchetti (1976) investigated the interrelationship between #'sleep, ecological, and constitutional variables. They assessed these #'variables for 39 mammalian species. The authors concluded that slow-wave #'sleep is negatively associated with a factor related to body size. This #'suggests that large amounts of this sleep phase are disadvantageous in large #'species. Also, paradoxical sleep (REM sleep) was associated with a factor #'related to predatory danger, suggesting that large amounts of this sleep #'phase are disadvantageous in prey species. #' #'@name mammalsleep #'@aliases mammalsleep sleep #'@docType data #'@format \code{mammalsleep} is a data frame with 62 rows and 11 columns: #'\describe{ #'\item{species}{Species of animal} #'\item{bw}{Body weight (kg)} #'\item{brw}{Brain weight (g)} #'\item{sws}{Slow wave ("nondreaming") sleep (hrs/day)} #'\item{ps}{Paradoxical ("dreaming") sleep (hrs/day)} #'\item{ts}{Total sleep (hrs/day) (sum of slow wave and paradoxical sleep)} #'\item{mls}{Maximum life span (years)} #'\item{gt}{Gestation time (days)} #'\item{pi}{Predation index (1-5), 1 = least likely to be preyed upon} #'\item{sei}{Sleep exposure index (1-5), 1 = least exposed (e.g. 
animal sleeps in a #'well-protected den), 5 = most exposed} #'\item{odi}{Overall danger index (1-5) based on the above two indices and other information, 1 = least #'danger (from other animals), 5 = most danger (from other animals)} #'} #'@source Allison, T., Cicchetti, D.V. (1976). Sleep in Mammals: Ecological and #'Constitutional Correlates. Science, 194(4266), 732-734. #'@keywords datasets #'@examples #' #' #'sleep <- data(mammalsleep) #' #' NULL mice/R/rbind.R0000644000176200001440000002102613416657163012626 0ustar liggesusers#'Combine \code{mids} objects by rows #' #'This function combines two \code{mids} objects rowwise into a single #'\code{mids} object, or combines a \code{mids} object with a vector, matrix, #'factor or dataframe rowwise into a \code{mids} object. #' #'If \code{y} is a #'\code{mids} object, then \code{rbind} requires that the number of #'multiple imputations in \code{x} and \code{y} is identical. Also, #'columns of \code{x$data} and \code{y$data} should match. #' #'If \code{y} is not a \code{mids} object, the columns of \code{x$data} #'and \code{y} should match. The \code{where} matrix for \code{y} is set #'to \code{FALSE}, signaling that any missing values #'in \code{y} were not imputed. #' #'@param x A \code{mids} object. #'@param y A \code{mids} object, or a \code{data.frame}, \code{matrix}, \code{factor} #'or \code{vector}. #'@param \dots Additional \code{data.frame}, \code{matrix}, \code{vector} or \code{factor}. #'These can be given as named arguments. 
#'@return An S3 object of class \code{mids} #'@note The function construct the elements of the new \code{mids} object as follows: #'\tabular{ll}{ #'\code{data} \tab Rowwise combination of the (incomplete) data in \code{x} and \code{y}\cr #'\code{imp} \tab Equals \code{rbind(x$imp[[j]], y$imp[[j]])} if \code{y} is \code{mids} object; otherwise #'the data of \code{y} will be copied\cr #'\code{m} \tab Equals \code{x$m}\cr #'\code{where} \tab Rowwise combination of \code{where} arguments\cr #'\code{blocks} \tab Equals \code{x$blocks}\cr #'\code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{rbind.mids}\cr #'\code{nmis} \tab \code{x$nmis} + \code{y$nmis}\cr #'\code{method} \tab Taken from \code{x$method}\cr #'\code{predictorMatrix} \tab Taken from \code{x$predictorMatrix}\cr #'\code{visitSequence} \tab Taken from \code{x$visitSequence}\cr #'\code{formulas} \tab Taken from \code{x$formulas}\cr #'\code{post} \tab Taken from \code{x$post}\cr #'\code{blots} \tab Taken from \code{x$blots}\cr #'\code{seed} \tab Taken from \code{x$seed}\cr #'\code{iteration} \tab Taken from \code{x$iteration}\cr #'\code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr #'\code{chainMean} \tab Set to \code{NA}\cr #'\code{chainVar} \tab Set to \code{NA}\cr #'\code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr #'\code{version} \tab Taken from \code{x$version}\cr #'\code{date} \tab Taken from \code{x$date} #'} #'@author Karin Groothuis-Oudshoorn, Stef van Buuren #'@seealso \code{\link{cbind.mids}}, \code{\link{ibind}}, \code{\link[=mids-class]{mids}} #'@references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. 
#'\url{https://www.jstatsoft.org/v45/i03/} #'@keywords manip #'@examples #'imp1 <- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE) #'imp5 <- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE) #'mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) #' #'nrow(complete(rbind(imp1, imp5))) #'nrow(complete(rbind(imp1, mylist))) #' #'nrow(complete(rbind(imp1, data.frame(mylist)))) #'nrow(complete(rbind(imp1, complete(imp5)))) rbind.mids <- function(x, y = NULL, ...) { call <- match.call() if (is.mids(y)) return(rbind.mids.mids(x, y, call = call)) # Combine y and dots into data.frame if (is.null(y)) { y <- rbind.data.frame(...) } else { y <- rbind.data.frame(y, ...) } if (is.data.frame(y)) { if (ncol(y) != ncol(x$data)) stop("datasets have different number of columns") } varnames <- colnames(x$data) # Call is a vector, with first argument the mice statement and second argument the call to cbind.mids. call <- c(x$call, call) # The data in x (x$data) and y are combined together. data <- rbind(x$data, y) blocks <- x$blocks # where argument: code all values as observed, including NA wy <- matrix(FALSE, nrow = nrow(y), ncol = ncol(y)) where <- rbind(x$where, wy) # The number of imputations in the new midsobject is equal to that in x. m <- x$m # count the number of missing data in y and add them to x$nmis. nmis <- x$nmis + colSums(is.na(y)) # The listelements method, post, predictorMatrix, visitSequence will be copied from x. method <- x$method post <- x$post formulas <- x$formulas blots <- x$blots predictorMatrix <- x$predictorMatrix visitSequence <- x$visitSequence # Only x contributes imputations imp <- x$imp # seed, lastSeedvalue, number of iterations, chainMean and chainVar is taken as in mids object x. 
seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration chainMean <- x$chainMean chainVar <- x$chainVar loggedEvents <- x$loggedEvents midsobj <- list(data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date()) oldClass(midsobj) <- "mids" return(midsobj) } rbind.mids.mids <- function(x, y, call) { if (!is.mids(y)) stop("argument `y` not a mids object") if (ncol(y$data) != ncol(x$data)) stop("datasets have different number of columns") if (!identical(colnames(x$data), colnames(y$data))) stop("datasets have different variable names") if (!identical(sapply(x$data, is.factor), sapply(y$data, is.factor))) stop("datasets have different factor variables") if (x$m != y$m) stop("number of imputations differ") varnames <- colnames(x$data) # Call is a vector, with first argument the mice statement and second argument the call to cbind.mids. call <- match.call() call <- c(x$call, call) # The data in x (x$data) and y are combined together. data <- rbind(x$data, y$data) # Where argument where <- rbind(x$where, y$where) # The number of imputations in the new midsobject is equal to that in x. m <- x$m # count the number of missing data in y and add them to x$nmis. nmis <- x$nmis + y$nmis # The listelements method, post, predictorMatrix, visitSequence will be copied from x. blocks <- x$blocks method <- x$method post <- x$post formulas <- x$formulas blots <- x$blots predictorMatrix <- x$predictorMatrix visitSequence <- x$visitSequence # The original data of y will be binded into the multiple imputed dataset # including the imputed values of y. 
imp <- vector("list", ncol(x$data)) for (j in seq_len(ncol(x$data))) { if(!is.null(x$imp[[j]]) || !is.null(y$imp[[j]])) { imp[[j]] <- rbind(x$imp[[j]], y$imp[[j]]) } } names(imp) <- varnames # seed, lastSeedvalue, number of iterations seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration if (x$iteration != y$iteration) { warning("iterations differ, so no convergence diagnostics calculated", call. = FALSE) chainMean = NULL chainVar = NULL } else { w <- colSums(x$where) / colSums(where) chainMean <- x$chainMean * w + y$chainMean * (1 - w) chainVar <- x$chainVar * w + y$chainVar * (1 - w) } loggedEvents <- x$loggedEvents midsobj <- list(data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date()) oldClass(midsobj) <- "mids" return(midsobj) } mice/R/mice.theme.R0000644000176200001440000000376713617544610013555 0ustar liggesusers#'Set the theme for the plotting Trellis functions #' #'The \code{mice.theme()} function sets default choices for #'Trellis plots that are built into \pkg{mice}. #' #'@aliases mice.theme #'@param transparent A logical indicating whether alpha-transparency is #'allowed. The default is \code{TRUE}. #'@param alpha.fill A numerical values between 0 and 1 that indicates the #'default alpha value for fills. #'@return \code{mice.theme()} returns a named list that can be used as a theme in the functions in #'\pkg{lattice}. By default, the \code{mice.theme()} function sets #'\code{transparent <- TRUE} if the current device \code{.Device} supports #'semi-transparent colors. 
#'@author Stef van Buuren 2011 #'@export mice.theme <- function(transparent=TRUE, alpha.fill=0.3){ filler <- function(transparent, alpha){ if(transparent) return(c(grDevices::hcl(240,100,40,alpha), grDevices::hcl(0,100,40,alpha))) return(c(grDevices::hcl(240,100,40),grDevices::hcl(0,100,40))) } if (missing(transparent)) transparent <- supports.transparent() if (missing(alpha.fill)) alpha.fill <- ifelse(transparent, 0.3, 0) list(superpose.symbol = list( col = mdc(1:2), fill = filler(transparent, alpha.fill), pch = 1), superpose.line = list( col = mdc(4:5), lwd = 1 ), box.dot = list( col = mdc(1:2) ), box.rectangle = list( col = mdc(4:5) ), box.symbol = list( col = mdc(1:2) ), plot.symbol = list( col = mdc(1:2), fill = filler(transparent, alpha.fill), pch = 1 ), plot.line = list( col = mdc(4:5) ), superpose.polygon = list( col = filler(transparent, alpha.fill) ), strip.background = list( col = "grey95" ), mice = list( flag = TRUE ) ) } mice/R/ncc.R0000644000176200001440000000212013416657163012265 0ustar liggesusers#'Number of complete cases #' #'Calculates the number of complete cases. #' #'@param x An \code{R} object. Currently supported are methods for the #'following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, #'\code{x} can be a vector. #'@return Number of elements in \code{x} with complete data. #'@author Stef van Buuren, 2017 #'@seealso \code{\link{nic}}, \code{\link{cci}} #'@examples #' #' ncc(nhanes) # 13 complete cases #' #'@export ncc <- function(x) sum(cci(x)) #'Number of incomplete cases #' #'Calculates the number of incomplete cases. #' #'@param x An \code{R} object. Currently supported are methods for the #'following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, #'\code{x} can be a vector. #'@return Number of elements in \code{x} with incomplete data. 
#'@author Stef van Buuren, 2017 #'@seealso \code{\link{ncc}}, \code{\link{cci}} #'@examples #' #' nic(nhanes) # the remaining 12 rows #' nic(nhanes[,c("bmi","hyp")]) # number of cases with incomplete bmi and hyp #' #'@export nic <- function(x) sum(ici(x)) mice/R/expandcov.R0000644000176200001440000000100513416657163013512 0ustar liggesusers # ---------------------------expandvcov-------------------------------- expandvcov <- function(q, u) { err <- is.na(q) return(u) ## if (all(!err)) return(u) k <- length(q) v <- names(q) z <- u for (i in 1:ncol(z)){ if (err[i]) { rbind(z[,],NA,z[,]) j ## <- j + 1 up <- } j <- j + 1 z[i,] <- u[j,] z[,i] <- u[,j] } ## z <- matrix(NA, ncol=k, nrow=k, dimnames = list(v,v)) idx <- (is.na()) j <- 0 for (i in 1:k){ if (err[i]) next j <- j ## + 1 z[i,] <- u[j,] z[,i] <- u[,j] } return(z) } mice/R/is.R0000644000176200001440000000326513416657163012150 0ustar liggesusers # --------------------------------IS.MIDS-------------------------------------- #' Check for \code{mids} object #' #' @aliases is.mids #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mids} #' @export is.mids <- function(x) { inherits(x, "mids") } # --------------------------------IS.MIRA-------------------------------------- #' Check for \code{mira} object #' #' @aliases is.mira #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mira} #' @export is.mira <- function(x) { inherits(x, "mira") } # --------------------------------IS.MIPO-------------------------------------- #' Check for \code{mipo} object #' #' @aliases is.mipo #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mipo} #' @export is.mipo <- function(x) { inherits(x, "mipo") } #' Check for \code{mitml.result} object #' #' @aliases is.mitml.result #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mitml.result} #' @export 
is.mitml.result <- function(x) { inherits(x, "mitml.result") } # ------------------------------is.passive------------------------------------ is.passive <- function(string) { return("~" == substring(string, 1, 1)) } # # --------------------------------IS.MADS-------------------------------------- # #' Check for \code{mads} object #' #' @aliases is.mads #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mads} #' @export is.mads <- function(x) { inherits(x, "mads") } mice/R/generics.R0000644000176200001440000000266713416664706013342 0ustar liggesusers#' Combine R Objects by Rows and Columns #' #' Functions \code{cbind()} and \code{rbind()} are defined in #' the \code{mice} package in order to #' enable dispatch to \code{cbind.mids()} and \code{rbind.mids()} #' when one of the arguments is a \code{data.frame}. #' #' The standard \code{base::cbind()} and \code{base::rbind()} #' always dispatch to #' \code{base::cbind.data.frame()} or \code{base::rbind.data.frame()} #' if one of the arguments is a #' \code{data.frame}. The versions defined in the \code{mice} #' package intercept the user command #' and test whether the first argument has class \code{"mids"}. If so, #' function calls \code{cbind.mids()}, respectively \code{rbind.mids()}. In #' all other cases, the call is forwarded to standard functions in the #' \code{base} package. #' #' @inheritParams base::cbind #' @seealso \code{\link[base]{cbind}}, \code{\link[base]{rbind}}, #' \code{\link{cbind.mids}}, \code{\link{rbind.mids}} #' @keywords internal #' @export cbind <- function (...) { if (is.null(attr(list(...)[[1]], "class"))) return(base::cbind(...)) if ("mids" %in% attr(list(...)[[1]], "class")) return(cbind.mids(...)) else return(base::cbind(...)) } #' @rdname cbind #' @export rbind <- function (...) 
{ if (is.null(attr(list(...)[[1]], "class"))) return(base::rbind(...)) if ("mids" %in% attr(list(...)[[1]], "class")) return(rbind.mids(...)) else return(base::rbind(...)) } mice/R/getfit.R0000644000176200001440000000270013620030166012771 0ustar liggesusers#'Extract list of fitted model #' #'\code{getfit} returns the list of objects containing the repeated analysis #'results, or optionally, one of these fit objects. #' #'@param x An object of class \code{mira} or \code{mitml.result}, #'typically produced by a call to \code{with()}. #'@param i An integer between 1 and \code{x$m} signaling the number of the #'repeated analysis. The default \code{i= -1} return a list with all analyses. #'@param simplify Should the return value be unlisted? #'@return If \code{i = -1} an object of class \code{mitml.result} containing #'all analyses, otherwise it returns the fitted object of #'the i'th repeated analysis. #'@author Stef van Buuren, March 2012. #'@seealso \code{\link[=mira-class]{mira}}, \code{\link{with.mids}} #'@keywords manip #'@examples #' #'imp <- mice(nhanes) #'fit <- with(imp, lm(bmi~chl+hyp)) #'getfit(fit) #'getfit(fit, 2) #' #'@export getfit <- function(x, i = -1L, simplify = FALSE) { if (is.null(x$analyses)) ra <- x else ra <- x$analyses if (i != -1L) return(ra[[i]]) if (simplify) ra <- unlist(ra) class(ra) <- c("mira", "list") ra } #'Extract estimate from \code{mipo} object #' #'\code{getqbar} returns a named vector of pooled estimates. 
#' #'@param x An object of class \code{mipo} #'@export getqbar <- function(x) { if (!is.mipo(x)) stop("Not a mipo object") qbar <- x$pooled$estimate # note: not supported: component/y.values names(qbar) <- x$pooled$term qbar } mice/R/mice.impute.sample.R0000644000176200001440000000171713416657163015234 0ustar liggesusers#'Imputation by simple random sampling #' #'Imputes a random sample from the observed \code{y} data #' #'This function takes a simple random sample from the observed values in #'\code{y}, and returns these as imputations. #' #'@inheritParams mice.impute.pmm #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2017 #'@references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #'@keywords datagen #'@export mice.impute.sample <- function(y, ry, x = NULL, wy = NULL, ...) { if (is.null(wy)) wy <- !ry yry <- y[ry] if (length(yry) < 1) return(rnorm(sum(wy))) if (length(yry) == 1) yry <- rep(yry, 2) return(sample(yry, size = sum(wy), replace = TRUE)) } mice/R/mice.impute.logreg.R0000644000176200001440000001553213416657163015232 0ustar liggesusers#'Imputation by logistic regression #' #'Imputes univariate missing data using logistic regression. #' #'@aliases mice.impute.logreg #'@inheritParams mice.impute.pmm #'@param ... Other named arguments. #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@author Stef van Buuren, Karin Groothuis-Oudshoorn #'@details #'Imputation for binary response variables by the Bayesian logistic regression #'model (Rubin 1987, p. 169-170). 
The #'Bayesian method consists of the following steps: #'\enumerate{ #'\item Fit a logit, and find (bhat, V(bhat)) #'\item Draw BETA from N(bhat, V(bhat)) #'\item Compute predicted scores for m.d., i.e. logit-1(X BETA) #'\item Compare the score to a random (0,1) deviate, and impute. #'} #'The method relies on the #'standard \code{glm.fit} function. Warnings from \code{glm.fit} are #'suppressed. Perfect prediction is handled by the data augmentation #'method. #' #'@seealso \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} #'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #' #'Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple #'Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. #'Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN #'90-74479-08-1. #' #'Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-Plus #'(2nd ed). Springer, Berlin. #' #'White, I., Daniel, R. and Royston, P (2010). Avoiding bias due to perfect #'prediction in multiple imputation of incomplete categorical variables. #'Computational Statistics and Data Analysis, 54:22672275. #'@family univariate imputation functions #'@keywords datagen #'@export mice.impute.logreg <- function(y, ry, x, wy = NULL, ...) 
{ if (is.null(wy)) wy <- !ry # augment data in order to evade perfect prediction aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit(x = x[ry, , drop = FALSE], y = y[ry], family = quasibinomial(link = logit), weights = w[ry])) fit <- eval(expr) fit.sum <- summary.glm(fit) beta <- coef(fit) rv <- t(chol(sym(fit.sum$cov.unscaled))) beta.star <- beta + rv %*% rnorm(ncol(rv)) # draw imputations p <- 1/(1 + exp(-(x[wy, , drop = FALSE] %*% beta.star))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } return(vec) } #'Imputation by logistic regression using the bootstrap #' #'Imputes univariate missing data using logistic regression #'by a bootstrapped logistic regression model. #'The bootstrap method draws a simple bootstrap sample with replacement #'from the observed data \code{y[ry]} and \code{x[ry, ]}. #' #'@aliases mice.impute.logreg.boot #'@inheritParams mice.impute.pmm #'@param ... Other named arguments. #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2011 #'@seealso \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} #'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-categorical.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #'@family univariate imputation functions #'@keywords datagen #'@export mice.impute.logreg.boot <- function(y, ry, x, wy = NULL, ...) 
{ if (is.null(wy)) wy <- !ry # draw a bootstrap sample for yobs and xobs xobs <- x[ry, , drop = FALSE] yobs <- y[ry] n1 <- sum(ry) s <- sample(n1, n1, replace = TRUE) doty <- y doty[ry] <- yobs[s] dotx <- x dotx[ry, ] <- xobs[s, , drop = FALSE] x <- dotx y <- doty # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit(x = x[ry, , drop = FALSE], y = y[ry], family = binomial(link = logit))) fit <- suppressWarnings(eval(expr)) beta.star <- coef(fit) # draw imputations p <- 1/(1 + exp(-(x[wy, ] %*% beta.star))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } return(vec) } augment <- function(y, ry, x, wy, maxcat = 50) { # define augmented data for stabilizing logreg and polyreg # by the ad hoc procedure of White, Daniel & Royston, CSDA, 2010 # This function will prevent augmented data beyond the min and # the max of the data # Input: # x: numeric data.frame (n rows) # y: factor or numeric vector (lengt n) # ry: logical vector (length n) # Output: # return a list with elements y, ry, x, and w with length n+2*(ncol(x))*length(levels(y)) # SvB May 2009 icod <- sort(unique(unclass(y))) k <- length(icod) if (k > maxcat) stop("Maximum number of categories (", maxcat, ") exceeded") p <- ncol(x) # skip augmentation if there are no predictors if (p == 0) return(list(y = y, ry = ry, x = x, wy = wy, w = rep(1, length(y)))) ## skip augmentation if there is only 1 missing value 12jul2012 ## this need to be fixed 12jul2011 if (sum(!ry) == 1) return(list(y = y, ry = ry, x = x, wy = wy, w = rep(1, length(y)))) # calculate values to augment mean <- apply(x, 2, mean, na.rm = TRUE) sd <- sqrt(apply(x, 2, var, na.rm = TRUE)) minx <- apply(x, 2, min, na.rm = TRUE) maxx <- apply(x, 2, max, na.rm = TRUE) nr <- 2 * p * k a <- matrix(mean, nrow = nr, ncol = p, byrow = TRUE) b <- matrix(rep(c(rep.int(c(0.5, -0.5), k), rep.int(0, nr)), length = nr * p), nrow = nr, ncol = p, byrow = FALSE) c <- matrix(sd, nrow = nr, ncol = p, 
byrow = TRUE) d <- a + b * c d <- pmax(matrix(minx, nrow = nr, ncol = p, byrow = TRUE), d, na.rm = TRUE) d <- pmin(matrix(maxx, nrow = nr, ncol = p, byrow = TRUE), d, na.rm = TRUE) e <- rep(rep(icod, each = 2), p) dimnames(d) <- list(paste0("AUG", seq_len(nrow(d))), dimnames(x)[[2]]) xa <- rbind.data.frame(x, d) # beware, concatenation of factors ya <- if (is.factor(y)) as.factor(levels(y)[c(y, e)]) else c(y, e) rya <- c(ry, rep.int(TRUE, nr)) wya <- c(wy, rep.int(FALSE, nr)) wa <- c(rep.int(1, length(y)), rep.int((p + 1)/nr, nr)) return(list(y = ya, ry = rya, x = xa, w = wa, wy = wya)) } mice/R/squeeze.R0000644000176200001440000000202613416657163013210 0ustar liggesusers# ------------------------------SQUEEZE------------------------------------ #'Squeeze the imputed values to be within specified boundaries. #' #'This function replaces any values in \code{x} that are lower than #'\code{bounds[1]} by \code{bounds[1]}, and replaces any values higher #'than \code{bounds[2]} by \code{bounds[2]}. #' #'@aliases squeeze #'@param x A numerical vector with values #'@param bounds A numerical vector of length 2 containing the lower and upper bounds. #'By default, the bounds are to the minimum and maximum values in \code{x}. #'@param r A logical vector of length \code{length(x)} that is used to select a #'subset in \code{x} before calculating automatic bounds. #'@return A vector of length \code{length(x)}. #'@author Stef van Buuren, 2011. #'@export squeeze <- function(x, bounds = c(min(x[r]), max(x[r])), r = rep.int(TRUE, length(x))) { if (length(r) != length(x)) stop("Different length of vectors x and r") x[x < bounds[1]] <- bounds[1] x[x > bounds[2]] <- bounds[2] return(x) } mice/R/mice.impute.lda.R0000644000176200001440000000532113617734670014510 0ustar liggesusers#'Imputation by linear discriminant analysis #' #'Imputes univariate missing data using linear discriminant analysis #' #'@inheritParams mice.impute.pmm #'@param ... Other named arguments. Not used. 
#'@return Vector with imputed data, of type factor, and of length
#'\code{sum(wy)}
#'@details Imputation of categorical response variables by linear discriminant analysis.
#'This function uses the Venables/Ripley functions \code{lda()} and
#'\code{predict.lda()} to compute posterior probabilities for each incomplete
#'case, and draws the imputations from this posterior.
#'
#'This function can be called from within the Gibbs sampler by specifying
#'\code{"lda"} in the \code{method} argument of \code{mice()}. This method is usually
#'faster and uses fewer resources than calling the function
#'\code{\link{mice.impute.polyreg}}, but the statistical
#'properties may not be as good (Brand, 1999).
#'@section Warning: The function does not incorporate the variability of the
#'discriminant weight, so it is not 'proper' in the sense of Rubin. For small
#'samples and rare categories in the \code{y}, variability of the imputed data
#'could therefore be underestimated.
#'
#'Added: SvB June 2009 to include bootstrap - disabled since
# bootstrapping may easily lead to constant variables within groups,
# which will be difficult to detect when bootstrapped.
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000
#'@seealso \code{\link{mice}}, \code{\link{mice.impute.polyreg}},
#'\code{\link[MASS]{lda}}
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'
#'Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple
#'Imputation Strategies for the Statistical Analysis of Incomplete Data Sets.
#'Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN
#'90-74479-08-1.
#'
#'Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-PLUS
#'(2nd ed). Springer, Berlin.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.lda <- function(y, ry, x, wy = NULL, ...) {
  install.on.demand("MASS", ...)

  # by default, impute exactly the cells that are not observed
  if (is.null(wy)) {
    wy <- !ry
  }

  classes <- as.factor(y)
  class.labels <- levels(classes)
  n.class <- length(class.labels)

  # NOTE: a bootstrap of the training data (SvB, June 2009) used to sit
  # here; it is disabled and the model is fitted on the observed rows as-is.
  lda.fit <- MASS::lda(x, classes, subset = ry)
  posterior <- predict(lda.fit, x[wy, , drop = FALSE])$posterior

  # one uniform draw per row to impute, repeated once for every class so it
  # lines up with the (class x row) matrix of cumulative posteriors
  draws <- rep(runif(sum(wy)), each = n.class)
  cum.posterior <- apply(posterior, 1, cumsum)

  # the imputed class is the first one whose cumulative probability
  # reaches the draw: count the classes that fall short and add one
  n.below <- apply(draws > cum.posterior, 2, sum)
  return(class.labels[1 + n.below])
}
mice/R/validate.arguments.R0000644000176200001440000000061113416657163015322 0ustar liggesusers
validate.arguments <- function(y, ry, x, wy, allow.x.NULL = FALSE, allow.x.NA = FALSE) {
  # Checks the standard arguments of mice.impute functions and stops with
  # an error on the first violation. `y` and `wy` are accepted for a
  # uniform signature but are not inspected here.
  if (!allow.x.NULL) {
    if (is.null(x)) stop("Cannot handle NULL value for `x`")
  }
  if (!allow.x.NA) {
    if (anyNA(x)) stop("Cannot handle NA in `x`")
  }
  if (!is.vector(ry)) {
    stop("`ry` is not a vector")
  }
}
mice/R/ibind.R0000644000176200001440000000725513416664706012626 0ustar liggesusers
#'Enlarge number of imputations by combining \code{mids} objects
#'
#'This function combines two \code{mids} objects \code{x} and \code{y} into a
#'single \code{mids} object, with the objective of increasing the number of
#'imputed data sets. If the number of imputations in \code{x} and \code{y} are
#'\code{m(x)} and \code{m(y)}, then the combined object will have
#'\code{m(x)+m(y)} imputations.
#'
#'The two \code{mids} objects are required to
#'have the same underlying multiple imputation model and should
#'be fitted on the same data.
#'
#'@param x A \code{mids} object.
#'@param y A \code{mids} object.
#'@return An S3 object of class \code{mids}
#'@author Karin Groothuis-Oudshoorn, Stef van Buuren
#'@seealso \code{\link[=mids-class]{mids}}, \code{\link{rbind.mids}}, \code{\link{cbind.mids}}
#'@keywords manip
#'@examples
#'data(nhanes)
#'imp1 <- mice(nhanes, m = 1, maxit = 2, print = FALSE)
#'imp1$m
#'
#'imp2 <- mice(nhanes, m = 3, maxit = 3, print = FALSE)
#'imp2$m
#'
#'imp12 <- ibind(imp1, imp2)
#'imp12$m
#'plot(imp12)
#'@export
ibind <- function(x, y) {
  # Validate the classes before anything else: every statement below
  # dereferences `x$...` and `y$...`, so BOTH arguments must be `mids`
  # objects (the check must fail if either one is not).
  if (!is.mids(x) || !is.mids(y))
    stop("Arguments `x` and `y` not of class `mids`")

  # record the call of `x` followed by the current call
  call <- match.call()
  call <- c(x$call, call)

  # the two objects must stem from the same data ...
  if (!identical(is.na(x$data), is.na(y$data)))
    stop("Differences detected in the missing data pattern")
  if (!identical(x$data[!is.na(x$data)], y$data[!is.na(y$data)]))
    stop("Differences detected in the observed data")

  # ... and from the same imputation model
  if (!identical(x$where, y$where))
    stop("Differences detected between `x$where` and `y$where`")
  if (!identical(x$blocks, y$blocks))
    stop("Differences detected between `x$blocks` and `y$blocks`")
  if (!identical(x$method, y$method))
    stop("Differences detected between `x$method` and `y$method`")
  if (!identical(x$predictorMatrix, y$predictorMatrix))
    stop("Differences detected between `x$predictorMatrix` and `y$predictorMatrix`")
  if (!identical(x$visitSequence, y$visitSequence))
    stop("Differences detected between `x$visitSequence` and `y$visitSequence`")
  if (!identical(x$post, y$post))
    stop("Differences detected between `x$post` and `y$post`")
  if (!identical(x$blots, y$blots))
    stop("Differences detected between `x$blots` and `y$blots`")

  # put the imputations of `y` next to those of `x`, per visited entry
  visitSequence <- x$visitSequence
  imp <- vector("list", ncol(x$data))
  names(imp) <- names(x$data)
  for (j in visitSequence) {
    imp[[j]] <- cbind(x$imp[[j]], y$imp[[j]])
  }

  m <- (x$m + y$m)
  iteration <- max(x$iteration, y$iteration)

  # chain statistics: the first x$m slices come from `x`, the remaining
  # y$m slices from `y`; the shorter run only fills its own iterations
  chainMean <- chainVar <- initialize.chain(x$blocks, iteration, m)
  for (j in seq_len(x$m)) {
    chainMean[, seq_len(x$iteration), j] <- x$chainMean[, , j]
    chainVar[, seq_len(x$iteration), j] <- x$chainVar[, , j]
  }
  for (j in seq_len(y$m)) {
    chainMean[, seq_len(y$iteration), j + x$m] <- y$chainMean[, , j]
    chainVar[, seq_len(y$iteration), j + x$m] <- y$chainVar[, , j]
  }

  # `.Random.seed` only exists once the random number generator has been
  # used in this session (e.g. not after merely loading two saved objects);
  # store NULL in that case instead of failing with "object not found"
  lastSeedValue <- NULL
  if (exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
    lastSeedValue <- get(".Random.seed", envir = globalenv(), inherits = FALSE)
  }

  midsobj <- list(data = x$data, imp = imp, m = m,
                  where = x$where, blocks = x$blocks,
                  call = call, nmis = x$nmis,
                  method = x$method,
                  predictorMatrix = x$predictorMatrix,
                  visitSequence = visitSequence,
                  formulas = x$formulas, post = x$post,
                  blots = x$blots,
                  seed = x$seed,
                  iteration = iteration,
                  lastSeedValue = lastSeedValue,
                  chainMean = chainMean,
                  chainVar = chainVar,
                  loggedEvents = x$loggedEvents,
                  version = packageVersion("mice"),
                  date = Sys.Date())
  oldClass(midsobj) <- "mids"
  return(midsobj)
}
mice/R/edit.setup.R0000644000176200001440000000545713540672756013631 0ustar liggesusers
edit.setup <- function(data, setup,
                       allow.na = FALSE,
                       remove.constant = TRUE,
                       remove.collinear = TRUE,
                       remove_collinear = TRUE,
                       ...) {
  # legacy handling
  if (!remove_collinear) remove.collinear <- FALSE

  # edits the imputation model setup
  # When it detects constant or collinear variables, write in loggedEvents
  # and continues imputation with reduced model

  pred <- setup$predictorMatrix
  meth <- setup$method
  vis <- setup$visitSequence
  post <- setup$post

  # FIXME: this function is not yet adapted to blocks
  if (ncol(pred) != nrow(pred) || length(meth) != nrow(pred)
      || ncol(data) != nrow(pred))
    return(setup)

  varnames <- colnames(data)

  # remove constant variables but leave passive variables untouched
  for (j in seq_len(ncol(data))) {
    if (!is.passive(meth[j])) {
      d.j <- data[, j]
      v <- if (is.character(d.j)) NA else var(as.numeric(d.j), na.rm = TRUE)
      constant <- if (allow.na) {
        if (is.na(v)) FALSE else v < 1000 * .Machine$double.eps
      } else {
        is.na(v) || v < 1000 * .Machine$double.eps
      }
      didlog <- FALSE
      if (constant && any(pred[, j] != 0) && remove.constant) {
        out <- varnames[j]
        pred[, j] <- 0
        updateLog(out = out, meth = "constant")
        didlog <- TRUE
      }
      if (constant && meth[j] != "" && remove.constant) {
        out <- varnames[j]
        pred[j, ] <- 0
        if (!didlog) updateLog(out = out, meth = "constant")
        meth[j] <- ""
        # NOTE(review): `j` is a column index; if `vis` holds variable or
        # block names this comparison never drops anything - confirm
        vis <- vis[vis != j]
        post[j] <- ""
      }
    }
  }

  ## remove collinear variables
  ispredictor <- apply(pred != 0, 2, any)
  if (any(ispredictor)) {
    droplist <- find.collinear(data[, ispredictor, drop = FALSE], ...)
  } else {
    droplist <- NULL
  }
  if (length(droplist) > 0) {
    for (k in seq_along(droplist)) {
      j <- which(varnames %in% droplist[k])
      didlog <- FALSE
      if (any(pred[, j] != 0) && remove.collinear) {
        # remove as predictor
        out <- varnames[j]
        pred[, j] <- 0
        updateLog(out = out, meth = "collinear")
        didlog <- TRUE
      }
      if (meth[j] != "" && remove.collinear) {
        out <- varnames[j]
        pred[j, ] <- 0
        if (!didlog) updateLog(out = out, meth = "collinear")
        meth[j] <- ""
        vis <- vis[vis != j]
        post[j] <- ""
      }
    }
  }

  if (all(pred == 0L))
    stop("`mice` detected constant and/or collinear variables. No predictors were left after their removal.")

  setup$predictorMatrix <- pred
  setup$visitSequence <- vis
  setup$post <- post
  setup$method <- meth
  return(setup)
}
mice/R/mipo.R0000644000176200001440000001326113620070010012447 0ustar liggesusers
#'\code{mipo}: Multiple imputation pooled object
#'
#' The \code{mipo} object contains the results of the pooling step.
#' The function \code{\link{pool}} generates an object of class \code{mipo}.
#'
#' @param x An object of class \code{mipo}
#' @param object An object of class \code{mipo}
#' @param mira.obj An object of class \code{mira}
#' @inheritParams broom::lm_tidiers
#' @param z Data frame with a tidied version of a coefficient matrix
#' @param conf.int Logical indicating whether to include
#' a confidence interval. The default is \code{FALSE}.
#' @param conf.level Confidence level of the interval, used only if
#' \code{conf.int = TRUE}. Number between 0 and 1.
#' @param exponentiate Flag indicating whether to exponentiate the
#' coefficient estimates and confidence intervals (typical for
#' logistic regression).
#' @param \dots Arguments passed down #' @details An object class \code{mipo} is a \code{list} with three #' elements: \code{call}, \code{m} and \code{pooled}. #' #' The \code{pooled} elements is a data frame with columns: #' \tabular{ll}{ #' \code{estimate}\tab Pooled complete data estimate\cr #' \code{ubar} \tab Within-imputation variance of \code{estimate}\cr #' \code{b} \tab Between-imputation variance of \code{estimate}\cr #' \code{t} \tab Total variance, of \code{estimate}\cr #' \code{dfcom} \tab Degrees of freedom in complete data\cr #' \code{df} \tab Degrees of freedom of $t$-statistic\cr #' \code{riv} \tab Relative increase in variance\cr #' \code{lambda} \tab Proportion attributable to the missingness\cr #' \code{fmi} \tab Fraction of missing information\cr #' } #' The names of the terms are stored as \code{row.names(pooled)}. #' #' The \code{process_mipo} is a helper function to process a #' tidied mipo object, and is normally not called directly. #' It adds a confidence interval, and optionally exponentiates, the result. #'@seealso \code{\link{pool}}, #'\code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} #'@references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #'@keywords classes #' @name mipo NULL #'@rdname mipo #'@export mipo <- function(mira.obj, ...) { if (!is.mira(mira.obj)) stop("`mira.obj` not of class `mira`") structure(pool(mira.obj, ...), class = c("mipo")) } #'@return The \code{summary} method returns a data frame with summary statistics of the pooled analysis. #'@rdname mipo #'@export summary.mipo <- function(object, type = c("tests", "all"), conf.int = FALSE, conf.level = .95, exponentiate = FALSE, ...) 
{ type <- match.arg(type) m <- object$m x <- object$pooled std.error <- sqrt(x$t) statistic <- x$estimate / std.error p.value <- 2 * (1 - pt(abs(statistic), pmax(x$df, 0.001))) z <- data.frame(x, std.error = std.error, statistic = statistic, p.value = p.value) z <- process_mipo(z, object, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate) parnames <- names(z)[1L : (pmatch("m", names(z)) - 1L)] if (type == "tests") { out <- c("m", "riv", "lambda", "fmi", "ubar", "b", "t", "dfcom") keep <- base::setdiff(names(z), out) z <- z[, keep] } class(z) <- c("mipo.summary", "data.frame") z } #'@rdname mipo #'@export print.mipo <- function(x, ...) { cat("Class: mipo m =", x$m, "\n") print.data.frame(x$pooled, ...) invisible(x) } #'@rdname mipo #'@export print.mipo.summary <- function(x, ...) { print.data.frame(x, ...) invisible(x) } #' @rdname mipo #' @keywords internal process_mipo <- function(z, x, conf.int = FALSE, conf.level = .95, exponentiate = FALSE) { if (exponentiate) { # save transformation function for use on confidence interval trans <- exp } else { trans <- identity } CI <- NULL if (conf.int) { # avoid "Waiting for profiling to be done..." message CI <- suppressMessages(confint(x, level = conf.level)) } z$estimate <- trans(z$estimate) # combine and sort columns in desired order parnames <- names(z)[1L : (pmatch("m", names(z)) - 1L)] if (!is.null(CI)) { z <- cbind(z[, parnames, drop = FALSE], z[, c("m", "estimate", "std.error", "statistic", "df", "p.value")], trans(unrowname(CI)), z[, c("riv", "lambda", "fmi", "ubar", "b", "t", "dfcom")]) } else { z <- cbind(z[, parnames, drop = FALSE], z[, c("m", "estimate", "std.error", "statistic", "df", "p.value")], z[, c("riv", "lambda", "fmi", "ubar", "b", "t", "dfcom")]) } z } vcov.mipo <- function(object, ...) { so <- diag(object$t) dimnames(so) <- list(object$term, object$term) so } confint.mipo <- function(object, parm, level = 0.95, ...) 
{ pooled <- object$pooled cf <- getqbar(object) df <- pooled$df se <- sqrt(pooled$t) pnames <- names(df) <- names(se) <- names(cf) <- row.names(pooled) if (missing(parm)) parm <- pnames else if (is.numeric(parm)) parm <- pnames[parm] a <- (1 - level)/2 a <- c(a, 1 - a) fac <- qt(a, df) pct <- format.perc(a, 3) ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct)) ci[, 1] <- cf[parm] + qt(a[1], df[parm]) * se[parm] ci[, 2] <- cf[parm] + qt(a[2], df[parm]) * se[parm] ci } unrowname <- function (x) { rownames(x) <- NULL x } format.perc <- function (probs, digits) paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%") mice/R/toenail2.R0000644000176200001440000000411213617306751013237 0ustar liggesusers#' Toenail data #' #' The toenail data come from a Multicenter study comparing two oral #' treatments for toenail infection. Patients were evaluated for the #' degree of separation of the nail. Patients were randomized into two #' treatments and were followed over seven visits - four in the first #' year and yearly thereafter. The patients have not been treated #' prior to the first visit so this should be regarded as the #' baseline. #' @name toenail2 #' @docType data #' @format A data frame with 1908 observations on the following 5 variables: #' \describe{ #' \item{\code{patientID}}{a numeric vector giving the ID of patient} #' \item{\code{outcome}}{a factor with 2 levels giving the response} #' \item{\code{treatment}}{a factor with 2 levels giving the treatment group} #' \item{\code{time}}{a numeric vector giving the time of the visit #' (not exactly monthly intervals hence not round numbers)} #' \item{\code{visit}}{an integer giving the number of the visit} #' } #' @source #' De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De #' Keyser, P. (1998). 
Twelve weeks of continuous oral therapy for #' toenail onychomycosis caused by dermatophytes: A double-blind #' comparative trial of terbinafine 250 mg/day versus itraconazole 200 #' mg/day. Journal of the American Academy of Dermatology, 38, 57-63. #' @references #' Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of #' quadrature points in a logistic random-effects model: An example. #' Journal of the Royal Statistical Society, Series C, 50, 325-335. #' #' G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, #' Wiley and Sons, New York, USA. #' #' Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible #'Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. #'Boca Raton, FL. #' @keywords datasets #' @seealso \code{\link{toenail}} #' @details Apart from formatting, this dataset is identical to #' \code{toenail}. The formatting is taken identical to #' \code{data("toenail", package = "HSAUR3")}. NULL mice/R/mice.impute.polr.R0000644000176200001440000001114213617734632014720 0ustar liggesusers#'Imputation of ordered data by polytomous regression #' #'Imputes missing data in a categorical variable using polytomous regression #'@aliases mice.impute.polr #'@inheritParams mice.impute.pmm #'@param nnet.maxit Tuning parameter for \code{nnet()}. #'@param nnet.trace Tuning parameter for \code{nnet()}. #'@param nnet.MaxNWts Tuning parameter for \code{nnet()}. #'@param polr.to.loggedEvents A logical indicating whether each fallback #'to the \code{multinom()} function should be written to \code{loggedEvents}. #'The default is \code{FALSE}. #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@details #'The function \code{mice.impute.polr()} imputes for ordered categorical response #'variables by the proportional odds logistic regression (polr) model. The #'function repeatedly applies logistic regression on the successive splits. 
The #'model is also known as the cumulative link model. #' #'By default, ordered factors with more than two levels are imputed by #'\code{mice.impute.polr}. #' #'The algorithm of \code{mice.impute.polr} uses the function \code{polr()} from #'the \code{MASS} package. #' #'In order to avoid bias due to perfect prediction, the algorithm augment the #'data according to the method of White, Daniel and Royston (2010). #' #' The call to \code{polr} might fail, usually because the data are very sparse. #' In that case, \code{multinom} is tried as a fallback. #' If the local flag \code{polr.to.loggedEvents} is set to TRUE, #' a record is written #' to the \code{loggedEvents} component of the \code{\link{mids}} object. #' Use \code{mice(data, polr.to.loggedEvents = TRUE)} to set the flag. #' #' @note #' In December 2019 Simon White alerted that the #' \code{polr} could always fail silently. I can confirm this behaviour for #' versions \code{mice 3.0.0 - mice 3.6.6}, so any method requests #' for \code{polr} in these versions were in fact handled by \code{multinom}. #' See \url{https://github.com/stefvanbuuren/mice/issues/206} for details. #' #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 #'@seealso \code{\link{mice}}, \code{\link[nnet]{multinom}}, #'\code{\link[MASS]{polr}} #'@references #' #'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} #' #'Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of #'multiple imputation strategies for the statistical analysis of incomplete #'data sets.} Dissertation. Rotterdam: Erasmus University. #' #'White, I.R., Daniel, R. Royston, P. (2010). Avoiding bias due to perfect #'prediction in multiple imputation of incomplete categorical variables. #'\emph{Computational Statistics and Data Analysis}, 54, 2267-2275. #' #'Venables, W.N. 
& Ripley, B.D. (2002). \emph{Modern applied statistics with #'S-Plus (4th ed)}. Springer, Berlin. #'@family univariate imputation functions #'@keywords datagen #'@export mice.impute.polr <- function(y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, polr.to.loggedEvents = FALSE, ...) { install.on.demand("MASS", ...) install.on.demand("nnet", ...) if (is.null(wy)) wy <- !ry # augment data to evade issues with perfect prediction x <- as.matrix(x) aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w xy <- cbind.data.frame(y = y, x = x) ## polr may fail on sparse data. We revert to multinom in such cases. fit <- try(suppressWarnings(MASS::polr(formula(xy), data = xy[ry, , drop = FALSE], weights = w[ry], control = list(...))), silent = TRUE) if (inherits(fit, "try-error")) { if (polr.to.loggedEvents) updateLog(out = "polr falls back to multinom", frame = 6) fit <- nnet::multinom(formula(xy), data = xy[ry, , drop = FALSE], weights = w[ry], maxit = nnet.maxit, trace = nnet.trace, MaxNWts = nnet.MaxNWts, ...) } post <- predict(fit, xy[wy, , drop = FALSE], type = "probs") if (sum(wy) == 1) post <- matrix(post, nrow = 1, ncol = length(post)) fy <- as.factor(y) nc <- length(levels(fy)) un <- rep(runif(sum(wy)), each = nc) if (is.vector(post)) post <- matrix(c(1 - post, post), ncol = 2) draws <- un > apply(post, 1, cumsum) idx <- 1 + apply(draws, 2, sum) return(levels(fy)[idx]) } mice/R/mice.impute.mnar.logreg.R0000644000176200001440000000227613620753345016163 0ustar liggesusers#' @rdname mice.impute.mnar #' @export mice.impute.mnar.logreg <- function (y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) { ## Undentifiable part: u <- parse.ums(x, ums = ums, umx = umx, ...) 
if (is.null(wy)) wy <- !ry wyold <- wy ## Identifiable part: exactly the same as mice.impute.logreg # augment data in order to evade perfect prediction aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit(x = x[ry, , drop = FALSE], y = y[ry], family = quasibinomial(link = logit), weights = w[ry])) fit <- eval(expr) fit.sum <- summary.glm(fit) beta <- coef(fit) rv <- t(chol(sym(fit.sum$cov.unscaled))) beta.star <- beta + rv %*% rnorm(ncol(rv)) ## Draw imputations p <- 1/(1 + exp(-(x[wy, , drop = FALSE] %*% beta.star + u$x[wyold, , drop = FALSE] %*% u$delta))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } return(vec) } mice/R/initialize.imp.R0000644000176200001440000000153013416657163014453 0ustar liggesusersinitialize.imp <- function(data, m, where, blocks, visitSequence, method, nmis, data.init) { imp <- vector("list", ncol(data)) names(imp) <- names(data) r <- !is.na(data) for (h in visitSequence) { for (j in blocks[[h]]) { y <- data[, j] ry <- r[, j] wy <- where[, j] imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m)) dimnames(imp[[j]]) <- list(row.names(data)[wy], 1:m) if (method[h] != "") { for (i in seq_len(m)) { if (nmis[j] < nrow(data)) { if (is.null(data.init)) { imp[[j]][, i] <- mice.impute.sample(y, ry, wy = wy) } else { imp[[j]][, i] <- data.init[wy, j] } } else imp[[j]][, i] <- rnorm(nrow(data)) } } } } imp } mice/R/tbc.R0000644000176200001440000000466013416657163012305 0ustar liggesusers#'Terneuzen birth cohort #' #'Data of subset of the Terneuzen Birth Cohort data on child growth. #' #'This \code{tbc} data set is a random subset of persons from a much larger #'collection of data from the Terneuzen Birth Cohort. The total cohort #'comprises of 2604 unique persons, whereas the subset in \code{tbc} covers 306 #'persons. 
#'The \code{tbc.target} is an auxiliary data set containing two
#'outcomes at adult age. For more details, see De Kroon et al (2008, 2010,
#'2011). The imputation methodology is explained in Chapter 9 of Van Buuren
#'(2012).
#'
#'@name tbc
#'@aliases tbc tbc.target terneuzen
#'@docType data
#'@format \code{tbc} is a data frame with 3951 rows and 11 columns:
#'\describe{
#'\item{id}{Person number}
#'\item{occ}{Occasion number}
#'\item{nocc}{Number of occasions}
#'\item{first}{Is this the first record for this person? (TRUE/FALSE)}
#'\item{typ}{Type of data (all observed)}
#'\item{age}{Age (years)}
#'\item{sex}{Sex 1=M, 2=F}
#'\item{hgt.z}{Height Z-score}
#'\item{wgt.z}{Weight Z-score}
#'\item{bmi.z}{BMI Z-score}
#'\item{ao}{Adult overweight (0=no, 1=yes)}
#'}
#'
#'\code{tbc.target} is a data frame with 2612 rows and 3 columns:
#'\describe{
#'\item{id}{Person number}
#'\item{ao}{Adult overweight (0=no, 1=yes)}
#'\item{bmi.z.jv}{BMI Z-score as young adult (18-29 years)}
#'}
#'@source De Kroon, M. L. A., Renders, C. M., Kuipers, E. C., van Wouwe, J. P.,
#'van Buuren, S., de Jonge, G. A., Hirasing, R. A. (2008). Identifying
#'metabolic syndrome without blood tests in young adults - The Terneuzen birth
#'cohort. \emph{European Journal of Public Health}, \emph{18}(6), 656-660.
#'
#'De Kroon, M. L. A., Renders, C. M., Van Wouwe, J. P., Van Buuren, S.,
#'Hirasing, R. A. (2010). The Terneuzen birth cohort: BMI changes between 2
#'and 6 years correlate strongest with adult overweight. \emph{PLoS ONE},
#'\emph{5}(2), e9155.
#'
#'De Kroon, M. L. A. (2011). \emph{The Terneuzen Birth Cohort. Detection and
#'Prevention of Overweight and Cardiometabolic Risk from Infancy Onward.}
#'Dissertation, Vrije Universiteit, Amsterdam.
#'\url{http://dare.ubvu.vu.nl/handle/1871/23806}
#'
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-rastering.html#terneuzen-birth-cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'@keywords datasets #'@examples #' #'data <- tbc #'md.pattern(data) #' NULL mice/R/mice.impute.quadratic.R0000644000176200001440000001017613416657163015727 0ustar liggesusers#'Imputation of quadratic terms #' #'Imputes incomplete variable that appears as both #'main effect and quadratic effect in the complete-data model. #' #'@aliases mice.impute.quadratic quadratic #'@inheritParams mice.impute.pmm #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@details #'This function implements the "polynomial combination" method. #'First, the polynomial #'combination \eqn{Z = Y \beta_1 + Y^2 \beta_2} is formed. #'\eqn{Z} is imputed by #'predictive mean matching, followed by a decomposition of the imputed #'data \eqn{Z} #'into components \eqn{Y} and \eqn{Y^2}. #'See Van Buuren (2012, pp. 139-141) and Vink #'et al (2012) for more details. The method ensures that 1) the imputed data #'for \eqn{Y} and \eqn{Y^2} are mutually consistent, and 2) that provides unbiased #'estimates of the regression weights in a complete-data linear regression that #'use both \eqn{Y} and \eqn{Y^2}. #' #'@note There are two situations to consider. If only the linear term \code{Y} #'is present in the data, calculate the quadratic term \code{YY} after #'imputation. If both the linear term \code{Y} and the the quadratic term #'\code{YY} are variables in the data, then first impute \code{Y} by calling #'\code{mice.impute.quadratic()} on \code{Y}, and then impute \code{YY} by #'passive imputation as \code{meth["YY"] <- "~I(Y^2)"}. See example section #'for details. Generally, we would like \code{YY} to be present in the data if #'we need to preserve quadratic relations between \code{YY} and any third #'variables in the multivariate incomplete data that we might wish to impute. #'@author Gerko Vink (University of Utrecht), \email{g.vink@@uu.nl} #'@seealso \code{\link{mice.impute.pmm}} #'Van Buuren, S. (2018). 
#'\href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' #'Vink, G., van Buuren, S. (2013). Multiple Imputation of Squared Terms. #'\emph{Sociological Methods & Research}, 42:598-607. #'@family univariate imputation functions #'@keywords datagen #'@examples #'require(lattice) #' #'# Create Data #'B1 = .5 #'B2 = .5 #'X <- rnorm(1000) #'XX <- X^2 #'e <- rnorm(1000, 0, 1) #'Y <- B1 * X + B2 * XX + e #'dat <- data.frame(x = X, xx = XX, y = Y) #' #'# Impose 25 percent MCAR Missingness #'dat[0 == rbinom(1000, 1, 1 -.25), 1:2] <- NA #' #'# Prepare data for imputation #'ini <- mice(dat, maxit = 0) #'meth <- c("quadratic", "~I(x^2)", "") #'pred <- ini$pred #'pred[, "xx"] <- 0 #' #'# Impute data #'imp <- mice(dat, meth = meth, pred = pred) #' #'# Pool results #'pool(with(imp, lm(y ~ x + xx))) #' #'# Plot results #'stripplot(imp) #'plot(dat$x, dat$xx, col = mdc(1), xlab = "x", ylab = "xx") #'cmp <- complete(imp) #'points(cmp$x[is.na(dat$x)], cmp$xx[is.na(dat$x)], col = mdc(2)) #'@export mice.impute.quadratic <-function (y, ry, x, wy = NULL, ...) 
{ if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) #create the square of y y2 <- y^2 #create z based on B1 * y + B2 * y^2 parm <- .norm.draw(x[,2], ry, cbind(1, y, y2)) zobs <- cbind(y, y2) %*% parm$coef[-1] #impute z zmis <- mice.impute.pmm(zobs, ry, x[, -1]) zstar <- zobs zstar[!ry] <- zmis # Otherwise the predict function crashes (nmatrix.1 error) zstar <- as.vector(zstar) #decompositions of z into roots b1 <- parm$coef[2] b2 <- parm$coef[3] y.low <- -(1/(2 * b2)) * (sqrt(4 * b2 * zstar + b1^2) + b1) y.up <- (1/(2 * b2)) * (sqrt(4 * b2 * zstar + b1^2) - b1) #calculate the abscissa at the parabolic minimum/maximum y.min <- -b1 / (2 * b2) #calculate regression parameters for q <- x[, 2] vobs <- glm(y > y.min ~ q + zstar + q * zstar, subset = ry, family = binomial) #impute Vmis newdata <- data.frame(q = x[wy, 2], zstar = zstar[wy]) prob <- predict(vobs, newdata = newdata, type = "response", na.action = na.exclude) idy <- rbinom(sum(wy), 1, prob = prob) #create final imputation ystar <- y.low[wy] ystar[idy == 1] <- y.up[wy][idy == 1] return(ystar) } mice/R/lm.R0000644000176200001440000001025413416657163012141 0ustar liggesusers#'Linear regression for \code{mids} object #' #'Applies \code{lm()} to multiply imputed data set #' #'This function is included for backward compatibility with V1.0. The function #'is superseded by \code{\link{with.mids}}. #' #'@param formula a formula object, with the response on the left of a ~ #'operator, and the terms, separated by + operators, on the right. See the #'documentation of \code{\link{lm}} and \code{\link{formula}} for details. #'@param data An object of type 'mids', which stands for 'multiply imputed data #'set', typically created by a call to function \code{mice()}. #'@param \dots Additional parameters passed to \code{\link{lm}} #'@return An objects of class \code{mira}, which stands for 'multiply imputed #'repeated analysis'. 
This object contains \code{data$m} distinct #'\code{lm.objects}, plus some descriptive information. #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #'@seealso \code{\link{lm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} #'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #'@keywords multivariate #'@examples #' #' #'imp <- mice(nhanes) #'fit <- lm.mids(bmi~hyp+chl, data = imp) #'fit #' #'@export lm.mids <- function(formula, data, ...) { # adapted 28/1/00 repeated complete data regression (lm) on a mids data set call <- match.call() if (!is.mids(data)) stop("The data must have class mids") analyses <- lapply(seq_len(data$m), function(i) lm(formula, data = complete(data, i), ...)) # return the complete data analyses as a list of length nimp object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) oldClass(object) <- c("mira", "lm") ## FEH return(object) } # -------------------------------GLM.MIDS--------------------------------- setMethod('glm',signature(data = 'mids'), # function( formula, family = gaussian, data, ...) { glm.mids( formula, family = gaussian, data, ...) } ) #'Generalized linear model for \code{mids} object #' #'Applies \code{glm()} to a multiply imputed data set #' #'This function is included for backward compatibility with V1.0. The function #'is superseded by \code{\link{with.mids}}. #' #'@param formula a formula expression as for other regression models, of the #'form response ~ predictors. See the documentation of \code{\link{lm}} and #'\code{\link{formula}} for details. #'@param family The family of the glm model #'@param data An object of type \code{mids}, which stands for 'multiply imputed #'data set', typically created by function \code{mice()}. 
#'@param \dots Additional parameters passed to \code{\link{glm}}. #'@return An objects of class \code{mira}, which stands for 'multiply imputed #'repeated analysis'. This object contains \code{data$m} distinct #'\code{glm.objects}, plus some descriptive information. #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #'@seealso \code{\link{with.mids}}, \code{\link{glm}}, \code{\link[=mids-class]{mids}}, #'\code{\link[=mira-class]{mira}} #'@references Van Buuren, S., Groothuis-Oudshoorn, C.G.M. (2000) #'\emph{Multivariate Imputation by Chained Equations: MICE V1.0 User's manual.} #'Leiden: TNO Quality of Life. #'@keywords multivariate #'@examples #' #'imp <- mice(nhanes) #' #'# logistic regression on the imputed data #'fit <- glm.mids((hyp==2)~bmi+chl, data=imp, family = binomial) #'fit #' #'@export glm.mids <- function(formula, family = gaussian, data, ...) { # adapted 04/02/00 repeated complete data regression (glm) on a mids data set call <- match.call() if (!is.mids(data)) stop("The data must have class mids") analyses <- lapply(seq_len(data$m), function(i) glm(formula, family = family, data = complete(data, i), ...)) # return the complete data analyses as a list of length nimp object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) oldClass(object) <- c("mira", "glm", "lm") ## FEH return(object) } mice/R/mice.impute.polyreg.R0000644000176200001440000000674213617734653015442 0ustar liggesusers#'Imputation of unordered data by polytomous regression #' #'Imputes missing data in a categorical variable using polytomous regression #' #'@aliases mice.impute.polyreg #'@inheritParams mice.impute.pmm #'@param nnet.maxit Tuning parameter for \code{nnet()}. #'@param nnet.trace Tuning parameter for \code{nnet()}. #'@param nnet.MaxNWts Tuning parameter for \code{nnet()}. 
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010
#'@details
#'The function \code{mice.impute.polyreg()} imputes categorical response
#'variables by the Bayesian polytomous regression model. See J.P.L. Brand
#'(1999), Chapter 4, Appendix B.
#'
#'By default, unordered factors with more than two levels are imputed by
#'\code{mice.impute.polyreg()}.
#'
#'The method consists of the following steps:
#'\enumerate{
#'\item Fit categorical response as a multinomial model
#'\item Compute predicted categories
#'\item Add appropriate noise to predictions
#'}
#'
#'The algorithm of \code{mice.impute.polyreg} uses the function
#'\code{multinom()} from the \code{nnet} package.
#'
#'In order to avoid bias due to perfect prediction, the algorithm augments the
#'data according to the method of White, Daniel and Royston (2010).
#'@seealso \code{\link{mice}}, \code{\link[nnet]{multinom}},
#'\code{\link[MASS]{polr}}
#'@references
#'
#'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'
#'Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of
#'multiple imputation strategies for the statistical analysis of incomplete
#'data sets.} Dissertation. Rotterdam: Erasmus University.
#'
#'White, I.R., Daniel, R. Royston, P. (2010). Avoiding bias due to perfect
#'prediction in multiple imputation of incomplete categorical variables.
#'\emph{Computational Statistics and Data Analysis}, 54, 2267-2275.
#'
#'Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with
#'S-Plus (4th ed)}. Springer, Berlin.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.polyreg <- function(y, ry, x, wy = NULL, nnet.maxit = 100,
                                nnet.trace = FALSE, nnet.MaxNWts = 1500, ...)
{
  install.on.demand("nnet", ...)
  if (is.null(wy)) wy <- !ry

  # augment data to evade issues with perfect prediction
  x <- as.matrix(x)
  aug <- augment(y, ry, x, wy)
  x <- aug$x
  y <- aug$y
  ry <- aug$ry
  wy <- aug$wy
  w <- aug$w

  fy <- as.factor(y)
  nc <- length(levels(fy))
  nmis <- sum(wy)
  # one uniform draw per entry to impute, repeated nc times so that it can be
  # compared against that entry's nc cumulative category probabilities
  un <- rep(runif(nmis), each = nc)

  xy <- cbind.data.frame(y = y, x = x)
  if (ncol(x) == 0L)
    xy <- data.frame(xy, int = 1)

  # escape with same impute if the dependent does not vary
  cat.has.all.obs <- table(y[ry]) == sum(ry)
  if (any(cat.has.all.obs))
    return(rep(levels(fy)[cat.has.all.obs], nmis))

  fit <- nnet::multinom(formula(xy),
                        data = xy[ry, , drop = FALSE],
                        weights = w[ry],
                        maxit = nnet.maxit, trace = nnet.trace,
                        MaxNWts = nnet.MaxNWts, ...)
  post <- predict(fit, xy[wy, , drop = FALSE], type = "probs")

  # predict() hands back a dimensionless vector in two different situations;
  # restore the (entries x categories) layout in both:
  #  - one entry to impute, more than two categories: the vector is that
  #    entry's row of category probabilities;
  #  - two categories: the vector holds P(second category) per entry, also
  #    when there is only a single entry (a scalar). The original code turned
  #    that scalar into a 1 x 1 matrix, after which apply(draws, 2, sum)
  #    failed because `draws` had lost its dimensions.
  if (is.null(dim(post))) {
    if (nmis == 1 && length(post) > 1) {
      post <- matrix(post, nrow = 1, ncol = length(post))
    } else {
      post <- matrix(c(1 - post, post), ncol = 2)
    }
  }

  # the imputed category is 1 + the number of cumulative probabilities that
  # lie below the uniform draw
  draws <- un > apply(post, 1, cumsum)
  idx <- 1 + apply(draws, 2, sum)
  return(levels(fy)[idx])
}
mice/R/mdc.R0000644000176200001440000000771413617544472012304 0ustar liggesusers
#'Graphical parameter for missing data plots.
#'
#'\code{mdc} returns colors used to distinguish observed, missing and combined
#'data in plotting. \code{mice.theme} return a partial list of named objects
#'that can be used as a theme in \code{stripplot}, \code{bwplot},
#'\code{densityplot} and \code{xyplot}.
#'
#'This function eases consistent use of colors in plots. The default follows
#'the Abayomi convention, which uses blue for observed data, red for missing or
#'imputed data, and black for combined data.
#'
#'@aliases mdc
#'@param r A numerical or character vector. The numbers 1-6 request colors as
#'follows: 1=\code{cso}, 2=\code{csi}, 3=\code{csc}, 4=\code{clo}, 5=\code{cli}
#'and 6=\code{clc}. Alternatively, \code{r} may contain the strings
#''\code{observed}', '\code{missing}', or '\code{both}', or abbreviations
#'thereof.
#'@param s A character vector containing the strings '\code{symbol}' or #''\code{line}', or abbreviations thereof. #'@param transparent A logical indicating whether alpha-transparency is #'allowed. The default is \code{TRUE}. #'@param cso The symbol color for the observed data. The default is a #'transparent blue. #'@param csi The symbol color for the missing or imputed data. The default is a #'transparent red. #'@param csc The symbol color for the combined observed and imputed data. The #'default is a grey color. #'@param clo The line color for the observed data. The default is a slightly #'darker transparent blue. #'@param cli The line color for the missing or imputed data. The default is a #'slightly darker transparent red. #'@param clc The line color for the combined observed and imputed data. The #'default is a grey color. #'@return \code{mdc()} returns a vector containing color definitions. The length #'of the output vector is calculated from the length of \code{r} and \code{s}. #'Elements of the input vectors are repeated if needed. #'@author Stef van Buuren, sept 2012. #'@seealso \code{\link{hcl}}, \code{\link{rgb}}, #'\code{\link[mice:xyplot]{xyplot.mids}}, \code{\link[lattice:xyplot]{xyplot}}, #'\code{\link[lattice:trellis.par.set]{trellis.par.set}} #'@references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #'Visualization with R}, Springer.
#'@keywords hplot
#'@examples
#'
#'# all six colors
#'mdc(1:6)
#'
#'# lines color for observed and missing data
#'mdc(c('obs','mis'), 'lin')
#'
#'@export
mdc <- function(r = "observed", s = "symbol", transparent = TRUE,
                cso = grDevices::hcl(240, 100, 40, 0.7),
                csi = grDevices::hcl(0, 100, 40, 0.7),
                csc = "gray50",
                clo = grDevices::hcl(240, 100, 40, 0.8),
                cli = grDevices::hcl(0, 100, 40, 0.8),
                clc = "gray50") {
  ## cso: blue symbol color for observed data
  ## csi: red symbol color for imputations
  ## csc: symbol color for combined data
  ## clo: blue line color for observed data
  ## cli: red line color for imputations
  ## clc: line color for combined data

  # Use the opaque palette when the caller switched transparency off, or when
  # the caller left `transparent` unspecified and supports.transparent()
  # returns FALSE. As before, the opaque palette replaces all six colors,
  # including any that were passed in.
  opaque <- if (missing(transparent)) !supports.transparent() else !transparent
  if (opaque) {
    cso <- grDevices::hcl(240, 100, 40)
    csi <- grDevices::hcl(0, 100, 40)
    csc <- "black"
    clo <- grDevices::hcl(240, 100, 40)
    cli <- grDevices::hcl(0, 100, 40)
    clc <- "black"
  }

  # position 7 is the fallback for requests that cannot be resolved
  fallback <- grDevices::palette()[1]
  col.table <- c(cso, csi, csc, clo, cli, clc, fallback)

  if (is.numeric(r)) {
    idx <- floor(r)
    # out-of-range and missing requests get the fallback color, consistent
    # with the character interface below
    idx[is.na(r) | r < 1 | r > 6] <- 7
    return(col.table[idx])
  }

  # duplicates.ok = TRUE: without it pmatch() matches every table entry only
  # once, so a repeated request such as c("obs", "obs") yielded the fallback
  # color for the second element
  rc <- pmatch(r, c("observed", "missing", "both"), duplicates.ok = TRUE)
  sc <- pmatch(s, c("symbol", "line"), duplicates.ok = TRUE)
  idx <- rc + (sc - 1) * 3
  idx[is.na(idx)] <- 7
  col.table[idx]
}
mice/R/design.R0000644000176200001440000000131613416657163013001 0ustar liggesusers
obtain.design <- function(data, formula = ~ .)
{
  # Builds the design matrix of `formula` on `data`. Rows with missing values
  # are kept (na.action = na.pass).
  mf <- model.frame(formula, data = data, na.action = na.pass)
  model.matrix(formula, data = mf)
}

update.design <- function(design, data, varname = ".") {
  # Updates columns of the design matrix related to variable
  # varname in data. The "assign" attribute of `design` is matched against
  # column positions in `data`, so `design` is assumed to stem from a
  # main-effects formula over names(data), as in obtain.design(data).
  varname <- as.character(varname[1])
  if (is.na(varname) || varname == "") return(design)

  if (varname == ".") {
    # "." addresses every variable, i.e. all non-intercept columns
    pos <- seq_along(names(data))
  } else {
    # exact name match; the former grep() treated varname as a regular
    # expression, so "age" also selected "agegroup" and overwrote its columns
    pos <- which(names(data) == varname)
  }
  idx <- attr(design, "assign") %in% pos

  # variable j not found
  if (!any(idx)) return(design)

  # create model frame of variable j only
  fj <- as.formula(paste("~", varname))
  mfj <- model.frame(fj, data = data, na.action = na.pass)
  # drop the intercept column, which is not part of the variable's columns
  design[, idx] <- model.matrix(fj, data = mfj)[, -1, drop = FALSE]
  design
}
mice/R/parlmice.R0000644000176200001440000001522513416664706013331 0ustar liggesusers
#'Wrapper function that runs MICE in parallel
#'
#'This is a wrapper function for \code{\link{mice}}, using multiple cores to
#'execute \code{\link{mice}} in parallel. As a result, the imputation
#'procedure can be sped up, which may be useful in general.
#'
#'This function relies on package \code{\link{parallel}}, which is a base
#'package for R versions 2.14.0 and later. We have chosen to use parallel function
#'\code{parLapply} to allow the use of \code{parlmice} on Mac, Linux and Windows
#'systems. For the same reason, we use the Parallel Socket Cluster (PSOCK) type by default.
#'
#'On systems other than Windows, it can be hugely beneficial to change the cluster type to
#'\code{FORK}, as it generally results in improved memory handling. When memory issues
#'arise on a Windows system, we advise to store the multiply imputed datasets,
#'clean the memory by using \code{\link{rm}} and \code{\link{gc}} and make another
#'run using the same settings.
#'
#'This wrapper function combines the output of \code{\link{parLapply}} with
#'function \code{\link{ibind}} in \code{\link{mice}}. A \code{mids} object is returned
#'and can be used for further analyses.
#'
#'Note that if a seed value is desired, the seed should be entered to this function
#'with argument \code{seed}.
Seed values outside the wrapper function (in an #'R-script or passed to \code{\link{mice}}) will not lead to reproducible results. #'We refer to the manual of \code{\link{parallel}} for an explanation on this matter. #' #'@aliases parlmice #'@param data A data frame or matrix containing the incomplete data. Similar to #'the first argument of \code{\link{mice}}. #'@param m The number of desired imputed datasets. By default \code{m = 5} as with \code{mice} #'@param seed A scalar to be used as the seed value for the mice algorithm within #'each parallel stream. Please note that the imputations will be the same for all #'streams and, hence, this should be used if and only if \code{n.core = 1} and #'if it is desired to obtain the same output as under \code{mice}. #'@param n.core A scalar indicating the number of cores that should be used. #'@param n.imp.core A scalar indicating the number of imputations per core. #'@param cluster.seed A scalar to be used as the seed value. It is recommended to put the #'seed value here and not outside this function, as otherwise the parallel processes #'will be performed with separate, random seeds. #'@param cl.type The cluster type. Default value is \code{"PSOCK"}. Posix machines (linux, Mac) #'generally benefit from much faster cluster computation if \code{cl.type} is set to \code{"FORK"}. #'@param ... Named arguments that are passed down to function \code{\link{mice}} or #'\code{\link{makeCluster}}. #' #'@return A mids object as defined by \code{\link{mids-class}} #' #'@author Gerko Vink, 2018, based on an earlier version by Rianne Schouten and Gerko Vink, 2017. #'@seealso \code{\link{parallel}}, \code{\link{parLapply}}, \code{\link{makeCluster}}, #'\code{\link{mice}}, \code{\link{mids-class}} #'@references #'Schouten, R. and Vink, G. (2017). parlmice: faster, paraleller, micer. #'\url{https://gerkovink.github.io/parlMICE/Vignette_parlMICE.html} #' #'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'
#'@examples
#'# 150 imputations in dataset nhanes, performed by 3 cores
#'\dontrun{
#'imp1 <- parlmice(data = nhanes, n.core = 3, n.imp.core = 50)
#'# Making use of arguments in mice.
#'imp2 <- parlmice(data = nhanes, method = "norm.nob", m = 100)
#'imp2$method
#'fit <- with(imp2, lm(bmi ~ hyp))
#'pool(fit)
#' }
#'
#'@export
parlmice <- function(data, m = 5, seed = NA, cluster.seed = NA, n.core = NULL,
                     n.imp.core = NULL, cl.type = "PSOCK", ...){
  # check form of data and m
  data <- check.dataform(data)
  m <- check.m(m)

  # check if data complete
  if (sum(is.na(data)) == 0){
    stop("Data has no missing values")
  }

  # detectCores() may return NA; the default keeps one core free but never
  # drops below one core (detectCores() - 1 is 0 on a single-core machine)
  n.avail <- parallel::detectCores()
  n.default <- max(1, n.avail - 1, na.rm = TRUE)

  # check if arguments match CPU specifications
  if (!is.null(n.core)){
    if (!is.na(n.avail) && n.core > n.avail){
      stop("Number of cores specified is greater than the number of logical cores in your CPU")
    }
  }

  # determine course of action when not all arguments specified
  if (!is.null(n.core) && is.null(n.imp.core)){
    n.imp.core <- m
    warning(paste("Number of imputations per core not specified: n.imp.core = m =", m, "has been used"))
  }
  if (is.null(n.core) && !is.null(n.imp.core)){
    n.core <- n.default
    warning(paste("Number of cores not specified. Based on your machine a value of n.core =", n.core, "is chosen"))
  }
  if (is.null(n.core) && is.null(n.imp.core)) {
    specs <- match.cluster(n.core = n.default, m = m)
    n.core <- specs$cores
    n.imp.core <- specs$imps
  }
  if (!is.na(seed)){
    if (n.core > 1){
      warning("Be careful; the specified seed is equal for all imputations. Please consider specifying cluster.seed instead.")
    }
  }

  # create arguments to export to cluster: the parlmice() call matched against
  # the formals of mice(), with m replaced by the per-core number of imputations
  args <- match.call(mice, expand.dots = TRUE)
  args[[1]] <- NULL
  args$m <- n.imp.core

  # make computing cluster; on.exit() guarantees that the worker processes are
  # shut down even when one of the steps below stops with an error
  cl <- parallel::makeCluster(n.core, type = cl.type)
  on.exit(parallel::stopCluster(cl), add = TRUE)
  parallel::clusterExport(cl,
                          varlist = c("data", "m", "seed", "cluster.seed",
                                      "n.core", "n.imp.core", "cl.type",
                                      ls(parent.frame())),
                          envir = environment())
  parallel::clusterExport(cl, varlist = "do.call")
  parallel::clusterEvalQ(cl, library(mice))
  if (!is.na(cluster.seed)) {
    parallel::clusterSetRNGStream(cl, cluster.seed)
  }

  # generate imputations: one mice() run per core
  imps <- parallel::parLapply(cl = cl, X = seq_len(n.core),
                              function(x) do.call(mice, as.list(args), envir = environment()))

  # postprocess clustered imputation into a mids object
  imp <- imps[[1]]
  if (length(imps) > 1) {
    for (i in 2:length(imps)) {
      imp <- ibind(imp, imps[[i]])
    }
  }
  # let imputation matrix correspond to grand m
  for (i in seq_along(imp$imp)){
    colnames(imp$imp[[i]]) <- 1:imp$m
  }

  return(imp)
}

match.cluster <- function(n.core, m){
  # Finds the split of m imputations over at most n.core cores with
  # cores * imps == m that uses the largest number of cores. Returns a
  # one-row data frame with columns `cores` and `imps`.
  cores <- seq_len(max(1, n.core))
  imps <- seq_len(m)
  # all combinations of cores and imps together with their product
  out <- data.frame(results = as.vector(cores %*% t(imps)),
                    cores = cores,
                    imps = rep(imps, each = length(cores)))
  exact <- out[out[, "results"] == m, ]
  exact[order(exact$cores, decreasing = TRUE), ][1, 2:3]
}
mice/R/mice.impute.ri.R0000744000176200001440000000443513620753345014362 0ustar liggesusers
#'Imputation by the random indicator method for nonignorable data
#'
#'Imputes nonignorable missing data by the random indicator method.
#'
#'@aliases mice.impute.ri ri
#'@inheritParams mice.impute.pmm
#'@param ri.maxit Number of inner iterations
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@author Shahab Jolani (University of Utrecht) \email{s.jolani@@uu.nl}
#'@details
#'The random indicator method estimates an offset between the
#'distribution of the observed and missing data using an algorithm
#'that iterates over the response and imputation models.
#'
#'This routine assumes that the response model and imputation model
#'have the same predictors.
#'
#' For an MNAR alternative see also \code{\link{mice.impute.mnar.logreg}}.
#'@references Jolani, S. (2012).
#'\emph{Dual Imputation Strategies for Analyzing Incomplete Data}.
#'Dissertation. University of Utrecht, Dec 7 2012.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.ri <- function(y, ry, x, wy = NULL, ri.maxit = 10, ...)
{
  if (is.null(wy)) wy <- !ry
  # add an intercept column; the same predictor matrix is used for the
  # imputation model (xy) and the response model (xr)
  x <- cbind(1, as.matrix(x))
  xy <- x
  xr <- xy
  # starting values: fill the entries to impute via mice.impute.sample()
  y.dot <- y
  y.dot[wy] <- mice.impute.sample(y, ry, wy = wy)
  # alternate ri.maxit times between drawing a response indicator given the
  # current y.dot and drawing imputations given that indicator
  for (k in seq_len(ri.maxit)) {
    r.dot <- .r.draw(y.dot, ry, xr, ...)
    y.dot <- .y.draw(y, ry, r.dot, xy, wy, ...)
  }
  return(y.dot[wy])
}

# generating a realization of the response indicator r
.r.draw <- function(ydot, ry, xr, ...)
{
  n <- length(ry)
  # logistic regression of the observed-data indicator ry on the predictors
  # plus the current completed outcome ydot
  xr <- cbind(xr, ydot)
  expr <- expression(glm.fit(xr, ry, family = binomial(link = logit)))
  fit <- suppressWarnings(eval(expr))
  fit.sum <- summary.glm(fit)
  psi <- coef(fit)
  # perturb the estimate with normal noise shaped by the Cholesky factor of
  # cov.unscaled (sym() is a helper defined elsewhere; presumably it makes the
  # matrix symmetric - TODO confirm)
  rv <- t(chol(sym(fit.sum$cov.unscaled)))
  psi.star <- psi + rv %*% rnorm(ncol(rv))
  # response probabilities under the drawn coefficients
  p <- 1/(1 + exp(-(xr %*% psi.star)))
  # draw the indicator: TRUE where a uniform draw does not exceed p,
  # with TRUE entries recoded as 1
  vec <- (runif(nrow(p)) <= p)
  vec[vec] <- 1
  rdot <- vec[seq_len(n)]
  return(rdot)
}

# Imputation of y given rdot
.y.draw <- function(y, ry, rdot, xy, wy, ...)
{
  # parameters drawn by .norm.draw() for y on the predictors plus rdot;
  # the last coefficient belongs to rdot
  parm <- .norm.draw(y, ry, cbind(xy, rdot), ...)
  # no offset when rdot does not vary among the observed cases
  if (all(rdot[ry] == 1) || all(rdot[ry] == 0)) parm$coef[length(parm$coef)] <- 0
  ydot <- y
  rydot <- as.logical(rdot)
  # linear predictor without the rdot term, plus normal noise
  ydot[wy] <- xy[wy, , drop = FALSE] %*% parm$beta[-length(parm$coef),] + rnorm(sum(wy)) * parm$sigma
  # subtract the rdot coefficient for imputed entries whose drawn indicator is 0
  ydot[wy & !rydot] <- ydot[wy & !rydot] - parm$coef[length(parm$coef)]
  return(ydot)
}
mice/R/xyplot.R0000644000176200001440000002444013416657163013072 0ustar liggesusers
#'Scatterplot of observed and imputed data
#'
#'Plotting methods for imputed data using \pkg{lattice}.
#'\code{xyplot()} produces conditional scatterplots. The function
#'automatically separates the observed (blue) and imputed (red) data.
The #'function extends the usual features of \pkg{lattice}. #' #'The argument \code{na.groups} may be used to specify (combinations of) #'missingness in any of the variables. The argument \code{groups} can be used #'to specify groups based on the variable values themselves. Only one of both #'may be active at the same time. When both are specified, \code{na.groups} #'takes precedence over \code{groups}. #' #'Use the \code{subset} and \code{na.groups} together to plot parts of the #'data. For example, select the first imputed data set by #'\code{subset=.imp==1}. #' #'Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #'specified in the arguments list to alter the plotting symbols. If #'\code{length(col)==2}, the color specification defines the observed and #'missing groups. \code{col[1]} is the color of the 'observed' data, #'\code{col[2]} is the color of the missing or imputed data. A convenient color #'choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #'data, and a transparent red color for the imputed data. A good choice is #'\code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the #'duration of the session by running \code{mice.theme()}. #' #'@aliases xyplot #'@param x A \code{mids} object, typically created by \code{mice()} or #'\code{mice.mids()}. #'@param data Formula that selects the data to be plotted. This argument #'follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #'variables (used for the per-panel display) and the optional conditioning #'variables (which define the subsets plotted in different panels) to be used #'in the plot. #' #'The formula is evaluated on the complete data set in the \code{long} form. #'Legal variable names for the formula include \code{names(x$data)} plus the #'two administrative factors \code{.imp} and \code{.id}.
#' #'\bold{Extended formula interface:} The primary variable terms (both the LHS #'\code{y} and RHS \code{x}) may consist of multiple terms separated by a #'\sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #'taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #'\code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #'\emph{separate panels}. This behavior differs from standard \pkg{lattice}. #'\emph{Only combine terms of the same type}, i.e. only factors or only #'numerical variables. Mixing numerical and categorical data occasionally #'produces odd labeling of the vertical axis. #' #'@param na.groups An expression evaluating to a logical vector indicating #'which two groups are distinguished (e.g. using different colors) in the #'display. The environment in which this expression is evaluated is the #'response indicator \code{is.na(x$data)}. #' #'The default \code{na.groups = NULL} contrasts the observed and missing data #'in the LHS \code{y} variable of the display, i.e. groups created by #'\code{is.na(y)}. The expression \code{y} creates the groups according to #'\code{is.na(y)}. The expression \code{y1 & y2} creates groups by #'\code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #'\code{is.na(y1) | is.na(y2)}, and so on. #'@param groups This is the usual \code{groups} argument in \pkg{lattice}. It #'differs from \code{na.groups} because it evaluates in the completed data #'\code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #'\code{na.groups} evaluates in the response indicator. See #'\code{\link{xyplot}} for more details. When both \code{na.groups} and #'\code{groups} are specified, \code{na.groups} takes precedence, and #'\code{groups} is ignored. #'@param theme A named list containing the graphical parameters. The default #'function \code{mice.theme} produces a short list of default colors, line #'width, and so on.
The extensive list may be obtained from #'\code{trellis.par.get()}. Global graphical parameters like \code{col} or #'\code{cex} in high-level calls are still honored, so first experiment with #'the global parameters. Many settings consist of a pair. For example, #'\code{mice.theme} defines two symbol colors. The first is for the observed #'data, the second for the imputed data. The theme settings only exist during #'the call, and do not affect the trellis graphical parameters. #'@param as.table See \code{\link[lattice:xyplot]{xyplot}}. #'@param outer See \code{\link[lattice:xyplot]{xyplot}}. #'@param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #'@param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #'@param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #'@param subset See \code{\link[lattice:xyplot]{xyplot}}. #'@param \dots Further arguments, usually not directly processed by the #'high-level functions documented here, but instead passed on to other #'functions. #'@return The high-level functions documented here, as well as other high-level #'Lattice functions, return an object of class \code{"trellis"}. The #'\code{\link[lattice:update.trellis]{update}} method can be used to #'subsequently update components of the object, and the #'\code{\link[lattice:print.trellis]{print}} method (usually called by default) #'will plot it on an appropriate plotting device. #'@note The first two arguments (\code{x} and \code{data}) are reversed #'compared to the standard Trellis syntax implemented in \pkg{lattice}. This #'reversal was necessary in order to benefit from automatic method dispatch. #' #'In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #'in \pkg{lattice} the argument \code{x} is always a formula. #' #'In \pkg{mice} the argument \code{data} is always a formula object, whereas in #'\pkg{lattice} the argument \code{data} is usually a data frame. #' #'All other arguments have identical interpretation.
#'
#'@author Stef van Buuren
#'@seealso \code{\link{mice}}, \code{\link{stripplot}}, \code{\link{densityplot}},
#'\code{\link{bwplot}}, \code{\link{lattice}} for an overview of the
#'package, as well as \code{\link[lattice:xyplot]{xyplot}},
#'\code{\link[lattice:panel.xyplot]{panel.xyplot}},
#'\code{\link[lattice:print.trellis]{print.trellis}},
#'\code{\link[lattice:trellis.par.set]{trellis.par.set}}
#'@references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data
#'Visualization with R}, Springer.
#'
#'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'@keywords hplot
#'@examples
#'imp <- mice(boys, maxit=1)
#'
#'### xyplot: scatterplot by imputation number
#'### observe the erroneous outlying imputed values
#'### (caused by imputing hgt from bmi)
#'xyplot(imp, hgt~age|.imp, pch=c(1,20),cex=c(1,1.5))
#'
#'### same, but label with missingness of wgt (four cases)
#'xyplot(imp, hgt~age|.imp, na.group=wgt, pch=c(1,20),cex=c(1,1.5))
#'
#'@export
xyplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE,
                        theme = mice.theme(), allow.multiple = TRUE, outer = TRUE,
                        drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"),
                        ..., subscripts = TRUE, subset = TRUE) {
  # the matched call is used below to find out which arguments the user
  # actually supplied (na.groups, groups, ylab, scales, subset)
  call <- match.call()
  if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object")
  if (missing(data)) stop("Missing formula")
  # in this method `data` carries the plotting formula
  formula <- data

  ## unpack data and response indicator
  # cd: completed data in long form, including the original data
  # r: logical data frame, TRUE where x$data is missing
  cd <- data.frame(complete(x, "long", include=TRUE))
  r <- as.data.frame(is.na(x$data))

  ## evaluate na.group in response indicator
  nagp <- eval(expr=substitute(na.groups), envir=r, enclos=parent.frame())
  if (is.expression(nagp)) nagp <- eval(expr=nagp, envir=r, enclos=parent.frame())

  ## evaluate groups in imputed data
  ngp <- eval(expr=substitute(groups), envir=cd, enclos=parent.frame())
  if (is.expression(ngp)) ngp <- eval(expr=ngp, envir=cd, enclos=parent.frame())
  groups <- ngp

  ## evaluate subset in imputed data
  ss <- eval(expr=substitute(subset), envir=cd, enclos=parent.frame())
  if (is.expression(ss)) ss <- eval(expr=ss, envir=cd, enclos=parent.frame())
  subset <- ss

  ## evaluate further arguments before parsing
  dots <- list(...)
  args <- list(allow.multiple = allow.multiple, outer = outer,
               drop.unused.levels = drop.unused.levels, subscripts = subscripts,
               as.table = as.table)

  ## determine the y-variables
  # the formula is parsed to obtain the names of the left-hand side terms
  form <- lattice::latticeParseFormula(model=formula, data=cd, subset = subset,
                                       groups = groups, multiple = allow.multiple,
                                       outer = outer, subscripts = TRUE,
                                       drop = drop.unused.levels)
  ynames <- unlist(lapply(strsplit(form$left.name," \\+ "), rm.whitespace)) ## Jul2011

  ## calculate selection vector gp
  # - groups given and na.groups not given: the unevaluated groups expression
  # - neither given: missingness of each y-variable, repeated x$m+1 times
  # - na.groups given: its value, repeated for every y-variable and data set
  nona <- is.null(call$na.groups)
  if (!is.null(call$groups) && nona) gp <- call$groups
  else {
    if (nona) {
      na.df <- r[, ynames, drop=FALSE]
      gp <- unlist(lapply(na.df, rep.int, x$m+1))
    } else {
      gp <- rep.int(nagp, length(ynames)*(x$m+1))
    }
  }

  ## change axis defaults of extended formula interface
  # applied only when the user did not supply ylab / scales
  if (is.null(call$ylab)) {
    args$ylab <- ""
    if (length(ynames)==1) args$ylab <- ynames
  }
  if (is.null(call$scales)) {
    args$scales <- list()
    if (length(ynames)>1) args$scales <- list(x=list(relation="free"), y=list(relation="free"))
  }

  ## ready
  # subset is passed as the unevaluated expression from the call
  args <- c(x=formula, data=list(cd), groups=list(gp), args, dots, subset=call$subset)

  ## go
  # x is now a formula, so the call goes to the xyplot generic
  # (presumably the lattice formula method - TODO confirm)
  tp <- do.call("xyplot", args)
  # the theme is attached to the returned object via par.settings
  tp <- update(tp, par.settings = theme)
  return(tp)
}
mice/R/install.on.demand.R0000644000176200001440000000073513621203103015017 0ustar liggesusers
install.on.demand <- function(pkg, quiet = FALSE, ...) {
  # internal function that checks whether package pkg is
  # in the library. If not found, it writes a message to
  # the console (if quiet = FALSE) and installs it from CRAN
  if (requireNamespace(pkg, quietly = TRUE)) return()
  if (!quiet) cat(paste0("\nInstalling '", pkg, "' package...\n"))
  install.packages(pkg, repos = "https://cloud.r-project.org/", quiet = quiet)
  if (!quiet) cat("\n")
}
mice/R/popmis.R0000644000176200001440000000154713416657163013045 0ustar liggesusers
#'Hox pupil popularity data with missing popularity scores
#'
#'Hox pupil popularity data with some missing popularity scores
#'
#'The original, complete dataset was generated by Joop Hox as an example of
#'well-behaved multilevel data set. The distributed data contains missing data
#'in pupil popularity.
#'
#'@name popmis
#'@docType data
#'@format A data frame with 2000 rows and 7 columns:
#'\describe{
#'\item{pupil}{Pupil number within school}
#'\item{school}{School number}
#'\item{popular}{Pupil popularity with 848 missing entries}
#'\item{sex}{Pupil gender}
#'\item{texp}{Teacher experience (years)}
#'\item{const}{Constant intercept term}
#'\item{teachpop}{Teacher popularity} }
#'@source Hox, J. J. (2002) \emph{Multilevel analysis. Techniques and
#'applications.} Mahwah, NJ: Lawrence Erlbaum.
#'@keywords datasets
#'@examples
#'
#'popmis[1:3,]
#'
NULL
mice/R/mice.impute.2lonly.mean.R0000644000176200001440000000624013556602652016103 0ustar liggesusers
#' Imputation of most likely value within the class
#'
#' Method \code{2lonly.mean} replicates the most likely value within
#' a class of a second-level variable. It works for numeric and
#' factor data. The function is primarily useful as a quick fixup for
#' data in which the second-level variable is inconsistent.
#'
#' @aliases 2lonly.mean
#' @inheritParams mice.impute.pmm
#' @param type Vector of length \code{ncol(x)} identifying random and class
#' variables. The class variable (only one is allowed) is coded as \code{-2}.
#' @param ... Other named arguments.
#' @return Vector with imputed data, same type as \code{y}, and of length
#' \code{sum(wy)}
#' @details
#' Observed values in \code{y} are averaged within the class, and
#' replicated to the missing \code{y} within that class.
#' This function is primarily useful for repairing incomplete data
#' that are constant within the class, but vary over classes.
#'
#' For numeric variables, \code{mice.impute.2lonly.mean()} imputes the
#' class mean of \code{y}. If \code{y} is a second-level variable, then
#' conventionally all observed \code{y} will be identical within the
#' class, and the function just provides a quick fix for any
#' missing \code{y} by filling in the class mean.
#'
#' For factor variables, \code{mice.impute.2lonly.mean()} imputes the
#' most frequently occurring category within the class.
#'
#' If there are no observed \code{y} in the class, all entries of the
#' class are set to \code{NA}. Note that this may produce problems
#' later on in \code{mice} if imputation routines are called that
#' expect predictor data to be complete. Methods designed for
#' imputing this type of second-level variables include
#' \code{\link{mice.impute.2lonly.norm}} and
#' \code{\link{mice.impute.2lonly.pmm}}.
#'
#' @references
#' Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Boca Raton, FL.: Chapman & Hall/CRC Press.
#' @author Gerko Vink, Stef van Buuren, 2019
#' @family univariate-2lonly
#' @keywords datagen
#' @export
mice.impute.2lonly.mean <- function(y, ry, x, type, wy = NULL, ...)
{
  # resolve wy before the early exit, so that a user-supplied wy is honoured
  # even when every y is observed
  if (is.null(wy)) wy <- !ry
  if (!any(wy)) return(numeric(0))

  yobs <- y[ry]
  class <- x[, type == -2]
  if (length(class) == 0) stop("No class variable")
  classobs <- class[ry]
  classmis <- class[wy]

  # deal with empty classes (will be NaN)
  empty.classes <- class[!class %in% classobs]
  classobs <- c(classobs, empty.classes)
  yobs <- c(yobs, rep.int(NA, length(empty.classes)))

  # NOTE: `...` is deliberately not forwarded to apply() below. The lookup
  # functions take no extra arguments, so any additional argument handed down
  # by mice() used to stop with an "unused argument" error.

  # factor: return class levels corresponding to class median
  # NOTE(review): the roxygen text promises the most frequent category; the
  # median coincides with it when y is constant within the class - confirm
  # whether the mode is intended otherwise.
  if (is.factor(y)) {
    ym <- aggregate(yobs, list(classobs), median, na.rm = TRUE)
    ym$x <- as.integer(ym$x)
    return(apply(as.matrix(classmis), 1,
                 function(z, y, lev) lev[y[z == y[, 1], 2]],
                 y = ym, lev = levels(y)))
  }

  # otherwise: return the class means
  ym <- aggregate(yobs, list(classobs), mean, na.rm = TRUE)
  z <- apply(as.matrix(classmis), 1,
             function(z, y) y[z == y[, 1], 2],
             y = ym)
  z[is.nan(z)] <- NA
  z
}
mice/R/formula.R0000644000176200001440000001777113416657163013211 0ustar liggesusers
#' Creates a \code{formulas} argument
#'
#' This helper function creates a valid \code{formulas} object. The
#' \code{formulas} object is an argument to the \code{mice} function.
#' It is a list of formula's that specifies the target variables and
#' the predictors by means of the standard \code{~} operator.
#' @param data A \code{data.frame} with the source data
#' @param blocks An optional specification for blocks of variables in
#' the rows. The default assigns each variable in its own block.
#' @param predictorMatrix A \code{predictorMatrix} specified by the user.
#' @return A list of formula's.
#' @seealso \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}}
#' @examples
#' f1 <- make.formulas(nhanes)
#' f1
#' f2 <- make.formulas(nhanes, blocks = make.blocks(nhanes, "collect"))
#' f2
#'
#' # for editing, it may be easier to work with the character vector
#' c1 <- as.character(f1)
#' c1
#'
#' # fold it back into a formula list
#' f3 <- name.formulas(lapply(c1, as.formula))
#' f3
#'
#' @export
make.formulas <- function(data, blocks = make.blocks(data),
                          predictorMatrix = NULL) {
  data <- check.dataform(data)
  block.names <- names(blocks)

  # every block starts out as the empty model
  formulas <- as.list(rep("~ 0", length(blocks)))
  names(formulas) <- block.names

  for (h in block.names) {
    y <- blocks[[h]]
    # candidate predictors: all columns, or the non-zero entries of row h
    # of the predictor matrix
    predictors <- if (is.null(predictorMatrix)) {
      colnames(data)
    } else {
      type <- predictorMatrix[h, ]
      names(type)[type != 0]
    }
    # variables of the block itself never predict the block
    rhs.txt <- paste(c("0", setdiff(predictors, y)), collapse = "+")
    lhs.txt <- paste(y, collapse = "+")
    formulas[[h]] <- paste(lhs.txt, "~", rhs.txt)
  }

  # turn the character specifications into formula objects
  lapply(formulas, as.formula)
}

#' Name formula list elements
#'
#' This helper function names any unnamed elements in the \code{formula}
#' list. This is a convenience function.
#' @inheritParams mice
#' @param prefix A character vector of length 1 with the prefix to
#' be using for naming any unnamed blocks with two or more variables.
#' @return Named list of formulas
#' @seealso \code{\link{mice}}
#' @details
#' This function will name any unnamed list elements specified in
#' the optional argument \code{formula}. Unnamed formula's
#' consisting of just one response variable will be named
#' after this variable. Unnamed formula's containing more
#' than one variable will be named by the \code{prefix}
#' argument, padded by an integer sequence starting at 1.
#' @examples
#' # fully conditionally specified main effects model
#' form1 <- list(bmi ~ age + chl + hyp,
#' hyp ~ age + bmi + chl,
#' chl ~ age + bmi + hyp)
#' form1 <- name.formulas(form1)
#' imp1 <- mice(nhanes, formulas = form1, print = FALSE, m = 1, seed = 12199)
#'
#' # same model using dot notation
#' form2 <- list(bmi ~ ., hyp ~ ., chl ~ .)
#' form2 <- name.formulas(form2)
#' imp2 <- mice(nhanes, formulas = form2, print = FALSE, m = 1, seed = 12199)
#' identical(complete(imp1), complete(imp2))
#'
#' # same model using repeated multivariate imputation
#' form3 <- name.blocks(list(all = bmi + hyp + chl ~ .))
#' imp3 <- mice(nhanes, formulas = form3, print = FALSE, m = 1, seed = 12199)
#' cmp3 <- complete(imp3)
#' identical(complete(imp1), complete(imp3))
#'
#' # same model using predictorMatrix
#' imp4 <- mice(nhanes, print = FALSE, m = 1, seed = 12199, auxiliary = TRUE)
#' identical(complete(imp1), complete(imp4))
#'
#' # different model: multivariate imputation for chl and bmi
#' form5 <- list(chl + bmi ~ ., hyp ~ bmi + age)
#' form5 <- name.formulas(form5)
#' imp5 <- mice(nhanes, formulas = form5, print = FALSE, m = 1, seed = 71712)
#' @export
name.formulas <- function(formulas, prefix = "F") {
  if (!is.list(formulas))
    stop("Argument `formulas` not a list", call. = FALSE)

  # vapply() always yields a logical vector, also for an empty list;
  # sapply() would return list() there and make `|` fail with a type error.
  valid <- vapply(formulas, is.formula, logical(1)) |
    vapply(formulas, is.list, logical(1))
  if (!all(valid))
    stop("Not all elements in `formulas` are a formula or a list",
         call. = FALSE)

  if (is.null(names(formulas))) names(formulas) <- rep("", length(formulas))

  # counter for the <prefix><n> names given to multi-response formulas
  inc <- 1
  for (i in seq_along(formulas)) {
    # user-supplied names are never overwritten
    if (names(formulas)[i] != "") next
    y <- lhs(formulas[[i]])
    if (length(y) == 1) {
      # a single response variable lends its name to the element
      names(formulas)[i] <- y
    } else {
      names(formulas)[i] <- paste0(prefix, inc)
      inc <- inc + 1
    }
  }
  formulas
}

# Prepares a user-supplied `formulas` argument: names the elements,
# passes them through handle.oldstyle.formulas(), expands any dot on the
# right-hand side against the columns of `data`, and coerces the elements
# to formula objects. Returns the list of formulas.
check.formulas <- function(formulas, data) {
  formulas <- name.formulas(formulas)
  formulas <- handle.oldstyle.formulas(formulas, data)
  formulas <- lapply(formulas, expand.dots, data)
  # escape if formula is list of two formula's
  # (vapply keeps this check well-defined for an empty list)
  if (any(vapply(formulas, is.list, logical(1)))) return(formulas)
  formulas <- lapply(formulas, as.formula)
  formulas
}

#' Extends formula's with predictor matrix settings
#'
#' @inheritParams mice
#' @return A list of formula's
#' @param auxiliary A logical that indicates whether the variables
#' listed in \code{predictors} should be added to the formula as main
#' effects. The default is \code{TRUE}.
#' @param include.intercept A logical that indicates whether the intercept
#' should be included in the result.
#' @keywords internal
extend.formulas <- function(formulas, data, blocks, predictorMatrix = NULL,
                            auxiliary = TRUE,
                            include.intercept = FALSE,
                            ...) {
  # Without a predictorMatrix there is nothing to extend
  if (is.null(predictorMatrix)) return(formulas)
  for (h in names(blocks)) {
    # predictors of block h are the non-zero entries of its matrix row
    type <- predictorMatrix[h, ]
    predictors <- names(type)[type != 0]
    formulas[[h]] <- extend.formula(formula = formulas[[h]],
                                    predictors = predictors,
                                    auxiliary = auxiliary,
                                    include.intercept = include.intercept)
  }
  formulas
}

#' Extends a formula with predictors
#'
#' @param formula A formula. If it is
#' not a formula, the formula is internally reset to \code{~0}.
#' @param predictors A character vector of variable names.
#' @param auxiliary A logical that indicates whether the variables
#' listed in \code{predictors} should be added to the formula as main
#' effects. The default is \code{TRUE}.
#' @param include.intercept A logical that indicates whether the intercept
#' should be included in the result.
#' @return A formula #' @keywords internal extend.formula <- function(formula = ~ 0, predictors = NULL, auxiliary = TRUE, include.intercept = FALSE, ...) { if (!is.formula(formula)) formula <- ~ 0 # handle dot in RHS if (hasdot(formula)) { if (length(predictors) > 1) fr <- as.formula(c("~", paste(predictors, collapse = "+"))) else fr <- ~ 0 } else fr <- reformulate(c(".", predictors)) if (auxiliary) formula <- update(formula, fr, ...) if (include.intercept) formula <- update(formula, ~ . + 1, ...) formula } handle.oldstyle.formulas <- function(formulas, data) { # converts old-style character vector to formula list oldstyle <- length(formulas) == ncol(data) && is.vector(formulas) && is.character(formulas) if (!oldstyle) return(formulas) formulas[formulas != ""] <- "~ 0" fl <- as.list(formulas) names(fl) <- names(formulas) fl } is.empty.model.data <- function (x, data) { tt <- terms(x, data = data) (length(attr(tt, "factors")) == 0L) & (attr(tt, "intercept") == 0L) } lhs <- function(x) all.vars(update(x, . ~ 1)) is.formula <- function(x){ inherits(x, "formula") } hasdot <- function(f) { if(is.recursive(f)) { return(any(sapply(as.list(f), hasdot))) } else { f == as.symbol(".")} } expand.dots <- function(formula, data) { if (!is.formula(formula)) return(formula) if (!hasdot(formula)) return(formula) y <- lhs(formula) x <- setdiff(colnames(data), y) fs <- paste(paste(y, collapse = "+"), "~", paste(x, collapse = "+")) as.formula(fs) } mice/R/post.R0000644000176200001440000000150113416657163012511 0ustar liggesusers#' Creates a \code{post} argument #' #' This helper function creates a valid \code{post} vector. The #' \code{post} vector is an argument to the \code{mice} function that #' specifies post-processing for a variable just after imputation. 
#' @inheritParams mice #' @return Character vector of \code{ncol(data)} element #' @seealso \code{\link{mice}} #' @examples #' make.post(nhanes2) #' @export make.post <- function(data) { post <- vector("character", length = ncol(data)) names(post) <- colnames(data) post } check.post <- function(post, data) { if(is.null(post)) return(make.post(data)) # check if (length(post) != ncol(data)) stop("length(post) does not match ncol(data)", call. = FALSE) # change if (is.null(names(post))) names(post) <- colnames(data) post } mice/R/D2.R0000644000176200001440000000364613621065766012005 0ustar liggesusers#'Compare two nested models using D2-statistic #' #'The D2-statistic pools test statistics from the repeated analyses. #'The method is less powerful than the D1- and D3-statistics. #' #'@inheritParams D1 #'@inheritParams mitml::testModels #'@references #'Li, K. H., X. L. Meng, T. E. Raghunathan, and D. B. Rubin. 1991. #'Significance Levels from Repeated p-Values with Multiply-Imputed Data. #'\emph{Statistica Sinica} 1 (1): 65–92. #' #'\url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:chi} #'@examples #'# Compare two linear models: #'imp <- mice(nhanes2, seed = 51009, print = FALSE) #'mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) #'mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) #'D2(mi1, mi0) #' #'# Compare two logistic regression models #'imp <- mice(boys, maxit = 2, print = FALSE) #'fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) #'fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) #'D2(fit1, fit0) #'@seealso \code{\link[mitml]{testModels}} #'@export D2 <- function(fit1, fit0 = NULL, use = "wald", ...) { install.on.demand("mitml", ...) 
# fit1: a fitlist or mira-object # fit0: named numerical vector, character vector, or list fit1 <- as.mitml.result(fit1) est1 <- pool(fit1) qbar1 <- getqbar(est1) if (is.null(fit0)) { # test all estimates equal to zero beta <- rep(0, length(qbar1)) names(beta) <- names(qbar1) fit0 <- lapply(fit1, fix.coef, beta = beta) fit0 <- as.mitml.result(fit0) } else if (is.mira(fit0)) { fit0 <- as.mitml.result(fit0) } tmr <- mitml::testModels(fit1, fit0, method = "D2", use = use, ...) out <- list( call = match.call(), result = tmr$test, formulas = list(`1` = formula(fit1[[1L]]), `2` = formula(fit0[[1L]])), m = tmr$m, method = "D2", use = use, df.com = NA) class(out) <- c("mice.anova", class(fit1)) out } mice/R/RcppExports.R0000644000176200001440000000033713623760433014016 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 matcher <- function(obs, mis, k) { .Call('_mice_matcher', PACKAGE = 'mice', obs, mis, k) } mice/R/anova.R0000644000176200001440000000315613416657163012640 0ustar liggesusers#'Compare several nested models #' #'@rdname anova #'@param object Two or more objects of class \code{mira} #'@param method Either \code{"D1"}, \code{"D2"} or \code{"D3"} #'@param use An character indicating the test statistic #'@param ... Other parameters passed down to \code{D1()}, \code{D2()}, #'\code{D3()} and \code{mitml::testModels}. #'@return Object of class \code{mice.anova} #'@export anova.mira <- function(object, ..., method = "D1", use = "wald") { modlist <- list(object, ...) 
first <- lapply(modlist, getfit, 1L) %>% sapply(glance) if (is.null(names(modlist))) names(modlist) <- names(first) <- 1L:length(modlist) else names(first) <- names(modlist) # order by model complexity idx <- order(unlist(first["df.residual", ]), decreasing = FALSE) modlist <- modlist[idx] df.com <- first["df.residual", idx] names(df.com) <- names(modlist) # get model formulas formulas <- lapply(modlist, getfit, 1L) %>% lapply(formula) names(formulas) <- names(modlist) # test successive models nm <- length(modlist) out <- vector("list", nm - 1L) names(out) <- paste(names(modlist), lead(names(modlist)), sep = " ~~ ")[-nm] for(j in seq_along(out)) { args <- alist(fit1 = modlist[[j]], fit0 = modlist[[j + 1L]], df.com = as.numeric(unlist(df.com[j])), use = use) out[[j]] <- do.call(method, args = args) } obj <- list( call = match.call(), out = out, formulas = formulas, m = length(getfit(modlist[[1L]])), method = method, use = use) class(obj) <- c("mice.anova", class(first)) obj } mice/R/fix.coef.R0000644000176200001440000000613713416657163013237 0ustar liggesusers#'Fix coefficients and update model #' #'Refits a model with a specified set of coefficients. #' #'@param model An R model, e.g., produced by \code{lm} or \code{glm} #'@param beta A numeric vector with \code{length(coef)} model coefficients. #'If the vector is not named, the coefficients should be #'given in the same order as in \code{coef(model)}. If the vector is named, #'the procedure attempts to match on names. #'@return An updated R model object #'@author Stef van Buuren, 2018 #'@details #'The function calculates the linear predictor using the new coefficients, #'and reformulates the model using the \code{offset} #'argument. The linear predictor is called #'\code{offset}, and its coefficient will be \code{1} by definition. #'The new model only fits the intercept, which should be \code{0} #'if we set \code{beta = coef(model)}. 
#'@examples #'model0 <- lm(Volume ~ Girth + Height, data = trees) #'formula(model0) #'coef(model0) #'deviance(model0) #' #'# refit same model #'model1 <- fix.coef(model0) #'formula(model1) #'coef(model1) #'deviance(model1) #' #'# change the beta's #'model2 <- fix.coef(model0, beta = c(-50, 5, 1)) #'coef(model2) #'deviance(model2) #' #'# compare predictions #'plot(predict(model0), predict(model1)); abline(0,1) #'plot(predict(model0), predict(model2)); abline(0,1) #' #'# compare proportion explained variance #'cor(predict(model0), predict(model0) + residuals(model0))^2 #'cor(predict(model1), predict(model1) + residuals(model1))^2 #'cor(predict(model2), predict(model2) + residuals(model2))^2 #' #'# extract offset from constrained model #'summary(model2$model$offset) #' #'# it also works with factors and missing data #'model0 <- lm(bmi ~ age + hyp + chl, data = nhanes2) #'model1 <- fix.coef(model0) #'model2 <- fix.coef(model0, beta = c(15, -8, -8, 2, 0.2)) #'@export fix.coef <- function(model, beta = NULL) { oldcoef <- tidy.coef(model) if (is.null(beta)) beta <- oldcoef if (length(oldcoef) != length(beta)) stop("incorrect length of 'beta'", call. = FALSE) # handle naming if (is.null(names(oldcoef))) names(oldcoef) <- make.names(seq_along(oldcoef)) if (is.null(names(beta))) names(beta) <- names(oldcoef) else { diff <- setdiff(names(oldcoef), names(beta)) if (length(diff) > 0) stop("names not found in 'beta': ", diff, call. = FALSE) diff <- setdiff(names(beta), names(oldcoef)) if (length(diff) > 0) stop("names not found in 'coef(model)': ", diff, call. = FALSE) } beta <- beta[names(oldcoef)] # re-calculate model for new beta's data <- model.frame(formula = formula(model), data = model.frame(model)) mm <- model.matrix(formula(model, fixed.only = TRUE), data = data) offset <- as.vector(mm %*% beta) uf <- . ~ 1 if (inherits(model, "merMod")) uf <- formula(model, random.only = TRUE) upd <- update(model, formula. 
= uf, data = cbind(data, offset = offset), offset = offset) upd } tidy.coef <- function(model) { est <- tidy(model, effects = "fixed") coef <- est$estimate names(coef) <- est$term coef } mice/R/nelsonaalen.R0000644000176200001440000000437613617532205014030 0ustar liggesusers# -----------------------------NELSONAALEN------------------------------------- #'Cumulative hazard rate or Nelson-Aalen estimator #' #'Calculates the cumulative hazard rate (Nelson-Aalen estimator) #' #'This function is useful for imputing variables that depend on survival time. #'White and Royston (2009) suggested using the cumulative hazard to the #'survival time H0(T) rather than T or log(T) as a predictor in imputation #'models. See section 7.1 of Van Buuren (2012) for an example. #' #'@aliases nelsonaalen hazard #'@param data A data frame containing the data. #'@param timevar The name of the time variable in \code{data}. #'@param statusvar The name of the event variable, e.g. death in \code{data}. #'@return A vector with \code{nrow(data)} elements containing the Nelson-Aalen #'estimates of the cumulative hazard function. #'@author Stef van Buuren, 2012 #'@references White, I. R., Royston, P. (2009). Imputing missing covariate #'values for the Cox model. \emph{Statistics in Medicine}, \emph{28}(15), #'1982-1998. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-toomany.html#a-further-improvement-survival-as-predictor-variable}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. 
#'@keywords misc #'@examples #'require(MASS) #' #'leuk$status <- 1 ## no censoring occurs in leuk data (MASS) #'ch <- nelsonaalen(leuk, time, status) #'plot(x = leuk$time, y = ch, ylab='Cumulative hazard', xlab='Time') #' #'### See example on http://www.engineeredsoftware.com/lmar/pe_cum_hazard_function.htm #'time <- c(43, 67, 92, 94, 149, rep(149,7)) #'status <- c(rep(1,5),rep(0,7)) #'eng <- data.frame(time, status) #'ch <- nelsonaalen(eng, time, status) #'plot(x = time, y = ch, ylab='Cumulative hazard', xlab='Time') #' #' #'@export nelsonaalen <- function(data, timevar, statusvar) { install.on.demand("survival") if (!is.data.frame(data)) stop("Data must be a data frame") timevar <- as.character(substitute(timevar)) statusvar <- as.character(substitute(statusvar)) time <- data[, timevar] status <- data[, statusvar] hazard <- survival::basehaz(survival::coxph(survival::Surv(time, status) ~ 1)) idx <- match(time, hazard[, "time"]) return(hazard[idx, "hazard"]) } mice/R/flux.R0000644000176200001440000002074113617315126012502 0ustar liggesusers#'Influx and outflux of multivariate missing data patterns #' #'Influx and outflux are statistics of the missing data pattern. These #'statistics are useful in selecting predictors that should go into the #'imputation model. #' #'Infux and outflux have been proposed by Van Buuren (2012), chapter 4. #' #'Influx is equal to the number of variable pairs \code{(Yj , Yk)} with #'\code{Yj} missing and \code{Yk} observed, divided by the total number of #'observed data cells. Influx depends on the proportion of missing data of the #'variable. Influx of a completely observed variable is equal to 0, whereas for #'completely missing variables we have influx = 1. For two variables with the #'same proportion of missing data, the variable with higher influx is better #'connected to the observed data, and might thus be easier to impute. 
#' #'Outflux is equal to the number of variable pairs with \code{Yj} observed and #'\code{Yk} missing, divided by the total number of incomplete data cells. #'Outflux is an indicator of the potential usefulness of \code{Yj} for imputing #'other variables. Outflux depends on the proportion of missing data of the #'variable. Outflux of a completely observed variable is equal to 1, whereas #'outflux of a completely missing variable is equal to 0. For two variables #'having the same proportion of missing data, the variable with higher outflux #'is better connected to the missing data, and thus potentially more useful for #'imputing other variables. #' #'FICO is an outbound statistic defined by the fraction of incomplete cases #'among cases with \code{Yj} observed (White and Carlin, 2010). #' #'@aliases flux #'@param data A data frame or a matrix containing the incomplete data. Missing #'values are coded as NA's. #'@param local A vector of names of columns of \code{data}. The default is to #'include all columns in the calculations. #'@return A data frame with \code{ncol(data)} rows and six columns: #'pobs = Proportion observed, #'influx = Influx #'outflux = Outflux #'ainb = Average inbound statistic #'aout = Average outbound statistic #'fico = Fraction of incomplete cases among cases with \code{Yj} observed #'@seealso \code{\link{fluxplot}}, \code{\link{md.pattern}}, \code{\link{fico}} #'@author Stef van Buuren, 2012 #'@references #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' #'White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation #'compared with complete-case analysis for missing covariate values. #'\emph{Statistics in Medicine}, \emph{29}, 2920-2931. 
#'@keywords misc
#'@export
flux <- function(data, local = names(data)) {
  # Row average that divides by one less than the row length; NA terms are
  # dropped from the sum but still counted in the denominator.
  .avg <- function(row) sum(row, na.rm = TRUE) / (length(row) - 1)

  # names() is NULL for a matrix: fall back to all columns by position so
  # that matrix input (documented as supported) does not yield zero rows.
  if (is.null(local)) local <- seq_len(ncol(data))

  ## calculates influx and outflux statistics
  ## of the missing data pattern
  pobs <- colMeans(!is.na(data))
  pat <- md.pairs(data)
  pat$rr <- pat$rr[local, , drop = FALSE]
  pat$rm <- pat$rm[local, , drop = FALSE]
  pat$mr <- pat$mr[local, , drop = FALSE]
  pat$mm <- pat$mm[local, , drop = FALSE]

  ainb <- apply(pat$mr / (pat$mr + pat$mm), 1, .avg)
  aout <- apply(pat$rm / (pat$rm + pat$rr), 1, .avg)
  outflux <- rowSums(pat$rm) / (rowSums(pat$rm + pat$mm))
  influx <- rowSums(pat$mr) / (rowSums(pat$mr + pat$rr))

  # fico() iterates with lapply(), i.e. over columns only for a data frame
  fico_all <- fico(as.data.frame(data))

  # pobs and fico are computed for every column; restrict them to `local`
  # so that all columns of the result describe the same variables instead
  # of failing (or being silently recycled) inside data.frame().
  data.frame(pobs = pobs[local], influx = influx, outflux = outflux,
             ainb = ainb, aout = aout, fico = fico_all[local])
}

#'Fluxplot of the missing data pattern
#'
#'Influx and outflux are statistics of the missing data pattern. These
#'statistics are useful in selecting predictors that should go into the
#'imputation model.
#'
#'Influx and outflux have been proposed by Van Buuren (2012), chapter 4.
#'
#'Influx is equal to the number of variable pairs \code{(Yj , Yk)} with
#'\code{Yj} missing and \code{Yk} observed, divided by the total number of
#'observed data cells. Influx depends on the proportion of missing data of the
#'variable. Influx of a completely observed variable is equal to 0, whereas for
#'completely missing variables we have influx = 1. For two variables with the
#'same proportion of missing data, the variable with higher influx is better
#'connected to the observed data, and might thus be easier to impute.
#'
#'Outflux is equal to the number of variable pairs with \code{Yj} observed and
#'\code{Yk} missing, divided by the total number of incomplete data cells.
#'Outflux is an indicator of the potential usefulness of \code{Yj} for imputing
#'other variables. Outflux depends on the proportion of missing data of the
#'variable. Outflux of a completely observed variable is equal to 1, whereas
#'outflux of a completely missing variable is equal to 0.
For two variables #'having the same proportion of missing data, the variable with higher outflux #'is better connected to the missing data, and thus potentially more useful for #'imputing other variables. #' #'@aliases fluxplot #'@param data A data frame or a matrix containing the incomplete data. Missing #'values are coded as NA's. #'@param local A vector of names of columns of \code{data}. The default is to #'include all columns in the calculations. #'@param plot Should a graph be produced? #'@param labels Should the points be labeled? #'@param xlim See \code{par}. #'@param ylim See \code{par}. #'@param las See \code{par}. #'@param xlab See \code{par}. #'@param ylab See \code{par}. #'@param main See \code{par}. #'@param eqscplot Should a square plot be produced? #'@param pty See \code{par}. #'@param lwd See \code{par}. Controls axis line thickness and diagonal #'@param \dots Further arguments passed to \code{plot()} or \code{eqscplot()}. #'@return An invisible data frame with \code{ncol(data)} rows and six columns: #'pobs = Proportion observed, #'influx = Influx #'outflux = Outflux #'ainb = Average inbound statistic #'aout = Average outbound statistic #'fico = Fraction of incomplete cases among cases with \code{Yj} observed #'@seealso \code{\link{flux}}, \code{\link{md.pattern}}, \code{\link{fico}} #'@author Stef van Buuren, 2012 #'@references #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' #'White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation #'compared with complete-case analysis for missing covariate values. #'\emph{Statistics in Medicine}, \emph{29}, 2920-2931. 
#'@keywords misc
#'@export
fluxplot <- function(data, local=names(data),
                     plot=TRUE, labels=TRUE,
                     xlim=c(0,1), ylim=c(0,1), las=1,
                     xlab="Influx", ylab="Outflux",
                     main=paste("Influx-outflux pattern for",deparse(substitute(data))),
                     eqscplot = TRUE, pty="s",
                     lwd = 1, ...) {
  # influx/outflux statistics, one row per variable selected by `local`
  f <- flux(data, local)

  if (plot) {
    # empty canvas first (type = 'n'); axes are drawn separately below so
    # that `lwd` and `las` can be applied to them
    if (eqscplot) {
      MASS::eqscplot(x = f$influx, y = f$outflux, type = 'n',
                     main = main, xlab = xlab, ylab = ylab,
                     xlim = xlim, ylim = ylim,
                     pty = pty, lwd = lwd, axes = FALSE, ...)
    } else {
      plot(x = f$influx, y = f$outflux, type = 'n',
           main = main, xlab = xlab, ylab = ylab,
           xlim = xlim, ylim = ylim,
           pty = pty, lwd = lwd, axes = FALSE, ...)
    }
    axis(1, lwd = lwd, las = las)
    axis(2, lwd = lwd, las = las)
    # dashed reference line influx + outflux = 1
    abline(1, -1, lty = 2, lwd = lwd)
    if (labels) {
      # label with the variables actually plotted: the row names of `f`
      # follow `local`, whereas names(data) always lists every column
      text(x = f$influx, y = f$outflux, labels = rownames(f), ...)
    } else {
      points(x = f$influx, y = f$outflux, ...)
    }
    box(lwd = lwd)
  }
  invisible(f)
}

#' Fraction of incomplete cases among cases with observed
#'
#'FICO is an outbound statistic defined by the fraction of incomplete cases
#'among cases with \code{Yj} observed (White and Carlin, 2010).
#'
#'@aliases fico
#'@param data A data frame or a matrix containing the incomplete data. Missing
#'values are coded as NA's.
#'@return A vector of length \code{ncol(data)} of FICO statistics.
#'@seealso \code{\link{fluxplot}}, \code{\link{flux}}, \code{\link{md.pattern}}
#'@author Stef van Buuren, 2012
#'@references
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'
#'White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation
#'compared with complete-case analysis for missing covariate values.
#'\emph{Statistics in Medicine}, \emph{29}, 2920-2931.
#'@keywords misc #'@export fico <- function(data){ ic <- ici(data) unlist(lapply(data, FUN = function(x) sum((!is.na(x)) & ic)/sum(!is.na(x)))) } mice/R/fdd.R0000644000176200001440000001025013416657163012262 0ustar liggesusers#'SE Fireworks disaster data #' #'Multiple outcomes of a randomized study to reduce post-traumatic stress. #' #'Data from a randomized experiment to reduce post-traumatic stress by two #'treatments: Eye Movement Desensitization and Reprocessing (EMDR) #'(experimental treatment), and cognitive behavioral therapy (CBT) (control #'treatment). 52 children were randomized to one of these two treatments. #'Outcomes were measured at three time points: at baseline (pre-treatment, T1), #'post-treatment (T2, 4-8 weeks), and at follow-up (T3, 3 months). For more #'details, see de Roos et al (2011). Some person covariates were reshuffled. #'The imputation methodology is explained in Chapter 9 of van Buuren (2012). #' #'@name fdd #'@aliases fdd fdd.pred #'@docType data #'@format \code{fdd} is a data frame with 52 rows and 65 columns: #'\describe{ #'\item{id}{Client number} #'\item{trt}{Treatment (E=EMDR, C=CBT)} #'\item{pp}{Per protocol (Y/N)} #'\item{trtp}{Number of parental treatments} #'\item{sex}{Sex: M/F} #'\item{etn}{Ethnicity: NL/OTHER} #'\item{age}{Age (years)} #'\item{trauma}{Trauma count (1-5)} #'\item{prop1}{PROPS total score T1} #'\item{prop2}{PROPS total score T2} #'\item{prop3}{PROPS total score T3} #'\item{crop1}{CROPS total score T1} #'\item{crop2}{CROPS total score T2} #'\item{crop3}{CROPS total score T3} #'\item{masc1}{MASC score T1} #'\item{masc2}{MASC score T2} #'\item{masc3}{MASC score T3} #'\item{cbcl1}{CBCL T1} #'\item{cbcl3}{CBCL T3} #'\item{prs1}{PRS total score T1} #'\item{prs2}{PRS total score T2} #'\item{prs3}{PRS total score T3} #'\item{ypa1}{PTSD-RI B intrusive recollection parent T1} #'\item{ypb1}{PTSD-RI C avoidant/numbing parent T1} #'\item{ypc1}{PTSD-RI D hyper-arousal parent T1} #'\item{yp1}{PTSD-RI B+C+D parent T1} 
#'\item{ypa2}{PTSD-RI B intrusive recollection parent T2} #'\item{ypb2}{PTSD-RI C avoidant/numbing parent T2} #'\item{ypc2}{PTSD-RI D hyper-arousal parent T2} #'\item{yp2}{PTSD-RI B+C+D parent T2} #'\item{ypa3}{PTSD-RI B intrusive recollection parent T3} #'\item{ypb3}{PTSD-RI C avoidant/numbing parent T3} #'\item{ypc3}{PTSD-RI D hyper-arousal parent T3} #'\item{yp3}{PTSD-RI B+C+D parent T3} #'\item{yca1}{PTSD-RI B intrusive recollection child T1} #'\item{ycb1}{PTSD-RI C avoidant/numbing child T1} #'\item{ycc1}{PTSD-RI D hyper-arousal child T1} #'\item{yc1}{PTSD-RI B+C+D child T1} #'\item{yca2}{PTSD-RI B intrusive recollection child T2} #'\item{ycb2}{PTSD-RI C avoidant/numbing child T2} #'\item{ycc2}{PTSD-RI D hyper-arousal child T2} #'\item{yc2}{PTSD-RI B+C+D child T2} #'\item{yca3}{PTSD-RI B intrusive recollection child T3} #'\item{ycb3}{PTSD-RI C avoidant/numbing child T3} #'\item{ycc3}{PTSD-RI D hyper-arousal child T3} #'\item{yc3}{PTSD-RI B+C+D child T3} #'\item{ypf1}{PTSD-RI parent full T1} #'\item{ypf2}{PTSD-RI parent full T2} #'\item{ypf3}{PTSD-RI parent full T3} #'\item{ypp1}{PTSD parent partial T1} #'\item{ypp2}{PTSD parent partial T2} #'\item{ypp3}{PTSD parent partial T3} #'\item{ycf1}{PTSD child full T1} #'\item{ycf2}{PTSD child full T2} #'\item{ycf3}{PTSD child full T3} #'\item{ycp1}{PTSD child partial T1} #'\item{ycp2}{PTSD child partial T2} #'\item{ycp3}{PTSD child partial T3} #'\item{cbin1}{CBCL Internalizing T1} #'\item{cbin3}{CBCL Internalizing T3} #'\item{cbex1}{CBCL Externalizing T1} #'\item{cbex3}{CBCL Externalizing T3} #'\item{bir1}{Birleson T1} #'\item{bir2}{Birleson T2} #'\item{bir3}{Birleson T3} #'} #'\code{fdd.pred} is the 65 by 65 binary #'predictor matrix used to impute \code{fdd}. #'@source de Roos, C., Greenwald, R., den Hollander-Gijsman, M., Noorthoorn, #'E., van Buuren, S., de Jong, A. (2011).
A Randomised Comparison of Cognitive #'Behavioral Therapy (CBT) and Eye Movement Desensitisation and Reprocessing #'(EMDR) in disaster-exposed children. \emph{European Journal of #'Psychotraumatology}, \emph{2}, 5694. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-fdd.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #'Boca Raton, FL.: Chapman & Hall/CRC Press. #'@keywords datasets #'@examples #' #' #'data <- fdd #'md.pattern(fdd) #' #' NULL mice/R/method.R0000644000176200001440000001045413556602652013011 0ustar liggesusers#' Creates a \code{method} argument #' #' This helper function creates a valid \code{method} vector. The #' \code{method} vector is an argument to the \code{mice} function that #' specifies the method for each block. #' @inheritParams mice #' @return Vector of \code{length(blocks)} element with method names #' @seealso \code{\link{mice}} #' @examples #' make.method(nhanes2) #' @export make.method <- function(data, where = make.where(data), blocks = make.blocks(data), defaultMethod = c("pmm", "logreg", "polyreg", "polr")) { assign.method <- function(y) { if (is.numeric(y)) return(1) if (nlevels(y) == 2) return(2) if (is.ordered(y) && nlevels(y) > 2) return(4) if (nlevels(y) > 2) return(3) if (is.logical(y)) return(2) return(1) } # assign methods based on type, # use method 1 if there is no single method within the block method <- rep("", length(blocks)) names(method) <- names(blocks) for (j in names(blocks)) { yvar <- blocks[[j]] y <- data[, yvar] def <- sapply(y, assign.method) k <- ifelse(all(diff(def) == 0), k <- def[1], 1) method[j] <- defaultMethod[k] } nimp <- nimp(where, blocks) method[nimp == 0] <- "" method } check.method <- function(method, data, where, blocks, defaultMethod) { if (is.null(method)) return(make.method(data = data, where = where, blocks = blocks, defaultMethod = defaultMethod)) nimp <- nimp(where, blocks) # expand user's imputation method to 
all visited columns # single string supplied by user (implicit assumption of two columns) if (length(method) == 1) { if (is.passive(method)) stop("Cannot have a passive imputation method for every column.") method <- rep(method, length(blocks)) method[nimp == 0] <- "" } # check the length of the argument if (length(method) != length(blocks)) stop("Length of method differs from number of blocks", call. = FALSE) # add names to method names(method) <- names(blocks) # check whether the requested imputation methods are on the search path active.check <- !is.passive(method) & nimp > 0 & method != "" passive.check <- is.passive(method) & nimp > 0 & method != "" check <- all(active.check) & any(passive.check) if (check) { fullNames <- rep.int("mice.impute.passive", length(method[passive.check])) } else { fullNames <- paste("mice.impute", method[active.check], sep = ".") if (length(method[active.check]) == 0) fullNames <- character(0) } # type checks on built-in imputation methods for (j in names(blocks)) { vname <- blocks[[j]] y <- data[, vname, drop = FALSE] mj <- method[j] mlist <- list(m1 = c("logreg", "logreg.boot", "polyreg", "lda", "polr"), m2 = c("norm", "norm.nob", "norm.predict", "norm.boot", "mean", "2l.norm", "2l.pan", "2lonly.norm", "2lonly.pan", "quadratic", "ri"), m3 = c("norm", "norm.nob", "norm.predict", "norm.boot", "mean", "2l.norm", "2l.pan", "2lonly.norm", "2lonly.pan", "quadratic", "logreg", "logreg.boot")) cond1 <- sapply(y, is.numeric) cond2 <- sapply(y, is.factor) & sapply(y, nlevels) == 2 cond3 <- sapply(y, is.factor) & sapply(y, nlevels) > 2 if (any(cond1) && mj %in% mlist$m1) warning("Type mismatch for variable(s): ", paste(vname[cond1], collapse = ", "), "\nImputation method ", mj, " is for categorical data.", call. = FALSE) if (any(cond2) && mj %in% mlist$m2) warning("Type mismatch for variable(s): ", paste(vname[cond2], collapse = ", "), "\nImputation method ", mj, " is not for factors.", call. 
= FALSE) if (any(cond3) && mj %in% mlist$m3) warning("Type mismatch for variable(s): ", paste(vname[cond3], collapse = ", "), "\nImputation method ", mj, " is not for factors with >2 levels.", call. = FALSE) } method[nimp == 0] <- "" unlist(method) } mice/R/toenail.R0000644000176200001440000000412013617306734013155 0ustar liggesusers#' Toenail data #' #' The toenail data come from a Multicenter study comparing two oral #' treatments for toenail infection. Patients were evaluated for the #' degree of separation of the nail. Patients were randomized into two #' treatments and were followed over seven visits - four in the first #' year and yearly thereafter. The patients have not been treated #' prior to the first visit so this should be regarded as the #' baseline. #' @name toenail #' @docType data #' @format A data frame with 1908 observations on the following 5 variables: #' \describe{ #' \item{\code{ID}}{a numeric vector giving the ID of patient} #' \item{\code{outcome}}{a numeric vector giving the response #' (0=none or mild seperation, 1=moderate or severe)} #' \item{\code{treatment}}{a numeric vector giving the treatment group} #' \item{\code{month}}{a numeric vector giving the time of the visit #' (not exactly monthly intervals hence not round numbers)} #' \item{\code{visit}}{a numeric vector giving the number of the visit} #' } #' @source #' De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De #' Keyser, P. (1998). Twelve weeks of continuous oral therapy for #' toenail onychomycosis caused by dermatophytes: A double-blind #' comparative trial of terbinafine 250 mg/day versus itraconazole 200 #' mg/day. Journal of the American Academy of Dermatology, 38, 57-63. #' @references #' Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of #' quadrature points in a logistic random-effects model: An example. #' Journal of the Royal Statistical Society, Series C, 50, 325-335. #' #' G. Fitzmaurice, N. Laird and J. 
Ware (2004) Applied Longitudinal Analysis, #' Wiley and Sons, New York, USA. #' #' Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible #'Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. #'Boca Raton, FL. #' @keywords datasets #' @seealso \code{\link{toenail2}} #' @details This dataset was copied from the \code{DPpackage}, which is #' scheduled to be discontinued from CRAN in August 2019. NULL mice/R/mice.impute.pmm.R0000644000176200001440000001737013416657163014546 0ustar liggesusers#'Imputation by predictive mean matching #' #'Calculates imputations for univariate missing data by predictive mean matching. #' #'@aliases mice.impute.pmm pmm #'@param y Vector to be imputed #'@param ry Logical vector of length \code{length(y)} indicating the #'the subset \code{y[ry]} of elements in \code{y} to which the imputation #'model is fitted. The \code{ry} generally distinguishes the observed #'(\code{TRUE}) and missing values (\code{FALSE}) in \code{y}. #'@param x Numeric design matrix with \code{length(y)} rows with predictors for #'\code{y}. Matrix \code{x} may have no missing values. #'@param wy Logical vector of length \code{length(y)}. A \code{TRUE} value #'indicates locations in \code{y} for which imputations are created. #'@param donors The size of the donor pool among which a draw is made. #'The default is \code{donors = 5L}. Setting \code{donors = 1L} always selects #'the closest match, but is not recommended. Values between 3L and 10L #'provide the best results in most cases (Morris et al, 2015). #'@param matchtype Type of matching distance. The default choice #'(\code{matchtype = 1L}) calculates the distance between #'the \emph{predicted} value of \code{yobs} and #'the \emph{drawn} values of \code{ymis} (called type-1 matching). #'Other choices are \code{matchtype = 0L} #'(distance between predicted values) and \code{matchtype = 2L} #'(distance between drawn values). 
#'@param ridge The ridge penalty used in \code{.norm.draw()} to prevent
#'problems with multicollinearity. The default is \code{ridge = 1e-05},
#'which means that 0.001 percent of the diagonal is added to the cross-product.
#'Larger ridges may result in more biased estimates. For highly noisy data
#'(e.g. many junk variables), set \code{ridge = 1e-06} or even lower to
#'reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher.
#'@param \dots Other named arguments.
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn
#'@details
#' Imputation of \code{y} by predictive mean matching, based on
#' van Buuren (2012, p. 73). The procedure is as follows:
#'
#'\enumerate{
#'\item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.}
#'\item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge
#'parameter \eqn{\kappa}.}
#'\item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}}
#'\item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.}
#'\item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.}
#'\item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.}
#'\item{Calculate \eqn{\dot\eta(i,j)=|X_{{obs},[i]}\hat\beta-X_{{mis},[j]}\dot\beta|}
#'with \eqn{i=1,\dots,n_1} and \eqn{j=1,\dots,n_0}.}
#'\item{Construct \eqn{n_0} sets \eqn{Z_j}, each containing \eqn{d} candidate donors, from \eqn{Y_{obs}} such that \eqn{\sum_d\dot\eta(i,j)} is minimum for all \eqn{j=1,\dots,n_0}. Break ties randomly.}
#'\item{Draw one donor \eqn{i_j} from \eqn{Z_j} randomly for \eqn{j=1,\dots,n_0}.}
#'\item{Calculate imputations \eqn{\dot y_j = y_{i_j}} for \eqn{j=1,\dots,n_0}.}
#'}
#'
#'The name \emph{predictive mean matching} was proposed by Little (1988).
#'
#'@references Little, R.J.A. (1988), Missing data adjustments in large surveys
#'(with discussion), Journal of Business \& Economic Statistics, 6, 287--301.
#'
#'Morris TP, White IR, Royston P (2015). Tuning multiple imputation by predictive
#'mean matching and local residual draws. BMC Med Res Methodol. ;14:75.
#'
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-pmm.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'
#'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'@family univariate imputation functions
#'@keywords datagen
#'@examples
#'# We normally call mice.impute.pmm() from within mice()
#'# But we may call it directly as follows (not recommended)
#'
#'set.seed(53177)
#'xname <- c('age', 'hgt', 'wgt')
#'r <- stats::complete.cases(boys[, xname])
#'x <- boys[r, xname]
#'y <- boys[r, 'tv']
#'ry <- !is.na(y)
#'table(ry)
#'
#'# percentage of missing data in tv
#'sum(!ry) / length(ry)
#'
#'# Impute missing tv data
#'yimp <- mice.impute.pmm(y, ry, x)
#'length(yimp)
#'hist(yimp, xlab = 'Imputed missing tv')
#'
#'# Impute all tv data
#'yimp <- mice.impute.pmm(y, ry, x, wy = rep(TRUE, length(y)))
#'length(yimp)
#'hist(yimp, xlab = 'Imputed missing and observed tv')
#'plot(jitter(y), jitter(yimp),
#' main = 'Predictive mean matching on age, height and weight',
#' xlab = 'Observed tv (n = 224)',
#' ylab = 'Imputed tv (n = 224)')
#'abline(0, 1)
#'cor(y, yimp, use = 'pair')
#'@export
mice.impute.pmm <- function(y, ry, x, wy = NULL, donors = 5L, matchtype = 1L,
                            ridge = 1e-05, ...) {
  # Fail fast on an unsupported matching type. Previously such a value fell
  # through all three branches and surfaced later as the unrelated error
  # "object 'yhatobs' not found".
  if (length(matchtype) != 1L || is.na(matchtype) ||
      !matchtype %in% c(0L, 1L, 2L)) {
    stop("Argument 'matchtype' must be one of 0L, 1L or 2L.", call. = FALSE)
  }

  # By default impute exactly the cells flagged as not observed.
  if (is.null(wy)) wy <- !ry

  # Prepend an intercept column to the design matrix.
  x <- cbind(1, as.matrix(x))

  # The regression needs a numeric outcome; factors are matched on their
  # integer codes, but the returned donors are taken from the original `y`.
  ynum <- y
  if (is.factor(y)) ynum <- as.integer(y)

  # parm$coef and parm$beta are the two coefficient vectors returned by
  # .norm.draw(); per the matchtype documentation they yield the "predicted"
  # and the "drawn" values respectively.
  parm <- .norm.draw(ynum, ry, x, ridge = ridge, ...)

  # matchtype 0: predicted vs predicted
  # matchtype 1: predicted (observed rows) vs drawn (rows to impute)
  # matchtype 2: drawn vs drawn
  coef.obs <- if (matchtype == 2L) parm$beta else parm$coef
  coef.mis <- if (matchtype == 0L) parm$coef else parm$beta
  yhatobs <- x[ry, , drop = FALSE] %*% coef.obs
  yhatmis <- x[wy, , drop = FALSE] %*% coef.mis

  # matcher() returns, for each row to impute, the index of the selected
  # donor among the observed rows.
  idx <- matcher(yhatobs, yhatmis, k = donors)
  return(y[ry][idx])
}

#' Finds an imputed value from matches in the predictive metric (deprecated)
#'
#' This function finds matches among the observed data in the predictive
#' mean metric. It selects the \code{donors} closest matches, randomly
#' samples one of the donors, and returns the observed value of the
#' match.
#'
#' This function is included for backward compatibility. It was
#' used up to \code{mice 2.21}. The current \code{mice.impute.pmm()}
#' function calls the faster \code{C} function \code{matcher} instead of
#' \code{.pmm.match()}.
#'
#'@aliases .pmm.match
#'@param z A scalar containing the predicted value for the current case
#'to be imputed.
#'@param yhat A vector containing the predicted values for all cases with an observed
#'outcome.
#'@param y A vector of \code{length(yhat)} elements containing the observed outcome
#'@param donors The size of the donor pool among which a draw is made. The default is
#'\code{donors = 5}. Setting \code{donors = 1} always selects the closest match. Values
#'between 3 and 10 provide the best results. Note: This setting was changed from
#'3 to 5 in version 2.19, based on simulation work by Tim Morris (UCL).
#'@param \dots Other parameters (not used).
#'@return A scalar containing the observed value of the selected donor.
#'@author Stef van Buuren
#'@rdname pmm.match
#'@references
#'Schenker N \& Taylor JMG (1996) Partially parametric techniques
#'for multiple imputation. \emph{Computational Statistics and Data Analysis}, 22, 425-446.
#'
#'Little RJA (1988) Missing-data adjustments in large surveys (with discussion).
#'\emph{Journal of Business \& Economic Statistics}, 6, 287-301.
#'
#'@export
.pmm.match <- function(z, yhat = yhat, y = y, donors = 5, ...) {
  # Distance of every observed case to the target prediction.
  d <- abs(yhat - z)

  # Add a tiny random jitter (ten orders of magnitude below the smallest
  # non-zero distance) so that ties are broken at random.
  f <- d > 0
  a1 <- ifelse(any(f), min(d[f]), 1)
  d <- d + runif(length(d), 0, a1/10^10)

  # A single donor means: take the closest match, no sampling needed.
  if (donors == 1) return(y[which.min(d)])

  # The pool can never be larger than the number of observed cases.
  donors <- min(donors, length(d))
  donors <- max(donors, 1)

  # Partial sort: ds[donors] is the distance of the donors-th closest case.
  ds <- sort.int(d, partial = donors)

  # Sample an *index* rather than a value. sample(x, 1) on a single numeric
  # x >= 1 draws from 1:x, so with only one candidate the old code returned
  # a random integer instead of the donor's observed value. For pools of two
  # or more this consumes the random stream exactly as sample(x, 1) did.
  pool <- which(d <= ds[donors])
  m <- y[pool[sample.int(length(pool), 1L)]]
  return(m)
}
# mice/R/pattern1.R0000644000176200001440000000333613416657163013272 0ustar liggesusers
#'Datasets with various missing data patterns
#'
#'Four simple datasets with various missing data patterns
#'
#'Van Buuren (2012) uses these four artificial datasets to illustrate various
#'missing data patterns.
#'
#'@name pattern
#'@aliases pattern1 pattern2 pattern3 pattern4
#'@docType data
#'@format \describe{ \item{list("pattern1")}{Data with a univariate missing
#'data pattern} \item{list("pattern2")}{Data with a monotone missing data
#'pattern} \item{list("pattern3")}{Data with a file matching missing data
#'pattern} \item{list("pattern4")}{Data with a general missing data pattern} }
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/missing-data-pattern.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'@keywords datasets #'@examples #'require(lattice) #'require(MASS) #' #'pattern4 #' #'data <- rbind(pattern1, pattern2, pattern3, pattern4) #'mdpat <- cbind(expand.grid(rec = 8:1, pat = 1:4, var = 1:3), r=as.numeric(as.vector(is.na(data)))) #' #'types <- c("Univariate","Monotone","File matching","General") #'tp41 <- levelplot(r~var+rec|as.factor(pat), data=mdpat, #' as.table=TRUE, aspect="iso", #' shrink=c(0.9), #' col.regions = mdc(1:2), #' colorkey=FALSE, #' scales=list(draw=FALSE), #' xlab="", ylab="", #' between = list(x=1,y=0), #' strip = strip.custom(bg = "grey95", style = 1, #' factor.levels = types)) #'print(tp41) #' #'md.pattern(pattern4) #'p <- md.pairs(pattern4) #'p #' #'### proportion of usable cases #'p$mr/(p$mr+p$mm) #' #'### outbound statistics #'p$rm/(p$rm+p$rr) #' #' #'fluxplot(pattern2) #' #' NULL mice/R/quickpred.R0000644000176200001440000001331013416657163013514 0ustar liggesusers# --------------------------------QUICKPRED------------------------------------ #'Quick selection of predictors from the data #' #'Selects predictors according to simple statistics #' #'This function creates a predictor matrix using the variable selection #'procedure described in Van Buuren et al.~(1999, p.~687--688). The function is #'designed to aid in setting up a good imputation model for data with many #'variables. #' #'Basic workings: The procedure calculates for each variable pair (i.e. #'target-predictor pair) two correlations using all available cases per pair. #'The first correlation uses the values of the target and the predictor #'directly. The second correlation uses the (binary) response indicator of the #'target and the values of the predictor. If the largest (in absolute value) of #'these correlations exceeds \code{mincor}, the predictor will be added to the #'imputation set. The default value for \code{mincor} is 0.1. 
#'
#'In addition, the procedure eliminates predictors whose proportion of usable
#'cases fails to meet the minimum specified by \code{minpuc}. The default value
#'is 0, so predictors are retained even if they have no usable case.
#'
#'Finally, the procedure includes any predictors named in the \code{include}
#'argument (which is useful for background variables like age and sex) and
#'eliminates any predictor named in the \code{exclude} argument. If a variable
#'is listed in both \code{include} and \code{exclude} arguments, the
#'\code{include} argument takes precedence.
#'
#'Advanced topic: \code{mincor} and \code{minpuc} are typically specified as
#'scalars, but vectors and square matrices of appropriate size will also work.
#'Each element of the vector corresponds to a row of the predictor matrix, so
#'the procedure can effectively differentiate between different target
#'variables. Setting high values can be useful for auxiliary, less
#'important, variables. The set of predictors for those variables can remain
#'relatively small. Using a square matrix extends the idea to the columns, so
#'that one can also apply cellwise thresholds.
#'
#'@param data Matrix or data frame with incomplete data.
#'@param mincor A scalar, numeric vector (of size \code{ncol(data)}) or numeric
#'matrix (square, of size \code{ncol(data)}) specifying the minimum
#'threshold(s) against which the absolute correlation in the data is compared.
#'@param minpuc A scalar, vector (of size \code{ncol(data)}) or matrix (square,
#'of size \code{ncol(data)}) specifying the minimum threshold(s) for the
#'proportion of usable cases.
#'@param include A string or a vector of strings containing one or more
#'variable names from \code{names(data)}. Variables specified are always
#'included as a predictor.
#'@param exclude A string or a vector of strings containing one or more
#'variable names from \code{names(data)}. Variables specified are always
#'excluded as a predictor.
#'@param method A string specifying the type of correlation. Use
#'\code{'pearson'} (default), \code{'kendall'} or \code{'spearman'}. Can be
#'abbreviated.
#'@return A square binary matrix of size \code{ncol(data)}.
#'@author Stef van Buuren, Aug 2009
#'@seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}}
#'@references van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple
#'imputation of missing blood pressure covariates in survival analysis.
#'\emph{Statistics in Medicine}, \bold{18}, 681--694.
#'
#'van Buuren, S. and Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'@keywords misc
#'@examples
#'
#'
#'# default: include all predictors with absolute correlation over 0.1
#'quickpred(nhanes)
#'
#'# all predictors with absolute correlation over 0.4
#'quickpred(nhanes, mincor=0.4)
#'
#'# include age and bmi, exclude chl
#'quickpred(nhanes, mincor=0.4, inc=c('age','bmi'), exc='chl')
#'
#'# only include predictors with at least 30% usable cases
#'quickpred(nhanes, minpuc=0.3)
#'
#'# use low threshold for bmi, and high thresholds for hyp and chl
#'pred <- quickpred(nhanes, mincor=c(0,0.1,0.5,0.5))
#'pred
#'
#'# use it directly from mice
#'imp <- mice(nhanes, pred=quickpred(nhanes, minpuc=0.25, include='age'))
#'
#'@export
quickpred <- function(data, mincor = 0.1, minpuc = 0, include = "", exclude = "",
                      method = "pearson") {
  # Predictor selection following Van Buuren et al (1999).
  data <- check.dataform(data)

  vars <- names(data)
  k <- ncol(data)
  values <- data.matrix(data)
  observed <- !is.na(values)

  # Absolute pairwise correlation; undefined correlations count as zero.
  abs.cor <- function(...) {
    out <- suppressWarnings(
      abs(cor(..., use = "pairwise.complete.obs", method = method))
    )
    out[is.na(out)] <- 0
    out
  }

  # Per target/predictor pair keep the larger of
  #   1) the correlation between the data values, and
  #   2) the correlation between the response indicator and the data values.
  strongest <- pmax(abs.cor(values), abs.cor(y = values, x = observed))

  predictorMatrix <- matrix(0, nrow = k, ncol = k, dimnames = list(vars, vars))
  predictorMatrix[strongest > mincor] <- 1

  # Drop predictors whose proportion of usable cases is below minpuc.
  pairs <- md.pairs(data)
  usable <- pairs$mr/(pairs$mr + pairs$mm)
  predictorMatrix[usable < minpuc] <- 0

  # Forced exclusions first, forced inclusions second, so that `include`
  # wins when a variable is named in both arguments.
  predictorMatrix[, pmatch(exclude, vars)] <- 0
  predictorMatrix[, pmatch(include, vars)] <- 1

  # A variable never predicts itself, and complete variables need no model.
  diag(predictorMatrix) <- 0
  predictorMatrix[colSums(!observed) == 0, ] <- 0
  predictorMatrix
}
# mice/R/cc.R0000644000176200001440000000427413416657163012123 0ustar liggesusers
#'Select complete cases
#'
#'Extracts the complete cases, also known as \emph{listwise deletion}.
#'\code{cc(x)} is similar to
#'\code{na.omit(x)}, but returns an object of the same class
#'as the input data. Dimensions are not dropped. For extracting
#'incomplete cases, use \code{\link{ic}}.
#'
#'@param x An \code{R} object. Methods are available for classes
#'\code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x}
#'could be a vector.
#'@return A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the complete cases.
#'@author Stef van Buuren, 2017.
#'@seealso \code{\link{na.omit}}, \code{\link{cci}}, \code{\link{ici}}
#'@keywords univar
#'@examples
#'
#'# cc(nhanes) # get the 13 complete cases
#'# cc(nhanes$bmi) # extract complete bmi
#'@export
cc <- function(x) {
  UseMethod("cc", x)
}

#' @export
cc.mids <- function(x) {
  # rows of the original (incomplete) data that are fully observed
  x$data[cci(x), , drop = FALSE]
}

#' @export
cc.matrix <- function(x) {
  x[cci(x), , drop = FALSE]
}

#' @export
cc.data.frame <- function(x) {
  x[cci(x), , drop = FALSE]
}

#' @export
cc.default <- function(x) {
  # vectors: keep the observed elements
  x[cci(x)]
}

#'Select incomplete cases
#'
#'Extracts incomplete cases from a data set.
#'The companion function for selecting the complete cases is \code{\link{cc}}.
#'
#'@param x An \code{R} object. Methods are available for classes
#'\code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x}
#'could be a vector.
#'@return A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the incomplete cases.
#'@author Stef van Buuren, 2017.
#'@seealso \code{\link{cc}}, \code{\link{ici}}
#'@keywords univar
#'@examples
#'
#'ic(nhanes) # get the 12 rows with incomplete cases
#'ic(nhanes[1:10,]) # incomplete cases within the first ten rows
#'ic(nhanes[, c("bmi", "hyp")]) # restrict extraction to variables bmi and hyp
#'
#'@export
ic <- function(x) {
  UseMethod("ic", x)
}

#' @export
ic.mids <- function(x) {
  # rows of the original data that have at least one missing value
  x$data[ici(x), , drop = FALSE]
}

#' @export
ic.matrix <- function(x) {
  x[ici(x), , drop = FALSE]
}

#' @export
ic.data.frame <- function(x) {
  x[ici(x), , drop = FALSE]
}

#' @export
ic.default <- function(x) {
  # vectors: keep the elements flagged by ici()
  x[ici(x)]
}
# mice/R/pool.r.squared.R0000644000176200001440000000761713416657163014416 0ustar liggesusers
# --------------------------pool.r.squared--------------------------
#'Pooling: R squared
#'
#'Pools R^2 of m repeated complete data models.
#'
#'The function pools the coefficients of determination R^2 or the adjusted
#'coefficients of determination (R^2_a) obtained with the \code{lm} modeling
#'function. For pooling it uses the Fisher \emph{z}-transformation.
#'
#'@param object An object of class 'mira', produced by \code{lm.mids} or
#'\code{with.mids} with \code{lm} as modeling function.
#'@param adjusted A logical value. If adjusted=TRUE then the adjusted R^2 is
#'calculated. The default value is FALSE.
#'@return Returns a 1x4 table with components. Component \code{est} is the
#'pooled R^2 estimate. Component \code{lo95} is the 95 \% lower bound of the pooled R^2.
#'Component \code{hi95} is the 95 \% upper bound of the pooled R^2.
#'Component \code{fmi} is the fraction of missing information due to nonresponse.
#'@author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009
#'@seealso \code{\link{pool}},\code{\link{pool.scalar}}
#'@references Harel, O (2009). The estimation of R^2 and adjusted R^2 in
#'incomplete data sets using multiple imputation, Journal of Applied Statistics,
#'36:1109-1118.
#'
#'Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New
#'York: John Wiley and Sons.
#'
#'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'
#'@keywords htest
#'@examples
#'
#'
#'imp<-mice(nhanes)
#'
#'fit<-lm.mids(chl~age+hyp+bmi,imp)
#'pool.r.squared(fit)
#'pool.r.squared(fit,adjusted=TRUE)
#'
#'#fit<-lm.mids(chl~age+hyp+bmi,imp)
#'#
#'#> pool.r.squared(fit)
#'# est lo 95 hi 95 fmi
#'#R^2 0.5108041 0.1479687 0.7791927 0.3024413
#'#
#'#> pool.r.squared(fit,adjusted=TRUE)
#'# est lo 95 hi 95 fmi
#'#adj R^2 0.4398066 0.08251427 0.743172 0.3404165
#'#
#'
#'
#'@export
pool.r.squared <- function(object, adjusted = FALSE) {
  # Pooled R^2 for multiply imputed data, after Harel (2009): pool on the
  # Fisher z scale with Rubin's rules, then transform back.

  # --- input checks -------------------------------------------------------
  if (!is.mira(object))
    stop("The object must have class 'mira'")
  if (length(object$analyses) < 2)
    stop("At least two imputations are needed for pooling.\n")
  if (class((object$analyses[[1]]))[1] != "lm")
    stop("r^2 can only be calculated for results of the 'lm' modeling function")

  fits <- object$analyses
  n.imp <- length(fits)

  # One row per imputation: sqrt of (adjusted) R^2, its Fisher z transform,
  # and the variance 1 / (n - 3) of that transform.
  r2 <- matrix(NA, nrow = n.imp, ncol = 3,
               dimnames = list(seq_len(n.imp),
                               c("R^2", "Fisher trans F^2", "se()")))
  for (k in seq_len(n.imp)) {
    s <- summary(fits[[k]])
    r2[k, 1] <- if (!adjusted) sqrt(s$r.squared) else sqrt(s$adj.r.squared)
    r2[k, 2] <- 0.5 * log((r2[k, 1] + 1)/(1 - r2[k, 1]))
    r2[k, 3] <- 1/(length(s$residuals) - 3)
  }

  # Within, between and total variance via pool.scalar().
  pooled <- pool.scalar(r2[, 2], r2[, 3])
  qbar <- pooled$qbar

  # Inverse Fisher transform, squared to get back onto the R^2 scale.
  z2rsq <- function(z) ((exp(2 * z) - 1)/(1 + exp(2 * z)))^2
  half <- 1.96 * sqrt(pooled$t)

  table <- array(z2rsq(qbar), dim = c(1, 4))
  row.label <- if (!adjusted) "R^2" else "adj R^2"
  dimnames(table) <- list(row.label, c("est", "lo 95", "hi 95", "fmi"))
  table[, 2] <- z2rsq(qbar - half)
  table[, 3] <- z2rsq(qbar + half)
  table[, 4] <- pooled$f
  return(table)
}
# mice/R/pool.R0000644000176200001440000001360713621065467012505 0ustar liggesusers
#'Combine estimates by Rubin's rules
#'
#'The \code{pool()} function combines the estimates from \code{m}
#'repeated complete data analyses. The typical sequence of steps to
#'do a multiple imputation analysis is:
#'\enumerate{
#'\item Impute the missing data by the \code{mice} function, resulting in
#'a multiple imputed data set (class \code{mids});
#'\item Fit the model of interest (scientific model) on each imputed data set
#'by the \code{with()} function, resulting in an object of class \code{mira};
#'\item Pool the estimates from each model into a single set of estimates
#'and standard errors, resulting in an object of class \code{mipo};
#'\item Optionally, compare pooled estimates from different scientific models
#'by the \code{D1()} or \code{D3()} functions.
#'} #'A common error is to reverse steps 2 and 3, i.e., to pool the #'multiply-imputed data instead of the estimates. Doing so may severely bias #'the estimates of scientific interest and yield incorrect statistical #'intervals and p-values. The \code{pool()} function will detect #'this case. #' #'The \code{pool()} function averages the estimates of the complete #'data model, computes the #'total variance over the repeated analyses by Rubin's rules #'(Rubin, 1987, p. 76), #'and computes the following diagnostic statistics per estimate: #'\enumerate{ #'\item Relative increase in variance due to nonresponse {\code{r}}; #'\item Residual degrees of freedom for hypothesis testing {\code{df}}; #'\item Proportion of total variance due to missingness {\code{lambda}}; #'\item Fraction of missing information {\code{fmi}}. #'} #' #'The function requires the following input from each fitted model: #'\enumerate{ #'\item the estimates of the model, usually obtainable by \code{coef()} #'\item the standard error of each estimate; #'\item the residual degrees of freedom of the model. #'} #'The \code{pool()} function relies on the \code{broom::tidy} and #'\code{broom::glance} function for extracting this information from a #'list of fitted models. #' #'The degrees of freedom calculation uses the Barnard-Rubin adjustment #'for small samples (Barnard and Rubin, 1999). #' #'@param object An object of class \code{mira} (produced by \code{with.mids()} #'or \code{as.mira()}), or a \code{list} with model fits. #'@param dfcom A positive number representing the degrees of freedom in the #'complete-data analysis. The default (\code{dfcom = NULL}) is to #'extract this information from the first fitted model. When that fails #'the warning \code{"Large sample assumed"} is printed, and the #'parameter is set \code{dfcom = 999999}. Use the \code{dfcom} parameter #'to specify the correct degrees of freedom. 
#'@return An object of class \code{mipo}, which stands for 'multiple imputation
#'pooled outcome'.
#'@seealso \code{\link{with.mids}}, \code{\link{as.mira}},
#'\code{\link[broom]{glance}}, \code{\link[broom]{tidy}}
#'@references Barnard, J. and Rubin, D.B. (1999). Small sample degrees of
#'freedom with multiple imputation. \emph{Biometrika}, 86, 948-955.
#'
#'Rubin, D.B. (1987). \emph{Multiple Imputation for Nonresponse in Surveys}.
#'New York: John Wiley and Sons.
#'
#'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate
#'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical
#'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/}
#'@keywords htest
#'@examples
#'# pool using the classic MICE workflow
#'imp <- mice(nhanes, maxit = 2, m = 2)
#'fit <- with(data = imp, exp = lm(bmi ~ hyp + chl))
#'summary(pool(fit))
#'@export
pool <- function (object, dfcom = NULL) {
  call <- match.call()
  if (!is.list(object)) {
    stop("Argument 'object' not a list", call. = FALSE)
  }
  object <- as.mira(object)
  m <- length(object$analyses)

  # With a single analysis there is nothing to pool: warn and hand back
  # the first fit untouched.
  first.fit <- getfit(object, 1)
  if (m == 1) {
    warning("Number of multiple imputations m = 1. No pooling done.")
    return(first.fit)
  }

  result <- list(call = call,
                 m = m,
                 pooled = pool.fitlist(getfit(object), dfcom = dfcom))
  class(result) <- c("mipo", "data.frame")
  result
}

pool.fitlist <- function (fitlist, dfcom = NULL) {
  # Model-level (glance) and coefficient-level (tidy) summaries of all fits.
  v <- summary(fitlist, type = "glance")
  w <- summary(fitlist, type = "tidy", exponentiate = FALSE)

  # Residual degrees of freedom of the model fitted on hypothetically
  # complete data, assumed to be the same across imputations.
  if (!is.null(dfcom)) {
    # user-supplied value: floor at 1
    dfcom <- max(dfcom, 1)
  } else {
    # otherwise try the glance table, then df.residual(), then give up
    dfcom <- v$df.residual[1L]
    if (is.null(dfcom)) dfcom <- df.residual(getfit(fitlist, 1L))
    if (is.null(dfcom)) {
      dfcom <- 999999
      warning("Large sample assumed.")
    }
  }

  # Grouping keys that identify one parameter across the imputations:
  #   y.level:   multinom
  #   component: broom.mixed
  grp <- intersect(names(w), c("term", "y.level", "component"))

  # group_by() would reorder the terms, which is undesirable. Converting the
  # keys to factors with levels in order of first appearance preserves the
  # original ordering.
  for (key in grp) {
    w[[key]] <- factor(w[[key]], levels = unique(w[[key]]))
  }

  # Rubin's rules for scalar estimates, one row per parameter.
  pooled <- w %>%
    group_by(!!!syms(grp)) %>%
    summarize(m = n(),
              qbar = mean(.data$estimate),
              ubar = mean(.data$std.error ^ 2),
              b = var(.data$estimate),
              t = .data$ubar + (1 + 1 / .data$m) * .data$b,
              dfcom = dfcom,
              df = barnard.rubin(.data$m, .data$b, .data$t, .data$dfcom),
              riv = (1 + 1 / .data$m) * .data$b / .data$ubar,
              lambda = (1 + 1 / .data$m) * .data$b / .data$t,
              fmi = (.data$riv + 2 / (.data$df + 3)) / (.data$riv + 1))
  pooled <- data.frame(pooled)
  names(pooled)[names(pooled) == "qbar"] <- "estimate"
  pooled
}
# mice/R/mice.impute.norm.boot.R0000644000176200001440000000227413416664706015670 0ustar liggesusers
#'Imputation by linear regression,
#'bootstrap method
#'
#'Imputes univariate missing data using linear regression with bootstrap
#'
#'@aliases mice.impute.norm.boot norm.boot
#'@inheritParams mice.impute.pmm
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@details
#'Draws a bootstrap sample from \code{x[ry,]} and \code{y[ry]}, calculates
#'regression weights and imputes with normal residuals.
#'@author Gerko Vink, Stef van Buuren, 2018
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.norm.boot <- function(y, ry, x, wy = NULL, ...) {
  # By default impute exactly the cells flagged as not observed.
  if (is.null(wy)) wy <- !ry

  # Prepend an intercept column to the design matrix.
  x <- cbind(1, as.matrix(x))

  # Bootstrap: resample the observed cases with replacement.
  n1 <- sum(ry)
  s <- sample(n1, n1, replace = TRUE)

  # drop = FALSE keeps the design a matrix even when there is a single
  # observed row or only the intercept column; without it the subset
  # silently collapses to a plain vector.
  dotxobs <- x[ry, , drop = FALSE][s, , drop = FALSE]
  dotyobs <- y[ry][s]

  # Fit on the bootstrap sample; p$c and p$r are the components of the
  # estimice() result used as regression weights and residuals.
  p <- estimice(dotxobs, dotyobs, ...)
  sigma <- sqrt((sum(p$r^2))/(n1 - ncol(x) - 1))

  # Linear predictor for the rows to impute plus normal noise.
  return(x[wy, , drop = FALSE] %*% p$c + rnorm(sum(wy)) * sigma)
}
# mice/R/bwplot.mads.R0000644000176200001440000001334113617553077013765 0ustar liggesusers
#
# -------------------------- bwplot.mads -------------------------------------
#
#'Box-and-whisker plot of amputed and non-amputed data
#'
#'Plotting method to investigate the result of function \code{\link{ampute}}.
#'the relation between the data variables and the amputed data. The function does
#'not show which data is amputed. It does show how the amputed values are related
#'to the variable values.
#'
#'@param x A \code{mads} (\code{\link{mads-class}}) object, typically created by
#'\code{\link{ampute}}.
#'@param data A string or vector of variable names that needs to be plotted. As
#'a default, all variables will be plotted.
#'@param which.pat A scalar or vector indicating which patterns need to be plotted.
#'As a default, all patterns are plotted.
#'@param standardized Logical. Whether the box-and-whisker plots need to be created
#'from standardized data or not. Default is TRUE.
#'@param descriptives Logical. Whether the mean, variance and n of the variables
#'need to be printed. This is useful to examine the effect of the amputation.
#'Default is TRUE.
#'@param layout A vector of two values indicating how the boxplots of one pattern
#'should be divided over the plot. For example, \code{c(2, 3)} indicates that the
#'boxplots of six variables need to be placed on 3 rows and 2 columns. Default
#'is 1 row and an amount of columns equal to #variables. Note that for more than
#'6 variables, multiple plots will be created automatically.
#'@param \dots Not used, but for consistency with generic
#'@return A list containing the box-and-whisker plots. Note that a new pattern
#'will always be shown in a new plot.
#'@note The \code{mads} object contains all the information you need to
#'make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate
#'Amputation using Ampute} to understand the contents of class object \code{mads}.
#'@author Rianne Schouten, 2016
#'@seealso \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for
#'an overview of the package, \code{\link{mads-class}}
#'@export
bwplot.mads <- function(x, data, which.pat = NULL, standardized = TRUE,
                        descriptives = TRUE, layout = NULL, ...) {
  if (!is.mads(x)) {
    stop("Object is not of class mads")
  }

  # `data` optionally names the variables to plot; default is all variables.
  if (missing(data)) data <- NULL
  yvar <- data
  if (is.null(yvar)) {
    varlist <- colnames(x$amp)
  } else {
    varlist <- yvar
  }

  # `pat` is the number of patterns to plot, `which.pat` their identifiers.
  if (is.null(which.pat)) {
    pat <- nrow(x$patterns)
    which.pat <- seq_len(pat)
  } else {
    pat <- length(which.pat)
  }

  # One panel per selected variable, split by amputation status.
  formula <- as.formula(paste0(paste0(varlist, collapse = "+"),
                               "~ factor(.amp)"))

  # From here on `data` is reused as the long plotting data set.
  data <- NULL
  if (standardized) {
    dat <- data.frame(scale(x$data))
  } else {
    dat <- x$data
  }

  if (is.null(layout)) {
    if (ceiling(length(varlist) / 2) > 6) {
      layout <- c(6, 1)
    } else {
      layout <- c(length(varlist), 1)
    }
  }

  # Per pattern: collect its candidate rows and label each one "Amp" when
  # every variable the pattern sets to missing is NA in x$amp, else "Non-Amp".
  for (i in seq_len(pat)){
    can <- which(x$cand == which.pat[i])
    mis <- matrix(NA, nrow = length(can), ncol = 2)
    nc <- which(x$patterns[which.pat[i], ] == 0)
    if (length(nc) > 1){
      mis[apply(is.na(x$amp[can, nc]), 1, all), 1] <- "Amp"
      mis[is.na(mis[, 1]), 1] <- "Non-Amp"
    } else if (length(nc) == 1) {
      mis[is.na(x$amp[can, nc]), 1] <- "Amp"
      mis[is.na(mis[, 1]), 1] <- "Non-Amp"
    }
    mis[, 2] <- rep.int(which.pat[i], length(can))
    data <- rbind(data, cbind(mis, dat[can, ]))
  }

  # All columns of `dat` were bound in above, so all of them must be named.
  # Naming them with `varlist` failed with a dimnames length error whenever
  # the caller asked for a subset of variables; in the default case
  # `varlist` equals colnames(x$amp), so behaviour there is unchanged.
  colnames(data) <- c(".amp", ".pat", colnames(x$amp))

  p <- list()
  vec1 <- c()
  vec3 <- c()
  # Two rows (Amp / Non-Amp) per pattern in the descriptives array.
  for (i in seq_along(which.pat)) {
    vec1[((i*2)-1):(i*2)] <- rep.int(paste(which.pat[i]), 2)
  }
  vec3 <- paste("", varlist)
  var <- length(varlist)

  if (descriptives) {
    desc <- array(NA, dim = c(2 * length(which.pat), 4, var),
                  dimnames = list(Pattern = vec1,
                                  Descriptives = c("Amp", "Mean", "Var", "N"),
                                  Variable = vec3))
    # Column "Amp": 1 for the amputed row, 0 for the non-amputed row.
    desc[, 1, ] <- rep.int(rep.int(c(1, 0), length(which.pat)), var)
    for (i in seq_along(which.pat)) {
      wp <- which.pat[i]
      desc[(i*2) - 1, 2, ] <- round(vapply(varlist, function(x) mean(data[data$.pat == wp & data$.amp == "Amp", x]), numeric(1)), 5)
      desc[(i*2), 2, ] <- round(vapply(varlist, function(x) mean(data[data$.pat == wp & data$.amp == "Non-Amp", x]), numeric(1)), 5)
      desc[(i*2) - 1, 3, ] <- round(vapply(varlist, function(x) var(data[data$.pat == wp & data$.amp == "Amp", x]), numeric(1)), 5)
      desc[(i*2), 3, ] <- round(vapply(varlist, function(x) var(data[data$.pat == wp & data$.amp == "Non-Amp", x]), numeric(1)), 5)
      desc[(i*2) - 1, 4, ] <- vapply(varlist, function(x) length(data[data$.pat == wp & data$.amp == "Amp", x]), numeric(1))
      desc[(i*2), 4, ] <- vapply(varlist, function(x) length(data[data$.pat == wp & data$.amp == "Non-Amp", x]), numeric(1))
    }
    p[["Descriptives"]] <- desc
  }

  # Monochrome lattice theme.
  theme <- list(superpose.symbol = list(col = "black", pch = 1),
                superpose.line = list(col = "black", lwd = 1),
                box.dot = list(col = "black"),
                box.rectangle = list(col = "black"),
                box.umbrella = list(col = "black"),
                box.symbol = list(col = "black"),
                plot.symbol = list(col = "black", pch = 1),
                plot.line = list(col = "black"),
                strip.background = list(col = "grey95"))

  # One lattice plot per pattern.
  for (i in seq_len(pat)) {
    p[[paste("Boxplot pattern", which.pat[i])]] <-
      bwplot(x = formula, data = data[data$.pat == which.pat[i], ],
             multiple = TRUE, outer = TRUE, layout = layout,
             ylab = "", par.settings = theme,
             xlab = paste("Data distributions in pattern", which.pat[i]))
  }
  return(p)
}
# mice/R/mice.impute.passive.R0000644000176200001440000000240013416657163015413 0ustar liggesusers
#'Passive imputation
#'
#'Calculate new variable during imputation
#'
#'@param data A data frame
#'@param func A \code{formula} specifying the transformations on data
#'@return The result of applying \code{formula}
#'@details
#'Passive imputation is a special internal imputation function. Using this
#'facility, the user can specify, at any point in the \code{mice} Gibbs
#'sampling algorithm, a function on the imputed data. This is useful, for
#'example, to compute a cubic version of a variable, a transformation like
#'\code{Q = W/H^2} based on two variables, or a mean variable like
#'\code{(x_1+x_2+x_3)/3}. The so derived variables might be used in other
#'places in the imputation model. The function allows to dynamically derive
#'virtually any function of the imputed data at virtually any time.
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000
#'@seealso \code{\link{mice}}
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords datagen
#'@export
mice.impute.passive <- function(data, func) {
  # Evaluate the transformation formula against the (partially imputed) data.
  derived <- model.frame(func, data)
  return(derived)
}
mice/R/supports.transparent.R0000644000176200001440000000136513617527572015776 0ustar liggesusers#'Supports semi-transparent foreground colors?
#'
#'This function is used by \code{mdc()} to find out whether the current device
#'supports semi-transparent foreground colors.
#'
#'The function calls the function \code{dev.capabilities()} from the package
#'\code{grDevices}. The function returns \code{FALSE} if the status of the
#'current device is unknown.
#'
#'@aliases supports.transparent transparent
#'@return \code{TRUE} or \code{FALSE}
#'@seealso \code{\link{mdc}} \code{\link{dev.capabilities}}
#'@keywords hplot
#'@examples
#'
#'supports.transparent()
#'
#'@export
supports.transparent <- function() {
  capabilities <- grDevices::dev.capabilities("semiTransparency")
  answer <- capabilities$semiTransparency
  # NA means the device could not report its status: treat as unsupported.
  if (is.na(answer)) {
    return(FALSE)
  }
  answer
}
mice/R/complete.R0000644000176200001440000001242013620062060013315 0ustar liggesusers#'Extracts the completed data from a \code{mids} object
#'
#'Takes an object of class \code{mids}, fills in the missing data, and returns
#'the completed data in a specified format.
#'
#'@aliases complete
#'@param data An object of class \code{mids} as created by the function
#'\code{mice()}.
#'@param action A numeric vector or a keyword. Numeric
#'values between 1 and \code{data$m} return the data with
#'imputation number \code{action} filled in. The value of \code{action = 0}
#'return the original data, with missing values.
\code{action} can
#'also be one of the following keywords: \code{"all"}, \code{"long"},
#'\code{"stacked"}, \code{"broad"} and \code{"repeated"}. See the Details
#'section for the interpretation.
#'The default, \code{action = 1L}, returns the first imputed data set.
#'@param include A logical to indicate whether the original data with the missing
#'values should be included.
#'@param mild A logical indicating whether the return value should
#'always be an object of class \code{mild}. Setting \code{mild = TRUE}
#'overrides \code{action} keywords \code{"long"}, \code{"broad"}
#'and \code{"repeated"}. The default is \code{FALSE}.
#'@param \dots Additional arguments. Not used.
#'@return Complete data set with missing values replaced by imputations.
#'A \code{data.frame}, or a list of data frames of class \code{mild}.
#'@details
#'The argument \code{action} can be length-1 character, which is
#'matched to one of the following keywords:
#'\describe{
#'\item{\code{"all"}}{produces a \code{mild} object of imputed data sets. When
#'\code{include = TRUE}, then the original data are appended as the first list
#'element;}
#'\item{\code{"long"}}{ produces a data set where imputed data sets
#'are stacked vertically. The columns are added: 1) \code{.imp}, integer,
#'referring the imputation number, and 2) \code{.id}, integer, the row
#'number within \code{data$data};}
#'\item{\code{"stacked"}}{ same as \code{"long"} but without the two
#'additional columns;}
#'\item{\code{"broad"}}{ produces a data set where imputed data sets
#'are stacked horizontally. Columns are ordered as in the original data.
#'The imputation number is appended to each column name;}
#'\item{\code{"repeated"}}{ same as \code{"broad"}, but with
#'columns in a different order.}
#'}
#'@note
#'Technical note: \code{mice 3.7.5} renamed the \code{complete()} function
#'to \code{complete.mids()} and exported it as an S3 method of the
#'generic \code{tidyr::complete()}. Name clashes between
#'\code{mice::complete()} and \code{tidyr::complete()} should no
#'longer occur.
#'@seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}}
#'@keywords manip
#'@examples
#'
#'# obtain first imputed data set
#'sum(is.na(nhanes2))
#'imp <- mice(nhanes2, print = FALSE, maxit = 1)
#'dat <- complete(imp)
#'sum(is.na(dat))
#'
#'# obtain stacked third and fifth imputation
#'dat <- complete(imp, c(3, 5))
#'
#'# obtain all datasets, with additional identifiers
#'head(complete(imp, "long"))
#'
#'# same, but now as list, mild object
#'dslist <- complete(imp, "all")
#'length(dslist)
#'
#'# same, but also include the original data
#'dslist <- complete(imp, "all", include = TRUE)
#'length(dslist)
#'
#'# select original + 3 + 5, store as mild
#'dslist <- complete(imp, c(0, 3, 5), mild = TRUE)
#'names(dslist)
#'
#'@export
complete.mids <- function(data, action = 1L, include = FALSE, mild = FALSE, ...) {
  if (!is.mids(data)) stop("'data' not of class 'mids'")

  m <- as.integer(data$m)
  if (is.numeric(action)) {
    # numeric action: keep only valid imputation numbers (0 = original data)
    action <- as.integer(action)
    idx <- action[action >= 0L & action <= m]
    if (include && all(idx != 0L)) idx <- c(0L, idx)
    shape <- if (mild) "mild" else "stacked"
  } else if (is.character(action)) {
    # keyword action: always all imputations, optionally with the original
    idx <- if (include) 0L:m else seq_len(m)
    shape <- match.arg(action, c("all", "long", "broad", "repeated", "stacked"))
    if (shape == "all" || mild) shape <- "mild"
  } else {
    stop("'action' not recognized")
  }

  # one completed data set per requested imputation number
  mylist <- lapply(idx, function(ell) {
    single.complete(data$data, data$where, data$imp, ell)
  })

  if (shape == "stacked") {
    return(bind_rows(mylist))
  }
  if (shape == "mild") {
    names(mylist) <- as.character(idx)
    class(mylist) <- c("mild", "list")
    return(mylist)
  }

  n.row <- nrow(data$data)
  n.col <- ncol(data$data)
  n.set <- length(idx)

  if (shape == "long") {
    cmp <- bind_rows(mylist)
    cmp <- data.frame(.imp = rep(idx, each = n.row),
                      .id = rep.int(seq_len(n.row), n.set),
                      cmp)
    if (is.integer(attr(data$data, "row.names")))
      row.names(cmp) <- seq_len(nrow(cmp))
    else
      row.names(cmp) <- as.character(seq_len(nrow(cmp)))
    return(cmp)
  }

  # must be broad or repeated
  cmp <- bind_cols(mylist)
  # Replicate the variable names once per data set actually returned
  # (length(idx), which is m + 1 when include = TRUE) instead of relying on
  # paste() recycling a vector built from m.
  names(cmp) <- paste(rep.int(names(data$data), n.set),
                      rep.int(idx, rep.int(n.col, n.set)),
                      sep = ".")
  if (shape == "broad") {
    return(cmp)
  }
  # repeated: group the copies of each variable next to each other
  return(cmp[, order(rep.int(seq_len(n.col), n.set))])
}

# Fill in imputation number `ell` into `data`; `ell == 0L` returns the
# data unchanged. Cells flagged in `where` without imputations become NA.
single.complete <- function(data, where, imp, ell) {
  if (ell == 0L) return(data)
  if (is.null(where)) where <- is.na(data)
  # only visit columns that have at least one cell to fill
  idx <- seq_len(ncol(data))[apply(where, 2, any)]
  for (j in idx) {
    if (is.null(imp[[j]])) data[where[, j], j] <- NA
    else data[where[, j], j] <- imp[[j]][, ell]
  }
  data
}
mice/R/ampute.default.R0000644000176200001440000001162413416657163014451 0ustar liggesusers#
# ------------------------- ampute.default.patterns --------------------------
#
#'Default \code{patterns} in \code{ampute}
#'
#'This function creates a default pattern matrix for the multivariate
#'amputation function \code{ampute()}.
#'
#'@param n A scalar specifying the #variables in the data.
#'@return A square matrix of size #variables where \code{0} indicates a variable
#'should have missing values and \code{1} indicates a variable should remain
#'complete. Each pattern has missingness on one variable only.
#'@seealso \code{\link{ampute}}, \code{\link{md.pattern}}
#'@author Rianne Schouten, 2016
#'@keywords internal
#'@export
ampute.default.patterns <- function(n) {
  # row i is all ones except for a zero in position i
  patterns.list <- lapply(seq_len(n),
                          function(i) c(rep.int(1, i - 1), 0, rep.int(1, n - i)))
  return(do.call(rbind, patterns.list))
}

#
# ------------------------- ampute.default.freq ------------------------------
#
#'Default \code{freq} in \code{ampute}
#'
#'Defines the default relative frequency vector for the multivariate
#'amputation function \code{ampute}.
#'
#'@param patterns A matrix of size #patterns by #variables where \code{0} indicates
#'a variable should have missing values and \code{1} indicates a variable should
#'remain complete.
Could be the result of \code{\link{ampute.default.patterns}}.
#'@return A vector of length #patterns containing the relative frequencies with
#'which the patterns should occur. An equal probability is given to each pattern.
#'@seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}}
#'@author Rianne Schouten, 2016
#'@keywords internal
#'@export
ampute.default.freq <- function(patterns) {
  n.pat <- nrow(patterns)
  freq <- rep.int(1 / n.pat, n.pat)
  return(freq)
}

#
# --------------------------- ampute.default.weights -------------------------
#
#'Default \code{weights} in \code{ampute}
#'
#'Defines the default weights matrix for the multivariate amputation function
#'\code{ampute}.
#'
#'@param patterns A matrix of size #patterns by #variables where \code{0} indicates
#'a variable should have missing values and \code{1} indicates a variable should
#'remain complete. Could be the result of \code{\link{ampute.default.patterns}}.
#'@param mech A string specifying the missingness mechanism.
#'@return A matrix of size #patterns by #variables containing the weights that
#'will be used to calculate the weighted sum scores. Equal weights are given to
#'all variables. When mechanism is MAR, variables that will be amputed will be
#'weighted with \code{0}. If it is MNAR, variables that will be observed
#'will be weighted with \code{0}. If mechanism is MCAR, the weights matrix will
#'not be used. A default MAR matrix will be returned.
#'@seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}}
#'@author Rianne Schouten, 2016
#'@keywords internal
#'@export
ampute.default.weights <- function(patterns, mech) {
  amputed <- patterns == 0
  if (mech != "MNAR") {
    # MAR layout (also returned for any other mechanism string):
    # weight 1 on variables that stay observed, 0 on amputed ones
    weights <- matrix(data = 1, nrow = nrow(patterns), ncol = ncol(patterns))
    weights[amputed] <- 0
  } else {
    # MNAR: weight 1 on the amputed variables, 0 on the observed ones
    weights <- matrix(data = 0, nrow = nrow(patterns), ncol = ncol(patterns))
    weights[amputed] <- 1
  }
  return(weights)
}

#
# -------------------------- ampute.default.type -----------------------------
#
#'Default \code{type} in \code{ampute()}
#'
#'Defines the default type vector for the multivariate amputation function
#'\code{ampute}.
#'
#'@param patterns A matrix of size #patterns by #variables where 0 indicates a
#'variable should have missing values and 1 indicates a variable should remain
#'complete. Could be the result of \code{\link{ampute.default.patterns}}.
#'@return A string vector of length #patterns containing the missingness types.
#'Each pattern will be amputed with a "RIGHT" missingness.
#'@seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}}
#'@author Rianne Schouten, 2016
#'@keywords internal
#'@export
ampute.default.type <- function(patterns) {
  type <- rep.int("RIGHT", nrow(patterns))
  return(type)
}

#
# ---------------------------- ampute.default.odds ---------------------------
#
#'Default \code{odds} in \code{ampute()}
#'
#'Defines the default odds matrix for the multivariate amputation function
#'\code{ampute}.
#'
#'@param patterns A matrix of size #patterns by #variables where 0 indicates a
#'variable should have missing values and 1 indicates a variable should remain
#'complete. Could be the result of \code{\link{ampute.default.patterns}}.
#'@return A matrix where #rows equals #patterns. Default is 4 quantiles with odds
#'values 1, 2, 3 and 4, for each pattern, imitating a RIGHT type of missingness.
#'@seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}}
#'@author Rianne Schouten, 2016
#'@keywords internal
#'@export
ampute.default.odds <- function(patterns) {
  # every row is the sequence 1, 2, 3, 4 (filled by row)
  odds <- matrix(seq_len(4), nrow = nrow(patterns), ncol = 4, byrow = TRUE)
  return(odds)
}
mice/R/bwplot.R0000644000176200001440000003034213617553571013041 0ustar liggesusers#'Box-and-whisker plot of observed and imputed data
#'
#'Plotting methods for imputed data using \pkg{lattice}. \code{bwplot}
#'produces box-and-whisker plots. The function
#'automatically separates the observed and imputed data. The
#'functions extend the usual features of \pkg{lattice}.
#'
#'The argument \code{na.groups} may be used to specify (combinations of)
#'missingness in any of the variables. The argument \code{groups} can be used
#'to specify groups based on the variable values themselves. Only one of both
#'may be active at the same time. When both are specified, \code{na.groups}
#'takes precedence over \code{groups}.
#'
#'Use the \code{subset} and \code{na.groups} together to plot parts of the
#'data. For example, select the first imputed data set by
#'\code{subset=.imp==1}.
#'
#'Graphical parameters like \code{col}, \code{pch} and \code{cex} can be
#'specified in the arguments list to alter the plotting symbols. If
#'\code{length(col)==2}, the color specification to define the observed and
#'missing groups. \code{col[1]} is the color of the 'observed' data,
#'\code{col[2]} is the color of the missing or imputed data. A convenient color
#'choice is \code{col=mdc(1:2)}, a transparent blue color for the observed
#'data, and a transparent red color for the imputed data. A good choice is
#'\code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the
#'duration of the session by running \code{mice.theme()}.
#'
#'@aliases bwplot
#'@param x A \code{mids} object, typically created by \code{mice()} or
#'\code{mice.mids()}.
#'@param data Formula that selects the data to be plotted.
This argument #'follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #'variables (used for the per-panel display) and the optional conditioning #'variables (which define the subsets plotted in different panels) to be used #'in the plot. #' #'The formula is evaluated on the complete data set in the \code{long} form. #'Legal variable names for the formula include \code{names(x$data)} plus the #'two administrative factors \code{.imp} and \code{.id}. #' #'\bold{Extended formula interface:} The primary variable terms (both the LHS #'\code{y} and RHS \code{x}) may consist of multiple terms separated by a #'\sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #'taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #'\code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #'\emph{separate panels}. This behavior differs from standard \pkg{lattice}. #'\emph{Only combine terms of the same type}, i.e. only factors or only #'numerical variables. Mixing numerical and categorical data occasionally #'produces odds labeling of vertical axis. #' #'For convenience, in \code{stripplot()} and \code{bwplot} the formula #'\code{y~.imp} may be abbreviated as \code{y}. This applies only to a single #'\code{y}, and does not (yet) work for \code{y1+y2~.imp}. #' #'@param na.groups An expression evaluating to a logical vector indicating #'which two groups are distinguished (e.g. using different colors) in the #'display. The environment in which this expression is evaluated in the #'response indicator \code{is.na(x$data)}. #' #'The default \code{na.group = NULL} contrasts the observed and missing data #'in the LHS \code{y} variable of the display, i.e. groups created by #'\code{is.na(y)}. The expression \code{y} creates the groups according to #'\code{is.na(y)}. The expression \code{y1 & y2} creates groups by #'\code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #'\code{is.na(y1) | is.na(y2)}, and so on. 
#'@param groups This is the usual \code{groups} arguments in \pkg{lattice}. It #'differs from \code{na.groups} because it evaluates in the completed data #'\code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #'\code{na.groups} evaluates in the response indicator. See #'\code{\link{xyplot}} for more details. When both \code{na.groups} and #'\code{groups} are specified, \code{na.groups} takes precedence, and #'\code{groups} is ignored. #'@param theme A named list containing the graphical parameters. The default #'function \code{mice.theme} produces a short list of default colors, line #'width, and so on. The extensive list may be obtained from #'\code{trellis.par.get()}. Global graphical parameters like \code{col} or #'\code{cex} in high-level calls are still honored, so first experiment with #'the global parameters. Many setting consists of a pair. For example, #'\code{mice.theme} defines two symbol colors. The first is for the observed #'data, the second for the imputed data. The theme settings only exist during #'the call, and do not affect the trellis graphical parameters. #'@param mayreplicate A logical indicating whether color, line widths, and so #'on, may be replicated. The graphical functions attempt to choose #'"intelligent" graphical parameters. For example, the same color can be #'replicated for different element, e.g. use all reds for the imputed data. #'Replication may be switched off by setting the flag to \code{FALSE}, in order #'to allow the user to gain full control. #'@param as.table See \code{\link[lattice:xyplot]{xyplot}}. #'@param outer See \code{\link[lattice:xyplot]{xyplot}}. #'@param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #'@param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #'@param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #'@param subset See \code{\link[lattice:xyplot]{xyplot}}. 
#'@param \dots Further arguments, usually not directly processed by the #'high-level functions documented here, but instead passed on to other #'functions. #'@return The high-level functions documented here, as well as other high-level #'Lattice functions, return an object of class \code{"trellis"}. The #'\code{\link[lattice:update.trellis]{update}} method can be used to #'subsequently update components of the object, and the #'\code{\link[lattice:print.trellis]{print}} method (usually called by default) #'will plot it on an appropriate plotting device. #'@note The first two arguments (\code{x} and \code{data}) are reversed #'compared to the standard Trellis syntax implemented in \pkg{lattice}. This #'reversal was necessary in order to benefit from automatic method dispatch. #' #'In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #'in \pkg{lattice} the argument \code{x} is always a formula. #' #'In \pkg{mice} the argument \code{data} is always a formula object, whereas in #'\pkg{lattice} the argument \code{data} is usually a data frame. #' #'All other arguments have identical interpretation. #' #'@author Stef van Buuren #'@seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, #'\code{\link{stripplot}}, \code{\link{lattice}} for an overview of the #'package, as well as \code{\link[lattice:bwplot]{bwplot}}, #'\code{\link[lattice:panel.bwplot]{panel.bwplot}}, #'\code{\link[lattice:print.trellis]{print.trellis}}, #'\code{\link[lattice:trellis.par.set]{trellis.par.set}} #'@references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #'Visualization with R}, Springer. #' #'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. 
\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords hplot
#'@examples
#'
#'imp <- mice(boys, maxit=1)
#'
#'### box-and-whisker plot per imputation of all numerical variables
#'bwplot(imp)
#'
#'### tv (testicular volume), conditional on region
#'bwplot(imp, tv~.imp|reg)
#'
#'### same data, organized in a different way
#'bwplot(imp, tv~reg|.imp, theme=list())
#'@export
bwplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE,
                        theme = mice.theme(), mayreplicate = TRUE,
                        allow.multiple = TRUE, outer = TRUE,
                        drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"),
                        ..., subscripts = TRUE, subset = TRUE) {
  # keep the unevaluated call: several arguments are inspected as written
  call <- match.call()
  if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object")

  ## unpack data and response indicator
  cd <- data.frame(complete(x, "long", include=TRUE))
  cd$.imp <- as.factor(cd$.imp)
  r <- as.data.frame(is.na(x$data))

  ## evaluate na.group in response indicator
  nagp <- eval(expr=substitute(na.groups), envir=r, enclos=parent.frame())
  if (is.expression(nagp)) nagp <- eval(expr=nagp, envir=r, enclos=parent.frame())

  ## evaluate groups in imputed data
  ngp <- eval(expr=substitute(groups), envir=cd, enclos=parent.frame())
  if (is.expression(ngp)) ngp <- eval(expr=ngp, envir=cd, enclos=parent.frame())
  groups <- ngp

  ## evaluate subset in imputed data
  ss <- eval(expr=substitute(subset), envir=cd, enclos=parent.frame())
  if (is.expression(ss)) ss <- eval(expr=ss, envir=cd, enclos=parent.frame())
  subset <- ss

  ## evaluate further arguments before parsing
  dots <- list(...)
  args <- list(allow.multiple = allow.multiple,
               outer = outer,
               drop.unused.levels = drop.unused.levels,
               subscripts = subscripts,
               as.table = as.table)

  ## create formula if not given (in call$data !)
  # the first two columns of the long data are dropped from the candidates
  vnames <- names(cd)[-seq_len(2)]
  allfactors <- unlist(lapply(cd,is.factor))[-seq_len(2)]
  if (missing(data)) {
    # default: all non-factor variables against the imputation number
    vnames <- vnames[!allfactors]
    formula <- as.formula(paste(paste(vnames,collapse="+",sep=""),"~.imp",sep=""))
  } else {
    ## pad abbreviated formula
    abbrev <- length(grep("~", call$data))==0
    if (abbrev) {
      if (length(call$data)>1) stop("Cannot pad extended formula.")
      else formula <- as.formula(paste(call$data,"~.imp",sep=""))
    } else {
      formula <- data
    }
  }

  ## determine the y-variables
  form <- lattice::latticeParseFormula(model=formula, data=cd, subset = subset,
                                       groups = groups, multiple = allow.multiple,
                                       outer = outer, subscripts = TRUE,
                                       drop = drop.unused.levels)
  ynames <- unlist(lapply(strsplit(form$left.name," \\+ "), rm.whitespace)) ## Jul2011
  xnames <- unlist(lapply(strsplit(form$right.name," \\+ "), rm.whitespace)) ## Jul2011

  ## groups is not useful in bwplot
  ## in order to force subgroup analysis,
  ## make the observed data in .imp>0 missing data
  nona <- is.null(call$na.groups)
  # NOTE(review): `gp` is assigned here but not read again in this function;
  # the branch only serves to skip the NA-masking below.
  if (!is.null(call$groups) & nona) gp <- call$groups
  else {
    if (nona) {
      # mask by the missingness of each y variable itself
      for (i in seq_along(ynames)) {
        yvar <- ynames[i]
        select <- cd$.imp!=0 & !r[,yvar]
        cd[select, yvar] <- NA
      }
    } else {
      # mask by the user-supplied na.groups indicator
      for (i in seq_along(ynames)) {
        yvar <- ynames[i]
        select <- cd$.imp!=0 & !nagp
        cd[select, yvar] <- NA
      }
    }
  }

  ## replicate color 2 if .imp is part of xnames
  mustreplicate <- !is.na(match(".imp",xnames)) & mayreplicate
  if (mustreplicate) {
    theme$box.dot$col <- rep(theme$box.dot$col[1:2], c(1,x$m))
    theme$box.rectangle$col <- rep(theme$box.rectangle$col[1:2], c(1,x$m))
    # NOTE(review): the umbrella colours are taken from box.rectangle, not
    # box.umbrella -- confirm against mice.theme() whether this is intended.
    theme$box.umbrella$col <- rep(theme$box.rectangle$col[1:2], c(1,x$m))
    theme$plot.symbol$col <- mdc(3)
    theme$plot.symbol$pch <- 1
  }

  ## change axis defaults of extended formula interface
  if (is.null(call$xlab) && !is.na(match(".imp",xnames))) {
    dots$xlab <- ""
    if (length(xnames)==1) dots$xlab <- "Imputation number"
  }
  if (is.null(call$ylab)) {
    args$ylab <- ""
    if (length(ynames)==1) args$ylab <- ynames
  }
  if (is.null(call$scales)) {
    args$scales <- list()
    if (length(ynames)>1)
      args$scales <- list(x=list(relation="free"), y=list(relation="free"))
  }

  ## ready
  args <- c(x = formula, data = list(cd), groups = list(groups), args, dots, subset = call$subset)

  ## go
  tp <- do.call("bwplot", args)
  tp <- update(tp, par.settings = theme)
  return(tp)
}
mice/R/sampler.R0000644000176200001440000001733113501515514013163 0ustar liggesuserssampler <- function(data, m, where, imp, blocks, method, visitSequence,
                                                                                                     predictorMatrix, formulas, blots, post,
                                                                                                     fromto, printFlag, ...)
  # The sampler controls the actual Gibbs sampling iteration scheme.
  # This function is called by mice and mice.mids
  #
  # Returns a list with elements iteration, imp, chainMean and chainVar.
{
  from <- fromto[1]
  to <- fromto[2]
  maxit <- to - from + 1
  r <- !is.na(data)

  # set up array for convergence checking
  chainMean <- chainVar <- initialize.chain(blocks, maxit, m)

  ## THE MAIN LOOP: GIBBS SAMPLER ##
  if (maxit < 1) iteration <- 0
  if (maxit >= 1) {
    if (printFlag) cat("\n iter imp variable")
    for (k in from:to) {
      # begin k loop : main iteration loop
      iteration <- k
      for (i in seq_len(m)) {
        # begin i loop: repeated imputation loop
        if (printFlag) cat("\n ", iteration, " ", i)

        # prepare the i'th imputation
        # do not overwrite any observed data
        for (h in visitSequence) {
          for (j in blocks[[h]]) {
            y <- data[, j]
            ry <- r[, j]
            wy <- where[, j]
            data[(!ry) & wy, j] <- imp[[j]][(!ry)[wy], i]
          }
        }

        # impute block-by-block
        for (h in visitSequence) {
          # the calltype attribute is either one value for all blocks
          # or one value per block
          ct <- attr(blocks, "calltype")
          calltype <- ifelse(length(ct) == 1, ct[1], ct[h])

          b <- blocks[[h]]
          if (calltype == "formula") ff <- formulas[[h]] else ff <- NULL
          if (calltype == "type") type <- predictorMatrix[h, ] else type <- NULL

          user <- blots[[h]]

          # univariate/multivariate logic
          theMethod <- method[h]
          empt <- theMethod == ""
          univ <- !empt && !is.passive(theMethod) && !handles.format(paste0("mice.impute.", theMethod))
          mult <- !empt && !is.passive(theMethod) && handles.format(paste0("mice.impute.", theMethod))
          pass <- !empt && is.passive(theMethod) && length(blocks[[h]]) == 1
          if (printFlag & !empt) cat(" ", b)

          ## store current state
          # the "state" object lives in the calling frame; its log entry is
          # carried over unchanged
          oldstate <- get("state", pos = parent.frame())
          newstate <- list(it = k, im = i, dep = h, meth = theMethod, log = oldstate$log)
          assign("state", newstate, pos = parent.frame(), inherits = TRUE)

          # (repeated) univariate imputation - type method
          if (univ) {
            for (j in b) {
              imp[[j]][, i] <- sampler.univ(data = data, r = r, where = where,
                                            type = type, formula = ff,
                                            method = theMethod, yname = j, k = k,
                                            calltype = calltype, user = user, ...)

              data[(!r[, j]) & where[, j], j] <- imp[[j]][(!r[, j])[where[, j]], i]

              # optional post-processing
              # the post command is evaluated as R code in this frame
              cmd <- post[j]
              if (cmd != "") {
                eval(parse(text = cmd))
                data[where[, j], j] <- imp[[j]][, i]
              }
            }
          }

          # multivariate imputation - type and formula
          if (mult) {
            # reset the missing cells of the block variables to NA
            mis <- !r
            mis[, setdiff(colnames(data), b)] <- FALSE
            data[mis] <- NA

            fm <- paste("mice.impute", theMethod, sep = ".")
            if (calltype == "formula")
              imputes <- do.call(fm, args = list(data = data, formula = ff, ...))
            else if (calltype == "type")
              imputes <- do.call(fm, args = list(data = data, type = type, ...))
            else stop("Cannot call function of type ", calltype, call. = FALSE)
            if (is.null(imputes)) stop("No imputations from ", theMethod, h, call. = FALSE)
            for (j in names(imputes)) {
              imp[[j]][, i] <- imputes[[j]]
              data[!r[, j], j] <- imp[[j]][, i]
            }
          }

          # passive imputation
          if (pass) {
            for (j in b) {
              wy <- where[, j]
              ry <- r[, j]
              imp[[j]][, i] <- model.frame(as.formula(theMethod), data[wy, ], na.action = na.pass)
              data[(!ry) & wy, j] <- imp[[j]][(!ry)[wy], i]
            }
          }
        } # end h loop (blocks)
      } # end i loop (imputation number)

      # store means and sd of m imputes
      # k2 is the position of iteration k within this run (1-based)
      k2 <- k - from + 1L
      if (length(visitSequence) > 0L) {
        for (h in visitSequence) {
          for (j in blocks[[h]]) {
            if (!is.factor(data[, j])) {
              chainVar[j, k2, ] <- apply(imp[[j]], 2L, var, na.rm = TRUE)
              chainMean[j, k2, ] <- colMeans(as.matrix(imp[[j]]), na.rm = TRUE)
            }
            if (is.factor(data[, j])) {
              # factors: summarize the integer codes of the imputed levels
              for (mm in seq_len(m)) {
                nc <- as.integer(factor(imp[[j]][, mm], levels = levels(data[, j])))
                chainVar[j, k2, mm] <- var(nc, na.rm = TRUE)
                chainMean[j, k2, mm] <- mean(nc, na.rm = TRUE)
              }
            }
          }
        }
      }
    } # end main iteration

    if (printFlag) {
      r <- get("loggedEvents", parent.frame(1))
      ridge.used <- any(grepl("A ridge penalty", r$out))
      if (ridge.used) {
        cat("\n * Please inspect the loggedEvents \n")
      } else {
        cat("\n")
      }
    }
  }
  # NOTE(review): `iteration` is assigned above, but the value returned in
  # the `iteration` element is `maxit`.
  return(list(iteration = maxit, imp = imp, chainMean = chainMean, chainVar = chainVar))
}


# Impute a single variable `yname` with the univariate method
# `mice.impute.<method>`, building the design matrix from either the
# predictor `type` vector or a model `formula`, depending on `calltype`.
sampler.univ <- function(data, r, where, type, formula, method, yname, k,
                         calltype = "type", user, ...) {
  j <- yname[1L]

  if (calltype == "type") {
    vars <- colnames(data)[type != 0]
    pred <- setdiff(vars, j)
    if (length(pred) > 0L) {
      formula <- reformulate(pred, response = j)
      formula <- update(formula, ". ~ . ")
    } else
      formula <- as.formula(paste0(j, " ~ 1"))
  }

  if (calltype == "formula") {
    # move terms other than j from lhs to rhs
    ymove <- setdiff(lhs(formula), j)
    formula <- update(formula, paste(j, " ~ . "))
    if (length(ymove) > 0L)
      formula <- update(formula, paste("~ . + ", paste(ymove, collapse = "+")))
  }

  # get the model matrix
  x <- obtain.design(data, formula)

  # expand type vector to model matrix, remove intercept
  if (calltype == "type") {
    type <- type[labels(terms(formula))][attr(x, "assign")]
    x <- x[, -1L, drop = FALSE]
    names(type) <- colnames(x)
  }
  if (calltype == "formula") {
    x <- x[, -1L, drop = FALSE]
    type <- rep(1L, length = ncol(x))
    names(type) <- colnames(x)
  }

  # define y, ry and wy
  y <- data[, j]
  ry <- complete.cases(x, y) & r[, j]
  wy <- complete.cases(x) & where[, j]

  # nothing to impute
  if (all(!wy)) return(numeric(0))

  # cc flags, among the cells in where[, j], those that can be imputed
  cc <- wy[where[, j]]
  if (k == 1L) check.df(x, y, ry)

  # remove linear dependencies
  keep <- remove.lindep(x, y, ry, ...)
  x <- x[, keep, drop = FALSE]
  type <- type[keep]
  if (ncol(x) != length(type))
    stop("Internal error: length(type) != number of predictors")

  # here we go
  f <- paste("mice.impute", method, sep = ".")
  imputes <- data[wy, j]
  imputes[!cc] <- NA

  # user-supplied blots and extra arguments are appended to the call
  args <- c(list(y = y, ry = ry, x = x, wy = wy, type = type), user, list(...))
  imputes[cc] <- do.call(f, args = args)
  imputes
}
mice/R/initialize.chain.R0000644000176200001440000000043213416657163014750 0ustar liggesusers
# Create an NA-filled (variables x iterations x imputations) array that the
# sampler uses to store the chain means and variances.
initialize.chain <- function(blocks, maxit, m) {
  vars <- unique(unlist(blocks))
  chain <- array(NA, dim = c(length(vars), maxit, m))
  dimnames(chain) <- list(vars,
                          seq_len(maxit),
                          paste("Chain", seq_len(m)))
  chain
}
mice/R/mice.impute.2l.lmer.R0000644000176200001440000001372213617734733015227 0ustar liggesusers#'Imputation by a two-level normal model using \code{lmer}
#'
#'Imputes univariate systematically and sporadically missing data using a two-level normal model using \code{lme4::lmer()}
#'
#'Data are missing systematically if they have not been measured, e.g., in the
#'case where we combine data from different sources. Data are missing sporadically
#'if they have been partially observed.
#'
#'While the method is fully Bayesian, it may fix parameters of the
#'variance-covariance matrix or the random effects to their estimated
#'value in cases where creating draws from the posterior is not
#'possible. The procedure throws a warning when this happens.
#'
#'@name mice.impute.2l.lmer
#'@inheritParams mice.impute.pmm
#'@param type Vector of length \code{ncol(x)} identifying random and class
#'variables. Random variables are identified by a '2'. The class variable
#'(only one is allowed) is coded as '-2'. Fixed effects are indicated by
#'a '1'.
#'@param intercept Logical determining whether the intercept is automatically
#'added.
#'@param \dots Arguments passed down to \code{lmer}
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@author Shahab Jolani, 2017
#'@references
#'Jolani S. (2017) Hierarchical imputation of systematically and
#'sporadically missing data: An approximate Bayesian approach using
#'chained equations. Forthcoming.
#'
#'Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015).
#'Imputation of systematically missing predictors in an individual
#'participant data meta-analysis: a generalized approach using MICE.
#'\emph{Statistics in Medicine}, 34:1841-1863.
#'
#'Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J.
#'and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel
#'Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge.
#'@family univariate-2l
#'@keywords datagen
#'@export
mice.impute.2l.lmer <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) {
  install.on.demand("lme4", ...)
  if (is.null(wy)) wy <- !ry

  # the intercept is added as a random effect (type 2) in the first column
  if (intercept) {
    x <- cbind(1, as.matrix(x))
    type <- c(2, type)
    names(type)[1] <- colnames(x)[1] <- "(Intercept)"
  }

  # variable roles: class variable (-2), random effects (2), fixed effects (> 0)
  clust <- names(type[type == -2])
  rande <- names(type[type == 2])
  fixe <- names(type[type > 0])

  lev <- unique(x[, clust])

  X <- x[, fixe, drop = FALSE]
  Z <- x[, rande, drop = FALSE]
  xobs <- x[ry, , drop = FALSE]
  yobs <- y[ry]
  Xobs <- X[ry, , drop = FALSE]
  Zobs <- Z[ry, , drop = FALSE]

  # create formula
  fr <- ifelse(length(rande) > 1,
               paste("+ ( 1 +", paste(rande[-1L], collapse = "+")),
               "+ ( 1 ")
  randmodel <- paste("yobs ~ ", paste(fixe[-1L], collapse = "+"),
                     fr, "|", clust, ")")

  suppressWarnings(fit <- try(lme4::lmer(formula(randmodel),
                                         data = data.frame(yobs, xobs),
                                         ...),
                              silent = TRUE))
  if (inherits(fit, "try-error")) {
    warning("lmer does not run. Simplify imputation model")
    return(y[wy])
  }

  # taken from lme4
  sigma <- function(object, ...) {
    dc <- object@devcomp
    dd <- dc$dims
    if (dd[["useSc"]])
      dc$cmp[[if (dd[["REML"]]) "sigmaREML" else "sigmaML"]] else 1
  }

  # draw sigma*
  sigmahat <- sigma(fit)
  df <- nrow(fit@frame) - length(fit@beta)
  sigma2star <- df * sigmahat^2 / rchisq(1, df)

  # draw beta*
  beta <- lme4::fixef(fit)
  RX <- lme4::getME(fit, "RX")

  # cov-matrix, i.e., vcov(fit)
  covmat <- sigma2star * chol2inv(RX)
  rv <- t(chol(covmat))
  beta.star <- beta + rv %*% rnorm(ncol(rv))

  # draw psi*
  # applying the standard Wishart prior
  rancoef <- as.matrix(lme4::ranef(fit)[[1]])
  lambda <- t(rancoef) %*% rancoef
  df.psi <- nrow(rancoef)
  temp.psi.star <- stats::rWishart(1, df.psi, diag(nrow(lambda)))[, , 1]
  temp <- MASS::ginv(lambda)
  ev <- eigen(temp)
  if (sum(ev$values > 0) == length(ev$values)) {
    deco <- ev$vectors %*% diag(sqrt(ev$values), nrow = length(ev$values))
    psi.star <- MASS::ginv(deco %*% temp.psi.star %*% t(deco))
  } else {
    # Assign the value of try() itself: with the assignment inside try(),
    # a failing svd() left temp.svd undefined and the check below raised
    # "object not found" instead of reaching the fallback.
    temp.svd <- try(svd(lambda))
    if (!inherits(temp.svd, "try-error")) {
      deco <- temp.svd$u %*% diag(sqrt(temp.svd$d), nrow = length(temp.svd$d))
      psi.star <- MASS::ginv(deco %*% temp.psi.star %*% t(deco))
    } else {
      psi.star <- temp
      warning("psi fixed to estimate")
    }
  }

  # Calculate myi, vyi and drawing bi per cluster
  for (jj in lev) {
    if (jj %in% unique(xobs[, clust])) {
      in.cluster <- xobs[, clust] == jj
      # drop = FALSE keeps a 1 x p matrix for clusters with a single
      # observed row; without it as.matrix() produced a p x 1 column
      Xi <- as.matrix(Xobs[in.cluster, , drop = FALSE])
      Zi <- as.matrix(Zobs[in.cluster, , drop = FALSE])
      yi <- yobs[in.cluster]
      sigma2 <- diag(sigma2star, nrow = nrow(Zi))
      Mi <- psi.star %*% t(Zi) %*% MASS::ginv(Zi %*% psi.star %*% t(Zi) + sigma2)
      myi <- Mi %*% (yi - Xi %*% beta.star)
      vyi <- psi.star - Mi %*% Zi %*% psi.star
    } else {
      # cluster without observed y: draw from the prior of the random effects
      myi <- matrix(0, nrow = nrow(psi.star), ncol = 1)
      vyi <- psi.star
    }

    # make vyi exactly symmetric by mirroring its lower triangle
    vyi <- vyi - upper.tri(vyi) * vyi + t(lower.tri(vyi) * vyi)

    # generating bi.star using eigenvalues
    deco1 <- eigen(vyi)
    if (sum(deco1$values > 0) == length(deco1$values)) {
      A <- deco1$vectors %*% sqrt(diag(deco1$values, nrow = length(deco1$values)))
      bi.star <- myi + A %*% rnorm(length(myi))
    } else {
      # generating bi.star using svd
      # Keep the svd result separate from deco1: previously a failing svd()
      # left the eigen() result in deco1, which then passed the class check.
      deco1.svd <- try(svd(vyi))
      if (!inherits(deco1.svd, "try-error")) {
        A <- deco1.svd$u %*% sqrt(diag(deco1.svd$d, nrow = length(deco1.svd$d)))
        bi.star <- myi + A %*% rnorm(length(myi))
      } else {
        bi.star <- myi
        warning("b_", jj, " fixed to estimate")
      }
    }

    # imputation
    y[wy & x[, clust] == jj] <- as.vector(
      as.matrix(X[wy & x[, clust] == jj, , drop = FALSE]) %*% beta.star +
        as.matrix(Z[wy & x[, clust] == jj, , drop = FALSE]) %*% as.matrix(bi.star) +
        rnorm(sum(wy & x[, clust] == jj)) * sqrt(sigma2star))
  }
  return(y[wy])
}
# mice/R/mice.impute.rf.R0000644000176200001440000000622413617525100014344 0ustar liggesusers
#'Imputation by random forests
#'
#'Imputes univariate missing data using random forests.
#'
#'@aliases mice.impute.rf
#'@inheritParams mice.impute.pmm
#'@param ntree The number of trees to grow. The default is 10.
#'@param \dots Other named arguments passed down to
#'\code{mice:::install.on.demand()}, \code{randomForest::randomForest()} and
#'\code{randomForest:::randomForest.default()}.
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@details
#'Imputation of \code{y} by random forests. The method
#'calls \code{randomForest()} which implements Breiman's random forest
#'algorithm (based on Breiman and Cutler's original Fortran code)
#'for classification and regression. See Appendix A.1 of Doove et al.
#'(2014) for the definition of the algorithm used.
#'@note An alternative implementation was independently
#'developed by Shah et al (2014). These were available as
#'functions \code{CALIBERrfimpute::mice.impute.rfcat} and
#'\code{CALIBERrfimpute::mice.impute.rfcont} (now archived).
#'Simulations by Shah (Feb 13, 2014) suggested that
#'the quality of the imputation for 10 and 100 trees was identical,
#'so mice 2.22 changed the default number of trees from \code{ntree = 100} to
#'\code{ntree = 10}.
#'@author Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012
#'@references
#'
#' Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning
#' for missing data imputation in the presence of interaction Effects.
#' Computational Statistics \& Data Analysis, 72, 92-104.
#'
#' Shah, A.D., Bartlett, J.W., Carpenter, J., Nicholas, O., Hemingway, H. (2014),
#' Comparison of random forest and parametric imputation models for
#' imputing missing data using MICE: A CALIBER study. American Journal
#' of Epidemiology, doi: 10.1093/aje/kwt312.
#'
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'@seealso \code{\link{mice}}, \code{\link{mice.impute.cart}},
#'\code{\link[randomForest]{randomForest}}
#'@family univariate imputation functions
#'@keywords datagen
#'@examples
#'library("lattice")
#'
#'imp <- mice(nhanes2, meth = "rf", ntree = 3)
#'plot(imp)
#'
#'@export
mice.impute.rf <- function(y, ry, x, wy = NULL, ntree = 10, ...) {
  install.on.demand("randomForest", ...)
  if (is.null(wy)) wy <- !ry

  # Grow a single tree on the observed rows and return, for every row that
  # needs an imputation, the observed y values matching its prediction.
  onetree <- function(xobs, xmis, yobs, ...) {
    fit <- randomForest::randomForest(x = xobs, y = yobs, ntree = 1, ...)
    leafnr <- predict(object = fit, newdata = xobs, nodes = TRUE)
    nodes <- predict(object = fit, newdata = xmis, nodes = TRUE)
    donor <- lapply(nodes, function(s) yobs[leafnr == s])
    return(donor)
  }

  ntree <- max(1, ntree)  # safety
  nmis <- sum(wy)
  # Nothing to impute: return an empty vector of the same type as y.
  # Without this guard sapply() yields a dimensionless list and apply() fails.
  if (nmis == 0L) return(y[wy])

  xobs <- x[ry, , drop = FALSE]
  xmis <- x[wy, , drop = FALSE]
  yobs <- y[ry]

  # one column of donor pools per tree, one row per value to impute
  forest <- sapply(seq_len(ntree), FUN = function(s) onetree(xobs, xmis, yobs, ...))
  # with a single row sapply() returns a plain list; restore the 1 x ntree shape
  if (nmis == 1) forest <- array(forest, dim = c(1, ntree))
  # pool the donors over all trees and draw one value per row
  impute <- apply(forest, MARGIN = 1, FUN = function(s) sample(unlist(s), 1))
  return(impute)
}
# mice/R/walking.R0000644000176200001440000000460113416657163013164 0ustar liggesusers
#'Walking disability data
#'
#'Two items YA and YB measuring walking disability in samples A, B and E.
#'
#'Example dataset to demonstrate imputation of two items (YA and YB). Item YA
#'is administered to sample A and sample E, item YB is administered to sample B
#'and sample E, so sample E acts as a bridge study. Imputation using a bridge
#'study is better than simple equating or than imputation under independence.
#'
#'Item YA corresponds to the HAQ8 item, and item YB corresponds to the GAR9
#'items from Van Buuren et al (2005). Sample E (as well as sample B) is the
#'Euridiss study (n=292), sample A is the ERGOPLUS study (n=306).
#'
#'See Van Buuren (2012) chapter 7 for more details on the imputation
#'methodology.
#'
#'@name walking
#'@aliases walking
#'@docType data
#'@format A data frame with 890 rows on the following 5 variables:
#'\describe{
#'\item{sex}{Sex of respondent (factor)}
#'\item{age}{Age of respondent}
#'\item{YA}{Item administered in samples A and E (factor)}
#'\item{YB}{Item administered in samples B and E (factor)}
#'\item{src}{Source: Sample A, B or E (factor)}
#'}
#'@references van Buuren, S., Eyres, S., Tennant, A., Hopman-Rock, M. (2005).
#'Improving comparability of existing data by Response Conversion. #'\emph{Journal of Official Statistics}, \bold{21}(1), 53-72. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-codingsystems.html#sec:impbridge}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #'@keywords datasets #'@examples #' #'md.pattern(walking) #' #'micemill <- function(n) { #' for (i in 1:n) { #' imp <<- mice.mids(imp) # global assignment #' cors <- with(imp, cor(as.numeric(YA), #' as.numeric(YB), #' method="kendall")) #' tau <<- rbind(tau, getfit(cors, s=TRUE)) # global assignment #' } #'} #' #'plotit <- function() #' matplot(x=1:nrow(tau),y=tau, #' ylab=expression(paste("Kendall's ",tau)), #' xlab="Iteration", type="l", lwd=1, #' lty=1:10,col="black") #' #'tau <- NULL #'imp <- mice(walking, max=0, m=10, seed=92786) #'pred <- imp$pred #'pred[,c("src","age","sex")] <- 0 #'imp <- mice(walking, max=0, m=3, seed=92786, pred=pred) #'micemill(5) #'plotit() #' #'### to get figure 7.8 van Buuren (2012) use m=10 and micemill(20) #' NULL mice/R/mice.impute.2lonly.norm.R0000644000176200001440000001267113621211740016126 0ustar liggesusers### contributed by Alexander Robitzsch (robitzsch@ipn.uni-kiel.de) #' Imputation at level 2 by Bayesian linear regression #' #' Imputes univariate missing data at level 2 using Bayesian linear regression #' analysis. Variables at level 1 are aggregated at level 2. The group #' identifier at level 2 must be indicated by \code{type = -2} in the #' \code{predictorMatrix}. #' #' @aliases 2lonly.norm #' @inheritParams mice.impute.pmm #' @param type Group identifier must be specified by '-2'. Predictors must be #' specified by '1'. #' @param ... Other named arguments. #' @return A vector of length \code{nmis} with imputations.
#' @author Alexander Robitzsch (IPN - Leibniz Institute for Science and #' Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de} #' plus some tweaks by Stef van Buuren #' @seealso \code{\link{mice.impute.norm}}, #' \code{\link{mice.impute.2lonly.pmm}}, \code{\link{mice.impute.2l.pan}}, #' \code{\link{mice.impute.2lonly.mean}} #' @details #' This function allows in combination with \code{\link{mice.impute.2l.pan}} #' switching regression imputation between level 1 and level 2 as described in #' Yucel (2008) or Gelman and Hill (2007, p. 541). #' #' The function checks for partial missing level-2 data. Level-2 data #' are assumed to be constant within the same cluster. If one or more #' entries are missing, then the procedure aborts with an error #' message that identifies the cluster with incomplete level-2 data. #' In such cases, one may first fill in the cluster mean (or mode) by #' the \code{2lonly.mean} method to remove inconsistencies. #' #' @references Gelman, A. and Hill, J. (2007). \emph{Data analysis using #' regression and multilevel/hierarchical models}. Cambridge, Cambridge #' University Press. #' #' Yucel, RM (2008). Multiple imputation inference for multivariate multilevel #' continuous data with ignorable non-response. \emph{Philosophical #' Transactions of the Royal Society A}, \bold{366}, 2389-2404. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #'@family univariate-2lonly #' @note #' For a more general approach, see #' \code{miceadds::mice.impute.2lonly.function()}. #'@examples #' #'################################################## #'# simulate some data #'# x,y ... level 1 variables #'# v,w ... 
level 2 variables #' #'G <- 250 # number of groups #'n <- 20 # number of persons #'beta <- .3 # regression coefficient #'rho <- .30 # residual intraclass correlation #'rho.miss <- .10 # correlation with missing response #'missrate <- .50 # missing proportion #'y1 <- rep( rnorm( G , sd = sqrt( rho ) ) , each=n ) + rnorm(G*n , sd = sqrt( 1 - rho )) #'w <- rep( round( rnorm(G ) , 2 ) , each=n ) #'v <- rep( round( runif( G , 0 , 3 ) ) , each=n ) #'x <- rnorm( G*n ) #'y <- y1 + beta * x + .2 * w + .1 * v #'dfr0 <- dfr <- data.frame( "group" = rep(1:G , each=n ) , "x" = x , "y" = y , "w" = w , "v" = v ) #'dfr[ rho.miss * x + rnorm( G*n , sd = sqrt( 1 - rho.miss ) ) < qnorm( missrate ) , "y" ] <- NA #'dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "w" ] <- NA #'dfr[ rep( rnorm(G) , each=n ) < qnorm( missrate ) , "v" ] <- NA #' #'#.... #'# empty mice imputation #'imp0 <- mice( as.matrix(dfr) , maxit=0 ) #'predM <- imp0$predictorMatrix #'impM <- imp0$method #' #'#... #'# multilevel imputation #'predM1 <- predM #'predM1[c("w","y","v"),"group"] <- -2 #'predM1["y","x"] <- 1 # fixed x effects imputation #'impM1 <- impM #'impM1[c("y","w","v")] <- c("2l.pan" , "2lonly.norm" , "2lonly.pmm" ) #' #'# y ... imputation using pan #'# w ... imputation at level 2 using norm #'# v ... imputation at level 2 using pmm #' #'imp1 <- mice( as.matrix( dfr ) , m = 1 , predictorMatrix = predM1 , #' method = impM1 , maxit=1 , paniter=500) #' #' # #' # Demonstration that 2lonly.norm aborts for partial missing data. #' # Better use 2lonly.mean for repair. 
#' data <- data.frame(patid = rep(1:4, each = 5),
#' sex = rep(c(1, 2, 1, 2), each = 5),
#' crp = c(68, 78, 93, NA, 143,
#' 5, 7, 9, 13, NA,
#' 97, NA, 56, 52, 34,
#' 22, 30, NA, NA, 45))
#' pred <- make.predictorMatrix(data)
#' pred[, "patid"] <- -2
#' # only missing value (out of five) for patid == 1
#' data[3, "sex"] <- NA
#'
#' \dontrun{
#' # The following fails because 2lonly.norm found partially missing
#' # level-2 data
#' # imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"),
#' # predictorMatrix = pred, maxit = 1, m = 2)
#' # > iter imp variable
#' # > 1 1 sex crpError in .imputation.level2(y = y, ... :
#' # > Method 2lonly.norm found the following clusters with partially missing
#' #> level-2 data: 1
#' #> Method 2lonly.mean can fix such inconsistencies.
#' }
#'
#' # In contrast, if all sex values are missing for patid == 1, it runs fine,
#' # except on r-patched-solaris-x86. I used dontrun to evade CRAN errors.
#' \dontrun{
#' data[1:5, "sex"] <- NA
#' imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"),
#' predictorMatrix = pred, maxit = 1, m = 2)
#' }
#'@export
mice.impute.2lonly.norm <- function(y, ry, x, type, wy = NULL, ...) {
  # Thin wrapper: forward every argument to the shared level-2 routine
  # and select the "norm" imputation method.
  .imputation.level2(y = y, ry = ry, x = x, type = type, wy = wy,
                     method = "norm", ...)
}
# mice/R/summary.R0000644000176200001440000000570513617221645013226 0ustar liggesusers
#
# ------------------------------summary.mira-------------------------------
# setMethod("summary", signature(object = "mira"), function(object) {
# summary.mira(object)
# })
#'Summary of a \code{mira} object
#'
#'@rdname summary
#'@param object A \code{mira} object
#'@param type A length-1 character vector indicating the
#'type of summary. There are three choices: \code{type = "tidy"}
#'return the parameters estimates of each analyses as a data frame.
#'\code{type = "glance"} return the fit statistics of each analysis
#'as a data frame.
#'\code{type = "summary"} returns a list of
#'length \code{m} with the analysis results. The default is
#'\code{"tidy"}.
#'@param ... Other parameters passed down to \code{print()} and \code{summary()}
#'@return \code{NULL}
#'@seealso \code{\link[=mira-class]{mira}}
#'@method summary mira
#'@export
summary.mira <- function(object, type = c("tidy", "glance", "summary"), ...) {
  type <- match.arg(type)
  fitlist <- getfit(object)
  # match.arg() guarantees that exactly one of the three branches is taken
  switch(type,
    tidy = bind_rows(lapply(fitlist, tidy, effects = "fixed",
                            parametric = TRUE, ...)),
    glance = bind_rows(lapply(fitlist, glance, ...)),
    summary = lapply(fitlist, summary, ...)
  )
}

#'Summary of a \code{mids} object
#'
#'@rdname summary
#'@return \code{NULL}
#'@seealso \code{\link[=mids-class]{mids}}
#'@method summary mids
#'@export
summary.mids <- function(object, ...) {
  # the summary of a mids object is its printed representation
  print(object, ...)
  invisible()
}

#
# ------------------------------summary.mads-------------------------------
#
#'Summary of a \code{mads} object
#'
#'@rdname summary
#'@return \code{NULL}
#'@seealso \code{\link[=mads-class]{mads}}
#'@export
summary.mads <- function(object, ...) {
  # the summary of a mads object is its printed representation
  print(object, ...)
  invisible()
}

#'Summary of a \code{mice.anova} object
#'
#'@rdname summary
#'@return \code{NULL}
#'@seealso \code{\link{mipo}}
#'@method summary mice.anova
#'@export
summary.mice.anova <- function(object, ...) {
  # objects created by anova() store their comparisons in `out`
  comparisons <- object$out
  # objects created by D1, D2 and D3 hold one comparison at the top level
  if (is.null(comparisons)) {
    comparisons <- list(`1 ~~ 2` = list(result = object$result,
                                        df.com = object$df.com))
  }

  df.com <- vapply(comparisons, function(cmp) cmp$df.com, numeric(1))
  # one row per comparison, columns in the order of the `result` vector
  stats <- t(vapply(comparisons, function(cmp) cmp$result, numeric(5)))
  rf <- data.frame(test = names(comparisons),
                   statistic = stats[, 1],
                   df1 = stats[, 2],
                   df2 = stats[, 3],
                   df.com = df.com,
                   p.value = stats[, 4],
                   riv = stats[, 5],
                   row.names = NULL)

  model.formulas <- object$formulas
  ff <- data.frame(model = names(model.formulas),
                   formula = as.character(model.formulas))

  structure(list(models = ff, comparisons = rf, m = object$m,
                 method = object$method, use = object$use),
            class = c("mice.anova.summary", class(object)))
}
# mice/R/plot.R0000644000176200001440000001110013416657163012476 0ustar liggesusers
#'Plot the trace lines of the MICE algorithm
#'
#'Trace line plots portray the value of an estimate
#'against the iteration number. The estimate can be anything that you can calculate, but
#'typically are chosen as parameter of scientific interest. The \code{plot} method for
#'a \code{mids} object plots the mean and standard deviation of the imputed (not observed)
#'values against the iteration number for each of the $m$ replications. By default,
#'the function plot the development of the mean and standard deviation for each incomplete
#'variable. On convergence, the streams should intermingle and be free of any trend.
#'
#'@param x An object of class \code{mids}
#'@param y A formula that specifies which variables, stream and iterations are plotted.
#'If omitted, all streams, variables and iterations are plotted.
#'@param theme The trellis theme to applied to the graphs. The default is \code{mice.theme()}.
#'@param layout A vector of length 2 given the number of columns and rows in the plot.
#'The default is \code{c(2, 3)}.
#'@param type Parameter \code{type} of \code{\link{panel.xyplot}}.
#'@param col Parameter \code{col} of \code{\link{panel.xyplot}}.
#'@param lty Parameter \code{lty} of \code{\link{panel.xyplot}}.
#'@param ... Extra arguments for \code{\link{xyplot}}.
#'@return An object of class \code{"trellis"}.
#'@author Stef van Buuren 2011
#'@seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}},
#'\code{\link{xyplot}}
#'@method plot mids
#'@export
plot.mids <- function(x, y = NULL, theme = mice.theme(), layout = c(2, 3),
                      type = "l", col = 1:10, lty = 1, ...) {
  # strip function: draws the variable name (first conditioning variable) on
  # the left and the statistic label (second) on the right of one strip
  strip.combined <- function(which.given, which.panel, factor.levels, ...) {
    if (which.given == 1) {
      lattice::panel.rect(0, 0, 1, 1, col = theme$strip.background$col, border = 1)
      lattice::panel.text(x = 0, y = 0.5, pos = 4,
                          lab = factor.levels[which.panel[which.given]])
    }
    if (which.given == 2) {
      lattice::panel.text(x = 1, y = 0.5, pos = 2,
                          lab = factor.levels[which.panel[which.given]])
    }
  }

  # builds "<var1>+<var2>+... ~ .it | .ms" from a vector of variable names
  trace.formula <- function(vars) {
    as.formula(paste0(paste0(vars, collapse = "+"), "~.it|.ms"))
  }

  if (!is.mids(x)) stop("argument 'x' must be a 'mids' object", call. = FALSE)
  if (is.null(x$chainMean)) stop("no convergence diagnostics found", call. = FALSE)

  mn <- x$chainMean
  sm <- sqrt(x$chainVar)

  # select subset of nonmissing entries
  obs <- apply(!(is.nan(mn) | is.na(mn)), 1, all)
  varlist <- names(obs)[obs]

  ## create formula if not given in y
  if (missing(y) || is.null(y)) {
    formula <- trace.formula(varlist)
  } else if (is.character(y)) {
    formula <- trace.formula(y)
  } else if (is.numeric(y) || is.logical(y)) {
    # numeric covers integer as well as double positions such as y = 1,
    # which used to fall through to as.formula() and fail
    formula <- trace.formula(varlist[y])
  } else {
    formula <- as.formula(y)
  }

  m <- x$m
  it <- x$iteration

  # reshape the variable x iteration x chain arrays to (iteration * chain) rows
  mn <- matrix(aperm(mn[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it)
  sm <- matrix(aperm(sm[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it)

  adm <- expand.grid(seq_len(it), seq_len(m), c("mean", "sd"))
  data <- cbind(adm, rbind(mn, sm))
  colnames(data) <- c(".it", ".m", ".ms", varlist)
  .m <- NULL
  rm(.m)  ## Dummy to trick R CMD check

  tp <- xyplot(x = formula, data = data, groups = .m,
               type = type, lty = lty, col = col, layout = layout,
               scales = list(y = list(relation = "free"),
                             x = list(alternating = FALSE)),
               as.table = TRUE,
               xlab = "Iteration", ylab = "",
               strip = strip.combined,
               par.strip.text = list(lines = 0.5),
               ...)
  tp <- update(tp, par.settings = theme)
  return(tp)
}

#
# setMethod("plot", signature(x = "mids", y = "ANY"), function(x, y, ...) {
# plot.mids(x, y, ...)
# })
# mice/R/nimp.R0000644000176200001440000000157313416657163012500 0ustar liggesusers
#' Number of imputations per block
#'
#' Calculates the number of cells within a block for which imputation
#' is requested.
#' @inheritParams mice
#' @return A numeric vector of length \code{length(blocks)} containing
#' the number of cells that need to be imputed within a block.
#' @seealso \code{\link{mice}}
#' @export
#' @examples
#' where <- is.na(nhanes)
#'
#' # standard FCS
#' nimp(where)
#'
#' # user-defined blocks
#' nimp(where, blocks = name.blocks(list(c("bmi", "hyp"), "age", "chl")))
nimp <- function(where, blocks = make.blocks(where)) {
  #if (length(blocks) > 0 && is.null(names(blocks)))
  # stop("Blocks have no names. Use name.blocks(...)")

  # number of cells flagged for imputation in each column of `where`
  nwhere <- apply(where, 2, sum)
  nimp <- vector("integer", length = length(blocks))
  names(nimp) <- names(blocks)
  # a block's count is the sum over the columns it contains
  for (i in seq_along(blocks)) nimp[i] <- sum(nwhere[blocks[[i]]])
  nimp
}
# mice/R/mice.impute.panImpute.R0000644000176200001440000000653013617734716015716 0ustar liggesusers
#'Impute multilevel missing data using \code{pan}
#'
#'This function is a wrapper around the \code{panImpute} function
#'from the \code{mitml} package so that it can be called to
#'impute blocks of variables in \code{mice}.
The \code{mitml::panImpute} #'function provides an interface to the \code{pan} package for #'multiple imputation of multilevel data (Schafer & Yucel, 2002). #'Imputations can be generated using \code{type} or \code{formula}, #'which offer different options for model specification. #' #'@name mice.impute.panImpute #'@inheritParams mitml::panImpute #'@param data A data frame containing incomplete and auxiliary variables, #'the cluster indicator variable, and any other variables that should be #'present in the imputed datasets. #'@param type An integer vector specifying the role of each variable #'in the imputation model (see \code{\link[mitml]{panImpute}}) #'@param formula A formula specifying the role of each variable #'in the imputation model. The basic model is constructed #'by \code{model.matrix}, thus allowing to include derived variables #'in the imputation model using \code{I()}. See #'\code{\link[mitml]{panImpute}}. #'@param format A character vector specifying the type of object that should #'be returned. The default is \code{format = "imputes"}, which returns the #'imputations only. \code{format = "native"} returns the \code{mitml} object #'and \code{format = "complete"} returns the completed data. #'@param ... Other named arguments: \code{n.burn}, \code{n.iter}, #'\code{group}, \code{prior}, \code{silent} and others. #'@return A list of imputations for all incomplete variables in the model, #'that can be stored in the \code{imp} component of the \code{mids} #'object. #'@seealso \code{\link[mitml]{panImpute}} #'@note The number of imputations \code{m} is set to 1, and the function #'is called \code{m} times so that it fits within the \code{mice} #'iteration scheme. #' #'This is a multivariate imputation function using a joint model. #'@author Stef van Buuren, 2018, building on work of Simon Grund, #'Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) #'and Joe Schafer (author of \code{pan} package). #'@references #' Grund S, Luedtke O, Robitzsch A (2016).
#' Multiple
#' Imputation of Multilevel Missing Data: An Introduction to the R
#' Package \code{pan}. SAGE Open.
#'
#'Schafer JL (1997). Analysis of Incomplete Multivariate Data. London:
#'Chapman & Hall.
#'
#'Schafer JL, and Yucel RM (2002). Computational strategies for
#'multivariate linear mixed-effects models with missing values.
#'Journal of Computational and Graphical Statistics, 11, 437-457.
#'@family multivariate-2l
#'@keywords datagen
#'@examples
#'blocks <- list(c("bmi", "chl", "hyp"), "age")
#'method <- c("panImpute", "pmm")
#'ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0)
#'pred <- ini$pred
#'pred["B1", "hyp"] <- -2
#'imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1)
#'
#'@export
mice.impute.panImpute <- function(data, formula, type, m = 1, silent = TRUE,
                                  format = "imputes", ...) {
  install.on.demand("mitml", ...)

  native <- mitml::panImpute(data = data, formula = formula, type = type,
                             m = m, silent = silent, ...)
  # "native": hand back the mitml object untouched
  if (format == "native") {
    return(native)
  }

  # first completed data set, restricted to the columns of `data`
  completed <- mitml::mitmlComplete(native, print = 1)[, names(data)]

  if (format == "complete") {
    result <- completed
  } else if (format == "imputes") {
    # keep only the cells that were missing in `data`
    result <- single2imputes(completed, is.na(data))
  } else {
    # any other format yields NULL
    result <- NULL
  }
  result
}
# mice/R/mice.impute.mean.R0000644000176200001440000000231313416657163014664 0ustar liggesusers
#'Imputation by the mean
#'
#'Imputes the arithmetic mean of the observed data
#'
#'@inheritParams mice.impute.pmm
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@section Warning: Imputing the mean of a variable is almost never
#'appropriate. See Little and Rubin (2002, p. 61-62) or
#'Van Buuren (2012, p. 10-11)
#'@seealso \code{\link{mice}}, \code{\link{mean}}
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'
#'Little, R.J.A. and Rubin, D.B. (2002).
#'Statistical Analysis with Missing
#'Data. New York: John Wiley and Sons.
#'
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-simplesolutions.html#sec:meanimp}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.mean <- function(y, ry, x = NULL, wy = NULL, ...) {
  # by default impute every entry that is not observed
  if (is.null(wy)) {
    wy <- !ry
  }
  # mean of the observed values, repeated once per entry to impute
  observed.mean <- mean(y[ry])
  rep.int(observed.mean, times = sum(wy))
}
# mice/R/D1.R0000644000176200001440000000536313621065755012000 0ustar liggesusers
#'Compare two nested models using D1-statistic
#'
#'The D1-statistics is the multivariate Wald test.
#'
#'@param fit1 An object of class \code{mira}, produced by \code{with()}.
#'@param fit0 An object of class \code{mira}, produced by \code{with()}. The
#'model in \code{fit0} is a nested within \code{fit1}. The default null
#'model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.
#'@param df.com A single number or a numeric vector denoting the
#'complete-data degrees of freedom for the hypothesis test. If not specified,
#'it is set equal to \code{df.residual} of model \code{fit1}.
#'@param \dots Not used.
#'@references
#'Li, K. H., T. E. Raghunathan, and D. B. Rubin. 1991.
#'Large-Sample Significance Levels from Multiply Imputed Data Using
#'Moment-Based Statistics and an F Reference Distribution.
#'\emph{Journal of the American Statistical Association}, 86(416): 1065–73.
#'
#'\url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:wald}
#'@examples
#'# Compare two linear models:
#'imp <- mice(nhanes2, seed = 51009, print = FALSE)
#'mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl))
#'mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp))
#'D1(mi1, mi0)
#'
#'# Compare two logistic regression models
#'imp <- mice(boys, maxit = 2, print = FALSE)
#'fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial))
#'fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial))
#'D1(fit1, fit0)
#'@seealso \code{\link[mitml]{testModels}}
#'@export
D1 <- function(fit1, fit0 = NULL, df.com = NULL, ...) {
  install.on.demand("mitml", ...)

  # fit1: a fitlist or mira-object
  # fit0: named numerical vector, character vector, or list
  fit1 <- as.mitml.result(fit1)
  qbar1 <- getqbar(pool(fit1))

  if (is.null(fit0)) {
    # test all estimates equal to zero, except intercept
    beta <- rep(0, length(qbar1))
    names(beta) <- names(qbar1)
    fit0 <- as.mitml.result(lapply(fit1, fix.coef, beta = beta))
  } else if (is.mira(fit0)) {
    fit0 <- as.mitml.result(fit0)
  }

  # automatic setting of df.com
  if (is.null(df.com)) {
    # better option might be pair of df.com
    # pair <- list(getfit(fit1, 1), getfit(fit0, 1))
    # df.com <- unlist(sapply(pair, glance)["df.residual", ])
    first.fit <- getfit(fit1, 1)
    df.com <- as.numeric(select(glance(first.fit), df.residual))
  }

  tmr <- mitml::testModels(fit1, fit0, method = "D1", df.com = df.com)

  this.call <- match.call()
  model.formulas <- list(`1` = formula(fit1[[1L]]),
                         `2` = formula(fit0[[1L]]))
  structure(
    list(call = this.call,
         result = tmr$test,
         formulas = model.formulas,
         m = tmr$m,
         method = "D1",
         use = NULL,
         df.com = tmr$df.com),
    class = c("mice.anova", class(fit1))
  )
}
# mice/R/employee.R0000644000176200001440000000240213416657163013344 0ustar liggesusers
#' Employee selection data
#'
#' A toy example from Craig Enders.
#' #' Enders describes these data as follows: #' I designed these data to mimic an employee selection scenario in #' which prospective employees complete an IQ test and a #' psychological well-being questionnaire during their interview. #' The company subsequently hires the applicants that score in the #' upper half of the IQ distribution, and a supervisor rates their #' job performance following a 6-month probationary period. #' Note that the job performance scores are missing at random (MAR) #' (i.e. individuals in the lower half of the IQ distribution were #' never hired, and thus have no performance rating). In addition, #' I randomly deleted three of the well-being scores in order to #' mimic a situation where the applicant's well-being questionnaire #' is inadvertently lost. #' #' A larger version of this data set is present as #' \code{\link[miceadds]{data.enders.employee}}. #' #' @format A data frame with 20 rows and 3 variables: #' \describe{ #' \item{IQ}{candidate IQ score} #' \item{wbeing}{candidate well-being score} #' \item{jobperf}{candidate job performance score} #' } #' @source Enders (2010), Applied Missing Data Analysis, p. 218 "employee" mice/R/cci.R0000644000176200001440000000341113416657163012264 0ustar liggesusers#'Complete case indicator #' #' #'The complete case indicator is useful for extracting the subset of complete cases. The function #'\code{cci(x)} calls \code{complete.cases(x)}. #'The companion function \code{ici()} selects the incomplete cases. #' #'@name cci #'@param x An \code{R} object. Currently supported are methods for the #'following classes: \code{mids}. #'@return Logical vector indicating the complete cases. #'@author Stef van Buuren, 2017.
#'@seealso \code{\link{complete.cases}}, \code{\link{ici}}, \code{\link{cc}}
#'@keywords univar
#'@examples
#' cci(nhanes) # indicator for 13 complete cases
#' cci(mice(nhanes, maxit = 0))
#' f <- cci(nhanes[,c("bmi","hyp")]) # complete data for bmi and hyp
#' nhanes[f,] # obtain all data from those with complete bmi and hyp
#'@export
cci <- function(x) {
  UseMethod("cci", x)
}

#' @export
cci.mids <- function(x) {
  # a mids object keeps the incomplete data in its `data` component
  complete.cases(x$data)
}

#' @export
cci.default <- function(x) {
  complete.cases(x)
}

#'Incomplete case indicator
#'
#'This array is useful for extracting the subset of incomplete cases.
#'The companion function \code{cci()} selects the complete cases.
#'
#'@name ici
#'@aliases ici ici,data.frame-method ici,matrix-method ici,mids-method
#'@param x An \code{R} object. Currently supported are methods for the
#'following classes: \code{mids}.
#'@return Logical vector indicating the incomplete cases.
#'@author Stef van Buuren, 2017.
#'@seealso \code{\link{cci}}, \code{\link{ic}}
#'@keywords univar
#'@examples
#'
#' ici(nhanes) # indicator for 12 rows with incomplete cases
#'
#'@export
ici <- function(x) {
  UseMethod("ici", x)
}

#' @export
ici.mids <- function(x) {
  # a mids object keeps the incomplete data in its `data` component
  !complete.cases(x$data)
}

#' @export
ici.default <- function(x) {
  !complete.cases(x)
}
# mice/R/mice.impute.norm.nob.R0000644000176200001440000000440113416664706015475 0ustar liggesusers
#'Imputation by linear regression without parameter uncertainty
#'
#'Imputes univariate missing data using linear regression analysis without
#'accounting for the uncertainty of the model parameters.
#'
#'@aliases mice.impute.norm.nob norm.nob
#'@inheritParams mice.impute.pmm
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@details
#'This function creates imputations using the spread around the
#'fitted linear regression line of \code{y} given \code{x}, as
#'fitted on the observed data.
#'
#'This function is provided mainly to allow comparison between proper (e.g.,
#'as implemented in \code{mice.impute.norm}) and improper (this function)
#'normal imputation methods.
#'
#'For large data, having many rows, differences between proper and improper
#'methods are small, and in those cases one may opt for speed by using
#'\code{mice.impute.norm.nob}.
#'@section Warning: The function does not incorporate the variability of the
#'regression weights, so it is not 'proper' in the sense of Rubin. For small
#'samples, variability of the imputed data is therefore underestimated.
#'@author Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn, 2018
#'@seealso \code{\link{mice}}, \code{\link{mice.impute.norm}}
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'
#'Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple
#'Imputation Strategies for the Statistical Analysis of Incomplete Data Sets.
#'Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.norm.nob <- function(y, ry, x, wy = NULL, ...) {
  # By default impute exactly the entries that are not observed.
  if (is.null(wy)) wy <- !ry
  # Prepend the intercept column to the design matrix.
  x <- cbind(1, as.matrix(x))
  parm <- .norm.fix(y, ry, x, ...)
  # Fitted value plus a normal draw scaled by the residual sd; drop = FALSE
  # keeps the selected rows a matrix when only a single row is imputed.
  return(x[wy, , drop = FALSE] %*% parm$beta + rnorm(sum(wy)) * parm$sigma)
}

# Fits the regression of y on x using the observed rows only (via estimice(),
# defined elsewhere) and returns a list with the fixed coefficients `beta`
# and the residual standard deviation `sigma`. No parameter draw is made.
.norm.fix <- function(y, ry, x, ...) {
  p <- estimice(x[ry, , drop = FALSE], y[ry], ...)
  # NOTE(review): x already contains the intercept column added by the caller,
  # so `- 1` subtracts one extra degree of freedom; kept for backward
  # compatibility. The floor at 1 avoids Inf/NaN sigma when there are too few
  # observed rows.
  df <- max(sum(ry) - ncol(x) - 1, 1)
  sigma <- sqrt((sum(p$r^2))/df)
  parm <- list(p$c, sigma)
  names(parm) <- c("beta", "sigma")
  return(parm)
}
mice/R/cbind.R0000644000176200001440000003163713416657163012620 0ustar liggesusers
#'Combine \code{mids} objects by columns
#'
#'This function combines two \code{mids} objects columnwise into a single
#'object of class \code{mids}, or combines a single \code{mids} object with
#'a \code{vector}, \code{matrix}, \code{factor} or \code{data.frame}
#'columnwise into a \code{mids} object.
#'
#'\emph{Pre-requisites:} If \code{y} is a \code{mids}-object, the rows
#'of \code{x$data} and \code{y$data} should match, as well as the number
#'of imputations (\code{m}). Other \code{y} are transformed into a
#'\code{data.frame} whose rows should match with \code{x$data}.
#'
#'The function renames any duplicated variable or block names by
#'appending \code{".1"}, \code{".2"} to duplicated names.
#'
#'@param x A \code{mids} object.
#'@param y A \code{mids} object, or a \code{data.frame}, \code{matrix},
#'\code{factor} or \code{vector}.
#'@param \dots Additional \code{data.frame}, \code{matrix}, \code{vector}
#'or \code{factor}. These can be given as named arguments.
#'@return An S3 object of class \code{mids} #'@note #'The function constructs the elements of the new \code{mids} object as follows: #'\tabular{ll}{ #'\code{data} \tab Columnwise combination of the data in \code{x} and \code{y}\cr #'\code{imp} \tab Combines the imputed values from \code{x} and \code{y}\cr #'\code{m} \tab Taken from \code{x$m}\cr #'\code{where} \tab Columnwise combination of \code{x$where} and \code{y$where}\cr #'\code{blocks} \tab Combines \code{x$blocks} and \code{y$blocks}\cr #'\code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} #'is call to \code{cbind.mids}\cr #'\code{nmis} \tab Equals \code{c(x$nmis, y$nmis)}\cr #'\code{method} \tab Combines \code{x$method} and \code{y$method}\cr #'\code{predictorMatrix} \tab Combination with zeroes on the off-diagonal blocks\cr #'\code{visitSequence} \tab Combined as \code{c(x$visitSequence, y$visitSequence)}\cr #'\code{formulas} \tab Combined as \code{c(x$formulas, y$formulas)}\cr #'\code{post} \tab Combined as \code{c(x$post, y$post)}\cr #'\code{blots} \tab Combined as \code{c(x$blots, y$blots)}\cr #'\code{seed} \tab Taken from \code{x$seed}\cr #'\code{iteration} \tab Taken from \code{x$iteration}\cr #'\code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr #'\code{chainMean} \tab Combined from \code{x$chainMean} and \code{y$chainMean}\cr #'\code{chainVar} \tab Combined from \code{x$chainVar} and \code{y$chainVar}\cr #'\code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr #'\code{version} \tab Current package version\cr #'\code{date} \tab Current date\cr #'} #' #'@author Karin Groothuis-Oudshoorn, Stef van Buuren #'@seealso \code{\link{cbind}}, \code{\link{rbind.mids}}, \code{\link{ibind}}, #'\code{\link[=mids-class]{mids}} #'@keywords manip #'@examples #' #'# impute four variables at once (default) #'imp <- mice(nhanes, m = 1, maxit = 1, print = FALSE) #'imp$predictorMatrix #' #'# impute two by two #'data1 <- nhanes[, c("age", "bmi")] #'data2 <- nhanes[, c("hyp", "chl")] 
#'imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE)
#'imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE)
#'
#'# Append two solutions
#'imp12 <- cbind(imp1, imp2)
#'
#'# This is a different imputation model
#'imp12$predictorMatrix
#'
#'# Append the other way around
#'imp21 <- cbind(imp2, imp1)
#'imp21$predictorMatrix
#'
#'# Append 'forgotten' variable chl
#'data3 <- nhanes[, 1:3]
#'imp3 <- mice(data3, maxit = 1,m = 2, print = FALSE)
#'imp4 <- cbind(imp3, chl = nhanes$chl)
#'
#'# Of course, chl was not imputed
#'head(complete(imp4))
#'
#'# Combine mids object with data frame
#'imp5 <- cbind(imp3, nhanes2)
#'head(complete(imp5))
cbind.mids <- function(x, y = NULL, ...) {
  call <- match.call()
  dots <- list(...)
  # mids + mids is handled by the dedicated worker
  if (is.mids(y)) return(cbind.mids.mids(x, y, call = call))
  # nothing to append
  if ((is.null(y) || length(y) == 0L) && length(dots) == 0L) return(x)
  n <- nrow(x$data)
  # a scalar y is recycled to one value per row
  if (length(y) == 1L) y <- rep(y, n)
  # collect y and the dots into one data frame
  if (length(y) == 0L && length(dots) > 0L) y <- cbind.data.frame(dots)
  else if (length(y) > 0L && length(dots) == 0L) y <- cbind.data.frame(y)
  else y <- cbind.data.frame(y, dots)

  # Call is a vector, with first argument the mice statement
  # and second argument the call to cbind.mids.
  call <- c(x$call, call)

  if (nrow(y) != nrow(x$data))
    stop("arguments imply differing number of rows: ", c(nrow(x$data), ", ", nrow(y)))
  data <- cbind(x$data, y)
  varnames <- make.unique(colnames(data))
  colnames(data) <- varnames

  # where argument: nothing in y is flagged for imputation
  where <- cbind(x$where, matrix(FALSE, nrow = nrow(x$where), ncol = ncol(y)))
  colnames(where) <- varnames

  # blocks: no renaming needed because all block definition will
  # refer to varnames[1:ncol(x$data)] only, and are hence unique
  # but we do need to rename duplicate block names
  yblocks <- vector("list", length = ncol(y))
  blocks <- c(x$blocks, yblocks)
  xynames <- c(names(x$blocks), colnames(y))
  blocknames <- make.unique(xynames)
  names(blocknames) <- xynames
  names(blocks) <- blocknames
  ct <- c(attr(x$blocks, "calltype"), rep("type", ncol(y)))
  names(ct) <- blocknames
  attr(blocks, "calltype") <- ct

  m <- x$m

  # count the number of missing data in y
  nmis <- c(x$nmis, colSums(is.na(y)))
  names(nmis) <- varnames

  # imp: original data of y will be copied into the multiple imputed dataset,
  # including the missing values of y.
  r <- (!is.na(y))
  # one all-NA data frame per column of y: a row per missing entry, a column
  # per imputation (the local is no longer called `m`, which shadowed x$m)
  f <- function(j) {
    empty <- matrix(NA, nrow = sum(!r[, j]), ncol = x$m,
                    dimnames = list(row.names(y)[!r[, j]], seq_len(x$m)))
    as.data.frame(empty)
  }
  imp <- lapply(seq_len(ncol(y)), f)
  imp <- c(x$imp, imp)
  names(imp) <- varnames

  # The imputation method for (columns in) y will be set to ''.
  method <- c(x$method, rep.int("", ncol(y)))
  names(method) <- blocknames

  # The variable(s) in y are included in the predictorMatrix.
  # y is not used as predictor as well as not imputed.
  predictorMatrix <- rbind(x$predictorMatrix,
                           matrix(0, ncol = ncol(x$predictorMatrix), nrow = ncol(y)))
  predictorMatrix <- cbind(predictorMatrix,
                           matrix(0, ncol = ncol(y), nrow = nrow(x$predictorMatrix) + ncol(y)))
  rownames(predictorMatrix) <- blocknames
  colnames(predictorMatrix) <- varnames

  visitSequence <- x$visitSequence
  formulas <- x$formulas
  post <- c(x$post, rep.int("", ncol(y)))
  names(post) <- varnames
  blots <- x$blots

  # seed, number of iterations, chainMean and chainVar
  # are taken as in mids object x.
  # NOTE: lastSeedValue is NOT copied from x; the current .Random.seed is
  # stored below (the former unused `lastSeedvalue` local was removed).
  seed <- x$seed
  iteration <- x$iteration
  chainMean <- x$chainMean
  chainVar <- x$chainVar
  loggedEvents <- x$loggedEvents

  ## save, and return
  midsobj <- list(data = data, imp = imp, m = m,
                  where = where, blocks = blocks,
                  call = call, nmis = nmis,
                  method = method,
                  predictorMatrix = predictorMatrix,
                  visitSequence = visitSequence,
                  formulas = formulas, post = post,
                  blots = blots, seed = seed,
                  iteration = iteration,
                  lastSeedValue = .Random.seed,
                  chainMean = chainMean,
                  chainVar = chainVar,
                  loggedEvents = loggedEvents,
                  version = packageVersion("mice"),
                  date = Sys.Date())
  oldClass(midsobj) <- "mids"
  return(midsobj)
}

# Worker for cbind.mids(): combines two mids objects columnwise. `call` is
# the call to cbind.mids() and is appended to x$call.
cbind.mids.mids <- function(x, y, call) {
  if (!is.mids(y)) stop("Argument `y` not a mids object")

  if (nrow(y$data) != nrow(x$data))
    stop("The two datasets do not have the same length\n")
  if (x$m != y$m)
    stop("The two mids objects should have the same number of imputations\n")

  # Call is a vector, with first argument the mice statement
  # and second argument the call to cbind.mids.
  call <- c(x$call, call)

  # The data in x$data and y$data are combined together.
  # make variable names unique
  data <- cbind(x$data, y$data)
  xynames <- c(colnames(x$data), colnames(y$data))
  varnames <- make.unique(xynames)
  names(varnames) <- xynames
  names(data) <- varnames

  where <- cbind(x$where, y$where)
  colnames(where) <- varnames

  # rename variable names within each x$blocks and y$blocks
  xnew <- varnames[1:ncol(x$data)]
  ynew <- varnames[-(1:ncol(x$data))]
  xblocks <- x$blocks
  yblocks <- y$blocks
  for (i in names(xblocks)) xblocks[[i]] <- unname(xnew[xblocks[[i]]])
  for (i in names(yblocks)) yblocks[[i]] <- unname(ynew[yblocks[[i]]])
  blocks <- c(xblocks, yblocks)
  xynames <- c(names(xblocks), names(yblocks))
  blocknames <- make.unique(xynames)
  names(blocknames) <- xynames
  names(blocks) <- blocknames
  ct <- c(attr(xblocks, "calltype"), attr(yblocks, "calltype"))
  names(ct) <- blocknames
  attr(blocks, "calltype") <- ct

  m <- x$m
  nmis <- c(x$nmis, y$nmis)
  names(nmis) <- varnames
  imp <- c(x$imp, y$imp)
  names(imp) <- varnames
  method <- c(x$method, y$method)
  names(method) <- blocknames

  # The predictorMatrices of x and y are combined with zero matrices
  # on the off diagonal blocks.
  predictorMatrix <- rbind(x$predictorMatrix,
                           matrix(0,
                                  ncol = ncol(x$predictorMatrix),
                                  nrow = nrow(y$predictorMatrix)))
  predictorMatrix <- cbind(predictorMatrix,
                           rbind(matrix(0,
                                        ncol = ncol(y$predictorMatrix),
                                        nrow = nrow(x$predictorMatrix)),
                                 y$predictorMatrix))
  rownames(predictorMatrix) <- blocknames
  colnames(predictorMatrix) <- varnames

  # As visitSequence is taken first the order for x and after that from y.
  # take care that duplicate names need to be renamed
  xnew <- blocknames[1:length(x$blocks)]
  ynew <- blocknames[-(1:length(x$blocks))]
  visitSequence <- unname(c(xnew[x$visitSequence], ynew[y$visitSequence]))

  formulas <- c(x$formulas, y$formulas)
  names(formulas) <- blocknames
  post <- c(x$post, y$post)
  names(post) <- varnames
  blots <- c(x$blots, y$blots)
  names(blots) <- blocknames

  # For the elements seed and iteration the values
  # from midsobject x are copied.
  # NOTE: lastSeedValue is NOT copied from x; the current .Random.seed is
  # stored below (the former unused `lastSeedvalue` local was removed).
  seed <- x$seed
  iteration <- x$iteration

  # the chainMean and chainVar vectors for x and y are combined.
  # y's chains are truncated to x's number of iterations, or left NA beyond
  # y's own number of iterations when y has fewer.
  chainMean <- array(data = NA,
                     dim = c(dim(x$chainMean)[1] + dim(y$chainMean)[1], iteration, m),
                     dimnames = list(c(dimnames(x$chainMean)[[1]],
                                       dimnames(y$chainMean)[[1]]),
                                     dimnames(x$chainMean)[[2]],
                                     dimnames(x$chainMean)[[3]]))
  chainMean[seq_len(dim(x$chainMean)[1]), , ] <- x$chainMean

  if (iteration <= dim(y$chainMean)[2]) {
    chainMean[(dim(x$chainMean)[1] + 1):dim(chainMean)[1], , ] <- y$chainMean[, seq_len(iteration), ]
  } else {
    chainMean[(dim(x$chainMean)[1] + 1):dim(chainMean)[1], seq_len(dim(y$chainMean)[2]), ] <- y$chainMean
  }

  chainVar <- array(data = NA,
                    dim = c(dim(x$chainVar)[1] + dim(y$chainVar)[1], iteration, m),
                    dimnames = list(c(dimnames(x$chainVar)[[1]],
                                      dimnames(y$chainVar)[[1]]),
                                    dimnames(x$chainVar)[[2]],
                                    dimnames(x$chainVar)[[3]]))
  chainVar[seq_len(dim(x$chainVar)[1]), , ] <- x$chainVar

  if (iteration <= dim(y$chainVar)[2]) {
    chainVar[(dim(x$chainVar)[1] + 1):dim(chainVar)[1], , ] <- y$chainVar[, seq_len(iteration), ]
  } else {
    chainVar[(dim(x$chainVar)[1] + 1):dim(chainVar)[1], seq_len(dim(y$chainVar)[2]), ] <- y$chainVar
  }

  loggedEvents <- x$loggedEvents

  midsobj <- list(data = data, imp = imp, m = m,
                  where = where, blocks = blocks,
                  call = call, nmis = nmis,
                  method = method,
                  predictorMatrix = predictorMatrix,
                  visitSequence = visitSequence,
                  formulas = formulas, post = post,
                  blots = blots, seed = seed,
                  iteration = iteration,
                  lastSeedValue = .Random.seed,
                  chainMean = chainMean,
                  chainVar = chainVar,
                  loggedEvents = loggedEvents,
                  version = packageVersion("mice"),
                  date = Sys.Date())
  oldClass(midsobj) <- "mids"
  return(midsobj)
}
mice/R/mice.impute.2l.pan.R0000644000176200001440000002063413617735034015041 0ustar liggesusers
### contributed by Alexander Robitzsch (robitzsch@ipn.uni-kiel.de)
#-------------------MICE.IMPUTE.2L.PAN----------------------------
# Usage is an extension of 2l.norm
# -2 ... group identifier
# 1 ...
fixed effects # 2 ... fixed and random effects # 3 ... introduce aggregated effects (i.e. group means) # 4 ... fixed, random and aggregated effects #'Imputation by a two-level normal model using \code{pan} #' #'Imputes univariate missing data using a two-level normal model with #'homogeneous within group variances. Aggregated group effects (i.e. group #'means) can be automatically created and included as predictors in the #'two-level regression (see argument \code{type}). This function needs the #'\code{pan} package. #' #'Implements the Gibbs sampler for the linear two-level model with homogeneous #'within group variances which is a special case of a multivariate linear mixed #'effects model (Schafer & Yucel, 2002). For a two-level imputation with #'heterogeneous within-group variances see \code{\link{mice.impute.2l.norm}}. % #'The random intercept is automatically added in % #'\code{mice.impute.2l.norm()}. #' #'@aliases mice.impute.2l.pan 2l.pan #'@name mice.impute.2l.pan #'@param y Incomplete data vector of length \code{n} #'@param ry Vector of missing data pattern (\code{FALSE}=missing, #'\code{TRUE}=observed) #'@param x Matrix (\code{n} x \code{p}) of complete covariates. #'@param type Vector of length \code{ncol(x)} identifying random and class #'variables. Random effects are identified by a '2'. The group variable (only #'one is allowed) is coded as '-2'. Random effects also include the fixed #'effect. If for a covariates X1 group means shall be calculated and included #'as further fixed effects choose '3'. In addition to the effects in '3', #'specification '4' also includes random effects of X1. #'@param intercept Logical determining whether the intercept is automatically #'added. #'@param paniter Number of iterations in \code{pan}. Default is 500. #'@param groupcenter.slope If \code{TRUE}, in case of group means (\code{type} #'is '3' or'4') group mean centering for these predictors are conducted before #'doing imputations. Default is \code{FALSE}. 
#'@param ... Other named arguments. #'@return A vector of length \code{nmis} with imputations. #'@author Alexander Robitzsch (IPN - Leibniz Institute for Science and #'Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de}. #'@note This function does not implement the \code{where} functionality. It #'always produces \code{nmis} imputation, irrespective of the \code{where} #'argument of the \code{mice} function. #'@family univariate-2l #'@references #' #'Schafer J L, Yucel RM (2002). Computational strategies for multivariate #'linear mixed-effects models with missing values. \emph{Journal of #'Computational and Graphical Statistics}. \bold{11}, 437-457. #' #'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} #'@examples #' #'################################### #'# simulate some data #'# two-level regression model with fixed slope #' #'# number of groups #'G <- 250 #'# number of persons #'n <- 20 #'# regression parameter #'beta <- .3 #'# intraclass correlation #'rho <- .30 #'# correlation with missing response #'rho.miss <- .10 #'# missing proportion #'missrate <- .50 #'y1 <- rep( rnorm( G , sd = sqrt( rho ) ) , each=n ) + rnorm(G*n , sd = sqrt( 1 - rho )) #'x <- rnorm( G*n ) #'y <- y1 + beta * x #'dfr0 <- dfr <- data.frame( "group" = rep(1:G , each=n ) , "x" = x , "y" = y ) #'dfr[ rho.miss * x + rnorm( G*n , sd = sqrt( 1 - rho.miss ) ) < qnorm( missrate ) , "y" ] <- NA #' #'#..... #'# empty imputation in mice #'imp0 <- mice( as.matrix(dfr) , maxit=0 ) #'predM <- imp0$predictorMatrix #'impM <- imp0$method #' #'#... 
#'# specify predictor matrix and method
#'predM1 <- predM
#'predM1["y","group"] <- -2
#'predM1["y","x"] <- 1 # fixed x effects imputation
#'impM1 <- impM
#'impM1["y"] <- "2l.pan"
#'
#'# multilevel imputation
#'imp1 <- mice( as.matrix( dfr ) , m = 1 , predictorMatrix = predM1 ,
#' method = impM1 , maxit=1 )
#'# multilevel analysis
#'library(lme4)
#'mod <- lmer( y ~ ( 1 + x | group) + x , data = complete(imp1) )
#'summary(mod)
#'
#'############################################
#'# Examples of predictorMatrix specification
#'
#'# random x effects
#'# predM1["y","x"] <- 2
#'
#'# fixed x effects and group mean of x
#'# predM1["y","x"] <- 3
#'
#'# random x effects and group mean of x
#'# predM1["y","x"] <- 4
#'
#'@export
mice.impute.2l.pan <- function(y, ry, x, type, intercept=TRUE, paniter = 500 , groupcenter.slope = FALSE , ...){
  # make sure the pan package is available (helper defined elsewhere)
  install.on.demand("pan", ...)

  ## append intercept
  # the intercept column gets type 2, i.e. it is treated as a random effect
  if (intercept) {
    x <- cbind(1, as.matrix(x))
    type <- c(2, type)
  }

  # add groupmeans in the regression model
  if (any(type %in% c(3,4))) {
    # x0: group column first, followed by the columns to be aggregated
    x0 <- as.matrix(cbind( x[ , type == -2 ] , x[ , type %in% c(3,4) ] ))
    colnames(x0) <- c( colnames(x)[ type==-2] , colnames(x)[ type %in% c(3,4) ] )
    type0 <- c( -2 , rep.int(1 , ncol(x0)-1) )
    x0.aggr <- as.matrix( .mice.impute.2l.groupmean(y = y , ry=ry , x = x0 , type = type0 , grmeanwarning=FALSE , ...) )
    colnames(x0.aggr) <- paste0( "M." , colnames(x0)[-1])
    # groupcentering
    # replace the original columns in x by their deviation from the group mean
    if ( groupcenter.slope ){
      x0.aggr1 <- as.matrix(x0.aggr)
      colnames(x0.aggr1) <- colnames(x0)[-1]
      x0cent <- x0[,-1] - x0.aggr1
      x[ , colnames(x0cent) ] <- x0cent
    }
    # combine covariate matrix
    x <- cbind( x , x0.aggr )
    # add type
    # the appended group means are fixed effects (1); the original columns
    # are recoded 3 -> 1 (fixed) and 4 -> 2 (random)
    type1 <- c( type , rep.int(1 , ncol(x0.aggr) ) )
    names(type1) <- c( names(type) , colnames(x0.aggr) )
    type1[ type1 == 3 ] <- 1
    type1[ type1 == 4 ] <- 2
    type <- type1
  }

  #############################
  # pan imputation
  # define cluster
  group <- x[ , type == -2 ]
  # consecutive integer cluster ids, numbered by first appearance
  subj <- match( group , unique(group) )
  # is group resorting necessary? (need this for pan)
  sortgroups <- any( diff(subj) < 0 )
  if ( sortgroups ){
    # remember the original row order in `index` so it can be restored later
    dfr <- data.frame( "group" = group , "ry" = ry , "index" = seq(1,length(ry)) )
    dfr <- dfr[ order(dfr$group) , ]
    group <- group[ dfr$index ]
    y <- y[ dfr$index ]
    x <- x[ dfr$index , ]
    ry <- ry[ dfr$index ]
    subj <- subj[ dfr$index ]
    # stop( "Sort group identifiers in increasing order!\n")
  }
  # response as a one-column matrix with the unobserved entries set to NA
  y1 <- matrix( as.numeric(y) , ncol=1 )
  y1[ ! ry , 1 ] <- NA
  # specify predictors
  pred <- x[ , type != -2 , drop = FALSE] ## fixed SvB 1feb2013
  # columns fixed effects
  xcol <- seq( 1 , ncol(pred) )
  type1 <- type[ type != -2 ]
  # columns of pred that also get a random effect
  zcol <- which(type1 == 2 )
  # noninformative priors
  prior <- list( a=ncol(y1), Binv= diag( rep(1,ncol(y1) ) ) , c= ncol(y1) * length(zcol) , Dinv= diag( rep(1 ,ncol(y1)*length(zcol) ) ) )
  if (length(subj) != nrow(y1)) stop("No class variable") ## fixed SvB 27apr2013
  # pan imputation
  # repeat with a fresh random seed until the result contains no NA/NaN
  # NOTE(review): there is no upper bound on the number of retries
  ii <- 0
  while (ii == 0){
    s1 <- round(runif(1, 1,10^7))
    imput <- pan::pan(y1,subj,pred,xcol,zcol,prior,seed= s1 ,iter= paniter )
    res <- imput$y
    ii <- 1 - any( is.na( res ) ) # check for invalid imputations: pan occasionally produces NaNs
  }
  if ( sortgroups ){
    # restore the original row order before picking the missing entries
    dfr <- cbind( res , dfr )
    dfr <- dfr[ order(dfr$index ) , ]
    res <- dfr[ ! dfr$ry , "res" ]
  } else {
    res <- res[ ! ry ]
  }
  flush.console()
  return(res)
}

###########################################################################################
# compute cluster groupmean
# Returns a matrix with, for every row of x, the group mean (NAs removed) of
# the columns whose type is 1 or 2; the group column is the one with type -2.
# Column names are pasted from names(type), so `type` is presumably expected
# to be a named vector - verify against callers.
.mice.impute.2l.groupmean <- function (y, ry, x, type , grmeanwarning=TRUE, ...){
  if ( ( ncol(x) > 2 ) & grmeanwarning ) warning("\nMore than one variable is requested to be aggregated.\n")
  # calculate aggregated values
  a1 <- aggregate( x[, type %in% c(1,2) ] , list( x[,type == -2] ) , mean , na.rm=TRUE)
  # map the per-group means back onto the rows of x
  i1 <- match( x[,type == -2] , a1[,1] )
  ximp <- as.matrix(a1[i1,-1])
  colnames(ximp) <- paste( names(type)[ type %in% c(1,2) ] , names(type)[ type == -2 ] , sep="." )
  return(ximp)
}
###########################################################################################
mice/R/md.pairs.R0000644000176200001440000000406113416657163013245 0ustar liggesusers
# ------------------------------md.pairs---------------------------------
#'Missing data pattern by variable pairs
#'
#'Number of observations per variable pair.
#'
#'The four components in the output value have the following interpretation:
#'\describe{ \item{list('rr')}{response-response, both variables are observed}
#'\item{list('rm')}{response-missing, row observed, column missing}
#'\item{list('mr')}{missing -response, row missing, column observed}
#'\item{list('mm')}{missing -missing, both variables are missing} }
#'
#'@param data A data frame or a matrix containing the incomplete data. Missing
#'values are coded as \code{NA}.
#'@return A list of four components named \code{rr}, \code{rm}, \code{mr} and
#'\code{mm}. Each component is a square numerical matrix containing the number
#'of observations within the corresponding missing data pattern.
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2009
#'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords univar
#'@examples
#'
#'
#'pat <- md.pairs(nhanes)
#'pat
#'
#'# show that these four matrices decompose the total sample size
#'# for each pair
#'pat$rr + pat$rm + pat$mr + pat$mm
#'
#'# percentage of usable cases to impute row variable from column variable
#'round(100*pat$mr/(pat$mr+pat$mm))
#'
#'@export
md.pairs <- function(data) {
  # Pairwise missing data counts for every (row variable, column variable):
  #   rr: both observed            rm: row observed, column missing
  #   mr: row missing, column obs. mm: both missing
  is_tabular <- is.matrix(data) || is.data.frame(data)
  if (!is_tabular) {
    stop("Data should be a matrix or dataframe")
  }
  if (ncol(data) < 2) {
    stop("Data should have at least two columns")
  }

  observed <- !is.na(data)
  absent <- !observed

  # crossprod(a, b) is t(a) %*% b
  list(
    rr = crossprod(observed),
    rm = crossprod(observed, absent),
    mr = crossprod(absent, observed),
    mm = crossprod(absent)
  )
}
mice/R/pool.scalar.R0000644000176200001440000000605213416657163013747 0ustar liggesusers
# ------------------------------pool.scalar----------------------------
#'Multiple imputation pooling: univariate version
#'
#'Pools univariate estimates of m repeated complete data analysis
#'
#'The function averages the univariate estimates of the complete data model,
#'computes the total variance over the repeated analyses, and computes the
#'relative increase in variance due to nonresponse and the fraction of missing
#'information.
#'
#'@param Q A vector of univariate estimates of \code{m} repeated complete data
#'analyses.
#'@param U A vector containing the corresponding \code{m} variances of the univariate
#'estimates.
#'@param n A number providing the sample size. If nothing is specified, an infinite sample \code{n = Inf} is assumed.
#'@param k A number indicating the number of parameters to be estimated. By default, \code{k = 1} is assumed.
#'@return Returns a list with components. Component \code{m} is the
#'number of imputations.
Component \code{qhat} contains the \code{m} #'univariate estimates of repeated complete data analyses. #'Component \code{u} contains the corresponding \code{m} variances of the univariate estimates. #'Component \code{qbar} is the pooled univariate estimate, formula (3.1.2) Rubin #'(1987). Component \code{ubar} is the mean of the variances #'(i.e. the pooled within-imputation variance), formula (3.1.3) Rubin (1987). #'Component \code{b} is the between-imputation variance, formula (3.1.4) Rubin (1987). #'Component \code{t} is the total variance of the pooled estimated, formula (3.1.5) Rubin #'(1987). #'Component \code{r} is the relative increase in variance due to nonresponse, formula #'(3.1.7) Rubin (1987). #'Component \code{df} is the degrees of freedom for t reference distribution, formula #'(3.1.6) Rubin (1987) or method of Barnard-Rubin (1999) (if \code{method = "smallsample"}). #'Component \code{fmi} is the fraction missing information due to nonresponse, formula #'(3.1.10) Rubin (1987). #'@author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 #'@seealso \code{\link{pool}} #'@references Rubin, D.B. (1987). Multiple Imputation for Nonresponse in #'Surveys. New York: John Wiley and Sons. 
#'@keywords htest
#'@examples
#'
#'
#'imp <- mice(nhanes)
#'m <- imp$m
#'Q <- rep(NA, m)
#'U <- rep(NA, m)
#'for (i in 1:m) {
#' Q[i] <- mean(complete(imp, i)$bmi)
#' U[i] <- var(complete(imp, i)$bmi) / nrow(nhanes) # (standard error of estimate)^2
#'}
#'pool.scalar(Q, U, n = nrow(nhanes), k = 1) # Barnard-Rubin 1999
#'
#'@export
pool.scalar <- function(Q, U, n = Inf, k = 1) {
  # Pools m univariate estimates Q with variances U by Rubin's rules
  # (Rubin, 1987); the degrees of freedom come from barnard.rubin().
  n_imp <- length(Q)
  pooled <- mean(Q)
  within <- mean(U)    # mean of the within-imputation variances
  between <- var(Q)    # between-imputation variance
  total <- within + (n_imp + 1) * between/n_imp
  dof <- barnard.rubin(n_imp, between, total, dfcom = n - k)
  riv <- (1 + 1/n_imp) * between/within
  frac_missing <- (riv + 2/(dof + 3))/(riv + 1)
  list(m = n_imp, qhat = Q, u = U, qbar = pooled, ubar = within, b = between,
       t = total, df = dof, r = riv, fmi = frac_missing)
}
mice/R/windspeed.R0000644000176200001440000000255513416657163013520 0ustar liggesusers
#'Subset of Irish wind speed data
#'
#'Subset of Irish wind speed data
#'
#'The original data set is much larger and was analyzed in detail by Haslett
#'and Raftery (1989). Van Buuren et al (2006) used this subset to investigate
#'the influence of extreme MAR mechanisms on the quality of imputation.
#'
#'@name windspeed
#'@docType data
#'@format A data frame with 433 rows and 6 columns containing the daily average
#'wind speeds within the period 1961-1978 at meteorological stations in the
#'Republic of Ireland. The data are a random sample from a larger data set.
#'\describe{
#'\item{RochePt}{Roche Point}
#'\item{Rosslare}{Rosslare}
#'\item{Shannon}{Shannon}
#'\item{Dublin}{Dublin}
#'\item{Clones}{Clones}
#'\item{MalinHead}{Malin Head} }
#'@references Haslett, J. and Raftery, A. E. (1989). \emph{Space-time
#'Modeling with Long-memory Dependence: Assessing Ireland's Wind Power
#'Resource (with Discussion)}. Applied Statistics 38, 1-50.
#'\url{http://lib.stat.cmu.edu/datasets/wind.desc} and #'\url{http://lib.stat.cmu.edu/datasets/wind.data} #' #'van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #'Fully conditional specification in multivariate imputation. \emph{Journal of #'Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #'@keywords datasets #'@examples #' #'windspeed[1:3,] #' NULL mice/R/mice.impute.2l.norm.R0000644000176200001440000001366413511166311015230 0ustar liggesusers#'Imputation by a two-level normal model #' #'Imputes univariate missing data using a two-level normal model #' #'Implements the Gibbs sampler for the linear multilevel model with #'heterogeneous within-class variance (Kasim and Raudenbush, 1998). Imputations #'are drawn as an extra step to the algorithm. For simulation work see Van #'Buuren (2011). #' #'The random intercept is automatically added in \code{mice.impute.2l.norm()}. #'A model without a random intercept can be specified by \code{mice(..., #'intercept = FALSE)}. #' #'@name mice.impute.2l.norm #'@inheritParams mice.impute.pmm #'@param type Vector of length \code{ncol(x)} identifying random and class #'variables. Random variables are identified by a '2'. The class variable #'(only one is allowed) is coded as '-2'. Random variables also include the #'fixed effect. #'@param intercept Logical determining whether the intercept is automatically #'added. #'@param ... Other named arguments. #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@note Added June 25, 2012: The currently implemented algorithm does not #'handle predictors that are specified as fixed effects (type=1). When using #'\code{mice.impute.2l.norm()}, the current advice is to specify all predictors #'as random effects (type=2). #' #'Warning: The assumption of heterogeneous variances requires that in every #'class at least one observation has a response in \code{y}.
#'@author Roel de Jong, 2008 #'@references #' #'Kasim RM, Raudenbush SW. (1998). Application of Gibbs sampling to nested #'variance components models with heterogeneous within-group variance. Journal #'of Educational and Behavioral Statistics, 23(2), 93--116. #' #'Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. \url{https://www.jstatsoft.org/v45/i03/} #' #'Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. #'and and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel #'Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. #'@family univariate-2l #'@keywords datagen #'@export mice.impute.2l.norm <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) { rwishart <- function(df, p = nrow(SqrtSigma), SqrtSigma = diag(p)) { ## rwishart, written by Bill Venables Z <- matrix(0, p, p) diag(Z) <- sqrt(rchisq(p, df:(df - p + 1))) if (p > 1) { pseq <- seq_len(p - 1) Z[rep(p * pseq, pseq) + unlist(lapply(pseq, seq))] <- rnorm(p * (p - 1)/2) } crossprod(Z %*% SqrtSigma) } force.chol <- function(x, warn = TRUE) { z <- 0 repeat { lambda <- 0.1 * z XT <- x + diag(x = lambda, nrow = nrow(x)) XT <- (XT + t(XT))/2 s <- try(expr = chol(XT), silent = TRUE) if (class(s) != "try-error") break z <- z + 1 } attr(s, "forced") <- (z > 0) if (warn && z > 0) warning("Cholesky decomposition had to be forced", call. = FALSE) return(s) } symridge <- function(x, ridge = 0.0001, ...) 
{ x <- (x + t(x))/2 if (nrow(x) == 1L) return(x) x + diag(diag(x) * ridge) } ## append intercept if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) } ## Initialize n.iter <- 100 if (is.null(wy)) wy <- !ry n.class <- length(unique(x[, type == -2])) if (n.class == 0) stop("No class variable") gf.full <- factor(x[, type == -2], labels = seq_len(n.class)) gf <- gf.full[ry] XG <- split.data.frame(as.matrix(x[ry, type == 2]), gf) X.SS <- lapply(XG, crossprod) yg <- split(as.vector(y[ry]), gf) n.g <- tabulate(gf) n.rc <- ncol(XG[[1]]) bees <- matrix(0, nrow = n.class, ncol = n.rc) ss <- vector(mode = "numeric", length = n.class) mu <- rep.int(0, n.rc) inv.psi <- diag(1, n.rc, n.rc) inv.sigma2 <- rep.int(1, n.class) sigma2.0 <- 1 theta <- 1 ## Execute Gibbs sampler for (iter in seq_len(n.iter)) { ## Draw bees for (class in seq_len(n.class)) { vv <- symridge(inv.sigma2[class] * X.SS[[class]] + inv.psi, ...) bees.var <- chol2inv(chol(vv)) bees[class, ] <- drop(bees.var %*% (crossprod(inv.sigma2[class] * XG[[class]], yg[[class]]) + inv.psi %*% mu)) + drop(rnorm(n = n.rc) %*% chol(symridge(bees.var, ...))) ss[class] <- crossprod(yg[[class]] - XG[[class]] %*% bees[class, ]) } ## Draw mu mu <- colMeans(bees) + drop(rnorm(n = n.rc) %*% chol(chol2inv(chol(symridge(inv.psi, ...)))/n.class)) ## Draw psi inv.psi <- rwishart(df = n.class - n.rc - 1, SqrtSigma = chol(chol2inv(chol(symridge(crossprod(t(t(bees) - mu)), ...))))) ## Draw sigma2 inv.sigma2 <- rgamma(n.class, n.g/2 + 1/(2 * theta), scale = 2 * theta/(ss * theta + sigma2.0)) ## Draw sigma2.0 H <- 1/mean(inv.sigma2) # Harmonic mean sigma2.0 <- rgamma(1, n.class/(2 * theta) + 1, scale = 2 * theta * H/n.class) ## Draw theta G <- exp(mean(log(1/inv.sigma2))) # Geometric mean theta <- 1/rgamma(1, n.class/2 - 1, scale = 2/(n.class * (sigma2.0/H - log(sigma2.0) + log(G) - 1))) } ## Generate imputations imps <- rnorm(n = sum(wy), sd = sqrt(1/inv.sigma2[gf.full[wy]])) + rowSums(as.matrix(x[wy, type == 2, drop = FALSE]) * 
bees[gf.full[wy], ])
  return(imps)
}
mice/R/where.R0000644000176200001440000000373613416657163012652 0ustar liggesusers
#' Creates a \code{where} argument
#'
#' Helper that builds a valid \code{where} matrix for the \code{mice}
#' function. The result has the same dimensions as \code{data} and flags
#' the cells that are to be imputed (\code{TRUE}) or not (\code{FALSE}).
#' @param data A \code{data.frame} with the source data
#' @param keyword An optional keyword, one of \code{"missing"} (missing
#' values are imputed), \code{"observed"} (observed values are imputed),
#' \code{"all"} and \code{"none"}. The default
#' is \code{keyword = "missing"}
#' @return A logical matrix with the dimensions and dimnames of \code{data}
#' @seealso \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}}
#' @examples
#' head(make.where(nhanes), 3)
#' @export
make.where <- function(data,
                       keyword = c("missing", "all", "none", "observed")) {
  keyword <- match.arg(keyword)
  data <- check.dataform(data)

  # Constant matrix shaped like the data
  constant <- function(value) {
    matrix(value, nrow = nrow(data), ncol = ncol(data))
  }

  if (keyword == "missing") {
    where <- is.na(data)
  } else if (keyword == "observed") {
    where <- !is.na(data)
  } else if (keyword == "all") {
    where <- constant(TRUE)
  } else {
    where <- constant(FALSE)
  }

  dimnames(where) <- dimnames(data)
  where
}

# Validates the `where` argument and returns it as a logical matrix that is
# shaped and named like `data`.
#   where  : NULL (defaults to the missing cells), a keyword accepted by
#            make.where(), or a matrix / data frame of the size of `data`
#   data   : the data set `where` refers to
#   blocks : columns not named in any block are set to FALSE
# A keyword is handed to make.where() and returned as is, i.e. without the
# restriction to the block variables.
check.where <- function(where, data, blocks) {
  if (is.null(where)) {
    where <- make.where(data, keyword = "missing")
  }

  tabular <- is.matrix(where) || is.data.frame(where)
  if (!tabular) {
    if (!is.character(where)) {
      stop("Argument `where` not a matrix or data frame", call. = FALSE)
    }
    return(make.where(data, keyword = where))
  }

  if (any(dim(data) != dim(where))) {
    stop("Arguments `data` and `where` not of same size", call. = FALSE)
  }

  flags <- as.logical(as.matrix(where))
  if (anyNA(flags)) {
    stop("Argument `where` contains missing values", call. = FALSE)
  }

  result <- matrix(flags, nrow = nrow(data), ncol = ncol(data))
  dimnames(result) <- dimnames(data)

  # Never impute columns that are not part of any block
  outside.blocks <- !(colnames(result) %in% unlist(blocks))
  result[, outside.blocks] <- FALSE
  result
}
mice/R/Mads.R0000644000176200001440000000765613416657163012431 0ustar liggesusers
#'Multivariate Amputed Data Set (\code{mads})
#'
#'The \code{mads} object contains an amputed data set. The \code{mads} object is
#'generated by the \code{ampute} function. The \code{mads} class of objects has
#'methods for the following generic functions: \code{print}, \code{summary},
#'\code{bwplot} and \code{xyplot}.
#'
#'@section Contents:
#'\describe{
#'\item{\code{call}:}{The function call.}
#'\item{\code{prop}:}{Proportion of cases with missing values. Note: even when
#'the proportion is entered as the proportion of missing cells (when
#'\code{bycases == FALSE}), this object contains the proportion of missing cases.}
#'\item{\code{patterns}:}{A data frame of size #patterns by #variables where \code{0}
#'indicates a variable has missing values and \code{1} indicates a variable remains
#'complete.}
#'\item{\code{freq}:}{A vector of length #patterns containing the relative
#'frequency with which the patterns occur. For example, if the vector is
#'\code{c(0.4, 0.4, 0.2)}, this means that of all cases with missing values,
#'40 percent is candidate for pattern 1, 40 percent for pattern 2 and 20
#'percent for pattern 3. The vector sums to 1.}
#'\item{\code{mech}:}{A string specifying the missingness mechanism, either
#'\code{"MCAR"}, \code{"MAR"} or \code{"MNAR"}.}
#'\item{\code{weights}:}{A data frame of size #patterns by #variables. It contains
#'the weights that were used to calculate the weighted sum scores. The weights
#'may differ between patterns and between variables.}
#'\item{\code{cont}:}{Logical, whether probabilities are based on continuous logit
#'functions or on discrete odds distributions.}
#'\item{\code{type}:}{A vector of strings containing the type of missingness
#'for each pattern.
Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or #'\code{"RIGHT"}. The first type refers to the first pattern, the second type #'to the second pattern, etc.} #'\item{\code{odds}:}{A matrix where #patterns defines the #rows. Each row contains #'the odds of being missing for the corresponding pattern. The number of odds values #'defines into how many quantiles the sum scores were divided. The values are #'relative probabilities: a quantile with odds value 4 will have a probability of #'being missing that is four times higher than a quantile with odds 1. The #'#quantiles may differ between patterns, NA is used for cells remaining empty.} #'\item{\code{amp}:}{A data frame containing the input data with NAs for the #'amputed values.} #'\item{\code{cand}:}{A vector that contains the pattern number for each case. #'A value between 1 and #patterns is given. For example, a case with value 2 is #'candidate for missing data pattern 2.} #'\item{\code{scores}:}{A list containing vectors with weighted sum scores of the #'candidates. The first vector refers to the candidates of the first pattern, the #'second vector refers to the candidates of the second pattern, etc. The lengths #'of the vectors differ because the number of candidates is different for each #'pattern.} #'\item{\code{data}:}{The complete data set that was entered in \code{ampute}.} #'} #'@note Many of the functions of the \code{mice} package do not use the S4 class #'definitions, and instead rely on the S3 list equivalent #'\code{oldClass(obj) <- "mads"}. #'@author Rianne Schouten, 2016 #'@seealso \code{\link{ampute}}, Vignette titled "Multivariate Amputation using #'Ampute". 
#'@export
setClass(
  "mads",
  # One slot per element of the list assembled at the end of ampute();
  # the class extends "list" so list-style access keeps working.
  slots = c(
    call = "call",
    prop = "numeric",
    patterns = "matrix",
    freq = "numeric",
    mech = "character",
    weights = "matrix",
    cont = "logical",
    type = "character",
    odds = "matrix",
    amp = "data.frame",
    cand = "integer",
    scores = "list",
    data = "data.frame"
  ),
  contains = "list"
)
mice/R/ampute.discrete.R0000644000176200001440000001162313416657163014626 0ustar liggesusers
#'Multivariate Amputation Based On Discrete Probability Functions
#'
#'This function creates a missing data indicator for each pattern. Odds probabilities
#'(Brand, 1999, pp. 110-113) will be induced on the weighted sum scores, calculated earlier
#'in the multivariate amputation function \code{\link{ampute}}.
#'
#'@param P A vector containing the pattern numbers of the cases's candidacies.
#'For each case, a value between 1 and #patterns is given. For example, a
#'case with value 2 is candidate for missing data pattern 2.
#'@param scores A list containing vectors with the candidates's weighted sum scores,
#'the result of an underlying function in \code{\link{ampute}}.
#'@param prop A scalar specifying the proportion of missingness. Should be a value
#'between 0 and 1. Default is a missingness proportion of 0.5.
#'@param odds A matrix where #patterns defines the #rows. Each row should contain
#'the odds of being missing for the corresponding pattern. The amount of odds values
#'defines in how many quantiles the sum scores will be divided. The values are
#'relative probabilities: a quantile with odds value 4 will have a probability of
#'being missing that is four times higher than a quantile with odds 1. The
#'#quantiles may differ between the patterns, specify NA for cells remaining empty.
#'Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of
#'\code{\link{ampute.default.odds}}.
#'@return A list containing vectors with \code{0} if a case should be made missing
#'and \code{1} if a case should remain complete.
#'The first vector refers to the
#'first pattern, the second vector to the second pattern, etcetera.
#'@author Rianne Schouten, 2016
#'@seealso \code{\link{ampute}}, \code{\link{ampute.default.odds}}
#'@references Brand, J.P.L. (1999). \emph{Development, implementation and
#'evaluation of multiple imputation strategies for the statistical analysis of
#'incomplete data sets.} Dissertation. Rotterdam: Erasmus University.
#'@keywords internal
#'@export
ampute.discrete <- function(P, scores, prop, odds) {
  # Multivariate Amputation Based On Discrete Probability Functions
  #
  # Creates a missing data indicator for each pattern. Odds probabilities
  # (Brand, 1999, pp. 110-113) are induced on the weighted sum scores.
  #
  # P      : pattern number per case; pattern i is coded as i + 1
  # scores : list with, per pattern, the weighted sum scores of its candidates
  # prop   : desired proportion of missingness
  # odds   : matrix with one row of (relative) odds per pattern, NA-padded
  #
  # Returns a list with one element per row of `odds`: a vector shaped like
  # `P` holding 0 (make missing) or 1 (keep complete), or the scalar 0 for a
  # pattern that has no candidates.
  R <- vector(mode = "list", length = nrow(odds))
  for (i in seq_len(nrow(odds))) {
    is.candidate <- P == (i + 1)
    # Decide "no candidates" from P itself. Testing whether the first score
    # equals 0 also triggers for a genuine first score of exactly 0 (e.g.
    # centred integer data), which returned the scalar 0 for a pattern that
    # does have candidates.
    if (!any(is.candidate)) {
      R[[i]] <- 0
      next
    }
    pattern.scores <- scores[[i]]
    # The scores are divided into quantiles
    # Specify #quantiles by #odds values
    ng <- sum(!is.na(odds[i, ]))
    quantiles <- quantile(pattern.scores, probs = seq.int(0, 1, by = 1 / ng))
    if (anyDuplicated(quantiles) || anyNA(quantiles)) {
      stop("Division of sum scores into quantiles did not succeed. Possibly the sum scores contain too few different observations (in case of categorical or dummy variables). Try using more variables to calculate the sum scores or diminish the number of quantiles in the odds matrix", call. = FALSE)
    }
    # For each candidate the quantile number is specified. A score on a
    # boundary ends up in the higher quantile because later k overwrite.
    R.temp <- rep.int(NA, length(pattern.scores))
    for (k in seq_len(ng)) {
      in.quantile <- pattern.scores >= quantiles[k] &
        pattern.scores <= quantiles[k + 1]
      R.temp <- replace(R.temp, in.quantile, k)
    }
    # For each candidate, a random value between 0 and 1 is compared with the
    # odds probability of being missing. If random value <= prob, the candidate
    # receives missing data indicator 0 (made missing according to the
    # pattern); otherwise indicator 1 (remains complete).
    odds.total <- sum(odds[i, ], na.rm = TRUE)
    for (l in seq_len(ng)) {
      prob <- (ng * prop * odds[i, l]) / odds.total
      if (prob >= 1.0) {
        warning("Combination of odds matrix and desired proportion of missingness results to small quantile groups, probably decreasing the obtained proportion of missingness", call. = FALSE)
      }
      members <- which(R.temp == l)
      if (length(members) != 0) {
        random <- runif(n = length(members), min = 0, max = 1)
        # 0: candidate will be made missing, 1: candidate is kept complete
        R.temp[members] <- ifelse(random <= prob, 0, 1)
      }
    }
    # Give the result to the right cases in the data
    R[[i]] <- replace(P, is.candidate, R.temp)
    R[[i]] <- replace(R[[i]], !is.candidate, 1)
  }
  return(R)
}
mice/R/nhanes.R0000644000176200001440000000161513416657163013006 0ustar liggesusers
#'NHANES example - all variables numerical
#'
#'A small data set with non-monotone missing values.
#'
#'A small data set with all numerical variables. The data set \code{nhanes2} is
#'the same data set, but with \code{age} and \code{hyp} treated as factors.
#'
#'@name nhanes
#'@docType data
#'@format A data frame with 25 observations on the following 4 variables.
#'\describe{
#'\item{age}{Age group (1=20-39, 2=40-59, 3=60+)}
#'\item{bmi}{Body mass index (kg/m**2)}
#'\item{hyp}{Hypertensive (1=no,2=yes)}
#'\item{chl}{Total serum cholesterol (mg/dL)} }
#'@seealso \code{\link{nhanes2}}
#'@source Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate
#'Data.} London: Chapman & Hall. Table 6.14.
#'@keywords datasets #'@examples #' #' #'imp <- mice(nhanes) # create 5 imputed data sets #'complete(imp) # print the first imputed data set #' NULL mice/R/Ampute.R0000644000176200001440000006123613617561132012763 0ustar liggesusers#'Generate Missing Data for Simulation Purposes #' #'This function generates multivariate missing data in a MCAR, MAR or MNAR manner. #'Imputation of data sets containing missing values can be performed with #'\code{\link{mice}}. #' #'When new multiple imputation techniques are tested, missing values need to be #'generated in simulated data sets. The generation of missing values is what #'we call: amputation. The function \code{ampute} is developed to perform any kind #'of amputation desired by the researcher. An extensive example and more explanation #'of the function can be found in the vignette \emph{Generate missing values with #'ampute}, available in \pkg{mice} as well. For imputation, the function #'\code{\link{mice}} is advised. #' #'Until recently, univariate amputation procedures were used to generate missing #'data in complete, simulated data sets. With this approach, variables are made #'incomplete one variable at a time. When several variables need to be amputed, #'the procedure is repeated multiple times. #' #'With this univariate approach, it is difficult to relate the missingness on one #'variable to the missingness on another variable. A multivariate amputation procedure #'solves this issue and moreover, it does justice to the multivariate nature of #'data sets. Hence, \code{ampute} is developed to perform the amputation according #'the researcher's desires. #' #'The idea behind the function is the specification of several missingness #'patterns. Each pattern is a combination of variables with and without missing #'values (denoted by \code{0} and \code{1} respectively). For example, one might #'want to create two missingness patterns on a data set with four variables. 
The #'patterns could be something like: \code{0, 0, 1, 1} and \code{1, 0, 1, 0}. #'Each combination of zeros and ones may occur. #' #'Furthermore, the researcher specifies the proportion of missingness, either the #'proportion of missing cases or the proportion of missing cells, and the relative #'frequency each pattern occurs. Consequently, the data is divided over the #'patterns with these probabilities. Now, each case is candidate for a certain #'missingness pattern, but whether the case will have missing values eventually, #'depends on other specifications. #' #'The first of these specifications is the missing mechanism. There are three possible #'mechanisms: the missingness depends completely on chance (MCAR), the missingness #'depends on the values of the observed variables (i.e. the variables that remain #'complete) (MAR) or on the values of the variables that will be made incomplete (MNAR). #'For a more thorough explanation of these definitions, I refer to Van Buuren #'(2012). #' #'When the user sets the missingness mechanism to \code{"MCAR"}, the candidates #'have an equal probability of having missing values. No other specifications #'have to be made. For a \code{"MAR"} or \code{"MNAR"} mechanism, weighted sum #'scores are calculated. These scores are a linear combination of the #'variables. #' #'In order to calculate the weighted sum scores, the data is standardized. That #'is the reason the data has to be numeric. Second, for each case, the values in #'the data set are multiplied with the weights, specified by argument \code{weights}. #'These weighted scores will be summed, resulting in a weighted sum score for each case. #' #'The weights may differ between patterns and they may be negative or zero as well. #'Naturally, in case of a \code{MAR} mechanism, the weights corresponding to the #'variables that will be made incomplete, have a \code{0}. Note that this might be #'different for each pattern. 
In case of \code{MNAR} missingness, especially #'the weights of the variables that will be made incomplete are of importance. However, #'the other variables might be weighted as well. #' #'It is the relative difference between the weights that will result in an effect #'in the sum scores. For example, for the first missing data #'pattern mentioned above, the weights for the third and fourth variables might #'be set to 2 and 4. However, weight values of 0.2 and 0.4 will have the exact #'same effect on the weighted sum score: the fourth variable is weighted twice as #'much as variable 3. #' #'Based on the weighted sum scores, either a discrete or continuous distribution #'of probabilities is used to calculate whether a candidate will have missing values. #' #'For a discrete distribution of probabilities, the weighted sum scores are #'divided into subgroups of equal size (quantiles). Thereafter, the user #'specifies for each subgroup the odds of being missing. Both the number of #'subgroups and the odds values are important for the generation of missing data. #'For example, for a RIGHT-like mechanism, scoring in one of the #'higher quantiles should have high missingness odds, whereas for a MID-like #'mechanism, the central groups should have higher odds. Again, not the size of #'the odds values are of importance, but the relative distance between the values. #' #'The continuous distributions of probabilities are based on the logit function, as #'described by Van Buuren (2012). The user can specify the type of missingness, #'which, again, may differ between patterns. #' #'For an extensive example of the working of the function, I gladly refer to the #'vignette \emph{Generate missing values with ampute}. #' #'@param data A complete data matrix or dataframe. Values should be numeric. #'Categorical variables should have been transformed into dummies. #'@param prop A scalar specifying the proportion of missingness. Should be a value #'between 0 and 1. 
Default is a missingness proportion of 0.5. #'@param patterns A matrix or data frame of size #patterns by #variables where #'\code{0} indicates a variable should have missing values and \code{1} indicates #'a variable should remain complete. The user may specify as many patterns as #'desired. One pattern (a vector) or double patterns are possible as well. Default #'is a square matrix of size #variables where each pattern has missingness on one #'variable only (created with \code{\link{ampute.default.patterns}}). After the #'amputation procedure, \code{\link{md.pattern}} can be used to investigate the #'missing data patterns in the data. #'@param freq A vector of length #patterns containing the relative frequency with #'which the patterns should occur. For example, for three missing data patterns, #'the vector could be \code{c(0.4, 0.4, 0.2)}, meaning that of all cases with #'missing values, 40 percent should have pattern 1, 40 percent pattern 2 and 20 #'percent pattern 3. The vector should sum to 1. Default is an equal probability #'for each pattern, created with \code{\link{ampute.default.freq}}. #'@param mech A string specifying the missingness mechanism, either MCAR #'(Missing Completely At Random), MAR (Missing At Random) or MNAR (Missing Not At #'Random). Default is a MAR missingness mechanism. #'@param weights A matrix or data frame of size #patterns by #variables. The matrix #'contains the weights that will be used to calculate the weighted sum scores. For #'a MAR mechanism, weights of the variables that will be made incomplete, should be #'zero. For a MNAR mechanism, these weights might have any possible value. Furthermore, #'the weights may differ between patterns and between variables. They may be negative #'as well. Within each pattern, the relative size of the values are of importance. #'The default weights matrix is made with \code{\link{ampute.default.weights}} and #'returns a matrix with equal weights for all variables. 
In case of MAR, variables #'that will be amputed will be weighted with \code{0}. If it is MNAR, variables #'that will be observed will be weighted with \code{0}. If mechanism is MCAR, the #'weights matrix will not be used. #'@param std Logical. Whether the weighted sum scores should be calculated with #'standardized data or with non-standardized data. The latter is advised when #'making use of train and testsets in order to prevent leakage. #'@param cont Logical. Whether the probabilities should be based on a continuous #'or discrete distribution. If TRUE, the probabilities of being missing are based #'on a continuous logistic distribution function. \code{\link{ampute.continuous}} #'will be used to calculate and assign the probabilities. These will be based on #'argument \code{type}. If FALSE, the probabilities of being missing are based on #'a discrete distribution (\code{\link{ampute.discrete}}) based on the \code{odds} #'argument. #'Default is TRUE. #'@param type A vector of strings containing the type of missingness for each #'pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. #'If a single missingness type is entered, all patterns will be created by the same #'type. If missingness types should differ over patterns, a vector of missingness #'types should be entered. Default is RIGHT for all patterns and is the result of #'\code{\link{ampute.default.type}}. #'@param odds A matrix where #patterns defines the #rows. Each row should contain #'the odds of being missing for the corresponding pattern. The amount of odds values #'defines in how many quantiles the sum scores will be divided. The values are #'relative probabilities: a quantile with odds value 4 will have a probability of #'being missing that is four times higher than a quantile with odds 1. The #'#quantiles may differ between the patterns, specify NA for cells remaining empty. 
#'Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of #'\code{\link{ampute.default.odds}}. #'@param bycases Logical. If TRUE, the proportion of missingness is defined in #'terms of cases. If FALSE, the proportion of missingness is defined in terms of #'cells. Default is TRUE. #'@param run Logical. If TRUE, the amputations are implemented. If FALSE, the #'return object will contain everything but the amputed data set. #' #'@return Returns an S3 object of class \code{\link{mads-class}} (multivariate #'amputed data set) #'@author Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 #'@seealso \code{\link{mads-class}}, \code{\link{bwplot}}, \code{\link{xyplot}}, #'\code{\link{mice}} #' #'@references Brand, J.P.L. (1999). \emph{Development, implementation and #'evaluation of multiple imputation strategies for the statistical analysis of #'incomplete data sets} (pp. 110-113). Dissertation. Rotterdam: Erasmus University. #' #'Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn, C.G.M., Rubin, D.B. (2006). #'Fully conditional specification in multivariate imputation. \emph{Journal of #'Statistical Computation and Simulation}, 76\emph{(12)}, Appendix B. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-linearnormal.html#sec:generateuni}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Boca Raton, FL.: Chapman & Hall/CRC Press. #' #'Vink, G. (2016). Towards a standardized evaluation of multiple imputation #'routines. #' #'@examples #'# Simulate data set with \code{mvrnorm} from package \code{\pkg{MASS}}. 
#'sigma <- matrix(data = c(1, 0.2, 0.2, 0.2, 1, 0.2, 0.2, 0.2, 1), nrow = 3)
#'complete.data <- MASS::mvrnorm(n = 100, mu = c(5, 5, 5), Sigma = sigma)
#'# Perform quick amputation
#'result1 <- ampute(data = complete.data)
#'# Change default matrices as desired
#'patterns <- result1$patterns
#'patterns[1:3, 2] <- 0
#'odds <- result1$odds
#'odds[2,3:4] <- c(2, 4)
#'odds[3,] <- c(3, 1, NA, NA)
#'# Rerun amputation
#'result2 <- ampute(data = complete.data, patterns = patterns, freq =
#'c(0.3, 0.3, 0.4), cont = FALSE, odds = odds)
#'# Run an amputation procedure with continuous probabilities
#'result3 <- ampute(data = complete.data, type = c("RIGHT", "TAIL", "LEFT"))
#'
#'@export
ampute <- function(data, prop = 0.5, patterns = NULL, freq = NULL,
                   mech = "MAR", weights = NULL, std = TRUE, cont = TRUE,
                   type = NULL, odds = NULL, bycases = TRUE, run = TRUE) {
  # Generate Missing Data for Simulation Purposes
  #
  # This function generates multivariate missing data in a MCAR, MAR or MNAR
  # manner. It validates and completes the arguments, assigns every case to a
  # candidate pattern, derives a 0/1 missingness indicator per pattern and
  # finally blanks out the selected cells. The result is a list with S3 class
  # "mads".

  # ------------------------ sum.scores -----------------------------------

  sum.scores <- function(P, data, std, weights) {
    # Calculates the weighted sum scores of the candidates of each pattern.
    # Returns a list with one element per row of `patterns`; a pattern
    # without candidates gets the scalar 0. The probabilities themselves are
    # derived later in ampute.mcar(), ampute.continuous() or
    # ampute.discrete().
    weights <- as.matrix(weights)
    f <- function(i) {
      if (length(P[P == (i + 1)]) == 0) {
        return(0)
      } else {
        candidates <- as.matrix(data[P == (i + 1), ])
        # For each candidate in the pattern, a weighted sum score is calculated
        if (std) {
          length_unique <- function(x) {
            return(length(unique(x)) == 1)
          }
          # shangzhi-hong, Feb 2020, #216
          # Standardize only when there is more than one candidate and no
          # column is constant.
          if (nrow(candidates) > 1 &&
              !(any(apply(candidates, 2, length_unique)))) {
            candidates <- scale(candidates)
          }
        }
        scores <- apply(candidates, 1, function(x) weights[i, ] %*% x)
        if (length(scores) > 1 && length(unique(scores)) != 1) {
          scores <- scale(scores)
        }
        return(scores)
      }
    }
    scores <- lapply(seq_len(nrow(patterns)), f)
    return(scores)
  }

  # ------------------------ recalculate.prop -----------------------------

  recalculate.prop <- function(prop, n, patterns, freq) {
    # Recalculates the proportion of missing cases for the desired
    # #missing cells.
    miss <- prop * n^2 # Desired #missing cells
    # Calculate #cases according prop and #zeros in patterns
    cases <- vapply(seq_len(nrow(patterns)),
                    function(i) (miss * freq[i]) /
                      length(patterns[i,][patterns[i,] == 0]),
                    numeric(1))
    if (sum(cases) > n) {
      stop("Proportion of missing cells is too large in combination with the desired number of missing variables", call. = FALSE)
    } else {
      prop <- sum(cases) / n
    }
    return(prop)
  }

  # -------------------------- recalculate.freq -----------------------------

  recalculate.freq <- function(freq) {
    # Rescales the frequency vector to make the sum equal to 1.
    return(freq / sum(freq))
  }

  # ------------------------- check.patterns ---------------------------------

  check.patterns <- function(patterns, freq, prop) {
    # Checks whether there are patterns with merely ones or zeros.
    # Patterns with merely ones are removed, and prop and freq are changed
    # accordingly. Patterns with merely zeros are recorded in row.zero.
    prop.one <- 0
    row.one <- c()
    for (h in seq_len(nrow(patterns))) {
      if (any(!patterns[h, ] %in% c(0, 1))) {
        stop(paste("Argument patterns can only contain 0 and 1, pattern", h,
                   "contains another element"), call. = FALSE)
      }
      if (all(patterns[h, ] %in% 1)) {
        prop.one <- prop.one + freq[h]
        row.one <- c(row.one, h)
      }
    }
    if (prop.one != 0) {
      warning(paste("Proportion of missingness has changed from", prop, "to",
                    prop.one, "because of pattern(s) with merely ones"),
              call. = FALSE)
      prop <- prop.one
      freq <- freq[-row.one]
      freq <- recalculate.freq(freq)
      patterns <- patterns[-row.one, ]
      warning("Frequency vector and patterns matrix have changed because of pattern(s) with merely ones", call. = FALSE)
    }
    prop.zero <- 0
    row.zero <- c()
    for (h in seq_len(nrow(patterns))) {
      if (all(patterns[h, ] %in% 0)) {
        prop.zero <- prop.zero + freq[h]
        row.zero <- c(row.zero, h)
      }
    }
    objects <- list(patterns = patterns, prop = prop, freq = freq,
                    row.zero = row.zero)
    return(objects)
  }

  # ------------------------ AMPUTE ------------------------------------------

  # --- data
  if (is.null(data)) {
    stop("Argument data is missing, with no default", call. = FALSE)
  }
  data <- check.dataform(data)
  if (anyNA(data)) {
    stop("Data cannot contain NAs", call. = FALSE)
  }
  if (ncol(data) < 2) {
    stop("Data should contain at least two columns", call. = FALSE)
  }
  data <- data.frame(data)
  if (any(vapply(data, Negate(is.numeric), logical(1))) && mech != "MCAR") {
    data <- as.data.frame(sapply(data, as.numeric))
    warning("Data is made numeric because the calculation of weights requires numeric data", call. = FALSE)
  }

  # --- prop: values above 1 are read as percentages
  if (prop < 0 || prop > 100) {
    stop("Proportion of missingness should be a value between 0 and 1 (for a proportion) or between 1 and 100 (for a percentage)", call. = FALSE)
  } else if (prop > 1) {
    prop <- prop / 100
  }

  # --- patterns: a vector is folded row-wise into #variables columns
  if (is.null(patterns)) {
    patterns <- ampute.default.patterns(n = ncol(data))
  } else if (is.vector(patterns) &&
             (length(patterns) / ncol(data)) %% 1 == 0) {
    patterns <- matrix(patterns, length(patterns) / ncol(data), byrow = TRUE)
    if (nrow(patterns) == 1 && all(patterns[1, ] %in% 1)) {
      stop("One pattern with merely ones results to no amputation at all, the procedure is therefore stopped", call. = FALSE)
    }
  } else if (is.vector(patterns)) {
    stop("Length of pattern vector does not match #variables", call. = FALSE)
  }
  patterns <- data.frame(patterns)

  # --- freq
  if (is.null(freq)) {
    freq <- ampute.default.freq(patterns = patterns)
  }
  if (!is.vector(freq)) {
    freq <- as.vector(freq)
    warning("Frequency should be a vector", call. = FALSE)
  }
  if (length(freq) != nrow(patterns)) {
    if (length(freq) > nrow(patterns)) {
      # Keep one frequency per pattern. seq_along(nrow(patterns)) is always
      # 1 and kept the first element only; seq_len() gives 1..#patterns.
      freq <- freq[seq_len(nrow(patterns))]
    } else {
      freq <- c(freq, rep.int(0.2, nrow(patterns) - length(freq)))
    }
    # Collapse freq so that a single, readable warning is produced
    warning(paste("Length of vector with relative frequencies does not match #patterns and is therefore changed to",
                  paste(freq, collapse = ", ")), call. = FALSE)
  }
  if (sum(freq) != 1) {
    freq <- recalculate.freq(freq = freq)
  }
  if (!bycases) {
    # prop was given in terms of cells; convert it to a proportion of cases
    prop <- recalculate.prop(prop = prop, freq = freq, patterns = patterns,
                             n = ncol(data))
  }
  check.pat <- check.patterns(patterns = patterns, freq = freq, prop = prop)
  patterns.new <- check.pat[["patterns"]]
  freq <- check.pat[["freq"]]
  prop <- check.pat[["prop"]]

  # --- mech
  if (any(!mech %in% c("MCAR", "MAR", "MNAR"))) {
    stop("Mechanism should be either MCAR, MAR or MNAR", call. = FALSE)
  }
  if (!is.vector(mech)) {
    mech <- as.vector(mech)
    warning("Mechanism should contain merely MCAR, MAR or MNAR", call. = FALSE)
  } else if (length(mech) > 1) {
    mech <- mech[1]
    warning("Mechanism should contain merely MCAR, MAR or MNAR. First element is used", call. = FALSE)
  }
  # Check if there is a pattern with merely zeros
  if (!is.null(check.pat[["row.zero"]]) && mech == "MAR") {
    stop(paste("Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR"), call. = FALSE)
  }
  if (mech == "MCAR" && !is.null(weights)) {
    weights <- NULL
    warning("Weights matrix is not used when mechanism is MCAR", call. = FALSE)
  }
  if (mech == "MCAR" && !is.null(odds)) {
    odds <- NULL
    warning("Odds matrix is not used when mechanism is MCAR", call. = FALSE)
  }

  # --- weights
  if (mech != "MCAR" && !is.null(weights)) {
    if (is.vector(weights) && (length(weights) / ncol(data)) %% 1 == 0) {
      weights <- matrix(weights, length(weights) / ncol(data), byrow = TRUE)
    } else if (is.vector(weights)) {
      stop("Length of weight vector does not match #variables", call. = FALSE)
    } else if (!is.matrix(weights) && !is.data.frame(weights)) {
      stop("Weights matrix should be a matrix", call. = FALSE)
    }
  }
  if (is.null(weights)) {
    weights <- ampute.default.weights(patterns = patterns.new, mech = mech)
  }
  weights <- as.data.frame(weights)
  if (!nrow(weights) %in% c(nrow(patterns), nrow(patterns.new))) {
    stop("The objects patterns and weights are not matching", call. = FALSE)
  }

  # --- cont
  if (!is.vector(cont)) {
    cont <- as.vector(cont)
    warning("Continuous should contain merely TRUE or FALSE", call. = FALSE)
  } else if (length(cont) > 1) {
    cont <- cont[1]
    warning("Continuous should contain merely TRUE or FALSE. First element is used", call. = FALSE)
  }
  if (!is.logical(cont)) {
    stop("Continuous should contain TRUE or FALSE", call. = FALSE)
  }
  if (cont && !is.null(odds)) {
    odds <- NULL
    warning("Odds matrix is not used when continuous probabilities (cont == TRUE) are specified", call. = FALSE)
  }
  if (!cont && !is.null(type)) {
    type <- NULL
    warning("Type is not used when discrete probabilities (cont == FALSE) are specified", call. = FALSE)
  }

  # --- type
  if (is.null(type)) {
    type <- ampute.default.type(patterns = patterns.new)
  }
  if (any(!type %in% c("LEFT","MID","TAIL","RIGHT"))) {
    stop("Type should contain LEFT, MID, TAIL or RIGHT", call. = FALSE)
  }
  if (!is.vector(type)) {
    type <- as.vector(type)
    warning("Type should be a vector of strings", call. = FALSE)
  } else if (!length(type) %in% c(1, nrow(patterns), nrow(patterns.new))) {
    type <- type[1]
    warning("Type should either have length 1 or length equal to #patterns, first element is used for all patterns", call. = FALSE)
  }

  # --- odds
  if (mech != "MCAR" && !is.null(odds) && !is.matrix(odds)) {
    if (nrow(patterns.new) == 1 && is.vector(odds)) {
      odds <- matrix(odds, nrow = 1)
    } else {
      stop("Odds matrix should be a matrix", call. = FALSE)
    }
  }
  if (is.null(odds)) {
    odds <- ampute.default.odds(patterns = patterns.new)
  }
  if (!cont) {
    for (h in seq_len(nrow(odds))) {
      if (any(!is.na(odds[h, ]) & odds[h, ] < 0)) {
        stop("Odds matrix can only have positive values", call. = FALSE)
      }
    }
  }
  if (!nrow(odds) %in% c(nrow(patterns), nrow(patterns.new))) {
    stop("The objects patterns and odds are not matching", call. = FALSE)
  }

  # Start using arguments
  # Create empty objects
  P <- NULL
  scores <- NULL
  missing.data <- NULL
  # Apply function (run = TRUE) or merely return objects (run = FALSE)
  if (run) {
    # Assign cases to the patterns according probs
    # Because 0 and 1 will be used for missingness,
    # the numbering of the patterns will start from 2
    P <- sample.int(n = nrow(patterns.new), size = nrow(data),
                    replace = TRUE, prob = freq) + 1
    # Calculate missingness according MCAR or calculate weighted sum scores
    # Standardized data is used to calculate weighted sum scores
    if (mech == "MCAR") {
      R <- ampute.mcar(P = P, patterns = patterns.new, prop = prop)
    } else {
      scores <- sum.scores(P = P, data = data, std = std, weights = weights)
      if (!cont) {
        R <- ampute.discrete(P = P, scores = scores, odds = odds, prop = prop)
      } else if (cont) {
        R <- ampute.continuous(P = P, scores = scores,
                               prop = round(prop, 3), type = type)
      }
    }
    missing.data <- data
    for (i in seq_len(nrow(patterns.new))) {
      # Only patterns that actually have candidates are applied
      if (any(P == (i + 1))) {
        missing.data[R[[i]] == 0, patterns.new[i, ] == 0] <- NA
      }
    }
  }

  # Create return object
  names(patterns.new) <- names(data)
  names(weights) <- names(data)
  call <- match.call()
  missing.data <- data.frame(missing.data)
  result <- list(call = call,
                 prop = prop,
                 patterns = patterns.new,
                 freq = freq,
                 mech = mech,
                 weights = weights,
                 cont = cont,
                 std = std,
                 type = type,
                 odds = odds,
                 amp = missing.data,
                 cand = P - 1,
                 scores = scores,
                 data = as.data.frame(data))

  # Return result
  oldClass(result) <- "mads"
  return(result)
}
mice/R/mids2spss.R0000644000176200001440000001441613416664706013465 0ustar liggesusers
#'Export \code{mids} object to SPSS
#'
#'Converts a \code{mids} object into a format recognized by SPSS, and writes
#'the data and the SPSS syntax files.
#'
#'This function automates most of the work needed to export a \code{mids}
#'object to SPSS. It uses a modified version of \code{writeForeignSPSS()} from
#'the \code{foreign} package.
The modified version allows for a choice of the #'field and decimal separators, and makes some improvements to the formatting, #'so that the generated syntax file is amenable to the \code{INCLUDE} statement #'in SPSS. #' #'Below are some things to pay attention to. #' #'The \code{SPSS} syntax file has the proper file names and separators set, so #'in principle it should run and read the data without alteration. \code{SPSS} #'is more strict than \code{R} with respect to the paths. Always use the full #'path, otherwise \code{SPSS} may not be able to find the data file. #' #'Factors in \code{R} translate into categorical variables in \code{SPSS}. The #'internal coding of factor levels used in \code{R} is exported. This is #'generally acceptable for \code{SPSS}. However, when the data are to be #'combined with existing \code{SPSS} data, watch out for any changes in the #'factor levels codes. The \code{read.spss()} in package \code{foreign} for #'reading \code{.sav} uses its own internal numbering scheme \code{1,2,3,...} #'for the levels of a factor. Consequently, changes in factor code can cause #'discrepancies in factor level when re-imported to \code{SPSS}. The solution #'is to manually recode the factor level in \code{SPSS}. #' #'\code{SPSS} will recognize the data set as a multiply imputed data set, and #'do automatic pooling in procedures where that is supported. Note however that #'pooling is an extra option only available to those who license the #'\code{MISSING VALUES} module. Without this license, \code{SPSS} will still #'recognize the structure of the data, but not do any pooling. #' #'@param imp The \code{imp} argument is an object of class \code{mids}, #'typically produced by the \code{mice()} function. #'@param filedat A character string describing the name of the output data #'file. #'@param filesps A character string describing the name of the output syntax #'file. #'@param path A character string containing the path of the output file. 
#'The value in \code{path} is appended to \code{filedat} and \code{filesps}. By
#'default, files are written to the current \code{R} working directory. If
#'\code{path=NULL} then no file path appending is done.
#'@param sep The separator between the data fields.
#'@param dec The decimal separator for numerical data.
#'@param silent A logical flag stating whether the names of the files should be
#'printed.
#'@return The return value is \code{NULL}.
#'@author Stef van Buuren, dec 2010.
#'@seealso \code{\link[=mids-class]{mids}}
#'@keywords manip
#'@export
mids2spss <- function(imp, filedat="midsdata.txt", filesps="readmids.sps",
                      path=getwd(), sep="\t", dec=".", silent=FALSE) {

  # Writes one stacked data frame to `datafile` and the matching SPSS syntax
  # (DATA LIST, VARIABLE LABELS, VALUE LABELS, split-file commands) to
  # `codefile`.
  miceWriteForeignSPSS <- function (df, datafile, codefile, varnames = NULL,
                                    dec=".", sep="\t") {
    ##adapted version of writeForeignSPSS from foreign package to write mids-objects
    adQuote <- function (x) paste0("\"", x, "\"")
    # factors are written as their internal integer codes
    dfn <- lapply(df, function(x) if (is.factor(x)) as.numeric(x) else x)
    # every record ends with a separator followed by the line break
    eol <- paste0(sep,"\n")
    write.table(dfn, file = datafile, row.names = FALSE, col.names = FALSE,
                sep = sep, dec = dec, quote = FALSE, na = "", eol=eol)
    varlabels <- names(df)
    if (is.null(varnames)) {
      varnames <- abbreviate(names(df), 8L)
      if (any(nchar(varnames) > 8L))
        stop("I cannot abbreviate the variable names to eight or fewer letters")
      if (any(varnames != varlabels))
        warning("some variable names were abbreviated")
    }
    # replace every character outside [alnum _ $ @ #] by a dot
    varnames <- gsub("[^[:alnum:]_\\$@#]", "\\.", varnames)
    dl.varnames <- varnames
    if (any(chv <- vapply(df, is.character, logical(1)))) {
      lengths <- vapply(df[chv], function(v) max(nchar(v)), numeric(1))
      if (any(lengths > 255L))
        stop("Cannot handle character variables longer than 255")
      lengths <- paste0("(A", lengths, ")")
      star <- ifelse(c(FALSE, diff(which(chv) > 1L)), " *", " ")
      dl.varnames[chv] <- paste(star, dl.varnames[chv], lengths)
    }
    # Separator clause of the DATA LIST command. It has to be built as a
    # string: cat() prints to the console and returns NULL, which used to
    # drop this clause (and its line break) from the syntax file for every
    # separator other than the tab.
    freefield <- if (sep == "\t") {
      " free (TAB)\n"
    } else {
      paste0(' free (\"', sep, '\")\n')
    }
    cat("DATA LIST FILE=", adQuote(datafile), freefield, file = codefile)
    cat(" /", dl.varnames, ".\n\n", file = codefile, append = TRUE,
        fill=60, labels=" ")
    cat("VARIABLE LABELS\n", file = codefile, append = TRUE)
    cat(" ",paste(varnames, adQuote(varlabels), "\n"), ".\n",
        file = codefile, append = TRUE)
    factors <- vapply(df, is.factor, logical(1))
    if (any(factors)) {
      cat("\nVALUE LABELS\n", file = codefile, append = TRUE)
      for (v in which(factors)) {
        cat(" /", varnames[v], "\n", file = codefile, append = TRUE)
        levs <- levels(df[[v]])
        for (u in seq_along(levs))
          cat(paste(" ",seq_along(levs)[u], adQuote(levs)[u], sep = " "),
              file = codefile, append = TRUE, fill=60)
      }
      cat(" .\n", file = codefile, append = TRUE)
    }
    cat("\nEXECUTE.\n", file = codefile, append = TRUE)
    cat("SORT CASES by Imputation_.\n", file = codefile, append = TRUE)
    cat("SPLIT FILE layered by Imputation_.\n", file = codefile, append=TRUE)
  }

  if(!is.mids(imp)) stop("Exports only objects of class 'mids'.")
  # stacked data (original data included); the second column is dropped and
  # the first one becomes the SPSS split variable
  imputed <- complete(imp, "long", include=TRUE)[,-2]
  names(imputed)[1] <- "Imputation_"
  if (!is.null(path)) {
    filedat <- file.path(path,filedat)
    filesps <- file.path(path,filesps)
  }
  miceWriteForeignSPSS(imputed, filedat, filesps, varnames=names(imputed),sep=sep,dec=dec)
  if (!silent) {
    cat("Data values written to",filedat,"\n")
    cat("Syntax file written to",filesps,"\n")
  }
}
mice/R/densityplot.R0000644000176200001440000003152313416657163014111 0ustar liggesusers
#'Density plot of observed and imputed data
#'
#'Plotting methods for imputed data using \pkg{lattice}. \code{densityplot}
#'produces plots of the densities. The function
#'automatically separates the observed and imputed data. The
#'functions extend the usual features of \pkg{lattice}.
#'
#'The argument \code{na.groups} may be used to specify (combinations of)
#'missingness in any of the variables. The argument \code{groups} can be used
#'to specify groups based on the variable values themselves. Only one of both
#'may be active at the same time.
When both are specified, \code{na.groups} #'takes precedence over \code{groups}. #' #'Use the \code{subset} and \code{na.groups} together to plot parts of the #'data. For example, select the first imputed data set by #'\code{subset=.imp==1}. #' #'Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #'specified in the arguments list to alter the plotting symbols. If #'\code{length(col)==2}, the color specification defines the observed and #'missing groups. \code{col[1]} is the color of the 'observed' data, #'\code{col[2]} is the color of the missing or imputed data. A convenient color #'choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #'data, and a transparent red color for the imputed data. A good choice is #'\code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the #'duration of the session by running \code{mice.theme()}. #' #'@aliases densityplot #'@param x A \code{mids} object, typically created by \code{mice()} or #'\code{mice.mids()}. #'@param data Formula that selects the data to be plotted. This argument #'follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #'variables (used for the per-panel display) and the optional conditioning #'variables (which define the subsets plotted in different panels) to be used #'in the plot. #' #'The formula is evaluated on the complete data set in the \code{long} form. #'Legal variable names for the formula include \code{names(x$data)} plus the #'two administrative factors \code{.imp} and \code{.id}. #' #'\bold{Extended formula interface:} The primary variable terms (both the LHS #'\code{y} and RHS \code{x}) may consist of multiple terms separated by a #'\sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #'taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #'\code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #'\emph{separate panels}. 
This behavior differs from standard \pkg{lattice}. #'\emph{Only combine terms of the same type}, i.e. only factors or only #'numerical variables. Mixing numerical and categorical data occasionally #'produces odd labeling of the vertical axis. #' #'The function \code{densityplot} does not use the \code{y} terms in the #'formula. Density plots for \code{x1} and \code{x2} are requested as \code{~ #'x1 + x2}. #'@param na.groups An expression evaluating to a logical vector indicating #'which two groups are distinguished (e.g. using different colors) in the #'display. The environment in which this expression is evaluated is the #'response indicator \code{is.na(x$data)}. #' #'The default \code{na.groups = NULL} contrasts the observed and missing data #'in the LHS \code{y} variable of the display, i.e. groups created by #'\code{is.na(y)}. The expression \code{y} creates the groups according to #'\code{is.na(y)}. The expression \code{y1 & y2} creates groups by #'\code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #'\code{is.na(y1) | is.na(y2)}, and so on. #'@param groups This is the usual \code{groups} argument in \pkg{lattice}. It #'differs from \code{na.groups} because it evaluates in the completed data #'\code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #'\code{na.groups} evaluates in the response indicator. See #'\code{\link{xyplot}} for more details. When both \code{na.groups} and #'\code{groups} are specified, \code{na.groups} takes precedence, and #'\code{groups} is ignored. #'@param plot.points A logical used in \code{densityplot} that signals whether #'the points should be plotted. #'@param theme A named list containing the graphical parameters. The default #'function \code{mice.theme} produces a short list of default colors, line #'width, and so on. The extensive list may be obtained from #'\code{trellis.par.get()}. 
Global graphical parameters like \code{col} or #'\code{cex} in high-level calls are still honored, so first experiment with #'the global parameters. Many settings consist of a pair. For example, #'\code{mice.theme} defines two symbol colors. The first is for the observed #'data, the second for the imputed data. The theme settings only exist during #'the call, and do not affect the trellis graphical parameters. #'@param mayreplicate A logical indicating whether color, line widths, and so #'on, may be replicated. The graphical functions attempt to choose #'"intelligent" graphical parameters. For example, the same color can be #'replicated for different elements, e.g. use all reds for the imputed data. #'Replication may be switched off by setting the flag to \code{FALSE}, in order #'to allow the user to gain full control. #'@param thicker Used in \code{densityplot}. Multiplication factor of the line #'width of the observed density. \code{thicker=1} uses the same thickness for #'the observed and imputed data. #'@param as.table See \code{\link[lattice:xyplot]{xyplot}}. #'@param panel See \code{\link{xyplot}}. #'@param default.prepanel See \code{\link[lattice:xyplot]{xyplot}}. #'@param outer See \code{\link[lattice:xyplot]{xyplot}}. #'@param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #'@param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #'@param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #'@param subset See \code{\link[lattice:xyplot]{xyplot}}. #'@param \dots Further arguments, usually not directly processed by the #'high-level functions documented here, but instead passed on to other #'functions. #'@return The high-level functions documented here, as well as other high-level #'Lattice functions, return an object of class \code{"trellis"}. 
The #'\code{\link[lattice:update.trellis]{update}} method can be used to #'subsequently update components of the object, and the #'\code{\link[lattice:print.trellis]{print}} method (usually called by default) #'will plot it on an appropriate plotting device. #'@note The first two arguments (\code{x} and \code{data}) are reversed #'compared to the standard Trellis syntax implemented in \pkg{lattice}. This #'reversal was necessary in order to benefit from automatic method dispatch. #' #'In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #'in \pkg{lattice} the argument \code{x} is always a formula. #' #'In \pkg{mice} the argument \code{data} is always a formula object, whereas in #'\pkg{lattice} the argument \code{data} is usually a data frame. #' #'All other arguments have identical interpretation. #' #'\code{densityplot} errs on empty groups, which occurs if all observations in #'the subgroup contain \code{NA}. The relevant error message is: \code{Error in #'density.default: ... need at least 2 points to select a bandwidth #'automatically}. There is yet no workaround for this problem. Use the more #'robust \code{bwplot} or \code{stripplot} as a replacement. #'@author Stef van Buuren #'@seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{stripplot}}, #'\code{\link{bwplot}}, \code{\link{lattice}} for an overview of the #'package, as well as \code{\link[lattice:densityplot]{densityplot}}, #'\code{\link[lattice:panel.densityplot]{panel.densityplot}}, #'\code{\link[lattice:print.trellis]{print.trellis}}, #'\code{\link[lattice:trellis.par.set]{trellis.par.set}} #'@references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #'Visualization with R}, Springer. #' #'van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #'Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #'Software}, \bold{45}(3), 1-67. 
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords hplot
#'@examples
#'imp <- mice(boys, maxit=1)
#'
#'### density plot of head circumference per imputation
#'### blue is observed, red is imputed
#'densityplot(imp, ~hc|.imp)
#'
#'### All combined in one panel.
#'densityplot(imp, ~hc)
#'
#'@export
densityplot.mids <- function(x, data, na.groups = NULL, groups = NULL,
                             as.table = TRUE, plot.points = FALSE,
                             theme = mice.theme(), mayreplicate = TRUE,
                             thicker = 2.5, allow.multiple = TRUE, outer = TRUE,
                             drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"),
                             panel = lattice::lattice.getOption("panel.densityplot"),
                             default.prepanel = lattice::lattice.getOption("prepanel.default.densityplot"),
                             ..., subscripts = TRUE, subset = TRUE) {
  # the unevaluated call is used further down to see which of na.groups,
  # groups, xlab, scales and subset were actually supplied by the caller
  call <- match.call()
  if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object")

  ## unpack data and response indicator
  # cd: stacked ("long") data including the original data (include=TRUE)
  # r : TRUE where the original data x$data are missing
  cd <- data.frame(complete(x, "long", include=TRUE))
  r <- as.data.frame(is.na(x$data))

  ## evaluate na.group in response indicator
  nagp <- eval(expr=substitute(na.groups), envir=r, enclos=parent.frame())
  if (is.expression(nagp)) nagp <- eval(expr=nagp, envir=r, enclos=parent.frame())

  ## evaluate groups in imputed data
  ngp <- eval(expr=substitute(groups), envir=cd, enclos=parent.frame())
  if (is.expression(ngp)) ngp <- eval(expr=ngp, envir=cd, enclos=parent.frame())
  groups <- ngp

  ## evaluate subset in imputed data
  ss <- eval(expr=substitute(subset), envir=cd, enclos=parent.frame())
  if (is.expression(ss)) ss <- eval(expr=ss, envir=cd, enclos=parent.frame())
  subset <- ss

  ## evaluate further arguments before parsing
  dots <- list(...)
  args <- list(panel = panel, default.prepanel = default.prepanel,
               allow.multiple = allow.multiple, outer = outer,
               drop.unused.levels = drop.unused.levels,
               subscripts = subscripts, as.table = as.table,
               plot.points = plot.points)

  ## create formula if not given (in call$data !)
  # the first two columns of the long data are skipped
  # (presumably the administrative .imp and .id columns -- confirm)
  vnames <- names(cd)[-seq_len(2)]
  allfactors <- vapply(cd, is.factor, logical(1))[-seq_len(2)]
  if (missing(data)) {
    # default: all non-factor variables with more than 2 and fewer than
    # nrow(x$data) - 1 missing values
    vnames <- vnames[!allfactors & x$nmis>2 & x$nmis < nrow(x$data)-1]
    formula <- as.formula(paste("~",paste(vnames,collapse="+",sep=""),sep=""))
  } else formula <- data

  ## determine the y-variables
  form <- lattice::latticeParseFormula(model=formula, data=cd, subset = subset,
                                       groups = groups, multiple = allow.multiple,
                                       outer = outer, subscripts = TRUE,
                                       drop = drop.unused.levels)
  xnames <- unlist(lapply(strsplit(form$right.name," \\+ "), rm.whitespace)) ## Jul2011

  ## calculate selection vector gp
  nona <- is.null(call$na.groups)
  if (!is.null(call$groups) && nona) gp <- call$groups
  else {
    if (nona) {
      ## na.df <- r[, xnames, drop=FALSE]
      ## imp0 <- rep(cd$.imp==0, x$m+1)
      ## ss <- rep(subset, x$m+1)
      ## gp <- unlist(lapply(na.df, rep, x$m+1))
      ## gp[imp0] <- !gp[imp0]
      ## call$subset <- ss & gp
      # in the rows with .imp != 0, blank out the values that were observed
      # in the original data (r is recycled over the stacked rows)
      for (i in seq_along(xnames)) {
        xvar <- xnames[i]
        select <- cd$.imp!=0 & !r[,xvar]
        cd[select, xvar] <- NA
      }
      gp <- rep.int(cd$.imp, length(xnames))
    } else {
      # same, but rows are selected through the evaluated na.groups
      for (i in seq_along(xnames)) {
        xvar <- xnames[i]
        select <- cd$.imp!=0 & !nagp
        cd[select, xvar] <- NA
      }
      gp <- rep.int(cd$.imp, length(xnames))
    }
  }

  ## replicate color 2 if group=.imp is part of xnames
  mustreplicate <- !(!is.null(call$groups) && nona) && mayreplicate
  if (mustreplicate) {
    # first theme entry once, second entry x$m times
    theme$superpose.line$col <- rep(theme$superpose.line$col[seq_len(2)], c(1,x$m))
    theme$superpose.line$lwd <- rep(c(theme$superpose.line$lwd[1]*thicker,
                                      theme$superpose.line$lwd[1]),c(1,x$m))
    theme$superpose.symbol$col <- rep(theme$superpose.symbol$col[seq_len(2)], c(1,x$m))
    # no symbol for the first group; plotting codes 49, 50, ... for the rest
    theme$superpose.symbol$pch <- c(NA,49:(49+x$m-1))
  }

  ## change axis defaults of extended formula interface
  if (is.null(call$xlab)) {
    args$xlab <- ""
    if (length(xnames)==1) args$xlab <- xnames
  }
  if (is.null(call$scales)) {
    args$scales <- list()
    if (length(xnames)>1)
      args$scales <- list(x=list(relation="free"), y=list(relation="free"))
  }

  ## ready
  args <- c(x=formula, data=list(cd), groups=list(gp), args, dots, subset=call$subset)

  ## go
  tp <- do.call("densityplot", args)
  tp <- update(tp, par.settings = theme)
  return(tp)
}
mice/R/internal.R0000644000176200001440000000742413416664706013353 0ustar liggesusers

# Logical row selector: rows with complete y and x that are flagged in ry,
# or rows with complete x that are flagged in wy.
keep.in.model <- function(y, ry, x, wy) (complete.cases(y, x) & ry) | (complete.cases(x) & wy)

# Logical row selector: rows flagged in wy for which x is not complete.
impute.with.na <- function(x, wy) !complete.cases(x) & wy

# Logs a message through updateLog() when sum(ry) - ncol(x) - 1 is below 1
# while at least one case is flagged in ry. The argument y is not used.
check.df <- function(x, y, ry) {
  # if needed, writes the df warning message to the log
  df <- sum(ry) - ncol(x) - 1
  mess <- paste("df set to 1. # observed cases:", sum(ry), " # predictors:", ncol(x) + 1)
  if (df < 1 && sum(ry) > 0)
    updateLog(out = mess, frame = 4)
}

# Decides which columns of x are kept as predictors of y.
#   x, y, ry  predictor matrix, target and its row indicator (rows with
#             ry == TRUE are used)
#   eps       variance / eigenvalue-ratio threshold, must be positive
#   maxcor    columns whose correlation with y is not below maxcor are dropped
#   allow.na  if TRUE and no row is flagged in ry, all columns are kept
#   frame     passed on to updateLog()
# Returns NULL when x has no columns, otherwise a logical vector of length
# ncol(x). Names of dropped columns are written to the log via updateLog().
remove.lindep <- function(x, y, ry, eps = 1e-04, maxcor = 0.99,
                          allow.na = TRUE, frame = 4, ...) {
  # returns a logical vector of length ncol(x)

  if (ncol(x) == 0)
    return(NULL)
  if (eps <= 0)
    stop("\n Argument 'eps' must be positive.")

  # Keep all predictors if we allow imputation of fully missing y
  if (allow.na && sum(ry) == 0) return(rep.int(TRUE, ncol(x)))

  xobs <- x[ry, , drop = FALSE]
  yobs <- as.numeric(y[ry])
  # (near-)constant y: no predictor is kept
  if (var(yobs) < eps) return(rep(FALSE, ncol(xobs)))

  # drop (near-)constant columns; an NA variance counts as "drop"
  keep <- unlist(apply(xobs, 2, var) > eps)
  keep[is.na(keep)] <- FALSE
  # despite its name, highcor is TRUE for columns whose (signed) correlation
  # with y is BELOW maxcor, i.e. for the columns that may stay
  highcor <- suppressWarnings(unlist(apply(xobs, 2, cor, yobs) < maxcor))
  keep <- keep & highcor
  if (all(!keep))
    updateLog(out = "All predictors are constant or have too high correlation.",
              frame = frame)

  # no need to calculate correlations, so return
  k <- sum(keep)
  if (k <= 1L) return(keep)  # at most one TRUE

  # correlation between x's
  cx <- cor(xobs[, keep, drop = FALSE], use = "all.obs")
  eig <- eigen(cx, symmetric = TRUE)
  ncx <- cx

  # while the ratio of the k-th to the first eigenvalue is below eps, drop
  # the kept column with the largest absolute loading on eigenvector k
  # NOTE(review): keep[keep] is an all-TRUE vector, so ncx looks identical to
  # cx on every pass -- confirm that this is intended
  while (eig$values[k]/eig$values[1] < eps) {
    j <- seq_len(k)[order(abs(eig$vectors[, k]), decreasing = TRUE)[1]]
    keep[keep][j] <- FALSE
    ncx <- cx[keep[keep], keep[keep], drop = FALSE]
    k <- k - 1
    eig <- eigen(ncx)
  }
  if (!all(keep)) {
    out <- paste(dimnames(x)[[2]][!keep], collapse = ", ")
    updateLog(out = out, frame = frame)
  }
  return(keep)
}


## make list of collinear
## variables to remove
# Returns the names of the columns of x whose absolute pairwise correlation
# with an earlier column (columns ordered by decreasing number of observed
# values) is at least `threshold`.
find.collinear <- function(x, threshold = 0.999, ...) {
  nvar <- ncol(x)
  x <- data.matrix(x)

  # columns with the most observed values come first  ## SvB 24mar2011
  n_observed <- colSums(!is.na(x))
  by_coverage <- order(n_observed, decreasing = TRUE)
  ordered_x <- x[, by_coverage, drop = FALSE]
  labels <- dimnames(ordered_x)[[2]]

  corr <- suppressWarnings(cor(ordered_x, use = "pairwise.complete.obs"))
  # only pairs (row < column) are inspected, so of two collinear columns the
  # later one is reported
  above_diagonal <- outer(seq_len(nvar), seq_len(nvar), "<")
  too_close <- above_diagonal & (abs(corr) >= threshold)
  flagged <- apply(too_close, 2, any, na.rm = TRUE)
  labels[flagged]
}


# Appends one record (iteration, imputation, variable, method, message) to the
# `loggedEvents` object and switches `state$log` on. Both objects are located
# with ma_exists(), starting at position `frame`.
updateLog <- function(out = NULL, meth = NULL, frame = 1) {

  # find structures defined a mice() level
  where_state <- ma_exists("state", frame)$pos
  where_events <- ma_exists("loggedEvents", frame)$pos

  state <- get("state", where_state)
  events <- get("loggedEvents", where_events)

  method <- if (is.null(meth)) state$meth else meth
  text <- if (is.null(out)) "" else out
  entry <- data.frame(it = state$it,
                      im = state$im,
                      dep = state$dep,
                      meth = method,
                      out = text)

  # the first logged event replaces the placeholder, later ones are appended
  if (state$log) {
    entry <- rbind(events, entry)
  }
  state$log <- TRUE
  assign("state", state, pos = where_state, inherits = TRUE)
  assign("loggedEvents", entry, pos = where_events, inherits = TRUE)
  return(NULL)
}

# Symmetrizes a square matrix by averaging it with its transpose.
sym <- function(x) {
  doubled <- x + t(x)
  doubled / 2
}


# This helper function was copied from
# https://github.com/alexanderrobitzsch/miceadds/blob/master/R/ma_exists.R
# Looks for object `x` at `pos`; when it is not there, the calling frames
# parent.frame(n) for n in n_index + 1 are tried in turn. Returns a list with
# is_there (found?), obj (the object or NULL) and pos (last place inspected).
ma_exists <- function(x, pos, n_index = 1:8) {
  n_index <- n_index + 1
  found <- exists(x, where = pos)
  if (!found) {
    # parent.frame() must be called from this function body: the frame
    # numbers are relative to ma_exists itself
    for (depth in n_index) {
      pos <- parent.frame(n = depth)
      found <- exists(x, where = pos)
      if (found) break
    }
  }
  value <- if (found) get(x, pos) else NULL
  #--- output
  list(is_there = found, obj = value, pos = pos)
}
mice/R/mice.impute.norm.R0000644000176200001440000001776513416664706014741 0ustar liggesusers
#'Imputation by Bayesian linear regression
#'
#'Calculates imputations for univariate missing data by Bayesian linear
#'regression, also known as the normal model.
#'
#'@aliases mice.impute.norm norm
#'@inheritParams mice.impute.pmm
#'@return Vector with imputed data, same type as \code{y}, and of length
#'\code{sum(wy)}
#'@author Stef van Buuren, Karin Groothuis-Oudshoorn
#'@details
#' Imputation of \code{y} by the normal model by the method defined by
#' Rubin (1987, p. 167). The procedure is as follows:
#'
#'\enumerate{
#'\item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.}
#'\item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge
#'parameter \eqn{\kappa}.}
#'\item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}}
#'\item{Draw a random variable \eqn{\dot g \sim \chi^2_\nu} with \eqn{\nu=n_1 - q}.}
#'\item{Calculate \eqn{\dot\sigma^2 = (y_{obs} - X_{obs}\hat\beta)'(y_{obs} - X_{obs}\hat\beta)/\dot g.}}
#'\item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.}
#'\item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.}
#'\item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.}
#'\item{Draw \eqn{n_0} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_2}.}
#'\item{Calculate the \eqn{n_0} values \eqn{y_{imp} = X_{mis}\dot\beta + \dot z_2\dot\sigma}.}
#'}
#'
#'Using \code{mice.impute.norm} for all columns emulates Schafer's NORM method (Schafer, 1997).
#'@references
#'Rubin, D.B (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley & Sons.
#'
#'Schafer, J.L. (1997). Analysis of incomplete multivariate data. London: Chapman & Hall.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.norm <- function(y, ry, x, wy = NULL, ...) {
  # without an explicit wy, impute every entry that is not flagged in ry
  if (is.null(wy)) {
    wy <- !ry
  }
  # design matrix with a leading column of ones
  x <- cbind(1, as.matrix(x))
  draw <- .norm.draw(y, ry, x, ...)
  # the parameter draw comes first and the noise last, so the random number
  # stream is consumed in the same order as before
  predicted <- x[wy, ] %*% draw$beta
  noise <- rnorm(sum(wy)) * draw$sigma
  return(predicted + noise)
}


#' Draws values of beta and sigma by Bayesian linear regression
#'
#' This function draws random values of beta and sigma under the Bayesian
#' linear regression model as described in Rubin (1987, p. 167).
#' This function can be called by user-specified imputation functions.
#'
#'@aliases norm.draw .norm.draw
#'@param y Incomplete data vector of length \code{n}
#'@param ry Vector of missing data pattern (\code{FALSE}=missing,
#'\code{TRUE}=observed)
#'@param x Matrix (\code{n} x \code{p}) of complete covariates.
#'@param rank.adjust Argument that specifies whether \code{NA}'s in the
#'coefficients need to be set to zero. Only relevant when \code{ls.meth = "qr"}
#'AND the predictor matrix is rank-deficient.
#'@param ... Other named arguments.
#'@return A \code{list} containing components \code{coef} (least squares estimate),
#'\code{beta} (drawn regression weights), \code{sigma} (drawn value of the
#'residual standard deviation) and \code{estimation} (the least squares
#'method reported by \code{estimice}).
#'@references
#'Rubin, D.B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York: Wiley.
#'@author Gerko Vink, 2018, for this version, based on earlier versions written
#'by Stef van Buuren, Karin Groothuis-Oudshoorn, 2017
#'@export
norm.draw <- function(y, ry, x, rank.adjust = TRUE, ...) {
  # hand the caller's rank.adjust through; it used to be overridden by a
  # hard-coded TRUE, so rank.adjust = FALSE had no effect
  return(.norm.draw(y, ry, x, rank.adjust = rank.adjust, ...))
}

###'@rdname norm.draw
###'@export
.norm.draw <- function (y, ry, x, rank.adjust = TRUE, ...){
  # least squares fit on the rows flagged in ry
  p <- estimice(x[ry, , drop = FALSE], y[ry], ...)
  # draw sigma from the residual sum of squares and a chi-square variate
  sigma.star <- sqrt(sum((p$r)^2)/rchisq(1, p$df))
  # draw beta around the estimate, using the Cholesky factor of the
  # symmetrized matrix v
  beta.star <- p$c + (t(chol(sym(p$v))) %*% rnorm(ncol(x))) * sigma.star
  parm <- list(p$c, beta.star, sigma.star, p$ls.meth)
  names(parm) <- c("coef", "beta", "sigma", "estimation")
  # NA coefficients (and the NA draws that follow from them) are set to zero
  # when rank.adjust is requested
  if(any(is.na(parm$coef)) && rank.adjust){
    parm$coef[is.na(parm$coef)] <- 0
    parm$beta[is.na(parm$beta)] <- 0
  }
  return(parm)
}

#' Computes least squares parameters
#'
#' This function computes least squares estimates, variance/covariance matrices,
#' residuals and degrees of freedom according to ridge regression, QR decomposition
#' or Singular Value Decomposition. This function is internally called by .norm.draw(),
#' but can be called by any user-specified imputation function.
#'
#' When calculating the inverse of the crossproduct of the predictor matrix,
#' problems may arise. For example, taking the inverse is not possible when the
#' predictor matrix is rank deficient, or when the estimation problem is
#' computationally singular. This function detects such error cases and
#' automatically falls back to adding a ridge penalty to the diagonal of the
#' crossproduct to allow for proper calculation of the inverse.
#'
#'@aliases estimice
#'@param x Matrix (\code{n} x \code{p}) of complete covariates.
#'@param y Incomplete data vector of length \code{n}
#'@param ls.meth the method to use for obtaining the least squares estimates. By
#'default parameters are drawn by means of QR decomposition.
#'@param ridge A small numerical value specifying the size of the ridge used.
#' The default value \code{ridge = 1e-05} represents a compromise between stability
#' and unbiasedness. Decrease \code{ridge} if the data contain many junk variables.
#' Increase \code{ridge} for highly collinear data.
#'@param ... Other named arguments.
#'@return A \code{list} containing components \code{c} (least squares estimate),
#'\code{r} (residuals), \code{v} (variance/covariance matrix), \code{df}
#'(degrees of freedom) and \code{ls.meth} (the method used).
#'@author Gerko Vink, 2018
#'@export
estimice <- function(x, y, ls.meth = "qr", ridge = 1e-05, ...){
  df <- max(length(y) - ncol(x), 1)

  # Logged when the plain inverse fails and a ridge penalty is used instead.
  # The updateLog()/printFlag calls below are repeated inline on purpose:
  # they look up objects by call depth, so they must not be moved into a
  # helper function.
  mess <- "* A ridge penalty had to be used to calculate the inverse crossproduct of the predictor matrix. Please remove duplicate variables or unique respondent names/numbers from the imputation model. It may be advisable to check the fraction of missing information (fmi) to evaluate the validity of the imputation model"

  if (ls.meth == "qr"){
    fit <- lm.fit(x = x, y = y)
    coefs <- t(fit$coefficients)
    resid <- t(fit$residuals)
    xtx <- as.matrix(crossprod(qr.R(fit$qr)))
    v <- try(solve(xtx), silent = TRUE)
    if(inherits(v, "try-error")){
      pen <- diag(xtx) * ridge #calculate ridge penalty
      # nrow keeps a length-one penalty a 1 x 1 matrix (diag(scalar) would
      # build an identity matrix instead)
      v <- solve(xtx + diag(pen, nrow = length(pen))) #add ridge penalty to allow inverse of v
      updateLog(out = mess, frame = 6)
      if (get("printFlag", parent.frame(search.parents("printFlag"))))
        cat("*") #indicator of added ridge penalty in the printed iteration history
    }
    # transposing twice yields column matrices
    return(list(c=t(coefs), r=t(resid), v=v, df=df, ls.meth=ls.meth))
  }

  if (ls.meth == "ridge"){
    xtx <- crossprod(x)
    pen <- ridge * diag(xtx)
    v <- solve(xtx + diag(pen, nrow = length(pen)))
    coefs <- t(y) %*% x %*% v
    resid <- y - x %*% t(coefs)
    return(list(c=t(coefs), r=resid, v=v, df=df, ls.meth=ls.meth))
  }

  if (ls.meth == "svd"){
    s <- svd(x)
    coefs <- s$v %*% ((t(s$u) %*% y) / s$d)
    fitted <- x %*% coefs
    # NOTE(review): this is fitted - y, the opposite sign of the other two
    # branches; left as is because callers may rely on it
    resid <- fitted - y
    # squared singular values on the diagonal; nrow makes this work for a
    # single-column x, where diag(s$d) would build an identity matrix
    d2 <- diag(s$d^2, nrow = length(s$d))
    xtx <- s$v %*% d2 %*% t(s$v)
    v <- try(solve(xtx), silent = TRUE)
    if(inherits(v, "try-error")){
      pen <- diag(xtx) * ridge #calculate ridge penalty
      v <- solve(xtx + diag(pen, nrow = length(pen))) #add ridge penalty to allow inverse of v
      updateLog(out = mess, frame = 6)
      if (get("printFlag", parent.frame(search.parents("printFlag"))))
        cat("*") #indicator of added ridge penalty in the printed iteration history
    }
    return(list(c=coefs, r=resid, v=v, df=df, ls.meth=ls.meth))
  }

  # an unknown method used to fall through and return NULL silently
  stop("Argument 'ls.meth' must be one of \"qr\", \"ridge\" or \"svd\".")
}

# Returns the smallest n >= start for which an object called `name` can be
# found from parent.frame(n).
search.parents <- function(name, start = 4){
  # look up `name` itself; the lookup used to be hard-coded to "printFlag"
  while(inherits(try(get(name, parent.frame(start)), silent = TRUE), "try-error")){
    start = start + 1
  }
  start
}
mice/R/mids2mplus.R0000644000176200001440000000505513416657163013633 0ustar liggesusers
#'Export \code{mids} object to Mplus
#'
#'Converts a \code{mids} object into a format recognized by Mplus, and writes
#'the data and the Mplus input files
#'
#'This function automates most of the work needed to export a \code{mids}
#'object to \code{Mplus}.
The function writes the multiple imputation datasets, #'the file that contains the names of the multiple imputation data sets and an #'\code{Mplus} input file. The \code{Mplus} input file has the proper file #'names, so in principle it should run and read the data without alteration. #'\code{Mplus} will recognize the data set as a multiply imputed data set, and #'do automatic pooling in procedures where that is supported. #' #'@param imp The \code{imp} argument is an object of class \code{mids}, #'typically produced by the \code{mice()} function. #'@param file.prefix A character string describing the prefix of the output #'data files. #'@param path A character string containing the path of the output file. By #'default, files are written to the current \code{R} working directory. #'@param sep The separator between the data fields. #'@param dec The decimal separator for numerical data. #'@param silent A logical flag stating whether the names of the files should be #'printed. #'@return The return value is \code{NULL}. #'@author Gerko Vink, 2011. 
#'@seealso \code{\link[=mids-class]{mids}}, \code{\link{mids2spss}}
#'@keywords manip
#'@export
mids2mplus <- function(imp, file.prefix="imp", path=getwd(), sep="\t", dec=".", silent = FALSE) {
  m <- imp$m

  # one .dat file per completed data set; remember the bare file names
  file.list <- matrix(0, m, 1)
  for (k in seq_len(m)) {
    dat.name <- paste0(file.prefix, k, ".dat")
    write.table(complete(imp, k), file.path(path, dat.name),
                sep = sep, dec = dec, col.names = FALSE, row.names = FALSE)
    file.list[k, ] <- dat.name
  }

  # index file that lists the data files
  list.path <- file.path(path, paste0(file.prefix, "list.dat"))
  write.table(file.list, list.path,
              sep = sep, dec = dec, col.names = FALSE, row.names = FALSE,
              quote = FALSE)

  # three-line Mplus input script pointing at the index file
  varnames <- paste(colnames(complete(imp, 1)), collapse = " ")
  script <- matrix(0, 3, 1)
  script[1, ] <- paste0("DATA: FILE IS ", file.prefix, "list.dat;")
  script[2, ] <- "TYPE = IMPUTATION;"
  script[3, ] <- paste0("VARIABLE: NAMES ARE ", varnames, ";")
  inp.path <- file.path(path, paste0(file.prefix, "list.inp"))
  write.table(script, inp.path,
              sep = sep, dec = dec, col.names = FALSE, row.names = FALSE,
              quote = FALSE)

  # report what was written unless asked to stay quiet
  if (!silent) {
    cat("Data values written to", file.path(path, paste0(file.prefix, 1, ".dat")),
        "through", paste0(file.prefix, m, ".dat"), "\n")
    cat("Data names written to", list.path, "\n")
    cat("Mplus code written to", inp.path, "\n")
  }
}
# mice/R/predictorMatrix.R0000644000176200001440000000771713535173041014711 0ustar liggesusers
#' Creates a \code{predictorMatrix} argument
#'
#' This helper function creates a valid \code{predictorMatrix}. The
#' \code{predictorMatrix} is an argument to the \code{mice} function.
#' It specifies the target variable or block in the rows, and the
#' predictor variables on the columns. An entry of \code{0} means that
#' the column variable is NOT used to impute the row variable or block.
#' A nonzero value indicates that it is used.
#' @param data A \code{data.frame} with the source data
#' @param blocks An optional specification for blocks of variables in
#' the rows. The default assigns each variable in its own block.
#' @return A matrix
#' @seealso \code{\link{make.blocks}}
#' @examples
#' make.predictorMatrix(nhanes)
#' make.predictorMatrix(nhanes, blocks = make.blocks(nhanes, "collect"))
#' @export
make.predictorMatrix <- function(data, blocks = make.blocks(data)) {
  data <- check.dataform(data)
  predictorMatrix <- matrix(1, nrow = length(blocks), ncol = ncol(data))
  dimnames(predictorMatrix) <- list(names(blocks), colnames(data))
  # zero the cell whose row name equals its column name
  for (i in row.names(predictorMatrix))
    predictorMatrix[i, colnames(predictorMatrix) %in% i] <- 0
  predictorMatrix
}

# Validate a user-supplied predictorMatrix against the data and the blocks.
#
# predictorMatrix  Matrix to check.
# data             The data; passed through check.dataform() first.
# blocks           Optional list of blocks. When NULL the matrix must be
#                  square.
#
# Returns the (possibly renamed) matrix with a zeroed diagonal when `blocks`
# is NULL; otherwise a list with elements `predictorMatrix` and `blocks`,
# where missing row, column or block names have been borrowed from each other
# or from the data. Stops with an error on any inconsistency.
check.predictorMatrix <- function(predictorMatrix,
                                  data,
                                  blocks = NULL) {
  data <- check.dataform(data)

  if (!is.matrix(predictorMatrix))
    stop("predictorMatrix not a matrix", call. = FALSE)
  if (any(dim(predictorMatrix) == 0L))
    stop("predictorMatrix has no rows or columns", call. = FALSE)

  # if we have no blocks, restrict to square predictorMatrix
  if (is.null(blocks)) {
    if (nrow(predictorMatrix) != ncol(predictorMatrix))
      stop(paste("If no blocks are specified, predictorMatrix must",
                 "have same number of rows and columns"),
           call. = FALSE)
    if (is.null(dimnames(predictorMatrix))) {
      if (ncol(predictorMatrix) == ncol(data))
        dimnames(predictorMatrix) <- list(colnames(data), colnames(data))
      else
        stop("Missing row/column names in predictorMatrix", call. = FALSE)
    }
    # Match names literally. Building a regular expression from the name
    # would let "." or "+" in a variable name hit the wrong column or miss
    # its own column.
    for (i in row.names(predictorMatrix))
      predictorMatrix[i, colnames(predictorMatrix) %in% i] <- 0
    return(predictorMatrix)
  }

  # check conforming arguments
  if (nrow(predictorMatrix) > length(blocks))
    stop(paste0("predictorMatrix has more rows (", nrow(predictorMatrix),
                ") than blocks (", length(blocks), ")"),
         call. = FALSE)

  # borrow rownames from blocks if needed
  if (is.null(rownames(predictorMatrix)) &&
      nrow(predictorMatrix) == length(blocks))
    rownames(predictorMatrix) <- names(blocks)
  if (is.null(rownames(predictorMatrix)))
    stop("Unable to set row names of predictorMatrix", call. = FALSE)

  # borrow blocknames from predictorMatrix if needed
  if (is.null(names(blocks)) &&
      nrow(predictorMatrix) == length(blocks))
    names(blocks) <- rownames(predictorMatrix)
  if (is.null(names(blocks)))
    stop("Unable to set names of blocks", call. = FALSE)

  # check existence of row names in blocks
  found <- rownames(predictorMatrix) %in% names(blocks)
  if (!all(found))
    stop("Names not found in blocks: ",
         paste(rownames(predictorMatrix)[!found], collapse = ", "),
         call. = FALSE)

  # borrow colnames from data if needed
  if (is.null(colnames(predictorMatrix)) &&
      ncol(predictorMatrix) == ncol(data))
    colnames(predictorMatrix) <- names(data)
  if (is.null(colnames(predictorMatrix)))
    stop("Unable to set column names of predictorMatrix", call. = FALSE)

  # check existence of variable names on data
  found <- colnames(predictorMatrix) %in% names(data)
  if (!all(found))
    stop("Names not found in data: ",
         paste(colnames(predictorMatrix)[!found], collapse = ", "),
         call. = FALSE)

  list(predictorMatrix = predictorMatrix,
       blocks = blocks)
}
# mice/R/mira.R0000644000176200001440000000532013574247243012456 0ustar liggesusers
#'Multiply imputed repeated analyses (\code{mira})
#'
#'The \code{mira} object is generated by the \code{with.mids()} function.
#'The \code{as.mira()}
#'function takes the results of repeated complete-data analysis stored as a
#'list, and turns it into a \code{mira} object that can be pooled.
#' #'@section Slots: #' \describe{ #' #' \item{\code{.Data}:}{Object of class \code{"list"} containing the #' following slots:} #' \item{\code{call}:}{The call that created the object.} #' \item{\code{call1}:}{The call that created the \code{mids} object that was used #'in \code{call}.} #' \item{\code{nmis}:}{An array containing the number of missing observations per #'column.} #' \item{\code{analyses}:}{A list of \code{m} components containing the individual #'fit objects from each of the \code{m} complete data analyses.} #' } #' #' @details #' In versions prior to \code{mice 3.0} pooling required only that #' \code{coef()} and \code{vcov()} methods were available for fitted #' objects. \emph{This feature is no longer supported}. The reason is that \code{vcov()} #' methods are inconsistent across packages, leading to buggy behaviour #' of the \code{pool()} function. Since \code{mice 3.0+}, the \code{broom} #' package takes care of filtering out the relevant parts of the #' complete-data analysis. It may happen that you'll see the messages #' like \code{No method for tidying an S3 object of class ...} or #' \code{Error: No glance method for objects of class ...}. The royal #' way to solve this problem is to write your own \code{glance()} and \code{tidy()} #' methods and add these to \code{broom} according to the specifications #' given in \url{https://broom.tidyverse.org/articles/adding-tidiers.html}. #' #' #'The \code{mira} class of objects has methods for the #' following generic functions: \code{print}, \code{summary}. #' #' Many of the functions of the \code{mice} package do not use the #' S4 class definitions, and instead rely on the S3 list equivalent #' \code{oldClass(obj) <- "mira"}. #' #'@name mira-class #'@rdname mira-class #'@aliases mira-class mira #'@author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #'@seealso \code{\link{with.mids}}, \code{\link[=mids-class]{mids}}, \code{\link{mipo}} #'@references van Buuren S and Groothuis-Oudshoorn K (2011). 
#'\code{mice}:
#'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of
#'Statistical Software}, \bold{45}(3), 1-67.
#'\url{https://www.jstatsoft.org/v45/i03/}
#'@keywords classes
#'@export
setClass(
  "mira",
  # slots of the formal class: the two calls, missing-data counts per
  # column, and the list of fitted analyses
  representation = representation(
    call = "call",
    call1 = "call",
    nmis = "integer",
    analyses = "list"
  ),
  # the object itself is list-like
  contains = "list"
)
# mice/R/nhanes2.R0000644000176200001440000000161613416657163013071 0ustar liggesusers
#'NHANES example - mixed numerical and discrete variables
#'
#'A small data set with non-monotone missing values.
#'
#'A small data set with missing data and mixed numerical and discrete
#'variables. The data set \code{nhanes} is the same data set, but with all data
#'treated as numerical.
#'
#'@name nhanes2
#'@docType data
#'@format A data frame with 25 observations on the following 4 variables.
#'\describe{
#'\item{age}{Age group (1=20-39, 2=40-59, 3=60+)}
#'\item{bmi}{Body mass index (kg/m**2)}
#'\item{hyp}{Hypertensive (1=no,2=yes)}
#'\item{chl}{Total serum cholesterol (mg/dL)} }
#'@seealso \code{\link{nhanes}}
#'@source Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate
#'Data.} London: Chapman & Hall. Table 6.14.
#'@keywords datasets
#'@examples
#'
#'
#'imp <- mice(nhanes2) # create 5 imputed data sets
#'complete(imp) # print the first imputed data set
#'
NULL
# mice/R/as.R0000644000176200001440000001443613617556227012144 0ustar liggesusers
#'Converts a multiply imputed dataset (long format) into a \code{mids} object
#'
#'This function converts imputed data stored in long format into
#'an object of class \code{mids}. The original incomplete dataset
#'needs to be available so that we know where the missing data are.
#'The function is useful to convert back operations applied to
#'the imputed data back in a \code{mids} object. It may also be
#'used to store multiply imputed data sets from other software
#'into the format used by \code{mice}.
#'@note The function expects the input data \code{long} to be sorted by #'imputation number (variable \code{".imp"} by default), and in the #'same sequence within each imputation block. #'@param long A multiply imputed data set in long format, for example #'produced by a call to \code{complete(..., action = 'long', include = TRUE)}, #'or by other software. #'@param .imp An optional column number or column name in \code{long}, #'indicating the imputation index. The values are assumed to be consecutive #'integers between 0 and \code{m}. Values \code{1} through \code{m} #'correspond to the imputation index, value \code{0} indicates #'the original data (with missings). #'By default, the procedure will search for a variable named \code{".imp"}. #'@param .id An optional column number or column name in \code{long}, #'indicating the subject identification. If not specified, then the #'function searches for a variable named \code{".id"}. If this variable #'is found, the values in the column will define the row names in #'the \code{data} element of the resulting \code{mids} object. 
#'@inheritParams mice
#'@return An object of class \code{mids}
#'@author Gerko Vink
#'@examples
#'# impute the nhanes dataset
#'imp <- mice(nhanes, print = FALSE)
#'# extract the data in long format
#'X <- complete(imp, action = "long", include = TRUE)
#'# create dataset with .imp variable as numeric
#'X2 <- X
#'
#'# nhanes example without .id
#'test1 <- as.mids(X)
#'is.mids(test1)
#'identical(complete(test1, action = "long", include = TRUE), X)
#'
#'# nhanes example without .id where .imp is numeric
#'test2 <- as.mids(X2)
#'is.mids(test2)
#'identical(complete(test2, action = "long", include = TRUE), X)
#'
#'# nhanes example, where we explicitly specify .id as column 2
#'test3 <- as.mids(X, .id = ".id")
#'is.mids(test3)
#'identical(complete(test3, action = "long", include = TRUE), X)
#'
#'# nhanes example with .id where .imp is numeric
#'test4 <- as.mids(X2, .id = 2)
#'is.mids(test4)
#'identical(complete(test4, action = "long", include = TRUE), X)
#'
#'# example without an .id variable
#'# variable .id not preserved
#'X3 <- X[, -2]
#'test5 <- as.mids(X3)
#'is.mids(test5)
#'identical(complete(test5, action = "long", include = TRUE)[, -2], X[, -2])
#'
#'# as() syntax has fewer options
#'test7 <- as(X, "mids")
#'test8 <- as(X2, "mids")
#'test9 <- as(X2[, -2], "mids")
#'rev <- ncol(X):1
#'test10 <- as(X[, rev], "mids")
#'
#'# where argument copies also observed data into $imp element
#'where <- matrix(TRUE, nrow = nrow(nhanes), ncol = ncol(nhanes))
#'colnames(where) <- colnames(nhanes)
#'test11 <- as.mids(X, where = where)
#'identical(complete(test11, action = "long", include = TRUE), X)
#'@keywords mids
#'@export
as.mids <- function(long, where = NULL, .imp = ".imp", .id = ".id") {
  # remember whether the caller named an id column, before `.id` is rewritten
  id.given <- !missing(.id)

  if (is.numeric(.imp)) .imp <- names(long)[.imp]
  if (is.numeric(.id)) .id <- names(long)[.id]
  if (!.imp %in% names(long)) stop("Imputation index `.imp` not found")

  # The id column is optional. When the default ".id" is absent, carry on
  # without row names; a column the caller asked for by name must exist.
  if (!is.na(.id) && !.id %in% names(long)) {
    if (id.given) stop("Subject identifier `.id` not found")
    .id <- NA
  }

  # no missings allowed in .imp
  imps <- unlist(long[, .imp], use.names = FALSE)
  if (anyNA(imps)) stop("Missing values in imputation index `.imp`")

  # number of records within .imp should be the same
  if (any(diff(table(imps)) != 0))
    stop("Unequal group sizes in imputation index `.imp`")

  # get original data part
  keep <- setdiff(names(long), na.omit(c(.imp, .id)))
  data <- long[imps == 0, keep, drop = FALSE]
  n <- nrow(data)
  if (n == 0)
    stop("Original data not found.\n Use `complete(..., action = 'long', include = TRUE)` to save original data.")

  # determine m
  m <- length(unique(imps)) - 1

  # use mice to get info on data
  if (is.null(where)) where <- is.na(data)
  ini <- mice(data, m = m, where = where, maxit = 0,
              remove.collinear = FALSE, allow.na = TRUE)

  # store any .id as row names
  if (!is.na(.id))
    rownames(ini$data) <- unlist(long[imps == 0, .id], use.names = FALSE)

  # copy imputations from long into proper ini$imp elements
  for (varname in names(ini$imp)) {
    if (!is.null(ini$imp[[varname]])) {
      for (j in seq_len(m)) {
        # rows of imputation j that belong to cells flagged in `where`
        idx <- imps == j & where[, varname]
        ini$imp[[varname]][j] <- long[idx, varname]
      }
    }
  }
  ini
}

#' Create a \code{mira} object from repeated analyses
#'
#' The \code{as.mira()} function takes the results of repeated
#' complete-data analysis stored as a list, and turns it
#' into a \code{mira} object that can be pooled.
#' @param fitlist A list containing $m$ fitted analysis objects
#' @return An S3 object of class \code{mira}.
#' @seealso \code{\link[=mira-class]{mira}}
#' @author Stef van Buuren
#' @export
as.mira <- function(fitlist) {
  if (is.mira(fitlist)) return(fitlist)
  call <- match.call()
  if (!is.list(fitlist))
    stop("Argument 'fitlist' is not a list")
  # strip any other class so the analyses are stored as a plain list
  class(fitlist) <- "list"
  object <- list(call = call, call1 = NULL, nmis = NULL, analyses = fitlist)
  # NOTE(review): "matrix" in the class vector looks unusual for a list
  # object; kept as is, confirm before changing
  oldClass(object) <- c("mira", "matrix")
  return(object)
}

#' Converts into a \code{mitml.result} object
#'
#' The \code{as.mitml.result()} function takes the results of repeated
#' complete-data analysis stored as a list, and turns it
#' into an object of class \code{mitml.result}.
#' @param x An object of class \code{mira}
#' @return An S3 object of class \code{mitml.result}, a list
#' containing $m$ fitted analysis objects.
#' @seealso \code{\link[mitml]{with.mitml.list}}
#' @author Stef van Buuren
#' @export
as.mitml.result <- function(x) {
  if (inherits(x, "mitml.result")) return(x)
  if (is.mira(x)) {
    z <- getfit(x)
  } else if (is.list(x)) {
    z <- x
  } else {
    # fail with a clear message instead of the obscure
    # "attempt to set an attribute on NULL" raised by `class<-` below
    stop("Argument 'x' must be a mira object or a list")
  }
  class(z) <- c("mitml.result", "list")
  z
}

# register the S3 classes so they can be used as targets of as()
setOldClass(c("mids", "mira"))

# as(<data.frame>, "mids") delegates to as.mids() with default arguments
setAs(from = "data.frame", to = "mids",
      def = function(from) {
        as.mids(from)
      }
)

# as(<list>, "mira") delegates to as.mira()
setAs(from = "list", to = "mira",
      def = function(from) {
        as.mira(from)
      }
)
# mice/R/mice-package.R0000644000176200001440000001134413440175651014032 0ustar liggesusers
#' \pkg{mice}: Multivariate Imputation by Chained Equations
#'
#' The \pkg{mice} package implements a method to deal with missing data.
#' The package creates multiple imputations (replacement values) for
#' multivariate missing data. The method is based on Fully Conditional
#' Specification, where each incomplete variable is imputed by a separate
#' model. The MICE algorithm can impute mixes of continuous, binary,
#' unordered categorical and ordered categorical data. In addition, MICE
#' can impute continuous two-level data, and maintain consistency between
#' imputations by means of passive imputation. Many diagnostic plots are
#' implemented to inspect the quality of the imputations.
#' #' The \pkg{mice} package contains functions to #' \itemize{ #' \item Inspect the missing data pattern #' \item Impute the missing data \emph{m} times, resulting in \emph{m} completed data sets #' \item Diagnose the quality of the imputed values #' \item Analyze each completed data set #' \item Pool the results of the repeated analyses #' \item Store and export the imputed data in various formats #' \item Generate simulated incomplete data #' \item Incorporate custom imputation methods #' } #' #' @section Functions: #' #' The main functions are: #' \tabular{ll}{ #' \code{mice()} \tab Impute the missing data *m* times\cr #' \code{with()} \tab Analyze completed data sets\cr #' \code{pool()} \tab Combine parameter estimates\cr #' \code{complete()} \tab Export imputed data\cr #' \code{ampute()} \tab Generate missing data\cr} #' #' @section Vignettes: #' #' There is a detailed series of #' six online vignettes that walk you through solving realistic inference #' problems with mice. #' #' We suggest going through these vignettes in the following order #' \enumerate{ #' \item \href{https://gerkovink.github.io/miceVignettes/Ad_hoc_and_mice/Ad_hoc_methods.html}{Ad hoc methods and the MICE algorithm} #' \item \href{https://gerkovink.github.io/miceVignettes/Convergence_pooling/Convergence_and_pooling.html}{Convergence and pooling} #' \item \href{https://gerkovink.github.io/miceVignettes/Missingness_inspection/Missingness_inspection.html}{Inspecting how the observed data and missingness are related} #' \item \href{https://gerkovink.github.io/miceVignettes/Passive_Post_processing/Passive_imputation_post_processing.html}{Passive imputation and post-processing} #' \item \href{https://gerkovink.github.io/miceVignettes/Multi_level/Multi_level_data.html}{Imputing multilevel data} #' \item \href{https://gerkovink.github.io/miceVignettes/Sensitivity_analysis/Sensitivity_analysis.html}{Sensitivity analysis with \pkg{mice}} #' } #' #' #'Van Buuren, S. (2018). 
#'Boca Raton, FL.: Chapman & Hall/CRC Press. #' The book #' \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' contains a lot of \href{https://github.com/stefvanbuuren/fimdbook/tree/master/R}{example code}. #' #' @section Methodology: #' #' The \pkg{mice} software was published in the \href{https://www.jstatsoft.org/article/view/v045i03}{Journal of Statistical Software} (Van Buuren and Groothuis-Oudshoorn, 2011). #' The first application of the method #' concerned missing blood pressure data (Van Buuren et al., 1999). #' The term \emph{Fully Conditional Specification} was introduced in 2006 to describe a general class of methods that specify imputation models for multivariate data as a set of conditional distributions (Van Buuren et al., 2006). Further details on mixes of variables and applications can be found in the book #'\href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' #' @docType package #' @name mice #' @seealso \code{\link{mice}}, \code{\link{with.mids}}, #' \code{\link{pool}}, \code{\link{complete}}, \code{\link{ampute}} #'@references #'van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple #'imputation of missing blood pressure covariates in survival analysis. #'\emph{Statistics in Medicine}, \bold{18}, 681--694. #' #'van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #'Fully conditional specification in multivariate imputation. \emph{Journal of #'Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #' #'van Buuren, S., Groothuis-Oudshoorn, K. (2011). \href{https://www.jstatsoft.org/v45/i03/}{\code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1--67. #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. 
Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' NULL mice/R/imports.R0000644000176200001440000000234713620071400013207 0ustar liggesusers#' @import methods #' @importFrom broom glance tidy #' @importFrom dplyr %>% .data bind_cols bind_rows group_by lead #' mutate n pull select summarize syms #' @importFrom graphics abline axis box par plot plot.new plot.window #' points rect text #' @importFrom lattice bwplot densityplot stripplot xyplot #' @importFrom stats C aggregate as.formula binomial coef #' complete.cases confint #' contr.treatment cor df.residual fitted #' formula gaussian getCall #' glm is.empty.model lm lm.fit #' median model.frame model.matrix #' na.exclude na.omit na.pass #' pf predict pt qt quantile #' rbinom rchisq reformulate rgamma rnorm runif #' summary.glm terms update var vcov #' @importFrom tidyr complete #' @importFrom utils flush.console head install.packages methods #' packageDescription packageVersion #' tail write.table #' @export bwplot #' @export densityplot #' @export stripplot #' @export xyplot #' @export complete #' @useDynLib mice NULL mice/R/boys.R0000644000176200001440000000527313617562066012512 0ustar liggesusers#'Growth of Dutch boys #' #'Height, weight, head circumference and puberty of 748 Dutch boys. #' #'Random sample of 10\% from the cross-sectional data used to construct the #'Dutch growth references 1997. Variables \code{gen} and \code{phb} are ordered #'factors. \code{reg} is a factor. 
#' #'@name boys #'@docType data #'@format A data frame with 748 rows on the following 9 variables: \describe{ #'\item{age}{Decimal age (0-21 years)} #'\item{hgt}{Height (cm)} #'\item{wgt}{Weight (kg)} #'\item{bmi}{Body mass index} #'\item{hc}{Head circumference (cm)} #'\item{gen}{Genital Tanner stage (G1-G5)} #'\item{phb}{Pubic hair (Tanner P1-P6)} #'\item{tv}{Testicular volume (ml)} #'\item{reg}{Region (north, east, west, south, city)} } #'@source Fredriks, A.M,, van Buuren, S., Burgmeijer, R.J., Meulmeester JF, #'Beuker, R.J., Brugman, E., Roede, M.J., Verloove-Vanhorick, S.P., Wit, J.M. #'(2000) Continuing positive secular growth change in The Netherlands #'1955-1997. \emph{Pediatric Research}, \bold{47}, 316-323. #' #'Fredriks, A.M., van Buuren, S., Wit, J.M., Verloove-Vanhorick, S.P. (2000). #'Body index measurements in 1996-7 compared with 1980. \emph{Archives of #'Disease in Childhood}, \bold{82}, 107-112. #'@keywords datasets #'@examples #' #'# create two imputed data sets #'imp <- mice(boys, m=1, maxit=2) #'z <- complete(imp, 1) #' #'# create imputations for age <8yrs #'plot(z$age, z$gen, col=mdc(1:2)[1+is.na(boys$gen)], #' xlab = "Age (years)", ylab = "Tanner Stage Genital") #' #'# figure to show that the default imputation method does not impute BMI #'# consistently #'plot(z$bmi,z$wgt/(z$hgt/100)^2, col=mdc(1:2)[1+is.na(boys$bmi)], #' xlab = "Imputed BMI", ylab="Calculated BMI") #' #'# also, BMI distributions are somewhat different #'oldpar <- par(mfrow=c(1,2)) #'MASS::truehist(z$bmi[!is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25, #' col=mdc(1),xlab="BMI observed") #'MASS::truehist(z$bmi[is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25, #' col=mdc(2),xlab="BMI imputed") #'par(oldpar) #' #'# repair the inconsistency problem by passive imputation #'meth <- imp$meth #'meth["bmi"] <- "~I(wgt/(hgt/100)^2)" #'pred <- imp$predictorMatrix #'pred["hgt","bmi"] <- 0 #'pred["wgt","bmi"] <- 0 #'imp2 <- mice(boys, m=1, maxit=2, meth=meth, pred=pred) #'z2 <- complete(imp2, 
1) #' #'# show that new imputations are consistent #'plot(z2$bmi,z2$wgt/(z2$hgt/100)^2, col=mdc(1:2)[1+is.na(boys$bmi)], #' ylab="Calculated BMI") #' #'# and compare distributions #'oldpar <- par(mfrow=c(1,2)) #'MASS::truehist(z2$bmi[!is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25,col=mdc(1), #' xlab="BMI observed") #'MASS::truehist(z2$bmi[is.na(boys$bmi)],h=1,xlim=c(10,30),ymax=0.25,col=mdc(2), #' xlab="BMI imputed") #'par(oldpar) #' #' NULL mice/R/mice.impute.norm.predict.R0000644000176200001440000000367613416664706016366 0ustar liggesusers#'Imputation by linear regression through prediction #' #'Imputes the "best value" according to the linear regression model, also #'known as \emph{regression imputation}. #' #'@aliases mice.impute.norm.predict norm.predict #'@inheritParams mice.impute.pmm #'@return Vector with imputed data, same type as \code{y}, and of length #'\code{sum(wy)} #'@details #'Calculates regression weights from the observed data and returns predicted #'values to as imputations. This #'method is known as \emph{regression imputation}. #'@section Warning: THIS METHOD SHOULD NOT BE USED FOR DATA ANALYSIS. #'This method is seductive because it imputes the most #'likely value according to the model. However, it ignores the uncertainty #'of the missing values and artificially #'amplifies the relations between the columns of the data. Application of #'richer models having more parameters does not help to evade these issues. #'Stochastic regression methods, like \code{\link{mice.impute.pmm}} or #'\code{\link{mice.impute.norm}}, are generally preferred. #' #'At best, prediction can give reasonable estimates of the mean, especially #'if normality assumptions are plausible. See Little and Rubin (2002, p. 62-64) #'or Van Buuren (2012, p. 11-13, p. 45-46) for a discussion of this method. #'@author Gerko Vink, Stef van Buuren, 2018 #'@references #'Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing #'Data. New York: John Wiley and Sons. 
#'
#'Van Buuren, S. (2018).
#'\href{https://stefvanbuuren.name/fimd/sec-linearnormal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#'Chapman & Hall/CRC. Boca Raton, FL.
#'@family univariate imputation functions
#'@keywords datagen
#'@export
mice.impute.norm.predict <- function(y, ry, x, wy = NULL, ...) {
  # by default impute exactly the entries that are not observed
  if (is.null(wy)) {
    wy <- !ry
  }
  # design matrix with an intercept column in front
  design <- cbind(1, as.matrix(x))
  # regression weights estimated from the observed rows only
  fit <- estimice(design[ry, , drop = FALSE], y[ry], ...)
  # predicted values for the rows to be imputed
  design[wy, , drop = FALSE] %*% fit$c
}
# mice/R/auxiliary.R0000644000176200001440000000601013617531060013521 0ustar liggesusers
#'Conditional imputation helper
#'
#'Sorry, the \code{ifdo()} function is not yet implemented.
#'@aliases ifdo
#'@param cond a condition
#'@param action the action to do
#'@return \code{NULL}; a notice that the function is not implemented is
#'printed.
#'@author Stef van Buuren, 2012
#'@keywords internal
ifdo <- function(cond, action){
  # placeholder: both arguments are ignored
  notice <- "Function ifdo() not yet implemented.\n"
  cat(notice)
}

#
#'Appends specified break to the data
#'
#'A custom function to insert rows in long data with new pseudo-observations
#'that are being done on the specified break ages. There should be a
#'column called \code{first} in \code{data} with logical data that codes whether
#'the current row is the first for subject \code{id}. Furthermore,
#'the function assumes that columns \code{age}, \code{occ},
#'\code{hgt.z}, \code{wgt.z} and
#'\code{bmi.z} are available. This function is used on the \code{tbc}
#'data in FIMD chapter 9. Check that out to see it in action.
#'@aliases appendbreak
#'@param data A data frame in the long format
#'@param brk A vector of break ages
#'@param warp.model A time warping model. Must be supplied by the caller.
#'@param id The subject identifier
#'@param typ Label to signal that this is a newly added observation
#'@return A long data frame with additional rows for the break ages
#'@export
appendbreak <- function(data, brk, warp.model = warp.model, id=NULL, typ="pred"){
  # The default refers to itself and can never be evaluated, so fail early
  # with a clear message rather than with R's
  # "recursive default argument reference" error further down.
  if (missing(warp.model))
    stop("Argument 'warp.model' must be supplied", call. = FALSE)

  k <- length(brk)
  # template rows: the first record of every (selected) subject
  app <- data[data$first,]
  if (!is.null(id)) {
    idx <- app$id %in% id
    app <- app[idx,]
  }
  nap <- nrow(app)

  ## update administrative variables
  app$first <- FALSE
  app$typ <- typ
  app$occ <- NA
  # one copy of every template row per break age
  app <- app[rep.int(seq_len(nap),length(brk)),]

  ## update age variables
  app$age <- rep(brk,each=nap)
  app$age2 <- predict(warp.model,newdata=app)
  # linear B-spline basis on the break ages; the last column is dropped
  X <- splines::bs(app$age,
                   knots = brk,
                   Boundary.knots = c(brk[1],brk[k]+0.0001),
                   degree = 1)
  X <- X[,-(k+1)]
  app[,paste0("x",seq_len(ncol(X)))] <- X

  ## update outcome variable (set to missing)
  app[,c("hgt.z","wgt.z","bmi.z")] <- NA
  app <- rbind(data, app)
  data <- app[order(app$id, app$age),]
  return(data)
}

#'Extract broken stick estimates from a \code{lmer} object
#'
#'@param fit An object of class \code{lmer}
#'@return A matrix containing broken stick estimates
#'@author Stef van Buuren, 2012
#'@export
extractBS <- function(fit) {
  # random effects of the first grouping factor plus the fixed effects
  siz <- t(lme4::ranef(fit)[[1]]) + lme4::fixef(fit)
  # stack everything into a single column
  bs <- matrix(siz, nrow=nrow(siz)*ncol(siz), ncol=1)
  return(bs)
}

## used by mice.impute.midastouch
# Bootstrap weights: how often each of 1..n is drawn when sampling n values
# with replacement. Returns a numeric vector of length n.
bootfunc.plain <- function(n){
  random <- sample.int(n,replace = TRUE)
  weights <- as.numeric(table(factor(random,levels = seq_len(n))))
  return(weights)
}

# Clamp x into [sqrt(.Machine$double.eps), sqrt(.Machine$double.xmax)].
# NOTE(review): `domin` switches the upper cap (pmin) and `domax` the lower
# floor (pmax); confirm that these flag names are intended.
minmax <- function(x,domin=TRUE,domax=TRUE){
  maxx <- sqrt(.Machine$double.xmax)
  minx <- sqrt(.Machine$double.eps)
  if(domin){
    x <- pmin(x,maxx)
  }
  if(domax){
    x <- pmax(x,minx)
  }
  return(x)
}

# For every column of `mis` that has at least one TRUE, collect the values
# of `single` at the flagged rows. Returns a list named after those columns.
single2imputes <- function(single, mis) {
  nmis <- colSums(mis)
  vars <- names(single)[nmis > 0]
  z <- vector("list", length(vars))
  names(z) <- vars
  for (j in vars)
    z[[j]] <- single[mis[, j], j]
  z
}
mice/R/nmar_demo_data.R0000644000176200001440000000036013620753345014454 0ustar liggesusers#' MNAR demo data #' #' A toy example from Margarita Moreno-Betancur for checking NARFCS. #' #' A small dataset with just three columns. #' @source \url{https://github.com/moreno-betancur/NARFCS/blob/master/datmis.csv} "mnar_demo_data" mice/R/mice.R0000644000176200001440000005002513416664706012447 0ustar liggesusers#'Multivariate Imputation by Chained Equations (MICE) #' #'Generates Multivariate Imputations by Chained Equations (MICE) #' #'Generates multiple imputations for incomplete multivariate data by Gibbs #'sampling. Missing data can occur anywhere in the data. The algorithm imputes #'an incomplete column (the target column) by generating 'plausible' synthetic #'values given other columns in the data. Each incomplete column must act as a #'target column, and has its own specific set of predictors. The default set of #'predictors for a given target consists of all other columns in the data. For #'predictors that are incomplete themselves, the most recently generated #'imputations are used to complete the predictors prior to imputation of the #'target column. #' #'A separate univariate imputation model can be specified for each column. The #'default imputation method depends on the measurement level of the target #'column. In addition to these, several other methods are provided. You can #'also write their own imputation functions, and call these from within the #'algorithm. #' #'The data may contain categorical variables that are used in a regressions on #'other variables. The algorithm creates dummy variables for the categories of #'these variables, and imputes these from the corresponding categorical #'variable. 
#' #'Built-in univariate imputation methods are: #' #'\tabular{lll}{ #'\code{pmm} \tab any \tab Predictive mean matching\cr #'\code{midastouch} \tab any \tab Weighted predictive mean matching\cr #'\code{sample} \tab any \tab Random sample from observed values\cr #'\code{cart} \tab any \tab Classification and regression trees\cr #'\code{rf} \tab any \tab Random forest imputations\cr #'\code{mean} \tab numeric \tab Unconditional mean imputation\cr #'\code{norm} \tab numeric \tab Bayesian linear regression\cr #'\code{norm.nob} \tab numeric \tab Linear regression ignoring model error\cr #'\code{norm.boot} \tab numeric \tab Linear regression using bootstrap\cr #'\code{norm.predict} \tab numeric \tab Linear regression, predicted values\cr #'\code{quadratic} \tab numeric \tab Imputation of quadratic terms\cr #'\code{ri} \tab numeric \tab Random indicator for nonignorable data\cr #'\code{logreg} \tab binary \tab Logistic regression\cr #'\code{logreg.boot} \tab binary \tab Logistic regression with bootstrap\cr #'\code{polr} \tab ordered \tab Proportional odds model\cr #'\code{polyreg} \tab unordered\tab Polytomous logistic regression\cr #'\code{lda} \tab unordered\tab Linear discriminant analysis\cr #'\code{2l.norm} \tab numeric \tab Level-1 normal heteroscedastic\cr #'\code{2l.lmer} \tab numeric \tab Level-1 normal homoscedastic, lmer\cr #'\code{2l.pan} \tab numeric \tab Level-1 normal homoscedastic, pan\cr #'\code{2l.bin} \tab binary \tab Level-1 logistic, glmer\cr #'\code{2lonly.mean} \tab numeric \tab Level-2 class mean\cr #'\code{2lonly.norm} \tab numeric \tab Level-2 class normal\cr #'\code{2lonly.pmm} \tab any \tab Level-2 class predictive mean matching #'} #' #'These corresponding functions are coded in the \code{mice} library under #'names \code{mice.impute.method}, where \code{method} is a string with the #'name of the univariate imputation method name, for example \code{norm}. The #'\code{method} argument specifies the methods to be used. 
For the \code{j}'th #'column, \code{mice()} calls the first occurrence of #'\code{paste('mice.impute.', method[j], sep = '')} in the search path. The #'mechanism allows users to write a customized imputation function, #'\code{mice.impute.myfunc}. To call it for all columns specify #'\code{method='myfunc'}. To call it only for, say, column 2 specify #'\code{method=c('norm','myfunc','logreg',\dots{})}. #' #'\emph{Passive imputation:} \code{mice()} supports a special built-in method, #'called passive imputation. This method can be used to ensure that a data #'transform always depends on the most recently generated imputations. In some #'cases, an imputation model may need transformed data in addition to the #'original data (e.g. log, quadratic, recodes, interaction, sum scores, and so #'on). #' #'Passive imputation maintains consistency among different transformations of #'the same data. Passive imputation is invoked if \code{~} is specified as the #'first character of the string that specifies the univariate method. #'\code{mice()} interprets the entire string, including the \code{~} character, #'as the formula argument in a call to \code{model.frame(formula, #'data[!r[,j],])}. This provides a simple mechanism for specifying deterministic #'dependencies among the columns. For example, suppose that the missing entries #'in variables \code{data$height} and \code{data$weight} are imputed. The body #'mass index (BMI) can be calculated within \code{mice} by specifying the #'string \code{'~I(weight/height^2)'} as the univariate imputation method for #'the target column \code{data$bmi}. Note that the \code{~} mechanism works #'only on those entries which have missing values in the target column. You #'should make sure that the combined observed and imputed parts of the target #'column make sense. An easy way to create consistency is by coding all entries #'in the target as \code{NA}, but for large data sets, this could be #'inefficient. 
Note that you may also need to adapt the default #'\code{predictorMatrix} to evade linear dependencies among the predictors that #'could cause errors like \code{Error in solve.default()} or \code{Error: #'system is exactly singular}. Though not strictly needed, it is often useful #'to specify \code{visitSequence} such that the column that is imputed by the #'\code{~} mechanism is visited each time after one of its predictors was #'visited. In that way, deterministic relations between columns will always be #'synchronized. #' #'A new argument \code{ls.meth} can be passed to the lower level #'\code{.norm.draw} to specify the method for generating the least squares #'estimates and any subsequently derived estimates. Argument \code{ls.meth} #'takes one of three inputs: \code{"qr"} for QR-decomposition, \code{"svd"} for #'singular value decomposition and \code{"ridge"} for ridge regression. #'\code{ls.meth} defaults to \code{ls.meth = "qr"}. #' #'\emph{Auxiliary predictors in formulas specification: } #'For a given block, the \code{formulas} specification takes precedence over #'the corresponding row in the \code{predictorMatrix} argument. This #'precedence is, however, restricted to the subset of variables #'specified in the terms of the block formula. Any #'variables not specified by \code{formulas} are imputed #'according to the \code{predictorMatrix} specification. Variables with #'non-zero \code{type} values in the \code{predictorMatrix} will #'be added as main effects to the \code{formulas}, which will #'act as supplementary covariates in the imputation model. It is possible #'to turn off this behavior by specifying the #'argument \code{auxiliary = FALSE}. #' #'@param data A data frame or a matrix containing the incomplete data. Missing #'values are coded as \code{NA}. #'@param m Number of multiple imputations. The default is \code{m=5}. 
#'@param where A data frame or matrix with logicals of the same dimensions #'as \code{data} indicating where in the data the imputations should be #'created. The default, \code{where = is.na(data)}, specifies that the #'missing data should be imputed. The \code{where} argument may be used to #'overimpute observed data, or to skip imputations for selected missing values. #'@param blocks List of vectors with variable names per block. List elements #'may be named to identify blocks. Variables within a block are #'imputed by a multivariate imputation method #'(see \code{method} argument). By default each variable is placed #'into its own block, which is effectively #'fully conditional specification (FCS) by univariate models #'(variable-by-variable imputation). Only variables whose names appear in #'\code{blocks} are imputed. The relevant columns in the \code{where} #'matrix are set to \code{FALSE} of variables that are not block members. #'A variable may appear in multiple blocks. In that case, it is #'effectively re-imputed each time that it is visited. #'@param method Can be either a single string, or a vector of strings with #'length \code{length(blocks)}, specifying the imputation method to be #'used for each column in data. If specified as a single string, the same #'method will be used for all blocks. The default imputation method (when no #'argument is specified) depends on the measurement level of the target column, #'as regulated by the \code{defaultMethod} argument. Columns that need #'not be imputed have the empty method \code{""}. See details. #'@param predictorMatrix A numeric matrix of \code{length(blocks)} rows #'and \code{ncol(data)} columns, containing 0/1 data specifying #'the set of predictors to be used for each target column. #'Each row corresponds to a variable block, i.e., a set of variables #'to be imputed. A value of \code{1} means that the column #'variable is used as a predictor for the target block (in the rows). 
#'By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} #'rows and columns with all 1's, except for the diagonal. #'Note: For two-level imputation models (which have \code{"2l"} in their names) #'other codes (e.g, \code{2} or \code{-2}) are also allowed. #'@param visitSequence A vector of block names of arbitrary length, specifying the #'sequence of blocks that are imputed during one iteration of the Gibbs #'sampler. A block is a collection of variables. All variables that are #'members of the same block are imputed #'when the block is visited. A variable that is a member of multiple blocks #'is re-imputed within the same iteration. #'The default \code{visitSequence = "roman"} visits the blocks (left to right) #'in the order in which they appear in \code{blocks}. #'One may also use one of the following keywords: \code{"arabic"} #'(right to left), \code{"monotone"} (ordered low to high proportion #'of missing data) and \code{"revmonotone"} (reverse of monotone). #'@param formulas A named list of formula's, or expressions that #'can be converted into formula's by \code{as.formula}. List elements #'correspond to blocks. The block to which the list element applies is #'identified by its name, so list names must correspond to block names. #'The \code{formulas} argument is an alternative to the #'\code{predictorMatrix} argument that allows for more flexibility in #'specifying imputation models, e.g., for specifying interaction terms. #'@param blots A named \code{list} of \code{alist}'s that can be used #'to pass down arguments to lower level imputation function. The entries #'of element \code{blots[[blockname]]} are passed down to the function #'called for block \code{blockname}. #'@param post A vector of strings with length \code{ncol(data)} specifying #'expressions as strings. Each string is parsed and #'executed within the \code{sampler()} function to post-process #'imputed values during the iterations. 
#'The default is a vector of empty strings, indicating no post-processing. #'@param defaultMethod A vector of length 4 containing the default #'imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) #'factor data with > 2 unordered levels, and 4) factor data with > 2 #'ordered levels. By default, the method uses #'\code{pmm}, predictive mean matching (numeric data); \code{logreg}, logistic #'regression imputation (binary data, factor with 2 levels); \code{polyreg}, #'polytomous regression imputation for unordered categorical data (factor > 2 #'levels); \code{polr}, proportional odds model for (ordered, > 2 levels). #'@param maxit A scalar giving the number of iterations. The default is 5. #'@param printFlag If \code{TRUE}, \code{mice} will print history on console. #'Use \code{print=FALSE} for silent computation. #'@param seed An integer that is used as argument by the \code{set.seed()} for #'offsetting the random number generator. Default is to leave the random number #'generator alone. #'@param data.init A data frame of the same size and type as \code{data}, #'without missing data, used to initialize imputations before the start of the #'iterative process. The default \code{NULL} implies that starting imputations #'are created by a simple random draw from the data. Note that specification of #'\code{data.init} will start all \code{m} Gibbs sampling streams from the same #'imputation. #'@param ... Named arguments that are passed down to the univariate imputation #'functions. #' #'@return Returns an S3 object of class \code{\link[=mids-class]{mids}} #' (multiply imputed data set) #'@author Stef van Buuren \email{stef.vanbuuren@@tno.nl}, Karin #'Groothuis-Oudshoorn \email{c.g.m.oudshoorn@@utwente.nl}, 2000-2010, with #'contributions of Alexander Robitzsch, Gerko Vink, Shahab Jolani, #'Roel de Jong, Jason Turner, Lisa Doove, #'John Fox, Frank E. Harrell, and Peter Malewski. 
#'@seealso \code{\link[=mids-class]{mids}}, \code{\link{with.mids}}, #'\code{\link{set.seed}}, \code{\link{complete}} #'@references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #'Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #'Statistical Software}, \bold{45}(3), 1-67. #'\url{https://www.jstatsoft.org/v45/i03/} #' #'Van Buuren, S. (2018). #'\href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #'Chapman & Hall/CRC. Boca Raton, FL. #' #'Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #'Fully conditional specification in multivariate imputation. \emph{Journal of #'Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #' #'Van Buuren, S. (2007) Multiple imputation of discrete and continuous data by #'fully conditional specification. \emph{Statistical Methods in Medical #'Research}, \bold{16}, 3, 219--242. #' #'Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of #'missing blood pressure covariates in survival analysis. \emph{Statistics in #'Medicine}, \bold{18}, 681--694. #' #'Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of #'multiple imputation strategies for the statistical analysis of incomplete #'data sets.} Dissertation. Rotterdam: Erasmus University. 
#'@keywords iteration
#'@examples
#'
#'
#'# do default multiple imputation on a numeric matrix
#'imp <- mice(nhanes)
#'imp
#'
#'# list the actual imputations for BMI
#'imp$imp$bmi
#'
#'# first completed data matrix
#'complete(imp)
#'
#'
#'# imputation on mixed data with a different method per column
#'
#'mice(nhanes2, meth=c('sample','pmm','logreg','norm'))
#'
#'@export
mice <- function(data, m = 5,
                 method = NULL,
                 predictorMatrix,
                 where = NULL,
                 blocks,
                 visitSequence = NULL,
                 formulas,
                 blots = NULL,
                 post = NULL,
                 defaultMethod = c("pmm", "logreg", "polyreg", "polr"),
                 maxit = 5, printFlag = TRUE, seed = NA,
                 data.init = NULL,
                 ...) {
  # keep the call as issued; it is stored in the returned object
  call <- match.call()
  check.deprecated(...)
  # the random number generator is only seeded when a seed was supplied
  if (!is.na(seed)) set.seed(seed)

  # check form of data and m
  data <- check.dataform(data)
  m <- check.m(m)

  # determine input combination: predictorMatrix, blocks, formulas
  # Each flag is TRUE when the corresponding argument was NOT supplied.
  # Cases A-H below enumerate all eight combinations of the three flags,
  # so exactly one branch runs and defines blocks, predictorMatrix and
  # formulas.
  mp <- missing(predictorMatrix)
  mb <- missing(blocks)
  mf <- missing(formulas)

  # case A
  if (mp & mb & mf) {
    # blocks lead
    blocks <- make.blocks(colnames(data))
    predictorMatrix <- make.predictorMatrix(data, blocks)
    formulas <- make.formulas(data, blocks)
  }

  # case B
  if (!mp & mb & mf) {
    # predictorMatrix leads
    predictorMatrix <- check.predictorMatrix(predictorMatrix, data)
    blocks <- make.blocks(colnames(predictorMatrix), partition = "scatter")
    formulas <- make.formulas(data, blocks, predictorMatrix = predictorMatrix)
  }

  # case C
  if (mp & !mb & mf) {
    # blocks leads
    blocks <- check.blocks(blocks, data)
    predictorMatrix <- make.predictorMatrix(data, blocks)
    formulas <- make.formulas(data, blocks)
  }

  # case D
  if (mp & mb & !mf) {
    # formulas leads
    formulas <- check.formulas(formulas, data)
    blocks <- construct.blocks(formulas)
    predictorMatrix <- make.predictorMatrix(data, blocks)
  }

  # case E
  if (!mp & !mb & mf) {
    # predictor leads
    blocks <- check.blocks(blocks, data)
    # with a `blocks` argument the result is used as a list holding both
    # the checked predictorMatrix and the blocks
    z <- check.predictorMatrix(predictorMatrix, data, blocks)
    predictorMatrix <- z$predictorMatrix
    blocks <- z$blocks
    formulas <- make.formulas(data, blocks, predictorMatrix = predictorMatrix)
  }

  # case F
  if (!mp & mb & !mf) {
    # formulas lead
    formulas <- check.formulas(formulas, data)
    predictorMatrix <- check.predictorMatrix(predictorMatrix, data)
    blocks <- construct.blocks(formulas, predictorMatrix)
  }

  # case G
  if (mp & !mb & !mf) {
    # blocks lead
    blocks <- check.blocks(blocks, data, calltype = "formula")
    # NOTE(review): every other branch passes `data` as the second argument
    # of check.formulas(); here `blocks` is passed -- confirm this is intended
    formulas <- check.formulas(formulas, blocks)
    predictorMatrix <- make.predictorMatrix(data, blocks)
  }

  # case H
  if (!mp & !mb & !mf) {
    # blocks lead
    blocks <- check.blocks(blocks, data)
    formulas <- check.formulas(formulas, data)
    # NOTE(review): case E unpacks the result of the same three-argument call
    # as z$predictorMatrix / z$blocks; here it is assigned directly -- confirm
    predictorMatrix <- check.predictorMatrix(predictorMatrix, data, blocks)
  }

  # `chk` is not used again in this function
  # NOTE(review): presumably called for its validation side effects -- confirm
  chk <- check.cluster(data, predictorMatrix)
  where <- check.where(where, data, blocks)
  visitSequence <- check.visitSequence(visitSequence, data = data,
                                       where = where, blocks = blocks)
  method <- check.method(method = method, data = data, where = where,
                         blocks = blocks, defaultMethod = defaultMethod)
  post <- check.post(post, data)
  blots <- check.blots(blots, data, blocks)

  # data frame for storing the event log
  # NOTE(review): `state` and `loggedEvents` are not assigned again in this
  # function, yet `state$log` is tested after sampler() returns. Presumably
  # callees update both objects through the calling frame -- do not rename or
  # move these locals without checking the logging helpers.
  state <- list(it = 0, im = 0, dep = "", meth = "", log = FALSE)
  loggedEvents <- data.frame(it = 0, im = 0, dep = "", meth = "", out = "")

  # edit imputation setup
  # the four settings are bundled, passed through edit.setup(), and unpacked
  setup <- list(method = method,
                predictorMatrix = predictorMatrix,
                visitSequence = visitSequence,
                post = post)
  setup <- edit.setup(data, setup, ...)
  method <- setup$method
  predictorMatrix <- setup$predictorMatrix
  visitSequence <- setup$visitSequence
  post <- setup$post

  # initialize imputations
  # nmis: number of missing entries per column of data
  nmis <- apply(is.na(data), 2, sum)
  imp <- initialize.imp(data, m, where, blocks, visitSequence,
                        method, nmis, data.init)

  # and iterate...
  # iterations are numbered from 1 to maxit
  from <- 1
  to <- from + maxit - 1
  q <- sampler(data, m, where, imp, blocks, method, visitSequence,
               predictorMatrix, formulas, blots, post, c(from, to),
               printFlag, ...)

  # drop the event log when nothing was logged, otherwise renumber its rows
  if (!state$log) loggedEvents <- NULL
  if (state$log) row.names(loggedEvents) <- seq_len(nrow(loggedEvents))

  ## save, and return
  midsobj <- list(data = data, imp = q$imp, m = m,
                  where = where, blocks = blocks,
                  call = call, nmis = nmis,
                  method = method,
                  predictorMatrix = predictorMatrix,
                  visitSequence = visitSequence,
                  formulas = formulas, post = post,
                  blots = blots,
                  seed = seed,
                  iteration = q$iteration,
                  lastSeedValue = .Random.seed,
                  chainMean = q$chainMean,
                  chainVar = q$chainVar,
                  loggedEvents = loggedEvents,
                  version = packageVersion("mice"),
                  date = Sys.Date())
  oldClass(midsobj) <- "mids"

  # tell the user how many events were logged, without the call in the message
  if (!is.null(midsobj$loggedEvents))
    warning("Number of logged events: ", nrow(midsobj$loggedEvents),
            call. = FALSE)
  return(midsobj)
}
mice/R/handles.R0000644000176200001440000000047013416657163013146 0ustar liggesusers
# Returns TRUE when `a` is the name of a formal argument of function `f`,
# and FALSE when `f` is not a function.
handles.arg <- function(f, a = "data") {
  # determine whether function f handles argument a
  if (!is.function(f)) return(FALSE)
  a %in% names(formals(f))
}

# `fn` is the name of an object that is looked up with get(); the result is
# handed to handles.arg() to test for a formal argument called "format".
handles.format <- function(fn) {
  # determine whether function fn handles the `format` argument
  f <- get(fn)
  handles.arg(f, "format")
}
mice/R/D3.R0000644000176200001440000000546213621063470011772 0ustar liggesusers
#'Compare two nested models using D3-statistic
#'
#'The D3-statistic is a likelihood-ratio test statistic.
#'
#'@inheritParams D1
#'@references
#' Meng, X. L., and D. B. Rubin. 1992.
#' Performing Likelihood Ratio Tests with Multiply-Imputed Data Sets.
#' \emph{Biometrika}, 79 (1): 103–11.
#'
#'\url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:likelihoodratio}
#'@return A list whose class is \code{"mice.anova"} followed by the class of
#'the list of fitted models. Its elements are \code{call}, \code{result},
#'\code{formulas}, \code{m}, \code{method}, \code{use}, \code{df.com} and
#'\code{deviances}. The unnamed numeric vector \code{result} holds, in this
#'order, the test statistic, its numerator and denominator degrees of
#'freedom, the p-value and the quantity \code{rm}.
#'@examples
#'# Compare two linear models:
#'imp <- mice(nhanes2, seed = 51009, print = FALSE)
#'mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl))
#'mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp))
#'D3(mi1, mi0)
#'
#'# Compare two logistic regression models
#'imp  <- mice(boys, maxit = 2, print = FALSE)
#'fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial))
#'fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial))
#'D3(fit1, fit0)
#'@export
D3 <- function(fit1, fit0 = NULL, df.com = Inf, ...) {
  # record the call once; it is stored in the returned object
  call <- match.call()

  # repeated analyses of the larger model and their pooled estimates
  fit1 <- getfit(fit1)
  m <- length(fit1)
  est1 <- pool(fit1)
  qbar1 <- getqbar(est1)

  if (is.null(fit0)) {
    # test all estimates equal to zero
    beta <- rep(0, length(qbar1))
    names(beta) <- names(qbar1)
    fit0 <- lapply(fit1, fix.coef, beta = beta)
  } else {
    fit0 <- getfit(fit0)
  }
  est0 <- pool(fit0)
  qbar0 <- getqbar(est0)

  # number of parameters by which the two models differ
  k <- length(qbar1) - length(qbar0)

  # For each imputed dataset, extract the deviance of both models
  # as fitted
  dev1.M <- lapply(fit1, glance) %>% bind_rows() %>% pull(.data$deviance)
  dev0.M <- lapply(fit0, glance) %>% bind_rows() %>% pull(.data$deviance)

  # For each imputed dataset, extract the deviance of both models
  # with coefficients restricted to the pooled estimates qbar
  mds1 <- lapply(fit1, fix.coef, beta = qbar1)
  dev1.L <- lapply(mds1, glance) %>% bind_rows() %>% pull(.data$deviance)

  mds0 <- lapply(fit0, fix.coef, beta = qbar0)
  dev0.L <- lapply(mds0, glance) %>% bind_rows() %>% pull(.data$deviance)

  deviances <- list(dev1.M = dev1.M, dev0.M = dev0.M,
                    dev1.L = dev1.L, dev0.L = dev0.L)

  dev.M <- mean(dev0.M - dev1.M)  # scaled deviance, as fitted
  dev.L <- mean(dev0.L - dev1.L)  # scaled deviance, restricted
  rm <- ((m + 1) / (k * (m - 1))) * (dev.M - dev.L)
  Dm <- dev.L / (k * (1 + rm))

  # Degrees of freedom for F distribution
  v <- k * (m - 1)
  if (v > 4) {
    w <- 4 + (v - 4) * ((1 + (1 - 2 / v) * (1 / rm))^2)
  } else {
    w <- v * (1 + 1 / k) * ((1 + 1 / rm)^2) / 2
  }

  # upper-tail probability computed directly; 1 - pf(...) loses precision
  # when the p-value is very small
  pvalue <- pf(Dm, k, w, lower.tail = FALSE)

  out <- list(
    call = call,
    result = c(Dm, k, w, pvalue, rm),
    formulas = list(`1` = formula(getfit(fit1, 1L)),
                    `2` = formula(getfit(fit0, 1L))),
    m = m,
    method = "D3",
    use = NULL,
    df.com = df.com,
    deviances = deviances)
  class(out) <- c("mice.anova", class(fit1))
  out
}
mice/NEWS.md0000644000176200001440000005701513621067272012303 0ustar liggesusers
---
title: "News"
output: github_document
---

# mice 3.8.0

## Major changes

* This version adds two new NARFCS methods for imputing data under the *Missing Not at Random (MNAR)* assumption. NARFCS is generalised version of the so-called $\delta$-adjustment method. Margarita Moreno-Betancur and Ian White kindly contributes the functions `mice.impute.mnar.norm()` and `mice.impute.mnar.logreg()`. These functions aid in performing sensitivity analysis to investigate the impact of different MNAR assumptions on the conclusion of the study. An alternative for MNAR is the older `mice.impute.ri()` function.

* Installation of `mice` is faster. External packages needed for imputation and analyses are now installed on demand. The number of dependencies as estimated by `rsconnect::appDepencies()` decreased from 132 to 83.

* The name clash with the `complete()` function of `tidyr` should no longer be a problem.

* There is now a more flexible `pool()` function that integrates better with the `broom` and `broom.mixed` packages.

## Bug fixes

* Deprecates `pool.compare()`. Use `D1()` instead (#220)
* Removes everything in `utils::globalVariables()`
* Prevents name clashes with `tidyr` by defining `complete.mids()` as an S3 method for the `tidyr::complete()` generic (#212)
* Extends the `pool()` function to deal with multiple sets of parameters.
Currently supported keywords are: `term` (all `broom` functions), `component` (some `broom.mixed` functions) and `y.values` (for `multinom()` model) (#219) * Adds a new `install.on.demand()` function for lighter installation * Adds `toenail2` and remove dependency on `HSAUR3` * Solves problem with `ampute` in extreme cases (#216) * Solves problem with `pool` with `mgcv::gam` (#218) * Adds `.gitattributes` for consistent line endings # mice 3.7.0 * Solves a bug that made `polr()` always fail (#206) * Aborts if one or more columns are a `data.frame` (#208) * Update `mira-class` documentation (#207) * Remove links to deprecated package `CALIBERrfimpute` * Adds check on partial missing level-2 data to `2lonly.norm` and `2lonly.pmm` * Change calculation of `a2` to elementwise division by a matrix of observations * Extend documentation for `2lonly.norm` and `2lonly.pmm` * Repair return value from `2lonly.pmm` * Imputation method `2lonly.mean` now also works with factors * Replace deprecated `imputationMethod` argument in examples by `method` * More informative error message when stopped after pre-processing (#194) * Updated URL's in DESCRIPTION * Fix string matching in `check.predictorMatrix()` (#191) # mice 3.6.0 * Copy `toenail` data from orphaned `DPpackage` package * Remove `DPpackage` from `Suggests` field in `DESCRIPTION` * Adds support for rotated names in `md.pattern()` (#170, #177) # mice 3.5.0 * This version has some error fixes * Fixes a bug in the sampler that ignored imputed values in variables outside the active block (#175, @alexanderrobitzsch) * Adds a note to the documenation of `as.mids`() (#173) * Removes a superfluous warning from process_mipo() (#92) * Fixes an error in the degrees of freedom of the P-value calculation (#171) # mice 3.4.0 * Add a hex sticker to the mice package. Designed by Jaden M. Walters. 
* Specify the R3.5.0 random generator in order to pass CRAN tests * Remove test-fix.coef.R from tests * Adds a rotate.names argument to md.pattern() (#154, #160) * Fix to solve the name-matching problem (#156, #149, #147) * Fix that removes the pre-check for existence of `mice.impute.xxx()` so that `mice::mice()` works as expected (#55) * Solves a bug that crashed `mids2spss()`, thanks Edgar Schoreit (#149) * Solves a problem in the routing logic (#149) causing that passive imputation was not done when no predictors were specified. Now passive imputation correctly ignores any specification of `predictorMatrix`. * Implements an alternative solution for #93 and #96. Instead of skipping imputation of variables without predictors, `mice 3.3.1` will impute those variables using the intercept only * Adds a routine contributed by Simon Grund that checks for deprecated arguments #137 * Improves the `nelsonaalen()` function for data where variables `time` or `status` have already been defined (#140), thanks matthieu-faron # mice 3.3.0 * Solves bug in passive imputation (#130). 
*Warning: This bug may have caused invalid imputations in `mice 3.0.0` - `mice 3.2.0` under passive imputation.* * Updates code to `broom 0.5.0` (#128) * Solves problem with `mice.impute.2l.norm()` (#129) * Use explicit foreign function calls in tests # mice 3.2.0 * Skip tests for `mice.impute.2l.norm()` (#129) * Skip tests for `D1()` (#128) * Solve problem with `md.pattern` (#126) * Evades warning in `rbind` and `cbind` (#114) * Solves `rbind` problem when `method` is a list (#113) * More efficient use of `parlmice` (#109) * Add `dfcom` argument to `pool()` (#105, #110) * Updates to `parlmice` + bugfix (#107) # mice 3.1.0 * New parallel functionality: `parlmice` (#104) * Incorporate suggestion of @JoergMBeyer to `flux` (#102) * Replace duplicate code by `estimice` (#101) * Better checking for empty methods (#99) * Remove problem with `parent.frame` (#98) * Set empty method for complete data (#93) * Add `NEWS.md`, `index.Rmd` and online package documentation * Track `.R` instead of `.r` * Patch issue with `updateLog` (#8, @alexanderrobitzsch) * Extend README * Repair issue `md.pattern` (#90) * Repair check on `m` (#89) # mice 3.0.0 Version 3.0 represents a major update that implements the following features: 1. `blocks`: The main algorithm iterates over blocks. A block is simply a collection of variables. In the common MICE algorithm each block was equivalent to one variable, which - of course - is the default; The `blocks` argument allows mixing univariate imputation methods with multivariate imputation methods. The `blocks` feature bridges two seemingly disparate approaches, joint modeling and fully conditional specification, into one framework; 2. `where`: The `where` argument is a logical matrix of the same size of `data` that specifies which cells should be imputed. This opens up some new analytic possibilities; 3. 
Multivariate tests: There are new functions `D1()`, `D2()`, `D3()` and `anova()` that perform multivariate parameter tests on the repeated analyses of multiply-imputed data; 4. `formulas`: The old `form` argument has been redesigned and is now renamed to `formulas`. This provides an alternative way to specify imputation models that exploits the full power of R's native formula's. 5. Better integration with the `tidyverse` framework, especially for packages `dplyr`, `tibble` and `broom`; 6. Improved numerical algorithms for low-level imputation function. Better handling of duplicate variables. 7. Last but not least: A brand new edition AND online version of [Flexible Imputation of Missing Data. Second Edition.](https://stefvanbuuren.name/fimd/) # mice 2.46.9 (2017-12-08) * simplify code for `mids` object in `mice` (thanks stephematician) (#61) * simplify code in `rbind.mids` (thanks stephematician) (#59) * repair bug in `pool.compare()` in handling factors (#60) * fixed bug in `rbind.mids` in handling `where` (#59) * add new arguments to `as.mids()`, add `as()` * update contact info * resolved problem `cart` not accepting a matrix (thanks Joerg Drechsler) * Adds generalized `pool()` to list of models * Switch to 3-digit versioning # mice 2.46 (2017-10-22) * Allow for capitals in imputation methods # mice 2.45 (2017-10-21) * Reorganized vignettes to land on GitHUB pages # mice 2.44 (2017-10-18) * Code changes for robustness, style and efficiency (Bernie Gray) # mice 2.43 (2017-07-20) * Updates the `ampute` function and vignettes (Rianne Schouten) # mice 2.42 (2017-07-11) * Rename `mice.impute.2l.sys` to `mice.impute.2l.lmer` # mice 2.41 (2017-07-10) * Add new feature: `where`argument to mice * Add new `wy` argument to imputation functions * Add `mice.impute.2l.sys()`, author Shahab Jolani * Update with many simplifications and code enhancements * Fixed broken `cbind()` function * Fixed Bug that made the pad element disappear from `mids` object # mice 2.40 
(2017-07-07) * Fixed integration with `lattice` package * Updates colors in `xyplot.mads` * Add support for factors in `mice.impute.2lonly.pmm()` * Create more robust version of as.mids() * Update of `ampute()` by Rianne Schouten * Fix timestamp problem by rebuilding vignette using R 3.4.0. # mice 2.34 (2017-04-24) * Update to roxygen 6.0.1 * Stylistic changes to `mice` function (thanks Ben Ogorek) * Calls to `cbind.mids()` replaced by calls to `cbind()` # mice 2.31 (2017-02-23) * Add link to `miceVignettes` on github (thanks Gerko Vink) * Add package documentation * Add `README` for GitHub * Add new ampute functions and vignette (thanks Rianne Schouten) * Rename `ccn` --> `ncc`, `icn` --> `nic` * Change helpers `cc()`, `ncc()`, `cci()`, `ic()`, `nic()` and `ici()` use `S3` dispatch * Change issues tracker on Github - add BugReports URL #21 * Fixed `multinom` MaxNWts type fix in `polyreg` and `polr` #9 * Fix checking of nested models in `pool.compare` #12 * Fix `as.mids` if names not same as all columns #11 * Fix extension for `glmer` models #5 # mice 2.29 (2016-10-05) * Add `midastouch`: predictive mean matching for small samples (thanks Philip Gaffert, Florian Meinfelder) # mice 2.28 (2016-10-05) * Repaired dots problem in `rpart` call # mice 2.27 (2016-07-27) * Add `ridge` to `2l.norm()` * Remove `.o` files # mice 2.25 (2015-11-09) * Fix `as.mids()` bug that crashed `miceadds::mice.1chain()` # mice 2.23 (2015-11-04) * Update of example code on /doc * Remove lots of dependencies, general cleanup * Fix `impute.polyreg()` bug that bombed if there were no predictors (thanks Jan Graffelman) * Fix `as.mids()` bug that gave incorrect $m$ (several users) * Fix `pool.compare()` error for `lmer` object (thanks Claudio Bustos) * Fix error in `mice.impute.2l.norm()` if just one `NA` (thanks Jeroen Hoogland) # mice 2.22 (2014-06-11) * Add about six times faster predictive mean matching * `pool.scalar()` now can do Barnard-Rubin adjustment * `pool()` now handles class 
`lmerMod` from the `lme4` package * Added automatic bounds on donors in `.pmm.match()` for safety * Added donors argument to `mice.impute.pmm()` for increased visibility * Changes default number of trees in `mice.impute.rf()` from 100 to 10 (thanks Anoop Shah) * `long2mids()` deprecated. Use `as.mids()` instead * Put `lattice` back into DEPENDS to find generic `xyplot()` and friends * Fix error in `2lonly.pmm` (thanks Alexander Robitzsch, Gerko Vink, Judith Godin) * Fix number of imputations in `as.mids()` (thanks Tommy Nyberg, Gerko Vink) * Fix colors to `mdc()` in example `mice.impute.quadratic()` * Fix error in `mice.impute.rf()` if just one `NA` (thanks Anoop Shah) * Fix error in `summary.mipo()` when `names(x$qbar)` equals `NULL` (thanks Aiko Kuhn) * Fix improper testing in `ncol()` in `mice.impute.2lonly.mean()` # mice 2.21 02-05-2014 SvB * FIXED: compilation problem in match.cpp on solaris CC # mice 2.20 02-02-2014 SvB * ADDED: experimental fastpmm() function using Rcpp * FIXED: fixes to mice.impute.cart() and mice.impute.rf() (thanks Anoop Shah) # mice 2.19 21-01-2014 SvB * ADDED: mice.impute.rf() for random forest imputation (thanks Lisa Doove) * CHANGED: default number of donors in mice.impute.pmm() changed from 3 to 5. Use mice(..., donors = 3) to get the old behavior. 
* CHANGED: speedup in .norm.draw() by using crossprod() (thanks Alexander Robitzsch) * CHANGED: speedup in .imputation.level2() (thanks Alexander Robitzsch) * FIXED: define MASS, nnet, lattice as imports instead of depends * FIXED: proper handling of rare case in remove.lindep() that removed all predictors (thanks Jaap Brand) # mice 2.18 31-07-2013 SvB * ADDED: as.mids() for converting long format in a mids object (thanks Gerko Vink) * FIXED: mice.impute.logreg.boot() now properly exported (thanks Suresh Pujar) * FIXED: two bugs in rbind.mids() (thanks Gerko Vink) # mice 2.17 10-05-2013 SvB * ADDED: new form argument to mice() to specify imputation models using forms (contributed Ross Boylan) * FIXED: with.mids(), is.mids(), is.mira() and is.mipo() exported * FIXED: eliminated errors in the documentation of pool.scalar() * FIXED: error in mice.impute.ri() (thanks Shahab Jolani) # mice 2.16 27-04-2013 SvB * ADDED: random indicator imputation by mice.impute.ri() for nonignorable models (thanks Shahab Jolani) * ADDED: workhorse functions .norm.draw() and .pmm.match() are exported * FIXED: bug in 2.14 and 2.15 in mice.impute.pmm() that produced an error on factors * FIXED: bug that crashed R when the class variable was incomplete (thanks Robert Long) * FIXED: bug in 2l.pan and 2l.norm by convert a class factor to integer (thanks Robert Long) * FIXED: warning eliminated caused by character variables (thanks Robert Long) # mice 2.15 - 02-04-2013 SvB * CHANGED: complete reorganization of documentation and source files * ADDED: source published on GitHub.com * ADDED: new imputation method mice.impute.cart() (thanks Lisa Doove) * FIXED: calculation of degrees of freedom in pool.compare() (thanks Lorenz Uhlmann) * FIXED: error in DESCRIPTION file (thanks Kurt Hornik) # mice 2.14 - 11-03-2013 / SvB * ADDED: mice.impute.2l.mean() for imputing class means at level 2 * ADDED: sampler(): new checks of degrees of freedom per variable at iteration 1 * ADDED: function check.df() to 
throw a warning about low degrees of freedom * FIXED: tolower() added in "2l" test in sampler() * FIXED: conversion of factors that have other roles (multilevel) in padModel() * FIXED: family argument in call to glm() in glm.mids() (thanks Nicholas Horton) * FIXED: .norm.draw(): evading NaN imputed values by setting df in rchisq() to a minimum of 1 * FIXED: bug in mice.df() that prevented the classic Rubin df calculation (thanks Jean-Baptiste Pingault) * FIXED: bug fixed in mice.impute.2l.norm() (thanks Robert Long) * CHANGED: faster .pmm.match2() from version 2.12 renamed to default .pmm.match() # mice 2.13 - 03-07-2012 / SvB * ADDED: new multilevel functions 2l.pan(), 2lonly.norm(), 2lonly.pmm() (contributed by Alexander Robitzsch) * ADDED: new quadratic imputation function: quadratic() (contributed by Gerko Vink) * ADDED: pmm2(), five times faster than pmm() * ADDED: new argument data.init in mice() for initialization (suggested by Alexander Robitzsch) * ADDED: mice() now accepts pmm as method for (ordered) factors * ADDED: warning and a note to 2l.norm() that advises to use type=2 for the predictors * FIXED: bug that crashed plot.mids() if there was only one incomplete variable (thanks Dennis Prangle) * FIXED: bug in sample() in .pmm.match() when donor=1 (thanks Alexander Robitzsch) * FIXED: bug in sample() in mice.impute.sample() * FIXED: fixed '?data' bug in check.method() * REMOVED: wp.twin(). 
Now available from the AGD package # mice 2.12 - 25-03-2012 / SvB * UPDATE: version for launch of Flexible Imputation of Missing Data (FIMD) * ADDED: code fimd1.r-fim9.r to inst/doc for calculating solutions in FIMD * FIXED: more robust version of supports.transparent() (thanks Brian Ripley) * ADDED: auxiliary functions ifdo(), long2mids(), appendbreak(), extractBS(), wp.twin() * ADDED: getfit() function * ADDED: datasets: tbc, potthoffroy, selfreport, walking, fdd, fdgs, pattern1-pattern4, mammalsleep * FIXED: as.mira() added to namespace * ADDED: functions flux(), fluxplot() and fico() for missing data patterns * ADDED: function nelsonaalen() for imputing survival data * CHANGED: rm.whitespace() shortened * FIXED: bug in pool() that crashed on nonstandard behavior of survreg() (thanks Erich Studerus) * CHANGED: pool() streamlined, warnings about incompatibility in lengths of coef() and vcov() * FIXED: mdc() bug that ignored transparent=FALSE argument, now made visible * FIXED: bug in md.pattern() for >32 variables (thanks Sascha Vieweg, Joshua Wiley) # mice 2.11 - 21-11-2011 / SvB * UPDATE: definite reference to JSS paper * ADDED: rm.whitespace() to do string manipulation (thanks Gerko Vink) * ADDED: function mids2mplus() to export data to Mplus (thanks Gerko Vink) * CHANGED: plot.mids() changed into trellis version * ADDED: code used in JSS-paper * FIXED: bug in check.method() (thanks Gerko Vink) # mice 2.10 - 14-09-2011 / SvB * FIXED: arguments dec and sep in mids2spss (thanks Nicole Haag) * FIXED: bug in keyword "monotone" in mice() (thanks Alain D) # mice 2.9 - 31-08-2011 / SvB * FIXED: appropriate trimming of ynames and xnames in Trellis plots * FIXED: exported: spss2mids(), mice.impute.2L.norm() * ADDED: mice.impute.norm.predict(), mice.impute.norm.boot(), mice.impute.logreg.boot() * ADDED: supports.transparent() to detect whether .Device can do semi-transparent colors * FIXED: stringr package is now properly loaded * ADDED: trellis version of plot.mids() * 
ADDED: automatic semi-transparency detection in mdc() * FIXED: documentation of mira class (thanks Sandro Tsang) # mice 2.8 - 24-03-2011 / SvB * FIXED: bug fixed in find.collinear() that bombed when only one variable was left # mice 2.7 - 16-03-2011 / SvB * CHANGED: check.data(), remove.lindep(): fully missing variables are imputed if allow.na=TRUE (Alexander Robitzsch) * FIXED: bug in check.data(). Now checks collinearity in predictors only (Alexander Robitzsch) * CHANGED: abbreviations of arguments eliminated to evade linux warnings # mice 2.6 - 03-03-2011 / SvB * ADDED: bwplot(), stripplot(), densityplot() and xyplot() for creating Trellis graphs * ADDED: function mdc() and mice.theme() for graphical parameters * ADDED: argument passing from mice() to lower-level functions (requested by Juned Siddique) * FIXED: erroneous rgamma() replaced by rchisq() in .norm.draw, lowers variance a bit for small n * ADDED: with.mids() extended to handle expression objects * FIXED: reporting bug in summary.mipo() * CHANGED: df calculation in pool(), intervals may become slightly wider * ADDED: internal functions mice.df() and df.residual() * FIXED: error in rm calculation for "likelihood" in pool.compare() * CHANGED: default ridge parameter changed # mice 2.5 - 06-01-2011 / SvB * ADDED: various stability enhancements and code clean-up * ADDED: find.collinear() function * CHANGED: automatic removal of constant and collinear variables * ADDED: ridge parameter in .norm.draw() and .norm.fix() * ADDED: mice.impute.polr() for ordered factors * FIXED: chainMean and chainVar in mice.mids() * FIXED: iteration counter for mice.mids and sampler() * ADDED: component 'loggedEvents' to mids-object for logging actions * REMOVED: annoying warnings about removed predictors * ADDED: updateLog() function * CHANGED: smarter handling of model setup in mice() * CHANGED: .pmm.match() now draws from the three closest donors * ADDED: mids2spss() for shipping a mids-object to SPSS * FIXED: change in 
summary.mipo() to work with as.mira() * ADDED: function mice.impute.2L.norm.noint() * ADDED: function as.mira() * FIXED: global assign() removed from mice.impute.polyreg() * FIXED: improved handling of factors by complete() * FIXED: improved labeling of nhanes2 data # mice 2.4 - 17-10-2010 / SvB * ADDED: pool() now supports class 'polr' (Jean-Baptiste Pingault) * FIXED: solved problem in mice.impute.polyreg when one of the variables was named y or x * FIXED: remove.lindep: intercept prediction bug * ADDED: version() function * ADDED: cc(), cci() and ccn() convenience functions # mice 2.3 - 14-02-2010 / SvB * FIXED: check.method: logicals are now treated as binary variables (Emmanuel Charpentier) * FIXED: complete: the NULL imputation case is now properly handled * FIXED: mice.impute.pmm: now creates between imputation variability for univariate predictor * FIXED: remove.lindep: returns 'keep' vector instead of data # mice 2.2 - 13-01-2010 / SvB * ADDED: pool() now supports class 'multinom' (Jean-Baptiste Pingault) * FIXED: bug fixed in check.data for data consisting of two columns (Rogier Donders, Thomas Koepsell) * ADDED: new function remove.lindep() that removes predictors that are (almost) linearly dependent * FIXED: bug fixed in pool() that produced an (innocent) warning message (Qi Zheng) # mice 2.1 - 14-09-2009 / SvB * ADDED: pool() now also supports class 'mer' * CHANGED: nlme and lme4 are now only loaded if needed (by pool()) * FIXED: bug fixed in mice.impute.polyreg() when there was one missing entry (Emmanuel Charpentier) * FIXED: bug fixed in plot.mids() when there was one missing entry (Emmanuel Charpentier) * CHANGED: NAMESPACE expanded to allow easy access to function code * FIXED: mice() can now find mice.impute.xxx() functions in the .GlobalEnv # mice 2.0 - 26-08-2009 / SvB, KO Major upgrade for JSS manuscript * ADDED: new functions cbind.mids(), rbind.mids(), ibind() * ADDED: new argument in mice(): 'post' in post-processing imputations * ADDED: 
new functions: pool.scalar(), pool.compare(), pool.r.squared() * ADDED: new data: boys, popmis, windspeed * FIXED: function summary.mipo all(object$df) command fixed * REMOVED: data.frame.to.matrix replaced by the internal data.matrix function * ADDED: new imputation method mice.impute.2l.norm() for multilevel data * CHANGED: pool now works for any class having a vcov() method * ADDED: with.mids() provides a general complete-data analysis * ADDED: type checking in mice() to ensure appropriate imputation methods * ADDED: warning added in mice() for constant predictors * ADDED: prevention of perfect prediction in mice.impute.logreg() and mice.impute.polyreg() * CHANGED: mice.impute.norm.improper() changed into mice.impute.norm.nob() * REMOVED: mice.impute.polyreg2() deleted * ADDED: new 'include' argument in complete() * ADDED: support for the empty imputation method in mice() * ADDED: new function md.pairs() * ADDED: support for intercept imputation * ADDED: new function quickpred() * FIXED: plot.mids() bug fix when number of variables > 5 # mice 1.21 - 15/3/2009 SvB Maintenance release * FIXED: Stricter type checking on logicals in mice() to evade warnings. * CHANGED: Modernization of all help files. * FIXED: padModel: treatment changed to contr.treatment * CHANGED: Functions check.visitSequence, check.predictorMatrix, check.imputationMethod are now coded as local to mice() * FIXED: existsFunction in check.imputationMethod now works both under S-Plus and R # mice 1.16 - 6/25/2007 * FIXED: The imputation function impute.logreg used convergence criteria that were too optimistic when fitting a GLM with glm.fit. Thanks to Ulrike Gromping. # mice 1.15 - 01/09/2006 * FIXED: In the lm.mids and glm.mids functions, parameters were not passed through to glm and lm. # mice 1.14R - 9/26/2005 11:44AM * FIXED: Passive imputation works again. (Roel de Jong) * CHANGED: Random seed is now left alone, UNLESS the argument "seed" is specified. 
This means that unless you specify identical seed values, imputations of the same dataset will be different for multiple calls to mice. (Roel de Jong) * FIXED: (docs): Documentation for "impute.mean" (Roel de Jong) * FIXED: Function 'summary.mids' now works (Roel de Jong) * FIXED: Imputation function 'impute.polyreg' and 'impute.lda' should now work under R # mice 1.13 * Changed function checkImputationMethod, Feb 6, 2004 # mice 1.12 * Maintainance, S-Plus 6.1 and R 1.8 unicode, January 2004 # mice 1.1 * R version (with help of Peter Malewski and Frank Harrell), Feb 2001 # mice 1.0 * Original S-PLUS release, June 14 2000 mice/MD50000644000176200001440000004321513624017522011506 0ustar liggesuserse6852ce71ace7f22b0177a8aae23f60e *DESCRIPTION 3f490e13e093090d27914ca17e52d232 *NAMESPACE 5b546ae5b45e3c951774c3a297203742 *NEWS.md 9b6c9603903cd6351b6c26430e6faf36 *R/Ampute.R a9f7f8cde89929cd66a9b1b9678b6a13 *R/D1.R 06e1a552148ab1c27a42d78bb546f8a7 *R/D2.R 3801807eeb5efd8272aad133f2f6d51a *R/D3.R 0db1ab7d9c1018bda661184c067078a1 *R/Mads.R 238ba3866d9b380bdbdd75e6ea19c2ab *R/RcppExports.R 83d659cf6d7f009b49f2d26269a49cf4 *R/ampute.continuous.R aee9669d4de272e1aede6eba85be426c *R/ampute.default.R 54063ea45a019fac8b6227624daff355 *R/ampute.discrete.R 769e84f882e137e329bb9342e9b26740 *R/ampute.mcar.R f901204c39c451a7714fff6fc5ad3e13 *R/anova.R 90e9695d68001e526bb6e58e93414fa6 *R/as.R 115bff469d6401eff69a2819aa1815d6 *R/auxiliary.R d9cfdea05bde0e1ec5418347a85d3894 *R/barnard.rubin.R d8a646c459086958554c3ef0b4d06489 *R/blocks.R c3f9280733bc8f5edcae9c80b275519e *R/blots.R 9c92a0e7bd84e3b00e6d324cc8009631 *R/boys.R 88b0b43d6c6b7c9547a566b70641f2d2 *R/brandsma.R ec3907ac9948d679ee7218bf70b42d69 *R/bwplot.R bc926d131060a8920b0b2ce7155faedd *R/bwplot.mads.R 1a3c46d36238089540cace6ade59bcbf *R/cbind.R 4967f38699d43d328050a8d1ad0ebbc9 *R/cc.R fa9e132cda3e90ef35958e74af8995f9 *R/cci.R ce31b86ae69e833952375ce6ade477be *R/check.R 8469f85203bf8648e710595f95871143 *R/check.deprecated.R 
9cff2c8c1686ccda71707cb686fcb444 *R/complete.R 5743d93665260837d91c04dd29832432 *R/densityplot.R b298e9b2593fb5a269e019ad479f05c7 *R/design.R 80a4e627366316e477c036fed248297c *R/df.residual.R d0774c513946527335d29649657a3931 *R/edit.setup.R 8f6b428cc6bf631f6611131e08ea1a8d *R/employee.R 4ff0ff3e4299e207e3765b9da676e2aa *R/expandcov.R 38b2b3a9efe0f16289deaa5c59fb1f22 *R/fdd.R 05466a74438b76cf9b519ce4c68e7e61 *R/fdgs.R fc1399e77e0c8136165799b61ddc6625 *R/fix.coef.R 1e8b164d59e2bb3956087152ab312968 *R/flux.R bd4d7dd38019d91e6fc770db482309c7 *R/formula.R de6060dc48b07b93cece260675c78565 *R/generics.R a5055774fceba9c3a06c4729b2ee4145 *R/getfit.R 411575967ec462fce12e6ec1ba04112a *R/handles.R 21a8bed9b43a5de0cd1919bb96a7453d *R/ibind.R 09e33d0ecb5c191385cfed6ba2fa2099 *R/imports.R dd47a46a22c35d580597943d75cbaaa5 *R/initialize.chain.R 1b14ef8486d35d74fc236f1a471fa30f *R/initialize.imp.R 10c6eebe10873297c82b447e339090ec *R/install.on.demand.R 2cb15b75f4d410d8a566d30aeb2f2d69 *R/internal.R 83bc491196ba9a5fb8ddc695e725bed7 *R/is.R 3363bfe9f467acb84c2e92d93dc063f6 *R/leiden85.R 88da73f7e852383a7e47280c228d7aac *R/lm.R 4cb5449153fb6d2b4cab15ffb85f63d9 *R/mammalsleep.R 00686da4a109a4301fcb34ad804f3be7 *R/md.pairs.R 74a9374b2cd22ecec03fced257d11776 *R/md.pattern.R a7f0c934a33f13a52e6beea2647bbd1a *R/mdc.R 3aeac6384832fd65d20672406c4476c5 *R/method.R eb05673653b061752af325ca1cc4f43f *R/mice-package.R aeefb655b052d1176aba868a9c6b657e *R/mice.R 9a378d33bd23b29534872c5c06b93fbf *R/mice.impute.2l.bin.R 0ce399ab6123f73acc1f45206f89984b *R/mice.impute.2l.lmer.R 2572bb010a5b977471e70824274a7970 *R/mice.impute.2l.norm.R f15a49b16f3abb478b4863865b3ae9f4 *R/mice.impute.2l.pan.R e834f7c5bed2e4516a5601139ccb578c *R/mice.impute.2lonly.mean.R 0fe2bec7894cfa6fac9b55ac5cb9cfcf *R/mice.impute.2lonly.norm.R 0270e6ad50147a4c1d3692c85be3bc07 *R/mice.impute.2lonly.pmm.R 9904b997a0057e0f698b501f2506ab1b *R/mice.impute.cart.R d32fcfcdd8fa21e3c5b24914fcd9f541 *R/mice.impute.jomoImpute.R 
8b56eff83e7eee78226251749cb18fb2 *R/mice.impute.lda.R 5dbbaf7e31b84675b9586cba020ad12c *R/mice.impute.logreg.R 65758bd704f1bf7bca891de71ab6bbc1 *R/mice.impute.mean.R 4e3984f5a695871de7a21475c2555b59 *R/mice.impute.midastouch.R da11a90ee210246aef9760a4094e8e6d *R/mice.impute.mnar.logreg.R 144f2e72808d589e71c57c2936e4999d *R/mice.impute.mnar.norm.R de7b560a9c0b18e27c4ad9cd43f4e497 *R/mice.impute.norm.R 51db6e5604d6f7e1ce7271c30013f5d7 *R/mice.impute.norm.boot.R c9852c4463a71e47e2804447cae67a53 *R/mice.impute.norm.nob.R bad7a0a5e68c9f412e2b465fe7efd913 *R/mice.impute.norm.predict.R 6248993b4abe4d3f56c244b28836e03e *R/mice.impute.panImpute.R cb31f6c40a0e0ad8abffdc1c2a25db04 *R/mice.impute.passive.R bcb2a4d4158b94e5e7309a68a03d4760 *R/mice.impute.pmm.R a1d8ae15cf88c48c273566924af429c7 *R/mice.impute.polr.R 6c7236748a59cc4fc75e902319e12caf *R/mice.impute.polyreg.R 4971f509c76a28db1067ad3d76a15e09 *R/mice.impute.quadratic.R 3a58c276121a39ebb926f76cc5b644cd *R/mice.impute.rf.R 2cc648e3332a6c9574b17d5a54094d5c *R/mice.impute.ri.R 52e534eb568093ada60f75f9c82faba6 *R/mice.impute.sample.R 566ba1a7bfd8605f4aa6e01d6e007565 *R/mice.mids.R f62ca577f0118a1115686dc318852179 *R/mice.theme.R 82ed9e0d19cf9e80672412e9b9f6f3ce *R/mids.R e747f5d20fd8e36dfb6d7fac7ded032c *R/mids2mplus.R bcf4de642ee119aff7b79bde8f7adf2b *R/mids2spss.R d4bd9073a29b2c3a1aae06be64c08b45 *R/mipo.R 92375f4760a98be5a80555421bc272c8 *R/mira.R f651765138adcab541c9d8c1b2d06622 *R/ncc.R 334d5553fdb7606a4f91486190efc05f *R/nelsonaalen.R 34c6dd4a49ff12ff17e48c2cf0415d5c *R/nhanes.R 8aa4b519d9096f7247ea15c37eb125c0 *R/nhanes2.R e10ec49086311265a96e9570712e7d35 *R/nimp.R 01c232aeb5e143a9a2b74ca3a323d3fd *R/nmar_demo_data.R c3e3eb68ce4729ba15610bbffb945ac5 *R/parlmice.R 23c62fbde2f8b5d65361ecfa0893c1cb *R/parse.ums.R 3dcf45bbc1c94981fbb272d71b25e449 *R/pattern1.R 278dd5e5a061afd2f8863a405df8ac91 *R/plot.R 80c89d6bf5e0e675fbecc9f670154fa6 *R/pool.R fc24e5575fbfe099c686f0bc3f60b282 *R/pool.compare.R 
f740e14b094eb44d38fd67e84d15a884 *R/pool.r.squared.R f29eb6eb449393676504f7454694828b *R/pool.scalar.R 4eb9f8a6678dd932fc0c0b223f83984d *R/popmis.R ec0d0d0441e20982bd309ed2e42f59c1 *R/pops.R 7e568600be74ba25f9ec4f9ff0bb7f9b *R/post.R 92b3f37f72666056cdd37fa0a80ae287 *R/potthoffroy.R 7c3bf25fe1c2c86b30cee73b3d78dfe7 *R/predictorMatrix.R 967b4546a7a978a25e12dc1881de5ddc *R/print.R 7026a05a8576940220b08ae16571b11e *R/quickpred.R 914712f3aa77f3c254268dccfa02911b *R/rbind.R 1a9b8efd4bd6e1463429e9d1e54cb80d *R/rm.whitespace.R b0cf738d7eaa30293717d80082b5e449 *R/sampler.R 04a9014216b4b0d492fb7ae3530f6c59 *R/selfreport.R 32072143f5f0780137d5f9d56c6bb74c *R/squeeze.R 88af212b6b44029eda0253348964f7a7 *R/stripplot.R a29367d5e16744c73c79bfdc44577e12 *R/summary.R 2106bee3ca37fad67709b12179c69ce6 *R/supports.transparent.R f684d55ceb0833ac5ec15fed0460ccb1 *R/tbc.R b63643b38fbc84723f7c27069b0f5188 *R/toenail.R 8807557efe65f77d9210ad06c765559e *R/toenail2.R 58a9e3475f8a9ff6f6009381f7af2470 *R/validate.arguments.R a0d9e8786b20856b1207b14c0e41ee42 *R/visitSequence.R 03e9e1f53ad9958d839c761fad8247d9 *R/walking.R e32c16124d16cf04af92b0fafea1f19c *R/where.R a9373e6909440a13ff6b44e7c20262d1 *R/windspeed.R 820f816623f3e6f5ea4654169d6ead0d *R/with.R e516e5c474edcd6fcd5c278bd31ddcd7 *R/xyplot.R 24aa1359610b2ab8c1c8653e71df5a70 *R/xyplot.mads.R a5c45f5cac682c3e65579ed80468abd9 *R/zzz.R 3709c798ba6427dfc28b9e8c47b165e8 *data/boys.rda 32cb50043192073624d7a3f7215ad801 *data/brandsma.rda ee4bdbb28f21c4e95b7a449245eb620c *data/employee.rda 8ce389a211ad979d60676e039df7319f *data/fdd.pred.rda 972229ddd3e452cfec73ccbb6c31892c *data/fdd.rda 5ea54eea59179e3ab0dec78a373f8459 *data/fdgs.rda 365f612df41418b00e12e954bf501414 *data/leiden85.rda 0d0cd0c7d3a1fc652daf88dc5fff39d7 *data/mammalsleep.rda 3089353b6ad68b780988d157b85ffb1e *data/mnar_demo_data.rda 2ab63b9932cfe39c847cc2eac344848b *data/nhanes.rda b2fe3e7c172f48efd23e0cc94fd0d203 *data/nhanes2.rda 4d264592bd199a63030472eccd6fd87d *data/pattern1.rda 
66af0bc4eeb77a3524948906746fcf55 *data/pattern2.rda 487cb144a4a9185dd1c0f9fd3007973d *data/pattern3.rda 5cdd5b75e7c03bb3585eae04b33567a2 *data/pattern4.rda 0b2ffd42fa531abed362b545dba051d6 *data/popmis.rda 55e2f8d9a4dcd0a6dcd23bcd7d048201 *data/pops.pred.rda 40825f787646b160277a2a7e209b129b *data/pops.rda 4f9667b7d0e46602a22516e1ee845442 *data/potthoffroy.rda 6b5ef38755a87c6e6a60896a0a922ef2 *data/selfreport.rda d3b54e3ae3665be970c8648c524118d8 *data/tbc.rda 6fd8c89895806f89236d3856339cf2eb *data/tbc.target.rda 02c7a5dde878a13a1c6d17ca0bf8f02d *data/toenail.rda 5135c2ab6bc3d23abe90caefa3bb2b78 *data/toenail2.rda c1a7281864590a4890fa4b20ba991a20 *data/walking.rda 50833f651067e6300f4b1940f94f9503 *data/windspeed.rda 731c28e828e027b47101bce756f57eae *inst/CITATION 12509316afb21702182f7779bf6a4039 *inst/NEWS 80ed29589ea59d8e4584b26b4e4fdf13 *man/D1.Rd 078fd602fb5cf890a57b9276b7234497 *man/D2.Rd 45a4a500a8bc2186f5d7068cc65490e9 *man/D3.Rd 55a3c98c376218f44dba32453d563bc9 *man/ampute.Rd 20b566a3a8287171c824bf24c1cf599c *man/ampute.continuous.Rd fd2d1a168363d4607a60c633f141ab5e *man/ampute.default.freq.Rd 879c6742add3efd4025059771004837f *man/ampute.default.odds.Rd 5e4fef648b23607f365a77107e82b1fd *man/ampute.default.patterns.Rd 44e19cc9325a1ff8a796048ae473c590 *man/ampute.default.type.Rd 941a5e41607c2d2305533db6133a91a0 *man/ampute.default.weights.Rd 41d86969d8fde16db4bd94fcf84287ce *man/ampute.discrete.Rd 3f672c7a10eb95c35dc720ef504db30c *man/ampute.mcar.Rd 07d2ac3ce84367c0837254a693bba900 *man/anova.Rd f24afd97f6eeaa0679ed779b44a8029d *man/appendbreak.Rd 457e6b84a0b6be887a72df960c864a7d *man/as.mids.Rd 3464c8cf6f994d07ac63ac6aa5ff6765 *man/as.mira.Rd cb92f23e5b96e8a962be5f658c6a3049 *man/as.mitml.result.Rd 392a2201a5c1d3feed5d6e0ce7b729f0 *man/boys.Rd dea81d5ecee7287e1b75a18d7e23ad35 *man/brandsma.Rd a7a4ae2d95fafbf87f940c27bee2f04c *man/bwplot.mads.Rd 437b04996bdb4a309d47363df3b49e0a *man/bwplot.mids.Rd 6b11644c279c2e30ac4ba3e0be876523 *man/cbind.Rd 
7288978dbfb89aeeffb8908151a71501 *man/cbind.mids.Rd ef99ec7700d3ac83a9329340dcccb449 *man/cc.Rd 87e94d1a7e7dead69444c1c15ae5229a *man/cci.Rd 080086c3bec7867de3a889e17893b095 *man/complete.mids.Rd d1d9f1f5df0407b22967ec6877742597 *man/construct.blocks.Rd ae1395144f7c990b6a4688ffda8cc617 *man/densityplot.mids.Rd e071e3ea048db6f99b23ddcf7afdd5dc *man/employee.Rd 2dca80c3b27a1a481022c504bd42d62f *man/estimice.Rd 2e7fd3af369a7a64c555da6f5108244d *man/extend.formula.Rd 6c6706828c760e15329b5af96573c500 *man/extend.formulas.Rd 8c98624d0af097d231bc36c714e01665 *man/extractBS.Rd adfce7da2e45c5879bfc03b6117e7c61 *man/fdd.Rd af2035be20d29a28867880d4a6c1563c *man/fdgs.Rd 389dba42928f2599e0ff7c61d8673e07 *man/fico.Rd e5452a691ec4d8f62a2d8cd66875d974 *man/fix.coef.Rd b3d6fd24523320e2e43dc533f13e81ad *man/flux.Rd 10fd2d9c0bbc943c9afdfb9d6f5c471e *man/fluxplot.Rd cd38b3c74d6a693e7a46d4882a573136 *man/getfit.Rd 2dde2beed9a1fc5f01e45dac991f006a *man/getqbar.Rd 18030e1e6b64f07b6bd711c6801665fd *man/glm.mids.Rd 18bef66955252002c00de7a6090dd10f *man/ibind.Rd 413a74653d6f764353c681a7d32a6505 *man/ic.Rd 97081805ef408b2f10036b2c0fb432b8 *man/ici.Rd fa2f428cbe0167adb38f65ce85d09a7d *man/ifdo.Rd 5e3f6e4be4e37c414de7298d6a9936e7 *man/is.mads.Rd c7b112c5fabe6d6af37e22706334981e *man/is.mids.Rd 505b6d5a4d2028f39a4e3b1502c28f01 *man/is.mipo.Rd 75eaed69a60dcbd4fffd579d69926bde *man/is.mira.Rd 698720da50609f106a6440d268a02727 *man/is.mitml.result.Rd c8b998a8f42d916e2176b37ab5b57140 *man/leiden85.Rd 7cbce9c7211ba82dce38539b25ca77c6 *man/lm.mids.Rd 910cf43214c05515f5465bd55c458646 *man/mads-class.Rd 5b6f42a7ae490f6cf1d4e9a92eec7a70 *man/make.blocks.Rd 22273fc44d9fa3e53536907a0835dd03 *man/make.blots.Rd 6664e577b59cc675f361e5273c97d58e *man/make.formulas.Rd 07dbf07f2d2691d7b5343c8816b7b6da *man/make.method.Rd 23fe763f4fd121e5ce8f577c092a478f *man/make.post.Rd d0880eb2c2722aa780eb9fcbdebbaf55 *man/make.predictorMatrix.Rd 919b1bdc81c883a5d9fb428c114d3eae *man/make.visitSequence.Rd 
2263249cede534ee0771f9f8b9d2bfd7 *man/make.where.Rd 0d2e101b44ba02ef3e9e50c0f1613bd3 *man/mammalsleep.Rd 3dd7942704a2bd0ebf4228c15f09f04a *man/md.pairs.Rd feaf65ceedb3bc0b3a1bc9d00d08d8d1 *man/md.pattern.Rd 5d95491969bbcb33c12986fc2f1832b3 *man/mdc.Rd 24cd88cbd77d1e57d9aa290b9691ff02 *man/mice.Rd 22fb2adf3443a9a2a1970cc43af6931c *man/mice.impute.2l.bin.Rd 2fd7fe531eedad2ed55ad43af9e07d02 *man/mice.impute.2l.lmer.Rd 91348115eb0128646e6121b2397d547f *man/mice.impute.2l.norm.Rd a67b3e99a09b8206fe037440fa15b29a *man/mice.impute.2l.pan.Rd 1cddf630af2e588b4e4d1ce342e7e021 *man/mice.impute.2lonly.mean.Rd 3a499779bcd8219cd6bbf7da59abce79 *man/mice.impute.2lonly.norm.Rd 87255019fd86ce9b4460ab918ad8fe64 *man/mice.impute.2lonly.pmm.Rd b6cf09e7bfc17e0e255e63d2bd8c097a *man/mice.impute.cart.Rd 018badf4b48b7f8ed6ccd8da62f76c63 *man/mice.impute.jomoImpute.Rd 836bbc87bdfde39eb929a96313cf9bdf *man/mice.impute.lda.Rd a06392c8654df0972d139ef1d681d0d1 *man/mice.impute.logreg.Rd 1aab9b4af460c60e23b4d5c247b5453c *man/mice.impute.logreg.boot.Rd b848d1460bd77f36cf9c09a9e9d5f5c0 *man/mice.impute.mean.Rd 56d94b0af0aadcc260b0a7ca617c4937 *man/mice.impute.midastouch.Rd 2cb72fc63dfbccebdd67dc38791f2856 *man/mice.impute.mnar.Rd 92f74b532d1ff7684126c105934112bb *man/mice.impute.norm.Rd 178616d488a3a8b6ed3ddf1a0ae7f07e *man/mice.impute.norm.boot.Rd 2482ccb2e26a559759c1cb88bac89100 *man/mice.impute.norm.nob.Rd 1118ce99f4d78ddb9b77114eae4ae7dd *man/mice.impute.norm.predict.Rd 9b87280e50ac91e53b20b9fc7dbf661e *man/mice.impute.panImpute.Rd 403a2f9a1281ed1a4b9206762a41c1a4 *man/mice.impute.passive.Rd 7be173a7b4510611b8f25dfa3ef5ad32 *man/mice.impute.pmm.Rd 2f870edf6893e83c7d21bfc3cb916269 *man/mice.impute.polr.Rd a46f8bee31b90c198d30d9717db85635 *man/mice.impute.polyreg.Rd 5f8deaa35d9cce51e90d8a072a2d2171 *man/mice.impute.quadratic.Rd 70183decee0f4a9b9c559581e6f44ace *man/mice.impute.rf.Rd f74eb7a39872c86451a86dd20e90f054 *man/mice.impute.ri.Rd e687052df5309940012b6f856d2a9ab0 
*man/mice.impute.sample.Rd 9e9a356b03d8c677e91f511dfd4c7d00 *man/mice.mids.Rd b80f880e2bd3720eb47883d0d28b60ff *man/mice.theme.Rd f96013886e3fbdf8948fbc46d4f9484e *man/mids-class.Rd 186a052848ac6e5bfd02ffbcdbc67087 *man/mids2mplus.Rd 305f99a60604e96ad5d721681a6feb0a *man/mids2spss.Rd 5f9ecc4eeeaa297804bbef80fde57a68 *man/mipo.Rd aeb787d4f4bf9f76b4a78daf8c0ae782 *man/mira-class.Rd b6707513cefd79dbe2ad52f339bbe787 *man/mnar_demo_data.Rd 9d7c8870448e7f7c5ca06aee789dca81 *man/name.blocks.Rd cbda476ae7c7ca7afff9fece6544feef *man/name.formulas.Rd c68eb6813f20e407c25014e2bfa2efb0 *man/ncc.Rd 921a6410185d88c10753d3af601a5bba *man/nelsonaalen.Rd 648353481b2075177b5ccc4b3e3cfce8 *man/nhanes.Rd 0057f36847abf15eca901b8d13c7c370 *man/nhanes2.Rd 81c57dd54c58bd2aa47d8a17f8e274b2 *man/nic.Rd 1b82f8d90d4432a34880a1c53a3da1d4 *man/nimp.Rd c80e4bb94aa9e1353f346c71f4d9f709 *man/norm.draw.Rd 6a7b11ee19b89a7defd731e4280a6e67 *man/parlmice.Rd 04618021e5f5fdaf0e93a1aed1eaa551 *man/pattern.Rd 107233b456635a2bbd35fb84d17d7127 *man/plot.mids.Rd 5b5433584be424a006a2abc598aa3619 *man/pmm.match.Rd 845b5da831fd16559afc3ca53dc4784c *man/pool.Rd f5b99a150788c197f6ba4e0e75e72cd1 *man/pool.compare.Rd 27232627a016274511e2d8091ba6fca4 *man/pool.r.squared.Rd f9ae6b23931b69be8d1c04156208a664 *man/pool.scalar.Rd 47148d54afdbd46beb98a6dcd12b2c31 *man/popmis.Rd 853eaf7f7b3322a7da0a100b6ddbdc04 *man/pops.Rd ce7e206a16a4ea7df4c92274b04fb939 *man/potthoffroy.Rd 66c7521aab3da1592ec21d7411f7eaf5 *man/print.Rd df1da421444ab898dc53ef32b7d5c7df *man/print.mads.Rd ed7a3123133e3725852a278ce10f7ee5 *man/quickpred.Rd c75abb0601a34c0773f0ace43f590ea4 *man/rbind.mids.Rd 693d2e7302616263d069cc3e6ae75ae4 *man/selfreport.Rd 3ab63acc8936bb9e531242ec9b5f6638 *man/squeeze.Rd 00df80f9bb7b3c36f00d5f0b1dfca07a *man/stripplot.mids.Rd 3b3a7ce76202be61c1e10a9383a439b1 *man/summary.Rd 67e1010810dd52ad7d64fa1f37e6610f *man/supports.transparent.Rd 14644e235147b6ad95e214a410c015ed *man/tbc.Rd 686581a3a368a26bb0e1a62f87b9669b 
*man/toenail.Rd 8679a3cd611f5e0ca07ed0b611691c6f *man/toenail2.Rd 4f9b3be520cfc09543dfe6abf2094ab4 *man/version.Rd d38a706a8cd49720ae343f373c4a39eb *man/walking.Rd e588ca3141ab15b7543ad0fabc9b4cf2 *man/windspeed.Rd 19eda57b3e72fd426b1cabfddb467db4 *man/with.mids.Rd 6091a09f15193dd47eefde76238d4e4d *man/xyplot.mads.Rd c01bd7510aa922da771eb33fe208ce59 *man/xyplot.mids.Rd b3f86c20b81b51f5162874977d38784d *src/Makevars 73505498722ad4b07b0ddb3067e92523 *src/Makevars.win 2600faaaa79b957259714446a909a132 *src/RcppExports.cpp 88d0e11d19f58d7504e1ac6ddf845ceb *src/match.cpp 904497a5e128718077a01d78f3691302 *tests/testthat.R f5ea8e446101f9ed0b9cdf0586ac24e2 *tests/testthat/test-D1.R 9128fa8edfb94c5b4209e38553eac15d *tests/testthat/test-D3.R 37fcb5aa7bb42d6684884253eb1b15c4 *tests/testthat/test-ampute.R 43e5f8347f83431db117b39d2934815f *tests/testthat/test-anova.R 0bbe84083339e37767e43c9752eed80f *tests/testthat/test-as.mids.R f4dc461e751190948ae48ea3dd87d067 *tests/testthat/test-blocks.R afd04fa36957bfda22ec4ed8ed4e5175 *tests/testthat/test-blots.R 8ffc19325a4f37a3761d901bf80417cb *tests/testthat/test-cbind.R 368cbf82bdfa38df1f391f5c6ebc3de9 *tests/testthat/test-check.formula.R 79af517d6a6572d1b2c80f69d76b0c65 *tests/testthat/test-check.visitSequence.R 6e71b9354d7b03cea69e9bbc555edab6 *tests/testthat/test-complete.R 39e181e18d77ad4d65b13274ee6f5271 *tests/testthat/test-formulas.R be11971a3b848d702d6e5732c0049442 *tests/testthat/test-loggedEvents.R e7670f784bdadfd47370ea7f6ac924b3 *tests/testthat/test-make.predictorMatrix.R da4d503794f466201b140dabe1d8934b *tests/testthat/test-md.pattern.R 922df6c2fe84211dc3453856a1d94931 *tests/testthat/test-mice-initialize.R 47e3af705f2860e6ba22dcf970728f25 *tests/testthat/test-mice.R a73b7b19b37f01137b2ca8c43b312e71 *tests/testthat/test-mice.impute.2l.bin.R 044700d11a6e5e439919804265e9cb6c *tests/testthat/test-mice.impute.2l.lmer.R ca30016911794d94173743240f4786de *tests/testthat/test-mice.impute.2l.norm.R 81c7c3d5ea80deb79df709dc82edab48 
*tests/testthat/test-mice.impute.2lonly.mean.R 79990c30c21144fade0248607adaff5c *tests/testthat/test-mice.impute.2lonly.norm.R ba32ac10b7d1b5b77879e2c0a0baf733 *tests/testthat/test-mice.impute.jomoImpute.R 8b6c1519c7ba9747cc9905d925ea5e9d *tests/testthat/test-mice.impute.norm.R c9e37d1ec4e10563b6433c56b217a986 *tests/testthat/test-mice.impute.panImpute.R 0e99abaa97d4182dd322083918c9cc9a *tests/testthat/test-mice.impute.pmm.R ca833848f5fdb811c41e45a84e9140c6 *tests/testthat/test-mice.impute.polr.R 509bc21a2b419d2a919f49146a86f434 *tests/testthat/test-mira.R ddbb360c71cedf8cad0323a6fa561258 *tests/testthat/test-parlmice.R 07da62012ad6015c5d8132d0b8fc428e *tests/testthat/test-pool.R 89f538b8ec46f16e83fd1d734e2461db *tests/testthat/test-rbind.R 5845885f931d382587d1a42a2476d85f *tests/testthat/test-remove.lindep.R 40fd8f9ae4994fb2066570cb35dcf8ab *tests/testthat/test-update.design.R mice/inst/0000755000176200001440000000000013553050021012136 5ustar liggesusersmice/inst/CITATION0000644000176200001440000000135513416664706013322 0ustar liggesuserscitHeader("To cite mice in publications use:") citEntry(entry = "Article", title = "{mice}: Multivariate Imputation by Chained Equations in R", author = personList(person("Stef", "van Buuren"), as.person("Karin Groothuis-Oudshoorn")), journal = "Journal of Statistical Software", year = "2011", volume = "45", number = "3", pages = "1-67", url = "https://www.jstatsoft.org/v45/i03/", textVersion = paste("Stef van Buuren, Karin Groothuis-Oudshoorn (2011).", "mice: Multivariate Imputation by Chained Equations in R.", "Journal of Statistical Software, 45(3), 1-67.", "URL https://www.jstatsoft.org/v45/i03/.")) mice/inst/NEWS0000644000176200001440000004270113416664706012664 0ustar liggesusersMICE: Multivariate Imputation by Chained Equations MICE is an R package implementing multiple imputation of incomplete multivariate data according the principle of Fully Conditional Specification (FCS). 
Questions and suggestions about MICE can be directed to https://github.com/stefvanbuuren/mice/issues CHANGELOG MICE ~~~~~~~~~~~~~~ V3.0.5 Track .R instead of .r V3.0.4 Patch issue with updateLog V3.0.3 Extend README V3.0.2 Repair issue #90. md.pattern. GV V3.0.1 Repair issue #89. Add check on m. SvB V3.0.0 24may2018 SvB Major new version. See https://github.com/stefvanbuuren/mice for an overview V2.46.9 08dec2017 SvB UPDATE simplify code for mids object in mice (thanks stephematician) (#61) V2.46.8 08dec2017 SvB UPDATE simplify code in rbind.mids (thanks stephematician) (#59) V2.46.7 06dec2017 SvB FIXED bug in pool.compare in handling factors (#60) V2.46.6 06dec2017 SvB FIXED bug in rbind.mids in handling `where` (#59) V2.46.5 05dec2017 SvB ADDED new arguments to as.mids() ADDED as() syntax UPDATE contact info V2.46.2 27nov2017 SvB FIXED resolve cart not accepting a matrix (thanks Joerg Drechsler) V2.46.1 14nov2017 SvB ADDED Generalize pool() to list of models V2.46.0 23oct2017 SvB UPDATE Switch to 3-digit versioning V2.46 22oct2017 SvB FIXED Allow for capitals in imputation methods V2.45 21oct2017 SvB UPDATE Reorganized vignettes to land on GitHub pages V2.44 18oct2017 SvB UPDATE Code changes for robustness, style and efficiency (Bernie Gray) V2.43 20jul2017 SvB UPDATE Updates to ampute function and vignettes (Rianne Schouten) V2.42 11jul2017 SvB UPDATE Rename mice.impute.2l.sys to mice.impute.2l.lmer V2.41 10jul2017 SvB ADDED New feature: `where` argument to mice ADDED New `wy` argument to imputation functions ADDED New mice.impute.2l.sys(), author Shahab Jolani UPDATE Many simplifications and code enhancements FIXED Broken cbind() function FIXED Bug that made the pad element disappear from mids object V2.40 07jul2017 SvB FIXED Integration with lattice package V2.39 27jun2017 SvB UPDATE Colors in xyplot.mads V2.38 22may2017 SvB ADDED Support for factors in mice.impute.2lonly.pmm() V2.37 22may2017 SvB UPDATE More robust version of as.mids() V2.36 15may2017 SvB 
UPDATE Substantial update of ampute() by Rianne Schouten V2.35 03may2017 SvB FIXED Fix timestamp problem by rebuilding vignette using R 3.4.0. V2.34 24apr2017 SvB UPDATE Update to roxygen 6.0.1 V2.33 11apr2017 SvB CHANGED Stylistic changes to mice function (thanks Ben Ogorek) CHANGED calls to cbind.mids() replaced by calls to cbind() V2.31 23feb2017 SvB ADDED link to miceVignettes on github (thanks Gerko Vink) ADDED package documentation V2.30 04feb2017 SvB ADDED README for GitHub ADDED new ampute functions and vignette (thanks Rianne Schouten) CHANGED rename ccn-->ncc, icn-->nic CHANGED helpers cc(), ncc(), cci(), ic(), nic() and ici() use S3 dispatch CHANGED issues tracker now on Github - add BugReports URL #21 FIXED multinom MaxNWts type fix in polyreg and polr #9 FIXED better checking of nested models in pool.compare #12 FIXED bug fix in as.mids if names not same as all columns #11 FIXED extension for glmer models #5 V2.29 05oct2016 SvB ADDED midastouch: predictive mean matching for small samples (thanks Philip Gaffert, Florian Meinfelder) V2.28 05oct2016 SvB FIXED repaired dots problem in rpart call V2.27 27jul2016 SvB FIXED add ridge to 2l.norm() V2.26 12may2016 SvB FIXED removed .o files V2.25 09nov2015 SvB FIXED fix as.mids() bug that crashed miceadds::mice.1chain() V2.23 04nov2015 SvB FIXED update of example code on /doc FIXED remove lots of dependencies, general cleanup FIXED impute.polyreg() bug that bombed if there were no predictors (thanks Jan Graffelman) FIXED as.mids() bug that gave incorrect m (several users) FIXED pool.compare() error for lmer object (thanks Claudio Bustos) FIXED error in mice.impute.2l.norm() if just one NA (thanks Jeroen Hoogland) V2.22 11jun2014 SvB ADDED about six times faster predictive mean matching ADDED pool.scalar() now can do Barnard-Rubin adjustment ADDED pool() now handles class 'lmerMod' from the 'lme4' package CHANGED added automatic bounds on donors in .pmm.match() for safety CHANGED added donors argument to 
mice.impute.pmm() for increased visibility CHANGED default number of trees in mice.impute.rf() from 100 to 10 (thanks Anoop Shah) DELETED long2mids() deprecated. Use as.mids() instead FIXED put lattice back into DEPENDS to find generic xyplot() and friends FIXED error in 2lonly.pmm (thanks Alexander Robitzsch, Gerko Vink, Judith Godin) FIXED number of imputations in as.mids() (thanks Tommy Nyberg, Gerko Vink) FIXED colors to mdc() in example mice.impute.quadratic() FIXED error in mice.impute.rf() if just one NA (thanks Anoop Shah) FIXED error in summary.mipo() when names(x$qbar) equals NULL (thanks Aiko Kuhn) FIXED improper testing in ncol() in mice.impute.2lonly.mean() V2.21 02-05-2014 SvB FIXED compilation problem in match.cpp on solaris CC V2.20 02-02-2014 SvB ADDED experimental fastpmm() function using Rcpp FIXED fixes to mice.impute.cart() and mice.impute.rf() (thanks Anoop Shah) V2.19 21-01-2014 SvB ADDED mice.impute.rf() for random forest imputation (thanks Lisa Doove) CHANGED default number of donors in mice.impute.pmm() changed from 3 to 5. Use mice(..., donors = 3) to get the old behavior. 
CHANGED speedup in .norm.draw() by using crossprod() (thanks Alexander Robitzsch) CHANGED speedup in .imputation.level2() (thanks Alexander Robitzsch) FIXED define MASS, nnet, lattice as imports instead of depends FIXED proper handling of rare case in remove.lindep() that removed all predictors (thanks Jaap Brand) V2.18 31-07-2013 SvB ADDED as.mids() for converting long format in a mids object (thanks Gerko Vink) FIXED mice.impute.logreg.boot() now properly exported (thanks Suresh Pujar) FIXED two bugs in rbind.mids() (thanks Gerko Vink) V2.17 10-05-2013 SvB ADDED new form argument to mice() to specify imputation models using forms (contributed Ross Boylan) FIXED with.mids(), is.mids(), is.mira() and is.mipo() exported FIXED eliminated errors in the documentation of pool.scalar() FIXED error in mice.impute.ri() (thanks Shahab Jolani) V2.16 27-04-2013 SvB ADDED random indicator imputation by mice.impute.ri() for nonignorable models (thanks Shahab Jolani) ADDED workhorse functions .norm.draw() and .pmm.match() are exported FIXED bug in 2.14 and 2.15 in mice.impute.pmm() that produced an error on factors FIXED bug that crashed R when the class variable was incomplete (thanks Robert Long) FIXED bug in 2l.pan and 2l.norm by converting a class factor to integer (thanks Robert Long) FIXED warning eliminated caused by character variables (thanks Robert Long) V2.15 - 02-04-2013 SvB CHANGED complete reorganization of documentation and source files ADDED source published on GitHub.com ADDED new imputation method mice.impute.cart() (thanks Lisa Doove) FIXED calculation of degrees of freedom in pool.compare() (thanks Lorenz Uhlmann) FIXED error in DESCRIPTION file (thanks Kurt Hornik) V2.14 - 11-03-2013 / SvB ADDED mice.impute.2l.mean() for imputing class means at level 2 ADDED sampler(): new checks of degrees of freedom per variable at iteration 1 ADDED function check.df() to throw a warning about low degrees of freedom FIXED tolower() added in "2l" test in sampler() FIXED 
conversion of factors that have other roles (multilevel) in padModel() FIXED family argument in call to glm() in glm.mids() (thanks Nicholas Horton) FIXED .norm.draw(): evading NaN imputed values by setting df in rchisq() to a minimum of 1 FIXED bug in mice.df() that prevented the classic Rubin df calculation (thanks Jean-Baptiste Pingault) FIXED bug fixed in mice.impute.2l.norm() (thanks Robert Long) CHANGED faster .pmm.match2() from version 2.12 renamed to default .pmm.match() V2.13 - 03-07-2012 / SvB ADDED new multilevel functions 2l.pan(), 2lonly.norm(), 2lonly.pmm() (contributed by Alexander Robitzsch) ADDED new quadratic imputation function: quadratic() (contributed by Gerko Vink) ADDED pmm2(), five times faster than pmm() ADDED new argument data.init in mice() for initialization (suggested by Alexander Robitzsch) ADDED mice() now accepts pmm as method for (ordered) factors ADDED warning and a note to 2l.norm() that advises to use type=2 for the predictors FIXED bug that crashed plot.mids() if there was only one incomplete variable (thanks Dennis Prangle) FIXED bug in sample() in .pmm.match() when donor=1 (thanks Alexander Robitzsch) FIXED bug in sample() in mice.impute.sample() FIXED fixed '?data' bug in check.method() REMOVED wp.twin(). 
Now available from the AGD package V2.12 - 25-03-2012 / SvB UPDATE version for launch of Flexible Imputation of Missing Data (FIMD) ADDED code fimd1.r-fimd9.r to inst/doc for calculating solutions in FIMD FIXED more robust version of supports.transparent() (thanks Brian Ripley) ADDED auxiliary functions ifdo(), long2mids(), appendbreak(), extractBS(), wp.twin() ADDED getfit() function ADDED datasets: tbc, potthoffroy, selfreport, walking, fdd, fdgs, pattern1-pattern4, mammalsleep FIXED as.mira() added to namespace ADDED functions flux(), fluxplot() and fico() for missing data patterns ADDED function nelsonaalen() for imputing survival data CHANGED rm.whitespace() shortened FIXED bug in pool() that crashed on nonstandard behavior of survreg() (thanks Erich Studerus) CHANGED pool() streamlined, warnings about incompatibility in lengths of coef() and vcov() FIXED mdc() bug that ignored transparent=FALSE argument, now made visible FIXED bug in md.pattern() for >32 variables (thanks Sascha Vieweg, Joshua Wiley) V2.11 - 21-11-2011 / SvB UPDATED definite reference to JSS paper ADDED rm.whitespace() to do string manipulation (thanks Gerko Vink) ADDED function mids2mplus() to export data to Mplus (thanks Gerko Vink) CHANGED plot.mids() changed into trellis version ADDED code used in JSS-paper FIXED bug in check.method() (thanks Gerko Vink) V2.10 - 14-09-2011 / SvB FIXED: arguments dec and sep in mids2spss (thanks Nicole Haag) FIXED: bug in keyword "monotone" in mice() (thanks Alain D) V2.9 - 31-08-2011 / SvB FIXED: appropriate trimming of ynames and xnames in Trellis plots FIXED: exported: spss2mids(), mice.impute.2L.norm() ADDED: mice.impute.norm.predict(), mice.impute.norm.boot(), mice.impute.logreg.boot() ADDED: supports.transparent() to detect whether .Device can do semi-transparent colors FIXED: stringr package is now properly loaded ADDED: trellis version of plot.mids() ADDED: automatic semi-transparency detection in mdc() FIXED: documentation of mira class (thanks 
Sandro Tsang) V2.8 - 24-03-2011 / SvB FIXED: bug fixed in find.collinear() that bombed when only one variable was left V2.7 - 16-03-2011 / SvB CHANGED: check.data(), remove.lindep(): fully missing variables are imputed if allow.na=TRUE (Alexander Robitzsch) FIXED: bug in check.data(). Now checks collinearity in predictors only (Alexander Robitzsch) CHANGED: abbreviations of arguments eliminated to evade linux warnings V2.6 - 03-03-2011 / SvB ADDED: bwplot(), stripplot(), densityplot() and xyplot() for creating Trellis graphs ADDED: function mdc() and mice.theme() for graphical parameters ADDED: argument passing from mice() to lower-level functions (requested by Juned Siddique) FIXED: erroneous rgamma() replaced by rchisq() in .norm.draw, lowers variance a bit for small n ADDED: with.mids() extended to handle expression objects FIXED: reporting bug in summary.mipo() CHANGED: df calculation in pool(), intervals may become slightly wider ADDED: internal functions mice.df() and df.residual() FIXED: error in rm calculation for "likelihood" in pool.compare() CHANGED: default ridge parameter changed V2.5 - 06-01-2011 / SvB ADDED: various stability enhancements and code clean-up ADDED: find.collinear() function CHANGED: automatic removal of constant and collinear variables ADDED: ridge parameter in .norm.draw() and .norm.fix() ADDED: mice.impute.polr() for ordered factors FIXED: chainMean and chainVar in mice.mids() FIXED: iteration counter for mice.mids and sampler() ADDED: component 'loggedEvents' to mids-object for logging actions REMOVED: annoying warnings about removed predictors ADDED: updateLog() function CHANGED: smarter handling of model setup in mice() CHANGED: .pmm.match() now draws from the three closest donors ADDED: mids2spss() for shipping a mids-object to SPSS FIXED: change in summary.mipo() to work with as.mira() ADDED: function mice.impute.2L.norm.noint() ADDED: function as.mira() FIXED: global assign() removed from mice.impute.polyreg() FIXED: improved 
handling of factors by complete() FIXED: improved labeling of nhanes2 data V2.4 - 17-10-2010 / SvB ADDED: pool() now supports class 'polr' (Jean-Baptiste Pingault) FIXED: solved problem in mice.impute.polyreg when one of the variables was named y or x FIXED: remove.lindep: intercept prediction bug ADDED: version() function ADDED: cc(), cci() and ccn() convenience functions V2.3 - 14-02-2010 / SvB FIXED: check.method: logicals are now treated as binary variables (Emmanuel Charpentier) FIXED: complete: the NULL imputation case is now properly handled FIXED: mice.impute.pmm: now creates between imputation variability for univariate predictor FIXED: remove.lindep: returns 'keep' vector instead of data V2.2 - 13-01-2010 / SvB ADDED: pool() now supports class 'multinom' (Jean-Baptiste Pingault) FIXED: bug fixed in check.data for data consisting of two columns (Rogier Donders, Thomas Koepsell) ADDED: new function remove.lindep() that removes predictors that are (almost) linearly dependent FIXED: bug fixed in pool() that produced an (innocent) warning message (Qi Zheng) V2.1 - 14-09-2009 / SvB ADDED: pool() now also supports class 'mer' CHANGED: nlme and lme4 are now only loaded if needed (by pool()) FIXED: bug fixed in mice.impute.polyreg() when there was one missing entry (Emmanuel Charpentier) FIXED: bug fixed in plot.mids() when there was one missing entry (Emmanuel Charpentier) CHANGED: NAMESPACE expanded to allow easy access to function code FIXED: mice() can now find mice.impute.xxx() functions in the .GlobalEnv v2.0 - 26-08-2009 / SvB, KO Major upgrade for JSS manuscript ADDED: new functions cbind.mids(), rbind.mids(), ibind() ADDED: new argument in mice(): 'post' in post-processing imputations ADDED: new functions: pool.scalar(), pool.compare(), pool.r.squared() ADDED: new data: boys, popmis, windspeed FIXED: function summary.mipo all(object$df) command fixed DELETED: data.frame.to.matrix replaced by the internal data.matrix function ADDED: new imputation method 
mice.impute.2l.norm() for multilevel data CHANGED: pool now works for any class having a vcov() method ADDED: with.mids() provides a general complete-data analysis ADDED: type checking in mice() to ensure appropriate imputation methods ADDED: warning added in mice() for constant predictors ADDED: prevention of perfect prediction in mice.impute.logreg() and mice.impute.polyreg() CHANGED: mice.impute.norm.improper() changed into mice.impute.norm.nob() DELETED: mice.impute.polyreg2() deleted ADDED: new 'include' argument in complete() ADDED: support for the empty imputation method in mice() ADDED: new function md.pairs() ADDED: support for intercept imputation ADDED: new function quickpred() FIXED: plot.mids() bug fix when number of variables > 5 v1.21 - 15/3/2009 SvB Maintenance release FIXED: Stricter type checking on logicals in mice() to evade warnings. CHANGED: Modernization of all help files. FIXED: padModel: treatment changed to contr.treatment CHANGED: Functions check.visitSequence, check.predictorMatrix, check.imputationMethod are now coded as local to mice() FIXED: existsFunction in check.imputationMethod now works both under S-Plus and R v1.16 - 6/25/2007 FIXED: The imputation function impute.logreg used convergence criteria that were too optimistic when fitting a GLM with glm.fit. Thanks to Ulrike Gromping. v1.15 - 01/09/2006 FIXED: In the lm.mids and glm.mids functions, parameters were not passed through to glm and lm. v1.14R - 9/26/2005 11:44AM FIXED: Passive imputation works again. (Roel de Jong) CHANGED: Random seed is now left alone, UNLESS the argument "seed" is specified. This means that unless you specify identical seed values, imputations of the same dataset will be different for multiple calls to mice. 
(Roel de Jong) FIXED (docs): Documentation for "impute.mean" (Roel de Jong) FIXED: Function 'summary.mids' now works (Roel de Jong) FIXED: Imputation function 'impute.polyreg' and 'impute.lda' should now work under R v1.13 Changed function checkImputationMethod, Feb 6, 2004 v1.12 Maintenance, S-Plus 6.1 and R 1.8 unicode, January 2004 v1.1 R version (with help of Peter Malewski and Frank Harrell), Feb 2001 v1.0 Original S-PLUS release, June 14 2000