mice/0000755000176200001440000000000014437371703011200 5ustar liggesusersmice/NAMESPACE0000644000176200001440000001416714436636520012430 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(anova,mira) S3method(bwplot,mads) S3method(bwplot,mids) S3method(cc,data.frame) S3method(cc,default) S3method(cc,matrix) S3method(cc,mids) S3method(cci,default) S3method(cci,mids) S3method(complete,mids) S3method(densityplot,mids) S3method(filter,mids) S3method(glance,mipo) S3method(ic,data.frame) S3method(ic,default) S3method(ic,matrix) S3method(ic,mids) S3method(ici,default) S3method(ici,mids) S3method(mcar,data.frame) S3method(plot,mcar_object) S3method(plot,md.pattern) S3method(plot,mids) S3method(print,mads) S3method(print,mcar_object) S3method(print,mice.anova) S3method(print,mice.anova.summary) S3method(print,mids) S3method(print,mipo) S3method(print,mipo.summary) S3method(print,mira) S3method(stripplot,mids) S3method(summary,mads) S3method(summary,mice.anova) S3method(summary,mids) S3method(summary,mipo) S3method(summary,mira) S3method(tidy,mipo) S3method(with,mids) S3method(xyplot,mads) S3method(xyplot,mids) export(.norm.draw) export(.pmm.match) export(D1) export(D2) export(D3) export(ampute) export(ampute.continuous) export(ampute.default.freq) export(ampute.default.odds) export(ampute.default.patterns) export(ampute.default.type) export(ampute.default.weights) export(ampute.discrete) export(ampute.mcar) export(appendbreak) export(as.mids) export(as.mira) export(as.mitml.result) export(bwplot) export(cbind) export(cc) export(cci) export(complete) export(construct.blocks) export(convergence) export(densityplot) export(estimice) export(extractBS) export(fico) export(filter) export(fix.coef) export(flux) export(fluxplot) export(futuremice) export(getfit) export(getqbar) export(glance) export(glm.mids) export(ibind) export(ic) export(ici) export(is.mads) export(is.mids) export(is.mipo) export(is.mira) export(is.mitml.result) export(lm.mids) export(make.blocks) export(make.blots) export(make.formulas) export(make.method) export(make.post) export(make.predictorMatrix) export(make.visitSequence) export(make.where) export(matchindex) export(mcar) export(md.pairs) export(md.pattern) export(mdc) export(mice) export(mice.impute.2l.bin) export(mice.impute.2l.lmer) export(mice.impute.2l.norm) export(mice.impute.2l.pan) export(mice.impute.2lonly.mean) export(mice.impute.2lonly.norm) export(mice.impute.2lonly.pmm) export(mice.impute.cart) export(mice.impute.jomoImpute) export(mice.impute.lasso.logreg) export(mice.impute.lasso.norm) export(mice.impute.lasso.select.logreg) export(mice.impute.lasso.select.norm) export(mice.impute.lda) export(mice.impute.logreg) export(mice.impute.logreg.boot) export(mice.impute.mean) export(mice.impute.midastouch) export(mice.impute.mnar.logreg) export(mice.impute.mnar.norm) export(mice.impute.mpmm) export(mice.impute.norm) export(mice.impute.norm.boot) export(mice.impute.norm.nob) export(mice.impute.norm.predict) export(mice.impute.panImpute) export(mice.impute.passive) export(mice.impute.pmm) export(mice.impute.polr) export(mice.impute.polyreg) export(mice.impute.quadratic) export(mice.impute.rf) export(mice.impute.ri) export(mice.impute.sample) export(mice.mids) export(mice.theme) export(mids2mplus) export(mids2spss) export(mipo) export(name.blocks) export(name.formulas) export(ncc) export(nelsonaalen) export(nic) export(nimp) export(norm.draw) export(parlmice) export(pool) export(pool.compare) export(pool.r.squared) export(pool.scalar) 
export(pool.scalar.syn) export(pool.syn) export(quickpred) export(rbind) export(squeeze) export(stripplot) export(supports.transparent) export(tidy) export(version) export(xyplot) exportClasses(mads) exportClasses(mira) import(methods) importFrom(Rcpp,evalCpp) importFrom(broom,glance) importFrom(broom,tidy) importFrom(dplyr,"%>%") importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,lead) importFrom(dplyr,mutate) importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(generics,glance) importFrom(generics,tidy) importFrom(glmnet,cv.glmnet) importFrom(grDevices,dev.off) importFrom(graphics,abline) importFrom(graphics,axis) importFrom(graphics,box) importFrom(graphics,hist) importFrom(graphics,par) importFrom(graphics,plot) importFrom(graphics,plot.new) importFrom(graphics,plot.window) importFrom(graphics,points) importFrom(graphics,rect) importFrom(graphics,text) importFrom(lattice,bwplot) importFrom(lattice,densityplot) importFrom(lattice,stripplot) importFrom(lattice,xyplot) importFrom(mitml,jomoImpute) importFrom(mitml,mitmlComplete) importFrom(mitml,panImpute) importFrom(mitml,testModels) importFrom(nnet,multinom) importFrom(rlang,.data) importFrom(rlang,syms) importFrom(rpart,rpart) importFrom(rpart,rpart.control) importFrom(stats,C) importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) importFrom(stats,complete.cases) importFrom(stats,confint) importFrom(stats,contr.treatment) importFrom(stats,cor) importFrom(stats,cov) importFrom(stats,df.residual) importFrom(stats,fitted) importFrom(stats,formula) importFrom(stats,gaussian) importFrom(stats,getCall) importFrom(stats,glm) importFrom(stats,is.empty.model) importFrom(stats,lm) importFrom(stats,lm.fit) importFrom(stats,median) importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,na.exclude) importFrom(stats,na.omit) importFrom(stats,na.pass) importFrom(stats,pchisq) importFrom(stats,pf) importFrom(stats,predict) importFrom(stats,pt) importFrom(stats,qt) importFrom(stats,quantile) importFrom(stats,quasibinomial) importFrom(stats,rbinom) importFrom(stats,rchisq) importFrom(stats,reformulate) importFrom(stats,rgamma) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,spline) importFrom(stats,summary.glm) importFrom(stats,terms) importFrom(stats,update) importFrom(stats,var) importFrom(stats,vcov) importFrom(tidyr,complete) importFrom(utils,askYesNo) importFrom(utils,flush.console) importFrom(utils,head) importFrom(utils,install.packages) importFrom(utils,methods) importFrom(utils,packageDescription) importFrom(utils,packageVersion) importFrom(utils,tail) importFrom(utils,write.table) useDynLib(mice, .registration = TRUE) mice/README.md0000644000176200001440000001546314436104377012470 0ustar liggesusers # mice [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/mice)](https://cran.r-project.org/package=mice) [![](https://cranlogs.r-pkg.org/badges/mice)](https://cran.r-project.org/package=mice) [![R-CMD-check](https://github.com/amices/mice/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/amices/mice/actions/workflows/R-CMD-check.yaml) [![](https://img.shields.io/badge/github%20version-3.16.0-orange.svg)](https://amices.org/mice/) ## [Multivariate Imputation by Chained Equations](https://amices.org/mice/) The [`mice`](https://cran.r-project.org/package=mice) 
package implements a method to deal with missing data. The package creates multiple imputations (replacement values) for multivariate missing data. The method is based on Fully Conditional Specification, where each incomplete variable is imputed by a separate model. The `MICE` algorithm can impute mixes of continuous, binary, unordered categorical and ordered categorical data. In addition, MICE can impute continuous two-level data, and maintain consistency between imputations by means of passive imputation. Many diagnostic plots are implemented to inspect the quality of the imputations.

## Installation

The `mice` package can be installed from CRAN as follows:

``` r
install.packages("mice")
```

The latest version can be installed from GitHub as follows:

``` r
install.packages("devtools")
devtools::install_github(repo = "amices/mice")
```

## Minimal example

``` r
library(mice, warn.conflicts = FALSE)

# show the missing data pattern
md.pattern(nhanes)
```

![Missing data pattern of `nhanes` data. Blue is observed, red is missing.](man/figures/README-pattern-1.png)

    #>    age hyp bmi chl   
    #> 13   1   1   1   1  0
    #> 3    1   1   1   0  1
    #> 1    1   1   0   1  1
    #> 1    1   0   0   1  2
    #> 7    1   0   0   0  3
    #>      0   8   9  10 27

The table and the graph summarize where the missing data occur in the `nhanes` dataset.

``` r
# multiple impute the missing values
imp <- mice(nhanes, maxit = 2, m = 2, seed = 1)
#> 
#>  iter imp variable
#>   1   1  bmi  hyp  chl
#>   1   2  bmi  hyp  chl
#>   2   1  bmi  hyp  chl
#>   2   2  bmi  hyp  chl

# inspect quality of imputations
stripplot(imp, chl, pch = 19, xlab = "Imputation number")
```

![Distribution of `chl` per imputed data set.](man/figures/README-stripplot-1.png)

In general, we would like the imputations to be plausible, i.e., values that could have been observed if they had not been missing.

``` r
# fit complete-data model
fit <- with(imp, lm(chl ~ age + bmi))

# pool and summarize the results
summary(pool(fit))
#>          term estimate std.error statistic    df p.value
#> 1 (Intercept)     9.08     73.09     0.124  4.50  0.9065
#> 2         age    35.23     17.46     2.017  1.36  0.2377
#> 3         bmi     4.69      1.94     2.417 15.25  0.0286
```

The complete-data model is fit to each imputed dataset, and the results are combined to arrive at estimates that properly account for the missing data.

## `mice 3.0`

Version 3.0 represents a major update that implements the following features:

1. `blocks`: The main algorithm iterates over blocks. A block is simply a collection of variables. In the common MICE algorithm each block was equivalent to one variable, which, of course, is the default. The `blocks` argument allows mixing univariate imputation methods with multivariate imputation methods. The `blocks` feature bridges two seemingly disparate approaches, joint modeling and fully conditional specification, into one framework (a short sketch of `blocks`, `where` and `formulas` follows this list);
2. `where`: The `where` argument is a logical matrix of the same size as `data` that specifies which cells should be imputed. This opens up some new analytic possibilities;
3. Multivariate tests: There are new functions `D1()`, `D2()`, `D3()` and `anova()` that perform multivariate parameter tests on the repeated analyses of multiply-imputed data (a second sketch after this list shows `D1()` at work);
4. `formulas`: The old `form` argument has been redesigned and renamed to `formulas`. This provides an alternative way to specify imputation models that exploits the full power of R's native formulas.
5. Better integration with the `tidyverse` framework, especially for packages `dplyr`, `tibble` and `broom`;
6. Improved numerical algorithms for low-level imputation functions. Better handling of duplicate variables.
7. Last but not least: A brand new edition AND online version of [Flexible Imputation of Missing Data. Second Edition.](https://stefvanbuuren.name/fimd/)
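Below is a minimal sketch (not from the original README) of how the `blocks`, `where` and `formulas` arguments fit together on the built-in `nhanes` data; the `bmi` formula and the settings `m`, `maxit` and `seed` are illustrative choices only.

``` r
library(mice, warn.conflicts = FALSE)

# blocks: by default each variable forms its own block ("scatter");
# variables can also be grouped so that one model handles a whole block
make.blocks(nhanes)

# where: logical matrix of the same size as nhanes; TRUE cells are imputed.
# The default marks every missing cell; edit individual cells to skip them
# or to over-impute observed values.
where <- make.where(nhanes, keyword = "missing")

# formulas: one imputation model per block, written as ordinary R formulas
fml <- make.formulas(nhanes)
fml$bmi <- bmi ~ age + chl   # illustrative model for bmi

imp <- mice(nhanes, formulas = fml, where = where,
            m = 5, maxit = 5, seed = 123, printFlag = FALSE)
```

Because `where` above equals the default (impute every missing cell), this call behaves like the standard workflow; the new possibilities come from editing `where` or from grouping several variables into one block.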
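A second minimal sketch shows the multivariate tests; the two nested models below are illustrative, and `D2()` and `D3()` are called in the same way.

``` r
# impute, then fit two nested complete-data models
imp  <- mice(nhanes, m = 5, seed = 1, printFlag = FALSE)
fit1 <- with(imp, lm(chl ~ age + bmi))   # full model
fit0 <- with(imp, lm(chl ~ age))         # restricted model

# Wald-type multivariate test for dropping bmi
D1(fit1, fit0)
```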
See [MICE: Multivariate Imputation by Chained Equations](https://amices.org/mice/) for more resources. I'll be happy to take feedback and discuss suggestions. Please submit these through Github's issues facility.

## Resources

### Books

1. Van Buuren, S. (2018). [Flexible Imputation of Missing Data. Second Edition.](https://stefvanbuuren.name/fimd/). Chapman & Hall/CRC. Boca Raton, FL.

### Course materials

1. [Handling Missing Data in `R` with `mice`](https://amices.org/Winnipeg/)
2. [Statistical Methods for combined data sets](https://stefvanbuuren.name/RECAPworkshop/)

### Vignettes

1. [Ad hoc methods and the MICE algorithm](https://www.gerkovink.com/miceVignettes/Ad_hoc_and_mice/Ad_hoc_methods.html)
2. [Convergence and pooling](https://www.gerkovink.com/miceVignettes/Convergence_pooling/Convergence_and_pooling.html)
3. [Inspecting how the observed data and missingness are related](https://www.gerkovink.com/miceVignettes/Missingness_inspection/Missingness_inspection.html)
4. [Passive imputation and post-processing](https://www.gerkovink.com/miceVignettes/Passive_Post_processing/Passive_imputation_post_processing.html)
5. [Imputing multilevel data](https://www.gerkovink.com/miceVignettes/Multi_level/Multi_level_data.html)
6. [Sensitivity analysis with `mice`](https://www.gerkovink.com/miceVignettes/Sensitivity_analysis/Sensitivity_analysis.html)
7. [Generate missing values with `ampute`](https://rianneschouten.github.io/mice_ampute/vignette/ampute.html)
8. [`futuremice`: Wrapper for parallel MICE imputation through futures](https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html)

### Code from publications

1. [Flexible Imputation of Missing Data. Second edition.](https://github.com/stefvanbuuren/fimdbook/tree/master/R)

## Acknowledgement

The cute mice sticker was designed by Jaden M. Walters. Thanks Jaden!

## Code of Conduct

Please note that the mice project is released with a [Contributor Code of Conduct](https://amices.org/mice/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.

mice/data/ (binary .rda data sets, contents omitted): pattern3, toenail, walking, leiden85, nhanes2, nhanes, pattern1, selfreport, pops.pred, fdgs, fdd, popmis, toenail2, pops, tbc.target, brandsma
ZRaPQQC2[h6ݤ:4$,)J!r,Φmtv{2:e=9NR]o(:8BϼmƜViO\9<Qc[hKBYj+uJrr P!3@$l"6\nχ|wܥ4h4 u8E LÄО2O$R,\ BRX)'AQ9!;i)擗8p*Tk| IB& a&ַ(uYByu5jj֩^x}9Ng彭k{r&=]y^|R '𸕈.FxHe3s֣)\@DI]miv1c!ɬH4*YdsSw'U!0R&J_a?@.9Ɨ@QtH\ k V`&@%c>5 aإ=-|ZFRUƘ#56j$cf"@.&+TdvNv9YT*煨7 L(C9NB+ g Gr+4S{^sBrz>"!Kt˹˟y].M/q#C~:εPwu02Y?4,Dޔ<B3: V(kF֪pNHE\S KPӁDz<}yN޾ͻ{zt@r)&a%LSr:8溱I$D^[z|gC~gWH@PC-f"6:(KiP8mYj3 r8aQ:")!& W[C'!aK- LQtЧW=hB׷(IvƘ`c(qʭYQLjLb. I@q&jJ#G3^.$BHȁzI1j6'=t׳:i3C{S!+8`2+FMwYvgM xb ־i_=eLU[HBkceC6dь^Xa1L(M]|mYNjELVjzk.e$eK+Jp:9+ӎ(qeaʠZX`Dtcm,@ xN dΖ.V}eU!.|̃EfvA|DjUlp9CJRIA)\k|Pf5]gYd;! OOpTiWC@a,@ qvWe,0;Sfg];MVj ! K"!ܲ1& =%^d\ZS! DҢ ~FIu}ڡS9_@7Ѐhr*YFQd~5LaD}l1,i9:tMgm4xR3nBs UyZı"bP6Β' (6 /J}"5ݞʩe}ug{ %;f 98S@I`2gV~iWߣn*pǍ.ҾI:0\( j##(4>{8khAePeMݙS&ZXwZA2 ܀Iִ#~'ws^5[Zb 1ܐ. lAҝSXJU:x/HшsTѣ0LJb[~=xYм]_N!IiI=;RT.[Z@D,ֹ&RH4bs j֝3mQГ]-jI@ù$A\A4u^ltOl'ۯ cSmVZ*D!/WOZ|bWs#`!JklҖHFrMu>/9c-iEmdrpn=u iXg%-sʴD[Nw c/m'Kܹn: j!2\G*lnaF# FVӭ|=u=#r ޛyby/[8V'Ĺ^<%iʸ7.WX(- B,!edWDewǴ.Rg,>E:HtIIt'%99ZG.H 01> dtJ†kdmF |^(rdkZM릵0/ើ ߷Y\=5Uzi,-0%1-^:'놖ܴ 2r60$>%=-Kim={|@{|r\@<A~n?oKX;O>z4y.o,c2Ǜ$2m/9~-[9k`O]!4I),!2 @2)ÄpLk ,_0j a7/# ozCAky ؏9;iNi094N{kdڏl'"%XԄG_t4Yg徔: 6ۈŹi]췕ߩ.7O,Z}⽭s B7Ba)P듏VVlZד]N;v>y'BcEI t S!qwf:dt޷Ӭ(@e*NM7^H<7' W׎Z=zw%iTAܼ*Jpoy꾤㶙z.g^r9y)pq <+J{z|UNSnn֗ayPi=> IOzz8PYL񋍽#S=uW'y1&/Ǜ[N?1϶yz,@(GҔzztX͛X6徯ÿsќMM69CxCy8î=Ygd<χ_/4BXז8֬54yq(bPLaK䃱Q,,ah=OSϯ~t~޲+8>^!lxifmkqڽ[E k=4iqԽ %>Ҽ]哀eA4v@q:/$cqӳCo3Ru5mn,8fyJCi .~]Z3^l H_-_A։=X3 U |J뽽o-?WS5f_gʪu>vs\o[8.Ӓ/e6oҙ9)oH:h{L-="ӑB;g$CAtT˿Ezet44nwiMLDw0A eerDQ9R溷\xK]mK]5 !_ǡd4^D?_笎 V?hӔX[ܮ6`c*8qV-a[maSgS}Crjo6æ$6@"tʭ!(JlUp%RGҒUƎ8"7m$܌~1_&r q1Ϡ= w%MGKg}qw*{vzǎOjU7Q>[k,cQ}?~TVYsڹr0Q-o([-uUݛTLtlNR;eYYqP"&Y6|I˼Plՙ\rs Ev8LYoKړgCbD[ !Rn_/k_4$qXEsoWYzj"}Yׯ+Z$8xԈ'8_#~hՠ]-UAMZu|մԮ-^$)ah"n-t-̹x\|2A*`G("qIQ u8P&%NjSL)ek 䗞Mg(JבbC"D>o~v"u= EMib&m{^|E3ڟP d%H bCJP?z.BseNG9=جNOȦI 0-PzoFUS~IH,jǜM\uD`!dLC,b!W橵2OR&'3gklZLLmSl{{"VuiQ#thf 'uz`+ A-˩?(_b*)c_E M-FHe޹OO-ATPڔ=}V{|S9PBcp yg\71A[kpԔ@(oz U8)J|K64ijQm9h:CxޛY'Y|:W#6}jq81^ [/0jgbڴ{ :g#,hd" p(>o;ށNo' 1|F}#شϦ-Z1m^F.}8Q#nB1czi3k-&gk O3EZE+MfJRfz'ڙf[D]bo0tmzb IGQ};"ƝvEQы5_MKlB^#;%ґs:ʻGNt-J0|5hu LiGI NqKi[yX¥4]C6 / .C9f贕0񤛶n84ԟC0D9bDZbmSG;Jϣכc}zznj!oAk&k33Sd:vUj:GqcsR A[_[)QjflcfPͱR)`T὘4x/!I3 ҐyAR@s@["kgMs \'Ew`C88̫R0pqLgMn;J8诮L rPׇv%>[wijT X^,)zI.;Tl׿'%\0Lk,(&EBTnvKfT@SoaE&ԂKEJ]HAL8zFz^d)+0̲PBtxTd5|yׇ/O>>X4#ѾּDT}ѰOmo=SMjM~-(Y~_O :\z{([Q:knN_/_2N:v% ~=Ro϶<}>^|~^?do[}y>.JUJ;Fw@Q0 :JH@lZB i*R@VW:ە^Ǐ1 T8B($ErDLo4ѵ_kW5*RdJ  @+C9"9.J *:B@ ʘ@E$tE"2)T !  )C$@L@2G$M$iP\( 2D#IT !rTdAR%"/-mArܪP@d@d(9d(pD4 'IT (  BNKZWH+{T 8+@$ pM'-s/S+ۀYopgo`"h>HثX^#B=M `5`-s,3(YqT+ jgƂWʍz^[|hkkB69ٞZ@ Uk`k|D `WǪQ7p_"q>@B5(VQM3k݂Q/t^J7ϕTRP v{=hAKC>ꍉvQޑ>!2)|?|zp4ϝT;G&^m{m[~M/ ~$Ot^B3iO"}H=|+|6)|N|UT|]QA@"alϩ @ ~e4_J!j}:??}B'_R+_Q*]f~om9Ry.ݣmzc|[GcGj"Ds>S3 >\ࣙBvހW_Z+_[魷>?ӴTTo_?Xuh}j>c655o8c{chYk[]V>l}}X-h}}1kkV[G?>Z>+G+"akbE+p)͙}O? 3T"tp<='ws?3^lUxz:#̺oz{{{{{{{4I6{{z^W߿~߿=;~߿~߿~_O߿~}|I66 SGFm`VM1c1c1c1c1c1c1c1c1c1c1c1c1c1c1$MtiX1c1o?wc1c1c1c1c1c7c1c1c1c1<1]ct1<1c1{H&fffffffffffffffffffffffgmڞ3L fffffffffffffffffffffffffffffffffffffffffff 333333333333?(xxxxxxxxxxxxxxxxxxxxxx{{{{{G{&G{{^{{{{{{{{{{{{ٽ{{{{{{զѰZֵkZֵi6KZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵkZֵk_i6oZֵkZֵkZֵ۷nݻv۷nݻv۷nݻv۷nݻv۷nݻv۷`9yyy矐\<<<<<<<<<<<<<<<<<<<<7{׽{{{W{{{{{{{{{{{{{{~5{{{{{{{{{{{ 9s9s9s9s9s9s9~],K9s9s9s9s9s9s9s9s9s9s9s9s99s9s9s9s9s9s9s9s9s9s9s94gDmGD=l{tILSѦu4t` =X-SU>3z]{ޯ/:|uA1L0AQ~-:;;tpn8뻜;]\u]7LxJxvutaA iws9ӮK]ۘ.4w]:Kx .Nn:3!#4A| 2ԪcCչfʍʁ)Z4kVHѡ 5fbG M~~;3{ _f*ozI $aG)giO( y(8]*lH#B{t%_m &/`6;4ሂ%ɨndLnp tHXP: XAۿOmIEB@¤@,+;S誦ؗ+! 
.ϖBmOU_"fEl h[LM!u6nި#sPQV  /Prr#PJXDn[DMbC?B L-0hSD)ʮ'ŷ, ۑCUbsj؈YKff*A1r3gRSŴۤ븽e LH{%Q&~$Q" Q%*(})"NoڈD>BרYA aEPSAF[uwľgYuG麾R_U".lp6L܎Z3_O__Z]j f7d58i؜z^ѡPUJ*o!aFiOC@T 4B^Gv1`R8a0T'P{ptfQE q 9FV{lDLLE? jB5a~44h}=jHL\bC[(X܇R?Ag'"~J{J7G a#J¦<_oy?}ki86h5t 𐀐B4P8VA? ..4fZ@۟&PxJ޷qhOej!jv:Õ)W)3^OƄD A٨i rr #~ ci=011AفêێB")T+;|LytPv? o_S{E>)U DP>+(( () R* o< s ṔQhQZU AD|_/|^@򠤰@si'qqqq"x燞/0x"sϟ>|ϟ>zѧJR)JR) mymյZmmB!U8:88qs [mm4qOo N*ֵkZֵkXM4DDDqqqJ%m|<>U O} T4Q2Q￘⪅-mxh-mnzva`a8baaAMFɒd$(FhY $aS)3IB1JHbf  Tw}}}}dPz@mPQV[ukZֵkZְiDDDDDDDDE !TTNd mmޟQ2t9-@m"m(I8)!4䉃ph!6 %Q (6BG!Ln8b#n7"c 䄒iI"`MC@qM6)QDɡII4ऑhB&LH$bPM5!"H@B>~s9Rڡzvޭ[z^@vmVkZֵkZ֧dOӵkZֵkZ֣IpZmב4ZֵkZֵi ^r6ֽ dDaɣ2*Fa6UQ8 㧏gmY]i4NֵkZֵk[@mmh(0-moXDt# /+IyRi6M8888MP6mmmmEDmmAAn<DDCM$$DDDDDDDBLY"yW-pti4ݓjֵkZֵkE>m$DDDDGFIVikZֵkZֵM4tҳM$ߕ&kZֵkZEmm-tII%kZֵkZֵj'I5Vg_?з?+{ae(:o\}n3 ᜩAu.nNnl4rorڽw]ڝ|3a]fE{f fM,W"θқ<|s;mr"\YT{Vo-bg'{0|ߕ:B5s!ȥa/7!6,wݫ|jG_ *o77;ud¡s2乸Q&Js_3g)UU^85.ü7Ǖ]h*̊x gq۝}Oe_'E '0_)y+|VE3t{n+y魻Tdg+mr:&l7hG2\+J Ӽٽ̿^d":bsUO#PcrsI LǕ躇=%rX9ڗofF v<GfQ=B9ˉ,ti%a&{H0n^ 1"YK2>RUȻ9@Q]1Tr:[۝9VP;iӮN^\u;E T% mwH \EI;PykyAe>'If_O~AOd cXRaȝj,@yeύ yVLcapcLH)x/#NX36D!~qv$ %iy#_%_\P@C_=t1HcHmՇ'3Dm\[ .؁eԭO{HAFٲasKIy.Qd,G hV޾1u|GOC\u30"DBB݄uCAg2ʢHRP%4(P\;{pkJ).|+" q>: 8ZִZ֐i%ZDi@^Y3. `)hFkZ%~V6܈/+q{e[^T{c.@@ǓzDOv [yT5k ҂RPУ$^-]YVTo$$T ("鞞sWDQ Q(F$^xq΢;-0NKSVGB Ӧ懄U8kCvhBIU5n)q<^ qq鶥3M8iӮJ2"%$Ż_wuՋwؗ6f ܊ߧ7OV\I{ĴKHZ@2 E A)h riUn{ ^^y[1˞qULWUxn4!Fn*J%൭j5UcVTZ;d( C#^4٦ ( 4ŤbzkjĪF6f]uUC%-Fut띺kH @ PvJPdBvt=}ӆNzpW:(C^I - %<8!9۷2ML& ԲC)SdBd,E-MӝyS+B-@R4E9CKEP9uG"tвuͷE8{%̵١rÎPDZrmU+]nW !h56}IꮏT Js&_qTQF Lesv] n1'nmt浭AkZ^ --5TTUWwmF\dJ&#w]QCR"@ &fenTkQQ2jƪ[֋EkLF\7:ѵHEJ@4&BUElcQ`F)6FpX9sknVQTU-(*҅*&FIM.dїw,k$,n\ܫEl1B!iJQiRQ s0a.\0!M]ݫV#k`tnVknE14F2j$D3!I\]5nk+kmNv͢1((QID`mrAti6Cddn](IAF2I&(M"$f!!e)qp+Hq Y#b(F5Th1&-F0 4bIB$דjyT2cF&`זhbDƈf4e1y2ddfH:d۝6HQd5ʂ!2ʌ`1hPMBd e\]b(,J!& Fi ѡ4ȣF0  cEF"biEAˌHI`9̊"$e1b5ˈRlZ,\Bl0I$Ѳ&61Rd6d\eHI`҃!I !%u0E ,$Y Dq2PcEE3$F(a;S4%9s\$M"I L%ˬ IdNĬClIa*"9bƀb42L5$ "d&16d\d1&J5˔F4!@RQ%HRDW7401&$T!"#FD]2S,&l$DW9 IQ1IQ"B̰EFȗuԤ%\Kp1&r @,A DAf3I#Iuݰ`&HCK$L@ QFl2FRA$QaH HQ #%d#؉ 1fR S1&4Q(6heL0L 3 \bΆQ`0d`Bg5 m2DJi"Pad- 0IL]shDE2 &Q"bH$LP@$R) ,#FFD(L(RLE@Dbs2IQ#(Rd: M0M%&;Ѓ!wME'6FLFr(eHI3H#&39DQI.qE$spD nfN])[#a)2A0ΑH I1$ &d 9]&3$Fw`ABF6 4YLhJ$2 ɀ1;LBfb`XλdQ,0Q%`cwnO-'zCD#["(Em4Ĉ0F #H3(Ѵ$ 4 T@ŶO?~mMGm6"""""""($I:IkZֵkZֵ6I(M(iJm$""""""")6N${Myi䵭kZֵkZ+"!J$ǘv%㻓..ws.pnCpܹnܖr;Jk$ˮ蓝.w]%t]9p\qvw;ƈ 2yϟ>|ϟ>|vm&DzFᦓQѾI8I8/״}qk"mm@f bf.#$I$zR I$I$JУqDDDDyIѦJR)JRym&{I>M@@SW"׵s;{x;Sr!MSZ 8m HAFB5IF0mFJ) 8ۃzjM8&!٠J8L61d#N8B'(_-Л9JN[L}GvnӴ$1S$I$H$I$O~cff`9B@ P $JHLL C1D# 0P3(0CL$C#&iAK$D44I %4$cLE+^Qe&ڼkbٔP[}}}CC{zֵk$lz@{߳JR)4Fҥ)JR)JRiҔ)JR)JQ/i$Zֵm?' 8)Do$3 A-Cit|qqq|Q|M5{^zׯ^ziIU'Zֵ}kZֵ~H^"""""=qkZֵZֵ<ЬVkZֵkZ; (ۈMu?t^~j+"_ Di mv;6Tѩ("m(^5hK?S2di !dhޙdSnݻv۷nݻgԌ '" ; mmmmO~s2&2N]r3wWvtw]9.$r9̻9ݎsnn.v:jtpTnt.wl[L*q&]7vt..Kwu]uua?<xU@C>|9ϟ>|ϟ? 
o }}}뽇w=7}}"qqqq@u888㏰/>+k8888Z=[]N:4FlRF!H&lE!Q%ƂI%&F#0lw%YeqqiX_x47}}GqHmDDDDDDDG]{{AtBmm 6""""""#M:$)JR)M{ {{2I$I$M4J)JR)J|bm4$2ub[mmPQI$hBlK2Q&d2Fɻ.Nvِn.Gw';\+;7wyx&4HVMbDt]vww99(B (!C5bh(6ŴZbmՍbڋZ5ckEnX[6آ\ih"hgG}tQ+߂ZOulا%eeIF5-1ZkehTحB]T zZx\PIDLd *@?PGk8ӥۺ3ssKTT%P~mS/ S6Qmo|7wQeW)ƥfRo}{IQU_"WOWߞlSVs&[TiG nuotKی(-xnPV*v e{Q!2Q=rzԝsU1Ky>]u=h9jEǁUjd;VNԲnd1g!u:;\ ?& >Jt]Nfz>>k&2F12ĄBfb 3D@(dVסk%DiuNR)JRZֵZֵk^K$I$HI$I=(I$I'!$I$k $I$ˢBI$I$ E ;Ҕ)JRo7yo7y)JR)JRI$I'`f9`80M I0MB%fffffffI:rI$ qqqq\d6]dI4QIFڥ)JR)NUDG_!|NQf8fcn&pЕ.nu;Wsw.]R]w'bㅮu(s]M)u\];N3wtN9G^bI+ZֵkZֵi }iDڦ{dI$I|f{>#5MJMSTh j U(1:/eqqq` 1c1c,I$I$$I$AI$I$h8$I&ukZֵkZ {{ kZֵkZֵ~QWÂyFmmG@b(AI$I&kZֵkZPB:ֵkZB $I$(?>QW888 dI$r˗.\r˗.^xC""f ,IٲLbD(LI3131(i "LR2DbHR&jf -mn88rU\r˗.\r*#]k^$I'#1\@VEdѵkQZ6MX|m(06,l,&26"R1#Bشkyӛv6,Ax"ׅ3%("wwt$r4 "5"#MF!F޴$$"ōItr+CsxB'ۮE9GEǎٚ4!E:HaLiRK0wtƼn@EExqX(,bwsWWAO+t\+Ɠ軮xp:Xaڿ?RRVJ 4ti/T6knFVdlF0v&6T$vH"=6K.ti~@l1YA!Qi:p٥)+ɩVݼ"hF¥*ҡGE[5T#JM MVT2ޱiJ2=[Ǐ1StЬjBfL%MD5 SQEQ\J#m\T\W4=* TYiԂjBԻW,gd:yϟ>zͲ,,{CR`@h_ b b#2I$I'DI$I'EdI$MZֵkZֵ >8xDCя9`HCLK *Vm{Q2I$I<$z ;C{{"$I$I$I$ {~W aɱ5..]tn;\ۅ].swnwtQۗf]1dHeq˹\ԓNӜ\]wlvT3t8XXkw:c]wsˉ_ફ-Zߦ/Gz-kZֵ$iQ6JR)JRֵkZֵkZ5kZֵkZ.kZֵkZGZֵkZֵ(kZֵkZֵMkZֵkZּ8 J";88Hrd}6Gyx$Gk48("KNy NH'1V?,+6Wu2:[HOT$iPD;̘I eY^`d(EC!֦z59o/̀JLl `wS19Y_{qw:ۜoNmV)V*˧T[͐าnv1r[O7j;o9sjcv!e̳|LJ ͬUB86\jB8%18YeYrnwujMero9)6.  $9)Jk#d JyvQg>Tΐ/O30 4,˾iGܯ[Cw/|DĂD%ˬDi) )%iFĊ7BƂ39[B1Br׎y|uyJO~/vGZ)j/)a-LPMU{])B4Ws ) 5yvMywܻr|Kw#%yuk⵩A,e;&s0c|%N1B `{1)wjr\su/Mjyߏ%\i9ݜG4+}+G}>gBC8_PR.R:3 s^ӣ$Q:)zwYy4SC\! tB2*zxUW66$ Ѥ##+$ZD\\y$s{U^pjg+Z2G;5e_>R[4^MW|.yWknfWN=Ͻ<̭^[W<8nWng3v nW7I.:&=wfg)HJJ2?{?D> }]ZG1%D6;|՜_Ya8Ҧ}}aDcMgk/'SқL>s qTs,ɅlhyL@!U8"+y7S}*ծUExg|˘廷sNY\y\*9DY3Bc3q\1Ea6Sz]@:wɸ6@#0z5|jrرy ťRPK90LhI8W@bC DNM>@~ 01i%E XysUۋmd,R% (XLj{~9y "}䥱Ш]T;oAt.Y$5( l kSiɟOm>l+;dlB32WJ)# k*Tۨ-kw_{ 4imk&\w%^L/gk fD:Paz!CPEKQ }ab[8& zk澍"b$FbP }_5Mnc&i6@|ys`_A$A<ŋ ߏ}>eBH!H RDfC%P20XWQnFcPϴ0'"o9EztP)8‰ړt;mxs>9n`/pliIQB>nJTorr܋|R3 ' n*=wze>뇆2C uݼTl^biKuOXwyV Ddyh{`-ɜG}~ U$ ?"gI .Znz)&F;g3{|$> ?~ϊ۟`uB.@y!'^~=J'OmdI$I I$I$I$I?HE6.{{:S E0 4l֔efSUSVim,ڔS 9I$6{{D:$tI:N_A]6mmmo]a4SHWNNɫZֵkZֵO &_Sߠ{j)JR)JpI67I$I$|qo{$I$N@I$I($I$NA$I$v`I$I$>iQPxuxmmmT IFKIlR!,E@`9qmdI$I E@}imm R)JR!4PQi4ItIi:ֵkm ?(mmh<uBmmUOUQmoETIQ ]:6t_m4ӵkZֵkZ4N'JR)JRD8+lH=}8oWCF:dBJd_96*Ddj\Wf &&: rOףt Sm%3?W<"T !죝#'E01:?e抭<"-4Rddp=ul̙7PU2\eL+,ug"48 !㤲>Am9Z+ߚS<9w|qK 2D2s=El%y j 2U6Rvf0v1淏[z6A:!0q$Әcۢn,vHKe`h (-9X ^\BM(^~ #HtX\ahXgJiLW('UF^^̌,Xx%'~!@@%+P3Y-/Pk>k%-NMܞKջM"b6ūYEWx\@+?fDx81Pf7bgN잰M$ىAjtf}*QCwcY$vh/J|AfP@uAU_{^ Of.cker1`11*FEi_@&-8B~Qln 'qEͧ6P$Ͷho iJX8Kuy`exU1 Rdp3 eA6O %4aiS^9## L|\[E,N7ɮC_aoX>8>54Ct)3YcoLJ=ǚ #XDv={|}!Q=AR͆kv>/ep 3>TZ4ЎΊrxr,ϓJ2N(ؔ[rkDW@@Pjhy1t?x>F<'h$0=>Z-mxT)m[!n}tL'[V:ᐢ3SsM.pՅXiDc'q褱E "~F=R^H3H`LO9@7eks";*z0SR5 EA(3_g}X(m5eS 325bdMgy>C$QJaLP29Ϊj8 dwr85YP0-(_p QWjMk}%ҿMbJD&$]vZ;d>.V&8c[&` w$DpD'|33׻-1_TwH* US/_~6o07gcz#p}'HSU #XE)kI :.4' q{͇TꂞADk%[uOj+f mPgm!Q$ wWl_ G ٠Y c,4? 
x j^FЬa/דZ >nb0$&" YM+rr{`[h^]='l3]M%F#[O &|%2(7 A5,!uaVJo"\O':kf>)"si:3ai'!qf.o>Xẘ`(6O&@vr,\jx`9PZK 4Khں'^g؇,8P^ 0A*/ ojM@`'' 7gpsm<'MNe<Ȥ Z |-ޡaA +cIxcu ǾK{k bo~y3NuOJblϪy'mT@x([Ui%׺p۾NW>f<宵4v,hL ?tS6I0J,JH`ɖ+G?nkgtN;b~p}D&<  uG؏ (7 MIyfF/W=Z0s*@ ʙ`sxU4Fay}dRUGa?,3S.P>zRx u;;km ݽ r%pQּmVZbNῆ(Y)Ñn$SB~ $0tf4ol>߮ɫ 67$lz&5шL$촤<qm֙cD0{,@Aldƕ("@??%8ʟs ht3~qY;2]ɯǺ0iwy45Ϣ|U7wc=u),#2uu* .yc*9uӘ^ K+s7FwzO/(-:q ٿ!*+_ ?a+x9v?#b;|_ŐC~qL"tv{'Vb_ NV}ý\I#ֹ?pķ e)FV5R ohHLY~ MsòNJ!>췹 Q]?G9{.B:8WL?JgB/5=vjbˤ 3HZ^pd$n@@o ;O\vgPyajѢbݢ Ȫ'a'4*#"R6#yYO!Iĩ\ST % qy)TUy3\*-&bv&g Z2"Φz,;3qA:^2Ӕ5|hXygFbl8ʲ+vD# M!'{hQZ7/ֆi+"3BĤ3EpzIR/q< xrִ<]D4`٦dXc=1X#镾DzEVBfj4z{]U`I|x[4IOl)Sin3A>>ꭼ{[*hĉ{Z(M?̻e7̘i/ _h >rBH9̟[=X=DM:;Hаj Z~e`YOw<堳ʭ[x˙X- F"Bq~L $B7<DF@亠 Эz K{ >נ Qդ#ò۪_:NlZ+A+5' [?J܃ 97r_ZQtoQ1y=%'wj͙8Eҥ&-y 4$\Ȁߜxxb _V4w V̻\M{"הjZ|D&- _sYb3\} u+]y;j?@Rq:<ٮH|J9cr,zK'acj,H4ݗ,DK1\6Kcd\4|xGGg[Y76PB`)*c?Hd t8>KJDZ_^Ug\Yc+#LLGP IFx7ShRCث`Fn:XԷ=bk <M- "t=;i8=2/naH#siC/½RK+4 wi49Z̗s nf}ybR\yhOus'HŹЮ`3g `Ұ^f 7 dr@agn0I{ݫƹ )E}jio0y1k.ʹ1= O0VN&4w(4+$^A 5%o:*y,l`VYC";=ArYrJ; Ud#LjECGd _30ˆsDgYZ_O h PF,`:Y/'4ho c8Ĕ'oYzPz#ײ7 7'[Ef4OI:nj䏱+>K9FDž^9= hv[#livIOvIiKTH#: :,ǥac7` 򬥐LPgG{J9qf&|QfkHnGuB* 65qìjTK80 xL֗5{ɋ7w :>\gl2s+eVL@;ߏ]{@]>c֡w wo3HO \m\zoaL[_,Vy7h̒j:t0ՓL6X>ks ?nh#!>˚Y/Ձy-~J|dǗVs8.Y`\qcwaGg6XdB%'Ukg(:,ZU ;QfJ[|"*־Y5u~qv8ᘂ%DTX^vX lwyS>ʤi0 fwyܕ'rUxX7I0@au *SsďNb Qg0 JD%1䢧nQs7#i,`^J<7Q]q"y..-X(ͮ0 YZmice/data/tbc.rda0000644000176200001440000010366013666252075013362 0ustar liggesusers7zXZi"6!XHs])TW"nRʟ[^ ntXb=7l([T2دn |~3+DXjcݻu)ZڹVs 3joueQM }.W 5A,VvoIwN0u0|kb σxaTh#KS tBU\IAdb|PDe"ڻa 2Xrq>Peȯ+'^ ,E8ZIJ=9Ȱ1bG)R|L0S8DQQV4b5ZoJ@c.ϷpH'F$C ˷%S"}ab^򇳳V-۰L0` ]^W{q gC+xho,T7ayڳ*-l2#T6]Jsw`Cbx.B t-TmM ̑=Y%d(/MV 00J:=8 p] wѽ!; 2\2=\FҌzTi>,\K[wPȑ D܄#OS#6T5۱}GΩF?!1f4sC1"I3ܔ>;lc߭x Ge6 qH" P]e2zcCK#>7W+rWh0 2Mte:7S($)mzD: Hctȃ^F޶Nѻjf 2Ԅ# |NZ0{"» nSu}R늃v+fC{ݚ=0Ԓ8{KצMgL#UaKlE q*1k*TùJKڲfɊZ(+r6Y)tʜ ĩ~g*ˏ}.yl0G)VfZzf"o EDbjH' '[Sx.iQ$h;` BKR)܄IYK L3%<-[g %r *W.]afGBZJ?U @Xx;<ֿb0ܲcd+C=AWNO`' UHUjWGءkQ\ 94y?S3K#sj̀"ڥ\B}|m^#3Wɯ8Z) qHurKRBv"qGPA|ԓ!I#<.ﵮgh&Ϳ["D l"GDo@G j"$m{Aݮ1ޕo`kxILv*(8pF8u4Rz6d6"͘ʍY|#KHݣla.n6/[G+д\Ngaje_Y+߸D9JnVP:ŃN/0[Ef]b4 㲚5yKH`U@P }>Usعk>p ( }~(PL3'CF?DZYˊX?Kny5IQ`}]Rmnn?⼯hft*WèVu/) e-a6@?mhi_>Qzמ{mMc`(X*h\jX}Äj`5dO^NX9G4G m װ1AkvmQ:MjХL4#d?jyAuծ5a6׈-{XfE?|R%]b%W.R7y0"d·Ce@%JŰGrG!>?O_KNj=Z ߙV*h1`' |(e5byPPjcM@'4 e;} NxKL$PO/biÚMDt7XS+@E7N&W'4V]:ڑYyCԭLuG7]Zʙ3_M' fԬ8[qy7KοxLQ;HPgKg]{J;'?7|vt8)gMxcəXdcY_|7;~mkP- |LVm B "GaYgEl߻_%*@naͽX!7D,k"o!Z<+2eSh{* d9)ӀBr[E#σ2@ҭ$q?HSU\2fq?OkkPUlr t*T8 /R*,9fM@tEbzz3̔A!H-XٍТrj,d[qZe;T6"!~!,a#ˆ13:WUOOʚ?[ܜХS0%h!v+.Ibr*;L579#|b1f/S"(R[I6dˉ/ńٵѱ]Ηks_? 857`b/+2N̓"[]ɦ%H sx2zrOڪҥޟR)8.mv쮢'JDn7!14q? ?g v :l]d&Q0lKSD ^L=,r^H.lfո) Z+wToYBBcdn 8A+"Hn Q4e4m+\(Ȱ4=4VtOak魹6~}~b)]~ @DRX+QKNAh\0<\Ʌ73$n Cx#=aN!ήWM ifqwkEj#o31&\C 4͘ !gFo`.[OڋǯPY_u*֨2cFNʘ*r{$ȼ]էqOGpl$9{<q"[PNGy1#VϜ/ZK8>(mdڧl sA飧Įm.SIs :Smhɕ,Ke4HA^@|0 (1Yt9~R.a1r%ɩ52k)H5qY TT2L$f8Z:ƾY_)T3henu\z=BM =trfPT>neIkV&{Fs )&v(\WPU0*FPBblksåȓb(+F.m?LMrL"' J'۪E$ǯ;1_kS^CdL,m,x2Ky)^O؃|췴w.dCˆCg<2,{,~QC$i:T{yxݨ؄A*zΓ9/7oV =mZ|1 Oo5|:yY^ӔE/[3Doo)GVϫP^#|ñ[ PļvDZ 5n"gLEK>5Aћv̏3 iF 4Ĵ6Qς.4Bis?u(ꌑ1bMp#i1:V>T&Eo,n_(!$ *EyTFwgvuȲXt_ܰb'ճ?2OZ>K=3H?rURs'AF| ?1 W=0sœO!GGئoo+Ztdx?&69 i3\ґ8]Q-^7{O! 
]9#uP$ri.@兖-ﱻ\]@hƓ@6U3GǰCQ?`HXyV &:ۥi7(C/Gx?N }y-Gdjܱ[ QxBaf!䒹]+u"\ڈZYEFqi HnϱS\Hߘ?r'?֊ߠfo]' H&9t{ u&4eMY~jW/kb?s:tf{ y A%4~kPN_hT-6g,0Ђ % ο!#&igb?L_c,58h#75S@̟hs r&I"m\\2t4!ҹ!~7N&rmV~ -L`dC$./0 }є]xs|_uDS:.f 0a#uutXL{MmrDBb\14R̶`b%6 գr%)z$ aMIv1(1?܇94v䩰jn,}od * .A⎉O.'3l{4PƏ@,I2I|7>pCMw|=PxIqh#5CYIER╭=|4]]iO?E|cBNWp1%!r+\.5'胸{.?gy0"~1c uHGVseQ- ^TB"fS@y:2.p#,Įl^6{ZȚvfH?IMu}bVL۴)$-^K@qo7u79u IA/THmW{֠àeTsǩ٦+ATg 6 ڦ=r':6pA:;q[KvWpsKhnC͉|oG>c~ޣF _@/k3E:5ׄNU-ró,6A@P0p`_ 8EɂqT9XvGb A^񺬊J V2{BiΰEl8mGQiF\Z1Ⱥ]/ ]XӒ ?DލE_˲41jY2TP?JNO҉zA?*s֥ί!<;oS ʴ0{cX#]촕AUR<ʆzI:0֌.%ƦU 4OCҖd, b+쁨$r{.\t~hscV;B-|i}/DCB+y7X)W ua-\F\vW:t{hSU7I;2Uyؤ!Z3r4c*MIe{, n IG_h6vjѻ7.hGZ8gSGR`t؅ a};) UۏSA6md"a9&΁8o~H r=>܃P*۶PbW]=/qiOUՇ>b,WZҝ7{vao+aC̲? D΢ey ^62LЦ@e7ۅ%'\T'nn_0X:BFU2u/<v5⡂ !19Hf'\6PsN8bIê<$YsL5"( n~+>l 5pk>0ۂSE>cb47v3iѿ0z5 JkKVnh?AYTOL[x} c}:.5|Hm ,AjF{(%3L;dFS*o@T*GE_X Դ)om>:=,Փ_UeޥTa>^v a#vm%2kr|9ΫqYK@54ād] "x?k CTYk5rlekHPlU9LTsmX4#Dm4^m}QX,T?/޷g{\sN$-+&X{\p F~ łoZKe Hjwȶ`fx*NÜ7{5/f:i>s8A~S(:pq8tvݾʘN1̖ O5~ FM{ sֈE HUanxq\Ǽ7HU k {| &۴_~rXPjUj?O\`J=~ș!,BLh3sba$M ,C$9{Z;ȁ1m )_B G@uZPX$rF$"_qyYyu'IH^VDuT̰M:c mN(J&/ Pn֮wYeW77q8)8[ϝd#?=b[΀բ$>X *I@j vl91qC{TsAO{:NQB8۾`Zi(d^2=5e-KTAwNI@nw0= s vfY0+uXk#PY}\WIQ͈Qy%7./(,KV8 KKJ6Q K^^_fJ3:Lܢ@YtX쐰?6t88#w];UJk,^S=vÆ$ݿEX7G+=~MgON68^S,/)t3#/^ZpS봆k=ty~wPBPZdRřnl0~%NF9{zt>bYs4$>a>R0JWQ!Xxfx ~ x^5”+Kgr%O5W<+F3h_hB  Su'l'|YqkIIN;[bAφ߲,&s`$5**Rܫ*kQ%7l &q(1]Edu412PLaX\7*GU@˕ѽ9V[Z}4yh)hvҹ9/[B F?9l }6`GJ0LkUFZ,w 4˞; J\"HUE0j0zKSlI4pG}8[nr0ƉYaUA죍k[fs/A4Q  ,'N~;:;kt ^"Z`ҋ j qm&]G8L_Kyg8j#B \qg BݱTmG5*Z-tj_/qTmLsRl,9!rI,$OZ>hVkUδ"*uL\}b) X@h/ϡ0)Ce6ilN^A "fڪ1f"6a[ePtc(y=#} n;1#@D!á[oʇ+IftuT˥M鎖`;&2eS+T\FO!" BÈ@hQIv홹d,liEJ jC6y9~1+@^m=@M ӌIѤ:ѲQzl]z!mb@G[B-v܋' C_B/d>,%p+U 8 FC WP4S e Wcqta$ S%w&L}F@0VrڸthI,HأSKr;(a[?n` pMꒆJDxzh6?}B*Q{zeՌ/rd8="w֗['*_XSJJA2 ׽~P5M\ZZ-|&e,l2~a~tTԔ(1Gl| rT8I-oI.ԅzdT8XydeGP._=g֓O*+ö?]GZ3UZ|RX֕aYrN/…!ݨ?*o|˂/ZUZGV֔ HؒӽTK^ɪ>D*߹xfTB6[C5O<vnw.=zldBs\Cn% k*FQ/;iPg #yE!Bsr 0~ jy@g}[1{0Br&/kpXʟCȓ /]ےS2m0VǤu>LO K#OyzH 2b*x|k@T{s뻩v?N12Qցi4BZg=,eXi.:zVWoK@bX71krKYIRbQ82F\Vej,d4i=8؂ 61MnfP?ZDs'Uk!(ɉ^L!i8+ Rz~K0g~5N XX.Ɓ0x鬏36G;h5  Cz"rBMO;ݗ*?kQ0?<ǵ7Ckx]36 qhi@MǴqreyq<rtݲv>Iǥ>pݝOB % `t(b>VGϨV&eI߾P0*PפM)dxKb4C%7iߡWfY[^a2N_N.~,JUqg]`d΃bRJT[^:yM!&ѩ-@QAuʼnٳfq̫?߭bGs\SΙEYg hcO{b}Y;))2K"d4!C:=eԸ(?;xn6O+JI.*EyJTþ ]WuNkdbv1# O1ϻNU: k CpY_DyI )Ď>Y͉4Sfÿf*l: kX 3k01}G˅ ZK HdP"<[{P//?W2-.|hY^"AsN\w_@di y8GbWSԼF*:n a`+աo7QteHQ&ua.ZM+e'yW=r'=LoevSj 냺E4^;e-ŸuБ߼qխ{ :Xd@urTݛ‹[x%V sK.F `a!{5l);y5V8nK#yqh`hVwփče?$siQ ž63# "흗06ǀ?(as [Z:6߁F^@6$ЈCTbU/S}w3d]0&*=a}x39,0vX>6ȸt =m>@s9pCr? T<-)긡y`KJ?s^BDH>(?UNwf9§]ⰁpRND9IhpaW`s5tŷ/E Z ! ޿+ժxveU+ лXOڽ?, VկE^#B)"aදq 5eb.O,U~ gyhydͤxdz ԫwd*Zp8m9g M+.[PSCӳܦF˨#TثY3' N,e0jĒX^DA ~l- TvD4<4l. 
wuOJWl@fm8\?8Sp_~CTB7z|<&^%!gR[JlXuf7;پ̍S2ڠ`ỹru2X#~M,P{vlqjx:A^l_h_`|i,6O] [P$yaٯ`}{>68J .|2zϙ# Txߝ+;">hO$.K\gfg}J"׊Cy>OݟW↠ଽs[%_5bXcIcrOZQH0VDߩ!zrZKcy_׵R&6 3E !j${x9ru5w]+$i< .-QIO&HaS(+&¤NxFj^bk3!ih5b}Vs 1c$s(qWE#6KӝAS `AcKs<ɛy S Cdpñ]N#A8kv=1)91%ji@xbXsO*&'G{*!z~A{OL| fh?cq LWͳ7n%/M@ǔpA iEd։'Z(\i3*VnSʩ}]pZBVdSdĔ /H-c;z Zi(GW*&*Pu; lvLXdr X#{p$lJzyPשMޓD9SgMЕϹdž^୵]Bcxi<Xf H38~xj0#|{\-]3޷!|š2+Ԣ(VZϠI֯ &x<6`#z$h$u.Aa"X|aW'Ўf{>̦C2YMˏPTkk1jNCo.OI>Gg5@wY5_ˤSIMϼ*QEO9u0@  _)H<OFg¢]*ɼHS ˢܜ'CaVA(Z!L*̉CD-ᑛj >Rj+㝽\=\ѯ&}+l0*, TH $#&PcFp5!1&ʕ띅~HvDԽn(w~G_RI+s@jrQh_ QSSpDTF~-ݲq FRZN!ʙÑQ^[PJŢ+Y|AZj8YR̽"&!;c$ЮtxK03kN~Ze-4&z6dnC)TGdZכzU({MuV6}kLۚy#FʆO惼`j$\Xk,O‰6-U{QK3^{Q(e~m%uRwH&GW5pѣ?ykI#4'SdF!|mԬ[NjmUdo4"{bs#t|\VĀ~li}8񰂡2\peLGKzVRɎI |X=3$a}&?(xvEeJ LW-=6uACw׏cFVfSl ;ZQJԶDo"p*c7\ xVϿ1nEǶXzHK(P85<*wKq EA]s3qNϩT,/g^ ;>minEr/#+ʻR 2"A[+i m$_(+UYB@5j^](d̚7PfJI ,FhO1צkt[xۈ$^yPra#QVNWY hl g2 _ҽ([BGC6/x OHhXJ ({Zb1p sx>g[8RΫm}2 an7Zms1iǫo_ 8G\+BblMFHj։gpf/v+&`JNBo6fmx'bX.zXuk+ yB:r$9W!CQ]}Qn'_ m#a6(Ef<7[9&Hu0͔U׫jQ4U[Ϸ,ԉ#HLr-dk%9$5֋db&K#_Dr6mtE klO3,XpbD'{ڑoDB&ڄEt4M;[~+GmA%*8X" ֪ax8PdA[u=!同8/ԴGΔwe74-o"TWe`O_@K{l~L6 n#-Y36ŜPrn10ƳN ;4c/y&!j2 1|dm`lW'3 >T ؟v|gsdR#䑪-ψZYF`fev>54 ^߂7t8Ôeƀ' ϙ=9$N9ˣT~(p@ܫ/QQLz#4SId)iSf# <@hT2dMC Ns[ ~ ޭʅ;NE{ } 8GAޱS"NQvCO?-WH-)W","SP@i:4D'#/~1YmwXF̩~=owv5ε~ 1dyB%OptZ0I[48F\,=HHI9qe{ˁf>M d [Ze GqJ88 @قCȋ WbR 24Y7k8dl?rsNl8 5 +>a)yNeÒ\ m3uxFH65=`< Z$nD dx.ۀ:k%C7+scẎb͚!o2X҈Ͳo~IүU_碪zz1ڐK/hWpg?9#=I4Cڸ{|[/aZq4!^KYuQW';i%zٸةPZ7YYn4 Y 79(1Xm-9D( "HaRG^9ͧM)rX8fGFϰ_[Cԡz 7Pd/Sɥ9&.*I8򆹈EƘK% [H"oUu-sNoXzsتT1tE1P؛jGӾ3M7ƎQaܺVaJ=&멾_*j* $eoBx!bQ/!'?jtU^T-rf,_[H`D4t cU}۴I"9]?ۀdC%&]yˆڟHgPSFg{E>B*fg"hhS&G"6PnO5C߸U$C/([7r-xǣeP/B&x<#OI6JӐLxmdfW*\#Td` *6տDXY Q >PTbQ;s)puW_MIF-G3Sݰg~|T XGDRrc$:?z6-?MΑ970|DHy7tx#FҞxhNO}}ͯ)V^+^:8e 8xDmITʿ»+gCΰn\A2^- ;GU>{M/ Ŋ;lޖPN 9~c7*aXIf34ߘ*G =$ :}^#,|>y71%9rܾid!lA]4LZUao7r12 H)p ?xvihl:.v}N{6[O|9s"TB(6W:O͠TdYoEb7zrk r-٢F9gN& F#UпWtݠ WъNYiqw$۷"T($à𰻖P"S{Ê!IJB4o1Rv a ƺ-+IIX2TuH"ﶢ"w7>l)dAjV: 6,y_j L ssS:ƒ;?LV~ն: 4]>è=^Hu8fz)m.=e|ez#ZS>zdav)r[uHYoU , =zsB?<İod-,AЪJknf9Cpi..+EK[qeC 'pPΊMdrp`]/{S6E$:"k;+20yV6Ћr0˹=m~2IbO1NC#2\ݶ{u%@$䌋'm3nYVCz$S5ă)}ǹDOhG})n)]L -g˼t# lWE\ofY@T-Q-:Z(-|ςIg!QP8f,>cmF`}z? n 0|yB^N\3PmI!"ul0]\OVL!-{ g HKn7ѫgA_];q)x@k{o+ 8OEzBJ=|8Z$ f MHf LopЁf/Cs$<\^L^ 1 3`u%,MNfUo|E{iwegenM&R*t[:PP;Fiݠ+x#kuď׾!9VN-p|=@H)K#qW^J`nw>˦1汃E>P>Q8xlPtLQ0=SI1M=ushe5H Qy")˪]:yncN_6@!5)H" &4N_V ӑ]_$A!HsAmĔ;;  *X&&"aL`B8vw- *cU5k ew (htusMUTXb[ɍ󵐤嵚HYG(9ycaޟItCed~?4QM6&ɮ^|DB.$K/c)KC@XI\D^=Fr*sWNn$7n,ABEҟ:fz㋄u$ኦKN1Aqʠ28v.!]#8r/|xԵގ~D77?6{G/(3q9Ҽb@%ԫNЇ@! *MVc0+M;sx]nԹu_}1*')b%XUNgz N*@R0;3tT5h똜[0ɞR%r@&rhw"%(qO[IfO|7ofbMdqV7@j:5#q&J$9!9S76}.~4Kfrg|P /tRr#Wg_^v,6@}x91tm83\sAOdٙeBοAZG;c6퐆ڃmkr euM Ajܡ+] wK-ԘcrYMuԟ^i<]}RV f_x|5?k}Ȕ> LG a([pRg 9]XLiRh:9g\U%0nn+u_zP b")FvtLdUMuA`:-V3 :bulI;=8ATHR%mq:W tCWfvwئ1W/EO}6]\sVNh?b*V✔-Qޛ5ZoH֪ƌ8&bp_JwP ȗhk N:L[VgCO3)Ut$'kF,!e/u|lw;'mAqZgyvF1Z7{E5h˲UA0 (@ܘ? 
6qLIHvI7ESS iCZ] 8Jr28gNFUe3[jB*'][ ;Yӓ ZՑX =V3)^?ͼ:vZ|D9/uGϞ j;y Džmo+8‡Q-Xs8^(eK[C7/_վi,~2Qg4u$@bS)<lߒt&ZJ )M^fge( '<^uffV6VBwJPu|:nn9X^KjВ+J%7\ꇎWy3[νCڣh #Ufhta:P*z@${m9p㽫BumS7}C;b8Cxe SdݕB$ҕ \C&3 DSuͧy]IGk!1ܒrF9LA1Ńbq߯9ېy'mP!>fZƑ.jgQ49AlbJY BX'9.;=X GҘRL3uMS-}@ ̰`(H+/J{O\7=e;!.Zd6ODG*eEz̲0 L]P҉j9Jmi.+||.hyWVO^*dwWAOH9́O#􍢶MO9i/ۉum@JObZR{:Tz;Cݏ.NJ }Du@n=o=i^} rۊ,3$fUlm[*g+8@HGڔ'8m܀Fncb~UB=!Lk D'(wިc1[8I 3-v6Un$:bFc4PI,h+hy," S]ؑc8'$vGwV=IT`^(H^:S.meS[s+#Ą>|#UCM#S,-d+me$O dlĐ.ixG"fa)jK]j~$ޘV7@fG}oPdHvB@^^{C9:Bfc}#W?*?N\YB6\:U̓l#*EVԒ2 @m}= j;xmASϩF$\?/>VcA X׽M2)C F2Q֬sct9CQOyYglWs3tbbx HnR yܕIţj75p夷FWO)VMN C5(bbeAuϕPR0G?5IJ&+KIhY=mT ,nkf#Cҳ~n9# aP ^ϴIӬqK`l:Gg4}v^)ਘl.cy2NhWwэ-$=,׃ <ѥ$"]1 䎷]R=}!*v'Mi^`@W|L6vvN}Ibg|H;ʾH d_S^64eRZ4I]mD@KQ%jҌ$KS]!5?{(a J' C N'ztX3Y83' )~MώeCvSnE(,I?pI(Lt$ԔU~n #\Bֽ H$gJKEp܍8ݦ$vz)n؁զlJ] CʡƿE WA@ߵ0N0 S"\X|JS\kNh1xW XNҀAZ#&{<Q7A?'7fŽ0)ݳzC rpċU/\lC{zz"w2YLmgX ^CgsꫛO+@cߑÿ/A9 }4Mv'D6R۲zn7pW >Y&BaY!ȸIGo2+94Urv#%ܼkð6UCD[@K +O(XrV:#.w"MhP `,H/5} ۶lf!4|YA`rXK|4]uҘygoM酹f먢o`p+g`CmqRo hm @j8YgT6#eʬ&kT+ *zQ޺r2j#<PdO8k7cJhwi!Kpb^T)6lf ei,3dn^}@=#VOMMN$Aš+_WA)7B|5n2JAKK!6[n($K;lDwmϋ6: fGkTFRD;$'g.=?Ul=hM~Kb'QG>`# ȏ{BLv$ҧlbuwQu djM}٤i|edvކo^QmW|!4?BmE/w {}rrdP3Sb "j C])wt%|<>o RC/iJ1EףZ޵AM% bmt$ךE݈ߊXJWp|Y49'i҇SJ#-M}v'q% XBV#Awل{H+8{ dS_ai987OQ)_;`Ƅ R'{x;c%W*fufH+OG:qP.z̟f=~h᮲_I3KJOsJ"qJLR'.p(7"WP}r`K@|E4(Kl0]?Rճ n7m#,zKk9U(T8$2f٘6 ,ڢKFm!:j x6:CG.jVY~$K8&& ;vAjEӒ|gNj}L:ef `^Nke'0]zT IQWt^nxgTQc2I3?tce7@_IԜ~7-} bG8zVk esh{x|jxlyI|d8eڅE!UT b-.Џ~)ltabkW;F*{6ws)yջU72dsH:e޴1᫋&(fÝ1VbĻs O&1gC%I9!Kw~EVʗA?GRh<0#pcRlf[5]_r#ۅ٩= oJHmQwu~q\A`ߍ3W@:E ,V]|5 0Lffm)]DZ7Mx"RpS_Ѽ] ͕g,򉫏&WH)d)Mw2|9R/ޣĉwx A.'/#*NPR(zt#::7"e6\ o2;9R)6Ī[ՐM#O];!16m ˼V,?0@ֻ酶z뵬@PwOuqz3X3".n] -"xa$ɨrlQ ęɒl 4IJ04wTpU#haʐ'Elz[&S?q6o8g93i^o{Ju#r byPp#͈MWʄ:XThB Kbmm1m)pF` &TYe1iȟҳYBLpEf '-|^V%Q_˿hͽˎ'P ,B,tP00Vsho"/ˑڅߌDvT0&N}u\at['|aPaZPUFt?^&@&%HI@c s81N}K&O0eYkNSU<re)egP)[cwm Meb杤v V`*b,чMRlJ!T.rˆ&+`*] k.WɥfEx %OExj8// ^U-b8jG6n})S~ Y(L[D;~ח%UU#TtOf3.0)ۿG^t,-45nP0a]ip-f~.;xɝbƈ q^N!id\E% xFZUlAo0kUl70^Bm=t'gO6+NR;|O,, Dܓ6\;_uB~žApAc.s|_^ r fWTwu|pa=@5*:2hO߁)P- |d|dOBRo<%W i1]XC?@Qnľ:vahΪ MB0`:PO8mkïTy},DnF,4$%36Y^_xh,5xG,#7rᢕ8 ?sYcEv|vZ?)1Tپ$a\d imo@u}c{Ecr`d|~dN* ,)%Tފׯ)''Ѽ̉>%s]&V}8Ykgr/I!-Mξ*Lph1e"GKd1M:ҁER42[rD"&e4|C Ev pnУ܁$.~Kplq^[p D'hM[bп .tOO3H2 W$6+)5!$B`*Hcm4u_"蠘BM|:ǣ/WƾHB@Ct#~q0>8E/x!FXV o =a@MHpB!@X9xn݉-ȳq F so{yMK$PbEpHp9Ϯ_DKR=Ͷ+9ib| ZO#JjaKTA<#t9:k^rugKmB( 9N&;`>y` eNVmSR;EseLߔ&SSvֲ5 C6-7p_ze#-5*{횕ksfsv wYZT]2 OkJwHV0$䝩ڹPvxF-P:-io!,"{D)>()U {]*?])D}.t~;kqěZ! 
2B7NGu!-BIM7%Ւu!"|aT!mw({ɱAu.֗Gڍ)~Q<]?P7;?Ja; j-+x7dqXRGU"7\c f]!p[$zMG0ČRu-JYMt7hye?]iL,{ Yхv[ZXV4&-~Xvx޼S}67["VbI] ױn>uAt.a,jjȴnr07nr:JpM@"XsUЋr4vKYymm^Q&_ [̈́Ŷ[6py23ѣГ6L@E ĝzNewc(ajwu֏@eE6$E 1i[=pTۨds2D^$2:mND~5cjaOE&u5>,H1ׅt x5'?yX)%ks,ɳWMN,U,?(FnTkR&%-f)]XN%/f=(T2$iC':PtlH\pa-T,5*<*+ Zx+NGff+譊Qjy~Kk?OR1n`iHYƽm&a MI1ⲙPs y[GbV !.bb>SkʬUoQ_jq~A>^ctӰYsk4374<$TwEr?}DK|~BRA& ]xqe 4z//+LP7"$#i v" Ƀk&{yu:6Z0QG0&‌%KvTǬCSdntԲRBlB8gJSFǩނqt;uaB9XL활!4颥 dH{:0ҫ%4v\@b?XhR8uru.(6 kU<J[4rOvKdӆQ,RXB8!#t?3jIY/\MxtSW'`~0u}bG# >Q*чd[vA5D32-xkq|ra'y9 + Yh{汬6q ^=99PNB]23N_ٜ#esyY9KB:6045;4|4^,t)uf}=: 1!lNh50oYS# }(tiUV'3Saj5:i41YlE 3nWAlaFYpޞ}~E*c Bn`p4%Z=/V3mԆF, <˸EpHȗ7>Nn@*XzPT495W㯌F)w'Upp+<,K*~\V#5hXQᱬϛn JCv2[8׮6u|l>OTJpsR3_(9k=>0٧G 7pȮD4:slWH` ?$ԄʞI>X2׉$9w?cHJ ǿͯ7_kɤQR`>0 YZmice/data/employee.rda0000644000176200001440000000043713666252075014427 0ustar liggesusersBZh91AY&SYVsDLDUPD ;ܠ@@ hA4`F `"RM 04s2`0&`0L`DI@h`0L`Q$fSh 4 1=J)Ql $_EKƢ&HYlR_O $ C@RI8XKwݎ11cfiUc c1cc1c $$TywlQIn`RGԺ8m0`[Q%ZSX4aQB1lA"5gk6j]VIU o>Mk5Ιe%HЂ .쁧[aZ5r_CUU1;j'-\mInwmcF)sS-^Nj!lFMLJ2QV*)iXQFa3)z5c ^iIT,z]N,~VB RI#لMT?i{zŎ...cM4ѲohnppcfN}6Nn ꪯܪ%cM4M4/wkgC䪪y-rujrNM4ьiBtwwƝUJМiR)LLIyZ_H 4`mice/data/pattern4.rda0000644000176200001440000000027713666252075014353 0ustar liggesusers]= 0KRAVt n(Hu*m_. Oz^r&q@D$BOb䑏oItAۂCf[ۄ WCK8ŽkSؙ[#*bA9gggq.C:Ɯ ^@K[mice/data/pattern2.rda0000644000176200001440000000027213666252075014344 0ustar liggesusers]; 0oҊ6>Q'wu IWxBݜ{r_}T@d(tUHr[0>XsCSm,Ax2p6]B?bR՘bbbE8׻.y1*ӕos}-?V[mice/data/windspeed.rda0000644000176200001440000001061313666252075014567 0ustar liggesusersBZh91AY&SYn?oB,I*rX$ n_}3xݾ]pws"2L LCMih4z@"(z=Fz45IHid 2$D2P ~zO'==AAQ4=F@ O%T@ 4 @4iFɐB hFbmOSF ѦF@44y BSIaцFf wwj p# 0_njr8m X0^o;WrԈ*pB.)P-D9`י i{zj۹ ؇yM6ͅv\Ъojuq^j-jE&˙ TDRM!Q ]%΍j*vjIyhae,ڻsf<0C6 ,v+)0ݯM[ S\/`E"Oz-,jmyht lT'7{cv.XD0^QU@F 39ԑ##MS;iT)C2#ʊR-i1!JPn+$HQA;>˗D]/Wm6g[oA0OpQ 3$J# Db0#Q$0# L/ɘ.n1lnڔnY=5UHeXUYIP[o{DʿN> {MJ3mB ߭N8kw~MKuz^] /CWL"(,( #,,0QAGoXTP; Y lledd@,¾ʀc Pٯ$`^?7q-dk(7~(|+JP#n4AVʇȢgK PL1RS2"U m+cuL;ҾkUW)P맪KIM*vb%GuDqx}Cɛe.+bF}em㒐dQ #o @ͼoB%ZKi8Ԁ X[yP3 TJ 6YGTUB:lRLJںFQۢ}6LDl$$e2LD"C$a6!9x\ <'I(rV6Z=r{A4OGd"uV8$FD u"=lz,[٫z^SLYeF |sлĘ1RSs(v,ean%1Ƽ!c[; 7D,Hv䒆re g6D\<`- ($6v7  آ0mQlu PWʟq8Ek#PX2폅wvftn8)u󰴟}jqQK{:"&ViC^Knj8@FdcAp4xgu'30 ($jh`b#|M:JGj׉DS210 #ѽCaΣ+xM."c2;t7y -rր4[ڵ At'&ayros#wfYwFCvXXHFMA^An ]RYa1r&Ǝg8E,ta4eN5SIr*0pKBMߛIeqE·70| #(n,JR R0P!l"g[b1C e`acdspw(l9"XC"0:rXHeʠ:"}t8["8:U6Y(vI(< WD$jydlGKn!h 8IK Tf5-l0Nןfl|ǎEwyAxjoG\ MeܒZK,{_5vtBwq`WS^~y!bnϾt;3ldRB ȢQr5ukGr pHh\ron7f"9iEM5n(v|` kk Z@N6ׁ1qgǀO, "q<FDpQ*_fE.ϱc5Vt+TPhq?};p&tepu!&.)]< ̴o5m' MLDQkT$J =:$IUռco cɇ`0)l#qk;`M 7m\ğlSBƕN3~ͫ$z{@ ^O]{+$9(6c t~X)JFjUX#HI4m^D͛͝ˍ&IARS7h[0"3[&2Ej&bX\3wx@a0"`(38&feeVLۿ]BBamice/data/mnar_demo_data.rda0000644000176200001440000001016113666252233015531 0ustar liggesusersBZh91AY&SY: - }}»]]tiMLhO(ڦGf)Mkrqj"0xhI'`|>d@ _ᇺ@PAH 0I ADA b) E) DGs|3dà 1Ad A*ȡ ,RXI_\Jb2j !Pؐ25?E㐘ץICADL%8wO!k~$Zx)]kp `vwU*€&P9B!y7n3"n2&m7trZSygHRKQ7la \@{D'a05S`F=v~pgY{ +dw!akW0aZF.DPcmAl(E hTQ~CL{y~A.8}w 3XPxh 8]8ܤ_i yFsO6#g[^ʷ3&u`(9ɯsǞqaN $/>w#PIq 9&-q"mq)"nԙu\mŠBTWb{w5/^Ķ4 ѯ]=kt' ے\KtcdHBR>`hL:R52DEY' ݄E@9>)imnzG!Dcgp~!?jaX !x4ǤCIG~NR8({:(@j|?42^͵[;?VC~=9TqOS?iτN'bVmV4'(>y흁 dS"}gQ~R5Yj3+!-le)Pe §moOfH[4t;7?/~<"jb3Gkb+B#T~ Л mC$9cb ehs:(@ɐ?V}B wX0aA-4')E5g-C!PtU_zm լbFska3p H Mʗ6kc 5$/hP`uUjژuJm!תeaңZ5 }ɼQG@G$%M}>e {J6AfŘXǬn>XE& SXU?g?||cx4SY7㥼Zi=Ҫ?/^{;[v܈?+wDp@(<*پZ!~C=ۧ艆MZY"q߄41f ; &H'|$d*ёY.fℎP5@}Wf0Q/Kpx\d6?HQEV5Y2R>[]]oym9]RRŨx.|kFʕ^ё*Ʉ]_=Q_]T 'L4uZʱxGXV(U暷;q%dW/] ;2F9GUf}Ŗ:9FA7 SYAk/0*B扢'C/I$^UeJX%Wك EIB8ъѣ["C.p 
tnZmice/man/0000755000176200001440000000000014437176012011747 5ustar liggesusersmice/man/cc.Rd0000644000176200001440000000161714330031606012617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cc.R \name{cc} \alias{cc} \title{Select complete cases} \usage{ cc(x) } \arguments{ \item{x}{An \code{R} object. Methods are available for classes \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} could be a vector.} } \value{ A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the complete cases. } \description{ Extracts the complete cases, also known as \emph{listwise deletion}. \code{cc(x)} is similar to \code{na.omit(x)}, but returns an object of the same class as the input data. Dimensions are not dropped. For extracting incomplete cases, use \code{\link{ici}}. } \examples{ # cc(nhanes) # get the 13 complete cases # cc(nhanes$bmi) # extract complete bmi } \seealso{ \code{\link{na.omit}}, \code{\link{cci}}, \code{\link{ici}} } \author{ Stef van Buuren, 2017. } \keyword{univar} mice/man/as.mitml.result.Rd0000644000176200001440000000116014330031606015264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mitml.result} \alias{as.mitml.result} \title{Converts into a \code{mitml.result} object} \usage{ as.mitml.result(x) } \arguments{ \item{x}{An object of class \code{mira}} } \value{ An S3 object of class \code{mitml.result}, a list containing $m$ fitted analysis objects. } \description{ The \code{as.mitml.result()} function takes the results of repeated complete-data analysis stored as a list, and turns it into an object of class \code{mitml.result}. } \seealso{ \code{\link[mitml]{with.mitml.list}} } \author{ Stef van Buuren } mice/man/mice.impute.mpmm.Rd0000644000176200001440000000613014347334322015422 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.mpmm.R \name{mice.impute.mpmm} \alias{mice.impute.mpmm} \alias{mpmm} \title{Imputation by multivariate predictive mean matching} \usage{ mice.impute.mpmm(data, format = "imputes", ...) } \arguments{ \item{data}{matrix with exactly two missing data patterns} \item{format}{A character vector specifying the type of object that should be returned. The default is \code{format = "imputes"}.} \item{...}{Other named arguments.} } \value{ A matrix with imputed data, which has \code{ncol(y)} columns and \code{sum(wy)} rows. } \description{ Imputes multivariate incomplete data among which there are specific relations, for instance, polynomials, interactions, range restrictions and sum scores. } \details{ This function implements the predictive mean matching and applies canonical regression analysis to select donors fora set of missing variables. In general, canonical regressionanalysis looks for a linear combination of covariates that predicts a linear combination of outcomes (a set of missing variables) optimally in a least-square sense (Israels, 1987). The predicted value of the linear combination of the set of missing variables would be applied to perform predictive mean matching. } \note{ The function requires variables in the block have the same missingness pattern. If there are more than one missingness pattern, the function will return a warning. 
} \examples{ # simulate data beta2 <- beta1 <- .5 x <- rnorm(1000) e <- rnorm(1000, 0, 1) y <- beta1 * x + beta2 * x^2 + e dat <- data.frame(y = y, x = x, x2 = x^2) m <- as.logical(rbinom(1000, 1, 0.25)) dat[m, c("x", "x2")] <- NA # impute blk <- list("y", c("x", "x2")) meth <- c("", "mpmm") imp <- mice(dat, blocks = blk, method = meth, print = FALSE, m = 2, maxit = 2) # analyse and check summary(pool(with(imp, lm(y ~ x + x2)))) with(dat, plot(x, x2, col = mdc(1))) with(complete(imp), points(x[m], x2[m], col = mdc(2))) } \seealso{ \code{\link{mice.impute.pmm}} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Mingyang Cai and Gerko Vink } \concept{univariate imputation functions} \keyword{datagen} mice/man/nimp.Rd0000644000176200001440000000354114334522175013205 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nimp.R \name{nimp} \alias{nimp} \title{Number of imputations per block} \usage{ nimp(where, blocks = make.blocks(where)) } \arguments{ \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values. Note: Imputation methods that generate imptutations outside of \code{mice}, like \code{mice.impute.panImpute()} may depend on a complete predictor space. In that case, a custom \code{where} matrix can not be specified.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} } \value{ A numeric vector of length \code{length(blocks)} containing the number of cells that need to be imputed within a block. } \description{ Calculates the number of cells within a block for which imputation is requested. 
} \examples{ where <- is.na(nhanes) # standard FCS nimp(where) # user-defined blocks nimp(where, blocks = name.blocks(list(c("bmi", "hyp"), "age", "chl"))) } \seealso{ \code{\link{mice}} } mice/man/md.pattern.Rd0000644000176200001440000000361214330031647014310 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/md.pattern.R \name{md.pattern} \alias{md.pattern} \title{Missing data pattern} \usage{ md.pattern(x, plot = TRUE, rotate.names = FALSE) } \arguments{ \item{x}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{plot}{Should the missing data pattern be made into a plot. Default is `plot = TRUE`.} \item{rotate.names}{Whether the variable names in the plot should be placed horizontally or vertically. Default is `rotate.names = FALSE`.} } \value{ A matrix with \code{ncol(x)+1} columns, in which each row corresponds to a missing data pattern (1=observed, 0=missing). Rows and columns are sorted in increasing amounts of missing information. The last column and row contain row and column counts, respectively. } \description{ Display missing-data patterns. } \details{ This function is useful for investigating any structure of missing observations in the data. In specific case, the missing data pattern could be (nearly) monotone. Monotonicity can be used to simplify the imputation model. See Schafer (1997) for details. Also, the missing pattern could suggest which variables could potentially be useful for imputation of missing entries. } \examples{ md.pattern(nhanes) # age hyp bmi chl # 13 1 1 1 1 0 # 1 1 1 0 1 1 # 3 1 1 1 0 1 # 1 1 0 0 1 2 # 7 1 0 0 0 3 # 0 8 9 10 27 } \references{ Schafer, J.L. (1997), Analysis of multivariate incomplete data. London: Chapman&Hall. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \author{ Gerko Vink, 2018, based on an earlier version of the same function by Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{univar} mice/man/mice.mids.Rd0000644000176200001440000000433214330031647014104 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.mids.R \name{mice.mids} \alias{mice.mids} \title{Multivariate Imputation by Chained Equations (Iteration Step)} \usage{ mice.mids(obj, newdata = NULL, maxit = 1, printFlag = TRUE, ...) } \arguments{ \item{obj}{An object of class \code{mids}, typically produces by a previous call to \code{mice()} or \code{mice.mids()}} \item{newdata}{An optional \code{data.frame} for which multiple imputations are generated according to the model in \code{obj}.} \item{maxit}{The number of additional Gibbs sampling iterations.} \item{printFlag}{A Boolean flag. If \code{TRUE}, diagnostic information during the Gibbs sampling iterations will be written to the command window. The default is \code{TRUE}.} \item{...}{Named arguments that are passed down to the univariate imputation functions.} } \description{ Takes a \code{mids} object, and produces a new object of class \code{mids}. } \details{ This function enables the user to split up the computations of the Gibbs sampler into smaller parts. This is useful for the following reasons: \itemize{ \item RAM memory may become easily exhausted if the number of iterations is large. Returning to prompt/session level may alleviate these problems. 
\item The user can compute customized convergence statistics at specific points, e.g. after each iteration, for monitoring convergence. - For computing a 'few extra iterations'. } Note: The imputation model itself is specified in the \code{mice()} function and cannot be changed with \code{mice.mids}. The state of the random generator is saved with the \code{mids} object. } \examples{ imp1 <- mice(nhanes, maxit = 1, seed = 123) imp2 <- mice.mids(imp1) # yields the same result as imp <- mice(nhanes, maxit = 2, seed = 123) # verification identical(imp$imp, imp2$imp) # } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{complete}}, \code{\link{mice}}, \code{\link{set.seed}}, \code{\link[=mids-class]{mids}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{iteration} mice/man/estimice.Rd0000644000176200001440000000367114330031606014036 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{estimice} \alias{estimice} \title{Computes least squares parameters} \usage{ estimice(x, y, ls.meth = "qr", ridge = 1e-05, ...) } \arguments{ \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{y}{Incomplete data vector of length \code{n}} \item{ls.meth}{the method to use for obtaining the least squares estimates. By default parameters are drawn by means of QR decomposition.} \item{ridge}{A small numerical value specifying the size of the ridge used. The default value \code{ridge = 1e-05} represents a compromise between stability and unbiasedness. Decrease \code{ridge} if the data contain many junk variables. Increase \code{ridge} for highly collinear data.} \item{...}{Other named arguments.} } \value{ A \code{list} containing components \code{c} (least squares estimate), \code{r} (residuals), \code{v} (variance/covariance matrix) and \code{df} (degrees of freedom). } \description{ This function computes least squares estimates, variance/covariance matrices, residuals and degrees of freedom according to ridge regression, QR decomposition or Singular Value Decomposition. This function is internally called by .norm.draw(), but can be called by any user-specified imputation function. } \details{ When calculating the inverse of the crossproduct of the predictor matrix, problems may arise. For example, taking the inverse is not possible when the predictor matrix is rank deficient, or when the estimation problem is computationally singular. This function detects such error cases and automatically falls back to adding a ridge penalty to the diagonal of the crossproduct to allow for proper calculation of the inverse. } \note{ This functions adds a star to variable names in the mice iteration history to signal that a ridge penalty was added. In that case, it also adds an entry to \code{loggedEvents}. } \author{ Gerko Vink, 2018 } mice/man/fix.coef.Rd0000644000176200001440000000357114330031606013734 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fix.coef.R \name{fix.coef} \alias{fix.coef} \title{Fix coefficients and update model} \usage{ fix.coef(model, beta = NULL) } \arguments{ \item{model}{An R model, e.g., produced by \code{lm} or \code{glm}} \item{beta}{A numeric vector with \code{length(coef)} model coefficients. 
If the vector is not named, the coefficients should be given in the same order as in \code{coef(model)}. If the vector is named, the procedure attempts to match on names.} } \value{ An updated R model object } \description{ Refits a model with a specified set of coefficients. } \details{ The function calculates the linear predictor using the new coefficients, and reformulates the model using the \code{offset} argument. The linear predictor is called \code{offset}, and its coefficient will be \code{1} by definition. The new model only fits the intercept, which should be \code{0} if we set \code{beta = coef(model)}. } \examples{ model0 <- lm(Volume ~ Girth + Height, data = trees) formula(model0) coef(model0) deviance(model0) # refit same model model1 <- fix.coef(model0) formula(model1) coef(model1) deviance(model1) # change the beta's model2 <- fix.coef(model0, beta = c(-50, 5, 1)) coef(model2) deviance(model2) # compare predictions plot(predict(model0), predict(model1)) abline(0, 1) plot(predict(model0), predict(model2)) abline(0, 1) # compare proportion explained variance cor(predict(model0), predict(model0) + residuals(model0))^2 cor(predict(model1), predict(model1) + residuals(model1))^2 cor(predict(model2), predict(model2) + residuals(model2))^2 # extract offset from constrained model summary(model2$offset) # it also works with factors and missing data model0 <- lm(bmi ~ age + hyp + chl, data = nhanes2) model1 <- fix.coef(model0) model2 <- fix.coef(model0, beta = c(15, -8, -8, 2, 0.2)) } \author{ Stef van Buuren, 2018 } mice/man/mice.impute.2l.norm.Rd0000644000176200001440000000576014330031647015747 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.norm.R \name{mice.impute.2l.norm} \alias{mice.impute.2l.norm} \title{Imputation by a two-level normal model} \usage{ mice.impute.2l.norm(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. Random variables also include the fixed effect.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using a two-level normal model } \details{ Implements the Gibbs sampler for the linear multilevel model with heterogeneous with-class variance (Kasim and Raudenbush, 1998). Imputations are drawn as an extra step to the algorithm. For simulation work see Van Buuren (2011). The random intercept is automatically added in \code{mice.impute.2L.norm()}. A model within a random intercept can be specified by \code{mice(..., intercept = FALSE)}. 
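For illustration, a minimal sketch of how this type coding could be supplied to \code{mice()} through the predictor matrix; the two-level data set \code{d}, its variable names and the \code{m}/\code{maxit} settings are synthetic assumptions made only for this sketch, not part of the package examples.
\preformatted{
# assumes the mice package is attached; synthetic two-level data:
# 'clus' is the class (cluster) variable, 'x' a level-1 predictor, 'y' incomplete
set.seed(1)
d <- data.frame(clus = rep(1:10, each = 20), x = rnorm(200))
d$y <- 0.5 * d$x + rnorm(10)[d$clus] + rnorm(200)
d$y[sample(200, 50)] <- NA
pred <- make.predictorMatrix(d)
pred["y", ] <- c(-2, 2, 0)  # clus = class variable (-2), x = random effect (2)
imp <- mice(d, method = c("", "", "2l.norm"),
            predictorMatrix = pred, m = 2, maxit = 2, print = FALSE)
}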
} \note{ Added June 25, 2012: The currently implemented algorithm does not handle predictors that are specified as fixed effects (type=1). When using \code{mice.impute.2l.norm()}, the current advice is to specify all predictors as random effects (type=2). Warning: The assumption of heterogeneous variances requires that in every class at least one observation has a response in \code{y}. } \references{ Kasim RM, Raudenbush SW. (1998). Application of Gibbs sampling to nested variance components models with heterogeneous within-group variance. Journal of Educational and Behavioral Statistics, 23(2), 93--116. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. and and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Roel de Jong, 2008 } \concept{univariate-2l} \keyword{datagen} mice/man/toenail2.Rd0000644000176200001440000000412214330031606013741 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/toenail2.R \docType{data} \name{toenail2} \alias{toenail2} \title{Toenail data} \format{ A data frame with 1908 observations on the following 5 variables: \describe{ \item{\code{patientID}}{a numeric vector giving the ID of patient} \item{\code{outcome}}{a factor with 2 levels giving the response} \item{\code{treatment}}{a factor with 2 levels giving the treatment group} \item{\code{time}}{a numeric vector giving the time of the visit (not exactly monthly intervals hence not round numbers)} \item{\code{visit}}{an integer giving the number of the visit} } } \source{ De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De Keyser, P. (1998). Twelve weeks of continuous oral therapy for toenail onychomycosis caused by dermatophytes: A double-blind comparative trial of terbinafine 250 mg/day versus itraconazole 200 mg/day. Journal of the American Academy of Dermatology, 38, 57-63. } \description{ The toenail data come from a Multicenter study comparing two oral treatments for toenail infection. Patients were evaluated for the degree of separation of the nail. Patients were randomized into two treatments and were followed over seven visits - four in the first year and yearly thereafter. The patients have not been treated prior to the first visit so this should be regarded as the baseline. } \details{ Apart from formatting, this dataset is identical to \code{toenail}. The formatting is taken identical to \code{data("toenail", package = "HSAUR3")}. } \references{ Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of quadrature points in a logistic random-effects model: An example. Journal of the Royal Statistical Society, Series C, 50, 325-335. G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, Wiley and Sons, New York, USA. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{toenail}} } \keyword{datasets} mice/man/appendbreak.Rd0000644000176200001440000000207714330031606014507 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{appendbreak} \alias{appendbreak} \title{Appends specified break to the data} \usage{ appendbreak(data, brk, warp.model = warp.model, id = NULL, typ = "pred") } \arguments{ \item{data}{A data frame in the long long format} \item{brk}{A vector of break ages} \item{warp.model}{A time warping model} \item{id}{The subject identifier} \item{typ}{Label to signal that this is a newly added observation} } \value{ A long data frame with additional rows for the break ages } \description{ A custom function to insert rows in long data with new pseudo-observations that are being done on the specified break ages. There should be a column called \code{first} in \code{data} with logical data that codes whether the current row is the first for subject \code{id}. Furthermore, the function assumes that columns \code{age}, \code{occ}, \code{hgt.z}, \code{wgt.z} and \code{bmi.z} are available. This function is used on the \code{tbc} data in FIMD chapter 9. Check that out to see it in action. } mice/man/make.visitSequence.Rd0000644000176200001440000000252214330031606015771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visitSequence.R \name{make.visitSequence} \alias{make.visitSequence} \title{Creates a \code{visitSequence} argument} \usage{ make.visitSequence(data = NULL, blocks = NULL) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} } \value{ Vector containing block names } \description{ This helper function creates a valid \code{visitSequence}. The \code{visitSequence} is an argument to the \code{mice} function that specifies the sequence in which blocks are imputed. } \examples{ make.visitSequence(nhanes) } \seealso{ \code{\link{mice}} } mice/man/print.mads.Rd0000644000176200001440000000065213666252075014327 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.R \name{print.mads} \alias{print.mads} \title{Print a \code{mads} object} \usage{ \method{print}{mads}(x, ...) 
} \arguments{ \item{x}{Object of class \code{mads}} \item{...}{Other parameters passed down to \code{print.default()}} } \value{ \code{NULL} } \description{ Print a \code{mads} object } \seealso{ \code{\link[=mads-class]{mads}} } mice/man/as.mids.Rd0000644000176200001440000000742514334522175013605 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mids} \alias{as.mids} \title{Converts an imputed dataset (long format) into a \code{mids} object} \usage{ as.mids(long, where = NULL, .imp = ".imp", .id = ".id") } \arguments{ \item{long}{A multiply imputed data set in long format, for example produced by a call to \code{complete(..., action = 'long', include = TRUE)}, or by other software.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values. Note: Imputation methods that generate imptutations outside of \code{mice}, like \code{mice.impute.panImpute()} may depend on a complete predictor space. In that case, a custom \code{where} matrix can not be specified.} \item{.imp}{An optional column number or column name in \code{long}, indicating the imputation index. The values are assumed to be consecutive integers between 0 and \code{m}. Values \code{1} through \code{m} correspond to the imputation index, value \code{0} indicates the original data (with missings). By default, the procedure will search for a variable named \code{".imp"}.} \item{.id}{An optional column number or column name in \code{long}, indicating the subject identification. If not specified, then the function searches for a variable named \code{".id"}. If this variable is found, the values in the column will define the row names in the \code{data} element of the resulting \code{mids} object.} } \value{ An object of class \code{mids} } \description{ This function converts imputed data stored in long format into an object of class \code{mids}. The original incomplete dataset needs to be available so that we know where the missing data are. The function is useful to convert back operations applied to the imputed data back in a \code{mids} object. It may also be used to store multiply imputed data sets from other software into the format used by \code{mice}. } \note{ The function expects the input data \code{long} to be sorted by imputation number (variable \code{".imp"} by default), and in the same sequence within each imputation block. 
} \examples{ # impute the nhanes dataset imp <- mice(nhanes, print = FALSE) # extract the data in long format X <- complete(imp, action = "long", include = TRUE) # create dataset with .imp variable as numeric X2 <- X # nhanes example without .id test1 <- as.mids(X) is.mids(test1) identical(complete(test1, action = "long", include = TRUE), X) # nhanes example without .id where .imp is numeric test2 <- as.mids(X2) is.mids(test2) identical(complete(test2, action = "long", include = TRUE), X) # nhanes example, where we explicitly specify .id as column 2 test3 <- as.mids(X, .id = ".id") is.mids(test3) identical(complete(test3, action = "long", include = TRUE), X) # nhanes example with .id where .imp is numeric test4 <- as.mids(X2, .id = 2) is.mids(test4) identical(complete(test4, action = "long", include = TRUE), X) # example without an .id variable # variable .id not preserved X3 <- X[, -2] test5 <- as.mids(X3) is.mids(test5) identical(complete(test5, action = "long", include = TRUE)[, -2], X[, -2]) # as() syntax has fewer options test7 <- as(X, "mids") test8 <- as(X2, "mids") test9 <- as(X2[, -2], "mids") rev <- ncol(X):1 test10 <- as(X[, rev], "mids") # where argument copies also observed data into $imp element where <- matrix(TRUE, nrow = nrow(nhanes), ncol = ncol(nhanes)) colnames(where) <- colnames(nhanes) test11 <- as.mids(X, where = where) identical(complete(test11, action = "long", include = TRUE), X) } \author{ Gerko Vink } \keyword{mids} mice/man/anova.Rd0000644000176200001440000000116214330031606013331 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/anova.R \name{anova.mira} \alias{anova.mira} \title{Compare several nested models} \usage{ \method{anova}{mira}(object, ..., method = "D1", use = "wald") } \arguments{ \item{object}{Two or more objects of class \code{mira}} \item{...}{Other parameters passed down to \code{D1()}, \code{D2()}, \code{D3()} and \code{mitml::testModels}.} \item{method}{Either \code{"D1"}, \code{"D2"} or \code{"D3"}} \item{use}{A character indicating the test statistic} } \value{ Object of class \code{mice.anova} } \description{ Compare several nested models } mice/man/pops.Rd0000644000176200001440000000467114330031606013216 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pops.R \docType{data} \name{pops} \alias{pops} \alias{pops.pred} \title{Project on preterm and small for gestational age infants (POPS)} \format{ \code{pops} is a data frame with 959 rows and 86 columns. \code{pops.pred} is the 86 by 86 binary predictor matrix used for specifying the multiple imputation model. } \source{ Hille, E. T. M., Elbertse, L., Bennebroek Gravenhorst, J., Brand, R., Verloove-Vanhorick, S. P. (2005). Nonresponse bias in a follow-up study of 19-year-old adolescents born as preterm infants. Pediatrics, 116(5):662--666. Hille, E. T. M., Weisglas-Kuperus, N., Van Goudoever, J. B., Jacobusse, G. W., Ens-Dokkum, M. H., De Groot, L., Wit, J. M., Geven, W. B., Kok, J. H., De Kleine, M. J. K., Kollee, L. A. A., Mulder, A. L. M., Van Straaten, H. L. M., De Vries, L. S., Van Weissenbruch, M. M., Verloove-Vanhorick, S. P. (2007). Functional outcomes and participation in young adulthood for very preterm and very low birth weight infants: The Dutch project on preterm and small for gestational age infants at 19 years of age. Pediatrics, 120(3):587--595. Van Buuren, S. (2018).
\href{https://stefvanbuuren.name/fimd/sec-selective.html#pops-study-19-years-follow-up}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Subset of data from the POPS study, a national, prospective study on preterm children, including all liveborn infants <32 weeks gestational age and/or <1500 g from 1983 (n = 1338). } \details{ The data set concerns a subset of 959 children that survived up to the age of 19 years. Hille et al. (2005) divided the 959 survivors into three groups: Full responders (examined at an outpatient clinic and completed the questionnaires, n = 596), postal responders (only completed the mailed questionnaires, n = 109), non-responders (did not respond to any of the mailed requests or telephone calls, or could not be traced, n = 254). Compared to the postal and non-responders, the full response group consists of more girls, contains more Dutch children, has higher educational and social economic levels and has fewer handicaps. The responders form a highly selective subgroup in the total cohort. Multiple imputation of this data set has been described in Hille et al. (2007) and Van Buuren (2012), chapter 8. } \note{ This dataset is not part of \code{mice}. } \examples{ pops <- data(pops) } \keyword{datasets} mice/man/make.method.Rd0000644000176200001440000000500414334522175014432 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/method.R \name{make.method} \alias{make.method} \title{Creates a \code{method} argument} \usage{ make.method( data, where = make.where(data), blocks = make.blocks(data), defaultMethod = c("pmm", "logreg", "polyreg", "polr") ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values. Note: Imputation methods that generate imputations outside of \code{mice}, like \code{mice.impute.panImpute()}, may depend on a complete predictor space. In that case, a custom \code{where} matrix cannot be specified.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} for variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{defaultMethod}{A vector of length 4 containing the default imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) factor data with > 2 unordered levels, and 4) factor data with > 2 ordered levels.
By default, the method uses \code{pmm}, predictive mean matching (numeric data); \code{logreg}, logistic regression imputation (binary data, factor with 2 levels); \code{polyreg}, polytomous regression imputation for unordered categorical data (factor > 2 levels); and \code{polr}, proportional odds model for ordered categorical data (ordered factor, > 2 levels).} } \value{ Vector of \code{length(blocks)} elements with method names } \description{ This helper function creates a valid \code{method} vector. The \code{method} vector is an argument to the \code{mice} function that specifies the method for each block. } \examples{ make.method(nhanes2) } \seealso{ \code{\link{mice}} } mice/man/make.blocks.Rd0000644000176200001440000000550114330031606014421 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{make.blocks} \alias{make.blocks} \title{Creates a \code{blocks} argument} \usage{ make.blocks( data, partition = c("scatter", "collect", "void"), calltype = "type" ) } \arguments{ \item{data}{A \code{data.frame}, character vector with variable names, or \code{list} with variable names.} \item{partition}{A character vector of length 1 used to assign variables to blocks when \code{data} is a \code{data.frame}. Value \code{"scatter"} (default) will assign each column to its own block. Value \code{"collect"} assigns all variables to one block, whereas \code{"void"} produces an empty list.} \item{calltype}{A character vector of \code{length(blocks)} elements that indicates how the imputation model is specified. If \code{calltype = "type"} (the default), the underlying imputation model is called by means of the \code{type} argument. The \code{type} argument for block \code{h} is equivalent to row \code{h} in the \code{predictorMatrix}. The alternative is \code{calltype = "formula"}. This will pass \code{formulas[[h]]} to the underlying imputation function for block \code{h}, together with the current data. The \code{calltype} of a block is set automatically during initialization. Where a choice is possible, calltype \code{"formula"} is preferred over \code{"type"} since this is more flexible and extendable. However, what precisely happens depends also on the capabilities of the imputation function that is called.} } \value{ A named list of character vectors with variable names. } \description{ This helper function generates a list of the type needed for the \code{blocks} argument in the \code{\link[=mice]{mice}} function. } \details{ Choices \code{"scatter"} and \code{"collect"} represent the two extreme scenarios for assigning variables to imputation blocks. Use \code{"scatter"} to create an imputation model based on \emph{fully conditional specification} (FCS). Use \code{"collect"} to gather all variables to be imputed by a \emph{joint model} (JM). Scenarios in-between these two extremes represent \emph{hybrid} imputation models that combine FCS and JM. Any variable not listed in \code{blocks} will not be imputed. Specification \code{"void"} represents the extreme scenario that skips imputation of all variables. A variable may be a member of multiple blocks. The variable will be re-imputed in each block, so the final imputations for the variable will come from the last block that was executed. This scenario may be useful where the same complete background factors appear in multiple imputation blocks. A variable may appear multiple times within a given block. If a univariate imputation model is applied to such a block, then the variable is re-imputed each time as it appears in the block.
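A small sketch of the three \code{partition} choices and of an explicit list specification (output not shown; the variable names in the last call are illustrative):
\preformatted{
make.blocks(nhanes, partition = "scatter")  # one block per variable (FCS)
make.blocks(nhanes, partition = "collect")  # all variables in a single block (JM)
make.blocks(nhanes, partition = "void")     # empty list: nothing is imputed
make.blocks(list(c("bmi", "chl"), "age"))   # explicit list; unnamed blocks are named automatically
}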
} \examples{ make.blocks(nhanes) make.blocks(c("age", "sex", "edu")) } mice/man/mice.Rd0000644000176200001440000005431314334522175013162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice-package.R, R/mice.R \docType{package} \name{mice} \alias{mice} \title{\pkg{mice}: Multivariate Imputation by Chained Equations} \usage{ mice( data, m = 5, method = NULL, predictorMatrix, ignore = NULL, where = NULL, blocks, visitSequence = NULL, formulas, blots = NULL, post = NULL, defaultMethod = c("pmm", "logreg", "polyreg", "polr"), maxit = 5, printFlag = TRUE, seed = NA, data.init = NULL, ... ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} \item{m}{Number of multiple imputations. The default is \code{m=5}.} \item{method}{Can be either a single string, or a vector of strings with length \code{length(blocks)}, specifying the imputation method to be used for each column in data. If specified as a single string, the same method will be used for all blocks. The default imputation method (when no argument is specified) depends on the measurement level of the target column, as regulated by the \code{defaultMethod} argument. Columns that need not be imputed have the empty method \code{""}. See details.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g, \code{2} or \code{-2}) are also allowed.} \item{ignore}{A logical vector of \code{nrow(data)} elements indicating which rows are ignored when creating the imputation model. The default \code{NULL} includes all rows that have an observed value of the variable to imputed. Rows with \code{ignore} set to \code{TRUE} do not influence the parameters of the imputation model, but are still imputed. We may use the \code{ignore} argument to split \code{data} into a training set (on which the imputation model is built) and a test set (that does not influence the imputation model estimates). Note: Multivariate imputation methods, like \code{mice.impute.jomoImpute()} or \code{mice.impute.panImpute()}, do not honour the \code{ignore} argument.} \item{where}{A data frame or matrix with logicals of the same dimensions as \code{data} indicating where in the data the imputations should be created. The default, \code{where = is.na(data)}, specifies that the missing data should be imputed. The \code{where} argument may be used to overimpute observed data, or to skip imputations for selected missing values. Note: Imputation methods that generate imptutations outside of \code{mice}, like \code{mice.impute.panImpute()} may depend on a complete predictor space. In that case, a custom \code{where} matrix can not be specified.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). 
By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} of variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{visitSequence}{A vector of block names of arbitrary length, specifying the sequence of blocks that are imputed during one iteration of the Gibbs sampler. A block is a collection of variables. All variables that are members of the same block are imputed when the block is visited. A variable that is a member of multiple blocks is re-imputed within the same iteration. The default \code{visitSequence = "roman"} visits the blocks (left to right) in the order in which they appear in \code{blocks}. One may also use one of the following keywords: \code{"arabic"} (right to left), \code{"monotone"} (ordered low to high proportion of missing data) and \code{"revmonotone"} (reverse of monotone). \emph{Special case}: If you specify both \code{visitSequence = "monotone"} and \code{maxit = 1}, then the procedure will edit the \code{predictorMatrix} to conform to the monotone pattern. Realize that convergence in one iteration is only guaranteed if the missing data pattern is actually monotone. The procedure does not check this.} \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{blots}{A named \code{list} of \code{alist}'s that can be used to pass down arguments to lower level imputation function. The entries of element \code{blots[[blockname]]} are passed down to the function called for block \code{blockname}.} \item{post}{A vector of strings with length \code{ncol(data)} specifying expressions as strings. Each string is parsed and executed within the \code{sampler()} function to post-process imputed values during the iterations. The default is a vector of empty strings, indicating no post-processing. Multivariate (block) imputation methods ignore the \code{post} parameter.} \item{defaultMethod}{A vector of length 4 containing the default imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) factor data with > 2 unordered levels, and 4) factor data with > 2 ordered levels. By default, the method uses \code{pmm}, predictive mean matching (numeric data) \code{logreg}, logistic regression imputation (binary data, factor with 2 levels) \code{polyreg}, polytomous regression imputation for unordered categorical data (factor > 2 levels) \code{polr}, proportional odds model for (ordered, > 2 levels).} \item{maxit}{A scalar giving the number of iterations. The default is 5.} \item{printFlag}{If \code{TRUE}, \code{mice} will print history on console. Use \code{print=FALSE} for silent computation.} \item{seed}{An integer that is used as argument by the \code{set.seed()} for offsetting the random number generator. 
Default is to leave the random number generator alone.} \item{data.init}{A data frame of the same size and type as \code{data}, without missing data, used to initialize imputations before the start of the iterative process. The default \code{NULL} implies that starting imputations are created by a simple random draw from the data. Note that specification of \code{data.init} will start all \code{m} Gibbs sampling streams from the same imputation.} \item{\dots}{Named arguments that are passed down to the univariate imputation functions.} } \value{ Returns an S3 object of class \code{\link[=mids-class]{mids}} (multiply imputed data set) } \description{ The \pkg{mice} package implements a method to deal with missing data. The package creates multiple imputations (replacement values) for multivariate missing data. The method is based on Fully Conditional Specification, where each incomplete variable is imputed by a separate model. The MICE algorithm can impute mixes of continuous, binary, unordered categorical and ordered categorical data. In addition, MICE can impute continuous two-level data, and maintain consistency between imputations by means of passive imputation. Many diagnostic plots are implemented to inspect the quality of the imputations. Generates Multivariate Imputations by Chained Equations (MICE) } \details{ The \pkg{mice} package contains functions to \itemize{ \item Inspect the missing data pattern \item Impute the missing data \emph{m} times, resulting in \emph{m} completed data sets \item Diagnose the quality of the imputed values \item Analyze each completed data set \item Pool the results of the repeated analyses \item Store and export the imputed data in various formats \item Generate simulated incomplete data \item Incorporate custom imputation methods } Generates multiple imputations for incomplete multivariate data by Gibbs sampling. Missing data can occur anywhere in the data. The algorithm imputes an incomplete column (the target column) by generating 'plausible' synthetic values given other columns in the data. Each incomplete column must act as a target column, and has its own specific set of predictors. The default set of predictors for a given target consists of all other columns in the data. For predictors that are incomplete themselves, the most recently generated imputations are used to complete the predictors prior to imputation of the target column. A separate univariate imputation model can be specified for each column. The default imputation method depends on the measurement level of the target column. In addition to these, several other methods are provided. You can also write your own imputation functions, and call these from within the algorithm. The data may contain categorical variables that are used in regressions on other variables. The algorithm creates dummy variables for the categories of these variables, and imputes these from the corresponding categorical variable.
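A common way to adapt these defaults is a dry run with \code{maxit = 0}, after which the returned \code{method} vector and \code{predictorMatrix} are edited and passed back to \code{mice()}. A minimal sketch (the particular method and predictor choices below are illustrative only):
\preformatted{
ini <- mice(nhanes2, maxit = 0, print = FALSE)  # dry run: defaults only, no iterations
meth <- ini$method
meth["bmi"] <- "norm"                           # e.g. Bayesian linear regression for bmi
pred <- ini$predictorMatrix
pred["bmi", "chl"] <- 0                         # do not use chl when imputing bmi
imp <- mice(nhanes2, method = meth, predictorMatrix = pred, maxit = 5, print = FALSE)
}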
Built-in univariate imputation methods are: \tabular{lll}{ \code{pmm} \tab any \tab Predictive mean matching\cr \code{midastouch} \tab any \tab Weighted predictive mean matching\cr \code{sample} \tab any \tab Random sample from observed values\cr \code{cart} \tab any \tab Classification and regression trees\cr \code{rf} \tab any \tab Random forest imputations\cr \code{mean} \tab numeric \tab Unconditional mean imputation\cr \code{norm} \tab numeric \tab Bayesian linear regression\cr \code{norm.nob} \tab numeric \tab Linear regression ignoring model error\cr \code{norm.boot} \tab numeric \tab Linear regression using bootstrap\cr \code{norm.predict} \tab numeric \tab Linear regression, predicted values\cr \code{lasso.norm} \tab numeric \tab Lasso linear regression\cr \code{lasso.select.norm} \tab numeric \tab Lasso select + linear regression\cr \code{quadratic} \tab numeric \tab Imputation of quadratic terms\cr \code{ri} \tab numeric \tab Random indicator for nonignorable data\cr \code{logreg} \tab binary \tab Logistic regression\cr \code{logreg.boot} \tab binary \tab Logistic regression with bootstrap\cr \code{lasso.logreg} \tab binary \tab Lasso logistic regression\cr \code{lasso.select.logreg}\tab binary \tab Lasso select + logistic regression\cr \code{polr} \tab ordered \tab Proportional odds model\cr \code{polyreg} \tab unordered\tab Polytomous logistic regression\cr \code{lda} \tab unordered\tab Linear discriminant analysis\cr \code{2l.norm} \tab numeric \tab Level-1 normal heteroscedastic\cr \code{2l.lmer} \tab numeric \tab Level-1 normal homoscedastic, lmer\cr \code{2l.pan} \tab numeric \tab Level-1 normal homoscedastic, pan\cr \code{2l.bin} \tab binary \tab Level-1 logistic, glmer\cr \code{2lonly.mean} \tab numeric \tab Level-2 class mean\cr \code{2lonly.norm} \tab numeric \tab Level-2 class normal\cr \code{2lonly.pmm} \tab any \tab Level-2 class predictive mean matching } The corresponding functions are coded in the \code{mice} library under names \code{mice.impute.method}, where \code{method} is a string with the name of the univariate imputation method, for example \code{norm}. The \code{method} argument specifies the methods to be used. For the \code{j}'th column, \code{mice()} calls the first occurrence of \code{paste('mice.impute.', method[j], sep = '')} in the search path. The mechanism allows users to write customized imputation functions, such as \code{mice.impute.myfunc}. To call it for all columns specify \code{method='myfunc'}. To call it only for, say, column 2 specify \code{method=c('norm','myfunc','logreg',\dots{})}. \emph{Skipping imputation:} The user may skip imputation of a column by setting its entry to the empty method: \code{""}. For complete columns without missing data \code{mice} will automatically set the empty method. Setting the empty method does not produce imputations for the column, so any missing cells remain \code{NA}. If column A contains \code{NA}'s and is used as predictor in the imputation model for column B, then \code{mice} produces no imputations for the rows in B where A is missing. The imputed data for B may thus contain \code{NA}'s. The remedy is to remove column A from the imputation model for the other columns in the data. This can be done by setting the entire column for variable A in the \code{predictorMatrix} equal to zero. \emph{Passive imputation:} \code{mice()} supports a special built-in method, called passive imputation.
This method can be used to ensure that a data transform always depends on the most recently generated imputations. In some cases, an imputation model may need transformed data in addition to the original data (e.g. log, quadratic, recodes, interaction, sum scores, and so on). Passive imputation maintains consistency among different transformations of the same data. Passive imputation is invoked if \code{~} is specified as the first character of the string that specifies the univariate method. \code{mice()} interprets the entire string, including the \code{~} character, as the formula argument in a call to \code{model.frame(formula, data[!r[,j],])}. This provides a simple mechanism for specifying deterministic dependencies among the columns. For example, suppose that the missing entries in variables \code{data$height} and \code{data$weight} are imputed. The body mass index (BMI) can be calculated within \code{mice} by specifying the string \code{'~I(weight/height^2)'} as the univariate imputation method for the target column \code{data$bmi}. Note that the \code{~} mechanism works only on those entries which have missing values in the target column. You should make sure that the combined observed and imputed parts of the target column make sense. An easy way to create consistency is by coding all entries in the target as \code{NA}, but for large data sets, this could be inefficient. Note that you may also need to adapt the default \code{predictorMatrix} to evade linear dependencies among the predictors that could cause errors like \code{Error in solve.default()} or \code{Error: system is exactly singular}. Though not strictly needed, it is often useful to specify \code{visitSequence} such that the column that is imputed by the \code{~} mechanism is visited each time after one of its predictors was visited. In that way, deterministic relations between columns will always be synchronized. A new argument \code{ls.meth} can be passed to the lower-level \code{.norm.draw} to specify the method for generating the least squares estimates and any subsequently derived estimates. Argument \code{ls.meth} takes one of three inputs: \code{"qr"} for QR-decomposition, \code{"svd"} for singular value decomposition and \code{"ridge"} for ridge regression. \code{ls.meth} defaults to \code{ls.meth = "qr"}. \emph{Auxiliary predictors in formulas specification: } For a given block, the \code{formulas} specification takes precedence over the corresponding row in the \code{predictorMatrix} argument. This precedence is, however, restricted to the subset of variables specified in the terms of the block formula. Any variables not specified by \code{formulas} are imputed according to the \code{predictorMatrix} specification. Variables with non-zero \code{type} values in the \code{predictorMatrix} will be added as main effects to the \code{formulas}, which will act as supplementary covariates in the imputation model. It is possible to turn off this behavior by specifying the argument \code{auxiliary = FALSE}. } \section{Functions}{ The main functions are: \tabular{ll}{ \code{mice()} \tab Impute the missing data \emph{m} times\cr \code{with()} \tab Analyze completed data sets\cr \code{pool()} \tab Combine parameter estimates\cr \code{complete()} \tab Export imputed data\cr \code{ampute()} \tab Generate missing data\cr} } \section{Vignettes}{ There is a detailed series of six online vignettes that walk you through solving realistic inference problems with mice.
We suggest going through these vignettes in the following order: \enumerate{ \item \href{https://www.gerkovink.com/miceVignettes/Ad_hoc_and_mice/Ad_hoc_methods.html}{Ad hoc methods and the MICE algorithm} \item \href{https://www.gerkovink.com/miceVignettes/Convergence_pooling/Convergence_and_pooling.html}{Convergence and pooling} \item \href{https://www.gerkovink.com/miceVignettes/Missingness_inspection/Missingness_inspection.html}{Inspecting how the observed data and missingness are related} \item \href{https://www.gerkovink.com/miceVignettes/Passive_Post_processing/Passive_imputation_post_processing.html}{Passive imputation and post-processing} \item \href{https://www.gerkovink.com/miceVignettes/Multi_level/Multi_level_data.html}{Imputing multilevel data} \item \href{https://www.gerkovink.com/miceVignettes/Sensitivity_analysis/Sensitivity_analysis.html}{Sensitivity analysis with \pkg{mice}} } Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC, Boca Raton, FL. The book contains a lot of \href{https://github.com/stefvanbuuren/fimdbook/tree/master/R}{example code}. } \section{Methodology}{ The \pkg{mice} software was published in the \emph{Journal of Statistical Software} (Van Buuren and Groothuis-Oudshoorn, 2011). \doi{10.18637/jss.v045.i03} The first application of the method concerned missing blood pressure data (Van Buuren et al., 1999). The term \emph{Fully Conditional Specification} was introduced in 2006 to describe a general class of methods that specify imputation models for multivariate data as a set of conditional distributions (Van Buuren et al., 2006). Further details on mixes of variables and applications can be found in the book \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \section{Enhanced linear algebra}{ Updating the BLAS can improve the speed of R, sometimes considerably. The details depend on the operating system. See the discussion in the "R Installation and Administration" guide for further information. } \examples{ # do default multiple imputation on a numeric matrix imp <- mice(nhanes) imp # list the actual imputations for BMI imp$imp$bmi # first completed data matrix complete(imp) # imputation on mixed data with a different method per column mice(nhanes2, meth = c("sample", "pmm", "logreg", "norm")) \dontrun{ # example where we fit the imputation model on the train data # and apply the model to impute the test data set.seed(123) ignore <- sample(c(TRUE, FALSE), size = 25, replace = TRUE, prob = c(0.3, 0.7)) # scenario 1: train and test in the same dataset imp <- mice(nhanes2, m = 2, ignore = ignore, print = FALSE, seed = 22112) imp.test1 <- filter(imp, ignore) imp.test1$data complete(imp.test1, 1) complete(imp.test1, 2) # scenario 2: train and test in separate datasets traindata <- nhanes2[!ignore, ] testdata <- nhanes2[ignore, ] imp.train <- mice(traindata, m = 2, print = FALSE, seed = 22112) imp.test2 <- mice.mids(imp.train, newdata = testdata) complete(imp.test2, 1) complete(imp.test2, 2) } } \references{ van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) Fully conditional specification in multivariate imputation.
\emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1--67. \doi{10.18637/jss.v045.i03} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Van Buuren, S. (2007) Multiple imputation of discrete and continuous data by fully conditional specification. \emph{Statistical Methods in Medical Research}, \bold{16}, 3, 219--242. Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}}, \code{\link{with.mids}}, \code{\link{pool}}, \code{\link{complete}}, \code{\link{ampute}}, \code{\link{set.seed}} } \author{ Stef van Buuren \email{stef.vanbuuren@tno.nl}, Karin Groothuis-Oudshoorn \email{c.g.m.oudshoorn@utwente.nl}, 2000-2010, with contributions of Alexander Robitzsch, Gerko Vink, Shahab Jolani, Roel de Jong, Jason Turner, Lisa Doove, John Fox, Frank E. Harrell, and Peter Malewski. } \keyword{iteration} mice/man/ampute.default.patterns.Rd0000644000176200001440000000117414330031606017005 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.patterns} \alias{ampute.default.patterns} \title{Default \code{patterns} in \code{ampute}} \usage{ ampute.default.patterns(n) } \arguments{ \item{n}{A scalar specifying the number of variables in the data.} } \value{ A square matrix of size \code{n} in which \code{0} indicates that a variable has missing values and \code{1} indicates that a variable remains complete. } \description{ This function creates a default pattern matrix for the multivariate amputation function \code{ampute()}.
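A small sketch of how the default pattern matrix may be inspected and passed on to \code{ampute()} (the data and the \code{prop} setting are illustrative only):
\preformatted{
pat <- ampute.default.patterns(n = 3)  # 3 x 3 matrix of 0/1 pattern indicators
pat
set.seed(1)
complete.data <- data.frame(x1 = rnorm(100), x2 = rnorm(100), x3 = rnorm(100))
amp <- ampute(complete.data, patterns = pat, prop = 0.3)
md.pattern(amp$amp)
}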
} \seealso{ \code{\link{ampute}}, \code{\link{md.pattern}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/is.mids.Rd0000644000176200001440000000051513666252075013614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mids} \alias{is.mids} \title{Check for \code{mids} object} \usage{ is.mids(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mids} } \description{ Check for \code{mids} object } mice/man/mads-class.Rd0000644000176200001440000000656714330031606014272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mads.R \docType{class} \name{mads-class} \alias{mads-class} \title{Multivariate amputed data set (\code{mads})} \description{ The \code{mads} object contains an amputed data set. The \code{mads} object is generated by the \code{ampute} function. The \code{mads} class of objects has methods for the following generic functions: \code{print}, \code{summary}, \code{bwplot} and \code{xyplot}. } \note{ Many of the functions of the \code{mice} package do not use the S4 class definitions, and instead rely on the S3 list equivalent \code{oldClass(obj) <- "mads"}. } \section{Contents}{ \describe{ \item{\code{call}:}{The function call.} \item{\code{prop}:}{Proportion of cases with missing values. Note: even when the proportion is entered as the proportion of missing cells (when \code{bycases == TRUE}), this object contains the proportion of missing cases.} \item{\code{patterns}:}{A data frame of size #patterns by #variables where \code{0} indicates a variable has missing values and \code{1} indicates a variable remains complete.} \item{\code{freq}:}{A vector of length #patterns containing the relative frequency with which the patterns occur. For example, if the vector is \code{c(0.4, 0.4, 0.2)}, this means that of all cases with missing values, 40 percent is candidate for pattern 1, 40 percent for pattern 2 and 20 percent for pattern 3. The vector sums to 1.} \item{\code{mech}:}{A string specifying the missingness mechanism, either \code{"MCAR"}, \code{"MAR"} or \code{"MNAR"}.} \item{\code{weights}:}{A data frame of size #patterns by #variables. It contains the weights that were used to calculate the weighted sum scores. The weights may differ between patterns and between variables.} \item{\code{cont}:}{Logical, whether probabilities are based on continuous logit functions or on discrete odds distributions.} \item{\code{type}:}{A vector of strings containing the type of missingness for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or \code{"RIGHT"}. The first type refers to the first pattern, the second type to the second pattern, etc.} \item{\code{odds}:}{A matrix where #patterns defines the #rows. Each row contains the odds of being missing for the corresponding pattern. The amount of odds values defines in how many quantiles the sum scores were divided. The values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The #quantiles may differ between patterns, NA is used for cells remaining empty.} \item{\code{amp}:}{A data frame containing the input data with NAs for the amputed values.} \item{\code{cand}:}{A vector that contains the pattern number for each case. A value between 1 and #patterns is given. 
For example, a case with value 2 is candidate for missing data pattern 2.} \item{\code{scores}:}{A list containing vectors with weighted sum scores of the candidates. The first vector refers to the candidates of the first pattern, the second vector refers to the candidates of the second pattern, etc. The length of the vectors differ because the number of candidates is different for each pattern.} \item{\code{data}:}{The complete data set that was entered in \code{ampute}.} } } \seealso{ \code{\link{ampute}}, Vignette titled "Multivariate Amputation using Ampute". } \author{ Rianne Schouten, 2016 } mice/man/stripplot.mids.Rd0000644000176200001440000002144114330031647015227 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stripplot.R \name{stripplot.mids} \alias{stripplot.mids} \alias{stripplot} \title{Stripplot of observed and imputed data} \usage{ \method{stripplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.stripplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.stripplot"), jitter.data = TRUE, horizontal = FALSE, ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odds labeling of vertical axis. For convenience, in \code{stripplot()} and \code{bwplot} the formula \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single \code{y}, and does not (yet) work for \code{y1+y2~.imp}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated in the response indicator \code{is.na(x$data)}. The default \code{na.group = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} arguments in \pkg{lattice}. 
It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{panel}{See \code{\link{xyplot}}.} \item{default.prepanel}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{jitter.data}{See \code{\link[lattice:panel.xyplot]{panel.xyplot}}.} \item{horizontal}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{stripplot} produces one-dimensional scatterplots. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of both may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}.
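For example, a minimal sketch with these settings (the \code{nhanes2} data and the plotted variable are illustrative):
\preformatted{
imp <- mice(nhanes2, maxit = 2, print = FALSE, seed = 1)
# observed values in transparent blue, imputed values in transparent red
stripplot(imp, chl ~ .imp, col = mdc(1:2), pch = 20, cex = 1.5)
}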
} \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. } \examples{ imp <- mice(boys, maxit = 1) ### stripplot, all numerical variables \dontrun{ stripplot(imp) } ### same, but with improved display \dontrun{ stripplot(imp, col = c("grey", mdc(2)), pch = c(1, 20)) } ### distribution per imputation of height, weight and bmi ### labeled by their own missingness \dontrun{ stripplot(imp, hgt + wgt + bmi ~ .imp, cex = c(2, 4), pch = c(1, 20), jitter = FALSE, layout = c(3, 1) ) } ### same, but labeled with the missingness of wgt (just four cases) \dontrun{ stripplot(imp, hgt + wgt + bmi ~ .imp, na = wgt, cex = c(2, 4), pch = c(1, 20), jitter = FALSE, layout = c(3, 1) ) } ### distribution of age and height, labeled by missingness in height ### most height values are missing for those around ### the age of two years ### some additional missings occur in region WEST \dontrun{ stripplot(imp, age + hgt ~ .imp | reg, hgt, col = c(grDevices::hcl(0, 0, 40, 0.2), mdc(2)), pch = c(1, 20) ) } ### heavily jittered relation between two categorical variables ### labeled by missingness of gen ### aggregated over all imputed data sets \dontrun{ stripplot(imp, gen ~ phb, factor = 2, cex = c(8, 1), hor = TRUE) } ### circle fun stripplot(imp, gen ~ .imp, na = wgt, factor = 2, cex = c(8.6), hor = FALSE, outer = TRUE, scales = "free", pch = c(1, 19) ) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:xyplot]{stripplot}}, \code{\link[lattice:panel.stripplot]{panel.stripplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.get]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/parlmice.Rd0000644000176200001440000000723214332750363014037 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parlmice.R \name{parlmice} \alias{parlmice} \title{Wrapper function that runs MICE in parallel} \usage{ parlmice( data, m = 5, seed = NA, cluster.seed = NA, n.core = NULL, n.imp.core = NULL, cl.type = "PSOCK", ... ) } \arguments{ \item{data}{A data frame or matrix containing the incomplete data. Similar to the first argument of \code{\link{mice}}.} \item{m}{The number of desired imputed datasets. By default \code{m = 5}, as with \code{mice}.} \item{seed}{A scalar to be used as the seed value for the mice algorithm within each parallel stream. Please note that the imputations will be the same for all streams and, hence, this should be used if and only if \code{n.core = 1} and if it is desired to obtain the same output as under \code{mice}.} \item{cluster.seed}{A scalar to be used as the seed value.
It is recommended to put the seed value here and not outside this function, as otherwise the parallel processes will be performed with separate, random seeds.} \item{n.core}{A scalar indicating the number of cores that should be used.} \item{n.imp.core}{A scalar indicating the number of imputations per core.} \item{cl.type}{The cluster type. Default value is \code{"PSOCK"}. POSIX machines (Linux, Mac) generally benefit from much faster cluster computation if \code{type} is set to \code{type = "FORK"}.} \item{...}{Named arguments that are passed down to function \code{\link{mice}} or \code{\link{makeCluster}}.} } \value{ A mids object as defined by \code{\link{mids-class}} } \description{ This function is included for backward compatibility. The function is superseded by \code{\link{futuremice}}. } \details{ This function relies on package \code{\link{parallel}}, which is a base package for R versions 2.14.0 and later. We have chosen to use parallel function \code{parLapply} to allow the use of \code{parlmice} on Mac, Linux and Windows systems. For the same reason, we use the Parallel Socket Cluster (PSOCK) type by default. On systems other than Windows, it can be hugely beneficial to change the cluster type to \code{FORK}, as it generally results in improved memory handling. When memory issues arise on a Windows system, we advise storing the multiply imputed datasets, cleaning the memory by using \code{\link{rm}} and \code{\link{gc}}, and making another run using the same settings. This wrapper function combines the output of \code{\link{parLapply}} with function \code{\link{ibind}} in \code{\link{mice}}. A \code{mids} object is returned and can be used for further analyses. Note that if a seed value is desired, the seed should be entered into this function with argument \code{seed}. Seed values outside the wrapper function (in an R-script or passed to \code{\link{mice}}) will not lead to reproducible results. We refer to the manual of \code{\link{parallel}} for an explanation on this matter. } \examples{ # 150 imputations in dataset nhanes, performed by 3 cores \dontrun{ imp1 <- parlmice(data = nhanes, n.core = 3, n.imp.core = 50) # Making use of arguments in mice. imp2 <- parlmice(data = nhanes, method = "norm.nob", m = 100) imp2$method fit <- with(imp2, lm(bmi ~ hyp)) pool(fit) } } \references{ Schouten, R. and Vink, G. (2017). parlmice: faster, paraleller, micer. \url{https://www.gerkovink.com/parlMICE/Vignette_parlMICE.html} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{parallel}}, \code{\link{parLapply}}, \code{\link{makeCluster}}, \code{\link{mice}}, \code{\link{mids-class}} } \author{ Gerko Vink, Rianne Schouten } mice/man/mice.impute.norm.boot.Rd0000644000176200001440000000454614330031647016376 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.boot.R \name{mice.impute.norm.boot} \alias{mice.impute.norm.boot} \alias{norm.boot} \title{Imputation by linear regression, bootstrap method} \usage{ mice.impute.norm.boot(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted.
The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear regression with bootstrap } \details{ Draws a bootstrap sample from \code{x[ry,]} and \code{y[ry]}, calculates regression weights and imputes with normal residuals. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/D1.Rd0000644000176200001440000000356414436064333012513 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D1.R \name{D1} \alias{D1} \title{Compare two nested models using D1-statistic} \usage{ D1(fit1, fit0 = NULL, dfcom = NULL, df.com = NULL) } \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is a nested within \code{fit1}. The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{dfcom}{A single number denoting the complete-data degrees of freedom of model \code{fit1}. If not specified, it is set equal to \code{df.residual} of model \code{fit1}. If that cannot be done, the procedure assumes (perhaps incorrectly) a large sample.} \item{df.com}{Deprecated} } \description{ The D1-statistics is the multivariate Wald test. } \note{ Warning: `D1()` assumes that the order of the variables is the same in different models. See \url{https://github.com/amices/mice/issues/420} for details. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D1(mi1, mi0) \dontrun{ # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D1(fit1, fit0) } } \references{ Li, K. H., T. E. 
Raghunathan, and D. B. Rubin. 1991. Large-Sample Significance Levels from Multiply Imputed Data Using Moment-Based Statistics and an F Reference Distribution. \emph{Journal of the American Statistical Association}, 86(416): 1065–73. \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:wald} } \seealso{ \code{\link[mitml]{testModels}} } mice/man/pool.scalar.Rd0000644000176200001440000000677314334522175014471 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.scalar.R \name{pool.scalar} \alias{pool.scalar} \alias{pool.scalar.syn} \title{Multiple imputation pooling: univariate version} \usage{ pool.scalar(Q, U, n = Inf, k = 1, rule = c("rubin1987", "reiter2003")) pool.scalar.syn(Q, U, n = Inf, k = 1, rule = "reiter2003") } \arguments{ \item{Q}{A vector of univariate estimates of \code{m} repeated complete data analyses.} \item{U}{A vector containing the corresponding \code{m} variances of the univariate estimates.} \item{n}{A number providing the sample size. If nothing is specified, an infinite sample \code{n = Inf} is assumed.} \item{k}{A number indicating the number of parameters to be estimated. By default, \code{k = 1} is assumed.} \item{rule}{A string indicating the pooling rule. Currently supported are \code{"rubin1987"} (default, for missing data) and \code{"reiter2003"} (for synthetic data created from a complete data set).} } \value{ Returns a list with components. \describe{ \item{\code{m}:}{Number of imputations.} \item{\code{qhat}:}{The \code{m} univariate estimates of repeated complete-data analyses.} \item{\code{u}:}{The corresponding \code{m} variances of the univariate estimates.} \item{\code{qbar}:}{The pooled univariate estimate, formula (3.1.2) Rubin (1987).} \item{\code{ubar}:}{The mean of the variances (i.e. the pooled within-imputation variance), formula (3.1.3) Rubin (1987).} \item{\code{b}:}{The between-imputation variance, formula (3.1.4) Rubin (1987).} \item{\code{t}:}{The total variance of the pooled estimated, formula (3.1.5) Rubin (1987).} \item{\code{r}:}{The relative increase in variance due to nonresponse, formula (3.1.7) Rubin (1987).} \item{\code{df}:}{The degrees of freedom for t reference distribution by the method of Barnard-Rubin (1999).} \item{\code{fmi}:}{The fraction missing information due to nonresponse, formula (3.1.10) Rubin (1987). (Not defined for synthetic data.)} } } \description{ Pools univariate estimates of m repeated complete data analysis } \details{ The function averages the univariate estimates of the complete data model, computes the total variance over the repeated analyses, and computes the relative increase in variance due to missing data or data synthesisation and the fraction of missing information. 
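A small numerical sketch of these components (the estimates and variances below are made up for illustration):
\preformatted{
Q <- c(0.52, 0.61, 0.47)  # hypothetical estimates from m = 3 complete-data analyses
U <- c(0.10, 0.12, 0.11)  # their complete-data variances
est <- pool.scalar(Q, U, n = 100, k = 1)
est$qbar                  # pooled estimate: mean(Q)
est$b                     # between-imputation variance: var(Q)
est$t                     # total variance: est$ubar + (1 + 1/3) * est$b
}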
} \examples{ # missing data imputation with manual pooling imp <- mice(nhanes, maxit = 2, m = 2, print = FALSE, seed = 18210) fit <- with(data = imp, lm(bmi ~ age)) # manual pooling summary(fit$analyses[[1]]) summary(fit$analyses[[2]]) pool.scalar(Q = c(-1.5457, -1.428), U = c(0.9723^2, 1.041^2), n = 25, k = 2) # check: automatic pooling using broom pool(fit) # manual pooling for synthetic data created from complete data imp <- mice(cars, maxit = 2, m = 2, print = FALSE, seed = 18210, where = matrix(TRUE, nrow(cars), ncol(cars)) ) fit <- with(data = imp, lm(speed ~ dist)) # manual pooling: extract Q and U summary(fit$analyses[[1]]) summary(fit$analyses[[2]]) pool.scalar.syn(Q = c(0.12182, 0.13209), U = c(0.02121^2, 0.02516^2), n = 50, k = 2) # check: automatic pooling using broom pool.syn(fit) } \references{ Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. Reiter, J.P. (2003). Inference for Partially Synthetic, Public Use Microdata Sets. \emph{Survey Methodology}, \bold{29}, 181-189. } \seealso{ \code{\link{pool}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009; Thom Volker, 2021 } mice/man/name.blocks.Rd0000644000176200001440000000322614330031606014426 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{name.blocks} \alias{name.blocks} \title{Name imputation blocks} \usage{ name.blocks(blocks, prefix = "B") } \arguments{ \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} for variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{prefix}{A character vector of length 1 with the prefix to be used for naming any unnamed blocks with two or more variables.} } \value{ A named list of character vectors with variable names. } \description{ This helper function names any unnamed elements in the \code{blocks} specification. This is a convenience function. } \details{ This function will name any unnamed list elements specified in the optional argument \code{blocks}. Unnamed blocks consisting of just one variable will be named after this variable. Unnamed blocks containing more than one variable will be named by the \code{prefix} argument, padded by an integer sequence starting at 1.
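A minimal sketch of these naming rules (variable names borrowed from the \code{nhanes} examples used elsewhere in this package):

blocks <- list(c("bmi", "chl"), "age", c("hyp", "chl"))
name.blocks(blocks)                    # unnamed multi-variable blocks become "B1", "B2"; "age" keeps its own name
name.blocks(blocks, prefix = "block")  # same rules, custom prefix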
} \examples{ blocks <- list(c("hyp", "chl"), AGE = "age", c("bmi", "hyp"), "edu") name.blocks(blocks) } \seealso{ \code{\link{mice}} } mice/man/quickpred.Rd0000644000176200001440000001076514330031647014232 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quickpred.R \name{quickpred} \alias{quickpred} \title{Quick selection of predictors from the data} \usage{ quickpred( data, mincor = 0.1, minpuc = 0, include = "", exclude = "", method = "pearson" ) } \arguments{ \item{data}{Matrix or data frame with incomplete data.} \item{mincor}{A scalar, numeric vector (of size \code{ncol(data)}) or numeric matrix (square, of size \code{ncol(data)}) specifying the minimum threshold(s) against which the absolute correlation in the data is compared.} \item{minpuc}{A scalar, vector (of size \code{ncol(data)}) or matrix (square, of size \code{ncol(data)}) specifying the minimum threshold(s) for the proportion of usable cases.} \item{include}{A string or a vector of strings containing one or more variable names from \code{names(data)}. Variables specified are always included as a predictor.} \item{exclude}{A string or a vector of strings containing one or more variable names from \code{names(data)}. Variables specified are always excluded as a predictor.} \item{method}{A string specifying the type of correlation. Use \code{'pearson'} (default), \code{'kendall'} or \code{'spearman'}. Can be abbreviated.} } \value{ A square binary matrix of size \code{ncol(data)}. } \description{ Selects predictors according to simple statistics } \details{ This function creates a predictor matrix using the variable selection procedure described in Van Buuren et al.~(1999, p.~687--688). The function is designed to aid in setting up a good imputation model for data with many variables. Basic workings: The procedure calculates for each variable pair (i.e. target-predictor pair) two correlations using all available cases per pair. The first correlation uses the values of the target and the predictor directly. The second correlation uses the (binary) response indicator of the target and the values of the predictor. If the largest (in absolute value) of these correlations exceeds \code{mincor}, the predictor will be added to the imputation set. The default value for \code{mincor} is 0.1. In addition, the procedure eliminates predictors whose proportion of usable cases fails to meet the minimum specified by \code{minpuc}. The default value is 0, so predictors are retained even if they have no usable case. Finally, the procedure includes any predictors named in the \code{include} argument (which is useful for background variables like age and sex) and eliminates any predictor named in the \code{exclude} argument. If a variable is listed in both \code{include} and \code{exclude} arguments, the \code{include} argument takes precedence. Advanced topic: \code{mincor} and \code{minpuc} are typically specified as scalars, but vectors and square matrices of appropriate size will also work. Each element of the vector corresponds to a row of the predictor matrix, so the procedure can effectively differentiate between different target variables. Setting a high value for such a row can be useful for auxiliary, less important variables, because the set of predictors for those variables then remains relatively small. Using a square matrix extends the idea to the columns, so that one can also apply cellwise thresholds.
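A minimal sketch of such a cellwise specification (threshold values chosen for illustration only; rows refer to targets and columns to predictors, following the layout of the predictor matrix):

mc <- matrix(0.1, ncol(nhanes), ncol(nhanes),
  dimnames = list(names(nhanes), names(nhanes)))
mc["chl", "hyp"] <- 0.9  # admit hyp as predictor of chl only if the absolute correlation exceeds 0.9
quickpred(nhanes, mincor = mc)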
} \note{ \code{quickpred()} uses \code{\link[base]{data.matrix}} to convert factors to numbers through their internal codes. Especially for unordered factors the resulting quantification may not make sense. } \examples{ # default: include all predictors with absolute correlation over 0.1 quickpred(nhanes) # all predictors with absolute correlation over 0.4 quickpred(nhanes, mincor = 0.4) # include age and bmi, exclude chl quickpred(nhanes, mincor = 0.4, inc = c("age", "bmi"), exc = "chl") # only include predictors with at least 30\% usable cases quickpred(nhanes, minpuc = 0.3) # use low threshold for bmi, and high thresholds for hyp and chl pred <- quickpred(nhanes, mincor = c(0, 0.1, 0.5, 0.5)) pred # use it directly from mice imp <- mice(nhanes, pred = quickpred(nhanes, minpuc = 0.25, include = "age")) } \references{ van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. van Buuren, S. and Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}} } \author{ Stef van Buuren, Aug 2009 } \keyword{misc} mice/man/cci.Rd0000644000176200001440000000161014330031606012761 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cci.R \name{cci} \alias{cci} \title{Complete case indicator} \usage{ cci(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}.} } \value{ Logical vector indicating the complete cases. } \description{ The complete case indicator is useful for extracting the subset of complete cases. The function \code{cci(x)} calls \code{complete.cases(x)}. The companion function \code{ici()} selects the incomplete cases. } \examples{ cci(nhanes) # indicator for 13 complete cases cci(mice(nhanes, maxit = 0)) f <- cci(nhanes[, c("bmi", "hyp")]) # complete data for bmi and hyp nhanes[f, ] # obtain all data from those with complete bmi and hyp } \seealso{ \code{\link{complete.cases}}, \code{\link{ici}}, \code{\link{cc}} } \author{ Stef van Buuren, 2017. } \keyword{univar} mice/man/ampute.default.weights.Rd0000644000176200001440000000232314330031606016614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.weights} \alias{ampute.default.weights} \title{Default \code{weights} in \code{ampute}} \usage{ ampute.default.weights(patterns, mech) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} \item{mech}{A string specifying the missingness mechanism.} } \value{ A matrix of size #patterns by #variables containing the weights that will be used to calculate the weighted sum scores. Equal weights are given to all variables. When mechanism is MAR, variables that will be amputed will be weighted with \code{0}. If it is MNAR, variables that will be observed will be weighted with \code{0}. If mechanism is MCAR, the weights matrix will not be used. A default MAR matrix will be returned. } \description{ Defines the default weights matrix for the multivariate amputation function \code{ampute}. 
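A brief sketch of this behaviour, using a hand-made pattern matrix (0 = amputed, 1 = observed; values are illustrative only):

pat <- matrix(c(0, 1, 1,
                1, 1, 0), nrow = 2, byrow = TRUE)
ampute.default.weights(pat, mech = "MAR")   # amputed variables receive weight 0
ampute.default.weights(pat, mech = "MNAR")  # observed variables receive weight 0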
} \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/leiden85.Rd0000644000176200001440000000323214330031606013642 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/leiden85.R \docType{data} \name{leiden85} \alias{leiden85} \title{Leiden 85+ study} \format{ \code{leiden85} is a data frame with 956 rows and 336 columns. } \source{ Lagaay, A. M., van der Meij, J. C., Hijmans, W. (1992). Validation of medical history taking as part of a population based survey in subjects aged 85 and over. \emph{Brit. Med. J.}, \emph{304}(6834), 1091-1092. Izaks, G. J., van Houwelingen, H. C., Schreuder, G. M., Ligthart, G. J. (1997). The association between human leucocyte antigens (HLA) and mortality in community residents aged 85 and older. \emph{Journal of the American Geriatrics Society}, \emph{45}(1), 56-60. Boshuizen, H. C., Izaks, G. J., van Buuren, S., Ligthart, G. J. (1998). Blood pressure and mortality in elderly people aged 85 and older: Community based study. \emph{Brit. Med. J.}, \emph{316}(7147), 1780-1784. Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-toomany.html#sec:leiden85cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Subset of data from the Leiden 85+ study } \details{ The data set concerns of subset of 956 members of a very old (85+) cohort in Leiden. Multiple imputation of this data set has been described in Boshuizen et al (1998), Van Buuren et al (1999) and Van Buuren (2012), chapter 7. The data set is not available as part of \code{mice}. } \keyword{datasets} mice/man/is.mitml.result.Rd0000644000176200001440000000057513666252075015325 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mitml.result} \alias{is.mitml.result} \title{Check for \code{mitml.result} object} \usage{ is.mitml.result(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mitml.result} } \description{ Check for \code{mitml.result} object } mice/man/ampute.default.type.Rd0000644000176200001440000000151314330031606016123 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.type} \alias{ampute.default.type} \title{Default \code{type} in \code{ampute()}} \usage{ ampute.default.type(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where 0 indicates a variable should have missing values and 1 indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A string vector of length #patterns containing the missingness types. Each pattern will be amputed with a "RIGHT" missingness. } \description{ Defines the default type vector for the multivariate amputation function \code{ampute}. 
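A brief sketch with a hand-made pattern matrix (one type per pattern, \code{"RIGHT"} by default):

pat <- matrix(c(0, 1, 1,
                1, 1, 0), nrow = 2, byrow = TRUE)
ampute.default.type(pat)  # "RIGHT" "RIGHT"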
} \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/matchindex.Rd0000644000176200001440000000574614330031606014365 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R \name{matchindex} \alias{matchindex} \title{Find index of matched donor units} \usage{ matchindex(d, t, k = 5L) } \arguments{ \item{d}{Numeric vector with values from donor cases.} \item{t}{Numeric vector with values from target cases.} \item{k}{Integer, number of unique donors from which a random draw is made. For \code{k = 1} the function returns the index in \code{d} corresponding to the closest unit. For multiple imputation, the advice is to set values in the range of \code{k = 5} to \code{k = 10}.} } \value{ An integer vector with \code{length(t)} elements. Each element is an index in the array \code{d}. } \description{ Find index of matched donor units } \details{ For each element in \code{t}, the method finds the \code{k} nearest neighbours in \code{d}, randomly draws one of these neighbours, and returns its position in vector \code{d}. Fast predictive mean matching algorithm in seven steps: 1. Shuffle records to remove effects of ties 2. Obtain sorting order on shuffled data 3. Calculate index on input data and sort it 4. Pre-sample vector \code{h} with values between 1 and \code{k} For each of the \code{n0} elements in \code{t}: 5. find the two adjacent neighbours 6. find the \code{h_i}'th nearest neighbour 7. store the index of that neighbour Return vector of \code{n0} positions in \code{d}. We may use the function to perform predictive mean matching under a given predictive model. To do so, specify both \code{d} and \code{t} as predictions from the same model. Suppose that \code{y} contains the observed outcomes of the donor cases (in the same sequence as \code{d}), then \code{y[matchindex(d, t)]} returns one matched outcome for every target case. See \url{https://github.com/amices/mice/issues/236}. This function is a replacement for the \code{matcher()} function that has been in default in \code{mice} since version \code{2.22} (June 2014). } \examples{ set.seed(1) # Inputs need not be sorted d <- c(-5, 5, 0, 10, 12) t <- c(-6, -4, 0, 2, 4, -2, 6) # Index (in vector a) of closest match idx <- matchindex(d, t, 1) idx # To check: show values of closest match # Random draw among indices of the 5 closest predictors matchindex(d, t) # An example train <- mtcars[1:20, ] test <- mtcars[21:32, ] fit <- lm(mpg ~ disp + cyl, data = train) d <- fitted.values(fit) t <- predict(fit, newdata = test) # note: not using mpg idx <- matchindex(d, t) # Borrow values from train to produce 12 synthetic values for mpg in test. # Synthetic values are plausible values that could have been observed if # they had been measured. train$mpg[idx] # Exercise: Create a distribution of 1000 plausible values for each of the # twelve mpg entries in test, and count how many times the true value # (which we know here) is located within the inter-quartile range of each # distribution. Is your count anywhere close to 500? Why? Why not? 
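# A small illustration of the role of k, reusing the d and t objects from
# this example: with k = 1 the match is deterministic (always the single
# closest donor), with the default k = 5 the draw is random.
matchindex(d, t, k = 1)
matchindex(d, t, k = 1)  # identical to the previous call
matchindex(d, t)         # random draw among the 5 closest donors
matchindex(d, t)         # generally differs between calls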
} \author{ Stef van Buuren, Nasinski Maciej, Alexander Robitzsch } mice/man/tbc.Rd0000644000176200001440000000467714330031606013013 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbc.R \docType{data} \name{tbc} \alias{tbc} \alias{tbc.target} \alias{terneuzen} \title{Terneuzen birth cohort} \format{ \code{tbc} is a data frame with 3951 rows and 11 columns: \describe{ \item{id}{Person number} \item{occ}{Occasion number} \item{nocc}{Number of occasions} \item{first}{Is this the first record for this person? (TRUE/FALSE)} \item{typ}{Type of data (all observed)} \item{age}{Age (years)} \item{sex}{Sex 1=M, 2=F} \item{hgt.z}{Height Z-score} \item{wgt.z}{Weight Z-score} \item{bmi.z}{BMI Z-score} \item{ao}{Adult overweight (0=no, 1=yes)} } \code{tbc.target} is a data frame with 2612 rows and 3 columns: \describe{ \item{id}{Person number} \item{ao}{Adult overweight (0=no, 1=yes)} \item{bmi.z.jv}{BMI Z-score as young adult (18-29 years)} } } \source{ De Kroon, M. L. A., Renders, C. M., Kuipers, E. C., van Wouwe, J. P., van Buuren, S., de Jonge, G. A., Hirasing, R. A. (2008). Identifying metabolic syndrome without blood tests in young adults - The Terneuzen birth cohort. \emph{European Journal of Public Health}, \emph{18}(6), 656-660. De Kroon, M. L. A., Renders, C. M., Van Wouwe, J. P., Van Buuren, S., Hirasing, R. A. (2010). The Terneuzen birth cohort: BMI changes between 2 and 6 years correlate strongest with adult overweight. \emph{PLoS ONE}, \emph{5}(2), e9155. De Kroon, M. L. A. (2011). \emph{The Terneuzen Birth Cohort. Detection and Prevention of Overweight and Cardiometabolic Risk from Infancy Onward.} Dissertation, Vrije Universiteit, Amsterdam. \url{https://research.vu.nl/en/publications/the-terneuzen-birth-cohort-detection-and-prevention-of-overweight} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-rastering.html#terneuzen-birth-cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Data of a subset of the Terneuzen Birth Cohort data on child growth. } \details{ This \code{tbc} data set is a random subset of persons from a much larger collection of data from the Terneuzen Birth Cohort. The total cohort comprises 2604 unique persons, whereas the subset in \code{tbc} covers 306 persons. The \code{tbc.target} is an auxiliary data set containing two outcomes at adult age. For more details, see De Kroon et al (2008, 2010, 2011). The imputation methodology is explained in Chapter 9 of Van Buuren (2012). } \examples{ data <- tbc md.pattern(data) } \keyword{datasets} mice/man/toenail.Rd0000644000176200001440000000412614330031606013663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/toenail.R \docType{data} \name{toenail} \alias{toenail} \title{Toenail data} \format{ A data frame with 1908 observations on the following 5 variables: \describe{ \item{\code{ID}}{a numeric vector giving the ID of the patient} \item{\code{outcome}}{a numeric vector giving the response (0=none or mild separation, 1=moderate or severe)} \item{\code{treatment}}{a numeric vector giving the treatment group} \item{\code{month}}{a numeric vector giving the time of the visit (not exactly monthly intervals hence not round numbers)} \item{\code{visit}}{a numeric vector giving the number of the visit} } } \source{ De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De Keyser, P. (1998).
Twelve weeks of continuous oral therapy for toenail onychomycosis caused by dermatophytes: A double-blind comparative trial of terbinafine 250 mg/day versus itraconazole 200 mg/day. Journal of the American Academy of Dermatology, 38, 57-63. } \description{ The toenail data come from a multicenter study comparing two oral treatments for toenail infection. Patients were evaluated for the degree of separation of the nail. Patients were randomized into two treatments and were followed over seven visits - four in the first year and yearly thereafter. The patients had not been treated prior to the first visit, so this visit should be regarded as the baseline. } \details{ This dataset was copied from the \code{DPpackage}, which is scheduled to be discontinued from CRAN in August 2019. } \references{ Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of quadrature points in a logistic random-effects model: An example. Journal of the Royal Statistical Society, Series C, 50, 325-335. G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, Wiley and Sons, New York, USA. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{toenail2}} } \keyword{datasets} mice/man/tidy.mipo.Rd0000644000176200001440000000162213666252233014156 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidiers.R \name{tidy.mipo} \alias{tidy.mipo} \title{Tidy method to extract results from a `mipo` object} \usage{ \method{tidy}{mipo}(x, conf.int = FALSE, conf.level = 0.95, ...) } \arguments{ \item{x}{An object of class \code{mipo}} \item{conf.int}{Logical. Should confidence intervals be returned?} \item{conf.level}{Confidence level for intervals. Defaults to .95} \item{...}{extra arguments (not used)} } \value{ A data frame with these columns: \itemize{ \item term \item estimate \item ubar \item b \item t \item dfcom \item df \item riv \item lambda \item fmi \item p.value \item conf.low (if called with conf.int = TRUE) \item conf.high (if called with conf.int = TRUE) } } \description{ Tidy method to extract results from a `mipo` object } \keyword{internal} mice/man/ici.Rd0000644000176200001440000000133414330031606012772 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cci.R \name{ici} \alias{ici} \alias{ici,data.frame-method} \alias{ici,matrix-method} \alias{ici,mids-method} \title{Incomplete case indicator} \usage{ ici(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}.} } \value{ Logical vector indicating the incomplete cases. } \description{ The incomplete case indicator is useful for extracting the subset of incomplete cases. The companion function \code{cci()} selects the complete cases. } \examples{ ici(nhanes) # indicator for 12 rows with incomplete cases } \seealso{ \code{\link{cci}}, \code{\link{ic}} } \author{ Stef van Buuren, 2017.
} \keyword{univar} mice/man/filter.mids.Rd0000644000176200001440000000605014330031606014446 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/filter.R \name{filter.mids} \alias{filter.mids} \title{Subset rows of a \code{mids} object} \usage{ \method{filter}{mids}(.data, ..., .preserve = FALSE) } \arguments{ \item{.data}{A \code{mids} object.} \item{...}{Expressions that return a logical value, and are defined in terms of the variables in \code{.data$data}. If multiple expressions are specified, they are combined with the \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are kept.} \item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} } \value{ An S3 object of class \code{mids} } \description{ This function takes a \code{mids} object and returns a new \code{mids} object that pertains to the subset of the data identified by the expression in \dots. The expression may use column values from the incomplete data in \code{.data$data}. } \note{ The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. The function constructs the elements of the filtered \code{mids} object as follows: \tabular{ll}{ \code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr \code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr \code{m} \tab Equals \code{.data$m}\cr \code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr \code{blocks} \tab Equals \code{.data$blocks}\cr \code{call} \tab Equals \code{.data$call}\cr \code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr \code{method} \tab Equals \code{.data$method}\cr \code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr \code{visitSequence} \tab Equals \code{.data$visitSequence}\cr \code{formulas} \tab Equals \code{.data$formulas}\cr \code{post} \tab Equals \code{.data$post}\cr \code{blots} \tab Equals \code{.data$blots}\cr \code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr \code{seed} \tab Equals \code{.data$seed}\cr \code{iteration} \tab Equals \code{.data$iteration}\cr \code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr \code{chainMean} \tab Set to \code{NULL}\cr \code{chainVar} \tab Set to \code{NULL}\cr \code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr \code{version} \tab Replaced with current version\cr \code{date} \tab Replaced with current date } } \examples{ imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) # example with external logical vector imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) nrow(complete(imp)) nrow(complete(imp_f)) # example with calculated include vector imp_f2 <- filter(imp, age >= 2 & hyp == 1) nrow(complete(imp_f2)) # should be 5 } \seealso{ \code{\link[dplyr]{filter}} } \author{ Patrick Rockenschaub } \keyword{manip} mice/man/mice.impute.rf.Rd0000644000176200001440000001010114436640740015057 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.rf.R \name{mice.impute.rf} \alias{mice.impute.rf} \title{Imputation by random forests} \usage{ mice.impute.rf( y, ry, x, wy = NULL, ntree = 10, rfPackage = c("ranger", "randomForest"), ... 
) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ntree}{The number of trees to grow. The default is 10.} \item{rfPackage}{A single string specifying the backend for estimating the random forest. The default backend is the \code{ranger} package. The only alternative currently implemented is the \code{randomForest} package, which used to be the default in mice 3.13.10 and earlier.} \item{\dots}{Other named arguments passed down to \code{mice:::install.on.demand()}, \code{randomForest::randomForest()} and \code{randomForest:::randomForest.default()}.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using random forests. } \details{ Imputation of \code{y} by random forests. The method calls \code{randomForest()} which implements Breiman's random forest algorithm (based on Breiman and Cutler's original Fortran code) for classification and regression. See Appendix A.1 of Doove et al. (2014) for the definition of the algorithm used. } \note{ An alternative implementation was independently developed by Shah et al (2014). These were available as functions \code{CALIBERrfimpute::mice.impute.rfcat} and \code{CALIBERrfimpute::mice.impute.rfcont} (now archived). Simulations by Shah (Feb 13, 2014) suggested that the quality of the imputation for 10 and 100 trees was identical, so mice 2.22 changed the default number of trees from \code{ntree = 100} to \code{ntree = 10}. } \examples{ \dontrun{ imp <- mice(nhanes2, meth = "rf", ntree = 3) plot(imp) } } \references{ Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning for missing data imputation in the presence of interaction effects. Computational Statistics & Data Analysis, 72, 92-104. Shah, A.D., Bartlett, J.W., Carpenter, J., Nicholas, O., Hemingway, H. (2014), Comparison of random forest and parametric imputation models for imputing missing data using MICE: A CALIBER study. American Journal of Epidemiology, \doi{10.1093/aje/kwt312}. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL.
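A minimal usage sketch of the \code{rfPackage} switch described above (illustrative settings; both calls assume the \code{ranger} and \code{randomForest} packages are installed, and the small \code{ntree} only keeps the sketch fast):

imp1 <- mice(nhanes2, method = "rf", ntree = 3, m = 2, print = FALSE)
imp2 <- mice(nhanes2, method = "rf", rfPackage = "randomForest",
  ntree = 3, m = 2, print = FALSE)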
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.cart}}, \code{\link[randomForest]{randomForest}} \code{\link[ranger]{ranger}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.ri}()} } \author{ Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012; Patrick Rockenschaub, 2021 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.logreg.boot.Rd0000644000176200001440000000516414330031647016677 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.logreg.R \name{mice.impute.logreg.boot} \alias{mice.impute.logreg.boot} \title{Imputation by logistic regression using the bootstrap} \usage{ mice.impute.logreg.boot(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using logistic regression by a bootstrapped logistic regression model. The bootstrap method draws a simple bootstrap sample with replacement from the observed data \code{y[ry]} and \code{x[ry, ]}. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-categorical.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
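A minimal usage sketch (illustrative settings): the binary variable \code{hyp} in \code{nhanes2} is imputed with the bootstrap variant, while the incomplete continuous variables use \code{pmm}:

imp <- mice(nhanes2, method = c("", "pmm", "logreg.boot", "pmm"),
  m = 2, maxit = 2, print = FALSE)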
} \seealso{ \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2011 } \concept{univariate imputation functions} \keyword{datagen} mice/man/D3.Rd0000644000176200001440000000513714436133175012514 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D3.R \name{D3} \alias{D3} \title{Compare two nested models using D3-statistic} \usage{ D3(fit1, fit0 = NULL, dfcom = NULL, df.com = NULL) } \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is nested within \code{fit1}. The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{dfcom}{A single number denoting the complete-data degrees of freedom of model \code{fit1}. If not specified, it is set equal to \code{df.residual} of model \code{fit1}. If that cannot be done, the procedure assumes (perhaps incorrectly) a large sample.} \item{df.com}{Deprecated} } \value{ An object of class \code{mice.anova} } \description{ The D3-statistic is a likelihood-ratio test statistic. } \details{ The \code{D3()} function implements the LR-method by Meng and Rubin (1992). The implementation of the method relies on the \code{broom} package, the standard \code{update} mechanism for statistical models in \code{R} and the \code{offset} function. The function fits the full (or null) model to each of the \code{m} imputed datasets and calculates the mean of the estimates of the (fixed) parameter coefficients \eqn{\beta}. For each imputed dataset, it calculates the likelihood for the model with the parameters constrained to \eqn{\beta}. The \code{mitml::testModels()} function offers similar functionality for a subset of statistical models. Results of \code{mice::D3()} and \code{mitml::testModels()} differ in multilevel models because the \code{testModels()} also constrains the variance components parameters. For more details, see the references below. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D3(mi1, mi0) \dontrun{ # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D3(fit1, fit0) } } \references{ Meng, X. L., and D. B. Rubin. 1992. Performing Likelihood Ratio Tests with Multiply-Imputed Data Sets. \emph{Biometrika}, 79 (1): 103–11.
\url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:likelihoodratio} \url{http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#setting-residual-variances-to-a-fixed-value-zero-or-other} } \seealso{ \code{\link{fix.coef}} } mice/man/ibind.Rd0000644000176200001440000000214314433400023013307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ibind.R \name{ibind} \alias{ibind} \title{Enlarge number of imputations by combining \code{mids} objects} \usage{ ibind(x, y) } \arguments{ \item{x}{A \code{mids} object.} \item{y}{A \code{mids} object.} } \value{ An S3 object of class \code{mids} } \description{ This function combines two \code{mids} objects \code{x} and \code{y} into a single \code{mids} object, with the objective of increasing the number of imputed data sets. If the number of imputations in \code{x} and \code{y} are \code{m(x)} and \code{m(y)}, then the combined object will have \code{m(x)+m(y)} imputations. } \details{ The two \code{mids} objects are required to have the same underlying multiple imputation model and should be fitted on the same data. } \examples{ data(nhanes) imp1 <- mice(nhanes, m = 1, maxit = 2, print = FALSE) imp1$m imp2 <- mice(nhanes, m = 3, maxit = 3, print = FALSE) imp2$m imp12 <- ibind(imp1, imp2) imp12$m plot(imp12) } \seealso{ \code{\link[=mids-class]{mids}} } \author{ Karin Groothuis-Oudshoorn, Stef van Buuren } \keyword{manip} mice/man/mice.impute.mnar.Rd0000644000176200001440000002023214330031647015404 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.mnar.logreg.R, % R/mice.impute.mnar.norm.R \name{mice.impute.mnar.logreg} \alias{mice.impute.mnar.logreg} \alias{mice.impute.mnar.norm} \alias{mnar.norm} \alias{mnar.logreg} \title{Imputation under MNAR mechanism by NARFCS} \usage{ mice.impute.mnar.logreg(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) mice.impute.mnar.norm(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ums}{A string containing the specification of the unidentifiable part of the imputation model (the *unidentifiable model specification"), that is, the desired \eqn{\delta}-adjustment (offset) as a function of other variables and values for the corresponding deltas (sensitivity parameters). See details.} \item{umx}{An auxiliary data matrix containing variables that do not appear in the identifiable part of the imputation procedure but that have been specified via \code{ums} as being predictors in the unidentifiable part of the imputation model. See details.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate data under a user-specified MNAR mechanism by linear or logistic regression and NARFCS. Sensitivity analysis under different model specifications may shed light on the impact of different MNAR assumptions on the conclusions. 
} \details{ This function imputes data that are thought to be Missing Not at Random (MNAR) by the NARFCS method. The NARFCS procedure (Tompsett et al, 2018) generalises the so-called \eqn{\delta}-adjustment sensitivity analysis method of Van Buuren, Boshuizen & Knook (1999) to the case with multiple incomplete variables within the FCS framework. In practical terms, the NARFCS procedure shifts the imputations drawn at each iteration of \code{mice} by a user-specified quantity that can vary across subjects, to reflect systematic departures of the missing data from the data distribution imputed under MAR. Specification of the NARFCS model is done by the \code{blots} argument of \code{mice()}. The \code{blots} parameter is a named list. For each variable to be imputed by \code{mice.impute.mnar.norm()} or \code{mice.impute.mnar.logreg()} the corresponding element in \code{blots} is a list with at least one argument \code{ums} and, optionally, a second argument \code{umx}. For example, the high-level call might look something like \code{mice(nhanes[, c(2, 4)], method = c("pmm", "mnar.norm"), blots = list(chl = list(ums = "-3+2*bmi")))}. The \code{ums} parameter is required, and might look like this: \code{"-4+1*Y"}. The \code{ums} specification must have the following characteristics: \enumerate{ \item{A single term corresponding to the intercept (constant) term, not multiplied by any variable name, must be included in the expression;} \item{Each term in the expression (corresponding to the intercept or a predictor variable) must be separated by either a \code{"+"} or \code{"-"} sign, depending on the sign of the sensitivity parameter;} \item{Within each non-intercept term, the sensitivity parameter value comes first and the predictor variable comes second, and these must be separated by a \code{"*"} sign;} \item{For categorical predictors, for example a variable \code{Z} with K + 1 categories \code{("Cat0","Cat1", ...,"CatK")}, K category-specific terms are needed, and those not in \code{umx} (see below) must be specified by concatenating the variable name with the name of the category (e.g. \code{ZCat1}) as this is how they are named in the design matrix (argument \code{x}) passed to the univariate imputation function. An example is \code{"2+1*ZCat1-3*ZCat2"}.} } If given, the \code{umx} specification must have the following characteristics: \enumerate{ \item{It contains only complete variables, with no missing values;} \item{It is a numeric matrix. In particular, categorical variables must be represented as dummy indicators with names corresponding to what is used in \code{ums} to refer to the category-specific terms (see above);} \item{It has the same number of rows as the \code{data} argument passed on to the main \code{mice} function;} \item{It does not contain variables that were already predictors in the identifiable part of the model for the variable under imputation.} } Limitation: The present implementation can only condition on variables that appear in the identifiable part of the imputation model (\code{x}) or in complete auxiliary variables passed on via the \code{umx} argument. It is not possible to specify models where the offset depends on incomplete auxiliary variables. For an MNAR alternative see also \code{\link{mice.impute.ri}}.
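A sketch of a simple sensitivity analysis over a grid of \eqn{\delta}-adjustments, patterned after the high-level call shown above (the grid values are arbitrary and serve only to illustrate how conclusions may shift with the assumed departure from MAR):

deltas <- c("-1", "-2", "-4")
fits <- lapply(deltas, function(d) {
  imp <- mice(nhanes[, c("bmi", "chl")],
    method = c("pmm", "mnar.norm"),
    blots = list(chl = list(ums = d)),
    print = FALSE, seed = 1)
  pool(with(imp, lm(bmi ~ chl)))
})
sapply(fits, function(f) f$pooled$estimate)  # compare estimates across deltas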
} \examples{ # 1: Example with no auxiliary data: only pass unidentifiable model specification (ums) # Specify argument to pass on to mnar imputation functions via "blots" argument mnar.blot <- list(X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2")) # Run NARFCS by using mnar imputation methods and passing argument via blots impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), blots = mnar.blot, seed = 234235, print = FALSE ) # Obtain MI results: Note they coincide with those from old version at # https://github.com/moreno-betancur/NARFCS pool(with(impNARFCS, lm(Y ~ X + Z)))$pooled$estimate # 2: Example passing also auxiliary data to MNAR procedure (umx) # Assumptions: # - Auxiliary data are complete, no missing values # - Auxiliary data are a numeric matrix # - Auxiliary data have same number of rows as x # - Auxiliary data have no overlapping variable names with x # Specify argument to pass on to mnar imputation functions via "blots" argument aux <- matrix(0:1, nrow = nrow(mnar_demo_data)) dimnames(aux) <- list(NULL, "even") mnar.blot <- list( X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2+0.5*even", umx = aux) ) # Run NARFCS by using mnar imputation methods and passing argument via blots impNARFCS <- mice(mnar_demo_data, method = c("mnar.logreg", "mnar.norm", ""), blots = mnar.blot, seed = 234235, print = FALSE ) # Obtain MI results: As expected they differ (slightly) from those # from old version at https://github.com/moreno-betancur/NARFCS pool(with(impNARFCS, lm(Y ~ X + Z)))$pooled$estimate } \references{ Tompsett, D. M., Leacy, F., Moreno-Betancur, M., Heron, J., & White, I. R. (2018). On the use of the not-at-random fully conditional specification (NARFCS) procedure in practice. \emph{Statistics in Medicine}, \bold{37}(15), 2338-2353. \doi{10.1002/sim.7643}. Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of missing blood pressure covariates in survival analysis. \emph{Statistics in Medicine}, \bold{18}, 681--694. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Margarita Moreno-Betancur, Stef van Buuren, Ian R. White, 2020. } \concept{univariate imputation functions} \keyword{datagen} mice/man/ampute.default.freq.Rd0000644000176200001440000000157414330031606016106 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.freq} \alias{ampute.default.freq} \title{Default \code{freq} in \code{ampute}} \usage{ ampute.default.freq(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. 
Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A vector of length #patterns containing the relative frequencies with which the patterns should occur. An equal probability is given to each pattern. } \description{ Defines the default relative frequency vector for the multivariate amputation function \code{ampute}. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/nhanes2.Rd0000644000176200001440000000173114330031606013565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nhanes2.R \docType{data} \name{nhanes2} \alias{nhanes2} \title{NHANES example - mixed numerical and discrete variables} \format{ A data frame with 25 observations on the following 4 variables. \describe{ \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} \item{bmi}{Body mass index (kg/m**2)} \item{hyp}{Hypertensive (1=no,2=yes)} \item{chl}{Total serum cholesterol (mg/dL)} } } \source{ Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate Data.} London: Chapman & Hall. Table 6.14. } \description{ A small data set with non-monotone missing values. } \details{ A small data set with missing data and mixed numerical and discrete variables. The data set \code{nhanes} is the same data set, but with all data treated as numerical. } \examples{ # create 5 imputed data sets imp <- mice(nhanes2) # print the first imputed data set complete(imp) } \seealso{ \code{\link{nhanes}} } \keyword{datasets} mice/man/supports.transparent.Rd0000644000176200001440000000134414330031606016466 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/supports.transparent.R \name{supports.transparent} \alias{supports.transparent} \alias{transparent} \title{Supports semi-transparent foreground colors?} \usage{ supports.transparent() } \value{ \code{TRUE} or \code{FALSE} } \description{ This function is used by \code{mdc()} to find out whether the current device supports semi-transparent foreground colors. } \details{ The function calls the function \code{dev.capabilities()} from the package \code{grDevices}. The function return \code{FALSE} if the status of the current device is unknown. } \examples{ supports.transparent() } \seealso{ \code{\link{mdc}} \code{\link{dev.capabilities}} } \keyword{hplot} mice/man/summary.Rd0000644000176200001440000000241614330031606013725 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summary.R \name{summary.mira} \alias{summary.mira} \alias{summary.mids} \alias{summary.mads} \alias{summary.mice.anova} \title{Summary of a \code{mira} object} \usage{ \method{summary}{mira}(object, type = c("tidy", "glance", "summary"), ...) \method{summary}{mids}(object, ...) \method{summary}{mads}(object, ...) \method{summary}{mice.anova}(object, ...) } \arguments{ \item{object}{A \code{mira} object} \item{type}{A length-1 character vector indicating the type of summary. There are three choices: \code{type = "tidy"} return the parameters estimates of each analyses as a data frame. \code{type = "glance"} return the fit statistics of each analysis as a data frame. \code{type = "summary"} returns a list of length \code{m} with the analysis results. 
The default is \code{"tidy"}.} \item{...}{Other parameters passed down to \code{print()} and \code{summary()}} } \value{ \code{NULL} \code{NULL} \code{NULL} \code{NULL} } \description{ Summary of a \code{mira} object Summary of a \code{mids} object Summary of a \code{mads} object Print a \code{mice.anova} object } \seealso{ \code{\link[=mira-class]{mira}} \code{\link[=mids-class]{mids}} \code{\link[=mads-class]{mads}} \code{\link{mipo}} } mice/man/pattern.Rd0000644000176200001440000000327114436640317013721 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pattern1.R \docType{data} \name{pattern} \alias{pattern} \alias{pattern1} \alias{pattern2} \alias{pattern3} \alias{pattern4} \title{Datasets with various missing data patterns} \format{ \describe{ \item{list("pattern1")}{Data with a univariate missing data pattern} \item{list("pattern2")}{Data with a monotone missing data pattern} \item{list("pattern3")}{Data with a file matching missing data pattern} \item{list("pattern4")}{Data with a general missing data pattern} } Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Four simple datasets with various missing data patterns } \details{ Van Buuren (2012) uses these four artificial datasets to illustrate various missing data patterns. } \examples{ pattern4 data <- rbind(pattern1, pattern2, pattern3, pattern4) mdpat <- cbind(expand.grid(rec = 8:1, pat = 1:4, var = 1:3), r = as.numeric(as.vector(is.na(data)))) types <- c("Univariate", "Monotone", "File matching", "General") tp41 <- lattice::levelplot(r ~ var + rec | as.factor(pat), data = mdpat, as.table = TRUE, aspect = "iso", shrink = c(0.9), col.regions = mdc(1:2), colorkey = FALSE, scales = list(draw = FALSE), xlab = "", ylab = "", between = list(x = 1, y = 0), strip = lattice::strip.custom( bg = "grey95", style = 1, factor.levels = types ) ) print(tp41) md.pattern(pattern4) p <- md.pairs(pattern4) p ### proportion of usable cases p$mr / (p$mr + p$mm) ### outbound statistics p$rm / (p$rm + p$rr) fluxplot(pattern2) } \keyword{datasets} mice/man/cbind.Rd0000644000176200001440000001743114433400023013307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R \name{cbind} \alias{cbind} \alias{rbind} \title{Combine R objects by rows and columns} \usage{ cbind(...) rbind(...) } \arguments{ \item{...}{ Arguments passed on to \code{\link[base:cbind]{base::cbind}} \describe{ \item{\code{deparse.level}}{integer controlling the construction of labels in the case of non-matrix-like arguments (for the default method):\cr \code{deparse.level = 0} constructs no labels;\cr the default \code{deparse.level = 1} typically and \code{deparse.level = 2} always construct labels from the argument names, see the \sQuote{Value} section below.} }} } \value{ An S3 object of class \code{mids} } \description{ Functions \code{cbind()} and \code{rbind()} are defined in the \code{mice} package in order to enable dispatch to \code{cbind.mids()} and \code{rbind.mids()} when one of the arguments is a \code{data.frame}. } \details{ The standard \code{base::cbind()} and \code{base::rbind()} always dispatch to \code{base::cbind.data.frame()} or \code{base::rbind.data.frame()} if one of the arguments is a \code{data.frame}. 
The versions defined in the \code{mice} package intercept the user command and test whether the first argument has class \code{"mids"}. If so, function calls \code{cbind.mids()}, respectively \code{rbind.mids()}. In all other cases, the call is forwarded to standard functions in the \code{base} package. The \code{cbind.mids()} function combines two \code{mids} objects columnwise into a single object of class \code{mids}, or combines a single \code{mids} object with a \code{vector}, \code{matrix}, \code{factor} or \code{data.frame} columnwise into a \code{mids} object. If both arguments of \code{cbind.mids()} are \code{mids}-objects, the \code{data} list components should have the same number of rows. Also, the number of imputations (\code{m}) should be identical. If the second argument is a \code{matrix}, \code{factor} or \code{vector}, it is transformed into a \code{data.frame}. The number of rows should match with the \code{data} component of the first argument. The \code{cbind.mids()} function renames any duplicated variable or block names by appending \code{".1"}, \code{".2"} to duplicated names. The \code{rbind.mids()} function combines two \code{mids} objects rowwise into a single \code{mids} object, or combines a \code{mids} object with a vector, matrix, factor or data frame rowwise into a \code{mids} object. If both arguments of \code{rbind.mids()} are \code{mids} objects, then \code{rbind.mids()} requires that both have the same number of multiple imputations. In addition, their \code{data} components should match. If the second argument of \code{rbind.mids()} is not a \code{mids} object, the columns of the arguments should match. The \code{where} matrix for the second argument is set to \code{FALSE}, signalling that any missing values in that argument were not imputed. The \code{ignore} vector for the second argument is set to \code{FALSE}. Rows inherited from the second argument will therefore influence the parameter estimation of the imputation model in any future iterations. 
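A minimal sketch of the rowwise case just described, showing that rows appended from a \code{data.frame} are flagged as not-to-be-imputed:

imp <- mice(nhanes[1:13, ], m = 1, maxit = 1, print = FALSE)
ext <- rbind(imp, nhanes[14:25, ])
any(ext$where[14:25, ])  # FALSE: appended rows are not imputed
nrow(complete(ext))      # 25 rows, but the appended rows keep their NAs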
} \note{ The \code{cbind.mids()} function constructs the elements of the new \code{mids} object as follows: \tabular{ll}{ \code{data} \tab Columnwise combination of the data in \code{x} and \code{y}\cr \code{imp} \tab Combines the imputed values from \code{x} and \code{y}\cr \code{m} \tab Taken from \code{x$m}\cr \code{where} \tab Columnwise combination of \code{x$where} and \code{y$where}\cr \code{blocks} \tab Combines \code{x$blocks} and \code{y$blocks}\cr \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{cbind.mids()}\cr \code{nmis} \tab Equals \code{c(x$nmis, y$nmis)}\cr \code{method} \tab Combines \code{x$method} and \code{y$method}\cr \code{predictorMatrix} \tab Combination with zeroes on the off-diagonal blocks\cr \code{visitSequence} \tab Combined as \code{c(x$visitSequence, y$visitSequence)}\cr \code{formulas} \tab Combined as \code{c(x$formulas, y$formulas)}\cr \code{post} \tab Combined as \code{c(x$post, y$post)}\cr \code{blots} \tab Combined as \code{c(x$blots, y$blots)}\cr \code{ignore} \tab Taken from \code{x$ignore}\cr \code{seed} \tab Taken from \code{x$seed}\cr \code{iteration} \tab Taken from \code{x$iteration}\cr \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr \code{chainMean} \tab Combined from \code{x$chainMean} and \code{y$chainMean}\cr \code{chainVar} \tab Combined from \code{x$chainVar} and \code{y$chainVar}\cr \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr \code{version} \tab Current package version\cr \code{date} \tab Current date\cr } The \code{rbind.mids()} function constructs the elements of the new \code{mids} object as follows: \tabular{ll}{ \code{data} \tab Rowwise combination of the (incomplete) data in \code{x} and \code{y}\cr \code{imp} \tab Equals \code{rbind(x$imp[[j]], y$imp[[j]])} if \code{y} is \code{mids} object; otherwise the data of \code{y} will be copied\cr \code{m} \tab Equals \code{x$m}\cr \code{where} \tab Rowwise combination of \code{where} arguments\cr \code{blocks} \tab Equals \code{x$blocks}\cr \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{rbind.mids}\cr \code{nmis} \tab \code{x$nmis} + \code{y$nmis}\cr \code{method} \tab Taken from \code{x$method}\cr \code{predictorMatrix} \tab Taken from \code{x$predictorMatrix}\cr \code{visitSequence} \tab Taken from \code{x$visitSequence}\cr \code{formulas} \tab Taken from \code{x$formulas}\cr \code{post} \tab Taken from \code{x$post}\cr \code{blots} \tab Taken from \code{x$blots}\cr \code{ignore} \tab Concatenate \code{x$ignore} and \code{y$ignore}\cr \code{seed} \tab Taken from \code{x$seed}\cr \code{iteration} \tab Taken from \code{x$iteration}\cr \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr \code{chainMean} \tab Set to \code{NA}\cr \code{chainVar} \tab Set to \code{NA}\cr \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr \code{version} \tab Taken from \code{x$version}\cr \code{date} \tab Taken from \code{x$date} } } \examples{ # --- cbind --- # impute four variables at once (default) imp <- mice(nhanes, m = 1, maxit = 1, print = FALSE) imp$predictorMatrix # impute two by two data1 <- nhanes[, c("age", "bmi")] data2 <- nhanes[, c("hyp", "chl")] imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE) # Append two solutions imp12 <- cbind(imp1, imp2) # This is a different imputation model imp12$predictorMatrix # Append the other way around imp21 <- cbind(imp2, imp1) imp21$predictorMatrix # Append 'forgotten' 
variable chl data3 <- nhanes[, 1:3] imp3 <- mice(data3, maxit = 1, m = 2, print = FALSE) imp4 <- cbind(imp3, chl = nhanes$chl) # Of course, chl was not imputed head(complete(imp4)) # Combine mids object with data frame imp5 <- cbind(imp3, nhanes2) head(complete(imp5)) # --- rbind --- imp1 <- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE) imp5 <- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE) mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) nrow(complete(rbind(imp1, imp5))) nrow(complete(rbind(imp1, mylist))) nrow(complete(rbind(imp1, data.frame(mylist)))) nrow(complete(rbind(imp1, complete(imp5)))) } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link[base:cbind]{cbind}}, \code{\link{ibind}}, \code{\link[=mids-class]{mids}} } \author{ Karin Groothuis-Oudshoorn, Stef van Buuren } \keyword{manip} mice/man/make.formulas.Rd0000644000176200001440000000225614330031606014776 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{make.formulas} \alias{make.formulas} \title{Creates a \code{formulas} argument} \usage{ make.formulas(data, blocks = make.blocks(data), predictorMatrix = NULL) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. The default assigns each variable in its own block.} \item{predictorMatrix}{A \code{predictorMatrix} specified by the user.} } \value{ A list of formula's. } \description{ This helper function creates a valid \code{formulas} object. The \code{formulas} object is an argument to the \code{mice} function. It is a list of formula's that specifies the target variables and the predictors by means of the standard \code{~} operator. } \examples{ f1 <- make.formulas(nhanes) f1 f2 <- make.formulas(nhanes, blocks = make.blocks(nhanes, "collect")) f2 # for editing, it may be easier to work with the character vector c1 <- as.character(f1) c1 # fold it back into a formula list f3 <- name.formulas(lapply(c1, as.formula)) f3 } \seealso{ \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} } mice/man/extend.formula.Rd0000644000176200001440000000147714330031606015171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{extend.formula} \alias{extend.formula} \title{Extends a formula with predictors} \usage{ extend.formula( formula = ~0, predictors = NULL, auxiliary = TRUE, include.intercept = FALSE, ... ) } \arguments{ \item{formula}{A formula. If it is not a formula, the formula is internally reset to \code{~0}.} \item{predictors}{A character vector of variable names.} \item{auxiliary}{A logical that indicates whether the variables listed in \code{predictors} should be added to the formula as main effects. 
The default is \code{TRUE}.} \item{include.intercept}{A logical that indicated whether the intercept should be included in the result.} } \value{ A formula } \description{ Extends a formula with predictors } \keyword{internal} mice/man/name.formulas.Rd0000644000176200001440000000462614330031606015004 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{name.formulas} \alias{name.formulas} \title{Name formula list elements} \usage{ name.formulas(formulas, prefix = "F") } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{prefix}{A character vector of length 1 with the prefix to be using for naming any unnamed blocks with two or more variables.} } \value{ Named list of formulas } \description{ This helper function names any unnamed elements in the \code{formula} list. This is a convenience function. } \details{ This function will name any unnamed list elements specified in the optional argument \code{formula}. Unnamed formula's consisting with just one response variable will be named after this variable. Unnamed formula's containing more than one variable will be named by the \code{prefix} argument, padded by an integer sequence stating at 1. } \examples{ # fully conditionally specified main effects model form1 <- list( bmi ~ age + chl + hyp, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp ) form1 <- name.formulas(form1) imp1 <- mice(nhanes, formulas = form1, print = FALSE, m = 1, seed = 12199) # same model using dot notation form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) form2 <- name.formulas(form2) imp2 <- mice(nhanes, formulas = form2, print = FALSE, m = 1, seed = 12199) identical(complete(imp1), complete(imp2)) # same model using repeated multivariate imputation form3 <- name.blocks(list(all = bmi + hyp + chl ~ .)) imp3 <- mice(nhanes, formulas = form3, print = FALSE, m = 1, seed = 12199) cmp3 <- complete(imp3) identical(complete(imp1), complete(imp3)) # same model using predictorMatrix imp4 <- mice(nhanes, print = FALSE, m = 1, seed = 12199, auxiliary = TRUE) identical(complete(imp1), complete(imp4)) # different model: multivariate imputation for chl and bmi form5 <- list(chl + bmi ~ ., hyp ~ bmi + age) form5 <- name.formulas(form5) imp5 <- mice(nhanes, formulas = form5, print = FALSE, m = 1, seed = 71712) } \seealso{ \code{\link{mice}} } mice/man/fico.Rd0000644000176200001440000000205414330031606013146 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{fico} \alias{fico} \title{Fraction of incomplete cases among cases with observed} \usage{ fico(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} } \value{ A vector of length \code{ncol(data)} of FICO statistics. } \description{ FICO is an outbound statistic defined by the fraction of incomplete cases among cases with \code{Yj} observed (White and Carlin, 2010). } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. 
Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. } \seealso{ \code{\link{fluxplot}}, \code{\link{flux}}, \code{\link{md.pattern}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/lm.mids.Rd0000644000176200001440000000274714330031647013607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lm.R \name{lm.mids} \alias{lm.mids} \title{Linear regression for \code{mids} object} \usage{ lm.mids(formula, data, ...) } \arguments{ \item{formula}{a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right. See the documentation of \code{\link{lm}} and \code{\link{formula}} for details.} \item{data}{An object of type 'mids', which stands for 'multiply imputed data set', typically created by a call to function \code{mice()}.} \item{\dots}{Additional parameters passed to \code{\link{lm}}} } \value{ An objects of class \code{mira}, which stands for 'multiply imputed repeated analysis'. This object contains \code{data$m} distinct \code{lm.objects}, plus some descriptive information. } \description{ Applies \code{lm()} to multiply imputed data set } \details{ This function is included for backward compatibility with V1.0. The function is superseded by \code{\link{with.mids}}. } \examples{ imp <- mice(nhanes) fit <- lm.mids(bmi ~ hyp + chl, data = imp) fit } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{lm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{multivariate} mice/man/mice.impute.lasso.logreg.Rd0000644000176200001440000000643714330031647017061 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lasso.logreg.R \name{mice.impute.lasso.logreg} \alias{mice.impute.lasso.logreg} \alias{lasso.logreg} \title{Imputation by direct use of lasso logistic regression} \usage{ mice.impute.lasso.logreg(y, ry, x, wy = NULL, nfolds = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nfolds}{The number of folds for the cross-validation of the lasso penalty. The default is 10.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing binary data using lasso logistic regression with bootstrap. 
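As with the other univariate imputation functions, this function is normally not called directly but requested through the \code{method} argument of \code{mice()}. A hedged usage sketch (settings are illustrative only, assuming the \code{nhanes2} data shipped with \code{mice}, in which \code{hyp} is a binary factor):
\preformatted{
# request lasso logistic regression for the binary factor 'hyp';
# the remaining variables keep their default methods
meth <- make.method(nhanes2)
meth["hyp"] <- "lasso.logreg"
imp <- mice(nhanes2, method = meth, m = 2, maxit = 2,
            nfolds = 10, print = FALSE, seed = 123)
}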
} \details{ The method consists of the following steps: \enumerate{ \item For a given y variable under imputation, draw a bootstrap version y* with replacement from the observed cases \code{y[ry]}, and store in x* the corresponding values from \code{x[ry, ]}. \item Fit a regularised (lasso) logistic regression with y* as the outcome, and x* as predictors. A vector of regression coefficients bhat is obtained. All of these coefficients are considered random draws from the posterior distribution of the imputation model parameters. Some of these coefficients will be shrunk to 0. \item Compute predicted scores for the missing entries, i.e. logit^-1(X bhat). \item Compare each score to a random (0,1) deviate, and impute. } The method is based on the Direct Use of Regularized Regression (DURR) proposed by Zhao & Long (2016) and Deng et al (2016). } \references{ Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for general missing data patterns in the presence of high-dimensional data. Scientific reports, 6(1), 1-10. Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of high-dimensional data. Statistical Methods in Medical Research, 25(5), 2021-2035. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Edoardo Costantini, 2021 } \concept{univariate imputation functions} \keyword{datagen} mice/man/with.mids.Rd0000644000176200001440000000330214330031647014136 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/with.R \name{with.mids} \alias{with.mids} \title{Evaluate an expression in multiple imputed datasets} \usage{ \method{with}{mids}(data, expr, ...) } \arguments{ \item{data}{An object of type \code{mids}, which stands for 'multiply imputed data set', typically created by a call to function \code{mice()}.} \item{expr}{An expression to evaluate for each imputed data set. Formulas containing a dot (notation for "all other variables") do not work.} \item{\dots}{Not used} } \value{ An object of S3 class \code{\link[=mira-class]{mira}} } \description{ Performs a computation on each of the imputed datasets in \code{data}. } \note{ Version 3.11.10 changed to tidy evaluation on a quosure. This change should not affect any code that worked on previous versions. It turned out that the latter statement was not true (#292). Version 3.12.2 reverts to the old \code{with()} function. } \examples{ imp <- mice(nhanes2, m = 2, print = FALSE, seed = 14221) # descriptive statistics getfit(with(imp, table(hyp, age))) # model fitting and testing fit1 <- with(imp, lm(bmi ~ age + hyp + chl)) fit2 <- with(imp, glm(hyp ~ age + chl, family = binomial)) fit3 <- with(imp, anova(lm(bmi ~ age + chl))) } \references{ van Buuren S and Groothuis-Oudshoorn K (2011).
\code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}}, \code{\link{pool}}, \code{\link{D1}}, \code{\link{D3}}, \code{\link{pool.r.squared}} } \author{ Karin Oudshoorn, Stef van Buuren 2009, 2012, 2020 } \keyword{multivariate} mice/man/mice.impute.lasso.select.logreg.Rd0000644000176200001440000000744214330031647020334 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lasso.select.logreg.R \name{mice.impute.lasso.select.logreg} \alias{mice.impute.lasso.select.logreg} \alias{lasso.select.logreg} \title{Imputation by indirect use of lasso logistic regression} \usage{ mice.impute.lasso.select.logreg(y, ry, x, wy = NULL, nfolds = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nfolds}{The number of folds for the cross-validation of the lasso penalty. The default is 10.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using logistic regression following a preprocessing lasso variable selection step. } \details{ The method consists of the following steps: \enumerate{ \item For a given \code{y} variable under imputation, fit a linear regression with lasso penalty using \code{y[ry]} as dependent variable and \code{x[ry, ]} as predictors. The coefficients that are not shrunk to 0 define the active set of predictors that will be used for imputation. \item Fit a logistic regression with the active set of predictors, and find (bhat, V(bhat)). \item Draw BETA from N(bhat, V(bhat)). \item Compute predicted scores for the missing entries, i.e. logit^-1(X BETA). \item Compare each score to a random (0,1) deviate, and impute. } The user can specify a \code{predictorMatrix} in the \code{mice} call to define which predictors are provided to this univariate imputation method. The lasso regularization will select, among the variables indicated by the user, the ones that are important for imputation at any given iteration. Therefore, users may force the exclusion of a predictor from a given imputation model by specifying a \code{0} entry. However, a non-zero entry does not guarantee that the variable will be used, as this decision is ultimately made by the lasso variable selection procedure. The method is based on the Indirect Use of Regularized Regression (IURR) proposed by Zhao & Long (2016) and Deng et al (2016). } \references{ Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for general missing data patterns in the presence of high-dimensional data. Scientific reports, 6(1), 1-10. Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of high-dimensional data. Statistical Methods in Medical Research, 25(5), 2021-2035.
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Edoardo Costantini, 2021 } \concept{univariate imputation functions} \keyword{datagen} mice/man/ampute.Rd0000644000176200001440000003002314433400023013513 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.R \name{ampute} \alias{ampute} \title{Generate missing data for simulation purposes} \usage{ ampute( data, prop = 0.5, patterns = NULL, freq = NULL, mech = "MAR", weights = NULL, std = TRUE, cont = TRUE, type = NULL, odds = NULL, bycases = TRUE, run = TRUE ) } \arguments{ \item{data}{A complete data matrix or data frame. Values should be numeric. Categorical variables should have been transformed to dummies.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{patterns}{A matrix or data frame of size #patterns by #variables where \code{0} indicates that a variable should have missing values and \code{1} indicates that a variable should remain complete. The user may specify as many patterns as desired. One pattern (a vector) is possible as well. Default is a square matrix of size #variables where each pattern has missingness on one variable only (created with \code{\link{ampute.default.patterns}}). After the amputation procedure, \code{\link{md.pattern}} can be used to investigate the missing data patterns in the data.} \item{freq}{A vector of length #patterns containing the relative frequency with which the patterns should occur. For example, for three missing data patterns, the vector could be \code{c(0.4, 0.4, 0.2)}, meaning that of all cases with missing values, 40 percent should have pattern 1, 40 percent pattern 2 and 20 percent pattern 3. The vector should sum to 1. Default is an equal probability for each pattern, created with \code{\link{ampute.default.freq}}.} \item{mech}{A string specifying the missingness mechanism, either "MCAR" (Missing Completely At Random), "MAR" (Missing At Random) or "MNAR" (Missing Not At Random). Default is a MAR missingness mechanism.} \item{weights}{A matrix or data frame of size #patterns by #variables. The matrix contains the weights that will be used to calculate the weighted sum scores. For a MAR mechanism, the weights of the variables that will be made incomplete should be zero. For a MNAR mechanism, these weights could have any possible value. Furthermore, the weights may differ between patterns and between variables. They may be negative as well. Within each pattern, the relative size of the values are of importance. The default weights matrix is made with \code{\link{ampute.default.weights}} and returns a matrix with equal weights for all variables. 
In case of MAR, variables that will be amputed will be weighted with \code{0}. For MNAR, variables that will be observed will be weighted with \code{0}. If the mechanism is MCAR, the weights matrix will not be used.} \item{std}{Logical. Whether the weighted sum scores should be calculated with standardized data or with non-standardized data. The latter is especially advised when making use of train and test sets in order to prevent leakage.} \item{cont}{Logical. Whether the probabilities should be based on a continuous or a discrete distribution. If TRUE, the probabilities of being missing are based on a continuous logistic distribution function. \code{\link{ampute.continuous}} will be used to calculate and assign the probabilities. These probabilities will then be based on the argument \code{type}. If FALSE, the probabilities of being missing are based on a discrete distribution (\code{\link{ampute.discrete}}) based on the \code{odds} argument. Default is TRUE.} \item{type}{A string or vector of strings containing the type of missingness for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. If a single missingness type is given, all patterns will be created with the same type. If the missingness types should differ between patterns, a vector of missingness types should be given. Default is RIGHT for all patterns and is the result of \code{\link{ampute.default.type}}.} \item{odds}{A matrix where #patterns defines the #rows. Each row should contain the odds of being missing for the corresponding pattern. The number of odds values defines in how many quantiles the sum scores will be divided. The odds values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The number of quantiles may differ between the patterns, specify NA for cells remaining empty. Default is 4 quantiles with odds values 1, 2, 3 and 4 and is created by \code{\link{ampute.default.odds}}.} \item{bycases}{Logical. If TRUE, the proportion of missingness is defined in terms of cases. If FALSE, the proportion of missingness is defined in terms of cells. Default is TRUE.} \item{run}{Logical. If TRUE, the amputations are implemented. If FALSE, the return object will contain everything except for the amputed data set.} } \value{ Returns an S3 object of class \code{\link{mads-class}} (multivariate amputed data set) } \description{ This function generates multivariate missing data under a MCAR, MAR or MNAR missing data mechanism. Imputation of data sets containing missing values can be performed with \code{\link{mice}}. } \details{ This function generates missing values in complete data sets. Amputation of complete data sets is useful for the evaluation of imputation techniques, such as multiple imputation (performed with function \code{\link{mice}} in this package). The basic strategy underlying multivariate imputation was suggested by Don Rubin during discussions in the 90's. Brand (1997) created one particular implementation, and his method found its way into the FCS paper (Van Buuren et al, 2006). Until recently, univariate amputation procedures were used to generate missing data in complete, simulated data sets. With this approach, variables are made incomplete one variable at a time. When more than one variable needs to be amputed, the procedure is repeated multiple times. 
With the univariate approach, it is difficult to relate the missingness on one variable to the missingness on another variable. A multivariate amputation procedure solves this issue and moreover, it does justice to the multivariate nature of data sets. Hence, \code{ampute} is developed to perform multivariate amputation. The idea behind the function is the specification of several missingness patterns. Each pattern is a combination of variables with and without missing values (denoted by \code{0} and \code{1} respectively). For example, one might want to create two missingness patterns on a data set with four variables. The patterns could be something like: \code{0,0,1,1} and \code{1,0,1,0}. Each combination of zeros and ones may occur. Furthermore, the researcher specifies the proportion of missingness, either the proportion of missing cases or the proportion of missing cells, and the relative frequency each pattern occurs. Consequently, the data is split into multiple subsets, one subset per pattern. Now, each case is candidate for a certain missingness pattern, but whether the case will have missing values eventually depends on other specifications. The first of these specifications is the missing mechanism. There are three possible mechanisms: the missingness depends completely on chance (MCAR), the missingness depends on the values of the observed variables (i.e. the variables that remain complete) (MAR) or on the values of the variables that will be made incomplete (MNAR). For a discussion on how missingness mechanisms are related to the observed data, we refer to \doi{10.1177/0049124118799376}{Schouten and Vink, 2018}. When the user specifies the missingness mechanism to be \code{"MCAR"}, the candidates have an equal probability of becoming incomplete. For a \code{"MAR"} or \code{"MNAR"} mechanism, weighted sum scores are calculated. These scores are a linear combination of the variables. In order to calculate the weighted sum scores, the data is standardized. For this reason, the data has to be numeric. Second, for each case, the values in the data set are multiplied with the weights, specified by argument \code{weights}. These weighted scores will be summed, resulting in a weighted sum score for each case. The weights may differ between patterns and they may be negative or zero as well. Naturally, in case of a MAR mechanism, the weights corresponding to the variables that will be made incomplete, have a 0. Note that this may be different for each pattern. In case of MNAR missingness, especially the weights of the variables that will be made incomplete are of importance. However, the other variables may be weighted as well. It is the relative difference between the weights that will result in an effect in the sum scores. For example, for the first missing data pattern mentioned above, the weights for the third and fourth variables could be set to 2 and 4. However, weight values of 0.2 and 0.4 will have the exact same effect on the weighted sum score: the fourth variable is weighted twice as much as variable 3. Based on the weighted sum scores, either a discrete or continuous distribution of probabilities is used to calculate whether a candidate will have missing values. For a discrete distribution of probabilities, the weighted sum scores are divided into subgroups of equal size (quantiles). Thereafter, the user specifies for each subgroup the odds of being missing. Both the number of subgroups and the odds values are important for the generation of missing data. 
For example, for a RIGHT-like mechanism, scoring in one of the higher quantiles should have high missingness odds, whereas for a MID-like mechanism, the central groups should have higher odds. Again, not the size of the odds values are of importance, but the relative distance between the values. The continuous distributions of probabilities are based on the logistic distribution function. The user can specify the type of missingness, which, again, may differ between patterns. For an example and more explanation about how the arguments interact with each other, we refer to the vignette \href{https://rianneschouten.github.io/mice_ampute/vignette/ampute.html}{Generate missing values with ampute} The amputation methodology is published in \doi{10.1080/00949655.2018.1491577}{Schouten, Lugtig and Vink, 2018}. } \examples{ # start with a complete data set compl_boys <- cc(boys)[1:3] # Perform amputation with default settings mads_boys <- ampute(data = compl_boys) mads_boys$amp # Change default matrices as desired my_patterns <- mads_boys$patterns my_patterns[1:3, 2] <- 0 my_weights <- mads_boys$weights my_weights[2, 1] <- 2 my_weights[3, 1] <- 0.5 # Rerun amputation my_mads_boys <- ampute( data = compl_boys, patterns = my_patterns, freq = c(0.3, 0.3, 0.4), weights = my_weights, type = c("RIGHT", "TAIL", "LEFT") ) my_mads_boys$amp } \references{ Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} pp. 110-113. Dissertation. Rotterdam: Erasmus University. Schouten, R.M., Lugtig, P and Vink, G. (2018) {Generating missing values for simulation purposes: A multivariate amputation procedure.}. \emph{Journal of Statistical Computation and Simulation}, 88(15): 1909-1930. \doi{10.1080/00949655.2018.1491577} Schouten, R.M. and Vink, G. (2018){The Dance of the Mechanisms: How Observed Information Influences the Validity of Missingness Assumptions}. \emph{Sociological Methods and Research}, 50(3): 1243-1258. \doi{10.1177/0049124118799376} Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn, C.G.M., Rubin, D.B. (2006) {Fully conditional specification in multivariate imputation.} \emph{Journal of Statistical Computation and Simulation}, 76(12): 1049-1064. \doi{10.1080/10629360600810434} Van Buuren, S. (2018) \href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Vink, G. (2016) Towards a standardized evaluation of multiple imputation routines. } \seealso{ \code{\link{mads-class}}, \code{\link{bwplot}}, \code{\link{xyplot}}, \code{\link{mice}} } \author{ Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 } mice/man/mice.impute.2lonly.mean.Rd0000644000176200001440000000555214330031606016610 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.mean.R \name{mice.impute.2lonly.mean} \alias{mice.impute.2lonly.mean} \alias{2lonly.mean} \title{Imputation of most likely value within the class} \usage{ mice.impute.2lonly.mean(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. 
The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. The class variable (only one is allowed) is coded as \code{-2}.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Method \code{2lonly.mean} replicates the most likely value within a class of a second-level variable. It works for numeric and factor data. The function is primarily useful as a quick fixup for data in which the second-level variable is inconsistent. } \details{ Observed values in \code{y} are averaged within the class, and replicated to the missing \code{y} within that class. This function is primarily useful for repairing incomplete data that are constant within the class, but vary over classes. For numeric variables, \code{mice.impute.2lonly.mean()} imputes the class mean of \code{y}. If \code{y} is a second-level variable, then conventionally all observed \code{y} will be identical within the class, and the function just provides a quick fix for any missing \code{y} by filling in the class mean. For factor variables, \code{mice.impute.2lonly.mean()} imputes the most frequently occurring category within the class. If there are no observed \code{y} in the class, all entries of the class are set to \code{NA}. Note that this may produce problems later on in \code{mice} if imputation routines are called that expect predictor data to be complete. Methods designed for imputing this type of second-level variable include \code{\link{mice.impute.2lonly.norm}} and \code{\link{mice.impute.2lonly.pmm}}. } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Boca Raton, FL.: Chapman & Hall/CRC Press. } \seealso{ Other univariate-2lonly: \code{\link{mice.impute.2lonly.norm}()}, \code{\link{mice.impute.2lonly.pmm}()} } \author{ Gerko Vink, Stef van Buuren, 2019 } \concept{univariate-2lonly} \keyword{datagen} mice/man/densityplot.mids.Rd0000644000176200001440000002115514330031647015547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/densityplot.R \name{densityplot.mids} \alias{densityplot.mids} \alias{densityplot} \title{Density plot of observed and imputed data} \usage{ \method{densityplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, plot.points = FALSE, theme = mice.theme(), mayreplicate = TRUE, thicker = 2.5, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.densityplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.densityplot"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted.
This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odds labeling of vertical axis. The function \code{densityplot} does not use the \code{y} terms in the formula. Density plots for \code{x1} and \code{x2} are requested as \code{~ x1 + x2}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated in the response indicator \code{is.na(x$data)}. The default \code{na.group = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} arguments in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{plot.points}{A logical used in \code{densityplot} that signals whether the points should be plotted.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many setting consists of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{mayreplicate}{A logical indicating whether color, line widths, and so on, may be replicated. The graphical functions attempt to choose "intelligent" graphical parameters. For example, the same color can be replicated for different element, e.g. use all reds for the imputed data. 
Replication may be switched off by setting the flag to \code{FALSE}, in order to allow the user to gain full control.} \item{thicker}{Used in \code{densityplot}. Multiplication factor of the line width of the observed density. \code{thicker=1} uses the same thickness for the observed and imputed data.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{panel}{See \code{\link{xyplot}}.} \item{default.prepanel}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{densityplot} produces plots of the densities. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of both may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use the \code{subset} and \code{na.groups} together to plots parts of the data. For example, select the first imputed data set by by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification to define the observed and missing groups. \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. \code{densityplot} errs on empty groups, which occurs if all observations in the subgroup contain \code{NA}. The relevant error message is: \code{Error in density.default: ... need at least 2 points to select a bandwidth automatically}. There is yet no workaround for this problem. Use the more robust \code{bwplot} or \code{stripplot} as a replacement. 
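The following hedged sketch illustrates the \code{na.groups} and \code{subset} arguments described above; the settings are illustrative only and assume the \code{boys} data shipped with \code{mice}.
\preformatted{
imp <- mice(boys, maxit = 1, m = 2, print = FALSE, seed = 123)
# group the densities of hc by missingness of gen rather than of hc
densityplot(imp, ~ hc, na.groups = gen)
# restrict the display to the first imputed data set
densityplot(imp, ~ hc, subset = .imp == 1)
}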
} \examples{ imp <- mice(boys, maxit = 1) ### density plot of head circumference per imputation ### blue is observed, red is imputed densityplot(imp, ~ hc | .imp) ### All combined in one panel. densityplot(imp, ~hc) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{stripplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:histogram]{densityplot}}, \code{\link[lattice:panel.densityplot]{panel.densityplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.get]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/mice.impute.2l.lmer.Rd0000644000176200001440000000610514330031606015720 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.lmer.R \name{mice.impute.2l.lmer} \alias{mice.impute.2l.lmer} \title{Imputation by a two-level normal model using \code{lmer}} \usage{ mice.impute.2l.lmer(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. Fixed effects are indicated by a '1'.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{\dots}{Arguments passed down to \code{lmer}} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate systematically and sporadically missing data using a two-level normal model using \code{lme4::lmer()}. } \details{ Data are missing systematically if they have not been measured, e.g., in the case where we combine data from different sources. Data are missing sporadically if they have been partially observed. While the method is fully Bayesian, it may fix parameters of the variance-covariance matrix or the random effects to their estimated value in cases where creating draws from the posterior is not possible. The procedure throws a warning when this happens. If \code{lme4::lmer()} fails, the procedure prints the warning \code{"lmer does not run. Simplify imputation model"} and returns the current imputation. If that happens we see flat lines in the trace line plots. Thus, the appearance of flat trace lines should be taken as an additional alert to a problem with imputation model fitting. } \references{ Jolani S. (2017) Hierarchical imputation of systematically and sporadically missing data: An approximate Bayesian approach using chained equations. Forthcoming. 
Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). Imputation of systematically missing predictors in an individual participant data meta-analysis: a generalized approach using MICE. \emph{Statistics in Medicine}, 34:1841-1863. Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. and and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.norm}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Shahab Jolani, 2017 } \concept{univariate-2l} \keyword{datagen} mice/man/futuremice.Rd0000644000176200001440000001032514422737141014407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/futuremice.R \name{futuremice} \alias{futuremice} \title{Wrapper function that runs MICE in parallel} \usage{ futuremice( data, m = 5, parallelseed = NA, n.core = NULL, seed = NA, use.logical = TRUE, future.plan = "multisession", packages = NULL, globals = NULL, ... ) } \arguments{ \item{data}{A data frame or matrix containing the incomplete data. Similar to the first argument of \code{\link{mice}}.} \item{m}{The number of desired imputated datasets. By default $m=5$ as with \code{mice}} \item{parallelseed}{A scalar to be used to obtain reproducible results over the futures. The default \code{parallelseed = NA} will result in a seed value that is randomly drawn between -999999999 and 999999999.} \item{n.core}{A scalar indicating the number of cores that should be used.} \item{seed}{A scalar to be used as the seed value for the mice algorithm within each parallel stream. Please note that the imputations will be the same for all streams and, hence, this should be used if and only if \code{n.core = 1} and if it is desired to obtain the same output as under \code{mice}.} \item{use.logical}{A logical indicating whether logical (\code{TRUE}) or physical (\code{FALSE}) CPU's on machine should be used.} \item{future.plan}{A character indicating how \code{future}s are resolved. The default \code{multisession} resolves futures asynchronously (in parallel) in separate \code{R} sessions running in the background. See \code{\link[future]{plan}} for more information on future plans.} \item{packages}{A character vector with additional packages to be used in \code{mice} (e.g., for using external imputation functions).} \item{globals}{A character string with additional functions to be exported to each future (e.g., user-written imputation functions).} \item{...}{Named arguments that are passed down to function \code{\link{mice}}.} } \value{ A mids object as defined by \code{\link{mids-class}} } \description{ This is a wrapper function for \code{\link{mice}}, using multiple cores to execute \code{\link{mice}} in parallel. As a result, the imputation procedure can be sped up, which may be useful in general. By default, \code{\link{futuremice}} distributes the number of imputations \code{m} about equally over the cores. } \details{ This function relies on package \code{\link[furrr]{furrr}}, which is a package for R versions 3.2.0 and later. We have chosen to use furrr function \code{future_map} to allow the use of \code{futuremice} on Mac, Linux and Windows systems. This wrapper function combines the output of \code{\link[furrr]{future_map}} with function \code{\link{ibind}} from the \code{\link{mice}} package. 
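A conceptual sketch of this mechanism (not the actual \code{futuremice} source; the worker and imputation counts are illustrative): run \code{mice()} on each worker and glue the partial results together with \code{ibind()}.
\preformatted{
library(mice); library(future); library(furrr)
plan(multisession, workers = 2)
# distribute m = 5 imputations as 3 + 2 over the two workers
parts <- future_map(c(3, 2), function(mi) mice(nhanes, m = mi, print = FALSE),
                    .options = furrr_options(seed = TRUE))
imp <- Reduce(ibind, parts)  # a single mids object with m = 5
}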
A \code{mids} object is returned and can be used for further analyses. A seed value can be specified in the global environment, which will yield reproducible results. A seed value can also be specified within the \code{\link{futuremice}} call, through specifying the argument \code{parallelseed}. If \code{parallelseed} is not specified, a seed value is drawn randomly by default, and accessible through \code{$parallelseed} in the output object. Hence, results will always be reproducible, regardless of whether the seed is specified in the global environment, or by setting the same seed within the function (potentially by extracting the seed from the \code{futuremice} output object. } \examples{ # 150 imputations in dataset nhanes, performed by 3 cores \dontrun{ imp1 <- futuremice(data = nhanes, m = 150, n.core = 3) # Making use of arguments in mice. imp2 <- futuremice(data = nhanes, m = 100, method = "norm.nob") imp2$method fit <- with(imp2, lm(bmi ~ hyp)) pool(fit) } } \references{ Volker, T.B. and Vink, G. (2022). futuremice: The future starts today. \url{https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html} #'Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link[future]{future}}, \code{\link[furrr]{furrr}}, \code{\link[furrr]{future_map}}, \code{\link[future]{plan}}, \code{\link{mice}}, \code{\link{mids-class}} } \author{ Thom Benjamin Volker, Gerko Vink } mice/man/mice.impute.polr.Rd0000644000176200001440000001104214330031647015422 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.polr.R \name{mice.impute.polr} \alias{mice.impute.polr} \title{Imputation of ordered data by polytomous regression} \usage{ mice.impute.polr( y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, polr.to.loggedEvents = FALSE, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nnet.maxit}{Tuning parameter for \code{nnet()}.} \item{nnet.trace}{Tuning parameter for \code{nnet()}.} \item{nnet.MaxNWts}{Tuning parameter for \code{nnet()}.} \item{polr.to.loggedEvents}{A logical indicating whether each fallback to the \code{multinom()} function should be written to \code{loggedEvents}. The default is \code{FALSE}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes missing data in a categorical variable using polytomous regression } \details{ The function \code{mice.impute.polr()} imputes for ordered categorical response variables by the proportional odds logistic regression (polr) model. The function repeatedly applies logistic regression on the successive splits. The model is also known as the cumulative link model. By default, ordered factors with more than two levels are imputed by \code{mice.impute.polr}. 
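A hedged illustration of this default (assuming the \code{boys} data shipped with \code{mice}, in which \code{gen} and \code{phb} are ordered factors with more than two levels):
\preformatted{
imp <- mice(boys, m = 1, maxit = 2, print = FALSE, seed = 123)
imp$method[c("gen", "phb")]  # expected to show "polr" for both
}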
The algorithm of \code{mice.impute.polr} uses the function \code{polr()} from the \code{MASS} package. In order to avoid bias due to perfect prediction, the algorithm augment the data according to the method of White, Daniel and Royston (2010). The call to \code{polr} might fail, usually because the data are very sparse. In that case, \code{multinom} is tried as a fallback. If the local flag \code{polr.to.loggedEvents} is set to TRUE, a record is written to the \code{loggedEvents} component of the \code{\link{mids}} object. Use \code{mice(data, polr.to.loggedEvents = TRUE)} to set the flag. } \note{ In December 2019 Simon White alerted that the \code{polr} could always fail silently. I can confirm this behaviour for versions \code{mice 3.0.0 - mice 3.6.6}, so any method requests for \code{polr} in these versions were in fact handled by \code{multinom}. See \url{https://github.com/amices/mice/issues/206} for details. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. White, I.R., Daniel, R. Royston, P. (2010). Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with S-Plus (4th ed)}. Springer, Berlin. } \seealso{ \code{\link{mice}}, \code{\link[nnet]{multinom}}, \code{\link[MASS]{polr}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mammalsleep.Rd0000644000176200001440000000364214330031606014527 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mammalsleep.R \docType{data} \name{mammalsleep} \alias{mammalsleep} \alias{sleep} \title{Mammal sleep data} \format{ \code{mammalsleep} is a data frame with 62 rows and 11 columns: \describe{ \item{species}{Species of animal} \item{bw}{Body weight (kg)} \item{brw}{Brain weight (g)} \item{sws}{Slow wave ("nondreaming") sleep (hrs/day)} \item{ps}{Paradoxical ("dreaming") sleep (hrs/day)} \item{ts}{Total sleep (hrs/day) (sum of slow wave and paradoxical sleep)} \item{mls}{Maximum life span (years)} \item{gt}{Gestation time (days)} \item{pi}{Predation index (1-5), 1 = least likely to be preyed upon} \item{sei}{Sleep exposure index (1-5), 1 = least exposed 
(e.g. animal sleeps in a well-protected den), 5 = most exposed} \item{odi}{Overall danger index (1-5) based on the above two indices and other information, 1 = least danger (from other animals), 5 = most danger (from other animals)} } } \source{ Allison, T., Cicchetti, D.V. (1976). Sleep in Mammals: Ecological and Constitutional Correlates. Science, 194(4266), 732-734. } \description{ Dataset from Allison and Cicchetti (1976) of 62 mammal species on the interrelationship between sleep, ecological, and constitutional variables. The dataset contains missing values on five variables. } \details{ Allison and Cicchetti (1976) investigated the interrelationship between sleep, ecological, and constitutional variables. They assessed these variables for 39 mammalian species. The authors concluded that slow-wave sleep is negatively associated with a factor related to body size. This suggests that large amounts of this sleep phase are disadvantageous in large species. Also, paradoxical sleep (REM sleep) was associated with a factor related to predatory danger, suggesting that large amounts of this sleep phase are disadvantageous in prey species. } \examples{ sleep <- data(mammalsleep) } \keyword{datasets} mice/man/D2.Rd0000644000176200001440000000335614436133175012514 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/D2.R \name{D2} \alias{D2} \title{Compare two nested models using D2-statistic} \usage{ D2(fit1, fit0 = NULL, use = "wald") } \arguments{ \item{fit1}{An object of class \code{mira}, produced by \code{with()}.} \item{fit0}{An object of class \code{mira}, produced by \code{with()}. The model in \code{fit0} is a nested within \code{fit1}. The default null model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model.} \item{use}{A character string denoting Wald- or likelihood-based based tests. Can be either \code{"wald"} or \code{"likelihood"}. Only used if \code{method = "D2"}.} } \description{ The D2-statistic pools test statistics from the repeated analyses. The method is less powerful than the D1- and D3-statistics. } \note{ Warning: `D2()` assumes that the order of the variables is the same in different models. See \url{https://github.com/amices/mice/issues/420} for details. } \examples{ # Compare two linear models: imp <- mice(nhanes2, seed = 51009, print = FALSE) mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) D2(mi1, mi0) \dontrun{ # Compare two logistic regression models imp <- mice(boys, maxit = 2, print = FALSE) fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) D2(fit1, fit0) } } \references{ Li, K. H., X. L. Meng, T. E. Raghunathan, and D. B. Rubin. 1991. Significance Levels from Repeated p-Values with Multiply-Imputed Data. \emph{Statistica Sinica} 1 (1): 65–92. 
\url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:chi} } \seealso{ \code{\link[mitml]{testModels}} } mice/man/is.mipo.Rd0000644000176200001440000000051513666252075013624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mipo} \alias{is.mipo} \title{Check for \code{mipo} object} \usage{ is.mipo(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mipo} } \description{ Check for \code{mipo} object } mice/man/xyplot.mids.Rd0000644000176200001440000001636414330031647014536 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/xyplot.R \name{xyplot.mids} \alias{xyplot.mids} \alias{xyplot} \title{Scatterplot of observed and imputed data} \usage{ \method{xyplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odds labeling of vertical axis.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated in the response indicator \code{is.na(x$data)}. The default \code{na.group = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} arguments in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. 
The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many settings consist of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{xyplot()} produces conditional scatterplots. The function automatically separates the observed (blue) and imputed (red) data. The function extends the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups: \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation.
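As an illustration of the \code{na.groups}, extended-formula and color options described above, here is a small sketch (the \code{boys} data and the particular graphical settings are chosen only for illustration):

\preformatted{
imp <- mice(boys, maxit = 1, print = FALSE)
# extended formula interface: hgt and wgt plotted in separate panels
xyplot(imp, hgt + wgt ~ age | .imp, col = mdc(1:2), pch = c(1, 20))
# group (and color) the points by missingness of wgt instead of hgt
xyplot(imp, hgt ~ age | .imp, na.groups = wgt)
}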
} \examples{ imp <- mice(boys, maxit = 1) # xyplot: scatterplot by imputation number # observe the erroneous outlying imputed values # (caused by imputing hgt from bmi) xyplot(imp, hgt ~ age | .imp, pch = c(1, 20), cex = c(1, 1.5)) # same, but label with missingness of wgt (four cases) xyplot(imp, hgt ~ age | .imp, na.group = wgt, pch = c(1, 20), cex = c(1, 1.5)) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link{stripplot}}, \code{\link{densityplot}}, \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:xyplot]{xyplot}}, \code{\link[lattice:panel.xyplot]{panel.xyplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.get]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/complete.mids.Rd0000644000176200001440000000654014330031606014775 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{complete.mids} \alias{complete.mids} \alias{complete} \title{Extracts the completed data from a \code{mids} object} \usage{ \method{complete}{mids}(data, action = 1L, include = FALSE, mild = FALSE, ...) } \arguments{ \item{data}{An object of class \code{mids} as created by the function \code{mice()}.} \item{action}{A numeric vector or a keyword. Numeric values between 1 and \code{data$m} return the data with imputation number \code{action} filled in. The value of \code{action = 0} return the original data, with missing values. \code{action} can also be one of the following keywords: \code{"all"}, \code{"long"}, \code{"broad"} and \code{"repeated"}. See the Details section for the interpretation. The default is \code{action = 1L} returns the first imputed data set.} \item{include}{A logical to indicate whether the original data with the missing values should be included.} \item{mild}{A logical indicating whether the return value should always be an object of class \code{mild}. Setting \code{mild = TRUE} overrides \code{action} keywords \code{"long"}, \code{"broad"} and \code{"repeated"}. The default is \code{FALSE}.} \item{\dots}{Additional arguments. Not used.} } \value{ Complete data set with missing values replaced by imputations. A \code{data.frame}, or a list of data frames of class \code{mild}. } \description{ Takes an object of class \code{mids}, fills in the missing data, and returns the completed data in a specified format. } \details{ The argument \code{action} can be length-1 character, which is matched to one of the following keywords: \describe{ \item{\code{"all"}}{produces a \code{mild} object of imputed data sets. When \code{include = TRUE}, then the original data are appended as the first list element;} \item{\code{"long"}}{ produces a data set where imputed data sets are stacked vertically. The columns are added: 1) \code{.imp}, integer, referring the imputation number, and 2) \code{.id}, character, the row names of \code{data$data};} \item{\code{"stacked"}}{ same as \code{"long"} but without the two additional columns;} \item{\code{"broad"}}{ produces a data set with where imputed data sets are stacked horizontally. Columns are ordered as in the original data. 
The imputation number is appended to each column name;} \item{\code{"repeated"}}{ same as \code{"broad"}, but with columns in a different order.} } } \note{ Technical note: \code{mice 3.7.5} renamed the \code{complete()} function to \code{complete.mids()} and exported it as an S3 method of the generic \code{tidyr::complete()}. Name clashes between \code{mice::complete()} and \code{tidyr::complete()} should no longer occur. } \examples{ # obtain first imputed data set sum(is.na(nhanes2)) imp <- mice(nhanes2, print = FALSE, maxit = 1) dat <- complete(imp) sum(is.na(dat)) # obtain stacked third and fifth imputation dat <- complete(imp, c(3, 5)) # obtain all datasets, with additional identifiers head(complete(imp, "long")) # same, but now as list, mild object dslist <- complete(imp, "all") length(dslist) # same, but also include the original data dslist <- complete(imp, "all", include = TRUE) length(dslist) # select original + 3 + 5, store as mild dslist <- complete(imp, c(0, 3, 5), mild = TRUE) names(dslist) } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}} } \keyword{manip} mice/man/construct.blocks.Rd0000644000176200001440000000437514330031606015536 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blocks.R \name{construct.blocks} \alias{construct.blocks} \title{Construct blocks from \code{formulas} and \code{predictorMatrix}} \usage{ construct.blocks(formulas = NULL, predictorMatrix = NULL) } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g., \code{2} or \code{-2}) are also allowed.} } \value{ A \code{blocks} object. } \description{ This helper function attempts to find blocks of variables in the specification of the \code{formulas} and/or \code{predictorMatrix} objects. Blocks specified by \code{formulas} may consist of multiple variables. Blocks specified by \code{predictorMatrix} are assumed to consist of single variables. Any duplicates in names are removed, and the formula specification is preferred. When both arguments specify models for the same block, the model for that block in \code{predictorMatrix} is removed, and priority is given to the specification given in \code{formulas}.
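For instance, in the small sketch below (variable names from the \code{nhanes} data are used only for illustration) both arguments mention \code{bmi}; the resulting \code{bmi} block is the one defined by \code{formulas}, while \code{age}, \code{hyp} and \code{chl} remain single-variable blocks taken from the \code{predictorMatrix}:

\preformatted{
form <- name.formulas(list(bmi ~ age + chl))
pred <- make.predictorMatrix(nhanes)
blk  <- construct.blocks(formulas = form, predictorMatrix = pred)
names(blk)
}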
} \examples{ form <- name.formulas(list(bmi + hyp ~ chl + age, chl ~ bmi)) pred <- make.predictorMatrix(nhanes[, c("age", "chl")]) construct.blocks(formulas = form, pred = pred) } \seealso{ \code{\link{make.blocks}}, \code{\link{name.blocks}} } mice/man/nhanes.Rd0000644000176200001440000000166714330031606013513 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nhanes.R \docType{data} \name{nhanes} \alias{nhanes} \title{NHANES example - all variables numerical} \format{ A data frame with 25 observations on the following 4 variables. \describe{ \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} \item{bmi}{Body mass index (kg/m**2)} \item{hyp}{Hypertensive (1=no,2=yes)} \item{chl}{Total serum cholesterol (mg/dL)} } } \source{ Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate Data.} London: Chapman & Hall. Table 6.14. } \description{ A small data set with non-monotone missing values. } \details{ A small data set with all numerical variables. The data set \code{nhanes2} is the same data set, but with \code{age} and \code{hyp} treated as factors. } \examples{ # create 5 imputed data sets imp <- mice(nhanes) # print the first imputed data set complete(imp) } \seealso{ \code{\link{nhanes2}} } \keyword{datasets} mice/man/mids-class.Rd0000644000176200001440000001137514334445701014304 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids.R \name{mids-class} \alias{mids-class} \alias{mids} \title{Multiply imputed data set (\code{mids})} \description{ The \code{mids} object contains a multiply imputed data set. The \code{mids} object is generated by functions \code{mice()}, \code{mice.mids()}, \code{cbind.mids()}, \code{rbind.mids()} and \code{ibind.mids()}. } \details{ The \code{mids} class of objects has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}. The \code{loggedEvents} entry is a matrix with five columns containing a record of automatic removal actions. It is \code{NULL} is no action was made. At initialization the program does the following three actions: \describe{ \item{1}{A variable that contains missing values, that is not imputed and that is used as a predictor is removed} \item{2}{A constant variable is removed} \item{3}{A collinear variable is removed.} } During iteration, the program does the following actions: \describe{ \item{1}{One or more variables that are linearly dependent are removed (for categorical data, a 'variable' corresponds to a dummy variable)} \item{2}{Proportional odds regression imputation that does not converge and is replaced by \code{polyreg}.} } Explanation of elements in \code{loggedEvents}: \describe{ \item{\code{it}}{iteration number at which the record was added,} \item{\code{im}}{imputation number,} \item{\code{dep}}{name of the dependent variable,} \item{\code{meth}}{imputation method used,} \item{\code{out}}{a (possibly long) character vector with the names of the altered or removed predictors.} } } \note{ The \code{mice} package does not use the S4 class definitions, and instead relies on the S3 list equivalent \code{oldClass(obj) <- "mids"}. } \section{Slots}{ \describe{ \item{\code{.Data}:}{Object of class \code{"list"} containing the following slots:} \item{\code{data}:}{Original (incomplete) data set.} \item{\code{imp}:}{A list of \code{ncol(data)} components with the generated multiple imputations. 
Each list component is a \code{data.frame} (\code{nmis[j]} by \code{m}) of imputed values for variable \code{j}.} \item{\code{m}:}{Number of imputations.} \item{\code{where}:}{The \code{where} argument of the \code{mice()} function.} \item{\code{blocks}:}{The \code{blocks} argument of the \code{mice()} function.} \item{\code{call}:}{Call that created the object.} \item{\code{nmis}:}{An array containing the number of missing observations per column.} \item{\code{method}:}{A vector of strings of \code{length(blocks)} specifying the imputation method per block.} \item{\code{predictorMatrix}:}{A numerical matrix containing integers specifying the predictor set.} \item{\code{visitSequence}:}{The sequence in which columns are visited.} \item{\code{formulas}:}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names.} \item{\code{post}:}{A vector of strings of length \code{length(blocks)} with commands for post-processing.} \item{\code{blots}:}{"Block dots". The \code{blots} argument to the \code{mice()} function.} \item{\code{ignore}:}{A logical vector of length \code{nrow(data)} indicating the rows in \code{data} used to build the imputation model. (new in \code{mice 3.12.0})} \item{\code{seed}:}{The seed value of the solution.} \item{\code{iteration}:}{Last Gibbs sampling iteration number.} \item{\code{lastSeedValue}:}{The most recent seed value.} \item{\code{chainMean}:}{A list of \code{m} components. Each component is a \code{length(visitSequence)} by \code{maxit} matrix containing the mean of the generated multiple imputations. The array can be used for monitoring convergence. Note that observed data are not present in this mean.} \item{\code{chainVar}:}{A list with a structure similar to \code{chainMean}, containing the covariances of the imputed values.} \item{\code{loggedEvents}:}{A \code{data.frame} with five columns containing warnings, corrective actions, and other inside info.} \item{\code{version}:}{Version number of \code{mice} package that created the object.} \item{\code{date}:}{Date at which the object was created.} } } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link[=mira-class]{mira}}, \code{\link{mipo}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{classes} mice/man/nelsonaalen.Rd0000644000176200001440000000353414436640005014537 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nelsonaalen.R \name{nelsonaalen} \alias{nelsonaalen} \alias{hazard} \title{Cumulative hazard rate or Nelson-Aalen estimator} \usage{ nelsonaalen(data, timevar, statusvar) } \arguments{ \item{data}{A data frame containing the data.} \item{timevar}{The name of the time variable in \code{data}.} \item{statusvar}{The name of the event variable, e.g. death, in \code{data}.} } \value{ A vector with \code{nrow(data)} elements containing the Nelson-Aalen estimates of the cumulative hazard function. } \description{ Calculates the cumulative hazard rate (Nelson-Aalen estimator) } \details{ This function is useful for imputing variables that depend on survival time.
White and Royston (2009) suggested using the cumulative hazard to the survival time H0(T) rather than T or log(T) as a predictor in imputation models. See section 7.1 of Van Buuren (2012) for an example. } \examples{ require(MASS) leuk$status <- 1 ## no censoring occurs in leuk data (MASS) ch <- nelsonaalen(leuk, time, status) plot(x = leuk$time, y = ch, ylab = "Cumulative hazard", xlab = "Time") ### See example on http://www.engineeredsoftware.com/lmar/pe_cum_hazard_function.htm time <- c(43, 67, 92, 94, 149, rep(149, 7)) status <- c(rep(1, 5), rep(0, 7)) eng <- data.frame(time, status) ch <- nelsonaalen(eng, time, status) plot(x = time, y = ch, ylab = "Cumulative hazard", xlab = "Time") } \references{ White, I. R., Royston, P. (2009). Imputing missing covariate values for the Cox model. \emph{Statistics in Medicine}, \emph{28}(15), 1982-1998. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-toomany.html#a-further-improvement-survival-as-predictor-variable}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/mdc.Rd0000644000176200001440000000522314330031606012772 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mdc.R \name{mdc} \alias{mdc} \title{Graphical parameter for missing data plots} \usage{ mdc( r = "observed", s = "symbol", transparent = TRUE, cso = grDevices::hcl(240, 100, 40, 0.7), csi = grDevices::hcl(0, 100, 40, 0.7), csc = "gray50", clo = grDevices::hcl(240, 100, 40, 0.8), cli = grDevices::hcl(0, 100, 40, 0.8), clc = "gray50" ) } \arguments{ \item{r}{A numerical or character vector. The numbers 1-6 request colors as follows: 1=\code{cso}, 2=\code{csi}, 3=\code{csc}, 4=\code{clo}, 5=\code{cli} and 6=\code{clc}. Alternatively, \code{r} may contain the strings ' \code{observed}', '\code{missing}', or '\code{both}', or abbreviations thereof.} \item{s}{A character vector containing the strings '\code{symbol}' or ' \code{line}', or abbreviations thereof.} \item{transparent}{A logical indicating whether alpha-transparency is allowed. The default is \code{TRUE}.} \item{cso}{The symbol color for the observed data. The default is a transparent blue.} \item{csi}{The symbol color for the missing or imputed data. The default is a transparent red.} \item{csc}{The symbol color for the combined observed and imputed data. The default is a grey color.} \item{clo}{The line color for the observed data. The default is a slightly darker transparent blue.} \item{cli}{The line color for the missing or imputed data. The default is a slightly darker transparent red.} \item{clc}{The line color for the combined observed and imputed data. The default is a grey color.} } \value{ \code{mdc()} returns a vector containing color definitions. The length of the output vector is calculate from the length of \code{r} and \code{s}. Elements of the input vectors are repeated if needed. } \description{ \code{mdc} returns colors used to distinguish observed, missing and combined data in plotting. \code{mice.theme} return a partial list of named objects that can be used as a theme in \code{stripplot}, \code{bwplot}, \code{densityplot} and \code{xyplot}. } \details{ This function eases consistent use of colors in plots. The default follows the Abayomi convention, which uses blue for observed data, red for missing or imputed data, and black for combined data. 
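A minimal base-graphics sketch of this convention (the \code{nhanes} data and the plotting choices are purely illustrative): rows with missing \code{chl} are drawn in the 'missing' color, the remaining rows in the 'observed' color.

\preformatted{
miss <- is.na(nhanes$chl)
plot(nhanes$age, nhanes$bmi,
     col = ifelse(miss, mdc(2), mdc(1)), pch = 19,
     xlab = "Age group", ylab = "BMI")
}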
} \examples{ # all six colors mdc(1:6) # line colors for observed and missing data mdc(c("obs", "mis"), "lin") } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. } \seealso{ \code{\link{hcl}}, \code{\link{rgb}}, \code{\link{xyplot.mids}}, \code{\link[lattice:xyplot]{xyplot}}, \code{\link[lattice:trellis.par.get]{trellis.par.set}} } \author{ Stef van Buuren, sept 2012. } \keyword{hplot} mice/man/pool.Rd0000644000176200001440000001652414334522175013220 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.R \name{pool} \alias{pool} \alias{pool.syn} \title{Combine estimates by pooling rules} \usage{ pool(object, dfcom = NULL, rule = NULL, custom.t = NULL) pool.syn(object, dfcom = NULL, rule = "reiter2003") } \arguments{ \item{object}{An object of class \code{mira} (produced by \code{with.mids()} or \code{as.mira()}), or a \code{list} with model fits.} \item{dfcom}{A positive number representing the degrees of freedom in the complete-data analysis. Normally, this would be the number of independent observations minus the number of fitted parameters. The default (\code{dfcom = NULL}) extracts this information in the following order: 1) the component \code{residual.df} returned by \code{glance()} if a \code{glance()} function is found, 2) the result of \code{df.residual()} applied to the first fitted model, and 3) as \code{999999}. In the last case, the warning \code{"Large sample assumed"} is printed. If the degrees of freedom are incorrect, specify the appropriate value manually.} \item{rule}{A string indicating the pooling rule. Currently supported are \code{"rubin1987"} (default, for missing data) and \code{"reiter2003"} (for synthetic data created from a complete data set).} \item{custom.t}{A custom character string to be parsed as a calculation rule for the total variance \code{t}. The custom rule can use the other calculated pooling statistics, where the dimensions must come from \code{.data$}. The default \code{t} calculation would have the form \code{".data$ubar + (1 + 1 / .data$m) * .data$b"}. See the examples for an illustration.} } \value{ An object of class \code{mipo}, which stands for 'multiple imputation pooled outcome'. For rule \code{"reiter2003"} values for \code{lambda} and \code{fmi} are set to `NA`, as these statistics do not apply for data synthesised from fully observed data. } \description{ The \code{pool()} function combines the estimates from \code{m} repeated complete data analyses. The typical sequence of steps to perform a multiple imputation analysis is: \enumerate{ \item Impute the missing data by the \code{mice()} function, resulting in a multiply imputed data set (class \code{mids}); \item Fit the model of interest (scientific model) on each imputed data set by the \code{with()} function, resulting in an object of class \code{mira}; \item Pool the estimates from each model into a single set of estimates and standard errors, resulting in an object of class \code{mipo}; \item Optionally, compare pooled estimates from different scientific models by the \code{D1()} or \code{D3()} functions. } A common error is to reverse steps 2 and 3, i.e., to pool the multiply-imputed data instead of the estimates. Doing so may severely bias the estimates of scientific interest and yield incorrect statistical intervals and p-values. The \code{pool()} function will detect this case.
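In code, the intended order of steps looks as follows (a minimal sketch on the built-in \code{nhanes} data; the model and settings are illustrative only):

\preformatted{
imp <- mice(nhanes, m = 5, print = FALSE, seed = 1)  # step 1: impute
fit <- with(imp, lm(chl ~ bmi + age))                # step 2: fit the model per imputed set
est <- pool(fit)                                     # step 3: pool estimates and variances
summary(est)
}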
} \details{ The \code{pool()} function averages the estimates of the complete data model, computes the total variance over the repeated analyses by Rubin's rules (Rubin, 1987, p. 76), and computes the following diagnostic statistics per estimate: \enumerate{ \item Relative increase in variance due to nonresponse {\code{r}}; \item Residual degrees of freedom for hypothesis testing {\code{df}}; \item Proportion of total variance due to missingness {\code{lambda}}; \item Fraction of missing information {\code{fmi}}. } The degrees of freedom calculation for the pooled estimates uses the Barnard-Rubin adjustment for small samples (Barnard and Rubin, 1999). The \code{pool.syn()} function combines estimates by Reiter's partially synthetic data pooling rules (Reiter, 2003). This combination rule assumes that the data that is synthesised is completely observed. Pooling differs from Rubin's method in the calculation of the total variance and the degrees of freedom. Pooling requires the following input from each fitted model: \enumerate{ \item the estimates of the model; \item the standard error of each estimate; \item the residual degrees of freedom of the model. } The \code{pool()} and \code{pool.syn()} functions rely on the \code{broom::tidy} and \code{broom::glance} for extracting these parameters. Since \code{mice 3.0+}, the \code{broom} package takes care of filtering out the relevant parts of the complete-data analysis. It may happen that you'll see the messages like \code{Error: No tidy method for objects of class ...} or \code{Error: No glance method for objects of class ...}. The message means that your complete-data method used in \code{with(imp, ...)} has no \code{tidy} or \code{glance} method defined in the \code{broom} package. The \code{broom.mixed} package contains \code{tidy} and \code{glance} methods for mixed models. If you are using a mixed model, first run \code{library(broom.mixed)} before calling \code{pool()}. If no \code{tidy} or \code{glance} methods are defined for your analysis tabulate the \code{m} parameter estimates and their variance estimates (the square of the standard errors) from the \code{m} fitted models stored in \code{fit$analyses}. For each parameter, run \code{\link{pool.scalar}} to obtain the pooled parameters estimate, its variance, the degrees of freedom, the relative increase in variance and the fraction of missing information. An alternative is to write your own \code{glance()} and \code{tidy()} methods and add these to \code{broom} according to the specifications given in \url{https://broom.tidymodels.org}. In versions prior to \code{mice 3.0} pooling required that \code{coef()} and \code{vcov()} methods were available for fitted objects. \emph{This feature is no longer supported}. The reason is that \code{vcov()} methods are inconsistent across packages, leading to buggy behaviour of the \code{pool()} function. Since \code{mice 3.13.2} function \code{pool()} uses the robust the standard error estimate for pooling when it can extract \code{robust.se} from the \code{tidy()} object. } \examples{ # impute missing data, analyse and pool using the classic MICE workflow imp <- mice(nhanes, maxit = 2, m = 2) fit <- with(data = imp, exp = lm(bmi ~ hyp + chl)) summary(pool(fit)) # generate fully synthetic data, analyse and pool imp <- mice(cars, maxit = 2, m = 2, where = matrix(TRUE, nrow(cars), ncol(cars)) ) fit <- with(data = imp, exp = lm(speed ~ dist)) summary(pool.syn(fit)) # use a custom pooling rule for the total variance about the estimate # e.g. 
use t = b + b/m instead of t = ubar + b + b/m imp <- mice(nhanes, maxit = 2, m = 2) fit <- with(data = imp, exp = lm(bmi ~ hyp + chl)) pool(fit, custom.t = ".data$b + .data$b / .data$m") } \references{ Barnard, J. and Rubin, D.B. (1999). Small sample degrees of freedom with multiple imputation. \emph{Biometrika}, 86, 948-955. Rubin, D.B. (1987). \emph{Multiple Imputation for Nonresponse in Surveys}. New York: John Wiley and Sons. Reiter, J.P. (2003). Inference for Partially Synthetic, Public Use Microdata Sets. \emph{Survey Methodology}, \bold{29}, 181-189. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{with.mids}}, \code{\link{as.mira}}, \code{\link{pool.scalar}}, \code{\link[broom:reexports]{glance}}, \code{\link[broom:reexports]{tidy}} \url{https://github.com/amices/mice/issues/142}, \url{https://github.com/amices/mice/issues/274} } mice/man/nic.Rd0000644000176200001440000000124214330031606012775 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ncc.R \name{nic} \alias{nic} \title{Number of incomplete cases} \usage{ nic(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} can be a vector.} } \value{ Number of elements in \code{x} with incomplete data. } \description{ Calculates the number of incomplete cases. } \examples{ nic(nhanes) # the remaining 12 rows nic(nhanes[, c("bmi", "hyp")]) # number of cases with incomplete bmi and hyp } \seealso{ \code{\link{ncc}}, \code{\link{cci}} } \author{ Stef van Buuren, 2017 } mice/man/glance.mipo.Rd0000644000176200001440000000121413666252233014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidiers.R \name{glance.mipo} \alias{glance.mipo} \title{Glance method to extract information from a `mipo` object} \usage{ \method{glance}{mipo}(x, ...) } \arguments{ \item{x}{An object with multiply-imputed models from `mice` (class: `mipo`)} \item{...}{extra arguments (not used)} } \value{ a data frame with one row and the following columns: \itemize{ \item nimp \item nobs } } \description{ Glance method to extract information from a `mipo` object } \note{ If x contains `lm` models, R2 and Adj.R2 are included in the output } \concept{tidiers} \keyword{internal} mice/man/ic.Rd0000644000176200001440000000153414330031606012623 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cc.R \name{ic} \alias{ic} \title{Select incomplete cases} \usage{ ic(x) } \arguments{ \item{x}{An \code{R} object. Methods are available for classes \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} could be a vector.} } \value{ A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the incomplete cases. } \description{ Extracts incomplete cases from a data set. The companion function for selecting the complete cases is \code{\link{cc}}. } \examples{ ic(nhanes) # get the 12 rows with incomplete cases ic(nhanes[1:10, ]) # incomplete cases within the first ten rows ic(nhanes[, c("bmi", "hyp")]) # restrict extraction to variables bmi and hyp } \seealso{ \code{\link{cc}}, \code{\link{ici}} } \author{ Stef van Buuren, 2017.
} \keyword{univar} mice/man/as.mira.Rd0000644000176200001440000000107014330031606013555 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.R \name{as.mira} \alias{as.mira} \title{Create a \code{mira} object from repeated analyses} \usage{ as.mira(fitlist) } \arguments{ \item{fitlist}{A list containing $m$ fitted analysis objects} } \value{ An S3 object of class \code{mira}. } \description{ The \code{as.mira()} function takes the results of repeated complete-data analysis stored as a list, and turns it into a \code{mira} object that can be pooled. } \seealso{ \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren } mice/man/selfreport.Rd0000644000176200001440000000716214330031606014420 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/selfreport.R \docType{data} \name{selfreport} \alias{selfreport} \alias{mgg} \title{Self-reported and measured BMI} \format{ A data frame with 2060 rows and 15 variables: \describe{ \item{src}{Study, either \code{krul} or \code{mgg} (factor)} \item{id}{Person identification number} \item{pop}{Population, all \code{NL} (factor)} \item{age}{Age of respondent in years} \item{sex}{Sex of respondent (factor)} \item{hm}{Height measured (cm)} \item{wm}{Weight measured (kg)} \item{hr}{Height reported (cm)} \item{wr}{Weight reported (kg)} \item{prg}{Pregnancy (factor), all \code{Not pregnant}} \item{edu}{Educational level (factor)} \item{etn}{Ethnicity (factor)} \item{web}{Obtained through web survey (factor)} \item{bm}{BMI measured (kg/m2)} \item{br}{BMI reported (kg/m2)} } } \source{ Krul, A., Daanen, H. A. M., Choi, H. (2010). Self-reported and measured weight, height and body mass index (BMI) in Italy, The Netherlands and North America. \emph{European Journal of Public Health}, \emph{21}(4), 414-419. Van Keulen, H.M.,, Chorus, A.M.J., Verheijden, M.W. (2011). \emph{Monitor Convenant Gezond Gewicht Nulmeting (determinanten van) beweeg- en eetgedrag van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar)}. TNO/LS 2011.016. Leiden: TNO. Van der Klauw, M., Van Keulen, H.M., Verheijden, M.W. (2011). \emph{Monitor Convenant Gezond Gewicht Beweeg- en eetgedrag van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar) in 2010 en 2011.} TNO/LS 2011.055. Leiden: TNO. (in Dutch) Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-prevalence.html#sec:srcdata}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \description{ Dataset containing height and weight data (measured, self-reported) from two studies. } \details{ This dataset combines two datasets: \code{krul} data (Krul, 2010) (1257 persons) and the \code{mgg} data (Van Keulen 2011; Van der Klauw 2011) (803 persons). The \code{krul} dataset contains height and weight (both measures and self-reported) from 1257 Dutch adults, whereas the \code{mgg} dataset contains self-reported height and weight for 803 Dutch adults. Section 7.3 in Van Buuren (2012) shows how the missing measured data can be imputed in the \code{mgg} data, so corrected prevalence estimates can be calculated. 
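A quick, purely illustrative check of why such correction matters (the 30 kg/m2 cut-off below is used only as an example): prevalence estimates based on self-reported BMI typically differ from those based on measured BMI, and measured BMI is missing for the \code{mgg} respondents, which is exactly what the imputation in the example below repairs.

\preformatted{
mean(selfreport$br >= 30, na.rm = TRUE)  # prevalence based on reported BMI
mean(selfreport$bm >= 30, na.rm = TRUE)  # based on measured BMI (krul study only)
table(selfreport$src, is.na(selfreport$bm))
}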
} \examples{ md.pattern(selfreport[, c("age", "sex", "hm", "hr", "wm", "wr")]) ### FIMD Section 7.3.5 Application bmi <- function(h, w) { return(w / (h / 100)^2) } init <- mice(selfreport, maxit = 0) meth <- init$meth meth["bm"] <- "~bmi(hm,wm)" pred <- init$pred pred[, c("src", "id", "web", "bm", "br")] <- 0 imp <- mice(selfreport, pred = pred, meth = meth, seed = 66573, maxit = 2, m = 1) ## imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=20, m=10) ### Like FIMD Figure 7.6 cd <- complete(imp, 1) xy <- xy.coords(cd$bm, cd$br - cd$bm) plot(xy, col = mdc(2), xlab = "Measured BMI", ylab = "Reported - Measured BMI", xlim = c(17, 45), ylim = c(-5, 5), type = "n", lwd = 0.7 ) polygon(x = c(30, 20, 30), y = c(0, 10, 10), col = "grey95", border = NA) polygon(x = c(30, 40, 30), y = c(0, -10, -10), col = "grey95", border = NA) abline(0, 0, lty = 2, lwd = 0.7) idx <- cd$src == "krul" xyc <- xy xyc$x <- xy$x[idx] xyc$y <- xy$y[idx] xys <- xy xys$x <- xy$x[!idx] xys$y <- xy$y[!idx] points(xyc, col = mdc(1), cex = 0.7) points(xys, col = mdc(2), cex = 0.7) lines(lowess(xyc), col = mdc(4), lwd = 2) lines(lowess(xys), col = mdc(5), lwd = 2) text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3) box(lwd = 1) } \keyword{datasets} mice/man/mice.impute.2lonly.norm.Rd0000644000176200001440000001266214330031606016643 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.norm.R \name{mice.impute.2lonly.norm} \alias{mice.impute.2lonly.norm} \alias{2lonly.norm} \title{Imputation at level 2 by Bayesian linear regression} \usage{ mice.impute.2lonly.norm(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Group identifier must be specified by '-2'. Predictors must be specified by '1'.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data at level 2 using Bayesian linear regression analysis. Variables are level 1 are aggregated at level 2. The group identifier at level 2 must be indicated by \code{type = -2} in the \code{predictorMatrix}. } \details{ This function allows in combination with \code{\link{mice.impute.2l.pan}} switching regression imputation between level 1 and level 2 as described in Yucel (2008) or Gelman and Hill (2007, p. 541). The function checks for partial missing level-2 data. Level-2 data are assumed to be constant within the same cluster. If one or more entries are missing, then the procedure aborts with an error message that identifies the cluster with incomplete level-2 data. In such cases, one may first fill in the cluster mean (or mode) by the \code{2lonly.mean} method to remove inconsistencies. } \note{ For a more general approach, see \code{miceadds::mice.impute.2lonly.function()}. } \examples{ # simulate some data # x,y ... level 1 variables # v,w ... 
level 2 variables G <- 250 # number of groups n <- 20 # number of persons beta <- .3 # regression coefficient rho <- .30 # residual intraclass correlation rho.miss <- .10 # correlation with missing response missrate <- .50 # missing proportion y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) w <- rep(round(rnorm(G), 2), each = n) v <- rep(round(runif(G, 0, 3)), each = n) x <- rnorm(G * n) y <- y1 + beta * x + .2 * w + .1 * v dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y, "w" = w, "v" = v) dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA dfr[rep(rnorm(G), each = n) < qnorm(missrate), "w"] <- NA dfr[rep(rnorm(G), each = n) < qnorm(missrate), "v"] <- NA # empty mice imputation imp0 <- mice(as.matrix(dfr), maxit = 0) predM <- imp0$predictorMatrix impM <- imp0$method # multilevel imputation predM1 <- predM predM1[c("w", "y", "v"), "group"] <- -2 predM1["y", "x"] <- 1 # fixed x effects imputation impM1 <- impM impM1[c("y", "w", "v")] <- c("2l.pan", "2lonly.norm", "2lonly.pmm") # y ... imputation using pan # w ... imputation at level 2 using norm # v ... imputation at level 2 using pmm imp1 <- mice(as.matrix(dfr), m = 1, predictorMatrix = predM1, method = impM1, maxit = 1, paniter = 500 ) # Demonstration that 2lonly.norm aborts for partial missing data. # Better use 2lonly.mean for repair. data <- data.frame( patid = rep(1:4, each = 5), sex = rep(c(1, 2, 1, 2), each = 5), crp = c( 68, 78, 93, NA, 143, 5, 7, 9, 13, NA, 97, NA, 56, 52, 34, 22, 30, NA, NA, 45 ) ) pred <- make.predictorMatrix(data) pred[, "patid"] <- -2 # only missing value (out of five) for patid == 1 data[3, "sex"] <- NA \dontrun{ # The following fails because 2lonly.norm found partially missing # level-2 data # imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"), # predictorMatrix = pred, maxit = 1, m = 2) # > iter imp variable # > 1 1 sex crpError in .imputation.level2(y = y, ... : # > Method 2lonly.norm found the following clusters with partially missing # > level-2 data: 1 # > Method 2lonly.mean can fix such inconsistencies. } # In contrast, if all sex values are missing for patid == 1, it runs fine, # except on r-patched-solaris-x86. I used dontrun to evade CRAN errors. \dontrun{ data[1:5, "sex"] <- NA imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"), predictorMatrix = pred, maxit = 1, m = 2 ) } } \references{ Gelman, A. and Hill, J. (2007). \emph{Data analysis using regression and multilevel/hierarchical models}. Cambridge, Cambridge University Press. Yucel, RM (2008). Multiple imputation inference for multivariate multilevel continuous data with ignorable non-response. \emph{Philosophical Transactions of the Royal Society A}, \bold{366}, 2389-2404. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \seealso{ \code{\link{mice.impute.norm}}, \code{\link{mice.impute.2lonly.pmm}}, \code{\link{mice.impute.2l.pan}}, \code{\link{mice.impute.2lonly.mean}} Other univariate-2lonly: \code{\link{mice.impute.2lonly.mean}()}, \code{\link{mice.impute.2lonly.pmm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de} } \concept{univariate-2lonly} mice/man/glm.mids.Rd0000644000176200001440000000310114330031606013732 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lm.R \name{glm.mids} \alias{glm.mids} \title{Generalized linear model for \code{mids} object} \usage{ glm.mids(formula, family = gaussian, data, ...) } \arguments{ \item{formula}{a formula expression as for other regression models, of the form response ~ predictors. See the documentation of \code{\link{lm}} and \code{\link{formula}} for details.} \item{family}{The family of the glm model} \item{data}{An object of type \code{mids}, which stands for 'multiply imputed data set', typically created by function \code{mice()}.} \item{\dots}{Additional parameters passed to \code{\link{glm}}.} } \value{ An objects of class \code{mira}, which stands for 'multiply imputed repeated analysis'. This object contains \code{data$m} distinct \code{glm.objects}, plus some descriptive information. } \description{ Applies \code{glm()} to a multiply imputed data set } \details{ This function is included for backward compatibility with V1.0. The function is superseded by \code{\link{with.mids}}. } \examples{ imp <- mice(nhanes) # logistic regression on the imputed data fit <- glm.mids((hyp == 2) ~ bmi + chl, data = imp, family = binomial) fit } \references{ Van Buuren, S., Groothuis-Oudshoorn, C.G.M. (2000) \emph{Multivariate Imputation by Chained Equations: MICE V1.0 User's manual.} Leiden: TNO Quality of Life. } \seealso{ \code{\link{with.mids}}, \code{\link{glm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{multivariate} mice/man/is.mads.Rd0000644000176200001440000000051513666252075013604 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mads} \alias{is.mads} \title{Check for \code{mads} object} \usage{ is.mads(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mads} } \description{ Check for \code{mads} object } mice/man/extend.formulas.Rd0000644000176200001440000000525414330031606015351 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{extend.formulas} \alias{extend.formulas} \title{Extends formula's with predictor matrix settings} \usage{ extend.formulas( formulas, data, blocks, predictorMatrix = NULL, auxiliary = TRUE, include.intercept = FALSE, ... ) } \arguments{ \item{formulas}{A named list of formula's, or expressions that can be converted into formula's by \code{as.formula}. List elements correspond to blocks. The block to which the list element applies is identified by its name, so list names must correspond to block names. The \code{formulas} argument is an alternative to the \code{predictorMatrix} argument that allows for more flexibility in specifying imputation models, e.g., for specifying interaction terms.} \item{data}{A data frame or a matrix containing the incomplete data. 
Missing values are coded as \code{NA}.} \item{blocks}{List of vectors with variable names per block. List elements may be named to identify blocks. Variables within a block are imputed by a multivariate imputation method (see \code{method} argument). By default each variable is placed into its own block, which is effectively fully conditional specification (FCS) by univariate models (variable-by-variable imputation). Only variables whose names appear in \code{blocks} are imputed. The relevant columns in the \code{where} matrix are set to \code{FALSE} for variables that are not block members. A variable may appear in multiple blocks. In that case, it is effectively re-imputed each time that it is visited.} \item{predictorMatrix}{A numeric matrix of \code{length(blocks)} rows and \code{ncol(data)} columns, containing 0/1 data specifying the set of predictors to be used for each target column. Each row corresponds to a variable block, i.e., a set of variables to be imputed. A value of \code{1} means that the column variable is used as a predictor for the target block (in the rows). By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} rows and columns with all 1's, except for the diagonal. Note: For two-level imputation models (which have \code{"2l"} in their names) other codes (e.g., \code{2} or \code{-2}) are also allowed.} \item{auxiliary}{A logical that indicates whether the variables listed in \code{predictors} should be added to the formula as main effects. The default is \code{TRUE}.} \item{include.intercept}{A logical that indicates whether the intercept should be included in the result.} \item{...}{Named arguments that are passed down to the univariate imputation functions.} } \value{ A list of formula's } \description{ Extends formula's with predictor matrix settings } \keyword{internal} mice/man/md.pairs.Rd0000644000176200001440000000275014330031647013753 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/md.pairs.R \name{md.pairs} \alias{md.pairs} \title{Missing data pattern by variable pairs} \usage{ md.pairs(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} } \value{ A list of four components named \code{rr}, \code{rm}, \code{mr} and \code{mm}. Each component is a square numerical matrix containing the number of observations within each of the four missing data patterns. } \description{ Number of observations per variable pair. } \details{ The four components in the output value have the following interpretation: \describe{ \item{\code{rr}}{response-response, both variables are observed} \item{\code{rm}}{response-missing, row observed, column missing} \item{\code{mr}}{missing-response, row missing, column observed} \item{\code{mm}}{missing-missing, both variables are missing} } } \examples{ pat <- md.pairs(nhanes) pat # show that these four matrices decompose the total sample size # for each pair pat$rr + pat$rm + pat$mr + pat$mm # percentage of usable cases to impute row variable from column variable round(100 * pat$mr / (pat$mr + pat$mm)) } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67.
\doi{10.18637/jss.v045.i03} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2009 } \keyword{univar} mice/man/reexports.Rd0000644000176200001440000000100114330031606014250 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/filter.R, R/tidiers.R \docType{import} \name{reexports} \alias{reexports} \alias{filter} \alias{tidy} \alias{glance} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr]{filter}}} \item{generics}{\code{\link[generics]{glance}}, \code{\link[generics]{tidy}}} }} mice/man/pool.r.squared.Rd0000644000176200001440000000365514330031647015117 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.r.squared.R \name{pool.r.squared} \alias{pool.r.squared} \title{Pools R^2 of m models fitted to multiply-imputed data} \usage{ pool.r.squared(object, adjusted = FALSE) } \arguments{ \item{object}{An object of class 'mira' or 'mipo', produced by \code{lm.mids}, \code{with.mids}, or \code{pool} with \code{lm} as modeling function.} \item{adjusted}{A logical value. If adjusted=TRUE then the adjusted R^2 is calculated. The default value is FALSE.} } \value{ Returns a 1x4 table with components. Component \code{est} is the pooled R^2 estimate. Component \code{lo95} is the 95 \% lower bound of the pooled R^2. Component \code{hi95} is the 95 \% upper bound of the pooled R^2. Component \code{fmi} is the fraction of missing information due to nonresponse. } \description{ The function pools the coefficients of determination R^2 or the adjusted coefficients of determination (R^2_a) obtained with the \code{lm} modeling function. For pooling it uses the Fisher \emph{z}-transformation. } \examples{ imp <- mice(nhanes, print = FALSE, seed = 16117) fit <- with(imp, lm(chl ~ age + hyp + bmi)) # input: mira object pool.r.squared(fit) pool.r.squared(fit, adjusted = TRUE) # input: mipo object est <- pool(fit) pool.r.squared(est) pool.r.squared(est, adjusted = TRUE) } \references{ Harel, O (2009). The estimation of R^2 and adjusted R^2 in incomplete data sets using multiple imputation, Journal of Applied Statistics, 36:1109-1118. Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{pool}},\code{\link{pool.scalar}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 } \keyword{htest} mice/man/bwplot.mids.Rd0000644000176200001440000001742614330031647014506 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bwplot.R \name{bwplot.mids} \alias{bwplot.mids} \alias{bwplot} \title{Box-and-whisker plot of observed and imputed data} \usage{ \method{bwplot}{mids}( x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), mayreplicate = TRUE, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE ) } \arguments{ \item{x}{A \code{mids} object, typically created by \code{mice()} or \code{mice.mids()}.} \item{data}{Formula that selects the data to be plotted. 
This argument follows the \pkg{lattice} rules for \emph{formulas}, describing the primary variables (used for the per-panel display) and the optional conditioning variables (which define the subsets plotted in different panels) to be used in the plot. The formula is evaluated on the complete data set in the \code{long} form. Legal variable names for the formula include \code{names(x$data)} plus the two administrative factors \code{.imp} and \code{.id}. \bold{Extended formula interface:} The primary variable terms (both the LHS \code{y} and RHS \code{x}) may consist of multiple terms separated by a \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in \emph{separate panels}. This behavior differs from standard \pkg{lattice}. \emph{Only combine terms of the same type}, i.e. only factors or only numerical variables. Mixing numerical and categorical data occasionally produces odds labeling of vertical axis. For convenience, in \code{stripplot()} and \code{bwplot} the formula \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single \code{y}, and does not (yet) work for \code{y1+y2~.imp}.} \item{na.groups}{An expression evaluating to a logical vector indicating which two groups are distinguished (e.g. using different colors) in the display. The environment in which this expression is evaluated in the response indicator \code{is.na(x$data)}. The default \code{na.group = NULL} contrasts the observed and missing data in the LHS \code{y} variable of the display, i.e. groups created by \code{is.na(y)}. The expression \code{y} creates the groups according to \code{is.na(y)}. The expression \code{y1 & y2} creates groups by \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as \code{is.na(y1) | is.na(y2)}, and so on.} \item{groups}{This is the usual \code{groups} arguments in \pkg{lattice}. It differs from \code{na.groups} because it evaluates in the completed data \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas \code{na.groups} evaluates in the response indicator. See \code{\link{xyplot}} for more details. When both \code{na.groups} and \code{groups} are specified, \code{na.groups} takes precedence, and \code{groups} is ignored.} \item{as.table}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{theme}{A named list containing the graphical parameters. The default function \code{mice.theme} produces a short list of default colors, line width, and so on. The extensive list may be obtained from \code{trellis.par.get()}. Global graphical parameters like \code{col} or \code{cex} in high-level calls are still honored, so first experiment with the global parameters. Many setting consists of a pair. For example, \code{mice.theme} defines two symbol colors. The first is for the observed data, the second for the imputed data. The theme settings only exist during the call, and do not affect the trellis graphical parameters.} \item{mayreplicate}{A logical indicating whether color, line widths, and so on, may be replicated. The graphical functions attempt to choose "intelligent" graphical parameters. For example, the same color can be replicated for different element, e.g. use all reds for the imputed data. 
Replication may be switched off by setting the flag to \code{FALSE}, in order to allow the user to gain full control.} \item{allow.multiple}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{outer}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{drop.unused.levels}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{\dots}{Further arguments, usually not directly processed by the high-level functions documented here, but instead passed on to other functions.} \item{subscripts}{See \code{\link[lattice:xyplot]{xyplot}}.} \item{subset}{See \code{\link[lattice:xyplot]{xyplot}}.} } \value{ The high-level functions documented here, as well as other high-level Lattice functions, return an object of class \code{"trellis"}. The \code{\link[lattice:update.trellis]{update}} method can be used to subsequently update components of the object, and the \code{\link[lattice:print.trellis]{print}} method (usually called by default) will plot it on an appropriate plotting device. } \description{ Plotting methods for imputed data using \pkg{lattice}. \code{bwplot} produces box-and-whisker plots. The function automatically separates the observed and imputed data. The functions extend the usual features of \pkg{lattice}. } \details{ The argument \code{na.groups} may be used to specify (combinations of) missingness in any of the variables. The argument \code{groups} can be used to specify groups based on the variable values themselves. Only one of the two may be active at the same time. When both are specified, \code{na.groups} takes precedence over \code{groups}. Use \code{subset} and \code{na.groups} together to plot parts of the data. For example, select the first imputed data set by \code{subset=.imp==1}. Graphical parameters like \code{col}, \code{pch} and \code{cex} can be specified in the arguments list to alter the plotting symbols. If \code{length(col)==2}, the color specification defines the observed and missing groups: \code{col[1]} is the color of the 'observed' data, \code{col[2]} is the color of the missing or imputed data. A convenient color choice is \code{col=mdc(1:2)}, a transparent blue color for the observed data, and a transparent red color for the imputed data. A good choice is \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the duration of the session by running \code{mice.theme()}. } \note{ The first two arguments (\code{x} and \code{data}) are reversed compared to the standard Trellis syntax implemented in \pkg{lattice}. This reversal was necessary in order to benefit from automatic method dispatch. In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas in \pkg{lattice} the argument \code{x} is always a formula. In \pkg{mice} the argument \code{data} is always a formula object, whereas in \pkg{lattice} the argument \code{data} is usually a data frame. All other arguments have identical interpretation. } \examples{ imp <- mice(boys, maxit = 1) ### box-and-whisker plot per imputation of all numerical variables bwplot(imp) ### tv (testicular volume), conditional on region bwplot(imp, tv ~ .imp | reg) ### same data, organized in a different way bwplot(imp, tv ~ reg | .imp, theme = list()) } \references{ Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data Visualization with R}, Springer. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67.
\doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, \code{\link{stripplot}}, \code{\link{lattice}} for an overview of the package, as well as \code{\link[lattice:xyplot]{bwplot}}, \code{\link[lattice:panel.xyplot]{panel.bwplot}}, \code{\link[lattice:print.trellis]{print.trellis}}, \code{\link[lattice:trellis.par.get]{trellis.par.set}} } \author{ Stef van Buuren } \keyword{hplot} mice/man/mice.impute.norm.Rd0000644000176200001440000000637214330031647015433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{mice.impute.norm} \alias{mice.impute.norm} \alias{norm} \title{Imputation by Bayesian linear regression} \usage{ mice.impute.norm(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Calculates imputations for univariate missing data by Bayesian linear regression, also known as the normal model. } \details{ Imputation of \code{y} by the normal model by the method defined by Rubin (1987, p. 167). The procedure is as follows: \enumerate{ \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge parameter \eqn{\kappa}.} \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}} \item{Draw a random variable \eqn{\dot g \sim \chi^2_\nu} with \eqn{\nu=n_1 - q}.} \item{Calculate \eqn{\dot\sigma^2 = (y_{obs} - X_{obs}\hat\beta)'(y_{obs} - X_{obs}\hat\beta)/\dot g.}} \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} \item{Draw \eqn{n_0} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_2}.} \item{Calculate the \eqn{n_0} values \eqn{y_{imp} = X_{mis}\dot\beta + \dot z_2\dot\sigma}.} } Using \code{mice.impute.norm} for all columns emulates Schafer's NORM method (Schafer, 1997). } \references{ Rubin, D.B (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley & Sons. Schafer, J.L. (1997). Analysis of incomplete multivariate data. London: Chapman & Hall. 
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/employee.Rd0000644000176200001440000000254214330031606014047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/employee.R \docType{data} \name{employee} \alias{employee} \title{Employee selection data} \format{ A data frame with 20 rows and 3 variables: \describe{ \item{IQ}{candidate IQ score} \item{wbeing}{candidate well-being score} \item{jobperf}{candidate job performance score} } } \source{ Enders (2010), Applied Missing Data Analysis, p. 218 } \usage{ employee } \description{ A toy example from Craig Enders. } \details{ Enders describes these data as follows: I designed these data to mimic an employee selection scenario in which prospective employees complete an IQ test and a psychological well-being questionnaire during their interview. The company subsequently hires the applicants that score in the upper half of the IQ distribution, and a supervisor rates their job performance following a 6-month probationary period. Note that the job performance scores are missing at random (MAR) (i.e. individuals in the lower half of the IQ distribution were never hired, and thus have no performance rating). In addition, I randomly deleted three of the well-being scores in order to mimic a situation where the applicant's well-being questionnaire is inadvertently lost. A larger version of this data set is present as \code{\link[miceadds:data.enders]{data.enders.employee}}. } \keyword{datasets} mice/man/mice.impute.lasso.norm.Rd0000644000176200001440000000644514330031647016554 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lasso.norm.R \name{mice.impute.lasso.norm} \alias{mice.impute.lasso.norm} \alias{lasso.norm} \title{Imputation by direct use of lasso linear regression} \usage{ mice.impute.lasso.norm(y, ry, x, wy = NULL, nfolds = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nfolds}{The number of folds for the cross-validation of the lasso penalty.
The default is 10.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing normal data using lasso linear regression with bootstrap. } \details{ The method consists of the following steps: \enumerate{ \item For a given y variable under imputation, draw a bootstrap version y* with replacement from the observed cases \code{y[ry]}, and store in x* the corresponding values from \code{x[ry, ]}. \item Fit a regularised (lasso) linear regression with y* as the outcome, and x* as predictors. A vector of regression coefficients bhat is obtained. All of these coefficients are considered random draws from the posterior distribution of the imputation model parameters. Some of these coefficients will be shrunk to 0. \item Draw the imputed values from the predictive distribution defined by the original (non-bootstrap) data, bhat, and estimated error variance. } The method is based on the Direct Use of Regularized Regression (DURR) proposed by Zhao & Long (2016) and Deng et al (2016). } \references{ Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for general missing data patterns in the presence of high-dimensional data. Scientific reports, 6(1), 1-10. Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of high-dimensional data. Statistical Methods in Medical Research, 25(5), 2021-2035. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Edoardo Costantini, 2021 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.2lonly.pmm.Rd0000644000176200001440000001131014330031606016456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2lonly.pmm.R \name{mice.impute.2lonly.pmm} \alias{mice.impute.2lonly.pmm} \alias{2lonly.pmm} \title{Imputation at level 2 by predictive mean matching} \usage{ mice.impute.2lonly.pmm(y, ry, x, type, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Group identifier must be specified by '-2'. Predictors must be specified by '1'.} \item{wy}{Logical vector of length \code{length(y)}.
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data at level 2 using predictive mean matching. Variables at level 1 are aggregated at level 2. The group identifier at level 2 must be indicated by \code{type = -2} in the \code{predictorMatrix}. } \details{ This function allows, in combination with \code{\link{mice.impute.2l.pan}}, switching regression imputation between level 1 and level 2 as described in Yucel (2008) or Gelman and Hill (2007, p. 541). The function checks for partial missing level-2 data. Level-2 data are assumed to be constant within the same cluster. If one or more entries are missing, then the procedure aborts with an error message that identifies the cluster with incomplete level-2 data. In such cases, one may first fill in the cluster mean (or mode) by the \code{2lonly.mean} method to remove inconsistencies. } \note{ The extension to categorical variables transforms a dependent factor variable by means of the \code{as.integer()} function. This may make sense for categories that are approximately ordered, but less so for pure nominal measures. For a more general approach, see \code{miceadds::mice.impute.2lonly.function()}. } \examples{ # simulate some data # x,y ... level 1 variables # v,w ... level 2 variables G <- 250 # number of groups n <- 20 # number of persons beta <- .3 # regression coefficient rho <- .30 # residual intraclass correlation rho.miss <- .10 # correlation with missing response missrate <- .50 # missing proportion y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) w <- rep(round(rnorm(G), 2), each = n) v <- rep(round(runif(G, 0, 3)), each = n) x <- rnorm(G * n) y <- y1 + beta * x + .2 * w + .1 * v dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y, "w" = w, "v" = v) dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA dfr[rep(rnorm(G), each = n) < qnorm(missrate), "w"] <- NA dfr[rep(rnorm(G), each = n) < qnorm(missrate), "v"] <- NA # empty mice imputation imp0 <- mice(as.matrix(dfr), maxit = 0) predM <- imp0$predictorMatrix impM <- imp0$method # multilevel imputation predM1 <- predM predM1[c("w", "y", "v"), "group"] <- -2 predM1["y", "x"] <- 1 # fixed x effects imputation impM1 <- impM impM1[c("y", "w", "v")] <- c("2l.pan", "2lonly.norm", "2lonly.pmm") # turn v into a categorical variable dfr$v <- as.factor(dfr$v) levels(dfr$v) <- LETTERS[1:4] # y ... imputation using pan # w ... imputation at level 2 using norm # v ... imputation at level 2 using pmm # skip imputation on solaris is.solaris <- function() grepl("SunOS", Sys.info()["sysname"]) if (!is.solaris()) { imp <- mice(dfr, m = 1, predictorMatrix = predM1, method = impM1, maxit = 1, paniter = 500 ) } } \references{ Gelman, A. and Hill, J. (2007). \emph{Data analysis using regression and multilevel/hierarchical models}. Cambridge, Cambridge University Press. Yucel, RM (2008). Multiple imputation inference for multivariate multilevel continuous data with ignorable non-response. \emph{Philosophical Transactions of the Royal Society A}, \bold{366}, 2389-2404. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL.
} \seealso{ \code{\link{mice.impute.pmm}}, \code{\link{mice.impute.2lonly.norm}}, \code{\link{mice.impute.2l.pan}}, \code{\link{mice.impute.2lonly.mean}} Other univariate-2lonly: \code{\link{mice.impute.2lonly.mean}()}, \code{\link{mice.impute.2lonly.norm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de} } \concept{univariate-2lonly} mice/man/mice.impute.logreg.Rd0000644000176200001440000000650614330031647015736 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.logreg.R \name{mice.impute.logreg} \alias{mice.impute.logreg} \title{Imputation by logistic regression} \usage{ mice.impute.logreg(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using logistic regression. } \details{ Imputation for binary response variables by the Bayesian logistic regression model (Rubin 1987, p. 169-170). The Bayesian method consists of the following steps: \enumerate{ \item Fit a logit, and find (bhat, V(bhat)) \item Draw BETA from N(bhat, V(bhat)) \item Compute predicted scores for m.d., i.e. logit-1(X BETA) \item Compare the score to a random (0,1) deviate, and impute. } The method relies on the standard \code{glm.fit} function. Warnings from \code{glm.fit} are suppressed. Perfect prediction is handled by the data augmentation method. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN 90-74479-08-1. Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-Plus (2nd ed). Springer, Berlin. White, I., Daniel, R. and Royston, P (2010). Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. Computational Statistics and Data Analysis, 54:22672275. 
} \seealso{ \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/pmm.match.Rd0000644000176200001440000000337314335404116014124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.pmm.R \name{.pmm.match} \alias{.pmm.match} \title{Finds an imputed value from matches in the predictive metric (deprecated)} \usage{ .pmm.match(z, yhat = yhat, y = y, donors = 5, ...) } \arguments{ \item{z}{A scalar containing the predicted value for the current case to be imputed.} \item{yhat}{A vector containing the predicted values for all cases with an observed outcome.} \item{y}{A vector of \code{length(yhat)} elements containing the observed outcome} \item{donors}{The size of the donor pool among which a draw is made. The default is \code{donors = 5}. Setting \code{donors = 1} always selects the closest match. Values between 3 and 10 provide the best results. Note: This setting was changed from 3 to 5 in version 2.19, based on simulation work by Tim Morris (UCL).} \item{\dots}{Other parameters (not used).} } \value{ A scalar containing the observed value of the selected donor. } \description{ This function finds matches among the observed data in the predictive mean metric. It selects the \code{donors} closest matches, randomly samples one of the donors, and returns the observed value of the match. } \details{ This function is included for backward compatibility. It was used up to \code{mice 2.21}. The current \code{mice.impute.pmm()} function calls the faster \code{C} function \code{matcher} instead of \code{.pmm.match()}. } \references{ Schenker N & Taylor JMG (1996) Partially parametric techniques for multiple imputation. \emph{Computational Statistics and Data Analysis}, 22, 425-446. Little RJA (1988) Missing-data adjustments in large surveys (with discussion). \emph{Journal of Business Economics and Statistics}, 6, 287-301. } \author{ Stef van Buuren } mice/man/xyplot.mads.Rd0000644000176200001440000000437614330031606014521 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/xyplot.mads.R \name{xyplot.mads} \alias{xyplot.mads} \title{Scatterplot of amputed and non-amputed data against weighted sum scores} \usage{ \method{xyplot}{mads}( x, data, which.pat = NULL, standardized = TRUE, layout = NULL, colors = mdc(1:2), ... ) } \arguments{ \item{x}{A \code{mads} object, typically created by \code{\link{ampute}}.} \item{data}{A string or vector of variable names that needs to be plotted. 
As a default, all variables will be plotted.} \item{which.pat}{A scalar or vector indicating which patterns need to be plotted. As a default, all patterns are plotted.} \item{standardized}{Logical. Whether the scatterplots need to be created from standardized data or not. Default is TRUE.} \item{layout}{A vector of two values indicating how the scatterplots of one pattern should be divided over the plot. For example, \code{c(2, 3)} indicates that the scatterplots of six variables need to be placed on 3 rows and 2 columns. There are several defaults for different numbers of variables. Note that for more than 9 variables, multiple plots will be created automatically.} \item{colors}{A vector of two RGB values defining the colors of the non-amputed and amputed data respectively. RGB values can be obtained with \code{\link{hcl}}.} \item{\dots}{Not used, but for consistency with generic} } \value{ A list containing the scatterplots. Note that a new pattern will always be shown in a new plot. } \description{ Plotting method to investigate the relation between amputed data and the weighted sum scores. Based on \code{\link{lattice}}. \code{xyplot} produces scatterplots. The function plots the variables against the weighted sum scores. The function automatically separates the amputed and non-amputed data to see the relation between the amputation and the weighted sum scores. } \note{ The \code{mads} object contains all the information you need to make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate Amputation using Ampute} to understand the contents of class object \code{mads}. } \seealso{ \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for an overview of the package, \code{\link{mads-class}} } \author{ Rianne Schouten, 2016 } mice/man/mira-class.Rd0000644000176200001440000000465014330031647014272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mira.R \docType{class} \name{mira-class} \alias{mira-class} \alias{mira} \title{Multiply imputed repeated analyses (\code{mira})} \description{ The \code{mira} object is generated by the \code{with.mids()} function. The \code{as.mira()} function takes the results of repeated complete-data analysis stored as a list, and turns it into a \code{mira} object that can be pooled. } \details{ In versions prior to \code{mice 3.0} pooling required only that \code{coef()} and \code{vcov()} methods were available for fitted objects. \emph{This feature is no longer supported}. The reason is that \code{vcov()} methods are inconsistent across packages, leading to buggy behaviour of the \code{pool()} function. Since \code{mice 3.0+}, the \code{broom} package takes care of filtering out the relevant parts of the complete-data analysis. It may happen that you'll see messages like \code{No method for tidying an S3 object of class ...} or \code{Error: No glance method for objects of class ...}. The royal way to solve this problem is to write your own \code{glance()} and \code{tidy()} methods and add these to \code{broom} according to the specifications given in \url{https://broom.tidymodels.org}. The \code{mira} class of objects has methods for the following generic functions: \code{print}, \code{summary}. Many of the functions of the \code{mice} package do not use the S4 class definitions, and instead rely on the S3 list equivalent \code{oldClass(obj) <- "mira"}.
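A minimal sketch of the typical workflow that creates and pools a \code{mira} object is given below (the model formula is only illustrative):
\preformatted{
imp <- mice(nhanes, print = FALSE)          # imputed data: a mids object
fit <- with(imp, lm(chl ~ age + hyp + bmi)) # repeated analyses: a mira object
summary(fit)                                # per-imputation results
pool(fit)                                   # pooled results: a mipo object
}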
} \section{Slots}{ \describe{ \item{\code{.Data}:}{Object of class \code{"list"} containing the following slots:} \item{\code{call}:}{The call that created the object.} \item{\code{call1}:}{The call that created the \code{mids} object that was used in \code{call}.} \item{\code{nmis}:}{An array containing the number of missing observations per column.} \item{\code{analyses}:}{A list of \code{m} components containing the individual fit objects from each of the \code{m} complete data analyses.} } } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{with.mids}}, \code{\link[=mids-class]{mids}}, \code{\link{mipo}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{classes} mice/man/fdgs.Rd0000644000176200001440000000365014335404116013161 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fdgs.R \docType{data} \name{fdgs} \alias{fdgs} \title{Fifth Dutch growth study 2009} \format{ \code{fdgs} is a data frame with 10030 rows and 8 columns: \describe{ \item{id}{Person number} \item{reg}{Region (factor, 5 levels)} \item{age}{Age (years)} \item{sex}{Sex (boy, girl)} \item{hgt}{Height (cm)} \item{wgt}{Weight (kg)} \item{hgt.z}{Height Z-score} \item{wgt.z}{Weight Z-score} } } \source{ Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., Hirasing, R. A., van Buuren, S. (2011). Increase in prevalence of overweight in Dutch children and adolescents: A comparison of nationwide growth studies in 1980, 1997 and 2009. \emph{PLoS ONE}, \emph{6}(11), e27608. Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., Hirasing, R. A., van Buuren, S. (2013). The world's tallest nation has stopped growing taller: the height of Dutch children from 1955 to 2009. \emph{Pediatric Research}, \emph{73}(3), 371-377. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-nonresponse.html#fifth-dutch-growth-study}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Boca Raton, FL.: Chapman & Hall/CRC Press. } \description{ Age, height, weight and region of 10030 children measured within the Fifth Dutch Growth Study 2009 } \details{ The data set contains data from children of Dutch descent (biological parents are born in the Netherlands). Children with growth-related diseases were excluded. The data were used to construct new growth charts of children of Dutch descent (Schonbeck 2013), and to calculate overweight and obesity prevalence (Schonbeck 2011). Some groups were underrepresented. Multiple imputation was used to create synthetic cases that were used to correct for the nonresponse. See Van Buuren (2012), chapter 8 for details. } \examples{ data(fdgs) summary(fdgs) } \keyword{datasets} mice/man/mice.impute.sample.Rd0000644000176200001440000000270614330031647015736 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.sample.R \name{mice.impute.sample} \alias{mice.impute.sample} \title{Imputation by simple random sampling} \usage{ mice.impute.sample(y, ry, x = NULL, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted.
The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes a random sample from the observed \code{y} data } \details{ This function takes a simple random sample from the observed values in \code{y}, and returns these as imputations. } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2017 } \keyword{datagen} mice/man/mice.impute.ri.Rd0000644000176200001440000000504014330031647015061 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.ri.R \name{mice.impute.ri} \alias{mice.impute.ri} \alias{ri} \title{Imputation by the random indicator method for nonignorable data} \usage{ mice.impute.ri(y, ry, x, wy = NULL, ri.maxit = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ri.maxit}{Number of inner iterations} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes nonignorable missing data by the random indicator method. } \details{ The random indicator method estimates an offset between the distribution of the observed and missing data using an algorithm that iterates over the response and imputation models. This routine assumes that the response model and imputation model have same predictors. For an MNAR alternative see also \code{\link{mice.impute.mnar.logreg}}. } \references{ Jolani, S. (2012). \emph{Dual Imputation Strategies for Analyzing Incomplete Data}. Dissertation. University of Utrecht, Dec 7 2012. 
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()} } \author{ Shahab Jolani (University of Utrecht) } \concept{univariate imputation functions} \keyword{datagen} mice/man/ncc.Rd0000644000176200001440000000111314330031606012764 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ncc.R \name{ncc} \alias{ncc} \title{Number of complete cases} \usage{ ncc(x) } \arguments{ \item{x}{An \code{R} object. Currently supported are methods for the following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} can be a vector.} } \value{ Number of elements in \code{x} with complete data. } \description{ Calculates the number of complete cases. } \examples{ ncc(nhanes) # 13 complete cases } \seealso{ \code{\link{nic}}, \code{\link{cci}} } \author{ Stef van Buuren, 2017 } mice/man/version.Rd0000644000176200001440000000076014330031606013715 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/zzz.R \name{version} \alias{version} \title{Echoes the package version number} \usage{ version(pkg = "mice") } \arguments{ \item{pkg}{A character vector with the package name.} } \value{ A character vector containing the package name, version number and installed directory. } \description{ Echoes the package version number } \examples{ version() version("base") } \author{ Stef van Buuren, Oct 2010 } \keyword{misc} mice/man/popmis.Rd0000644000176200001440000000167314330031606013543 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/popmis.R \docType{data} \name{popmis} \alias{popmis} \title{Hox pupil popularity data with missing popularity scores} \format{ A data frame with 2000 rows and 7 columns: \describe{ \item{pupil}{Pupil number within school} \item{school}{School number} \item{popular}{Pupil popularity with 848 missing entries} \item{sex}{Pupil gender} \item{texp}{Teacher experience (years)} \item{const}{Constant intercept term} \item{teachpop}{Teacher popularity} } } \source{ Hox, J. J. (2002) \emph{Multilevel analysis. Techniques and applications.} Mahwah, NJ: Lawrence Erlbaum. } \description{ Hox pupil popularity data with some missing popularity scores } \details{ The original, complete dataset was generated by Joop Hox as an example of well-behaved multilevel data set. The distributed data contains missing data in pupil popularity. 
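A possible two-level imputation setup for these data is sketched below; it mirrors the pattern documented for \code{\link{mice.impute.2l.pan}} and \code{\link{mice.impute.2lonly.pmm}}, requires the \pkg{pan} package, and the choice of predictors and method is only illustrative.
\preformatted{
ini <- mice(popmis, maxit = 0)
pred <- ini$predictorMatrix
pred["popular", "school"] <- -2   # school is the level-2 class identifier
meth <- ini$method
meth["popular"] <- "2l.pan"       # two-level normal model for popular
imp <- mice(popmis, predictorMatrix = pred, method = meth, print = FALSE)
}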
} \examples{ popmis[1:3, ] } \keyword{datasets} mice/man/plot.mids.Rd0000644000176200001440000000344214330031606014141 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R \name{plot.mids} \alias{plot.mids} \title{Plot the trace lines of the MICE algorithm} \usage{ \method{plot}{mids}( x, y = NULL, theme = mice.theme(), layout = c(2, 3), type = "l", col = 1:10, lty = 1, ... ) } \arguments{ \item{x}{An object of class \code{mids}} \item{y}{A formula that specifies which variables, streams and iterations are plotted. If omitted, all streams, variables and iterations are plotted.} \item{theme}{The trellis theme to be applied to the graphs. The default is \code{mice.theme()}.} \item{layout}{A vector of length 2 giving the number of columns and rows in the plot. The default is \code{c(2, 3)}.} \item{type}{Parameter \code{type} of \code{\link{panel.xyplot}}.} \item{col}{Parameter \code{col} of \code{\link{panel.xyplot}}.} \item{lty}{Parameter \code{lty} of \code{\link{panel.xyplot}}.} \item{...}{Extra arguments for \code{\link{xyplot}}.} } \value{ An object of class \code{"trellis"}. } \description{ Trace line plots portray the value of an estimate against the iteration number. The estimate can be anything that you can calculate, but is typically chosen as a parameter of scientific interest. The \code{plot} method for a \code{mids} object plots the mean and standard deviation of the imputed (not observed) values against the iteration number for each of the \eqn{m} replications. By default, the function plots the development of the mean and standard deviation for each incomplete variable. On convergence, the streams should intermingle and be free of any trend. } \examples{ imp <- mice(nhanes, print = FALSE) plot(imp, bmi + chl ~ .it | .ms, layout = c(2, 1)) } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}}, \code{\link{xyplot}} } \author{ Stef van Buuren 2011 } mice/man/convergence.Rd0000644000176200001440000000520214433400023014521 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/convergence.R \name{convergence} \alias{convergence} \title{Computes convergence diagnostics for a \code{mids} object} \usage{ convergence(data, diagnostic = "all", parameter = "mean", ...) } \arguments{ \item{data}{An object of class \code{mids} as created by the function \code{mice()}.} \item{diagnostic}{A keyword. One of the following keywords: \code{"ac"}, \code{"all"}, \code{"gr"} and \code{"psrf"}. See the Details section for the interpretation. The default is \code{diagnostic = "all"} which returns both the autocorrelation and potential scale reduction factor per iteration.} \item{parameter}{A keyword. One of the following keywords: \code{"mean"} or \code{"sd"} to evaluate chain means or chain standard deviations, respectively.} \item{\dots}{Additional arguments. Not used.} } \value{ A \code{data.frame} with the autocorrelation and/or potential scale reduction factor per iteration of the MICE algorithm. } \description{ Takes an object of class \code{mids}, computes the autocorrelation and/or potential scale reduction factor, and returns a \code{data.frame} with the specified diagnostic(s) per iteration. } \details{ The argument \code{diagnostic} can be a length-1 character, which is matched to one of the following keywords: \describe{ \item{\code{"all"}}{computes both the lag-1 autocorrelation as well as the potential scale reduction factor (cf.
Vehtari et al., 2021) per iteration of the MICE algorithm;} \item{\code{"ac"}}{computes only the autocorrelation per iteration;} \item{\code{"psrf"}}{computes only the potential scale reduction factor per iteration;} \item{\code{"gr"}}{same as \code{psrf}, the potential scale reduction factor is colloquially called the Gelman-Rubin diagnostic.} } In the unlikely event of perfect convergence, the autocorrelation equals zero and the potential scale reduction factor equals one. To interpret the convergence diagnostic(s) in the output of the function, it is recommended to plot the diagnostics (ac and/or psrf) against the iteration number (.it) per imputed variable (vrb). A persistently decreasing trend across iterations indicates potential non-convergence. } \examples{ \dontrun{ # obtain imputed data set imp <- mice(nhanes2, print = FALSE) # compute convergence diagnostics convergence(imp) } } \references{ Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., & Burkner, P.-C. (2021). Rank-Normalization, Folding, and Localization: An Improved R for Assessing Convergence of MCMC. Bayesian Analysis, 1(1), 1-38. https://doi.org/10.1214/20-BA1221 } \seealso{ \code{\link{mice}}, \code{\link[=mids-class]{mids}} } \keyword{none} mice/man/mnar_demo_data.Rd0000644000176200001440000000100414330031606015152 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mnar_demo_data.R \docType{data} \name{mnar_demo_data} \alias{mnar_demo_data} \title{MNAR demo data} \format{ An object of class \code{data.frame} with 500 rows and 3 columns. } \source{ \url{https://github.com/moreno-betancur/NARFCS/blob/master/datmis.csv} } \usage{ mnar_demo_data } \description{ A toy example from Margarita Moreno-Betancur for checking NARFCS. } \details{ A small dataset with just three columns. } \keyword{datasets} mice/man/squeeze.Rd0000644000176200001440000000160414330031606013707 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/squeeze.R \name{squeeze} \alias{squeeze} \title{Squeeze the imputed values to be within specified boundaries.} \usage{ squeeze(x, bounds = c(min(x[r]), max(x[r])), r = rep.int(TRUE, length(x))) } \arguments{ \item{x}{A numerical vector with values} \item{bounds}{A numerical vector of length 2 containing the lower and upper bounds. By default, the bounds are to the minimum and maximum values in \code{x}.} \item{r}{A logical vector of length \code{length(x)} that is used to select a subset in \code{x} before calculating automatic bounds.} } \value{ A vector of length \code{length(x)}. } \description{ This function replaces any values in \code{x} that are lower than \code{bounds[1]} by \code{bounds[1]}, and replaces any values higher than \code{bounds[2]} by \code{bounds[2]}. } \author{ Stef van Buuren, 2011. 
} mice/man/extractBS.Rd0000644000176200001440000000065713666252075014144 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{extractBS} \alias{extractBS} \title{Extract broken stick estimates from a \code{lmer} object} \usage{ extractBS(fit) } \arguments{ \item{fit}{An object of class \code{lmer}} } \value{ A matrix containing broken stick estimates } \description{ Extract broken stick estimates from a \code{lmer} object } \author{ Stef van Buuren, 2012 } mice/man/mipo.Rd0000644000176200001440000000567714330031647013205 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mipo.R \name{mipo} \alias{mipo} \alias{summary.mipo} \alias{print.mipo} \alias{print.mipo.summary} \alias{process_mipo} \title{\code{mipo}: Multiple imputation pooled object} \usage{ mipo(mira.obj, ...) \method{summary}{mipo}( object, type = c("tests", "all"), conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE, ... ) \method{print}{mipo}(x, ...) \method{print}{mipo.summary}(x, ...) process_mipo(z, x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE) } \arguments{ \item{mira.obj}{An object of class \code{mira}} \item{\dots}{Arguments passed down} \item{object}{An object of class \code{mipo}} \item{conf.int}{Logical indicating whether to include a confidence interval. The default is \code{FALSE}.} \item{conf.level}{Confidence level of the interval, used only if \code{conf.int = TRUE}. Number between 0 and 1.} \item{exponentiate}{Flag indicating whether to exponentiate the coefficient estimates and confidence intervals (typical for logistic regression).} \item{x}{An object of class \code{mipo}} \item{z}{Data frame with a tidied version of a coefficient matrix} } \value{ The \code{summary} method returns a data frame with summary statistics of the pooled analysis. } \description{ The \code{mipo} object contains the results of the pooling step. The function \code{\link{pool}} generates an object of class \code{mipo}. } \details{ An object of class \code{mipo} is a \code{list} with elements: \code{call}, \code{m}, \code{pooled} and \code{glanced}. The \code{pooled} element is a data frame with columns: \tabular{ll}{ \code{estimate}\tab Pooled complete data estimate\cr \code{ubar} \tab Within-imputation variance of \code{estimate}\cr \code{b} \tab Between-imputation variance of \code{estimate}\cr \code{t} \tab Total variance of \code{estimate}\cr \code{dfcom} \tab Degrees of freedom in complete data\cr \code{df} \tab Degrees of freedom of \eqn{t}-statistic\cr \code{riv} \tab Relative increase in variance\cr \code{lambda} \tab Proportion attributable to the missingness\cr \code{fmi} \tab Fraction of missing information\cr } The names of the terms are stored as \code{row.names(pooled)}. The \code{glanced} element is a \code{data.frame} with \code{m} rows. The precise composition depends on the class of the complete-data analysis. At least the field \code{nobs} is expected to be present. \code{process_mipo} is a helper function to process a tidied mipo object, and is normally not called directly. It adds a confidence interval and optionally exponentiates the result. } \references{ van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67.
\doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{pool}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} } \keyword{classes} \keyword{internal} mice/man/fdd.Rd0000644000176200001440000000775014330031606012773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fdd.R \docType{data} \name{fdd} \alias{fdd} \alias{fdd.pred} \title{SE Fireworks disaster data} \format{ \code{fdd} is a data frame with 52 rows and 65 columns: \describe{ \item{id}{Client number} \item{trt}{Treatment (E=EMDR, C=CBT)} \item{pp}{Per protocol (Y/N)} \item{trtp}{Number of parental treatments} \item{sex}{Sex: M/F} \item{etn}{Ethnicity: NL/OTHER} \item{age}{Age (years)} \item{trauma}{Trauma count (1-5)} \item{prop1}{PROPS total score T1} \item{prop2}{PROPS total score T2} \item{prop3}{PROPS total score T3} \item{crop1}{CROPS total score T1} \item{crop2}{CROPS total score T2} \item{crop3}{CROPS total score T3} \item{masc1}{MASC score T1} \item{masc2}{MASC score T2} \item{masc3}{MASC score T3} \item{cbcl1}{CBCL T1} \item{cbcl3}{CBCL T3} \item{prs1}{PRS total score T1} \item{prs2}{PRS total score T2} \item{prs3}{PRS total score T3} \item{ypa1}{PTSD-RI B intrusive recollection parent T1} \item{ypb1}{PTSD-RI C avoidant/numbing parent T1} \item{ypc1}{PTSD-RI D hyper-arousal parent T1} \item{yp1}{PTSD-RI B+C+D parent T1} \item{ypa2}{PTSD-RI B intrusive recollection parent T2} \item{ypb2}{PTSD-RI C avoidant/numbing parent T2} \item{ypc2}{PTSD-RI D hyper-arousal parent T2} \item{yp2}{PTSD-RI B+C+D parent T1} \item{ypa3}{PTSD-RI B intrusive recollection parent T3} \item{ypb3}{PTSD-RI C avoidant/numbing parent T3} \item{ypc3}{PTSD-RI D hyper-arousal parent T3} \item{yp3}{PTSD-RI B+C+D parent T3} \item{yca1}{PTSD-RI B intrusive recollection child T1} \item{ycb1}{PTSD-RI C avoidant/numbing child T1} \item{ycc1}{PTSD-RI D hyper-arousal child T1} \item{yc1}{PTSD-RI B+C+D child T1} \item{yca2}{PTSD-RI B intrusive recollection child T2} \item{ycb2}{PTSD-RI C avoidant/numbing child T2} \item{ycc2}{PTSD-RI D hyper-arousal child T2} \item{yc2}{PTSD-RI B+C+D child T2} \item{yca3}{PTSD-RI B intrusive recollection child T3} \item{ycb3}{PTSD-RI C avoidant/numbing child T3} \item{ycc3}{PTSD-RI D hyper-arousal child T3} \item{yc3}{PTSD-RI B+C+D child T3} \item{ypf1}{PTSD-RI parent full T1} \item{ypf2}{PTSD-RI parent full T2} \item{ypf3}{PTSD-RI parent full T3} \item{ypp1}{PTSD parent partial T1} \item{ypp2}{PTSD parent partial T2} \item{ypp3}{PTSD parent partial T3} \item{ycf1}{PTSD child full T1} \item{ycf2}{PTSD child full T2} \item{ycf3}{PTSD child full T3} \item{ycp1}{PTSD child partial T1} \item{ycp2}{PTSD child partial T2} \item{ycp3}{PTSD child partial T3} \item{cbin1}{CBCL Internalizing T1} \item{cbin3}{CBCL Internalizing T3} \item{cbex1}{CBCL Externalizing T1} \item{cbex3}{CBCL Externalizing T3} \item{bir1}{Birlison T1} \item{bir2}{Birlison T2} \item{bir3}{Birlison T3} } \code{fdd.pred} is the 65 by 65 binary predictor matrix used to impute \code{fdd}. } \source{ de Roos, C., Greenwald, R., den Hollander-Gijsman, M., Noorthoorn, E., van Buuren, S., de Jong, A. (2011). A Randomised Comparison of Cognitive Behavioral Therapy (CBT) and Eye Movement Desensitisation and Reprocessing (EMDR) in disaster-exposed children. \emph{European Journal of Psychotraumatology}, \emph{2}, 5694. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-fdd.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \description{ Multiple outcomes of a randomized study to reduce post-traumatic stress. } \details{ Data from a randomized experiment to reduce post-traumatic stress by two treatments: Eye Movement Desensitization and Reprocessing (EMDR) (experimental treatment), and cognitive behavioral therapy (CBT) (control treatment). 52 children were randomized to one of these two treatments. Outcomes were measured at three time points: at baseline (pre-treatment, T1), post-treatment (T2, 4-8 weeks), and at follow-up (T3, 3 months). For more details, see de Roos et al (2011). Some person covariates were reshuffled. The imputation methodology is explained in Chapter 9 of van Buuren (2012). } \examples{ md.pattern(fdd) } \keyword{datasets} mice/man/mids2spss.Rd0000644000176200001440000000472514330031606014164 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids2spss.R \name{mids2spss} \alias{mids2spss} \title{Export \code{mids} object to SPSS} \usage{ mids2spss( imp, filename = "midsdata", path = getwd(), compress = FALSE, silent = FALSE ) } \arguments{ \item{imp}{The \code{imp} argument is an object of class \code{mids}, typically produced by the \code{mice()} function.} \item{filename}{A character string describing the name of the output data file and its extension.} \item{path}{A character string containing the path of the output file. The value in \code{path} is prepended to \code{filename}. By default, files are written to the current \code{R} working directory. If \code{path=NULL} then no file path is prepended.} \item{compress}{A logical flag stating whether the resulting SPSS set should be a compressed \code{.zsav} file.} \item{silent}{A logical flag stating whether the location of the saved file should be printed.} } \value{ The return value is \code{NULL}. } \description{ Converts a \code{mids} object into a format recognized by SPSS, and writes the data and the SPSS syntax files. } \details{ This function automates most of the work needed to export a \code{mids} object to SPSS. It uses \code{haven::write_sav()} to facilitate the export to an SPSS \code{.sav} or \code{.zsav} file. Below are some things to pay attention to. The \code{SPSS} syntax file has the proper file names and separators set, so in principle it should run and read the data without alteration. \code{SPSS} is more strict than \code{R} with respect to the paths. Always use the full path, otherwise \code{SPSS} may not be able to find the data file. Factors in \code{R} translate into categorical variables in \code{SPSS}. The internal coding of factor levels used in \code{R} is exported. This is generally acceptable for \code{SPSS}. However, when the data are to be combined with existing \code{SPSS} data, watch out for any changes in the factor level codes. \code{SPSS} will recognize the data set as a multiply imputed data set, and do automatic pooling in procedures where that is supported. Note however that pooling is an extra option only available to those who license the \code{MISSING VALUES} module. Without this license, \code{SPSS} will still recognize the structure of the data, but it will not pool the multiply imputed estimates into a single inference. } \seealso{ \code{\link[=mids-class]{mids}} } \author{ Gerko Vink, Dec 2020.
} \keyword{manip} mice/man/mice.impute.mean.Rd0000644000176200001440000000513214330031647015371 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.mean.R \name{mice.impute.mean} \alias{mice.impute.mean} \title{Imputation by the mean} \usage{ mice.impute.mean(y, ry, x = NULL, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes the arithmetic mean of the observed data } \section{Warning}{ Imputing the mean of a variable is almost never appropriate. See Little and Rubin (2002, p. 61-62) or Van Buuren (2012, p. 10-11) } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing Data. New York: John Wiley and Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-simplesolutions.html#sec:meanimp}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{mice}}, \code{\link{mean}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.lasso.select.norm.Rd0000644000176200001440000000761214330031647020027 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lasso.select.norm.R \name{mice.impute.lasso.select.norm} \alias{mice.impute.lasso.select.norm} \alias{lasso.select.norm} \title{Imputation by indirect use of lasso linear regression} \usage{ mice.impute.lasso.select.norm(y, ry, x, wy = NULL, nfolds = 10, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. 
The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nfolds}{The number of folds for the cross-validation of the lasso penalty. The default is 10.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using Bayesian linear regression following a preprocessing lasso variable selection step. } \details{ The method consists of the following steps: \enumerate{ \item For a given \code{y} variable under imputation, fit a linear regression with lasso penalty using \code{y[ry]} as dependent variable and \code{x[ry, ]} as predictors. Coefficients that are not shrunk to 0 define an active set of predictors that will be used for imputation \item Define a Bayesian linear model using \code{y[ry]} as the dependent variable, the active set of \code{x[ry, ]} as predictors, and standard non-informative priors \item Draw parameter values for the intercept, regression weights, and error variance from their posterior distribution \item Draw imputations from the posterior predictive distribution } The user can specify a \code{predictorMatrix} in the \code{mice} call to define which predictors are provided to this univariate imputation method. The lasso regularization will select, among the variables indicated by the user, the ones that are important for imputation at any given iteration. Therefore, users may force the exclusion of a predictor from a given imputation model by specifying a \code{0} entry. However, a non-zero entry does not guarantee the variable will be used, as this decision is ultimately made by the lasso variable selection procedure. The method is based on the Indirect Use of Regularized Regression (IURR) proposed by Zhao & Long (2016) and Deng et al (2016). } \references{ Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for general missing data patterns in the presence of high-dimensional data. Scientific reports, 6(1), 1-10. Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of high-dimensional data. Statistical Methods in Medical Research, 25(5), 2021-2035. 
} \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Edoardo Costantini, 2021 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.cart.Rd0000644000176200001440000000703114436637036015415 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.cart.R \name{mice.impute.cart} \alias{mice.impute.cart} \alias{cart} \title{Imputation by classification and regression trees} \usage{ mice.impute.cart(y, ry, x, wy = NULL, minbucket = 5, cp = 1e-04, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{minbucket}{The minimum number of observations in any terminal node used. See \code{\link{rpart.control}} for details.} \item{cp}{Complexity parameter. Any split that does not decrease the overall lack of fit by a factor of cp is not attempted. See \code{\link{rpart.control}} for details.} \item{...}{Other named arguments passed down to \code{rpart()}.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using classification and regression trees. } \details{ Imputation of \code{y} by classification and regression trees. The procedure is as follows: \enumerate{ \item Fit a classification or regression tree by recursive partitioning; \item For each element of \code{ymis}, find the terminal node in which it ends up according to the fitted tree; \item Make a random draw among the members in that node, and take the observed value from that draw as the imputation. } } \examples{ imp <- mice(nhanes2, meth = "cart", minbucket = 4) plot(imp) } \references{ Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning for missing data imputation in the presence of interaction effects. Computational Statistics & Data Analysis, 72, 92-104. Breiman, L., Friedman, J. H., Olshen, R. A., and Stone, C. J. (1984), Classification and regression trees, Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL.
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.rf}}, \code{\link[rpart]{rpart}}, \code{\link[rpart]{rpart.control}} Other univariate imputation functions: \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mcar.Rd0000644000176200001440000001302414330032376013154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mcar.R \name{mcar} \alias{mcar} \title{Jamshidian and Jalal's Non-Parametric MCAR Test} \usage{ mcar( x, imputed = mice(x, method = "norm"), min_n = 6, method = "auto", replications = 10000, use_chisq = 30, alpha = 0.05 ) } \arguments{ \item{x}{An object for which a method exists; usually a \code{data.frame}.} \item{imputed}{Either an object of class \code{mids}, as returned by \code{\link[=mice]{mice()}}, or a list of \code{data.frame}s.} \item{min_n}{Atomic numeric, must be greater than 1. When there are missing data patterns with fewer than \code{min_n} cases, all cases with that pattern will be removed from \code{x} and \code{imputed}.} \item{method}{Atomic character. If it is known (or assumed) that data are either multivariate normally distributed or not, then use either \code{method = "hawkins"} or \code{method = "nonparametric"}, respectively. The default argument \code{method = "auto"} follows the procedure outlined in the Details section, and in Figure 7 of Jamshidian and Jalal (2010).} \item{replications}{Number of replications used to simulate the Neyman distribution when performing Hawkins' test. As this method is based on random sampling, use a high number of \code{replications} (and optionally, \code{\link[=set.seed]{set.seed()}}) to minimize Monte Carlo error and ensure reproducibility.} \item{use_chisq}{Atomic integer, indicating the minimum number of cases within a group \emph{k} that triggers the use of asymptotic Chi-square distribution instead of the emprical distribution in the Neyman uniformity test, which is performed as part of Hawkins' test.} \item{alpha}{Atomic numeric, indicating the significance level of tests.} } \value{ An object of class \code{mcar_object}. } \description{ Test whether missingness is contingent upon the observed variables, according to the methodology developed by Jamshidian and Jalal (2010) (see Details). } \details{ Three types of missingness have been distinguished in the literature (Rubin, 1976): Missing completely at random (MCAR), which means that missingness is random; missing at random (MAR), which means that missingness is contingent on the \emph{observed}; and missing not at random (MNAR), which means that missingness is related to unobserved data. 
Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data are either MCAR or MAR, and tests whether the missingness is independent of the observed values. If so, the covariance matrices of the imputed data will be equal across groups with different patterns of missingness. This test consists of the following procedure: \enumerate{ \item Data are imputed. \item The imputed data are split into \emph{k} groups according to the \emph{k} missing data patterns in the original data (see \code{\link[=md.pattern]{md.pattern()}}). \item Perform Hawkins' test for equality of covariances across the \emph{k} groups. \item If the test is \emph{not significant}, conclude that there is no evidence against multivariate normality of the data, nor against MCAR. \item If the test \emph{is significant}, and multivariate normality of the data can be assumed, then it can be concluded that missingness is MAR. \item If multivariate normality cannot be assumed, then perform the Anderson-Darling non-parametric test for equality of covariances across the \emph{k} groups. \item If the Anderson-Darling test is \emph{not significant}, this is evidence against multivariate normality - but no evidence against MCAR. \item If the Anderson-Darling test \emph{is significant}, then it can be concluded that missingness is MAR. } Note that, despite its name in common parlance, an MCAR test can only indicate whether missingness is MCAR or MAR. The procedure cannot distinguish MCAR from MNAR, so a non-significant result does not rule out MNAR. This is a re-implementation of the function \code{TestMCARNormality}, originally published in the R package \code{MissMech}, which has been removed from CRAN. This new implementation is faster, as its backend is written in C++. It also enhances the functionality of the original: \itemize{ \item Multiply imputed data can now be used; the median p-value and test statistic across replications are then reported, as suggested by Eekhout, Wiel, and Heymans (2017). \item The printing method for an \code{mcar_object} gives a warning when at least one p-value of either test was significant. In this case, it is recommended to inspect the range of p-values, and consider potential violations of MCAR. \item A plotting method for an \code{mcar_object} is provided. \item A plotting method for the \verb{$md.pattern} element of an \code{mcar_object} is provided. } } \examples{
res <- mcar(nhanes)
# Examine test results
res
# Plot p-values across imputed data sets
plot(res)
# Plot md patterns used for the test
plot(res, type = "md.pattern")
# Note difference with the raw md.patterns:
md.pattern(nhanes)
} \references{ Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No. 3, pp. 581-592. \doi{10.2307/2335739} Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance Testing of Categorical Covariates in Logistic Regression Models After Multiple Imputation: Power and Applicability Analysis. BMC Medical Research Methodology 17 (1): 129. Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and missing completely at random for incomplete multivariate data. Psychometrika, 75(4), 649–674. \doi{10.1007/s11336-010-9175-3} } \author{ Caspar J.
Van Lissa } \keyword{internal} mice/man/flux.Rd0000644000176200001440000000536314330031606013212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{flux} \alias{flux} \title{Influx and outflux of multivariate missing data patterns} \usage{ flux(data, local = names(data)) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{local}{A vector of names of columns of \code{data}. The default is to include all columns in the calculations.} } \value{ A data frame with \code{ncol(data)} rows and six columns: pobs = Proportion observed, influx = Influx outflux = Outflux ainb = Average inbound statistic aout = Average outbound statistic fico = Fraction of incomplete cases among cases with \code{Yj} observed } \description{ Influx and outflux are statistics of the missing data pattern. These statistics are useful in selecting predictors that should go into the imputation model. } \details{ Infux and outflux have been proposed by Van Buuren (2018), chapter 4. Influx is equal to the number of variable pairs \code{(Yj , Yk)} with \code{Yj} missing and \code{Yk} observed, divided by the total number of observed data cells. Influx depends on the proportion of missing data of the variable. Influx of a completely observed variable is equal to 0, whereas for completely missing variables we have influx = 1. For two variables with the same proportion of missing data, the variable with higher influx is better connected to the observed data, and might thus be easier to impute. Outflux is equal to the number of variable pairs with \code{Yj} observed and \code{Yk} missing, divided by the total number of incomplete data cells. Outflux is an indicator of the potential usefulness of \code{Yj} for imputing other variables. Outflux depends on the proportion of missing data of the variable. Outflux of a completely observed variable is equal to 1, whereas outflux of a completely missing variable is equal to 0. For two variables having the same proportion of missing data, the variable with higher outflux is better connected to the missing data, and thus potentially more useful for imputing other variables. FICO is an outbound statistic defined by the fraction of incomplete cases among cases with \code{Yj} observed (White and Carlin, 2010). } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. } \seealso{ \code{\link{fluxplot}}, \code{\link{md.pattern}}, \code{\link{fico}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/make.predictorMatrix.Rd0000644000176200001440000000222614347615202016333 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predictorMatrix.R \name{make.predictorMatrix} \alias{make.predictorMatrix} \title{Creates a \code{predictorMatrix} argument} \usage{ make.predictorMatrix(data, blocks = make.blocks(data), predictorMatrix = NULL) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. 
The default assigns each variable in its own block.} \item{predictorMatrix}{A predictor matrix from which rows with the same names are copied into the output predictor matrix.} } \value{ A matrix } \description{ This helper function creates a valid \code{predictMatrix}. The \code{predictorMatrix} is an argument to the \code{mice} function. It specifies the target variable or block in the rows, and the predictor variables on the columns. An entry of \code{0} means that the column variable is NOT used to impute the row variable or block. A nonzero value indicates that it is used. } \examples{ make.predictorMatrix(nhanes) make.predictorMatrix(nhanes, blocks = make.blocks(nhanes, "collect")) } \seealso{ \code{\link{make.blocks}} } mice/man/norm.draw.Rd0000644000176200001440000000261114330031606014134 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.R \name{norm.draw} \alias{norm.draw} \alias{.norm.draw} \title{Draws values of beta and sigma by Bayesian linear regression} \usage{ norm.draw(y, ry, x, rank.adjust = TRUE, ...) .norm.draw(y, ry, x, rank.adjust = TRUE, ...) } \arguments{ \item{y}{Incomplete data vector of length \code{n}} \item{ry}{Vector of missing data pattern (\code{FALSE}=missing, \code{TRUE}=observed)} \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{rank.adjust}{Argument that specifies whether \code{NA}'s in the coefficients need to be set to zero. Only relevant when \code{ls.meth = "qr"} AND the predictor matrix is rank-deficient.} \item{...}{Other named arguments.} } \value{ A \code{list} containing components \code{coef} (least squares estimate), \code{beta} (drawn regression weights) and \code{sigma} (drawn value of the residual standard deviation). } \description{ This function draws random values of beta and sigma under the Bayesian linear regression model as described in Rubin (1987, p. 167). This function can be called by user-specified imputation functions. } \references{ Rubin, D.B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York: Wiley. } \author{ Gerko Vink, 2018, for this version, based on earlier versions written by Stef van Buuren, Karin Groothuis-Oudshoorn, 2017 } mice/man/fluxplot.Rd0000644000176200001440000000652214330031606014107 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/flux.R \name{fluxplot} \alias{fluxplot} \title{Fluxplot of the missing data pattern} \usage{ fluxplot( data, local = names(data), plot = TRUE, labels = TRUE, xlim = c(0, 1), ylim = c(0, 1), las = 1, xlab = "Influx", ylab = "Outflux", main = paste("Influx-outflux pattern for", deparse(substitute(data))), eqscplot = TRUE, pty = "s", lwd = 1, ... ) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as NA's.} \item{local}{A vector of names of columns of \code{data}. The default is to include all columns in the calculations.} \item{plot}{Should a graph be produced?} \item{labels}{Should the points be labeled?} \item{xlim}{See \code{par}.} \item{ylim}{See \code{par}.} \item{las}{See \code{par}.} \item{xlab}{See \code{par}.} \item{ylab}{See \code{par}.} \item{main}{See \code{par}.} \item{eqscplot}{Should a square plot be produced?} \item{pty}{See \code{par}.} \item{lwd}{See \code{par}. 
Controls axis line thickness and diagonal} \item{\dots}{Further arguments passed to \code{plot()} or \code{eqscplot()}.} } \value{ An invisible data frame with \code{ncol(data)} rows and six columns: pobs = Proportion observed, influx = Influx outflux = Outflux ainb = Average inbound statistic aout = Average outbound statistic fico = Fraction of incomplete cases among cases with \code{Yj} observed } \description{ Influx and outflux are statistics of the missing data pattern. These statistics are useful in selecting predictors that should go into the imputation model. } \details{ Infux and outflux have been proposed by Van Buuren (2012), chapter 4. Influx is equal to the number of variable pairs \code{(Yj , Yk)} with \code{Yj} missing and \code{Yk} observed, divided by the total number of observed data cells. Influx depends on the proportion of missing data of the variable. Influx of a completely observed variable is equal to 0, whereas for completely missing variables we have influx = 1. For two variables with the same proportion of missing data, the variable with higher influx is better connected to the observed data, and might thus be easier to impute. Outflux is equal to the number of variable pairs with \code{Yj} observed and \code{Yk} missing, divided by the total number of incomplete data cells. Outflux is an indicator of the potential usefulness of \code{Yj} for imputing other variables. Outflux depends on the proportion of missing data of the variable. Outflux of a completely observed variable is equal to 1, whereas outflux of a completely missing variable is equal to 0. For two variables having the same proportion of missing data, the variable with higher outflux is better connected to the missing data, and thus potentially more useful for imputing other variables. } \references{ Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation compared with complete-case analysis for missing covariate values. \emph{Statistics in Medicine}, \emph{29}, 2920-2931. } \seealso{ \code{\link{flux}}, \code{\link{md.pattern}}, \code{\link{fico}} } \author{ Stef van Buuren, 2012 } \keyword{misc} mice/man/mice.impute.2l.bin.Rd0000644000176200001440000000507114330031606015532 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.bin.R \name{mice.impute.2l.bin} \alias{mice.impute.2l.bin} \title{Imputation by a two-level logistic model using \code{glmer}} \usage{ mice.impute.2l.bin(y, ry, x, type, wy = NULL, intercept = TRUE, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random variables are identified by a '2'. The class variable (only one is allowed) is coded as '-2'. Fixed effects are indicated by a '1'.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{\dots}{Arguments passed down to \code{glmer}} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate systematically and sporadically missing data using a two-level logistic model fitted with \code{lme4::glmer()} } \details{ Data are missing systematically if they have not been measured, e.g., in the case where we combine data from different sources. Data are missing sporadically if they have been partially observed. } \examples{ library(tidyr) library(dplyr) data("toenail2") data <- tidyr::complete(toenail2, patientID, visit) \%>\% tidyr::fill(treatment) \%>\% dplyr::select(-time) \%>\% dplyr::mutate(patientID = as.integer(patientID)) \dontrun{ pred <- mice(data, print = FALSE, maxit = 0, seed = 1)$pred pred["outcome", "patientID"] <- -2 imp <- mice(data, method = "2l.bin", pred = pred, maxit = 1, m = 1, seed = 1) } } \references{ Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). Imputation of systematically missing predictors in an individual participant data meta-analysis: a generalized approach using MICE. \emph{Statistics in Medicine}, 34:1841-1863. } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.norm}()}, \code{\link{mice.impute.2l.pan}()} } \author{ Shahab Jolani, 2015; adapted to mice, SvB, 2018 } \concept{univariate-2l} \keyword{datagen} mice/man/is.mira.Rd0000644000176200001440000000051513666252075013610 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is.R \name{is.mira} \alias{is.mira} \title{Check for \code{mira} object} \usage{ is.mira(x) } \arguments{ \item{x}{An object} } \value{ A logical indicating whether \code{x} is an object of class \code{mira} } \description{ Check for \code{mira} object } mice/man/getfit.Rd0000644000176200001440000000276214330031606013516 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getfit.R \name{getfit} \alias{getfit} \title{Extract list of fitted models} \usage{ getfit(x, i = -1L, simplify = FALSE) } \arguments{ \item{x}{An object of class \code{mira}, typically produced by a call to \code{with()}.} \item{i}{An integer between 1 and \code{x$m} signalling the index of the repeated analysis. The default \code{i = -1} returns a list with all analyses.} \item{simplify}{Should the return value be unlisted?} } \value{ If \code{i = -1}, an object of class \code{mira} containing all analyses. If \code{i} selects one of the analyses, then it returns an object whose class is inherited from that element. } \description{ Function \code{getfit()} returns the list of objects containing the repeated analysis results, or optionally, one of these fitted objects. The function looks for a list element called \code{analyses}, and returns this component as a list with \code{mira} class. If element \code{analyses} is not found in \code{x}, then it returns \code{x} as a \code{mira} object. } \details{ No checking is done for validity of objects. The function also processes objects of class \code{mitml.result} from the \code{mitml} package.
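For orientation, the sketch below (a simplified illustration with made-up object names, not the exact internal code) shows how \code{getfit()} relates to the \code{analyses} element that it extracts:

\preformatted{
imp <- mice(nhanes, print = FALSE, seed = 1)
fit <- with(imp, lm(bmi ~ chl))   # mira object with element 'analyses'
f2 <- getfit(fit, 2)              # second fitted model
# should agree with extracting the list element directly:
identical(coef(f2), coef(fit$analyses[[2]]))
}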
} \examples{ imp <- mice(nhanes, print = FALSE, seed = 21443) fit <- with(imp, lm(bmi ~ chl + hyp)) f1 <- getfit(fit) class(f1) f2 <- getfit(fit, 2) class(f2) } \seealso{ \code{\link[=mira-class]{mira}}, \code{\link{with.mids}} } \author{ Stef van Buuren, 2012, 2020 } \keyword{manip} mice/man/ampute.continuous.Rd0000644000176200001440000000407614330031606015734 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.continuous.R \name{ampute.continuous} \alias{ampute.continuous} \title{Multivariate amputation based on continuous probability functions} \usage{ ampute.continuous(P, scores, prop, type) } \arguments{ \item{P}{A vector containing the pattern numbers of the cases's candidacies. For each case, a value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{scores}{A list containing vectors with the candidates's weighted sum scores, the result of an underlying function in \code{\link{ampute}}.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{type}{A vector of strings containing the type of missingness for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. If a single missingness type is entered, all patterns will be created by the same type. If missingness types should differ over patterns, a vector of missingness types should be entered. Default is RIGHT for all patterns and is the result of \code{\link{ampute.default.type}}.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern. The continuous probability distributions (Van Buuren, 2012, pp. 63, 64) will be induced on the weighted sum scores, calculated earlier in the multivariate amputation function \code{\link{ampute}}. } \references{ #'Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html#sec:generateuni}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.type}} } \author{ Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 } \keyword{internal} mice/man/mice.impute.pmm.Rd0000644000176200001440000001524014335404116015243 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.pmm.R \name{mice.impute.pmm} \alias{mice.impute.pmm} \alias{pmm} \title{Imputation by predictive mean matching} \usage{ mice.impute.pmm( y, ry, x, wy = NULL, donors = 5L, matchtype = 1L, exclude = -99999999, ridge = 1e-05, use.matcher = FALSE, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{donors}{The size of the donor pool among which a draw is made. The default is \code{donors = 5L}. Setting \code{donors = 1L} always selects the closest match, but is not recommended. Values between 3L and 10L provide the best results in most cases (Morris et al, 2015).} \item{matchtype}{Type of matching distance. The default choice (\code{matchtype = 1L}) calculates the distance between the \emph{predicted} value of \code{yobs} and the \emph{drawn} values of \code{ymis} (called type-1 matching). Other choices are \code{matchtype = 0L} (distance between predicted values) and \code{matchtype = 2L} (distance between drawn values).} \item{exclude}{Value or vector of values to exclude from the imputation donor pool in \code{y}} \item{ridge}{The ridge penalty used in \code{.norm.draw()} to prevent problems with multicollinearity. The default is \code{ridge = 1e-05}, which means that 0.01 percent of the diagonal is added to the cross-product. Larger ridges may result in more biased estimates. For highly noisy data (e.g. many junk variables), set \code{ridge = 1e-06} or even lower to reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher.} \item{use.matcher}{Logical. Set \code{use.matcher = TRUE} to specify the C function \code{matcher()}, the now deprecated matching function that was default in versions \code{2.22} (June 2014) to \code{3.11.7} (Oct 2020). Since version \code{3.12.0} \code{mice()} uses the much faster \code{matchindex} C function. Use the deprecated \code{matcher} function only for exact reproduction.} \item{\dots}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputation by predictive mean matching } \details{ Imputation of \code{y} by predictive mean matching, based on van Buuren (2012, p. 73). The procedure is as follows: \enumerate{ \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge parameter \eqn{\kappa}.} \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}} \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} \item{Calculate \eqn{\dot\eta(i,j)=|X_{obs,[i]}\hat\beta-X_{mis,[j]}\dot\beta|} with \eqn{i=1,\dots,n_1} and \eqn{j=1,\dots,n_0}.} \item{Construct \eqn{n_0} sets \eqn{Z_j}, each containing \eqn{d} candidate donors, from \eqn{Y_{obs}} such that \eqn{\sum_d\dot\eta(i,j)} is minimum for all \eqn{j=1,\dots,n_0}. Break ties randomly.} \item{Draw one donor \eqn{i_j} from \eqn{Z_j} randomly for \eqn{j=1,\dots,n_0}.} \item{Calculate imputations \eqn{\dot y_j = y_{i_j}} for \eqn{j=1,\dots,n_0}.} } The name \emph{predictive mean matching} was proposed by Little (1988).
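The matching and donor-draw steps (7-10 above) can be mimicked in a few lines of plain R. The sketch below is only a simplified illustration with simulated data and made-up object names; unlike the actual routine, it reuses a single least-squares estimate for both \eqn{\hat\beta} and \eqn{\dot\beta} and fixes the donor pool at five:

\preformatted{
set.seed(1)
n <- 100
x <- cbind(1, rnorm(n))                    # design matrix with intercept
y <- as.vector(x \%*\% c(2, 1) + rnorm(n))
ry <- runif(n) > 0.3                       # TRUE = observed
beta <- solve(crossprod(x[ry, ]), crossprod(x[ry, ], y[ry]))
eta.obs <- as.vector(x[ry, ] \%*\% beta)    # predictions for potential donors
eta.mis <- as.vector(x[!ry, ] \%*\% beta)   # predictions for recipients
imp <- sapply(eta.mis, function(e) {
  d <- abs(eta.obs - e)                    # distances to all donors
  donors <- order(d)[1:5]                  # five closest donors
  y[ry][sample(donors, 1)]                 # draw one donor's observed value
})
}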
} \examples{ # We normally call mice.impute.pmm() from within mice() # But we may call it directly as follows (not recommended) set.seed(53177) xname <- c("age", "hgt", "wgt") r <- stats::complete.cases(boys[, xname]) x <- boys[r, xname] y <- boys[r, "tv"] ry <- !is.na(y) table(ry) # percentage of missing data in tv sum(!ry) / length(ry) # Impute missing tv data yimp <- mice.impute.pmm(y, ry, x) length(yimp) hist(yimp, xlab = "Imputed missing tv") # Impute all tv data yimp <- mice.impute.pmm(y, ry, x, wy = rep(TRUE, length(y))) length(yimp) hist(yimp, xlab = "Imputed missing and observed tv") plot(jitter(y), jitter(yimp), main = "Predictive mean matching on age, height and weight", xlab = "Observed tv (n = 224)", ylab = "Imputed tv (n = 224)" ) abline(0, 1) cor(y, yimp, use = "pair") # Use blots to exclude different values per column # Create blots object blots <- make.blots(boys) # Exclude ml 1 through 5 from tv donor pool blots$tv$exclude <- c(1:5) # Exclude 100 random observed heights from tv donor pool blots$hgt$exclude <- sample(unique(boys$hgt), 100) imp <- mice(boys, method = "pmm", print = FALSE, blots = blots, seed=123) blots$hgt$exclude \%in\% unlist(c(imp$imp$hgt)) # MUST be all FALSE blots$tv$exclude \%in\% unlist(c(imp$imp$tv)) # MUST be all FALSE } \references{ Little, R.J.A. (1988), Missing data adjustments in large surveys (with discussion), Journal of Business Economics and Statistics, 6, 287--301. Morris TP, White IR, Royston P (2015). Tuning multiple imputation by predictive mean matching and local residual draws. BMC Med Res Methodol. ;14:75. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-pmm.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn } \concept{univariate imputation functions} \keyword{datagen} mice/man/getqbar.Rd0000644000176200001440000000047613666252075013701 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getfit.R \name{getqbar} \alias{getqbar} \title{Extract estimate from \code{mipo} object} \usage{ getqbar(x) } \arguments{ \item{x}{An object of class \code{mipo}} } \description{ \code{getqbar} returns a named vector of pooled estimates. 
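A minimal usage sketch (object names are illustrative; any \code{mipo} object returned by \code{\link{pool}} will do):

\preformatted{
imp <- mice(nhanes, print = FALSE, seed = 1)
est <- pool(with(imp, lm(bmi ~ chl)))   # mipo object
getqbar(est)                            # named vector of pooled coefficients
}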
} mice/man/mice.impute.norm.nob.Rd0000644000176200001440000000645414330031647016211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.nob.R \name{mice.impute.norm.nob} \alias{mice.impute.norm.nob} \alias{norm.nob} \title{Imputation by linear regression without parameter uncertainty} \usage{ mice.impute.norm.nob(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear regression analysis without accounting for the uncertainty of the model parameters. } \details{ This function creates imputations using the spread around the fitted linear regression line of \code{y} given \code{x}, as fitted on the observed data. This function is provided mainly to allow comparison between proper (e.g., as implemented in \code{mice.impute.norm}) and improper (this function) normal imputation methods. For large data sets with many rows, differences between proper and improper methods are small, and in those cases one may opt for speed by using \code{mice.impute.norm.nob}. } \section{Warning}{ The function does not incorporate the variability of the regression weights, so it is not 'proper' in the sense of Rubin. For small samples, variability of the imputed data is therefore underestimated. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam.
} \seealso{ \code{\link{mice}}, \code{\link{mice.impute.norm}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/make.post.Rd0000644000176200001440000000121114330031606014121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/post.R \name{make.post} \alias{make.post} \title{Creates a \code{post} argument} \usage{ make.post(data) } \arguments{ \item{data}{A data frame or a matrix containing the incomplete data. Missing values are coded as \code{NA}.} } \value{ Character vector of \code{ncol(data)} element } \description{ This helper function creates a valid \code{post} vector. The \code{post} vector is an argument to the \code{mice} function that specifies post-processing for a variable after each iteration of imputation. } \examples{ make.post(nhanes2) } \seealso{ \code{\link{mice}} } mice/man/windspeed.Rd0000644000176200001440000000262614330031606014215 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/windspeed.R \docType{data} \name{windspeed} \alias{windspeed} \title{Subset of Irish wind speed data} \format{ A data frame with 433 rows and 6 columns containing the daily average wind speeds within the period 1961-1978 at meteorological stations in the Republic of Ireland. The data are a random sample from a larger data set. \describe{ \item{RochePt}{Roche Point} \item{Rosslare}{Rosslare} \item{Shannon}{Shannon} \item{Dublin}{Dublin} \item{Clones}{Clones} \item{MalinHead}{Malin Head} } } \description{ Subset of Irish wind speed data } \details{ The original data set is much larger and was analyzed in detail by Haslett and Raftery (1989). Van Buuren et al (2006) used this subset to investigate the influence of extreme MAR mechanisms on the quality of imputation. } \examples{ windspeed[1:3, ] } \references{ Haslett, J. and Raftery, A. E. (1989). \emph{Space-time Modeling with Long-memory Dependence: Assessing Ireland's Wind Power Resource (with Discussion)}. Applied Statistics 38, 1-50. \url{http://lib.stat.cmu.edu/datasets/wind.desc} and \url{http://lib.stat.cmu.edu/datasets/wind.data} van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. 
} \keyword{datasets} mice/man/brandsma.Rd0000644000176200001440000000426714330031606014025 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/brandsma.R \docType{data} \name{brandsma} \alias{brandsma} \title{Brandsma school data used Snijders and Bosker (2012)} \format{ \code{brandsma} is a data frame with 4106 rows and 14 columns: \describe{ \item{\code{sch}}{School number} \item{\code{pup}}{Pupil ID} \item{\code{iqv}}{IQ verbal} \item{\code{iqp}}{IQ performal} \item{\code{sex}}{Sex of pupil} \item{\code{ses}}{SES score of pupil} \item{\code{min}}{Minority member 0/1} \item{\code{rpg}}{Number of repeated groups, 0, 1, 2} \item{\code{lpr}}{language score PRE} \item{\code{lpo}}{language score POST} \item{\code{apr}}{Arithmetic score PRE} \item{\code{apo}}{Arithmetic score POST} \item{\code{den}}{Denomination classification 1-4 - at school level} \item{\code{ssi}}{School SES indicator - at school level} } } \source{ Constructed from \code{MLbook_2nded_total_4106-99.sav} from \url{https://www.stats.ox.ac.uk/~snijders/mlbook.htm} by function \code{data-raw/R/brandsma.R} } \description{ Dataset with raw data from Snijders and Bosker (2012) containing data from 4106 pupils attending 216 schools. This dataset includes all pupils and schools with missing data. } \note{ This dataset is constructed from the raw data. There are a few differences with the data set used in Chapter 4 and 5 of Snijders and Bosker: \enumerate{ \item All schools are included, including the five school with missing values on \code{langpost}. \item Missing \code{denomina} codes are left as missing. \item Aggregates are undefined in the presence of missing data in the underlying values. Variables \code{ses}, \code{iqv} and \code{iqp} are in their original scale, and not globally centered. No aggregate variables at the school level are included. \item There is a wider selection of original variables. Note however that the source data contain an even wider set of variables. } } \references{ Brandsma, HP and Knuver, JWM (1989), Effects of school and classroom characteristics on pupil progress in language and arithmetic. International Journal of Educational Research, 13(7), 777 - 788. Snijders, TAB and Bosker RJ (2012). Multilevel Analysis, 2nd Ed. Sage, Los Angeles, 2012. } \keyword{datasets} mice/man/ampute.discrete.Rd0000644000176200001440000000416514330031606015327 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.discrete.R \name{ampute.discrete} \alias{ampute.discrete} \title{Multivariate amputation based on discrete probability functions} \usage{ ampute.discrete(P, scores, prop, odds) } \arguments{ \item{P}{A vector containing the pattern numbers of candidates. For each case, a value between 1 and #patterns is given. For example, a case with value 2 is candidate for missing data pattern 2.} \item{scores}{A list containing vectors with the candidates's weighted sum scores, the result of an underlying function in \code{\link{ampute}}.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} \item{odds}{A matrix where #patterns defines the #rows. Each row should contain the odds of being missing for the corresponding pattern. The amount of odds values defines in how many quantiles the sum scores will be divided. 
The values are relative probabilities: a quantile with odds value 4 will have a probability of being missing that is four times higher than a quantile with odds 1. The #quantiles may differ between the patterns, specify NA for cells remaining empty. Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of \code{\link{ampute.default.odds}}.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern. Odds probabilities (Brand, 1999, pp. 110-113) will be induced on the weighted sum scores, calculated earlier in the multivariate amputation function \code{\link{ampute}}. } \references{ Brand, J.P.L. (1999). \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.odds}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/mice.theme.Rd0000644000176200001440000000156514330031606014252 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.theme.R \name{mice.theme} \alias{mice.theme} \title{Set the theme for the plotting Trellis functions} \usage{ mice.theme(transparent = TRUE, alpha.fill = 0.3) } \arguments{ \item{transparent}{A logical indicating whether alpha-transparency is allowed. The default is \code{TRUE}.} \item{alpha.fill}{A numerical values between 0 and 1 that indicates the default alpha value for fills.} } \value{ \code{mice.theme()} returns a named list that can be used as a theme in the functions in \pkg{lattice}. By default, the \code{mice.theme()} function sets \code{transparent <- TRUE} if the current device \code{.Device} supports semi-transparent colors. } \description{ The \code{mice.theme()} function sets default choices for Trellis plots that are built into \pkg{mice}. } \author{ Stef van Buuren 2011 } mice/man/walking.Rd0000644000176200001440000000446714330031606013674 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/walking.R \docType{data} \name{walking} \alias{walking} \title{Walking disability data} \format{ A data frame with 890 rows on the following 5 variables: \describe{ \item{sex}{Sex of respondent (factor)} \item{age}{Age of respondent} \item{YA}{Item administered in samples A and E (factor)} \item{YB}{Item administered in samples B and E (factor)} \item{src}{Source: Sample A, B or E (factor)} } } \description{ Two items YA and YB measuring walking disability in samples A, B and E. } \details{ Example dataset to demonstrate imputation of two items (YA and YB). Item YA is administered to sample A and sample E, item YB is administered to sample B and sample E, so sample E acts as a bridge study. Imputation using a bridge study is better than simple equating or than imputation under independence. Item YA corresponds to the HAQ8 item, and item YB corresponds to the GAR9 items from Van Buuren et al (2005). Sample E (as well as sample B) is the Euridiss study (n=292), sample A is the ERGOPLUS study (n=306). See Van Buuren (2018) section 9.4 for more details on the imputation methodology. 
} \examples{ md.pattern(walking) micemill <- function(n) { for (i in 1:n) { imp <<- mice.mids(imp) # global assignment cors <- with(imp, cor(as.numeric(YA), as.numeric(YB), method = "kendall" )) tau <<- rbind(tau, getfit(cors, s = TRUE)) # global assignment } } plotit <- function() { matplot( x = 1:nrow(tau), y = tau, ylab = expression(paste("Kendall's ", tau)), xlab = "Iteration", type = "l", lwd = 1, lty = 1:10, col = "black" ) } tau <- NULL imp <- mice(walking, max = 0, m = 10, seed = 92786) pred <- imp$pred pred[, c("src", "age", "sex")] <- 0 imp <- mice(walking, max = 0, m = 3, seed = 92786, pred = pred) micemill(5) plotit() ### to get figure 9.8 van Buuren (2018) use m=10 and micemill(20) } \references{ van Buuren, S., Eyres, S., Tennant, A., Hopman-Rock, M. (2005). Improving comparability of existing data by Response Conversion. \emph{Journal of Official Statistics}, \bold{21}(1), 53-72. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-codingsystems.html#sec:impbridge}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \keyword{datasets} mice/man/mice.impute.midastouch.Rd0000644000176200001440000001373614330031647016622 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.midastouch.R \name{mice.impute.midastouch} \alias{mice.impute.midastouch} \title{Imputation by predictive mean matching with distance aided donor selection} \usage{ mice.impute.midastouch( y, ry, x, wy = NULL, ridge = 1e-05, midas.kappa = NULL, outout = TRUE, neff = NULL, debug = NULL, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{ridge}{The ridge penalty used in \code{.norm.draw()} to prevent problems with multicollinearity. The default is \code{ridge = 1e-05}, which means that 0.01 percent of the diagonal is added to the cross-product. Larger ridges may result in more biased estimates. For highly noisy data (e.g. many junk variables), set \code{ridge = 1e-06} or even lower to reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher.} \item{midas.kappa}{Scalar. If \code{NULL} (default) then the optimal \code{kappa} gets selected automatically. Alternatively, the user may specify a scalar. Siddique and Belin 2008 find \code{midas.kappa = 3} to be sensible.} \item{outout}{Logical. If \code{TRUE} (default) one model is estimated for each donor (leave-one-out principle). For speedup choose \code{outout = FALSE}, which estimates one model for all observations leading to in-sample predictions for the donors and out-of-sample predictions for the recipients. Mind the inappropriateness, though.} \item{neff}{FOR EXPERTS. Null or character string. The name of an existing environment in which the effective sample size of the donors for each loop (CE iterations times multiple imputations) is supposed to be written. 
The effective sample size is necessary to compute the correction for the total variance as originally suggested by Parzen, Lipsitz and Fitzmaurice 2005. The objectname is \code{midastouch.neff}.} \item{debug}{FOR EXPERTS. Null or character string. The name of an existing environment in which the input is supposed to be written. The objectname is \code{midastouch.inputlist}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using predictive mean matching. } \details{ Imputation of \code{y} by predictive mean matching, based on Rubin (1987, p. 168, formulas a and b) and Siddique and Belin 2008. The procedure is as follows: \enumerate{ \item Draw a bootstrap sample from the donor pool. \item Estimate a beta matrix on the bootstrap sample by the leave one out principle. \item Compute type II predicted values for \code{yobs} (nobs x 1) and \code{ymis} (nmis x nobs). \item Calculate the distance between all \code{yobs} and the corresponding \code{ymis}. \item Convert the distances in drawing probabilities. \item For each recipient draw a donor from the entire pool while considering the probabilities from the model. \item Take its observed value in \code{y} as the imputation. } } \examples{ # do default multiple imputation on a numeric matrix imp <- mice(nhanes, method = "midastouch") imp # list the actual imputations for BMI imp$imp$bmi # first completed data matrix complete(imp) # imputation on mixed data with a different method per column mice(nhanes2, method = c("sample", "midastouch", "logreg", "norm")) } \references{ Gaffert, P., Meinfelder, F., Bosch V. (2015) Towards an MI-proper Predictive Mean Matching, Discussion Paper. \url{https://www.uni-bamberg.de/fileadmin/uni/fakultaeten/sowi_lehrstuehle/statistik/Personen/Dateien_Florian/properPMM.pdf} Little, R.J.A. (1988), Missing data adjustments in large surveys (with discussion), Journal of Business Economics and Statistics, 6, 287--301. Parzen, M., Lipsitz, S. R., Fitzmaurice, G. M. (2005), A note on reducing the bias of the approximate Bayesian bootstrap imputation variance estimator. Biometrika \bold{92}, 4, 971--974. Rubin, D.B. (1987), Multiple imputation for nonresponse in surveys. New York: Wiley. Siddique, J., Belin, T.R. (2008), Multiple imputation using an iterative hot-deck with distance-based donor selection. Statistics in medicine, \bold{27}, 1, 83--102 Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006), Fully conditional specification in multivariate imputation. \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. Van Buuren, S., Groothuis-Oudshoorn, K. (2011), \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}, 3, 1--67. 
\doi{10.18637/jss.v045.i03} } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Philipp Gaffert, Florian Meinfelder, Volker Bosch 2015 } \concept{univariate imputation functions} \keyword{datagen} mice/man/boys.Rd0000644000176200001440000000544714330031606013213 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/boys.R \docType{data} \name{boys} \alias{boys} \title{Growth of Dutch boys} \format{ A data frame with 748 rows on the following 9 variables: \describe{ \item{age}{Decimal age (0-21 years)} \item{hgt}{Height (cm)} \item{wgt}{Weight (kg)} \item{bmi}{Body mass index} \item{hc}{Head circumference (cm)} \item{gen}{Genital Tanner stage (G1-G5)} \item{phb}{Pubic hair (Tanner P1-P6)} \item{tv}{Testicular volume (ml)} \item{reg}{Region (north, east, west, south, city)} } } \source{ Fredriks, A.M,, van Buuren, S., Burgmeijer, R.J., Meulmeester JF, Beuker, R.J., Brugman, E., Roede, M.J., Verloove-Vanhorick, S.P., Wit, J.M. (2000) Continuing positive secular growth change in The Netherlands 1955-1997. \emph{Pediatric Research}, \bold{47}, 316-323. Fredriks, A.M., van Buuren, S., Wit, J.M., Verloove-Vanhorick, S.P. (2000). Body index measurements in 1996-7 compared with 1980. \emph{Archives of Disease in Childhood}, \bold{82}, 107-112. } \description{ Height, weight, head circumference and puberty of 748 Dutch boys. } \details{ Random sample of 10\% from the cross-sectional data used to construct the Dutch growth references 1997. Variables \code{gen} and \code{phb} are ordered factors. \code{reg} is a factor. 
} \examples{ # create two imputed data sets imp <- mice(boys, m = 1, maxit = 2) z <- complete(imp, 1) # create imputations for age <8yrs plot(z$age, z$gen, col = mdc(1:2)[1 + is.na(boys$gen)], xlab = "Age (years)", ylab = "Tanner Stage Genital" ) # figure to show that the default imputation method does not impute BMI # consistently plot(z$bmi, z$wgt / (z$hgt / 100)^2, col = mdc(1:2)[1 + is.na(boys$bmi)], xlab = "Imputed BMI", ylab = "Calculated BMI" ) # also, BMI distributions are somewhat different oldpar <- par(mfrow = c(1, 2)) MASS::truehist(z$bmi[!is.na(boys$bmi)], h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(1), xlab = "BMI observed" ) MASS::truehist(z$bmi[is.na(boys$bmi)], h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(2), xlab = "BMI imputed" ) par(oldpar) # repair the inconsistency problem by passive imputation meth <- imp$meth meth["bmi"] <- "~I(wgt/(hgt/100)^2)" pred <- imp$predictorMatrix pred["hgt", "bmi"] <- 0 pred["wgt", "bmi"] <- 0 imp2 <- mice(boys, m = 1, maxit = 2, meth = meth, pred = pred) z2 <- complete(imp2, 1) # show that new imputations are consistent plot(z2$bmi, z2$wgt / (z2$hgt / 100)^2, col = mdc(1:2)[1 + is.na(boys$bmi)], ylab = "Calculated BMI" ) # and compare distributions oldpar <- par(mfrow = c(1, 2)) MASS::truehist(z2$bmi[!is.na(boys$bmi)], h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(1), xlab = "BMI observed" ) MASS::truehist(z2$bmi[is.na(boys$bmi)], h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(2), xlab = "BMI imputed" ) par(oldpar) } \keyword{datasets} mice/man/mice.impute.norm.predict.Rd0000644000176200001440000000635614330031647017066 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.norm.predict.R \name{mice.impute.norm.predict} \alias{mice.impute.norm.predict} \alias{norm.predict} \title{Imputation by linear regression through prediction} \usage{ mice.impute.norm.predict(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes the "best value" according to the linear regression model, also known as \emph{regression imputation}. } \details{ Calculates regression weights from the observed data and returns predicted values to as imputations. This method is known as \emph{regression imputation}. } \section{Warning}{ THIS METHOD SHOULD NOT BE USED FOR DATA ANALYSIS. This method is seductive because it imputes the most likely value according to the model. However, it ignores the uncertainty of the missing values and artificially amplifies the relations between the columns of the data. Application of richer models having more parameters does not help to evade these issues. Stochastic regression methods, like \code{\link{mice.impute.pmm}} or \code{\link{mice.impute.norm}}, are generally preferred. 
At best, prediction can give reasonable estimates of the mean, especially if normality assumptions are plausible. See Little and Rubin (2002, p. 62-64) or Van Buuren (2012, p. 11-13, p. 45-46) for a discussion of this method. } \references{ Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing Data. New York: John Wiley and Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. } \seealso{ Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Gerko Vink, Stef van Buuren, 2018 } \concept{univariate imputation functions} \keyword{datagen} mice/man/make.blots.Rd0000644000176200001440000000163114330031606014265 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/blots.R \name{make.blots} \alias{make.blots} \title{Creates a \code{blots} argument} \usage{ make.blots(data, blocks = make.blocks(data)) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{blocks}{An optional specification for blocks of variables in the rows. The default assigns each variable in its own block.} } \value{ A matrix } \description{ This helper function creates a valid \code{blots} object. The \code{blots} object is an argument to the \code{mice} function. The name \code{blots} is a contraction of blocks-dots. Through \code{blots}, the user can specify any additional arguments that are specifically passed down to the lowest level imputation function. } \examples{ make.predictorMatrix(nhanes) make.blots(nhanes, blocks = name.blocks(c("age", "hyp"), "xxx")) } \seealso{ \code{\link{make.blocks}} } mice/man/potthoffroy.Rd0000644000176200001440000000405314330031606014612 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/potthoffroy.R \docType{data} \name{potthoffroy} \alias{potthoffroy} \title{Potthoff-Roy data} \format{ \code{tbs} is a data frame with 27 rows and 6 columns: \describe{ \item{id}{Person number} \item{sex}{Sex M/F} \item{d8}{Distance at age 8 years} \item{d10}{Distance at age 10 years} \item{d12}{Distance at age 12 years} \item{d14}{Distance at age 14 years} } } \source{ Potthoff, R. F., Roy, S. N. (1964). A generalized multivariate analysis of variance model usefully especially for growth curve problems. \emph{Biometrika}, \emph{51}(3), 313-326. Little, R. J. A., Rubin, D. B. (1987). \emph{Statistical Analysis with Missing Data.} New York: John Wiley & Sons. Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/ex-ch-longitudinal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. 
} \description{ Data from Potthoff-Roy (1964) with repeated measures on dental fissures. } \details{ This data set is the famous Potthoff-Roy data, used to demonstrate MANOVA on repeated measure data. Potthoff and Roy (1964) published classic data on a study in 16 boys and 11 girls, who at ages 8, 10, 12, and 14 had the distance (mm) from the center of the pituitary gland to the pteryomaxillary fissure measured. Changes in pituitary-pteryomaxillary distances during growth is important in orthodontic therapy. The goals of the study were to describe the distance in boys and girls as simple functions of age, and then to compare the functions for boys and girls. The data have been reanalyzed by many authors including Jennrich and Schluchter (1986), Little and Rubin (1987), Pinheiro and Bates (2000), Verbeke and Molenberghs (2000) and Molenberghs and Kenward (2007). See Chapter 9 of Van Buuren (2012) for a challenging exercise using these data. } \examples{ ### create missing values at age 10 as in Little and Rubin (1987) phr <- potthoffroy idmis <- c(3, 6, 9, 10, 13, 16, 23, 24, 27) phr[idmis, 4] <- NA phr md.pattern(phr) } \keyword{datasets} mice/man/mice.impute.quadratic.Rd0000644000176200001440000001076514436637251016450 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.quadratic.R \name{mice.impute.quadratic} \alias{mice.impute.quadratic} \alias{quadratic} \title{Imputation of quadratic terms} \usage{ mice.impute.quadratic(y, ry, x, wy = NULL, quad.outcome = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{quad.outcome}{The name of the outcome in the quadratic analysis as a character string. For example, if the substantive model of interest is \code{y ~ x + xx}, then \code{"y"} would be the \code{quad.outcome}} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes incomplete variable that appears as both main effect and quadratic effect in the complete-data model. } \details{ This function implements the "polynomial combination" method. First, the polynomial combination \eqn{Z = Y \beta_1 + Y^2 \beta_2} is formed. \eqn{Z} is imputed by predictive mean matching, followed by a decomposition of the imputed data \eqn{Z} into components \eqn{Y} and \eqn{Y^2}. See Van Buuren (2012, pp. 139-141) and Vink et al (2012) for more details. The method ensures that 1) the imputed data for \eqn{Y} and \eqn{Y^2} are mutually consistent, and 2) that provides unbiased estimates of the regression weights in a complete-data linear regression that use both \eqn{Y} and \eqn{Y^2}. } \note{ There are two situations to consider. If only the linear term \code{Y} is present in the data, calculate the quadratic term \code{YY} after imputation. 
If both the linear term \code{Y} and the the quadratic term \code{YY} are variables in the data, then first impute \code{Y} by calling \code{mice.impute.quadratic()} on \code{Y}, and then impute \code{YY} by passive imputation as \code{meth["YY"] <- "~I(Y^2)"}. See example section for details. Generally, we would like \code{YY} to be present in the data if we need to preserve quadratic relations between \code{YY} and any third variables in the multivariate incomplete data that we might wish to impute. } \examples{ # Create Data B1 <- .5 B2 <- .5 X <- rnorm(1000) XX <- X^2 e <- rnorm(1000, 0, 1) Y <- B1 * X + B2 * XX + e dat <- data.frame(x = X, xx = XX, y = Y) # Impose 25 percent MCAR Missingness dat[0 == rbinom(1000, 1, 1 - .25), 1:2] <- NA # Prepare data for imputation ini <- mice(dat, maxit = 0) meth <- c("quadratic", "~I(x^2)", "") pred <- ini$pred pred[, "xx"] <- 0 # Impute data imp <- mice(dat, meth = meth, pred = pred, quad.outcome = "y") # Pool results pool(with(imp, lm(y ~ x + xx))) # Plot results stripplot(imp) plot(dat$x, dat$xx, col = mdc(1), xlab = "x", ylab = "xx") cmp <- complete(imp) points(cmp$x[is.na(dat$x)], cmp$xx[is.na(dat$x)], col = mdc(2)) } \seealso{ \code{\link{mice.impute.pmm}} Van Buuren, S. (2018). \href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. Boca Raton, FL. Vink, G., van Buuren, S. (2013). Multiple Imputation of Squared Terms. \emph{Sociological Methods & Research}, 42:598-607. Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Mingyang Cai and Gerko Vink } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.jomoImpute.Rd0000644000176200001440000000632214436133175016611 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.jomoImpute.R \name{mice.impute.jomoImpute} \alias{mice.impute.jomoImpute} \title{Multivariate multilevel imputation using \code{jomo}} \usage{ mice.impute.jomoImpute( data, formula, type, m = 1, silent = TRUE, format = "imputes", ... ) } \arguments{ \item{data}{A data frame containing incomplete and auxiliary variables, the cluster indicator variable, and any other variables that should be present in the imputed datasets.} \item{formula}{A formula specifying the role of each variable in the imputation model. The basic model is constructed by \code{model.matrix}, thus allowing to include derived variables in the imputation model using \code{I()}. See \code{\link[mitml]{jomoImpute}}.} \item{type}{An integer vector specifying the role of each variable in the imputation model (see \code{\link[mitml]{jomoImpute}})} \item{m}{The number of imputed data sets to generate. 
Default is 10.} \item{silent}{(optional) Logical flag indicating if console output should be suppressed. Default is \code{FALSE}.} \item{format}{A character vector specifying the type of object that should be returned. The default is \code{format = "list"}. No other formats are currently supported.} \item{...}{Other named arguments: \code{n.burn}, \code{n.iter}, \code{group}, \code{prior}, \code{silent} and others.} } \value{ A list of imputations for all incomplete variables in the model, that can be stored in the the \code{imp} component of the \code{mids} object. } \description{ This function is a wrapper around the \code{jomoImpute} function from the \code{mitml} package so that it can be called to impute blocks of variables in \code{mice}. The \code{mitml::jomoImpute} function provides an interface to the \code{jomo} package for multiple imputation of multilevel data \url{https://CRAN.R-project.org/package=jomo}. Imputations can be generated using \code{type} or \code{formula}, which offer different options for model specification. } \note{ The number of imputations \code{m} is set to 1, and the function is called \code{m} times so that it fits within the \code{mice} iteration scheme. This is a multivariate imputation function using a joint model. } \examples{ \dontrun{ # Note: Requires mitml 0.3-5.7 blocks <- list(c("bmi", "chl", "hyp"), "age") method <- c("jomoImpute", "pmm") ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) pred <- ini$pred pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) } } \references{ Grund S, Luedtke O, Robitzsch A (2016). Multiple Imputation of Multilevel Missing Data: An Introduction to the R Package \code{pan}. SAGE Open. Quartagno M and Carpenter JR (2015). Multiple imputation for IPD meta-analysis: allowing for heterogeneity and studies with missing covariates. Statistics in Medicine, 35:2938-2954, 2015. } \seealso{ \code{\link[mitml]{jomoImpute}} Other multivariate-2l: \code{\link{mice.impute.panImpute}()} } \author{ Stef van Buuren, 2018, building on work of Simon Grund, Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) and Quartagno and Carpenter (authors of \code{jomo} package). } \concept{multivariate-2l} \keyword{datagen} mice/man/ifdo.Rd0000644000176200001440000000065213666252075013171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/auxiliary.R \name{ifdo} \alias{ifdo} \title{Conditional imputation helper} \usage{ ifdo(cond, action) } \arguments{ \item{cond}{a condition} \item{action}{the action to do} } \value{ Currently returns an error message. } \description{ Sorry, the \code{ifdo()} function is not yet implemented. } \author{ Stef van Buuren, 2012 } \keyword{internal} mice/man/ampute.default.odds.Rd0000644000176200001440000000154314330031606016076 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.default.R \name{ampute.default.odds} \alias{ampute.default.odds} \title{Default \code{odds} in \code{ampute()}} \usage{ ampute.default.odds(patterns) } \arguments{ \item{patterns}{A matrix of size #patterns by #variables where 0 indicates a variable should have missing values and 1 indicates a variable should remain complete. Could be the result of \code{\link{ampute.default.patterns}}.} } \value{ A matrix where #rows equals #patterns. Default is 4 quantiles with odds values 1, 2, 3 and 4, for each pattern, imitating a RIGHT type of missingness. 
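For instance, assuming three variables (and hence three default patterns), the default odds matrix can be reproduced with the exported helpers; a minimal sketch, where \code{n.var} is taken to be the number-of-variables argument of \code{ampute.default.patterns()}:
\preformatted{
pat <- ampute.default.patterns(n.var = 3)  # one missing variable per pattern
ampute.default.odds(pat)                   # weights 1, 2, 3, 4 across 4 quantiles
}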
} \description{ Defines the default odds matrix for the multivariate amputation function \code{ampute}. } \seealso{ \code{\link{ampute}}, \code{\link{ampute.default.patterns}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/print.Rd0000644000176200001440000000153113666252075013401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.R \name{print.mids} \alias{print.mids} \alias{print.mira} \alias{print.mice.anova} \alias{print.mice.anova.summary} \title{Print a \code{mids} object} \usage{ \method{print}{mids}(x, ...) \method{print}{mira}(x, ...) \method{print}{mice.anova}(x, ...) \method{print}{mice.anova.summary}(x, ...) } \arguments{ \item{x}{Object of class \code{mids}, \code{mira} or \code{mipo}} \item{...}{Other parameters passed down to \code{print.default()}} } \value{ \code{NULL} \code{NULL} \code{NULL} \code{NULL} } \description{ Print a \code{mids} object Print a \code{mira} object Print a \code{mice.anova} object Print a \code{summary.mice.anova} object } \seealso{ \code{\link[=mids-class]{mids}} \code{\link[=mira-class]{mira}} \code{\link{mipo}} \code{\link{mipo}} } mice/man/mids2mplus.Rd0000644000176200001440000000317613666252075014353 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mids2mplus.R \name{mids2mplus} \alias{mids2mplus} \title{Export \code{mids} object to Mplus} \usage{ mids2mplus( imp, file.prefix = "imp", path = getwd(), sep = "\\t", dec = ".", silent = FALSE ) } \arguments{ \item{imp}{The \code{imp} argument is an object of class \code{mids}, typically produced by the \code{mice()} function.} \item{file.prefix}{A character string describing the prefix of the output data files.} \item{path}{A character string containing the path of the output file. By default, files are written to the current \code{R} working directory.} \item{sep}{The separator between the data fields.} \item{dec}{The decimal separator for numerical data.} \item{silent}{A logical flag stating whether the names of the files should be printed.} } \value{ The return value is \code{NULL}. } \description{ Converts a \code{mids} object into a format recognized by Mplus, and writes the data and the Mplus input files } \details{ This function automates most of the work needed to export a \code{mids} object to \code{Mplus}. The function writes the multiple imputation datasets, the file that contains the names of the multiple imputation data sets and an \code{Mplus} input file. The \code{Mplus} input file has the proper file names, so in principle it should run and read the data without alteration. \code{Mplus} will recognize the data set as a multiply imputed data set, and do automatic pooling in procedures where that is supported. } \seealso{ \code{\link[=mids-class]{mids}}, \code{\link{mids2spss}} } \author{ Gerko Vink, 2011. } \keyword{manip} mice/man/ampute.mcar.Rd0000644000176200001440000000301414330031606014437 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ampute.mcar.R \name{ampute.mcar} \alias{ampute.mcar} \title{Multivariate amputation under a MCAR mechanism} \usage{ ampute.mcar(P, patterns, prop) } \arguments{ \item{P}{A vector containing the pattern numbers of the cases' candidates. For each case, a value between 1 and #patterns is given. 
For example, a case with value 2 is candidate for missing data pattern 2.} \item{patterns}{A matrix of size #patterns by #variables where \code{0} indicates a variable should have missing values and \code{1} indicates a variable should remain complete. The user may specify as many patterns as desired. One pattern (a vector) is also possible. Could be the result of \code{\link{ampute.default.patterns}}, default will be a square matrix of size #variables where each pattern has missingness on one variable only.} \item{prop}{A scalar specifying the proportion of missingness. Should be a value between 0 and 1. Default is a missingness proportion of 0.5.} } \value{ A list containing vectors with \code{0} if a case should be made missing and \code{1} if a case should remain complete. The first vector refers to the first pattern, the second vector to the second pattern, etcetera. } \description{ This function creates a missing data indicator for each pattern, based on a MCAR missingness mechanism. The function is used in the multivariate amputation function \code{\link{ampute}}. } \seealso{ \code{\link{ampute}} } \author{ Rianne Schouten, 2016 } \keyword{internal} mice/man/make.where.Rd0000644000176200001440000000217114334522175014266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/where.R \name{make.where} \alias{make.where} \title{Creates a \code{where} argument} \usage{ make.where(data, keyword = c("missing", "all", "none", "observed")) } \arguments{ \item{data}{A \code{data.frame} with the source data} \item{keyword}{An optional keyword, one of \code{"missing"} (missing values are imputed), \code{"observed"} (observed values are imputed), \code{"all"} and \code{"none"}. The default is \code{keyword = "missing"}} } \value{ A matrix with logical } \description{ This helper function creates a valid \code{where} matrix. The \code{where} matrix is an argument to the \code{mice} function. It has the same size as \code{data} and specifies which values are to be imputed (\code{TRUE}) or nor (\code{FALSE}). } \examples{ head(make.where(nhanes), 3) # create & analyse synthetic data where <- make.where(nhanes2, "all") imp <- mice(nhanes2, m = 10, where = where, print = FALSE, seed = 123 ) fit <- with(imp, lm(chl ~ bmi + age + hyp)) summary(pool.syn(fit)) } \seealso{ \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} } mice/man/bwplot.mads.Rd0000644000176200001440000000414214330031606014460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bwplot.mads.R \name{bwplot.mads} \alias{bwplot.mads} \title{Box-and-whisker plot of amputed and non-amputed data} \usage{ \method{bwplot}{mads}( x, data, which.pat = NULL, standardized = TRUE, descriptives = TRUE, layout = NULL, ... ) } \arguments{ \item{x}{A \code{mads} (\code{\link{mads-class}}) object, typically created by \code{\link{ampute}}.} \item{data}{A string or vector of variable names that needs to be plotted. As a default, all variables will be plotted.} \item{which.pat}{A scalar or vector indicating which patterns need to be plotted. As a default, all patterns are plotted.} \item{standardized}{Logical. Whether the box-and-whisker plots need to be created from standardized data or not. Default is TRUE.} \item{descriptives}{Logical. Whether the mean, variance and n of the variables need to be printed. This is useful to examine the effect of the amputation. 
Default is TRUE.} \item{layout}{A vector of two values indicating how the boxplots of one pattern should be divided over the plot. For example, \code{c(2, 3)} indicates that the boxplots of six variables need to be placed on 3 rows and 2 columns. Default is 1 row and a number of columns equal to #variables. Note that for more than 6 variables, multiple plots will be created automatically.} \item{\dots}{Not used, but for consistency with generic} } \value{ A list containing the box-and-whisker plots. Note that a new pattern will always be shown in a new plot. } \description{ Plotting method to investigate the relation between the data variables and the amputed data. The function shows how the amputed values are related to the variable values. } \note{ The \code{mads} object contains all the information you need to make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate Amputation using Ampute} to understand the contents of class object \code{mads}. } \seealso{ \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for an overview of the package, \code{\link{mads-class}} } \author{ Rianne Schouten, 2016 } mice/man/mice.impute.lda.Rd0000644000176200001440000000712714330031647015217 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.lda.R \name{mice.impute.lda} \alias{mice.impute.lda} \title{Imputation by linear discriminant analysis} \usage{ mice.impute.lda(y, ry, x, wy = NULL, ...) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{...}{Other named arguments. Not used.} } \value{ Vector with imputed data, of type factor, and of length \code{sum(wy)} } \description{ Imputes univariate missing data using linear discriminant analysis } \details{ Imputation of categorical response variables by linear discriminant analysis. This function uses the Venables/Ripley functions \code{lda()} and \code{predict.lda()} to compute posterior probabilities for each incomplete case, and draws the imputations from this posterior. This function can be called from within the Gibbs sampler by specifying \code{"lda"} in the \code{method} argument of \code{mice()}. This method is usually faster and uses fewer resources than calling the function \code{\link{mice.impute.polyreg}}, but the statistical properties may not be as good (Brand, 1999). } \section{Warning}{ The function does not incorporate the variability of the discriminant weight, so it is not 'proper' in the sense of Rubin. For small samples and rare categories in \code{y}, variability of the imputed data could therefore be underestimated. Added: SvB June 2009 Tried to include bootstrap, but disabled since bootstrapping may easily lead to constant variables within groups. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Brand, J.P.L. (1999).
Development, Implementation and Evaluation of Multiple Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN 90-74479-08-1. Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-PLUS (2nd ed). Springer, Berlin. } \seealso{ \code{\link{mice}}, \code{link{mice.impute.polyreg}}, \code{\link[MASS]{lda}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.polyreg}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \concept{univariate imputation functions} \keyword{datagen} mice/man/mice.impute.passive.Rd0000644000176200001440000000250514330031647016124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.passive.R \name{mice.impute.passive} \alias{mice.impute.passive} \title{Passive imputation} \usage{ mice.impute.passive(data, func) } \arguments{ \item{data}{A data frame} \item{func}{A \code{formula} specifying the transformations on data} } \value{ The result of applying \code{formula} } \description{ Calculate new variable during imputation } \details{ Passive imputation is a special internal imputation function. Using this facility, the user can specify, at any point in the \code{mice} Gibbs sampling algorithm, a function on the imputed data. This is useful, for example, to compute a cubic version of a variable, a transformation like \code{Q = W/H^2} based on two variables, or a mean variable like \code{(x_1+x_2+x_3)/3}. The so derived variables might be used in other places in the imputation model. The function allows to dynamically derive virtually any function of the imputed data at virtually any time. } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{mice}} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 } \keyword{datagen} mice/man/pool.compare.Rd0000644000176200001440000000735114330031647014636 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.compare.R \name{pool.compare} \alias{pool.compare} \title{Compare two nested models fitted to imputed data} \usage{ pool.compare(fit1, fit0, method = c("wald", "likelihood"), data = NULL) } \arguments{ \item{fit1}{An object of class 'mira', produced by \code{with.mids()}.} \item{fit0}{An object of class 'mira', produced by \code{with.mids()}. The model in \code{fit0} is a nested fit0 of \code{fit1}.} \item{method}{Either \code{"wald"} or \code{"likelihood"} specifying the type of comparison. 
The default is \code{"wald"}.} \item{data}{No longer used.} } \value{ A list containing several components. Component \code{call} is the call to the \code{pool.compare} function. Component \code{call11} is the call that created \code{fit1}. Component \code{call12} is the call that created the imputations. Component \code{call01} is the call that created \code{fit0}. Component \code{call02} is the call that created the imputations. Components \code{method} is the method used to compare two models: 'Wald' or 'likelihood'. Component \code{nmis} is the number of missing entries for each variable. Component \code{m} is the number of imputations. Component \code{qhat1} is a matrix, containing the estimated coefficients of the \emph{m} repeated complete data analyses from \code{fit1}. Component \code{qhat0} is a matrix, containing the estimated coefficients of the \emph{m} repeated complete data analyses from \code{fit0}. Component \code{ubar1} is the mean of the variances of \code{fit1}, formula (3.1.3), Rubin (1987). Component \code{ubar0} is the mean of the variances of \code{fit0}, formula (3.1.3), Rubin (1987). Component \code{qbar1} is the pooled estimate of \code{fit1}, formula (3.1.2) Rubin (1987). Component \code{qbar0} is the pooled estimate of \code{fit0}, formula (3.1.2) Rubin (1987). Component \code{Dm} is the test statistic. Component \code{rm} is the relative increase in variance due to nonresponse, formula (3.1.7), Rubin (1987). Component \code{df1}: df1 = under the null hypothesis it is assumed that \code{Dm} has an F distribution with (df1,df2) degrees of freedom. Component \code{df2}: df2. Component \code{pvalue} is the P-value of testing whether the model \code{fit1} is statistically different from the smaller \code{fit0}. } \description{ This function is deprecated in V3. Use \code{\link{D1}} or \code{\link{D3}} instead. } \details{ Compares two nested models after m repeated complete data analysis The function is based on the article of Meng and Rubin (1992). The Wald-method can be found in paragraph 2.2 and the likelihood method can be found in paragraph 3. One could use the Wald method for comparison of linear models obtained with e.g. \code{lm} (in \code{with.mids()}). The likelihood method should be used in case of logistic regression models obtained with \code{glm()} in \code{with.mids()}. The function assumes that \code{fit1} is the larger model, and that model \code{fit0} is fully contained in \code{fit1}. In case of \code{method='wald'}, the null hypothesis is tested that the extra parameters are all zero. } \references{ Li, K.H., Meng, X.L., Raghunathan, T.E. and Rubin, D. B. (1991). Significance levels from repeated p-values with multiply-imputed data. Statistica Sinica, 1, 65-92. Meng, X.L. and Rubin, D.B. (1992). Performing likelihood ratio tests with multiple-imputed data sets. Biometrika, 79, 103-111. van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} } \seealso{ \code{\link{lm.mids}}, \code{\link{glm.mids}} } \author{ Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 } \keyword{htest} mice/man/mice.impute.panImpute.Rd0000644000176200001440000000627614436064333016432 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.panImpute.R \name{mice.impute.panImpute} \alias{mice.impute.panImpute} \title{Impute multilevel missing data using \code{pan}} \usage{ mice.impute.panImpute( data, formula, type, m = 1, silent = TRUE, format = "imputes", ... ) } \arguments{ \item{data}{A data frame containing incomplete and auxiliary variables, the cluster indicator variable, and any other variables that should be present in the imputed datasets.} \item{formula}{A formula specifying the role of each variable in the imputation model. The basic model is constructed by \code{model.matrix}, thus allowing to include derived variables in the imputation model using \code{I()}. See \code{\link[mitml]{panImpute}}.} \item{type}{An integer vector specifying the role of each variable in the imputation model (see \code{\link[mitml]{panImpute}})} \item{m}{The number of imputed data sets to generate.} \item{silent}{(optional) Logical flag indicating if console output should be suppressed. Default is to \code{FALSE}.} \item{format}{A character vector specifying the type of object that should be returned. The default is \code{format = "list"}. No other formats are currently supported.} \item{...}{Other named arguments: \code{n.burn}, \code{n.iter}, \code{group}, \code{prior}, \code{silent} and others.} } \value{ A list of imputations for all incomplete variables in the model, that can be stored in the the \code{imp} component of the \code{mids} object. } \description{ This function is a wrapper around the \code{panImpute} function from the \code{mitml} package so that it can be called to impute blocks of variables in \code{mice}. The \code{mitml::panImpute} function provides an interface to the \code{pan} package for multiple imputation of multilevel data (Schafer & Yucel, 2002). Imputations can be generated using \code{type} or \code{formula}, which offer different options for model specification. } \note{ The number of imputations \code{m} is set to 1, and the function is called \code{m} times so that it fits within the \code{mice} iteration scheme. This is a multivariate imputation function using a joint model. } \examples{ blocks <- list(c("bmi", "chl", "hyp"), "age") method <- c("panImpute", "pmm") ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) pred <- ini$pred pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) } \references{ Grund S, Luedtke O, Robitzsch A (2016). Multiple Imputation of Multilevel Missing Data: An Introduction to the R Package \code{pan}. SAGE Open. Schafer JL (1997). Analysis of Incomplete Multivariate Data. London: Chapman & Hall. Schafer JL, and Yucel RM (2002). Computational strategies for multivariate linear mixed-effects models with missing values. Journal of Computational and Graphical Statistics, 11, 437-457. } \seealso{ \code{\link[mitml]{panImpute}} Other multivariate-2l: \code{\link{mice.impute.jomoImpute}()} } \author{ Stef van Buuren, 2018, building on work of Simon Grund, Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) and Joe Schafer (author of \code{pan} package). 
} \concept{multivariate-2l} \keyword{datagen} mice/man/mice.impute.2l.pan.Rd0000644000176200001440000001066414436064333015556 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.2l.pan.R \name{mice.impute.2l.pan} \alias{mice.impute.2l.pan} \alias{2l.pan} \title{Imputation by a two-level normal model using \code{pan}} \usage{ mice.impute.2l.pan( y, ry, x, type, intercept = TRUE, paniter = 500, groupcenter.slope = FALSE, ... ) } \arguments{ \item{y}{Incomplete data vector of length \code{n}} \item{ry}{Vector of missing data pattern (\code{FALSE}=missing, \code{TRUE}=observed)} \item{x}{Matrix (\code{n} x \code{p}) of complete covariates.} \item{type}{Vector of length \code{ncol(x)} identifying random and class variables. Random effects are identified by a '2'. The group variable (only one is allowed) is coded as '-2'. Random effects also include the fixed effect. If for a covariates X1 group means shall be calculated and included as further fixed effects choose '3'. In addition to the effects in '3', specification '4' also includes random effects of X1.} \item{intercept}{Logical determining whether the intercept is automatically added.} \item{paniter}{Number of iterations in \code{pan}. Default is 500.} \item{groupcenter.slope}{If \code{TRUE}, in case of group means (\code{type} is '3' or'4') group mean centering for these predictors are conducted before doing imputations. Default is \code{FALSE}.} \item{...}{Other named arguments.} } \value{ A vector of length \code{nmis} with imputations. } \description{ Imputes univariate missing data using a two-level normal model with homogeneous within group variances. Aggregated group effects (i.e. group means) can be automatically created and included as predictors in the two-level regression (see argument \code{type}). This function needs the \code{pan} package. } \details{ Implements the Gibbs sampler for the linear two-level model with homogeneous within group variances which is a special case of a multivariate linear mixed effects model (Schafer & Yucel, 2002). For a two-level imputation with heterogeneous within-group variances see \code{\link{mice.impute.2l.norm}}. % The random intercept is automatically added in % \code{mice.impute.2l.norm()}. } \note{ This function does not implement the \code{where} functionality. It always produces \code{nmis} imputation, irrespective of the \code{where} argument of the \code{mice} function. 
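As a compact sketch of the \code{type} coding described above (the variable names \code{group}, \code{x1} and \code{y} are hypothetical):
\preformatted{
# rows are imputation targets, columns are predictors:
# -2 marks the class (group) variable, 2 a fixed + random effect,
# 3 a fixed effect plus the group mean of x1
nm <- c("group", "x1", "y")
pred <- matrix(0, nrow = 3, ncol = 3, dimnames = list(nm, nm))
pred["y", c("group", "x1")] <- c(-2, 3)
pred
}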
} \examples{ # simulate some data # two-level regression model with fixed slope # number of groups G <- 250 # number of persons n <- 20 # regression parameter beta <- .3 # intraclass correlation rho <- .30 # correlation with missing response rho.miss <- .10 # missing proportion missrate <- .50 y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) x <- rnorm(G * n) y <- y1 + beta * x dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y) dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA # empty imputation in mice imp0 <- mice(as.matrix(dfr), maxit = 0) predM <- imp0$predictorMatrix impM <- imp0$method # specify predictor matrix and method predM1 <- predM predM1["y", "group"] <- -2 predM1["y", "x"] <- 1 # fixed x effects imputation impM1 <- impM impM1["y"] <- "2l.pan" # multilevel imputation imp1 <- mice(as.matrix(dfr), m = 1, predictorMatrix = predM1, method = impM1, maxit = 1 ) # multilevel analysis library(lme4) mod <- lmer(y ~ (1 + x | group) + x, data = complete(imp1)) summary(mod) # Examples of predictorMatrix specification # random x effects # predM1["y","x"] <- 2 # fixed x effects and group mean of x # predM1["y","x"] <- 3 # random x effects and group mean of x # predM1["y","x"] <- 4 } \references{ Schafer J L, Yucel RM (2002). Computational strategies for multivariate linear mixed-effects models with missing values. \emph{Journal of Computational and Graphical Statistics}. \bold{11}, 437-457. Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} } \seealso{ Other univariate-2l: \code{\link{mice.impute.2l.bin}()}, \code{\link{mice.impute.2l.lmer}()}, \code{\link{mice.impute.2l.norm}()} } \author{ Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de} Alexander Robitzsch (IPN - Leibniz Institute for Science and Mathematics Education, Kiel, Germany), \email{robitzsch@ipn.uni-kiel.de}. } \concept{univariate-2l} mice/man/mice.impute.polyreg.Rd0000644000176200001440000000741714330031647016142 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mice.impute.polyreg.R \name{mice.impute.polyreg} \alias{mice.impute.polyreg} \title{Imputation of unordered data by polytomous regression} \usage{ mice.impute.polyreg( y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, ... ) } \arguments{ \item{y}{Vector to be imputed} \item{ry}{Logical vector of length \code{length(y)} indicating the the subset \code{y[ry]} of elements in \code{y} to which the imputation model is fitted. The \code{ry} generally distinguishes the observed (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.} \item{x}{Numeric design matrix with \code{length(y)} rows with predictors for \code{y}. Matrix \code{x} may have no missing values.} \item{wy}{Logical vector of length \code{length(y)}. 
A \code{TRUE} value indicates locations in \code{y} for which imputations are created.} \item{nnet.maxit}{Tuning parameter for \code{nnet()}.} \item{nnet.trace}{Tuning parameter for \code{nnet()}.} \item{nnet.MaxNWts}{Tuning parameter for \code{nnet()}.} \item{...}{Other named arguments.} } \value{ Vector with imputed data, same type as \code{y}, and of length \code{sum(wy)} } \description{ Imputes missing data in a categorical variable using polytomous regression } \details{ The function \code{mice.impute.polyreg()} imputes categorical response variables by the Bayesian polytomous regression model. See J.P.L. Brand (1999), Chapter 4, Appendix B. By default, unordered factors with more than two levels are imputed by \code{mice.impute.polyreg()}. The method consists of the following steps: \enumerate{ \item Fit categorical response as a multinomial model \item Compute predicted categories \item Add appropriate noise to predictions } The algorithm of \code{mice.impute.polyreg} uses the function \code{multinom()} from the \code{nnet} package. In order to avoid bias due to perfect prediction, the algorithm augments the data according to the method of White, Daniel and Royston (2010). } \references{ Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of multiple imputation strategies for the statistical analysis of incomplete data sets.} Dissertation. Rotterdam: Erasmus University. White, I.R., Daniel, R., Royston, P. (2010). Avoiding bias due to perfect prediction in multiple imputation of incomplete categorical variables. \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with S-Plus (4th ed)}. Springer, Berlin.
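Since \code{polyreg} is the default method for unordered factors with more than two levels, a minimal usage sketch with the \code{boys} data shipped in the package (where \code{reg} is such a factor) could read:
\preformatted{
imp <- mice(boys[, c("age", "hgt", "wgt", "reg")], m = 1, maxit = 2, print = FALSE)
table(complete(imp)$reg)  # the imputed factor keeps the original five levels
}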
} \seealso{ \code{\link{mice}}, \code{\link[nnet]{multinom}}, \code{\link[MASS]{polr}} Other univariate imputation functions: \code{\link{mice.impute.cart}()}, \code{\link{mice.impute.lasso.logreg}()}, \code{\link{mice.impute.lasso.norm}()}, \code{\link{mice.impute.lasso.select.logreg}()}, \code{\link{mice.impute.lasso.select.norm}()}, \code{\link{mice.impute.lda}()}, \code{\link{mice.impute.logreg.boot}()}, \code{\link{mice.impute.logreg}()}, \code{\link{mice.impute.mean}()}, \code{\link{mice.impute.midastouch}()}, \code{\link{mice.impute.mnar.logreg}()}, \code{\link{mice.impute.mpmm}()}, \code{\link{mice.impute.norm.boot}()}, \code{\link{mice.impute.norm.nob}()}, \code{\link{mice.impute.norm.predict}()}, \code{\link{mice.impute.norm}()}, \code{\link{mice.impute.pmm}()}, \code{\link{mice.impute.polr}()}, \code{\link{mice.impute.quadratic}()}, \code{\link{mice.impute.rf}()}, \code{\link{mice.impute.ri}()} } \author{ Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 } \concept{univariate imputation functions} \keyword{datagen} mice/DESCRIPTION0000644000176200001440000000736114437371702012714 0ustar liggesusersPackage: mice Type: Package Version: 3.16.0 Title: Multivariate Imputation by Chained Equations Date: 2023-05-24 Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"), email = "stef.vanbuuren@tno.nl"), person("Karin", "Groothuis-Oudshoorn", role = "aut", email = "c.g.m.oudshoorn@utwente.nl"), person("Gerko","Vink", role = "ctb", email = "g.vink@uu.nl"), person("Rianne","Schouten", role = "ctb", email = "R.M.Schouten@uu.nl"), person("Alexander", "Robitzsch", role = "ctb", email = "robitzsch@ipn.uni-kiel.de"), person("Patrick", "Rockenschaub", role = "ctb", email = "rockenschaub.patrick@gmail.com"), person("Lisa","Doove", role = "ctb", email = "lisa.doove@ppw.kuleuven.be"), person("Shahab","Jolani", role = "ctb", email = "s.jolani@maastrichtuniversity.nl"), person("Margarita","Moreno-Betancur", role="ctb", email = "margarita.moreno@mcri.edu.au"), person("Ian", "White", role="ctb", email = "ian.white@ucl.ac.uk"), person("Philipp","Gaffert", role = "ctb", email = "philipp.gaffert@gfk.com"), person("Florian","Meinfelder", role = "ctb", email = "florian.meinfelder@uni-bamberg.de"), person("Bernie","Gray", role = "ctb", email = "bfgray3@gmail.com"), person("Vincent", "Arel-Bundock", role = "ctb", email = "vincent.arel-bundock@umontreal.ca"), person("Mingyang", "Cai", role = "ctb", email = "m.cai@uu.nl"), person("Thom", "Volker", role = "ctb", email = "t.b.volker@uu.nl"), person("Edoardo", "Costantini", role = "ctb", email = "e.costantini@tilburguniversity.edu"), person("Caspar", "van Lissa", role = "ctb", email = "c.j.vanlissa@uu.nl"), person("Hanne", "Oberman", role = "ctb", email = "h.i.oberman@uu.nl")) Maintainer: Stef van Buuren Depends: R (>= 2.10.0) Imports: broom, dplyr, generics, glmnet, graphics, grDevices, lattice, methods, mitml, nnet, Rcpp, rpart, rlang, stats, tidyr, utils Suggests: broom.mixed, future, furrr, haven, knitr, lme4, MASS, miceadds, pan, parallelly, purrr, ranger, randomForest, rmarkdown, rstan, survival, testthat Description: Multiple imputation using Fully Conditional Specification (FCS) implemented by the MICE algorithm as described in Van Buuren and Groothuis-Oudshoorn (2011) . Each variable has its own imputation model. 
Built-in imputation models are provided for continuous data (predictive mean matching, normal), binary data (logistic regression), unordered categorical data (polytomous logistic regression) and ordered categorical data (proportional odds). MICE can also impute continuous two-level data (normal model, pan, second-level variables). Passive imputation can be used to maintain consistency between variables. Various diagnostic plots are available to inspect the quality of the imputations. Encoding: UTF-8 LazyLoad: yes LazyData: yes URL: https://github.com/amices/mice, https://amices.org/mice/, https://stefvanbuuren.name/fimd/ BugReports: https://github.com/amices/mice/issues LinkingTo: cpp11, Rcpp License: GPL (>= 2) RoxygenNote: 7.2.3 NeedsCompilation: yes Packaged: 2023-06-04 21:03:06 UTC; buurensv Author: Stef van Buuren [aut, cre], Karin Groothuis-Oudshoorn [aut], Gerko Vink [ctb], Rianne Schouten [ctb], Alexander Robitzsch [ctb], Patrick Rockenschaub [ctb], Lisa Doove [ctb], Shahab Jolani [ctb], Margarita Moreno-Betancur [ctb], Ian White [ctb], Philipp Gaffert [ctb], Florian Meinfelder [ctb], Bernie Gray [ctb], Vincent Arel-Bundock [ctb], Mingyang Cai [ctb], Thom Volker [ctb], Edoardo Costantini [ctb], Caspar van Lissa [ctb], Hanne Oberman [ctb] Repository: CRAN Date/Publication: 2023-06-05 14:40:02 UTC mice/build/0000755000176200001440000000000014437176011012272 5ustar liggesusersmice/build/partial.rdb0000644000176200001440000000007414437176011014420 0ustar liggesusersb```b`a 00 FN ͚Z d@$w7mice/tests/0000755000176200001440000000000014437176012012336 5ustar liggesusersmice/tests/testthat/0000755000176200001440000000000014437371702014201 5ustar liggesusersmice/tests/testthat/test-mice.impute.iurr.logreg.R0000644000176200001440000000713614433156031021755 0ustar liggesuserscontext("mice.impute.lasso.select.logreg") ######################### # TEST 1: Simple problem # ######################### set.seed(123) # generate data n <- 1e3 y <- rnorm(n) x <- y * .3 + rnorm(n, 0, .25) x2 <- x + rnorm(n, 2, 3) x <- cbind(x, x2) y <- as.numeric(cut(y, 2)) - 1 # make missingness y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t1 <- mice.impute.lasso.select.logreg(y, ry, x) test_that("Returns requested length", { expect_equal(length(imps_t1), sum(!ry)) }) test_that("Returns dichotomous imputations", { expect_equal(length(unique(imps_t1)), 2) }) ######################### # TEST 2: Nothing is important # ######################### set.seed(20211013) n <- 1e4 p <- 3 b0 <- 1 bs <- rep(0, p) x <- cbind(matrix(rnorm(n * p), n, p)) y <- b0 + x %*% bs + rnorm(n) y <- as.numeric(cut(y, 2)) - 1 # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t2 <- mice.impute.lasso.select.logreg(y, ry, x) test_that("Returns dichotomous imputations", { expect_equal(length(unique(imps_t2)), 2) }) ######################### # TEST 3: Everything is important # ######################### n <- 1e4 p <- 10 b0 <- 1 bs <- rep(1, p) x <- cbind(matrix(rnorm(n * p), n, p)) y <- b0 + x %*% bs + rnorm(n) y <- as.numeric(cut(y, 2)) - 1 # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t3 <- mice.impute.lasso.select.logreg(y, ry, x) test_that("Works when all predictors are important", { expect_equal(length(unique(imps_t3)), 2) }) ######################### # TEST 4: Use it within mice call # ######################### # Generate some dichotomous 
data n <- 1e2 p <- 4 Sigma <- matrix(.7, nrow = p, ncol = p) diag(Sigma) <- 1 X <- as.data.frame(MASS::mvrnorm(n, rep(0, p), Sigma)) # Discretize and impose miss for (j in 1:2) { X[, j] <- cut(X[, j], 2) # Make it discrete X[sample(1:n, n * .3), j] <- NA # Impose missings } # Imputations iurr_default <- mice(X, m = 2, maxit = 2, method = "lasso.select.logreg", print = FALSE ) iurr_custom <- mice(X, m = 2, maxit = 2, method = "lasso.select.logreg", nfolds = 5, print = FALSE ) logreg_default <- mice(X, m = 2, maxit = 2, method = "logreg", print = FALSE ) # Tests test_that("mice call works", { expect_equal(class(iurr_custom), "mids") }) test_that("mice call works w/ custom arguments", { expect_equal(class(iurr_custom), "mids") }) test_that("same class as logreg default method", { expect_equal( class(complete(logreg_default)[, 1]), class(complete(iurr_default)[, 1]) ) }) ######################### # TEST 5: Perfect Prediction / Complete Separation # ######################### set.seed(123) # Generate some dichotomous data n <- 1e2 p <- 4 Sigma <- matrix(.7, nrow = p, ncol = p) diag(Sigma) <- 1 x <- MASS::mvrnorm(n, rep(0, p), Sigma) # Create Perfect Predictor y <- factor(x[, 1] < 0, labels = c("y", "n")) # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Imputation well behaved wellBehaved <- tryCatch( expr = { mice.impute.lasso.select.logreg(y = y, ry = ry, x = x[, -1]) }, error = function(e) { e }, warning = function(w) { w } ) # Imputation perfect prediction perfectPred <- tryCatch( expr = { mice.impute.lasso.select.logreg(y = y, ry = ry, x = x) }, error = function(e) { e }, warning = function(w) { w } ) # Test test_that("Complete separation results in same class as well behaved case", { expect_true(all.equal(class(wellBehaved), class(perfectPred))) }) mice/tests/testthat/test-pool.r.squared.R0000644000176200001440000000074414330031606020146 0ustar liggesuserscontext("pool.r.squared") data(nhanes) imp <- mice::mice(nhanes, maxit = 2, m = 2, seed = 10, print = FALSE) fit_mira <- with(data = imp, exp = lm(chl ~ age + bmi)) fit_mipo <- mice::pool(fit_mira) test_that("pool.r.squared mira", { result <- as.vector(pool.r.squared(fit_mira, adjusted = FALSE)[1, ]) expect_equal(length(result), 4L) }) test_that("r.squared mipo", { result <- as.vector(pool.r.squared(fit_mipo, adjusted = FALSE)[1, ]) expect_equal(length(result), 4L) }) mice/tests/testthat/test-mice.R0000644000176200001440000001633714334445701016225 0ustar liggesuserscontext("mice: complete") nhanes_mids <- mice(nhanes, m = 2, print = FALSE) nhanes_complete <- complete(nhanes_mids) test_that("No missing values remain in imputed nhanes data set", { expect_gt(sum(is.na(nhanes)), 0) expect_equal(sum(is.na(nhanes_complete)), 0) }) test_that("Data set in returned mids object is identical to nhanes data set", { expect_identical(nhanes_mids$data, nhanes) }) context("mice: blocks") test_that("blocks run as expected", { expect_silent(imp1b <<- mice(nhanes, blocks = list(c("age", "hyp"), chl = "chl", "bmi"), print = FALSE, m = 1, maxit = 1, seed = 1 )) expect_silent(imp2b <<- mice(nhanes2, blocks = list(c("age", "hyp", "bmi"), "chl", "bmi"), print = FALSE, m = 1, maxit = 1, seed = 1 )) # expect_silent(imp3b <<- mice(nhanes2, # blocks = list(c("hyp", "hyp", "hyp"), "chl", "bmi"), # print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp4b <<- mice(boys, blocks = list(c("gen", "phb"), "tv"), print = FALSE, m = 1, maxit = 1, seed = 1 )) expect_silent(imp5b <<- mice(nhanes, blocks = list(c("age", "hyp")), print = FALSE, m = 
1, maxit = 1, seed = 1 )) }) test_that("Block names are generated automatically", { expect_identical(names(imp1b$blocks), c("B1", "chl", "bmi")) }) test_that("Method `pmm` is used for mixed variable types", { expect_identical(unname(imp2b$method[1]), "pmm") }) # test_that("Method `logreg` if all are binary", { # expect_identical(unname(imp3b$method[1]), "logreg") # }) test_that("Method `polr` if all are ordered", { expect_identical(unname(imp4b$method[1]), "polr") }) test_that("Method `polr` works with one block", { expect_identical(unname(imp5b$method[1]), "pmm") }) # check for equality of `scatter` and `collect` for univariate models # the following models yield the same imputations imp1 <- mice(nhanes, blocks = make.blocks(nhanes, "scatter"), print = FALSE, m = 1, maxit = 1, seed = 123 ) imp2 <- mice(nhanes, blocks = make.blocks(nhanes, "collect"), print = FALSE, m = 1, maxit = 1, seed = 123 ) imp3 <- mice(nhanes, blocks = list("age", c("bmi", "hyp", "chl")), print = FALSE, m = 1, maxit = 1, seed = 123 ) imp4 <- mice(nhanes, blocks = list(c("bmi", "hyp", "chl"), "age"), print = FALSE, m = 1, maxit = 1, seed = 123 ) test_that("Univariate yield same imputes for `scatter` and `collect`", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp1), complete(imp3)) expect_identical(complete(imp1), complete(imp4)) }) # potentially, we may also change the visitSequence, but mice # is quite persistent in overwriting a user-specified # visitSequence for complete columns, so this not # currently not an option. Defer optimizing this to later. # another trick is to specify where for age by hand, so it forces # mice to impute age by pmm, but then, this would need to be # done in both imp1 and imp2 models. context("mice: formulas") test_that("formulas run as expected", { expect_silent(imp1f <<- mice(nhanes, formulas = list( age + hyp ~ chl + bmi, chl ~ age + hyp + bmi, bmi ~ age + hyp + chl ), print = FALSE, m = 1, maxit = 1, seed = 1 )) expect_warning(imp2f <<- mice(nhanes2, formulas = list( age + hyp + bmi ~ chl + bmi, chl ~ age + hyp + bmi + bmi, bmi ~ age + hyp + bmi + chl ), print = FALSE, m = 1, maxit = 1, seed = 1 )) # expect_silent(imp3f <<- mice(nhanes2, # formulas = list( hyp + hyp + hyp ~ chl + bmi, # chl ~ hyp + hyp + hyp + bmi, # bmi ~ hyp + hyp + hyp + chl), # print = FALSE, m = 1, maxit = 1, seed = 1)) expect_silent(imp4f <<- mice(boys, formulas = list( gen + phb ~ tv, tv ~ gen + phb ), print = FALSE, m = 1, maxit = 1, seed = 1 )) expect_silent(imp5f <<- mice(nhanes, formulas = list(age + hyp ~ 1), print = FALSE, m = 1, maxit = 1, seed = 1 )) }) test_that("Formula names are generated automatically", { expect_identical(names(imp1f$blocks), c("F1", "chl", "bmi")) }) test_that("Method `pmm` is used for mixed variable types", { expect_identical(unname(imp2f$method[1]), "pmm") }) # test_that("Method `logreg` if all are binary", { # expect_identical(unname(imp3f$method[1]), "logreg") # }) test_that("Method `polr` if all are ordered", { expect_identical(unname(imp4f$method[1]), "polr") }) test_that("Method `polr` works with one block", { expect_identical(unname(imp5f$method[1]), "pmm") }) context("mice: where") # # all TRUE imp1 <- mice(nhanes, where = matrix(TRUE, nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE ) # # all FALSE imp2 <- mice(nhanes, where = matrix(FALSE, nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE ) # # alternate imp3 <- mice(nhanes, where = matrix(c(FALSE, TRUE), nrow = 25, ncol = 4), maxit = 1, m = 1, print = FALSE ) # # whacky 
situation where we expect no imputes for the incomplete cases imp4 <- mice(nhanes2, where = matrix(TRUE, nrow = 25, ncol = 4), maxit = 1, meth = c("pmm", "", "", ""), m = 1, print = FALSE ) test_that("`where` produces correct number of imputes", { expect_identical(nrow(imp1$imp$age), 25L) expect_identical(nrow(imp2$imp$age), 0L) expect_identical(nrow(imp3$imp$age), 12L) expect_identical(sum(is.na(imp4$imp$age)), nrow(nhanes2) - sum(complete.cases(nhanes2))) }) context("mice: ignore") # # all TRUE test_that("`ignore` throws appropriate errors and warnings", { expect_error( mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1, ignore = TRUE), "does not match" ) expect_error( mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1, ignore = "string"), "not a logical" ) expect_warning( mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1, ignore = c(rep(FALSE, 9), rep(TRUE, nrow(nhanes) - 9)) ), "Fewer than 10 rows" ) }) # Check that the ignore argument is taken into account when # calculating the results # # all FALSE imp1 <- mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1, ignore = rep(FALSE, nrow(nhanes)) ) # # NULL imp2 <- mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1) # # alternate alternate <- rep(c(TRUE, FALSE), nrow(nhanes))[1:nrow(nhanes)] imp3 <- mice(nhanes, maxit = 0, m = 1, print = FALSE, seed = 1, ignore = alternate ) test_that("`ignore` changes the imputation results", { expect_identical(complete(imp1), complete(imp2)) expect_failure(expect_identical(complete(imp1), complete(imp3))) }) # Check that rows flagged as ignored are indeed ignored by the # univariate sampler in mice artificial <- data.frame( age = c(1, 1), bmi = c(NA, 40.0), hyp = c(1, 1), chl = c(200, 200), row.names = paste0("a", 1:2) ) imp1 <- mice( rbind(nhanes, artificial), maxit = 1, m = 1, print = FALSE, seed = 1, donors = 1L, matchtype = 0 ) imp2 <- mice( rbind(nhanes, artificial), maxit = 1, m = 1, print = FALSE, seed = 1, donors = 1L, matchtype = 0, ignore = c(rep(FALSE, nrow(nhanes)), rep(TRUE, nrow(artificial))) ) test_that("`ignore` works with pmm", { expect_equal(complete(imp1)["a1", "bmi"], 40.0) expect_failure(expect_equal(complete(imp2)["a1", "bmi"], 40.0)) }) mice/tests/testthat/test-make.predictorMatrix.R0000644000176200001440000000037514330031606021366 0ustar liggesuserscontext("make.predictorMatrix") blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) test_that("errors on invalid data arguments", { expect_error( make.predictorMatrix(data, blocks = blocks1), "Data should be a matrix or data frame" ) }) mice/tests/testthat/test-mice.impute.norm.R0000644000176200001440000001127514330031606020464 0ustar liggesuserscontext("mice.impute.norm") ######################### # TEST 1: Simple problem # ######################### set.seed(123) # generate data y <- rnorm(10) x <- y * .3 + rnorm(10, 0, .25) x2 <- x + rnorm(10, 2, 3) x <- cbind(1, x, x2) # make missingness y[5:6] <- NA ry <- !is.na(y) set.seed(123) svd <- .norm.draw(y, ry, x, ls.meth = "svd") set.seed(123) ridge <- .norm.draw(y, ry, x, ls.meth = "ridge") set.seed(123) qr <- .norm.draw(y, ry, x, ls.meth = "qr") # tests for test1 test_that("Estimates are equal", { expect_equal(svd$coef, matrix(qr$coef)) expect_equal(svd$beta, matrix(qr$beta)) expect_equal(svd$sigma, qr$sigma) }) test_that("Correct estimation method used", { expect_equal(svd$estimation, "svd") expect_equal(qr$estimation, "qr") expect_equal(ridge$estimation, "ridge") }) # svd and qr deliver same estimates; ridge should be different! 
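# Illustrative addition (not a formal expectation): the size of the gap between
# the ridge and qr draws is driven by the ridge penalty inside .norm.draw();
# for well-conditioned toy data such as the set above it is small but nonzero.
ridge_gap <- max(abs(ridge$beta - qr$beta))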
##################################### # TEST 2: extremely high correlation # ##################################### x <- matrix(c(1:1000, seq(from = 2, to = 2000, by = 2)) + rnorm(1000), nrow = 1000, ncol = 2) y <- t(c(5, 3) %*% t(x)) y[5:6] <- NA ry <- !is.na(y) svd <- .norm.draw(y, ry, x, ls.meth = "svd") ridge <- .norm.draw(y, ry, x, ls.meth = "ridge") qr <- .norm.draw(y, ry, x, ls.meth = "qr") # tests for test2 test_that("Estimates are equal", { expect_equal(svd$coef, matrix(qr$coef)) expect_equal(svd$beta, matrix(qr$beta)) expect_equal(svd$sigma, qr$sigma) }) test_that("Correct estimation method used", { expect_equal(svd$estimation, "svd") expect_equal(qr$estimation, "qr") expect_equal(ridge$estimation, "ridge") }) # svd and qr deliver same estimates; ridge should be different! ##################################### # TEST 3: correct imputation model # ##################################### expect_warning(imp.qr <- mice(mammalsleep[, -1], ls.meth = "qr", seed = 123, print = FALSE, use.matcher = TRUE)) expect_warning(imp.svd <- mice(mammalsleep[, -1], ls.meth = "svd", seed = 123, print = FALSE, use.matcher = TRUE)) expect_warning(imp.ridge <- mice(mammalsleep[, -1], ls.meth = "ridge", seed = 123, print = FALSE, use.matcher = TRUE)) test_that("Imputations are equal", { expect_equal(imp.qr$imp, imp.svd$imp) expect_false(identical(imp.qr$imp, imp.ridge$imp)) }) ##################################### # TEST 4: exactly singular system # ##################################### # test on faulty imputation model (exactly singular system) expect_warning(imp.qr <- mice(mammalsleep, ls.meth = "qr", seed = 123, print = FALSE)) expect_warning(imp.svd <- mice(mammalsleep, ls.meth = "svd", seed = 123, print = FALSE)) expect_warning(imp.ridge <- mice(mammalsleep, ls.meth = "ridge", seed = 123, print = FALSE)) test_that("Imputations are not equal", { expect_false(identical(imp.qr$imp, imp.svd$imp)) expect_false(identical(imp.qr$imp, imp.ridge$imp)) }) # difference stems from added ridge penalty when necessary (when and where depends # on starting state of algorithm). ##################################### # TEST 4: returns requested length # ##################################### xname <- c("age", "hgt", "wgt") br <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] r <- stats::complete.cases(br[, xname]) x <- br[r, xname] y <- br[r, "tv"] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) test_that("Returns requested length", { expect_equal(length(mice.impute.norm(y, ry, x)), sum(!ry)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy1)), sum(wy1)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy2)), sum(wy2)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy3)), sum(wy3)) expect_equal(length(mice.impute.norm(y, ry, x, wy = wy4)), sum(wy4)) }) ### x <- airquality[, c("Wind", "Temp", "Month")] y <- airquality[, "Ozone"] ry <- !is.na(y) # do imputations depend on x column order? 
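# With a fixed seed, the norm-family checks that remain active below
# (norm.nob, norm.predict, norm.boot) are expected to be invariant to the
# order of the predictor columns; the pmm and norm comparisons are left
# outcommented, presumably because donor matching and the extra random draws
# can make those methods sensitive to predictor order.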
x1 <- x[, c(1, 2, 3)] x2 <- x[, c(1, 3, 2)] set.seed(123) pmm1 <- mice.impute.pmm(y, ry, x1) set.seed(123) pmm2 <- mice.impute.pmm(y, ry, x2) set.seed(123) norm1 <- mice.impute.norm(y, ry, x1) set.seed(123) norm2 <- mice.impute.norm(y, ry, x2) set.seed(123) norm.nob1 <- mice.impute.norm.nob(y, ry, x1) set.seed(123) norm.nob2 <- mice.impute.norm.nob(y, ry, x2) set.seed(123) norm.predict1 <- mice.impute.norm.predict(y, ry, x1) set.seed(123) norm.predict2 <- mice.impute.norm.predict(y, ry, x2) set.seed(123) norm.boot1 <- mice.impute.norm.boot(y, ry, x1) set.seed(123) norm.boot2 <- mice.impute.norm.boot(y, ry, x2) test_that("Imputations are invariant to column order", { # expect_equal(pmm1, pmm2) # expect_equal(norm1, norm2) expect_equal(norm.nob1, norm.nob2) expect_equal(norm.predict1, norm.predict2) expect_equal(norm.boot1, norm.boot2) }) mice/tests/testthat/test-complete.R0000644000176200001440000000170014347334322017104 0ustar liggesuserscontext("complete") imp <- mice(nhanes, maxit = 1, m = 2, seed = 123, print = FALSE) lng <- subset(complete(imp, "long"), .imp == 1, select = c(age, bmi, hyp, chl)) all <- complete(imp, "all")[[1]] test_that("long and all produce same data", { expect_equal(lng, all) }) # mids workflow using saved objects imp <- mice(nhanes, seed = 123, print = FALSE) fit <- with(imp, lm(chl ~ age + bmi + hyp)) est <- pool(fit) est.mice <- est # mild workflow using saved objects and base::lapply idl <- complete(imp, "all") fit <- lapply(idl, lm, formula = chl ~ age + bmi + hyp) est <- pool(fit) est.mild <- est # long workflow using base::by cmp <- complete(imp, "long") fit <- by(cmp, as.factor(cmp$.imp), lm, formula = chl ~ age + bmi + hyp) est <- pool(fit) est.long <- est test_that("workflow mids, mild and long produce same estimates", { expect_identical(getqbar(est.mice), getqbar(est.mild)) expect_identical(getqbar(est.mice), getqbar(est.long)) }) mice/tests/testthat/test-mice.impute.2l.lmer.R0000644000176200001440000000123513666252075020777 0ustar liggesuserscontext("mice.impute.2l.lmer") d <- brandsma[1:200, c("sch", "lpo")] pred <- make.predictorMatrix(d) pred["lpo", "sch"] <- -2 test_that("mice::mice.impute.2l.lmer() runs empty model", { expect_silent(imp <- mice(d, method = "2l.lmer", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) d <- brandsma[1:200, c("sch", "lpo", "iqv")] d[c(1, 11, 21), "iqv"] <- NA pred <- make.predictorMatrix(d) pred[c("lpo", "iqv"), "sch"] <- -2 test_that("2l.lmer() runs random intercept, one predictor", { expect_silent(imp <- mice(d, method = "2l.lmer", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-mice.impute.rf.R0000644000176200001440000000140614433400023020110 0ustar liggesusers# Outcommented to reduce dependencies under the _R_CHECK_DEPENDS_ONLY=true flag # context("mice.impute.rf") # # ##################################### # # TEST 1: runs with single miss val # # ##################################### # # data <- matrix( # c( # 1.0, 10.5, 1.5, 13.2, 1.8, 8.0, 1.7, 15.0, 23.0, 40.0, # 2.0, 21.0, 3.3, 38.0, 4.5, -2.3, NA, -2.4 # ), # nrow = 9, ncol = 2, byrow = TRUE # ) # df <- data.frame(data) # # par <- list( # y = df$X1, # ry = !is.na(df$X1), # x = df[, "X2", drop = FALSE] # ) # # test_that( # "Runs with a single missing value", # { # expect_visible(do.call(mice.impute.rf, c(par, list(rfPackage = "ranger")))) # expect_visible(do.call(mice.impute.rf, c(par, list(rfPackage = "randomForest")))) # } # ) 
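# If these tests are re-enabled, a guard along these lines (a sketch, assuming
# the suggested packages may be absent) keeps them from erroring on minimal
# library setups:
# test_that("Runs with a single missing value", {
#   testthat::skip_if_not_installed("ranger")
#   testthat::skip_if_not_installed("randomForest")
#   expect_visible(do.call(mice.impute.rf, c(par, list(rfPackage = "ranger"))))
#   expect_visible(do.call(mice.impute.rf, c(par, list(rfPackage = "randomForest"))))
# })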
mice/tests/testthat/test-mice.impute.logreg.R0000644000176200001440000000173714330031647020777 0ustar liggesuserscontext("test-mice.impute.logreg.R") ######################### # TEST 1: Perfect Prediction / Complete Separation # ######################### set.seed(123) # Generate some dichotomous data n <- 1e2 p <- 4 Sigma <- matrix(.7, nrow = p, ncol = p) diag(Sigma) <- 1 x <- MASS::mvrnorm(n, rep(0, p), Sigma) # Create Perfect Predictor y <- factor(x[, 1] < 0, labels = c("y", "n")) # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Imputation well behaved wellBehaved <- tryCatch( expr = { mice.impute.logreg(y = y, ry = ry, x = x[, -1]) }, error = function(e) { e }, warning = function(w) { w } ) # Imputation perfect prediction perfectPred <- tryCatch( expr = { mice.impute.logreg(y = y, ry = ry, x = x) }, error = function(e) { e }, warning = function(w) { w } ) # Test test_that("Complete separation results in same class as well behaved case", { expect_true(all.equal(class(wellBehaved), class(perfectPred))) }) mice/tests/testthat/test-mice.impute.2l.bin.R0000644000176200001440000000226714330031606020576 0ustar liggesuserscontext("mice.impute.2l.bin") # toenail: outcome is factor data("toenail2") data <- tidyr::complete(toenail2, patientID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-time) %>% dplyr::mutate(patientID = as.integer(patientID)) summary(data) # fit1 <- glm(outcome ~ treatment * month, data = toenail2, family = binomial) # fit2 <- glm(outcome ~ treatment * visit, data = toenail2, family = binomial) # fit3 <- lme4::glmer(outcome ~ treatment * visit + (1 | ID), data = data, family = binomial) pred <- make.predictorMatrix(data) pred["outcome", "patientID"] <- -2 test_that("mice::mice.impute.2l.bin() accepts factor outcome", { expect_silent(imp <- mice(data, method = "2l.bin", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) # toenail: outcome is 0/1 data("toenail") data <- tidyr::complete(toenail, ID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-month) summary(data) pred <- make.predictorMatrix(data) pred["outcome", "ID"] <- -2 test_that("mice::mice.impute.2l.bin() accepts 0/1 outcome", { expect_silent(imp <- mice(data, method = "2l.bin", print = FALSE, pred = pred, m = 1, maxit = 1)) expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-mice.impute.panImpute.R0000644000176200001440000000131114436064333021453 0ustar liggesuserscontext("mice.impute.panImpute") data <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] type <- c(2, 0, 0, 0, -2, 0, 1, 1, 0) names(type) <- names(data) z1 <- mice.impute.panImpute(data = data, type = type, format = "native") test_that("panImpute returns native class", { expect_is(z1, "mitml") }) blocks <- make.blocks(list(c("bmi", "chl", "hyp"), "age")) method <- c("panImpute", "pmm") pred <- make.predictorMatrix(nhanes, blocks) pred["B1", "hyp"] <- -2 imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1, seed = 1, print = FALSE ) z <- complete(imp) test_that("mice can call panImpute", { expect_equal(sum(is.na(z$bmi)), 0) expect_equal(sum(is.na(z$chl)), 0) }) mice/tests/testthat/test-newdata.R0000644000176200001440000000270614334522175016727 0ustar liggesuserscontext("mice.mids: newdata") # Check that mice.mids correctly appends the newdata to the # existing mids object init0 <- mice(nhanes, maxit = 0, m = 1, print = FALSE, seed = 1) init1 <- mice(nhanes, maxit = 0, m = 1, print = FALSE) init1$ignore <- rep(FALSE, nrow(nhanes)) init2 <- 
mice.mids(init0, newdata = nhanes, maxit = 0, print = FALSE) test_that("`newdata` works like rbind with ignore", { expect_equal(complete(rbind(init0, init1)), complete(init2)) }) imp <- mice(nhanes2, maxit = 0, m = 1, seed = 1) test_that("`newdata` produces warning `invalid factor level, NA generated`", { expect_silent(mice.mids(imp, newdata = nhanes2[1, ], print = FALSE)) }) # Check that rows flagged as ignored are indeed ignored by the # univariate sampler in mice.mids artificial <- data.frame( age = c(1, 1), bmi = c(NA, 40.0), hyp = c(1, 1), chl = c(200, 200), row.names = paste0("a", 1:2) ) imp1 <- mice(nhanes, maxit = 1, m = 1, print = FALSE, seed = 1, donors = 1L, matchtype = 0 ) imp2 <- mice.mids(imp1, newdata = artificial, maxit = 1, print = FALSE) imp2b <- mice.mids(imp1, newdata = artificial, maxit = 1, print = FALSE) test_that("`newdata` works with pmm", { expect_failure(expect_equal(complete(imp2)["a1", "bmi"], 40.0)) }) test_that("`newdata` returns filtered mids object", { expect_equal(nrow(complete(imp2)), nrow(artificial)) }) test_that("`newdata` uses a common seed", { expect_true(identical(complete(imp2), complete(imp2b))) }) mice/tests/testthat/test-blocks.R0000644000176200001440000000160414344575371016564 0ustar liggesuserscontext("blocks") imp <- mice(nhanes, blocks = make.blocks(list(c("bmi", "chl"), "bmi", "age")), m = 10, print = FALSE) # plot(imp) test_that("removes variables from 'where'", { expect_identical(sum(imp$where[, "hyp"]), 0L) }) # reprex https://github.com/amices/mice/issues/326 imp1 <- mice(nhanes, seed = 1, m = 1, maxit = 2, print = FALSE) imp2 <- mice(nhanes, blocks = list(c("bmi", "hyp"), "chl"), m = 1, maxit = 2, seed = 1, print = FALSE) test_that("expands a univariate method to all variables in the block", { expect_identical(complete(imp1, 1), complete(imp2, 1)) }) imp3 <- mice(nhanes, blocks = list(c("hyp", "bmi"), "chl"), m = 1, maxit = 2, seed = 1, print = FALSE) imp4 <- mice(nhanes, visitSequence = c("hyp", "bmi", "chl"), m = 1, maxit = 2, seed = 1, print = FALSE) test_that("blocks alter the visit sequence", { expect_identical(complete(imp3, 1), complete(imp3, 1)) }) mice/tests/testthat/test-mice.impute.polr.R0000644000176200001440000000027314330031606020461 0ustar liggesuserscontext("polr") # following halts with # "Error in apply(draws, 2, sum) : dim(X) must have a positive length" # imp1 <- mice(boys, blocks = list("gen"), print = FALSE, m = 1, maxit = 1) mice/tests/testthat/test-rbind.R0000644000176200001440000001067314330031606016372 0ustar liggesuserscontext("rbind.mids") expect_warning(imp1 <<- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE)) test_that("Constant variables are not imputed by default", { expect_equal(sum(is.na(complete(imp1))), 6L) }) expect_warning(imp1b <<- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE, remove.constant = FALSE)) test_that("Constant variables are imputed for remove.constant = FALSE", { expect_equal(sum(is.na(complete(imp1b))), 0L) }) imp2 <- mice(nhanes[14:25, ], m = 2, maxit = 1, print = FALSE) imp3 <- mice(nhanes2, m = 2, maxit = 1, print = FALSE) imp4 <- mice(nhanes2, m = 1, maxit = 1, print = FALSE) expect_warning(imp5 <<- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE)) expect_error(imp6 <<- mice(nhanes[1:13, 2:3], m = 2, maxit = 2, print = FALSE), "`mice` detected constant and/or collinear variables. 
No predictors were left after their removal.") nh3 <- nhanes colnames(nh3) <- c("AGE", "bmi", "hyp", "chl") imp7 <- mice(nh3[14:25, ], m = 2, maxit = 2, print = FALSE) expect_warning(imp8 <<- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE)) imp9 <- mice(nhanes, m = 2, maxit = 1, print = FALSE, ignore = c(rep(FALSE, 20), rep(TRUE, 5)) ) mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) nhalf <- nhanes[13:25, ] test_that("Expands number of rows and imputes", { expect_equal(nrow(complete(rbind(imp1, imp2))), 25L) expect_equal(nrow(rbind(imp1, imp2)$imp$bmi), 9L) }) test_that("throws error", { expect_error(rbind(imp1, imp3), "datasets have different factor variables") expect_error(rbind(imp3, imp4), "number of imputations differ") expect_error(rbind(imp1, imp7), "datasets have different variable names") }) test_that("throws warning", { expect_warning( rbind(imp1, imp5), "iterations differ, so no convergence diagnostics calculated" ) expect_warning( rbind(imp5, imp9), "iterations differ, so no convergence diagnostics calculated" ) }) r1 <- rbind(imp8, imp5) r2 <- rbind(imp1, mylist) r3 <- rbind(imp1, nhalf) r4 <- rbind(imp1, imp2) r5 <- rbind(imp2, imp9) test_that("Produces longer imputed data", { expect_identical(nrow(complete(r1)), 26L) expect_identical(nrow(complete(r2)), 14L) }) test_that("Constant variables are not imputed", { expect_equal(sum(is.na(complete(r3))), 15L) expect_equal(sum(is.na(complete(r4))), 6L) }) test_that("`ignore` is correctly appended", { expect_equal(r2$ignore, rep(FALSE, 14)) expect_equal(r5$ignore, c(rep(FALSE, 32), rep(TRUE, 5))) }) # r11 <- mice.mids(rbind(imp1, imp5), print = FALSE) # test_that("plot throws error on convergence diagnostics", { # expect_error(plot(r11), "no convergence diagnostics found") # }) r21 <- mice.mids(r2, print = FALSE) r31 <- mice.mids(r3, print = FALSE) # issue #59 set.seed <- 818 x <- rnorm(10) D <- data.frame(x = x, y = 2 * x + rnorm(10)) D[c(2:4, 7), 1] <- NA expect_error(D_mids <<- mice(D[1:5, ], print = FALSE), "`mice` detected constant and/or collinear variables. No predictors were left after their removal.") expect_warning(D_mids <<- mice(D[1:5, ], print = FALSE, remove.collinear = FALSE)) D_rbind <- mice:::rbind.mids(D_mids, D[6:10, ]) cmp <- complete(D_rbind, 1) test_that("Solves issue #59, rbind", expect_identical(cmp[6:10, ], D[6:10, ])) test_that("rbind does not throw a warning (#114)", { expect_silent(rbind(ordered(c(1, 2)))) }) # calculate chainMean and chainVar # imp1 <- mice(nhanes[1:13, ], m = 5, maxit = 25, print = FALSE, seed = 123) # imp2 <- mice(nhanes[14:25, ], m = 5, maxit = 25, print = FALSE, seed = 456) # z <- rbind(imp1, imp2) # plot(z) # # imp3 <- mice(nhanes, m = 5, maxit = 25, print = FALSE, seed = 123) # plot(imp3) # # An interesting observation is that the SD(hyp, a) < SD(hyp, imp3). This is # because SD(hyp, imp1) = 0. 
# issue 319: https://github.com/amices/mice/issues/319 # impute a subset # do not touch or impute non-selected rows # return full data as mids object # example: impute even rows only data <- nhanes odd <- as.logical((1:nrow(data)) %% 2) # method 1: ignore + where where <- make.where(data) where[odd, ] <- FALSE imp1 <- mice(nhanes, ignore = odd, where = where, seed = 1, m = 2, print = FALSE) c1 <- complete(imp1, 2) # method 2: filter + rbind imp2 <- mice(data[!odd, ], seed = 1, m = 2, print = FALSE) imp2 <- rbind(imp2, data[odd, ]) idx <- order(as.numeric(rownames(imp2$data))) imp2$data <- imp2$data[idx, ] imp2$where <- imp2$where[idx, ] c2 <- complete(imp2, 2) test_that("ignore + where is identical to filter + rbind (#319)", { expect_identical(c1, c2) }) mice/tests/testthat/test-mice.impute.durr.norm.R0000644000176200001440000000171314433156031021437 0ustar liggesuserscontext("mice.impute.lasso.norm") ######################### # TEST 1: Simple problem # ######################### set.seed(123) # generate data n <- 1e3 y <- rnorm(n) x <- y * .3 + rnorm(n, 0, .25) x2 <- x + rnorm(n, 2, 3) x <- cbind(x, x2) # make missingness y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) # Use univariate imputation model set.seed(123) imps <- mice.impute.lasso.norm(y, ry, x) test_that("Returns requested length", { expect_equal(length(imps), sum(!ry)) }) ######################### # TEST 2: Use it within mice call # ######################### boys_cont <- boys[, 1:4] durr_default <- mice(boys_cont, m = 2, maxit = 2, method = "lasso.norm", print = FALSE ) durr_custom <- mice(boys_cont, m = 2, maxit = 2, method = "lasso.norm", nfolds = 5, print = FALSE ) test_that("mice call works", { expect_equal(class(durr_custom), "mids") }) test_that("mice call works w/ custom arguments", { expect_equal(class(durr_custom), "mids") }) mice/tests/testthat/test-ampute.R0000644000176200001440000002747314335404116016602 0ustar liggesuserscontext("ampute") # make objects for testfunctions sigma <- matrix(data = c(1, 0.2, 0.2, 0.2, 1, 0.2, 0.2, 0.2, 1), nrow = 3) complete.data <- MASS::mvrnorm(n = 100, mu = c(5, 5, 5), Sigma = sigma) test_that("all examples work", { compl_boys <- cc(boys)[1:3] expect_error(ampute(data = compl_boys), NA) mads_boys <- ampute(data = compl_boys) my_patterns <- mads_boys$patterns my_patterns[1:3, 2] <- 0 my_weights <- mads_boys$weights my_weights[2, 1] <- 2 my_weights[3, 1] <- 0.5 expect_error(ampute( data = compl_boys, patterns = my_patterns, freq = c(0.3, 0.3, 0.4), weights = my_weights, type = c("RIGHT", "TAIL", "LEFT") ), NA) }) test_that("all arguments work", { set.seed(123) # empty run expect_error(ampute(data = complete.data, run = FALSE), NA) # missingness by cells expect_error(ampute(data = complete.data, prop = 0.1, bycases = FALSE), NA) # prop with 3 dec, weigths with negative values, unequal odds matrix expect_error(ampute( data = complete.data, prop = 0.314, freq = c(0.25, 0.4, 0.35), patterns = matrix( data = c( 1, 0, 1, 0, 1, 0, 0, 1, 1 ), nrow = 3, byrow = TRUE ), weights = matrix( data = c( -1, 1, 0, -4, -4, 1, 0, 0, -1 ), nrow = 3, byrow = TRUE ), odds = matrix( data = c( 1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4 ), nrow = 3, byrow = TRUE ), cont = FALSE ), NA) # 1 pattern with vector for patterns and weights expect_error(ampute( data = complete.data, freq = 1, patterns = c(1, 0, 1), weights = c(3, 3, 0) ), NA) # multiple patterns given in vectors expect_error(ampute( data = complete.data, patterns = c(1, 0, 1, 1, 0, 0), cont = TRUE, weights = c(1, 4, -2, 0, 1, 2), type = c("LEFT", "TAIL") ), 
NA) # one pattern with odds vector expect_error(ampute( data = complete.data, patterns = c(1, 0, 1), weights = c(4, 1, 0), odds = c(2, 1), cont = FALSE ), NA) # argument standardized expect_error(ampute(data = complete.data, std = FALSE), NA) # sum scores cannot be NaN dich.data <- matrix(c( 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 ), ncol = 2, byrow = FALSE) wss <- expect_warning(ampute(data = dich.data, mech = "MNAR")$scores) check_na <- function(x) { return(any(is.na(x))) } expect_false(any(unlist(lapply(wss, check_na)))) }) test_that("function works around unusual arguments", { # data nasty.data <- complete.data nasty.data[, 1] <- rep(c("one", "two"), 50) # when data is categorical and mech != mcar, warning is expected expect_warning( ampute(data = nasty.data), "Data is made numeric because the calculation of weights requires numeric data" ) # when data is categorical and mech = mcar, function can continue expect_warning(ampute(data = nasty.data, mech = "MCAR"), NA) # patterns expect_error(ampute(data = complete.data, patterns = c(0, 0, 0), mech = "MCAR"), NA) expect_error(ampute(data = complete.data, patterns = c(0, 0, 1, 0, 0, 0), mech = "MNAR"), NA) expect_warning(ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0))) # freq expect_warning(ampute(data = complete.data, freq = c(0.8, 0.4))) # prop expect_warning(ampute(data = complete.data, prop = 1)) expect_error(ampute(data = complete.data, prop = 48.5), NA) # mech, type and weights expect_warning( ampute(data = complete.data, mech = c("MCAR", "MAR")), "Mechanism should contain merely MCAR, MAR or MNAR. First element is used" ) expect_warning( ampute(data = complete.data, type = c("LEFT", "RIGHT")), "Type should either have length 1 or length equal to #patterns, first element is used for all patterns" ) expect_warning( ampute( data = complete.data, mech = "MCAR", odds = matrix( data = c( 1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4 ), nrow = 3, byrow = TRUE ), cont = FALSE ), "Odds matrix is not used when mechanism is MCAR" ) expect_warning( ampute( data = complete.data, mech = "MCAR", weights = c(1, 3, 4) ), "Weights matrix is not used when mechanism is MCAR" ) expect_warning(ampute(data = complete.data, odds = matrix( data = c( 1, 4, NA, NA, 0, 3, 3, NA, 4, 1, 1, 4 ), nrow = 3, byrow = TRUE ))) expect_warning(ampute(data = complete.data, cont = FALSE, type = "LEFT")) }) test_that("error messages work properly", { # data expect_error( ampute(data = as.list(complete.data)), "Data should be a matrix or data frame" ) nasty.data <- complete.data nasty.data[1:10, 1] <- NA expect_error(ampute(data = nasty.data), "Data cannot contain NAs") expect_error( ampute(data = as.data.frame(complete.data[, 1])), "Data should contain at least two columns" ) # prop expect_error(ampute(data = complete.data, prop = 104)) expect_error( ampute(data = complete.data, prop = 0.9, bycases = FALSE), "Proportion of missing cells is too large in combination with the desired number of missing variables" ) # patterns expect_error( ampute(data = complete.data, patterns = c(1, 1, 1)), "One pattern with merely ones results to no amputation at all, the procedure is therefore stopped" ) expect_error( ampute(data = complete.data, patterns = c(0, 0, 0), mech = "MAR"), "Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR" ) expect_error( ampute(data = complete.data, patterns = c(1, 0, 1, 1)), "Length of pattern vector does not match #variables" ) expect_error( ampute(data = complete.data, patterns = c(1, 0, 
2)), "Argument patterns can only contain 0 and 1, pattern 1 contains another element" ) expect_error( ampute(data = complete.data, mech = "MAR", patterns = c(0, 0, 1, 0, 0, 0)), "Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR" ) # mech, type, weights and odds expect_error( ampute(data = complete.data, mech = "MAAR"), "Mechanism should be either MCAR, MAR or MNAR" ) expect_error( ampute(data = complete.data, type = "MARLEFT"), "Type should contain LEFT, MID, TAIL or RIGHT" ) expect_error( ampute(data = complete.data, weights = c(1, 2, 1, 4)), "Length of weight vector does not match #variables" ) expect_error(ampute( data = complete.data, odds = matrix(c(1, 4, -3, 2, 1, 1), nrow = 3), cont = FALSE ), "Odds matrix can only have positive values") expect_error( ampute( data = complete.data, patterns = matrix( data = c( 1, 0, 1, 0, 1, 0, 0, 1, 1 ), nrow = 3, byrow = TRUE ), weights = matrix( data = c( -1, 1, 0, -4, -4, 1, 0, 0, -1, 1, 1, 0 ), nrow = 4, byrow = TRUE ) ), "The objects patterns and weights are not matching" ) expect_error( ampute( data = complete.data, patterns = matrix( data = c( 1, 0, 1, 0, 1, 0, 0, 1, 1 ), nrow = 3, byrow = TRUE ), odds = matrix( data = c( 1, 4, NA, NA, 0, 3, 3, 0 ), nrow = 2, byrow = TRUE ), cont = FALSE ), "The objects patterns and odds are not matching" ) }) # The following tests were created to evaluate the patterns and weights matrices in case of a pattern with only 1's (#449) test_that("patterns and weights matrices have right dimensions", { suppressWarnings( expect_true(all( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0))$patterns == c(0, 1, 0) )) ) suppressWarnings( expect_true(all( ampute(data = complete.data, patterns = c(0, 1, 0, 1, 1, 1))$patterns == c(0, 1, 0) )) ) suppressWarnings( expect_true(all( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 1, 1, 1))$patterns == c(0, 1, 0) )) ) suppressWarnings( expect_true(all( ampute( data = complete.data, patterns = c(1, 1, 1, 0, 1, 0), weights = c(1, 0, 0, 0, 1, 0) )$weights == c(0, 1, 0) )) ) suppressWarnings( expect_true(all( ampute( data = complete.data, patterns = c(0, 1, 0, 1, 1, 1), weights = c(1, 0, 0, 0, 1, 0) )$weights == c(1, 0, 0) )) ) suppressWarnings( expect_true(all( ampute( data = complete.data, patterns = c(0, 1, 0, 1, 1, 1), weights = c(1, 0, 0) )$weights == c(1, 0, 0) )) ) suppressWarnings( expect_true(all( ampute( data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 1, 1, 1), weights = c(1, 0, 0) )$weights == c(1, 0, 0) )) ) suppressWarnings( expect_true(all( ampute( data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 1, 1, 1), weights = c(1, 0, 0, 0, 1, 0, 0, 0, 1) )$weights == c(0, 1, 0) )) ) }) test_that("prop and freq are properly adjusted when patterns contain only 1's", { suppressWarnings( expect_equal(ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0))$prop, 0.25) ) suppressWarnings( expect_equal(ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0))$freq, 1) ) suppressWarnings( expect_equal( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 0, 1, 0))$prop, 1 / 3 ) ) suppressWarnings( expect_true(all( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 0, 1, 0))$freq == c(0.5, 0.5) )) ) suppressWarnings( expect_equal( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 1, 1, 1))$prop, 1 / 3 * 0.5 ) ) suppressWarnings( expect_equal( ampute(data = complete.data, patterns = c(1, 1, 1, 0, 1, 0, 1, 1, 1))$freq, 1 ) ) }) # The following test was created to evaluate 
warnings when not all patterns can be generated (#317) test_that("warnings appear when not all patterns can be generated", { set.seed(12032021) binary.data <- lapply( runif(10, 0.05, 0.15), function(p, n) rbinom(n, 1, p), n = 10 ) %>% do.call(what = "data.frame") %>% rlang::set_names(paste0("type", LETTERS[1:ncol(.)])) expect_warning( ampute( data = binary.data ) ) df <- matrix(c(runif(1000, 0.5, 1), rep(0, 1000)), nrow = 1000, byrow = FALSE) expect_warning( ampute(df, pattern = c(0, 1)), "The weighted sum scores of all candidates in pattern 1 are the same, they will be amputed with probability 0.5" ) }) # The following test was contributed by Shangzhi-hong (#216) Dec 2019 context("ampute robust version") set.seed(1) # Set-up # Dataset NUM_OBS_DF <- 25 NUM_VAR_DF <- 10 data <- replicate( n = NUM_VAR_DF, expr = { rnorm(n = NUM_OBS_DF, mean = 1, sd = 1) }, simplify = "matrix" ) # Ampute pattern covNum <- NUM_VAR_DF - 1 misPatCov1 <- t(combn( x = covNum, m = 1, FUN = function(x) replace(rep(1, covNum), x, 0) )) misPat1 <- cbind(rep(1, choose(covNum, 1)), misPatCov1) misPatCov2 <- t(combn( x = covNum, m = 2, FUN = function(x) replace(rep(1, covNum), x, 0) )) misPat2 <- cbind(rep(1, choose(covNum, 2)), misPatCov2) patterns <- rbind(misPat1, misPat2) weights <- matrix(0, nrow = nrow(patterns), ncol = ncol(patterns)) weights[, 1] <- 1 prop <- 0.5 mech <- "MAR" type <- "RIGHT" bycases <- TRUE # Other params # freq <- NULL # std <- TRUE # cont <- TRUE # type <- NULL # odds <- NULL # run <- TRUE test_that("ampute() works under extreme condition", { expect_warning( ampDf <- ampute( data = data, prop = prop, mech = mech, type = type, bycases = bycases, patterns = patterns, weights = weights )$amp ) outProp <- sum(complete.cases(ampDf)) / NUM_OBS_DF expect_true(outProp > 0.3 & outProp < 0.7) }) # --- end test Shangzhi-hong (#216) Dec 2019 mice/tests/testthat/test-as.mids.R0000644000176200001440000000404614330031606016627 0ustar liggesuserscontext("as.mids") nhanes3 <- nhanes rownames(nhanes3) <- LETTERS[1:nrow(nhanes3)] imp <- mice(nhanes3, m = 2, maxit = 1, print = FALSE) X <- complete(imp, action = "long", include = TRUE) # create dataset with .imp variable as numeric X2 <- X # nhanes example test1 <- as.mids(X) # nhanes example test2 <- as.mids(X2) # nhanes example, where we explicitly specify .id as column 2 test3 <- as.mids(X, .id = 2) # nhanes example with .id where .imp is numeric test4 <- as.mids(X2, .id = 2) #' # example without an .id variable #' # variable .id not preserved test5 <- as.mids(X[, -2]) #' # reverse data order rev <- ncol(X):1 test6 <- as.mids(X[, rev]) # as() syntax has fewer options test7 <- as(X, "mids") test8 <- as(X2, "mids") test9 <- as(X2[, -2], "mids") test10 <- as(X[, rev], "mids") test_that("as.mids() produces a `mids` object", { expect_is(test1, "mids") expect_is(test2, "mids") expect_is(test3, "mids") expect_is(test4, "mids") expect_is(test5, "mids") expect_is(test7, "mids") expect_is(test8, "mids") expect_is(test9, "mids") expect_is(test10, "mids") expect_error( as(X[-(1:10), ], "mids"), "Unequal group sizes in imputation index `.imp`" ) expect_error( as(X[, -(1:2)], "mids"), "Imputation index `.imp` not found" ) }) test_that("complete() reproduces the original data", { expect_true(all(complete(test1, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test2, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test3, action = "long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test4, action = 
"long", include = TRUE) == X, na.rm = TRUE)) expect_true(all(complete(test5, action = "long", include = TRUE)[, -2] == X[, -2], na.rm = TRUE)) expect_true(all(complete(test6, action = "long", include = TRUE)[, -(1:2)] == X[, rev][, -(5:6)], na.rm = TRUE)) }) # works with dplyr library(dplyr) X3 <- X %>% group_by(hyp) %>% mutate(chlm = mean(chl, na.rm = TRUE)) test_that("handles grouped_df", { expect_silent(as.mids(X3)) }) mice/tests/testthat/test-filter.R0000644000176200001440000000176414330031606016562 0ustar liggesusers context("filter.mids") imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE, seed = 1) imp_l <- imp class(imp_l) <- "list" test_that("throws error", { # outcommented first two tests because they also throw a # deprecated filter_ warning # expect_error(filter("mids")) # expect_error(filter(imp_l)) expect_error(filter(imp, "TRUE")) expect_error(filter(imp, rep(1, nrow(nhanes)))) }) imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) test_that("filtered mids is subset", { expect_equal(complete(imp_f), complete(imp)[1:13, ]) expect_equal(imp_f$ignore, imp$ignore[1:13]) expect_equal(imp_f$where, imp$where[1:13, ]) expect_equal(imp_f$nmis, colSums(is.na(imp$data[1:13, ]))) expect_null(imp_f$chainMean) }) imp_fa <- filter(imp, rep(TRUE, nrow(nhanes))) imp2 <- mice.mids(imp, maxit = 1, printFlag = FALSE) imp_fa2 <- mice.mids(imp_fa, maxit = 1, printFlag = FALSE) test_that("other elements of mids are left unchanged", { expect_equal(complete(imp2), complete(imp_fa2)) }) mice/tests/testthat/test-mice.impute.jomoImpute.R0000644000176200001440000000134414330031606021635 0ustar liggesuserscontext("mice.impute.jomoImpute") data <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] type <- c(2, 0, 0, 0, -2, 0, 1, 1, 0) names(type) <- names(data) z1 <- mice.impute.jomoImpute(data = data, type = type, format = "native") test_that("jomoImpute returns native class", { expect_is(z1, "mitml") }) blocks <- make.blocks(list(c("bmi", "chl", "hyp"), "age")) method <- c("jomoImpute", "pmm") pred <- make.predictorMatrix(nhanes, blocks) pred["B1", "hyp"] <- -2 # imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, # maxit = 1, seed = 1, print = FALSE) # z <- complete(imp) # # test_that("mice can call jomoImpute", { # expect_equal(sum(is.na(z$bmi)), 0) # expect_equal(sum(is.na(z$chl)), 0) # }) mice/tests/testthat/test-mice.impute.mpmm.R0000644000176200001440000000106414347334322020463 0ustar liggesuserscontext("mice.impute.mpmm") set.seed(1) beta2 <- beta1 <- .5 x <- rnorm(1000) e <- rnorm(1000, 0, 1) y <- beta1 * x + beta2 * x^2 + e # dat <- data.frame(x = x, x2 = x^2, y = y) # worked dat <- data.frame(y = y, x = x, x2 = x^2) # did not work m <- as.logical(rbinom(1000, 1, 0.25)) dat[m, c("x", "x2")] <- NA blk <- list("y", c("x", "x2")) meth <- c("", "mpmm") imp <- mice(dat, blocks = blk, method = meth, print = FALSE, m = 1, maxit = 1) test_that("mpmm() works for any column order in data", { expect_identical(complete(imp)$x^2, complete(imp)$x2) }) mice/tests/testthat/test-mice.impute.pmm.R0000644000176200001440000000205314334523631020304 0ustar liggesuserscontext("mice.impute.pmm") xname <- c("age", "hgt", "wgt") br <- boys[c(1:10, 101:110, 501:510, 601:620, 701:710), ] r <- stats::complete.cases(br[, xname]) x <- br[r, xname] y <- br[r, "tv"] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) test_that("Returns requested length", { expect_equal(length(mice.impute.pmm(y, ry, x)), sum(!ry)) 
expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy1)), sum(wy1)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy2)), sum(wy2)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy3)), sum(wy3)) expect_equal(length(mice.impute.pmm(y, ry, x, wy = wy4)), sum(wy4)) }) test_that("Excludes donors", { expect_false(all(c(15:25) %in% mice.impute.pmm(y, ry, x, exclude = c(15:25)))) }) imp1 <- mice(nhanes, printFlag = FALSE, seed = 123) imp2 <- mice(nhanes, printFlag = FALSE, seed = 123, exclude = c(-1, 1032)) test_that("excluding unobserved values does not impact pmm", { expect_identical(imp1$imp, imp2$imp) }) mice/tests/testthat/test-mice.impute.2lonly.mean.R0000644000176200001440000000343214330031606021643 0ustar liggesuserscontext("mice.impute.2lonly.mean") set.seed(66322) y <- popmis$texp y[rbinom(length(y), size = 1, prob = 0.5) == 1] <- NA x <- popmis[, c("pupil", "school", "sex")] ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) type <- c(1, -2, 1) yn <- y test_that("Returns requested length, for numeric", { expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy1)), sum(wy1)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy2)), sum(wy2)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy3)), sum(wy3)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy4)), sum(wy4)) }) # test extension to factors set.seed(66322) y <- popmis$texp y <- cut(y, breaks = c(0, 5, 10, 20, 30)) y[rbinom(length(y), size = 1, prob = 0.5) == 1] <- NA test_that("Returns requested length, for factor", { expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy1)), sum(wy1)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy2)), sum(wy2)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy3)), sum(wy3)) expect_equal(length(mice.impute.2lonly.mean(y, ry, x, type, wy4)), sum(wy4)) }) # check whether imputes for numeric and factor are identical # tn <- mice.impute.2lonly.mean(yn, ry, x, type, wy1) # tf <- mice.impute.2lonly.mean(y, ry, x, type, wy1) # check what happens if all values within a class are missing yn[1:100] <- NA imn <- mice.impute.2lonly.mean(yn, ry, x, type, wy1) zn <- table(imn, useNA = "al") y[1:100] <- NA imf <- mice.impute.2lonly.mean(y, ry, x, type, wy1) zf <- table(imf, useNA = "al") test_that("Return NA for classes without values", { expect_equal(as.numeric(zn[length(zn)]), 39) expect_equal(as.numeric(zf[length(zf)]), 39) }) mice/tests/testthat/test-mice.impute.durr.logreg.R0000644000176200001440000000500714433155521021746 0ustar liggesuserscontext("mice.impute.lasso.norm") ######################### # TEST 1: Simple problem # ######################### set.seed(123) # generate data n <- 1e3 y <- rnorm(n) x <- y * .3 + rnorm(n, 0, .25) x2 <- x + rnorm(n, 2, 3) x <- cbind(x, x2) y <- as.numeric(cut(y, 2)) - 1 # make missingness y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps <- mice.impute.lasso.logreg(y, ry, x) test_that("Returns a matrix of dimensionality sum(wy) x 1", { expect_true(is.matrix(imps)) expect_equal(dim(imps), c(sum(wy), 1)) }) ######################### # TEST 2: Use it within mice call # ######################### # Generate some dichotomous data n <- 1e2 p <- 4 Sigma <- matrix(.7, nrow = p, ncol = p) diag(Sigma) <- 1 X <- as.data.frame(MASS::mvrnorm(n, rep(0, p), Sigma)) # Discretize and impose miss for (j in 1:2) { X[, j] <- cut(X[, j], 2) # Make it discrete 
X[sample(1:n, n * .3), j] <- NA # Impose missings } # Imputations durr_default <- mice(X, m = 2, maxit = 2, method = "lasso.logreg", print = FALSE ) durr_custom <- mice(X, m = 2, maxit = 2, method = "lasso.logreg", nfolds = 5, print = FALSE ) logreg_default <- mice(X, m = 2, maxit = 2, method = "logreg", print = FALSE ) # Tests test_that("mice call works", { expect_equal(class(durr_custom), "mids") }) test_that("mice call works w/ custom arguments", { expect_equal(class(durr_custom), "mids") }) test_that("same class as logreg default method", { expect_equal( class(complete(logreg_default)[, 1]), class(complete(durr_default)[, 1]) ) }) ######################### # TEST 3: Perfect Prediction / Complete Separation # ######################### set.seed(123) # Generate some dichotomous data n <- 1e2 p <- 4 Sigma <- matrix(.7, nrow = p, ncol = p) diag(Sigma) <- 1 x <- MASS::mvrnorm(n, rep(0, p), Sigma) # Create Perfect Predictor y <- factor(x[, 1] < 0, labels = c("y", "n")) # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Imputation well behaved wellBehaved <- tryCatch( expr = { mice.impute.lasso.logreg(y = y, ry = ry, x = x[, -1]) }, error = function(e) { e }, warning = function(w) { w } ) # Imputation perfect prediction perfectPred <- tryCatch( expr = { mice.impute.lasso.logreg(y = y, ry = ry, x = x) }, error = function(e) { e }, warning = function(w) { w } ) # Test test_that("Complete separation results in same class as well behaved case", { expect_true(all.equal(class(wellBehaved), class(perfectPred))) }) mice/tests/testthat/test-mice-initialize.R0000644000176200001440000002042114430511431020340 0ustar liggesuserscontext("mice-initialize") data <- nhanes # case A: no predictorMatrix, blocks or formulas arguments imp1 <- mice(data, print = FALSE, m = 1, maxit = 1) pred <- imp1$predictorMatrix form <- imp1$formulas test_that("Case A finds blocks", { expect_identical(names(imp1$blocks), colnames(data)) }) test_that("Case A finds formulas", { expect_identical( attr(terms(form[["bmi"]]), "term.labels"), names(pred["bmi", ])[pred["bmi", ] == 1] ) }) # case B: only predictorMatrix argument pred1 <- matrix(1, nrow = 4, ncol = 4) pred2 <- matrix(1, nrow = 2, ncol = 2) pred3 <- matrix(1, nrow = 2, ncol = 2, dimnames = list(c("bmi", "hyp"), c("bmi", "hyp")) ) pred4 <- matrix(1, nrow = 2, ncol = 3, dimnames = list(c("bmi", "hyp"), c("bmi", "hyp", "chl")) ) imp1 <- mice(data, predictorMatrix = pred1, print = FALSE, m = 1, maxit = 1) imp3 <- mice(data, predictorMatrix = pred3, print = FALSE, m = 1, maxit = 1) test_that("Case B tests the predictorMatrix", { expect_equal(nrow(imp1$predictorMatrix), 4L) expect_error(mice(data, predictorMatrix = pred2, "Missing row/column names in `predictorMatrix`." 
)) expect_equal(nrow(imp3$predictorMatrix), 2L) expect_error(mice(data, predictorMatrix = pred4)) }) pred <- imp3$predictorMatrix blocks <- imp3$blocks test_that("Case B finds blocks", { expect_identical(names(blocks), c("bmi", "hyp")) }) form <- imp3$formulas test_that("Case B finds formulas", { expect_identical( attr(terms(form[["bmi"]]), "term.labels"), names(pred["bmi", ])[pred["bmi", ] == 1] ) }) # Case C: Only blocks argument imp1.0 <- mice(data, blocks = list("bmi", "chl", "hyp"), m = 1, maxit = 0, seed = 11) imp2.0 <- mice(data, blocks = list(c("bmi", "chl"), "hyp"), m = 1, maxit = 0, seed = 11) imp3.0 <- mice(data, blocks = list(all = c("bmi", "chl", "hyp")), m = 1, maxit = 0, seed = 11) test_that("Case C imputations are identical after initialization", { expect_identical(complete(imp1.0), complete(imp2.0)) expect_identical(complete(imp1.0), complete(imp3.0)) }) imp1 <- mice(data, blocks = list("bmi", "chl", "hyp"), print = FALSE, m = 1, maxit = 1, seed = 11) imp2 <- mice(data, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, m = 1, maxit = 1, seed = 11) imp3 <- mice(data, blocks = list(all = c("bmi", "chl", "hyp")), print = FALSE, m = 1, maxit = 1, seed = 11) test_that("Case C finds blocks", { expect_identical(names(imp2$blocks), c("B1", "hyp")) expect_identical(names(imp3$blocks), c("all")) }) test_that("Case C finds predictorMatrix", { expect_identical(imp2$predictorMatrix["hyp", "hyp"], 0) expect_identical(dim(imp3$predictorMatrix), c(1L, 4L)) }) test_that("Case C finds formulas", { expect_identical(sort(all.vars(imp2$formulas[["B1"]])), sort(colnames(data))) }) test_that("Case C yields same imputations for FCS and multivariate", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp1), complete(imp3)) }) # Case D: Only formulas argument # univariate models form1 <- list( bmi ~ age + hyp + chl, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp ) imp1 <- mice(data, formulas = form1, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199 ) # same model using dot notation form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) imp2 <- mice(data, formulas = form2, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199 ) # multivariate models (= repeated univariate) form3 <- list( bmi + hyp ~ age + chl, chl ~ age + bmi + hyp ) imp3 <- mice(data, formulas = form3, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199 ) # same model using dot notation form4 <- list(bmi + hyp ~ ., chl ~ .) 
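# The dot on the right-hand side expands to all other columns of the data, so
# form2 and form4 are shorthand for the fully spelled-out form1 and form3; the
# "same imputations for dot notation" tests below verify that equivalence.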
imp4 <- mice(data, formulas = form4, method = "norm.nob", print = FALSE, m = 1, maxit = 1, seed = 12199 ) test_that("Case D yields same imputations for dot notation", { expect_identical(complete(imp1), complete(imp2)) expect_identical(complete(imp3), complete(imp4)) }) test_that("Case D yields same imputations for FCS and multivariate", { expect_equal(complete(imp1), complete(imp3)) expect_equal(complete(imp2), complete(imp4)) }) # Case E: predictMatrix and blocks blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) blocks2 <- make.blocks(list(c("bmi", "chl"), "hyp")) blocks3 <- make.blocks(list(all = c("bmi", "chl", "hyp"))) pred1 <- make.predictorMatrix(data, blocks = blocks1) pred2 <- make.predictorMatrix(data, blocks = blocks2) pred3 <- make.predictorMatrix(data, blocks = blocks3) imp1 <- mice(data, blocks = blocks1, pred = pred1, m = 1, maxit = 1, print = FALSE) imp1a <- mice(data, blocks = blocks1, pred = matrix(1, nr = 4, nc = 4), m = 1, maxit = 1, print = FALSE) imp2 <- mice(data, blocks = blocks2, pred = pred2, m = 1, maxit = 1, print = FALSE) imp2a <- mice(data, blocks = blocks2, pred = matrix(1, nr = 2, nc = 4), m = 1, maxit = 1, print = FALSE) imp3 <- mice(data, blocks = blocks3, pred = pred3, m = 1, maxit = 1, print = FALSE) imp3a <- mice(data, blocks = blocks3, pred = matrix(1, nr = 1, nc = 4), m = 1, maxit = 1, print = FALSE) test_that("Case E borrows rownames from blocks", { expect_identical(rownames(imp1a$predictorMatrix), names(blocks1)) expect_identical(rownames(imp2a$predictorMatrix), names(blocks2)) expect_identical(rownames(imp3a$predictorMatrix), names(blocks3)) }) test_that("Case E borrows colnames from data", { expect_identical(colnames(imp1a$predictorMatrix), names(data)) expect_identical(colnames(imp2a$predictorMatrix), names(data)) expect_identical(colnames(imp3a$predictorMatrix), names(data)) }) test_that("Case E name setting fails on incompatible sizes", { expect_error( mice(data, blocks = blocks2, pred = matrix(1, nr = 2, nc = 2)), "Unable to set column names of predictorMatrix" ) expect_error( mice(data, blocks = blocks2, pred = matrix(1, nr = 1, nc = 4)), "Unable to set row names of predictorMatrix" ) expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr = 4, nc = 4))) }) colnames(pred1) <- c("A", "B", "chl", "bmi") pred2a <- pred2[, -(1:4), drop = FALSE] test_that("Case E detects incompatible arguments", { expect_error( mice(data, blocks = blocks1, pred = pred1), "Names not found in data: A, B" ) expect_error( mice(data, blocks = blocks1, pred = pred2), "Names not found in blocks: B1" ) expect_error( mice(data, blocks = blocks2, pred = matrix(1, nr = 1, nc = 4)), "Unable to set row names of predictorMatrix" ) expect_error(mice(data, blocks = blocks2, pred = matrix(1, nr = 4, nc = 4))) expect_error( mice(data, blocks = blocks2, pred = pred2a), "predictorMatrix has no rows or columns" ) }) # Case F: predictMatrix and formulas blocks1 <- make.blocks(c("bmi", "chl", "hyp", "age")) blocks2 <- make.blocks(list(c("bmi", "hyp"), "hyp")) pred1 <- make.predictorMatrix(data, blocks = blocks1) pred2 <- make.predictorMatrix(data, blocks = blocks2) form1 <- list( bmi ~ age + hyp + chl, hyp ~ age + bmi + chl, chl ~ age + bmi + hyp ) form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) form3 <- list( bmi + hyp ~ age + chl, chl ~ age + bmi + hyp ) form4 <- list(bmi + hyp ~ ., chl ~ .) 
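# When both formulas and a predictorMatrix are supplied, each block records in
# the "calltype" attribute whether its imputation model is driven by a formula
# or by the predictorMatrix ("type"); the first Case F test below checks
# exactly that attribute.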
# blocks1 and form1 are compatible imp1 <- mice(data, formulas = form1, pred = matrix(1, nr = 4, nc = 4), m = 1, maxit = 1, print = FALSE, seed = 3) test_that("Case F combines forms and pred in blocks", { expect_identical(unname(attr(imp1$blocks, "calltype")), c(rep("formula", 3), "type")) }) # dots and unnamed predictorMatrix imp2 <- mice(data, formulas = form2, pred = matrix(1, nr = 4, nc = 4), m = 1, maxit = 1, print = FALSE, seed = 3) test_that("Case F dots and specified form produce same imputes", { expect_identical(complete(imp1), complete(imp2)) }) # error test_that("Case F generates error if it cannot handle non-square predictor", { expect_error( mice(data, formulas = form2, pred = pred2), "If no blocks are specified, predictorMatrix must have same number of rows and columns" ) }) ## Error in formulas[[h]] : subscript out of bounds imp3 <- mice(data, formulas = form3, pred = pred1, m = 1, maxit = 0, print = FALSE, seed = 3) imp3a <- mice(data, formulas = form3, pred = pred1, m = 1, maxit = 1, print = FALSE, seed = 3) # err on matrix columns nh <- nhanes nh$hyp <- as.matrix(nh$hyp) test_that("MICE does not accept data.frames with embedded matrix ", { expect_error( mice(nh), "Cannot handle columns with class matrix: hyp" ) }) mice/tests/testthat/test-check.visitSequence.R0000644000176200001440000000043613666252075021213 0ustar liggesuserscontext("check.visitSequence") data <- nhanes test_that("mice() takes numerical and character visitSequence", { expect_silent(imp <- mice(data, visitSequence = 4:1, m = 1, print = FALSE)) expect_silent(imp <- mice(data, visitSequence = rev(names(data)), m = 1, print = FALSE)) }) mice/tests/testthat/test-md.pattern.R0000644000176200001440000000115114330031606017337 0ustar liggesuserscontext("md.pattern") test_that("patterns run as expected", { # boys expect_silent(md.pattern(boys, plot = FALSE)) # nhanes expect_silent(md.pattern(nhanes, plot = FALSE)) # one whole column missing (single pattern) - should not produce output expect_silent(md.pattern(cbind(na.omit(nhanes), NA), plot = FALSE)) # no missings (no pattern) - should produce output expect_output(md.pattern(na.omit(nhanes), plot = FALSE)) # feed single column - expect error expect_error(md.pattern(nhanes$bmi)) # feed other than dataframe/matrix - expect error expect_error(md.pattern(as.list(nhanes))) # }) mice/tests/testthat/test-D1.R0000644000176200001440000000317314330031606015535 0ustar liggesuserscontext("D1") imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ age + chl, family = binomial)) fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) empty <- with(data = imp, expr = glm(hyp == "yes" ~ 0, family = binomial)) # stat1 <- pool.compare(fit1, fit0, method = "wald") # deprecated because it relies on full vcov, which is not present anymore # in the mipo object # the next tests were remove because they failed on many # systems, not yet clear what the cause is (#128) # This is solved in #132 # three new ways to compare fit1 to the intercept-only model z1 <- D1(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), df.com = 21) z3 <- D1(fit1) test_that("compares fit1 to the intercept-only model", { expect_identical(z1$result, z2$test) expect_identical(z1$test, z3$test) }) # two ways to compare fit1 to the empty model z4 <- D1(fit1, empty) z5 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(empty), df.com = 21) test_that("compares fit1 to empty model", { expect_identical(z4$result, 
z5$test) }) context("D2") z1 <- D2(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D2") z3 <- D2(fit1) test_that("compares fit1 to the intercept-only model", { expect_identical(z1$result, z2$test) expect_identical(z1$test, z3$test) }) # two ways to compare fit1 to the empty model z4 <- D2(fit1, empty) z5 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(empty), method = "D2") test_that("compares fit1 to empty model", { expect_identical(z4$result, z5$test) }) mice/tests/testthat/test-loggedEvents.R0000644000176200001440000000056214330031606017716 0ustar liggesuserscontext("mice: loggedEvents") imp1 <- mice(nhanes, m = 2, print = FALSE) # copy of data, different names data2 <- cbind(nhanes, nhanes) colnames(data2)[5:8] <- c("age2", "bmi2", "hyp2", "chl2") imp2 <- suppressWarnings(mice(data2, m = 2, print = FALSE)) test_that("loggedEvents is NULL", { expect_null(imp1$loggedEvents) expect_type(imp2$loggedEvents, "list") }) mice/tests/testthat/test-blots.R0000644000176200001440000000114514330031606016411 0ustar liggesuserscontext("blots") # global change of donors argument blocks1 <- name.blocks(list(c("bmi", "chl"), "hyp")) imp0 <- mice(nhanes, blocks = blocks1, donors = 10, m = 1, maxit = 1, print = FALSE) # vary donors, depending on block blots1 <- list(B1 = list(donors = 10), hyp = list(donors = 1)) imp1 <- mice(nhanes, blocks = blocks1, blots = blots1, m = 1, maxit = 1, print = FALSE) test_that("errors when mixing same global and local argument", { expect_error( mice(nhanes, blocks = blocks1, blots = blots1, donors = 7, print = FALSE), 'formal argument "donors" matched by multiple actual arguments' ) }) mice/tests/testthat/test-anova.R0000644000176200001440000000226014330031606016371 0ustar liggesuserscontext("anova") imp <- mice(nhanes2, m = 10, print = FALSE, seed = 71242) m2 <- with(imp, lm(chl ~ age + bmi)) m1 <- with(imp, lm(chl ~ bmi)) m0 <- with(imp, lm(chl ~ 1)) # anova methods test_that("anova.mira() produces silent D1 and D3", { expect_silent(z1 <- anova(m2, m1, m0)) expect_silent(z3 <- anova(m2, m1, m0, method = "D3")) }) test_that("anova.mira() produces silent with D2", { expect_silent(z2a <- anova(m2, m1, m0, method = "D2")) expect_silent(z2b <- anova(m2, m1, m0, method = "D2", use = "likelihood")) }) context("Cox model tests") library(survival) set.seed(1) data <- survival::lung data$age[rbinom(nrow(data), size = 1, prob = 0.2) == 1] <- NA data$sex[rbinom(nrow(data), size = 1, prob = 0.2) == 1] <- NA data$ph.ecog[rbinom(nrow(data), size = 1, prob = 0.2) == 1] <- NA imp <- mice(data, print = FALSE) m1 <- with(imp, coxph(Surv(time, status) ~ age)) m2 <- with(imp, coxph(Surv(time, status) ~ age + sex)) m3 <- with(imp, coxph(Surv(time, status) ~ age + sex + ph.ecog)) test_that("runs tests for the Cox model", { expect_silent(pool(m1)) expect_silent(D1(m2, m1)) expect_silent(D2(m2, m1)) expect_error(D3(m2, m1)) expect_silent(anova(m3, m2, m1)) }) mice/tests/testthat/test-check.formula.R0000644000176200001440000000453414330031606020014 0ustar liggesuserscontext("check.formulas") data <- nhanes where <- is.na(data) # blocks <- name.blocks(list("bmi", "age", "chl")) # ini <- mice(data, blocks = blocks, maxit = 0) # # # classic type specification # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = NULL) # # v1 <- mice:::check.formulas(setup, data) # # # using a formula # #formulas <- v1$formulas # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = formulas) # #v2 
<- mice:::check.formulas(setup, data) # #v2$formulas # # test_that("updates `mode.formula` attribute", { # # expect_false(identical(v2$formulas, v2$formulas.arg)) # # expect_identical(v2$formulas[[1]], v2$formulas.arg[[1]]) # }) # # # try dot in formula # formulas <- list(bmi ~ ., age ~ ., chl ~ .) # formulas <- name.formulas(formulas) # setup <- list(blocks = blocks, # predictorMatrix = ini$predictorMatrix, # formulas = formulas) # #v3 <- mice:::check.formulas(setup, data) # #v3$formulas # # # classic specification using predictorMatrix # imp1 <- mice(nhanes, seed = 51212, print = FALSE, m = 1) # cmp1 <- complete(imp1) # # # formula specification # form <- list(age ~ ., bmi ~ ., hyp ~., chl ~ .) # imp2 <- mice(nhanes, formulas = form, seed = 51212, print = FALSE, m = 1) # cmp2 <- complete(imp2) # # test_that("predictorMatrix and formula yield same imputations", { # expect_identical(cmp1, cmp2) # expect_identical(imp1$imp, imp2$imp) # }) # formula specification form <- name.blocks(list(bmi ~ ., hyp ~ ., chl ~ .)) imp3 <- mice(nhanes, formulas = form, seed = 51212, print = FALSE, m = 1) cmp3 <- complete(imp3) # old.form <- c("", "bmi ~ chl + hyp", "hyp ~ bmi + chl", "chl ~ bmi + hyp") # imp <- mice(nhanes, formula = old.form, m = 1, maxit = 2, print = FALSE) # # form1 <- list(bmi = ~ 1, chl = ~ 1, hyp = ~ 1) # # impute given predictors # imp1 <- mice(nhanes, formula = form1, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # # impute the mean # imp2 <- mice(nhanes, formula = form1, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # # form2 <- list(bmi = "hyp ~ 1", chl = "hyp ~ 1", hyp = "hyp ~ 1") # imp3 <- mice(nhanes, formula = form2, m = 1, maxit = 2, method = "norm.predict", # print = FALSE, seed = 1) # mice/tests/testthat/test-remove.lindep.R0000644000176200001440000000331413666252075020055 0ustar liggesuserscontext("remove.lindep") set.seed(1) td <- matrix(rnorm(20), nrow = 5, ncol = 4) dimnames(td) <- list(1:5, LETTERS[1:4]) y <- td[, 1] ry <- rep(TRUE, 5) # data frame for storing the event log state <- list(it = 0, im = 0, dep = "y", meth = "test", log = FALSE) loggedEvents <- data.frame(it = 0, im = 0, dep = "", meth = "", out = "") fr <- 2 state$meth <- "k1" x <- td[, 2:4] k1 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k2" x[, 2] <- x[, 1] k2 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k3" x[, 3] <- 2 * x[, 1] k3 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k4" x <- td[, 2:4] y <- x[, 2] k4 <- mice:::remove.lindep(x, y, ry, frame = fr) state$meth <- "k5" x <- td[, 2:4] y <- x[, 2] x[, 3] <- x[, 1] <- x[, 2] k5 <- mice:::remove.lindep(x, y, ry, frame = fr) # one column x, same as y --> FALSE state$meth <- "k6" x <- td[, 2, drop = FALSE] y <- x[, 1] k6 <- mice:::remove.lindep(x, y, ry, frame = fr) # one column x, different from u --> TRUE state$meth <- "k7" x <- td[, 2, drop = FALSE] y <- td[, 1] k7 <- mice:::remove.lindep(x, y, ry, frame = fr) # two columns, same x and y --> FALSE, FALSE state$meth <- "k8" x <- td[, 2:3, drop = FALSE] x[, 2] <- x[, 1] y <- x[, 1] k8 <- mice:::remove.lindep(x, y, ry, frame = fr) loggedEvents test_that("removes copies", { expect_identical(unname(k1), c(TRUE, TRUE, TRUE)) expect_identical(unname(k2), c(FALSE, TRUE, TRUE)) # expect_identical(unname(k3), c(FALSE, FALSE, TRUE)) expect_identical(unname(k4), c(TRUE, FALSE, TRUE)) expect_identical(unname(k5), c(FALSE, FALSE, FALSE)) expect_identical(unname(k6), c(FALSE)) expect_identical(unname(k7), c(TRUE)) 
expect_identical(unname(k8), c(FALSE, FALSE)) }) mice/tests/testthat/test-mira.R0000644000176200001440000000027013666252075016234 0ustar liggesuserscontext("mira") imp <- mice(nhanes, print = FALSE, maxit = 1, seed = 121, m = 1) fit <- with(imp, sd(bmi)) test_that("list prints without an error", { expect_output(print(fit)) }) mice/tests/testthat/test-D3.R0000644000176200001440000000743414433400023015540 0ustar liggesuserscontext("D3") # The following test was contributed by jawitte # https://github.com/amices/mice/issues/226 set.seed(1) A <- rnorm(100) B <- 0.1 * A + rnorm(100) fit1 <- lapply(1:5, function(m) { lm(B ~ A) }) fit0 <- lapply(1:5, function(m) { lm(B ~ 1) }) # outcomment to evade dependency of lmtest # x1 <- lmtest::lrtest(fit1[[1]], fit0[[1]]) x1 <- structure(list(`#Df` = c(3, 2), LogLik = c(-137.087912980007, -137.516434459951), Df = c(NA, -1), Chisq = c(NA, 0.857042959888474), `Pr(>Chisq)` = c(NA, 0.354567523408569)), class = c("anova", "data.frame"), row.names = c("1", "2"), heading = c("Likelihood ratio test\n", "Model 1: B ~ A\nModel 2: B ~ 1")) x2 <- D3(fit1 = fit1, fit0 = fit0) x3 <- mitml::testModels(fit1, fit0, method = "D3") # tests for complete data test_that("lm, complete data: D3() and lrtest() calculate same test statistic", { expect_equal(x1$Chisq[2], x2$result[1]) }) test_that("lm, complete data: testModels() and lrtest() calculate same test statistic", { expect_equal(x1$Chisq[2], x3$test[1]) }) # FIXME: # for imputed data, there are discrepancies between mitml and mice # the tests below compare mitml and mice, but none of these seem to work # so I have a outcommented the critical lines imp <- mice(nhanes, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = lm(hyp ~ age + chl)) fit0 <- with(data = imp, expr = lm(hyp ~ 1)) empty <- with(data = imp, expr = lm(hyp ~ 0)) # stat1 <- pool.compare(fit1, fit0, method = "likelihood") z1 <- D3(fit1, fit0) z2 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # This test fails # FIXME # test_that("lm: mice and mitml calculate same F", { # expect_equal(z1$result[1], z2$test[1]) # }) # using lmer suppressPackageStartupMessages(library(mitml, quietly = TRUE)) library(lme4, quietly = TRUE) # library(broom.mixed, quietly = TRUE) data(studentratings) fml <- ReadDis + SES ~ ReadAchiev + (1 | ID) set.seed(26262) imp <- mitml::panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5, silent = TRUE ) implist <- mitml::mitmlComplete(imp, print = 1:5) fit0 <- with(implist, lmer(ReadAchiev ~ (1 | ID), REML = FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1 | ID), REML = FALSE)) # outcommented to evade dependency on broom.mixed under the _R_CHECK_DEPENDS_ONLY=true flag # # likelihood test # z3 <- D3(fit1, fit0) # z4 <- mitml::testModels(fit1, fit0, method = "D3") # This test fails. 
# FIXME # test_that("lmer: mice and mitml calculate same F", { # expect_equal(z3$result[1], z4$test[1]) # }) # glm # imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) # # fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ age + chl, family = binomial)) # fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) # empty <- with(data = imp, expr = glm(hyp == "yes" ~ 0, family = binomial)) # # model dev1.L does not look right, negative Dm, convergence problems # FIXME # z5 <- D3(fit1, fit0) # mitml can't do this case # z6 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # crashes on terms # FIXME # z5a <- D3(fit1, empty) # This test fails. # FIXME # test_that("glm: mice and mitml calculate same F", { # expect_equal(z5$result[1], z6$test[1]) # }) # data with factors imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit1 <- with(data = imp, expr = lm(bmi ~ age + chl + hyp)) fit0 <- with(data = imp, expr = lm(bmi ~ age)) empty <- with(data = imp, expr = lm(bmi ~ 0)) z7 <- D3(fit1, fit0) z8 <- mitml::testModels(as.mitml.result(fit1), as.mitml.result(fit0), method = "D3") # This test fails. # FIXME # test_that("factors: mice and mitml calculate same F", { # expect_equal(z7$result[1], z8$test[1]) # }) mice/tests/testthat/test-with.R0000644000176200001440000000123314433400023016234 0ustar liggesusers# See #292: with.mids() using eval_tidy() breaks compatibility with metafor # outcommented to evade dependency on metafor # dat <- dat.bcg # dat <- escalc(measure = "RR", ai = tpos, bi = tneg, ci = cpos, di = cneg, data = dat) # dat$ablat[c(2, 4, 8)] <- NA # # predMatrix <- make.predictorMatrix(dat) # predMatrix[, ] <- 0 # predMatrix["ablat", c("yi", "year")] <- 1 # # impMethod <- make.method(dat) # impMethod["ablat"] <- "pmm" # impMethod # imp <- mice(dat, print = FALSE, predictorMatrix = predMatrix, method = impMethod, seed = 1234) # test_that("does not break metafor package", { # expect_silent(fit <- with(imp, rma(yi, vi, mods = ~ ablat + year))) # }) mice/tests/testthat/test-tidiers.R0000644000176200001440000000202014334445701016733 0ustar liggesuserscontext("tidiers") data(nhanes) imp <- mice::mice(nhanes, maxit = 2, m = 2, seed = 1, print = FALSE, use.matcher = TRUE) fit_mira <- with(imp, lm(chl ~ age + bmi)) fit_mipo <- mice::pool(fit_mira) test_that("glance.mipo: nhanes lm", { tmp <- glance(fit_mipo) expect_true(inherits(tmp, "data.frame")) expect_equal(tmp$adj.r.squared[1], 0.4966436, tolerance = .00001) expect_equal(tmp$r.squared[1], 0.539119, tolerance = .00001) }) test_that("tidy.mipo: nhanes lm", { tmp <- tidy(fit_mipo) expect_true(inherits(tmp, "data.frame")) expect_equal(dim(tmp), c(3, 13)) tmp <- tidy(fit_mipo, conf.int = TRUE) expect_true(inherits(tmp, "data.frame")) expect_equal(dim(tmp), c(3, 15)) expect_equal(tmp$conf.low, c( -171.676808396086, -12.5277617578218, 3.42203157045941 )) tmp <- tidy(fit_mipo, conf.int = TRUE, conf.level = .99) expect_true(inherits(tmp, "data.frame")) expect_equal(dim(tmp), c(3, 15)) expect_equal(tmp$conf.low, c( -216.910255354075, -63.8124944550467, 2.16193377446054 )) }) mice/tests/testthat/test-mice.impute.2l.norm.R0000644000176200001440000000051113666252075021007 0ustar liggesuserscontext("mice.impute.2l.norm") d1 <- brandsma[1:200, c("sch", "lpo")] pred <- make.predictorMatrix(d1) pred["lpo", "sch"] <- -2 test_that("mice::mice.impute.2l.norm() runs empty model", { expect_silent(imp <- mice(d1, method = "2l.norm", print = FALSE, pred = pred, m = 1, maxit = 1)) 
expect_false(anyNA(complete(imp))) }) mice/tests/testthat/test-cbind.R0000644000176200001440000000771014330031606016351 0ustar liggesuserscontext("cbind.mids") data <- nhanes data1 <- data[, c("age", "bmi")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) test_that("combines imputations", { expect_identical(ncol(complete(imp)), 4L) expect_identical(complete(imp1), complete(imp)[, c("age", "bmi")]) }) # when using blocks data <- nhanes data1 <- data[, c("age", "bmi")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 1, maxit = 1, print = FALSE) imp2 <- mice(data2, blocks = list(c("hyp", "chl")), m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) test_that("combines imputations with blocks", { expect_identical(ncol(complete(imp)), 4L) expect_identical(complete(imp1), complete(imp)[, c("age", "bmi")]) }) # handling of duplicate variable names data <- nhanes data1 <- data[, c("age", "bmi", "hyp")] data2 <- data[, c("hyp", "chl")] imp1 <- mice(data1, m = 1, maxit = 1, print = FALSE) imp2 <- mice(data2, m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) impc <- mice.mids(imp, max = 2, print = FALSE) test_that("duplicate variable adds a column", { expect_identical(ncol(complete(impc)), 5L) }) # handling of duplicate blocks imp1 <- mice(data1, blocks = list(c("age", "bmi"), "hyp"), m = 1, maxit = 1, print = FALSE) imp2 <- mice(data2, blocks = list(c("hyp", "chl")), m = 1, maxit = 1, print = FALSE) imp <- cbind(imp1, imp2) impc <- mice.mids(imp, max = 2, print = FALSE) test_that("duplicate blocks names renames block", { expect_identical(names(impc$blocks)[3], "B1.1") }) # cbind - no second argument imp1 <- mice(nhanes, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) imp2 <- cbind(imp1) imp3 <- cbind(imp1, NULL) imp4 <- cbind(imp1, character(0)) test_that("returns imp1 object if there is nothing to bind", { expect_identical(imp2, imp1) expect_identical(imp3, imp1) expect_identical(imp4, imp1) }) # cbind - unnamed constant imp2 <- cbind(imp1, 1) imp3 <- cbind(imp1, NA) imp4 <- cbind(imp1, "male") test_that("replicates unnamed constant", { expect_identical(ncol(complete(imp2)), 5L) expect_identical(ncol(complete(imp3)), 5L) expect_identical(ncol(complete(imp4)), 5L) }) imp6 <- cbind(imp1, int = 51:75, out = 15, NA) test_that("appends names vectors and constants", { expect_identical(ncol(complete(imp6)), 7L) expect_error( cbind(imp1, c(NA, 9)), "arguments imply differing number of rows: 25, 2" ) }) # matrix, factor, data.frame # NOTE: cbind() dispatches to wrong function if there is a data.frame # so use cbind.mids() imp8 <- mice:::cbind.mids(imp1, ma = matrix(1:50, nrow = 25, ncol = 2), age = nhanes2$age, df = nhanes2[, c("hyp", "chl")] ) test_that("appends matrix, factor and data.frame", { expect_identical(ncol(complete(imp8)), 9L) }) # impc <- mice.mids(imp8, max = 2, print = FALSE) # NOTE: now using own version of cbind() imp9 <- cbind(imp1, ma = matrix(1:50, nrow = 25, ncol = 2), age = nhanes2$age, df = nhanes2[, c("hyp", "chl")] ) test_that("appends matrix, factor and data.frame", { expect_identical(ncol(complete(imp9)), 9L) }) impc <- mice.mids(imp9, max = 2, print = FALSE) test_that("combined object works as input to mice.mids", { expect_true(is.mids(impc)) }) test_that("cbind does not throw a warning (#114)", { expect_silent(cbind(ordered(c(1, 2)))) }) # # cbind data.frame (rename to age.1) # imp1 <- mice(nhanes, blocks = 
list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) # agevar <- nhanes$age # agevar[1:5] <- NA # imp2 <- mice:::cbind.mids(imp1, data.frame(age = agevar, hyp = "test")) # imp3 <- mice.mids(imp2, max = 2, print = FALSE) # complete(imp3) # # # cbind data.frame (use quoted name) # imp1 <- mice(nhanes, blocks = list(c("bmi", "chl"), "hyp"), print = FALSE, maxit = 1, m = 1) # agevar <- nhanes$age # agevar[1:5] <- NA # imp2 <- mice:::cbind.mids(imp1, age = agevar, hyp = "test") # imp3 <- mice.mids(imp2, max = 2, print = FALSE) # complete(imp3) # mice/tests/testthat/test-mice.impute.2lonly.norm.R0000644000176200001440000000252014330031606021673 0ustar liggesuserscontext("mice.impute.2lonly.norm") ## https://stackoverflow.com/questions/58266785/mice-2l-pan-multilevel-multiple-imputation-error-missing-values-in-pred-not-all?sem=2 library("pan") # Not multilevel to illustrate need set.seed(100) data <- data.frame( patid = rep(1:4, each = 5), sex = rep(c(1, 2, 1, 2), each = 5), crp = c( 68, 78, 93, NA, 143, 5, 7, 9, 13, NA, 97, NA, 56, 52, 34, 22, 30, NA, NA, 45 ) ) pred <- make.predictorMatrix(data) pred[, "patid"] <- -2 # only missing value (out of five) for patid == 1 data[3, "sex"] <- NA test_that("2lonly.norm stops with partially missing level-2 data", { expect_error( mice(data, method = c("", "2lonly.norm", "2l.pan"), predictorMatrix = pred, maxit = 1, m = 2, print = FALSE ), "Method 2lonly.norm found the following clusters with partially missing\n level-2 data: 1\n Method 2lonly.mean can fix such inconsistencies." ) }) set.seed(66322) y <- popmis[1:200, "texp"] x <- popmis[1:200, c("pupil", "school", "sex")] y[x$school %in% 1:3] <- NA ry <- !is.na(y) wy1 <- !ry wy2 <- rep(TRUE, length(y)) wy3 <- rep(FALSE, length(y)) wy4 <- rep(c(TRUE, FALSE), times = c(1, length(y) - 1)) type <- c(1, -2, 1) yn <- y y <- as.numeric(y) set.seed(1) z1 <- mice.impute.2lonly.norm(y, ry = ry, x, type) z2 <- mice.impute.2lonly.pmm(y, ry = ry, x, type) mice/tests/testthat/test-formulas.R0000644000176200001440000000123314330031606017114 0ustar liggesuserscontext("formulas") data <- nhanes test_that("model.matrix() deletes incomplete cases", { expect_identical(dim(model.matrix(~age, data)), c(25L, 2L)) expect_identical(dim(model.matrix(~chl, data)), c(15L, 2L)) expect_identical(dim(model.matrix(~ poly(age), data)), c(25L, 2L)) expect_error( model.matrix(~ poly(chl), data), "missing values are not allowed in 'poly'" ) expect_identical(dim(model.matrix(~ poly(chl, raw = TRUE), data)), c(15L, 2L)) }) # in MICE we can now use poly() form <- list(bmi ~ poly(chl, 2) + age + hyp) pred <- make.predictorMatrix(nhanes) imp1 <- mice(data, form = form, pred = pred, m = 1, maxit = 2, print = FALSE) mice/tests/testthat/test-parlmice.R0000644000176200001440000000507114335404116017071 0ustar liggesusers# Same seed - multiple cores - # Result: Imputations not equal between mice and parlmice test_that("Warning and Imputations between mice and parlmice are unequal", { skip_if_not(parallel::detectCores() > 2) expect_warning(A <- parlmice(nhanes, m = 2, seed = 123)) B <- mice(nhanes, m = 2, print = FALSE, seed = 123) expect_false(all(complete(A, "long") == complete(B, "long"))) }) # Same seed - single core - # Result: Imputations equal between mice and parlmice test_that("Imputations are equal between mice and parlmice", { expect_warning(C <- parlmice(nhanes, n.core = 1, n.imp.core = 5, seed = 123)) D <- mice(nhanes, m = 5, print = FALSE, seed = 123) expect_identical(complete(C, "long"), complete(D, "long")) }) # Should return m 
= 8 test_that("Cores and n.imp.core specified. Override m", { expect_warning(I <- parlmice(nhanes, n.core = 2, n.imp.core = 4)) expect_identical(I$m, 2 * 4) }) # Should return m = 3x5=15 test_that("n.imp.core not specified", { expect_warning(J <- parlmice(nhanes, n.core = 2)) expect_identical(J$m, 2 * 5) }) # Should return m = 2x7=42 test_that("n.imp.core not specified", { expect_warning(K <- parlmice(nhanes, n.core = 2, m = 7)) expect_identical(K$m, 2 * 7) }) # Should return error test_that("n.core larger than logical CPU cores", { expect_error(suppresWarnings(parlmice(nhanes, n.core = parallel::detectCores() + 1))) }) # # NOT RUN ON R CMD CHECK AND CRAN CHECK - TOO MANY PARALLEL PROCESSES SPAWNED # # Should return m = n.imp.core * parallel::detectCores() - 1 # test_that("Warning because n.core not specified", { # expect_warning(H <- parlmice(nhanes, n.imp.core = 3)) # expect_identical(H$m, 3 * (parallel::detectCores() - 1)) # }) # # #Same cluster.seed - multiple cores # #Result: Imputations equal between parlmice instances # imp1 <- parlmice(nhanes, m=2, cluster.seed = 123) # imp2 <- parlmice(nhanes, m=2, cluster.seed = 123) # test_that("cluster.seed", { # expect_equal(imp1, imp2) # }) # # #Should run without failure # df <- boys # meth <- make.method(df) # pred <- make.predictorMatrix(df) # visit <- 9:1 # imp3 <- parlmice(df, method = meth, # predictorMatrix = pred, # visitSequence = visit, # n.core = 2, # n.imp.core = 4, # maxit = 3, # cluster.seed = 123) # test_that("Runs when overriding defaults", { # expect_identical(imp3$pred, pred) # expect_identical(imp3$iteration, 3) # expect_identical(imp3$method, meth) # expect_identical(imp3$visitSequence, names(df)[visit]) # expect_identical(imp3$m, 2*4) # }) mice/tests/testthat/test-pool.R0000644000176200001440000001341114436064333016250 0ustar liggesuserscontext("pool") # set the random generator to V3.5.0 to ensure that this test # passes in V3.6.0 and later # see mail Kurt Hornik, dated 06mar19 # FIXME: consider using the new generator once V3.6.0 is out, # at the expense of breaking reproducibility of the examples in # https://stefvanbuuren.name/fimd/ suppressWarnings(RNGversion("3.5.0")) imp <- mice(nhanes2, print = FALSE, maxit = 2, seed = 121, use.matcher = TRUE) fit <- with(imp, lm(bmi ~ chl + age + hyp)) est <- pool(fit) # fitlist <- fit$analyses # est <- mice:::pool.fitlist(fitlist) mn <- c(18.76175, 0.05359003, -4.573652, -6.635969, 2.163629) se <- c(4.002796, 0.02235067, 2.033986, 2.459769, 2.02898) test_that("retains same numerical result", { expect_equal(unname(getqbar(est)), mn, tolerance = 0.00001) expect_equal(unname(summary(est)[, "std.error"]), se, tolerance = 0.00001) }) imp <- mice(nhanes2, print = FALSE, m = 10, seed = 219) fit0 <- with(data = imp, expr = glm(hyp == "yes" ~ 1, family = binomial)) fit1 <- with(data = imp, expr = glm(hyp == "yes" ~ chl + bmi, family = binomial)) D1(fit1, fit0) D3(fit1, fit0) # test_that("retains same numerical result", { # expect_equal(round(as.vector(stat1$pvalue), 3), 0.188) # expect_equal(stat2$pvalue, 0) # }) # # Venables & Ripley, 2nd, p 235 birthwt <- MASS::birthwt bwt <- with( birthwt, data.frame( low = factor(low), age = age, lwt = lwt, race = factor(race, labels = c("white", "black", "other")), smoke = smoke > 0, ptd = factor(ptl > 0), ht = ht > 0, ui = ui > 0, ftv = factor(ftv) ) ) levels(bwt$ftv)[-(1:2)] <- "2" birthwt.glm <- glm(low ~ ., family = binomial, data = bwt) summary(birthwt.glm) birthwt.step <- step(birthwt.glm, trace = FALSE) LLlogistic <- function(formula, data, 
coefs) { ### Calculates -2 loglikelihood of a model. logistic <- function(mu) exp(mu) / (1 + exp(mu)) Xb <- model.matrix(formula, data) %*% coefs y <- model.frame(formula, data)[1][, 1] if (is.factor(y)) y <- (0:1)[y] p <- logistic(Xb) ## in case values of categorical var are other than 0 and 1. y <- (y - min(y)) / (max(y) - min(y)) term1 <- term2 <- rep(0, length(y)) term1[y != 0] <- y[y != 0] * log(y[y != 0] / p[y != 0]) term2[y == 0] <- (1 - y[y == 0]) * log((1 - y[y == 0]) / (1 - p[y == 0])) 2 * sum(term1 + term2) } model1 <- glm(low ~ ., family = binomial, data = bwt) model0 <- update(model1, formula = . ~ . - age - ftv) model.null <- update(model1, formula = . ~ 1) ll1 <- LLlogistic(formula = formula(model1), data = bwt, coefs = coef(model1)) ll0 <- LLlogistic(formula = formula(model0), data = bwt, coefs = coef(model0)) llnull <- LLlogistic(formula = formula(model.null), data = bwt, coefs = coef(model.null)) identical(deviance(model1), ll1, num.eq = FALSE) identical(deviance(model0), ll0, num.eq = FALSE) identical(deviance(model.null), llnull, num.eq = FALSE) # try out coef.fix for binary data f1 <- fix.coef(model1, beta = coef(model1)) broom::glance(model1) broom::glance(f1) identical(broom::glance(f1)$deviance, broom::glance(model1)$deviance) beta <- coef(model1) beta["age"] <- 0 beta["smokeTRUE"] <- 0 f2 <- fix.coef(model1, beta) broom::glance(f2)$deviance set.seed(123) bwt.mis <- bwt bwt.mis$smoke[runif(nrow(bwt)) < 0.001] <- NA bwt.mis$lwt[runif(nrow(bwt)) < 0.01] <- NA imp <- mice(bwt.mis, print = FALSE, m = 10) fit1 <- with(data = imp, expr = glm(low ~ age + lwt + race + smoke + ptd + ht + ui + ftv, family = binomial)) fit0 <- with(data = imp, glm(low ~ lwt + race + smoke + ptd + ht + ui, family = binomial)) D1(fit1, fit0) D3(fit1, fit0) # --- test restriction of parameters # all parameters estimated fit <- lm(bmi ~ age + hyp + chl, data = nhanes) coef(fit) formula(fit) newformula <- bmi ~ 0 + I(18.26966503 - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) newformula <- . ~ 0 + I(18.26966503 * 1L - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) fit2 <- update(fit, formula = newformula) coef(fit2) summary(fit) summary(fit2) cor(predict(fit), predict(fit) + residuals(fit))^2 cor(predict(fit2), predict(fit2) + residuals(fit2))^2 newformula <- bmi ~ 0 + offset(18.26966503 - 5.78652468 * age + 2.10467529 * hyp + 0.08044924 * chl) fit3 <- update(fit, formula = newformula) coef(fit3) summary(fit3) cor(predict(fit3), predict(fit3) + residuals(fit3))^2 # compare to mitml::anova.mitml.result suppressPackageStartupMessages(library(mitml, quietly = TRUE)) library(lme4, quietly = TRUE) data(studentratings) fml <- ReadDis + SES ~ ReadAchiev + (1 | ID) imp <- mitml::panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5, silent = TRUE ) implist <- mitml::mitmlComplete(imp, print = 1:5) # * Example 1: multiparameter hypothesis test for 'ReadDis' and 'SES' # This tests the hypothesis that both effects are zero. 
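# (In the terminology used below, D1 is the pooled multivariate Wald test and
#  D3 the pooled likelihood-ratio test; mitml::testModels() uses D1 by default.)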
fit0 <- with(implist, lmer(ReadAchiev ~ (1 | ID), REML = FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + (1 | ID), REML = FALSE)) # apply Rubin's rules testEstimates(fit1) # Wald test # multiparameter hypothesis test using D1 (default) mitml::testModels(fit1, fit0) # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "wald") # Is the same, but probably consequence of single parameter differerence # Wald test - multiparameter difference - incorrect because now our # ubar is vector, not a matrix anymore fit0 <- with(implist, lmer(ReadAchiev ~ (1 | ID), REML = FALSE)) fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1 | ID), REML = FALSE)) mitml::testModels(fit1, fit0) # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "wald") # Is the same, but probably consequence of single parameter differerence # likelihood test mitml::testModels(fit1, fit0, method = "D3") # stats <- pool.compare(as.mira(fit1), as.mira(fit0), method = "likelihood") # --- fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + SES + (1 | ID), REML = FALSE)) mice/tests/testthat/test-mice.impute.iurr.norm.R0000644000176200001440000000367114433156031021451 0ustar liggesuserscontext("mice.impute.lasso.select.norm") ######################### # TEST 1: Simple problem # ######################### set.seed(123) # generate data n <- 1e3 y <- rnorm(n) x <- y * .3 + rnorm(n, 0, .25) x2 <- x + rnorm(n, 2, 3) x <- cbind(x, x2) # make missingness y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t1 <- mice.impute.lasso.select.norm(y, ry, x) test_that("Returns requested length", { expect_equal(length(imps_t1), sum(!ry)) }) ######################### # TEST 2: Nothing is important # ######################### n <- 1e2 p <- 10 b0 <- 100 bs <- rep(0, p) x <- matrix(rnorm(n * p), n, p) y <- b0 + x %*% bs + rnorm(n) # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t2 <- mice.impute.lasso.select.norm(y, ry, x) test_that("Returns requested length w/ intercept only model", { expect_equal(length(imps_t2), sum(!ry)) }) ######################### # TEST 3: Everything is important # ######################### n <- 1e2 p <- 10 b0 <- 100 bs <- rep(1, p) x <- matrix(rnorm(n * p), n, p) y <- b0 + x %*% bs + rnorm(n) # Missing values y[sample(1:n, n * .3)] <- NA ry <- !is.na(y) wy <- !ry # Use univariate imputation model set.seed(123) imps_t3 <- mice.impute.lasso.select.norm(y, ry, x) test_that("Returns requested length when all predictors are important", { expect_equal(length(imps_t3), sum(!ry)) }) ######################### # TEST 4: Use it within mice call # ######################### boys_cont <- boys[, 1:4] iurr_default <- mice(boys_cont, m = 2, maxit = 2, method = "lasso.select.norm", print = FALSE ) iurr_custom <- mice(boys_cont, m = 2, maxit = 2, method = "lasso.select.norm", nfolds = 5, print = FALSE ) test_that("mice call works", { expect_equal(class(iurr_custom), "mids") }) test_that("mice call works w/ custom arguments", { expect_equal(class(iurr_custom), "mids") }) mice/tests/testthat.R0000755000176200001440000000006413666251671014334 0ustar liggesuserslibrary(testthat) library(mice) test_check("mice") mice/src/0000755000176200001440000000000014437176012011763 5ustar liggesusersmice/src/match.cpp0000644000176200001440000000330614335377210013565 0ustar liggesusers#include #include using namespace std; using namespace Rcpp; // [[Rcpp::export]] IntegerVector matcher(NumericVector obs, 
NumericVector mis, int k) { // fast predictive mean matching algorithm // for each of the n0 elements in mis // 1) calculate the difference with obs // 2) add small noise to break ties // 3) find the k indices of the k closest predictors // 4) randomly draw one index // and return the vector of n0 matched positions // SvB 26/01/2014 // declarations int jj; int n1 = obs.size(); int n0 = mis.size(); double dk = 0; int count = 0; int goal = 0; NumericVector d(n1); NumericVector d2(n1); IntegerVector matched(n0); // restrict 1 <= k <= n1 k = (k <= n1) ? k : n1; k = (k >= 1) ? k : 1; // in advance, uniform sample from k potential donors NumericVector which = floor(runif(n0, 1, k + 1)); NumericVector mm = range(obs); double small = (mm[1] - mm[0]) / 65536; // loop over the missing values for(int i = 0; i < n0; i++) { // calculate the distance and add noise to break ties d2 = runif(n1, 0, small); dk = mis[i]; for (int j = 0; j < n1; j++) d[j] = std::abs(obs[j] - dk) + d2[j]; // find the k'th lowest value in d for (int j = 0; j < n1; j++) d2[j] = d[j]; std::nth_element (d2.begin(), d2.begin() + k - 1, d2.end()); // find index of donor which[i] dk = d2[k-1]; count = 0; goal = (int) which[i]; for (jj = 0; jj < n1; jj++) { if (d[jj] <= dk) count++; if (count == goal) break; } // and store the result matched[i] = jj; } // increase index to offset 1 return matched + 1; } mice/src/matchindex.cpp0000644000176200001440000001300114335377156014617 0ustar liggesusers#include #include #include using namespace std; using namespace Rcpp; //' Find index of matched donor units //' //' @param d Numeric vector with values from donor cases. //' @param t Numeric vector with values from target cases. //' @param k Integer, number of unique donors from which a random draw is made. //' For \code{k = 1} the function returns the index in \code{d} corresponding //' to the closest unit. For multiple imputation, the //' advice is to set values in the range of \code{k = 5} to \code{k = 10}. //' @return An integer vector with \code{length(t)} elements. Each //' element is an index in the array \code{d}. //' @details //' For each element in \code{t}, the method finds the \code{k} nearest //' neighbours in \code{d}, randomly draws one of these neighbours, and //' returns its position in vector \code{d}. //' //' Fast predictive mean matching algorithm in seven steps: //' //' 1. Shuffle records to remove effects of ties //' //' 2. Obtain sorting order on shuffled data //' //' 3. Calculate index on input data and sort it //' //' 4. Pre-sample vector \code{h} with values between 1 and \code{k} //' //' For each of the \code{n0} elements in \code{t}: //' //' 5. find the two adjacent neighbours //' //' 6. find the \code{h_i}'th nearest neighbour //' //' 7. store the index of that neighbour //' //' Return vector of \code{n0} positions in \code{d}. //' //' We may use the function to perform predictive mean matching under a given //' predictive model. To do so, specify both \code{d} and \code{t} as //' predictions from the same model. Suppose that \code{y} contains the observed //' outcomes of the donor cases (in the same sequence as \code{d}), then //' \code{y[matchindex(d, t)]} returns one matched outcome for every //' target case. //' //' See \url{https://github.com/amices/mice/issues/236}. //' This function is a replacement for the \code{matcher()} function that has //' been in default in \code{mice} since version \code{2.22} (June 2014). 
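//'
//' Because the donor values are sorted once and each target value is then
//' located with a binary search (expanding at most \code{k} positions to the
//' left and right), the cost is roughly O(n1 log n1 + n0 (log n1 + k)),
//' where n1 and n0 denote the number of donors and targets.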
//' @examples //' set.seed(1) //' //' # Inputs need not be sorted //' d <- c(-5, 5, 0, 10, 12) //' t <- c(-6, -4, 0, 2, 4, -2, 6) //' //' # Index (in vector a) of closest match //' idx <- matchindex(d, t, 1) //' idx //' //' # To check: show values of closest match //' //' # Random draw among indices of the 5 closest predictors //' matchindex(d, t) //' //' # An example //' train <- mtcars[1:20, ] //' test <- mtcars[21:32, ] //' fit <- lm(mpg ~ disp + cyl, data = train) //' d <- fitted.values(fit) //' t <- predict(fit, newdata = test) # note: not using mpg //' idx <- matchindex(d, t) //' //' # Borrow values from train to produce 12 synthetic values for mpg in test. //' # Synthetic values are plausible values that could have been observed if //' # they had been measured. //' train$mpg[idx] //' //' # Exercise: Create a distribution of 1000 plausible values for each of the //' # twelve mpg entries in test, and count how many times the true value //' # (which we know here) is located within the inter-quartile range of each //' # distribution. Is your count anywhere close to 500? Why? Why not? //' @author Stef van Buuren, Nasinski Maciej, Alexander Robitzsch //' @export // [[Rcpp::export]] IntegerVector matchindex(NumericVector d, NumericVector t, int k = 5) { Environment base("package:base"); Function sample = base["sample"]; // declarations int n1 = d.size(); int n0 = t.size(); // 1. Shuffle records to remove effects of ties // Suggested by Alexander Robitzsch // https://github.com/stefvanbuuren/mice/issues/236 // Call base::sample() to advance .Random.seed IntegerVector ishuf= sample(n1); ishuf = ishuf - 1; NumericVector yshuf(n1); for (int i = 0; i < n1; i++) {yshuf(i) = d(ishuf(i));} // 2. Obtain sorting order on shuffled data // https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes IntegerVector isort(n1); iota(isort.begin(), isort.end(), 0); stable_sort(isort.begin(), isort.end(), [yshuf](int i1, int i2) {return yshuf[i1] < yshuf[i2];}); // 3. Calculate index on input data and sort IntegerVector id(n1); std::vector ysort(n1); for (int i = 0; i < n1; i++) { id(i) = ishuf(isort(i)); ysort[i] = d(id(i)); } // 4. Pre-sample n0 values between 1 and k // restrict 1 <= k <= n1 k = (k <= n1) ? k : n1; k = (k >= 1) ? k : 1; IntegerVector kv(k); iota(kv.begin(), kv.end(), 1); IntegerVector h = sample(kv, n0, Rcpp::_["replace"] = true); IntegerVector idx(n0); // loop over the target units for (int i = 0; i < n0; i++) { double val = t(i); int hi = h(i); int count = 0; // 5. find the two adjacent neighbours std::vector::iterator iter; iter = std::lower_bound(ysort.begin(), ysort.end(), val); int r = iter - ysort.begin(); int l = r - 1; // 6. find the h_i'th nearest neighbour // 7. 
store the index of that neighbour // Compare elements on left and right of crossover // point to find the h'th closest match // Inspired on Polkas: https://github.com/Polkas/miceFast/issues/10 while (count < hi && l >= 0 && r < n1) { if (val - ysort[l] < ysort[r] - val) { idx(i) = id[l--]; } else { idx(i) = id[r++]; } count++; } // If right side is exhausted, take left elements while (count < hi && l >= 0) { idx(i) = id[l--]; count++; } // If left side is exhausted, take right elements while (count < hi && r < n1) { idx(i) = id[r++]; count++; } } return idx + 1; } mice/src/Makevars0000644000176200001440000000166614335377254013500 0ustar liggesusers## Use the R_HOME indirection to support installations of multiple R version PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` ## As an alternative, one can also add this code in a file 'configure' ## ## PKG_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"` ## ## sed -e "s|@PKG_LIBS@|${PKG_LIBS}|" \ ## src/Makevars.in > src/Makevars ## ## which together with the following file 'src/Makevars.in' ## ## PKG_LIBS = @PKG_LIBS@ ## ## can be used to create src/Makevars dynamically. This scheme is more ## powerful and can be expanded to also check for and link with other ## libraries. It should be complemented by a file 'cleanup' ## ## rm src/Makevars ## ## which removes the autogenerated file src/Makevars. ## ## Of course, autoconf can also be used to write configure files. This is ## done by a number of packages, but recommended only for more advanced users ## comfortable with autoconf and its related tools. mice/src/Makevars.win0000644000176200001440000000024114335377250014254 0ustar liggesusers ## Use the R_HOME indirection to support installations of multiple R version PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()") mice/src/legendre.cpp0000644000176200001440000000126614335377237014272 0ustar liggesusers#include using namespace Rcpp; // [[Rcpp::export]] Rcpp::NumericMatrix legendre(Rcpp::NumericVector x, int p) { int n = x.size(); Rcpp::IntegerVector polies (p+1); for(int i=0; i < polies.length(); i++) polies[i] = (i*2+1); Rcpp::NumericVector squirts = sqrt(polies); Rcpp::NumericMatrix y(n, p); for(int i = 0; i < n; ++i) { y(i,0) = 2 * x[i] - 1; y(i,1) = (3 * y(i,0) * y(i,0) - 1)/2; } for(int j=2; j < y.ncol(); j++){ for(int i = 0; i < n; ++i) { y(i,j) = (polies[j] * y(i,0) * y(i,j-1) - j * y(i,j-2))/(j+1); } } for(int j=0; j < y.ncol(); j++){ for(int i = 0; i < n; ++i) y(i,j) = squirts[j+1] * y(i,j); } return y; } mice/src/RcppExports.cpp0000644000176200001440000000422014335377244014765 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // legendre Rcpp::NumericMatrix legendre(Rcpp::NumericVector x, int p); RcppExport SEXP _mice_legendre(SEXP xSEXP, SEXP pSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericVector >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type p(pSEXP); rcpp_result_gen = Rcpp::wrap(legendre(x, p)); return rcpp_result_gen; END_RCPP } // matcher IntegerVector matcher(NumericVector obs, NumericVector mis, int k); RcppExport SEXP _mice_matcher(SEXP obsSEXP, SEXP misSEXP, SEXP kSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope 
rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< NumericVector >::type mis(misSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); rcpp_result_gen = Rcpp::wrap(matcher(obs, mis, k)); return rcpp_result_gen; END_RCPP } // matchindex IntegerVector matchindex(NumericVector d, NumericVector t, int k); RcppExport SEXP _mice_matchindex(SEXP dSEXP, SEXP tSEXP, SEXP kSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type d(dSEXP); Rcpp::traits::input_parameter< NumericVector >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); rcpp_result_gen = Rcpp::wrap(matchindex(d, t, k)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { {"_mice_legendre", (DL_FUNC) &_mice_legendre, 2}, {"_mice_matcher", (DL_FUNC) &_mice_matcher, 3}, {"_mice_matchindex", (DL_FUNC) &_mice_matchindex, 3}, {NULL, NULL, 0} }; RcppExport void R_init_mice(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } mice/R/0000755000176200001440000000000014436642366011406 5ustar liggesusersmice/R/with.R0000644000176200001440000000453614330031647012477 0ustar liggesusers#' Evaluate an expression in multiple imputed datasets #' #' Performs a computation of each of imputed datasets in data. #' #' @param data An object of type \code{mids}, which stands for 'multiply imputed #' data set', typically created by a call to function \code{mice()}. #' @param expr An expression to evaluate for each imputed data set. Formula's #' containing a dot (notation for "all other variables") do not work. #' @param \dots Not used #' @return An object of S3 class \code{\link[=mira-class]{mira}} #' @note Version 3.11.10 changed to tidy evaluation on a quosure. This change #' should not affect any code that worked on previous versions. #' It turned out that the latter statement was not true (#292). #' Version 3.12.2 reverts to the old \code{with()} function. #' @author Karin Oudshoorn, Stef van Buuren 2009, 2012, 2020 #' @seealso \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}}, \code{\link{pool}}, #' \code{\link{D1}}, \code{\link{D3}}, \code{\link{pool.r.squared}} #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords multivariate #' @examples #' imp <- mice(nhanes2, m = 2, print = FALSE, seed = 14221) #' #' # descriptive statistics #' getfit(with(imp, table(hyp, age))) #' #' # model fitting and testing #' fit1 <- with(imp, lm(bmi ~ age + hyp + chl)) #' fit2 <- with(imp, glm(hyp ~ age + chl, family = binomial)) #' fit3 <- with(imp, anova(lm(bmi ~ age + chl))) #' @method with mids #' @export with.mids <- function(data, expr, ...) { call <- match.call() if (!is.mids(data)) { stop("The data must have class mids") } analyses <- as.list(seq_len(data$m)) # do the repeated analysis, store the result. 
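  # Each expression is evaluated with the i-th completed data set as the
  # evaluation environment and the caller's frame as the enclosure, so that
  # objects defined in the calling environment remain visible inside expr.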
for (i in seq_along(analyses)) { data.i <- complete(data, i) analyses[[i]] <- eval(expr = substitute(expr), envir = data.i, enclos = parent.frame()) if (is.expression(analyses[[i]])) { analyses[[i]] <- eval(expr = analyses[[i]], envir = data.i, enclos = parent.frame()) } } # return the complete data analyses as a list of length nimp object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) # formula=formula(analyses[[1]]$terms)) oldClass(object) <- c("mira", "matrix") object } mice/R/md.pattern.R0000644000176200001440000001015514330031647013572 0ustar liggesusers#' Missing data pattern #' #' Display missing-data patterns. #' #' This function is useful for investigating any structure of missing #' observations in the data. In specific case, the missing data pattern could be #' (nearly) monotone. Monotonicity can be used to simplify the imputation model. #' See Schafer (1997) for details. Also, the missing pattern could suggest which #' variables could potentially be useful for imputation of missing entries. #' #' @param x A data frame or a matrix containing the incomplete data. Missing #' values are coded as NA's. #' @param plot Should the missing data pattern be made into a plot. Default is #' `plot = TRUE`. #' @param rotate.names Whether the variable names in the plot should be placed #' horizontally or vertically. Default is `rotate.names = FALSE`. #' @return A matrix with \code{ncol(x)+1} columns, in which each row corresponds #' to a missing data pattern (1=observed, 0=missing). Rows and columns are #' sorted in increasing amounts of missing information. The last column and row #' contain row and column counts, respectively. #' @author Gerko Vink, 2018, based on an earlier version of the same function by #' Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @references Schafer, J.L. (1997), Analysis of multivariate incomplete data. #' London: Chapman&Hall. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' @keywords univar #' @examples #' md.pattern(nhanes) #' # age hyp bmi chl #' # 13 1 1 1 1 0 #' # 1 1 1 0 1 1 #' # 3 1 1 1 0 1 #' # 1 1 0 0 1 2 #' # 7 1 0 0 0 3 #' # 0 8 9 10 27 #' @export md.pattern <- function(x, plot = TRUE, rotate.names = FALSE) { if (!(is.matrix(x) || is.data.frame(x))) { stop("Data should be a matrix or dataframe") } if (ncol(x) < 2) { stop("Data should have at least two columns") } R <- is.na(x) nmis <- colSums(R) # sort columnwise R <- matrix(R[, order(nmis)], dim(x)) pat <- apply(R, 1, function(x) paste(as.numeric(x), collapse = "")) # sort rowwise sortR <- matrix(R[order(pat), ], dim(x)) if (nrow(x) == 1) { mpat <- is.na(x) } else { mpat <- sortR[!duplicated(sortR), ] } # update row and column margins if (all(!is.na(x))) { cat(" /\\ /\\\n{ `---' }\n{ O O }\n==> V <==") cat(" No need for mice. 
This data set is completely observed.\n") cat(" \\ \\|/ /\n `-----'\n\n") mpat <- t(as.matrix(mpat, byrow = TRUE)) rownames(mpat) <- table(pat) } else { if (is.null(dim(mpat))) { mpat <- t(as.matrix(mpat)) } rownames(mpat) <- table(pat) } r <- cbind(abs(mpat - 1), rowSums(mpat)) r <- rbind(r, c(nmis[order(nmis)], sum(nmis))) if (plot) { op <- par(mar = rep(0, 4)) on.exit(par(op)) plot.new() if (is.null(dim(sortR[!duplicated(sortR), ]))) { R <- t(as.matrix(r[1:nrow(r) - 1, 1:ncol(r) - 1])) } else { if (is.null(dim(R))) { R <- t(as.matrix(R)) } R <- r[1:nrow(r) - 1, 1:ncol(r) - 1] } if (rotate.names) { adj <- c(0, 0.5) srt <- 90 length_of_longest_colname <- max(nchar(colnames(r))) / 2.6 plot.window( xlim = c(-1, ncol(R) + 1), ylim = c(-1, nrow(R) + length_of_longest_colname), asp = 1 ) } else { adj <- c(0.5, 0) srt <- 0 plot.window( xlim = c(-1, ncol(R) + 1), ylim = c(-1, nrow(R) + 1), asp = 1 ) } M <- cbind(c(row(R)), c(col(R))) - 1 shade <- ifelse(R[nrow(R):1, ], mdc(1), mdc(2)) rect(M[, 2], M[, 1], M[, 2] + 1, M[, 1] + 1, col = shade) for (i in 1:ncol(R)) { text(i - .5, nrow(R) + .3, colnames(r)[i], adj = adj, srt = srt) text(i - .5, -.3, nmis[order(nmis)][i]) } for (i in 1:nrow(R)) { text(ncol(R) + .3, i - .5, r[(nrow(r) - 1):1, ncol(r)][i], adj = 0) text(-.3, i - .5, rownames(r)[(nrow(r) - 1):1][i], adj = 1) } text(ncol(R) + .3, -.3, r[nrow(r), ncol(r)]) return(r) } else { return(r) } } mice/R/leiden85.R0000644000176200001440000000316514330031606013131 0ustar liggesusers#' Leiden 85+ study #' #' Subset of data from the Leiden 85+ study #' #' The data set concerns of subset of 956 members of a very old (85+) cohort in #' Leiden. #' #' Multiple imputation of this data set has been described in Boshuizen et al #' (1998), Van Buuren et al (1999) and Van Buuren (2012), chapter 7. #' #' The data set is not available as part of \code{mice}. #' #' @name leiden85 #' @docType data #' @format \code{leiden85} is a data frame with 956 rows and 336 columns. #' @source #' #' Lagaay, A. M., van der Meij, J. C., Hijmans, W. (1992). Validation of #' medical history taking as part of a population based survey in subjects aged #' 85 and over. \emph{Brit. Med. J.}, \emph{304}(6834), 1091-1092. #' #' Izaks, G. J., van Houwelingen, H. C., Schreuder, G. M., Ligthart, G. J. #' (1997). The association between human leucocyte antigens (HLA) and mortality #' in community residents aged 85 and older. \emph{Journal of the American #' Geriatrics Society}, \emph{45}(1), 56-60. #' #' Boshuizen, H. C., Izaks, G. J., van Buuren, S., Ligthart, G. J. (1998). #' Blood pressure and mortality in elderly people aged 85 and older: Community #' based study. \emph{Brit. Med. J.}, \emph{316}(7147), 1780-1784. #' #' Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of #' missing blood pressure covariates in survival analysis. \emph{Statistics in #' Medicine}, \bold{18}, 681--694. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-toomany.html#sec:leiden85cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets NULL mice/R/ampute.mcar.R0000644000176200001440000000430614330031606013726 0ustar liggesusers#' Multivariate amputation under a MCAR mechanism #' #' This function creates a missing data indicator for each pattern, based on a MCAR #' missingness mechanism. The function is used in the multivariate amputation function #' \code{\link{ampute}}. 
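#'
#' Conceptually, for the candidates of each pattern the missing data indicator
#' is drawn as \code{1 - rbinom(n, size = 1, prob = prop)}: every candidate is
#' amputed with probability \code{prop}, independently of the data values.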
#' #' @param P A vector containing the pattern numbers of the cases' candidates. #' For each case, a value between 1 and #patterns is given. For example, a #' case with value 2 is candidate for missing data pattern 2. #' @param patterns A matrix of size #patterns by #variables where \code{0} indicates #' a variable should have missing values and \code{1} indicates a variable should #' remain complete. The user may specify as many patterns as desired. One pattern #' (a vector) is also possible. Could be the result of \code{\link{ampute.default.patterns}}, #' default will be a square matrix of size #variables where each pattern has missingness #' on one variable only. #' @param prop A scalar specifying the proportion of missingness. Should be a value #' between 0 and 1. Default is a missingness proportion of 0.5. #' @return A list containing vectors with \code{0} if a case should be made missing #' and \code{1} if a case should remain complete. The first vector refers to the #' first pattern, the second vector to the second pattern, etcetera. #' @author Rianne Schouten, 2016 #' @seealso \code{\link{ampute}} #' @keywords internal #' @export ampute.mcar <- function(P, patterns, prop) { f <- function(i) { # If there are no candidates in a certain pattern, the list will receive a 0 if (length(P[P == (i + 1)]) == 0) { return(0) } else { # Otherwise, for all candidates in the pattern, the total proportion of # missingness is used to define the probabilities to be missing. nf <- length(P[P == (i + 1)]) R.temp <- 1 - rbinom(n = nf, size = 1, prob = prop) # Based on the probabilities, each candidate will receive a missing data # indicator 0, meaning he will be made missing or missing data indicator 1, # meaning the candidate will remain complete. R.temp <- replace(P, P == (i + 1), R.temp) R.temp <- replace(R.temp, P != (i + 1), 1) return(R.temp) } } lapply(seq_len(nrow(patterns)), f) } mice/R/ampute.R0000644000176200001440000006114614433402334013016 0ustar liggesusers#' Generate missing data for simulation purposes #' #' This function generates multivariate missing data under a MCAR, MAR or MNAR #' missing data mechanism. Imputation of data sets containing missing values can #' be performed with \code{\link{mice}}. #' #' This function generates missing values in complete data sets. Amputation of complete #' data sets is useful for the evaluation of imputation techniques, such as multiple #' imputation (performed with function \code{\link{mice}} in this package). #' #' The basic strategy underlying multivariate imputation was suggested by #' Don Rubin during discussions in the 90's. Brand (1997) created one particular #' implementation, and his method found its way into the FCS paper #' (Van Buuren et al, 2006). #' #' Until recently, univariate amputation procedures were used to generate missing #' data in complete, simulated data sets. With this approach, variables are made #' incomplete one variable at a time. When more than one variable needs to be amputed, #' the procedure is repeated multiple times. #' #' With the univariate approach, it is difficult to relate the missingness on one #' variable to the missingness on another variable. A multivariate amputation procedure #' solves this issue and moreover, it does justice to the multivariate nature of #' data sets. Hence, \code{ampute} is developed to perform multivariate amputation. #' #' The idea behind the function is the specification of several missingness #' patterns. 
Each pattern is a combination of variables with and without missing #' values (denoted by \code{0} and \code{1} respectively). For example, one might #' want to create two missingness patterns on a data set with four variables. The #' patterns could be something like: \code{0,0,1,1} and \code{1,0,1,0}. #' Each combination of zeros and ones may occur. #' #' Furthermore, the researcher specifies the proportion of missingness, either the #' proportion of missing cases or the proportion of missing cells, and the relative #' frequency each pattern occurs. Consequently, the data is split into multiple subsets, #' one subset per pattern. Now, each case is candidate for a certain missingness pattern, #' but whether the case will have missing values eventually depends on other specifications. #' #' The first of these specifications is the missing mechanism. There are three possible #' mechanisms: the missingness depends completely on chance (MCAR), the missingness #' depends on the values of the observed variables (i.e. the variables that remain #' complete) (MAR) or on the values of the variables that will be made incomplete (MNAR). #' For a discussion on how missingness mechanisms are related to the observed data, #' we refer to \doi{10.1177/0049124118799376}{Schouten and Vink, 2018}. #' #' When the user specifies the missingness mechanism to be \code{"MCAR"}, the candidates #' have an equal probability of becoming incomplete. For a \code{"MAR"} or \code{"MNAR"} mechanism, #' weighted sum scores are calculated. These scores are a linear combination of the #' variables. #' #' In order to calculate the weighted sum scores, the data is standardized. For this reason, #' the data has to be numeric. Second, for each case, the values in #' the data set are multiplied with the weights, specified by argument \code{weights}. #' These weighted scores will be summed, resulting in a weighted sum score for each case. #' #' The weights may differ between patterns and they may be negative or zero as well. #' Naturally, in case of a MAR mechanism, the weights corresponding to the #' variables that will be made incomplete, have a 0. Note that this may be #' different for each pattern. In case of MNAR missingness, especially #' the weights of the variables that will be made incomplete are of importance. However, #' the other variables may be weighted as well. #' #' It is the relative difference between the weights that will result in an effect #' in the sum scores. For example, for the first missing data #' pattern mentioned above, the weights for the third and fourth variables could #' be set to 2 and 4. However, weight values of 0.2 and 0.4 will have the exact #' same effect on the weighted sum score: the fourth variable is weighted twice as #' much as variable 3. #' #' Based on the weighted sum scores, either a discrete or continuous distribution #' of probabilities is used to calculate whether a candidate will have missing values. #' #' For a discrete distribution of probabilities, the weighted sum scores are #' divided into subgroups of equal size (quantiles). Thereafter, the user #' specifies for each subgroup the odds of being missing. Both the number of #' subgroups and the odds values are important for the generation of missing data. #' For example, for a RIGHT-like mechanism, scoring in one of the #' higher quantiles should have high missingness odds, whereas for a MID-like #' mechanism, the central groups should have higher odds. 
Again, not the size of #' the odds values are of importance, but the relative distance between the values. #' #' The continuous distributions of probabilities are based on the logistic distribution function. #' The user can specify the type of missingness, which, again, may differ between patterns. #' #' For an example and more explanation about how the arguments interact with each other, #' we refer to the vignette #' \href{https://rianneschouten.github.io/mice_ampute/vignette/ampute.html}{Generate missing values with ampute} #' The amputation methodology is published in #' \doi{10.1080/00949655.2018.1491577}{Schouten, Lugtig and Vink, 2018}. #' #' @param data A complete data matrix or data frame. Values should be numeric. #' Categorical variables should have been transformed to dummies. #' @param prop A scalar specifying the proportion of missingness. Should be a value #' between 0 and 1. Default is a missingness proportion of 0.5. #' @param patterns A matrix or data frame of size #patterns by #variables where #' \code{0} indicates that a variable should have missing values and \code{1} indicates #' that a variable should remain complete. The user may specify as many patterns as #' desired. One pattern (a vector) is possible as well. Default #' is a square matrix of size #variables where each pattern has missingness on one #' variable only (created with \code{\link{ampute.default.patterns}}). After the #' amputation procedure, \code{\link{md.pattern}} can be used to investigate the #' missing data patterns in the data. #' @param freq A vector of length #patterns containing the relative frequency with #' which the patterns should occur. For example, for three missing data patterns, #' the vector could be \code{c(0.4, 0.4, 0.2)}, meaning that of all cases with #' missing values, 40 percent should have pattern 1, 40 percent pattern 2 and 20 #' percent pattern 3. The vector should sum to 1. Default is an equal probability #' for each pattern, created with \code{\link{ampute.default.freq}}. #' @param mech A string specifying the missingness mechanism, either "MCAR" #' (Missing Completely At Random), "MAR" (Missing At Random) or "MNAR" (Missing Not At #' Random). Default is a MAR missingness mechanism. #' @param weights A matrix or data frame of size #patterns by #variables. The matrix #' contains the weights that will be used to calculate the weighted sum scores. For #' a MAR mechanism, the weights of the variables that will be made incomplete should be #' zero. For a MNAR mechanism, these weights could have any possible value. Furthermore, #' the weights may differ between patterns and between variables. They may be negative #' as well. Within each pattern, the relative size of the values are of importance. #' The default weights matrix is made with \code{\link{ampute.default.weights}} and #' returns a matrix with equal weights for all variables. In case of MAR, variables #' that will be amputed will be weighted with \code{0}. For MNAR, variables #' that will be observed will be weighted with \code{0}. If the mechanism is MCAR, the #' weights matrix will not be used. #' @param std Logical. Whether the weighted sum scores should be calculated with #' standardized data or with non-standardized data. The latter is especially advised when #' making use of train and test sets in order to prevent leakage. #' @param cont Logical. Whether the probabilities should be based on a continuous #' or a discrete distribution. 
If TRUE, the probabilities of being missing are based #' on a continuous logistic distribution function. \code{\link{ampute.continuous}} #' will be used to calculate and assign the probabilities. These probabilities will then #' be based on the argument \code{type}. If FALSE, the probabilities of being missing are #' based on a discrete distribution (\code{\link{ampute.discrete}}) based on the \code{odds} #' argument. Default is TRUE. #' @param type A string or vector of strings containing the type of missingness for each #' pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. #' If a single missingness type is given, all patterns will be created with the same #' type. If the missingness types should differ between patterns, a vector of missingness #' types should be given. Default is RIGHT for all patterns and is the result of #' \code{\link{ampute.default.type}}. #' @param odds A matrix where #patterns defines the #rows. Each row should contain #' the odds of being missing for the corresponding pattern. The number of odds values #' defines in how many quantiles the sum scores will be divided. The odds values are #' relative probabilities: a quantile with odds value 4 will have a probability of #' being missing that is four times higher than a quantile with odds 1. The #' number of quantiles may differ between the patterns, specify NA for cells remaining empty. #' Default is 4 quantiles with odds values 1, 2, 3 and 4 and is created by #' \code{\link{ampute.default.odds}}. #' @param bycases Logical. If TRUE, the proportion of missingness is defined in #' terms of cases. If FALSE, the proportion of missingness is defined in terms of #' cells. Default is TRUE. #' @param run Logical. If TRUE, the amputations are implemented. If FALSE, the #' return object will contain everything except for the amputed data set. #' #' @return Returns an S3 object of class \code{\link{mads-class}} (multivariate #' amputed data set) #' @author Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 #' @seealso \code{\link{mads-class}}, \code{\link{bwplot}}, \code{\link{xyplot}}, #' \code{\link{mice}} #' #' @references Brand, J.P.L. (1999) \emph{Development, implementation and #' evaluation of multiple imputation strategies for the statistical analysis of #' incomplete data sets.} pp. 110-113. Dissertation. Rotterdam: Erasmus University. #' #' Schouten, R.M., Lugtig, P and Vink, G. (2018) #' {Generating missing values for simulation purposes: A multivariate amputation procedure.}. #' \emph{Journal of Statistical Computation and Simulation}, 88(15): 1909-1930. #' \doi{10.1080/00949655.2018.1491577} #' #' Schouten, R.M. and Vink, G. (2018){The Dance of the Mechanisms: How Observed Information Influences the Validity of Missingness Assumptions}. #' \emph{Sociological Methods and Research}, 50(3): 1243-1258. #' \doi{10.1177/0049124118799376} #' #' Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn, C.G.M., Rubin, D.B. (2006) #' {Fully conditional specification in multivariate imputation.} #' \emph{Journal of Statistical Computation and Simulation}, 76(12): 1049-1064. #' \doi{10.1080/10629360600810434} #' #' Van Buuren, S. (2018) \href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' Vink, G. (2016) Towards a standardized evaluation of multiple imputation routines. 
#' @examples #' # start with a complete data set #' compl_boys <- cc(boys)[1:3] #' #' # Perform amputation with default settings #' mads_boys <- ampute(data = compl_boys) #' mads_boys$amp #' #' # Change default matrices as desired #' my_patterns <- mads_boys$patterns #' my_patterns[1:3, 2] <- 0 #' #' my_weights <- mads_boys$weights #' my_weights[2, 1] <- 2 #' my_weights[3, 1] <- 0.5 #' #' # Rerun amputation #' my_mads_boys <- ampute( #' data = compl_boys, patterns = my_patterns, freq = #' c(0.3, 0.3, 0.4), weights = my_weights, type = c("RIGHT", "TAIL", "LEFT") #' ) #' my_mads_boys$amp #' @export ampute <- function(data, prop = 0.5, patterns = NULL, freq = NULL, mech = "MAR", weights = NULL, std = TRUE, cont = TRUE, type = NULL, odds = NULL, bycases = TRUE, run = TRUE) { if (is.null(data)) { stop("Argument data is missing, with no default", call. = FALSE) } data <- check.dataform(data) if (anyNA(data)) { stop("Data cannot contain NAs", call. = FALSE) } if (ncol(data) < 2) { stop("Data should contain at least two columns", call. = FALSE) } data <- data.frame(data) if (any(vapply(data, Negate(is.numeric), logical(1))) && mech != "MCAR") { data <- as.data.frame(sapply(data, as.numeric)) warning("Data is made numeric because the calculation of weights requires numeric data", call. = FALSE ) } if (prop < 0 || prop > 100) { stop("Proportion of missingness should be a value between 0 and 1 (for a proportion) or between 1 and 100 (for a percentage)", call. = FALSE ) } else if (prop > 1) { prop <- prop / 100 } if (is.null(patterns)) { patterns <- ampute.default.patterns(n = ncol(data)) } else if (is.vector(patterns) && (length(patterns) / ncol(data)) %% 1 == 0) { patterns <- matrix(patterns, nrow = length(patterns) / ncol(data), byrow = TRUE) if (nrow(patterns) == 1 && all(patterns[1, ] %in% 1)) { stop("One pattern with merely ones results to no amputation at all, the procedure is therefore stopped", call. = FALSE) } } else if (is.vector(patterns)) { stop("Length of pattern vector does not match #variables", call. = FALSE) } patterns <- data.frame(patterns) if (is.null(freq)) { freq <- ampute.default.freq(patterns = patterns) } if (!is.vector(freq)) { freq <- as.vector(freq) warning("Frequency should be a vector", call. = FALSE) } if (length(freq) != nrow(patterns)) { if (length(freq) > nrow(patterns)) { freq <- freq[seq_along(nrow(patterns))] } else { freq <- c(freq, rep.int(0.2, nrow(patterns) - length(freq))) } warning(paste("Length of vector with relative frequencies does not match #patterns and is therefore changed to", freq), call. = FALSE) } if (sum(freq) != 1) { freq <- recalculate.freq(freq = freq) } check.pat <- check.patterns( patterns = patterns, freq = freq, prop = prop ) patterns.new <- check.pat[["patterns"]] freq <- check.pat[["freq"]] prop <- check.pat[["prop"]] if (!bycases) { prop <- recalculate.prop( prop = prop, freq = freq, patterns = patterns.new, k = ncol(data), n = nrow(data) ) } if (any(!mech %in% c("MCAR", "MAR", "MNAR"))) { stop("Mechanism should be either MCAR, MAR or MNAR", call. = FALSE) } if (!is.vector(mech)) { mech <- as.vector(mech) warning("Mechanism should contain merely MCAR, MAR or MNAR", call. = FALSE) } else if (length(mech) > 1) { mech <- mech[1] warning("Mechanism should contain merely MCAR, MAR or MNAR. First element is used", call. 
= FALSE ) } # Check if there is a pattern with merely zeroos if (!is.null(check.pat[["row.zero"]]) && mech == "MAR") { stop(paste("Patterns object contains merely zeros and this kind of pattern is not possible when mechanism is MAR"), call. = FALSE ) } if (mech == "MCAR" && !is.null(weights)) { weights <- NULL warning("Weights matrix is not used when mechanism is MCAR", call. = FALSE) } if (mech == "MCAR" && !is.null(odds)) { odds <- NULL warning("Odds matrix is not used when mechanism is MCAR", call. = FALSE) } if (mech != "MCAR" && !is.null(weights)) { if (is.vector(weights) && (length(weights) / ncol(data)) %% 1 == 0) { weights <- matrix(weights, nrow = length(weights) / ncol(data), byrow = TRUE) } else if (is.vector(weights)) { stop("Length of weight vector does not match #variables", call. = FALSE) } else if (!is.matrix(weights) && !is.data.frame(weights)) { stop("Weights matrix should be a matrix", call. = FALSE) } } if (is.null(weights)) { weights <- ampute.default.weights( patterns = patterns.new, mech = mech ) } weights <- as.data.frame(weights) if (!nrow(weights) == nrow(patterns.new)) { if (!is.null(check.pat[["row.one"]])) { weights <- weights[-check.pat[["row.one"]], ] } } if (!nrow(weights) == nrow(patterns.new)) { stop("The objects patterns and weights are not matching", call. = FALSE) } if (!is.vector(cont)) { cont <- as.vector(cont) warning("Continuous should contain merely TRUE or FALSE", call. = FALSE) } else if (length(cont) > 1) { cont <- cont[1] warning("Continuous should contain merely TRUE or FALSE. First element is used", call. = FALSE ) } if (!is.logical(cont)) { stop("Continuous should contain TRUE or FALSE", call. = FALSE) } if (cont && !is.null(odds)) { odds <- NULL warning("Odds matrix is not used when continuous probabilities (cont == TRUE) are specified", call. = FALSE ) } if (!cont && !is.null(type)) { type <- NULL warning("Type is not used when discrete probabilities (cont == FALSE) are specified", call. = FALSE ) } if (is.null(type)) { type <- ampute.default.type(patterns = patterns.new) } if (any(!type %in% c("LEFT", "MID", "TAIL", "RIGHT"))) { stop("Type should contain LEFT, MID, TAIL or RIGHT", call. = FALSE ) } if (!is.vector(type)) { type <- as.vector(type) warning("Type should be a vector of strings", call. = FALSE) } else if (!length(type) %in% c(1, nrow(patterns), nrow(patterns.new))) { type <- type[1] warning("Type should either have length 1 or length equal to #patterns, first element is used for all patterns", call. = FALSE) } if (mech != "MCAR" && !is.null(odds) && !is.matrix(odds)) { if (nrow(patterns.new) == 1 && is.vector(odds)) { odds <- matrix(odds, nrow = 1) } else { stop("Odds matrix should be a matrix", call. = FALSE) } } if (is.null(odds)) { odds <- ampute.default.odds(patterns = patterns.new) } if (!cont) { for (h in seq_len(nrow(odds))) { if (any(!is.na(odds[h, ]) & odds[h, ] < 0)) { stop("Odds matrix can only have positive values", call. = FALSE) } } } if (!nrow(odds) %in% c(nrow(patterns), nrow(patterns.new))) { stop("The objects patterns and odds are not matching", call. 
= FALSE) } # # Start using arguments # Create empty objects P <- NULL scores <- NULL missing.data <- NULL # Apply function (run = TRUE) or merely return objects (run = FALSE) if (run) { # Assign cases to the patterns according probs # Because 0 and 1 will be used for missingness, # the numbering of the patterns will start from 2 P <- sample.int( n = nrow(patterns.new), size = nrow(data), replace = TRUE, prob = freq ) + 1 # Check whether cases are assigned to all patterns non.used.patterns <- c(2:(nrow(patterns.new) + 1))[!c(2:(nrow(patterns.new) + 1)) %in% unique(P)] if (length(non.used.patterns) > 0) { warning(paste0("No records are assigned to patterns ", toString(non.used.patterns - 1), ". These patterns will not be generated. Consider reducing the number of patterns or increasing the dataset size."), call. = FALSE) } # Calculate missingness according MCAR or calculate weighted sum scores # Standardized data is used to calculate weighted sum scores if (mech == "MCAR") { R <- ampute.mcar( P = P, patterns = patterns.new, prop = prop ) } else { scores <- sumscores( P = P, data = data, std = std, weights = weights, patterns = patterns ) if (!cont) { R <- ampute.discrete( P = P, scores = scores, odds = odds, prop = prop ) } else if (cont) { R <- ampute.continuous( P = P, scores = scores, prop = round(prop, 3), type = type ) } } missing.data <- data for (i in seq_len(nrow(patterns.new))) { if (any(P == (i + 1))) { missing.data[R[[i]] == 0, patterns.new[i, ] == 0] <- NA } } } # # Create return object names(patterns.new) <- names(data) names(weights) <- names(data) call <- match.call() missing.data <- data.frame(missing.data) result <- list( call = call, prop = prop, patterns = patterns.new, freq = freq, mech = mech, weights = weights, cont = cont, std = std, type = type, odds = odds, amp = missing.data, cand = P - 1, scores = scores, data = as.data.frame(data) ) # # Return result oldClass(result) <- "mads" result } # This is an underlying function of multivariate amputation function ampute(). # This function is used to calculate the weighted sum scores of the candidates. # Based on the data, the weights matrix and the kind of mechanism, each case # will obtain a certain score that will define his probability to be made missing. # The calculation of the probabilities occur in the function ampute.mcar(), # ampute.continuous() or ampute.discrete(), based on the kind of missingness. sumscores <- function(P, data, std, weights, patterns) { weights <- as.matrix(weights) f <- function(i) { if (length(P[P == (i + 1)]) == 0) { return(0) # this will ensure length 1 which is used in ampute.continuous } else { candidates <- as.matrix(data[P == (i + 1), ]) # For each candidate in the pattern, a weighted sum score is calculated if (std) { length_unique <- function(x) { return(length(unique(x)) == 1) } # shangzhi-hong, Feb 2020, #216 if (nrow(candidates) > 1 && !(any(apply(candidates, 2, length_unique)))) { candidates <- scale(candidates) } } scores <- apply(candidates, 1, function(x) weights[i, ] %*% x) if (length(scores) > 1 && length(unique(scores)) != 1) { scores <- scale(scores) } return(scores) } } lapply(seq_len(nrow(patterns)), f) } # This is an underlying function of multivariate amputation function ampute(). # The function recalculates the proportion of missing cases for the desired # #missing cells. 
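# Illustration with hypothetical numbers (not executed): for n = 100 rows,
# k = 4 columns and prop = 0.10, the target is 0.10 * 100 * 4 = 40 missing
# cells. With two equally frequent patterns containing 2 and 1 zeros
# respectively, this requires 40 * 0.5 / 2 + 40 * 0.5 / 1 = 30 incomplete
# cases, so the case-wise proportion is recalculated to 30 / 100 = 0.30.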
recalculate.prop <- function(prop, n, k, patterns, freq) { miss <- prop * n * k # Desired #missing cells # Calculate #cases according prop and #zeros in patterns cases <- vapply( seq_len(nrow(patterns)), function(i) (miss * freq[i]) / length(patterns[i, ][patterns[i, ] == 0]), numeric(1) ) if (sum(cases) > n) { stop("Proportion of missing cells is too large in combination with the desired number of missing variables", call. = FALSE ) } else { prop <- sum(cases) / n } prop } # This is an underlying function of multivariate amputation function ampute(). # The function recalculates the frequency vector to make the sum equal to 1. recalculate.freq <- function(freq) { freq / sum(freq) } # This is an underlying function of multivariate amputation function ampute(). # The function checks whether there are patterns with merely ones or zeroos. # In case of the first, these patterns will be removed, and argument prop # and freq will be changed. In case there is a pattern with merely zeroos, # this is ascertained and saved in the object row.zero. check.patterns <- function(patterns, freq, prop) { prop.one <- 0 row.one <- c() for (h in seq_len(nrow(patterns))) { if (any(!patterns[h, ] %in% c(0, 1))) { stop(paste("Argument patterns can only contain 0 and 1, pattern", h, "contains another element"), call. = FALSE) } if (all(patterns[h, ] %in% 1)) { prop.one <- prop.one + freq[h] row.one <- c(row.one, h) } } if (prop.one != 0) { warning(paste("Proportion of missingness has changed from", prop, "to", (1 - prop.one) * prop, "because of pattern(s) with merely ones"), call. = FALSE) prop <- (1 - prop.one) * prop freq <- freq[-row.one] freq <- recalculate.freq(freq) patterns <- patterns[-row.one, ] warning("Frequency vector and patterns matrix have changed because of pattern(s) with merely ones", call. = FALSE) } prop.zero <- 0 row.zero <- c() for (h in seq_len(nrow(patterns))) { if (all(patterns[h, ] %in% 0)) { prop.zero <- prop.zero + freq[h] row.zero <- c(row.zero, h) } } objects <- list( patterns = patterns, prop = prop, freq = freq, row.one = row.one, row.zero = row.zero ) objects } mice/R/print.R0000644000176200001440000000500314330031606012641 0ustar liggesusers#' Print a \code{mids} object #' #' @rdname print #' @param x Object of class \code{mids}, \code{mira} or \code{mipo} #' @param ... Other parameters passed down to \code{print.default()} #' @return \code{NULL} #' @seealso \code{\link[=mids-class]{mids}} #' @method print mids #' @export print.mids <- function(x, ...) { cat("Class: mids\n") cat("Number of multiple imputations: ", x$m, "\n") cat("Imputation methods:\n") print(x$method, ...) cat("PredictorMatrix:\n") print(head(x$predictorMatrix), ...) if (!is.null(x$loggedEvents)) { cat("Number of logged events: ", nrow(x$loggedEvents), "\n") print(head(x$loggedEvents), ...) } invisible(x) } #' Print a \code{mira} object #' #' @rdname print #' @return \code{NULL} #' @seealso \code{\link[=mira-class]{mira}} #' @method print mira #' @export print.mira <- function(x, ...) { if (is.mira(x)) { print.listof(x, ...) } else { print(x, ...) } invisible(x) } #' Print a \code{mice.anova} object #' #' @rdname print #' @return \code{NULL} #' @seealso \code{\link{mipo}} #' @method print mice.anova #' @export print.mice.anova <- function(x, ...) { z <- summary(x, ...) 
print(z$comparisons, row.names = FALSE) invisible(x) } #' Print a \code{summary.mice.anova} object #' #' @rdname print #' @return \code{NULL} #' @seealso \code{\link{mipo}} #' @method print mice.anova.summary #' @export print.mice.anova.summary <- function(x, ...) { cat("\nModels:\n") print(x$models, row.names = FALSE) cat("\nComparisons:\n") print(x$comparisons, row.names = FALSE) cat( "\nNumber of imputations: ", x$m, " Method", x$method ) if (x$method == "D2") cat(" (", x$use, ")", sep = "") cat("\n") invisible(x) } #' Print a \code{mads} object #' #' @param x Object of class \code{mads} #' @param ... Other parameters passed down to \code{print.default()} #' @return \code{NULL} #' @seealso \code{\link[=mads-class]{mads}} #' @method print mads #' @export print.mads <- function(x, ...) { if (is.mads(x)) { cat("Multivariate Amputed Data Set") cat("\nCall: ") print(x$call) cat("Class:", class(x)) cat("\nProportion of Missingness: ", x$prop) cat("\nFrequency of Patterns: ", x$freq) cat("\nPattern Matrix:\n") print(x$patterns) cat("Mechanism:") print(x$mech) cat("Weight Matrix:\n") print(x$weights) cat("Type Vector:\n") print(x$type) cat("Odds Matrix:\n") print(x$odds) cat("Head of Amputed Data Set\n") print(head(x$amp)) } else { print(x, ...) } invisible(x) } mice/R/mice.impute.2lonly.pmm.R0000644000176200001440000001416614430474274015761 0ustar liggesusers#' Imputation at level 2 by predictive mean matching #' #' Imputes univariate missing data at level 2 using predictive mean matching. #' Variables are level 1 are aggregated at level 2. The group identifier at #' level 2 must be indicated by \code{type = -2} in the \code{predictorMatrix}. #' #' @aliases 2lonly.pmm #' @inheritParams mice.impute.pmm #' @param type Group identifier must be specified by '-2'. Predictors must be #' specified by '1'. #' @param ... Other named arguments. #' @return A vector of length \code{nmis} with imputations. #' @author Alexander Robitzsch (IPN - Leibniz Institute for Science and #' Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de} #' @seealso \code{\link{mice.impute.pmm}}, #' \code{\link{mice.impute.2lonly.norm}}, \code{\link{mice.impute.2l.pan}}, #' \code{\link{mice.impute.2lonly.mean}} #' @details #' This function allows in combination with \code{\link{mice.impute.2l.pan}} #' switching regression imputation between level 1 and level 2 as described in #' Yucel (2008) or Gelman and Hill (2007, p. 541). #' #' The function checks for partial missing level-2 data. Level-2 data #' are assumed to be constant within the same cluster. If one or more #' entries are missing, then the procedure aborts with an error #' message that identifies the cluster with incomplete level-2 data. #' In such cases, one may first fill in the cluster mean (or mode) by #' the \code{2lonly.mean} method to remove inconsistencies. #' @references Gelman, A. and Hill, J. (2007). \emph{Data analysis using #' regression and multilevel/hierarchical models}. Cambridge, Cambridge #' University Press. #' #' Yucel, RM (2008). Multiple imputation inference for multivariate multilevel #' continuous data with ignorable non-response. \emph{Philosophical #' Transactions of the Royal Society A}, \bold{366}, 2389-2404. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
#' #' @note The extension to categorical variables transforms #' a dependent factor variable by means of the \code{as.integer()} #' function. This may make sense for categories that are #' approximately ordered, but less so for pure nominal measures. #' #' For a more general approach, see #' \code{miceadds::mice.impute.2lonly.function()}. #' @family univariate-2lonly #' @examples #' # simulate some data #' # x,y ... level 1 variables #' # v,w ... level 2 variables #' #' G <- 250 # number of groups #' n <- 20 # number of persons #' beta <- .3 # regression coefficient #' rho <- .30 # residual intraclass correlation #' rho.miss <- .10 # correlation with missing response #' missrate <- .50 # missing proportion #' y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) #' w <- rep(round(rnorm(G), 2), each = n) #' v <- rep(round(runif(G, 0, 3)), each = n) #' x <- rnorm(G * n) #' y <- y1 + beta * x + .2 * w + .1 * v #' dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y, "w" = w, "v" = v) #' dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA #' dfr[rep(rnorm(G), each = n) < qnorm(missrate), "w"] <- NA #' dfr[rep(rnorm(G), each = n) < qnorm(missrate), "v"] <- NA #' #' # empty mice imputation #' imp0 <- mice(as.matrix(dfr), maxit = 0) #' predM <- imp0$predictorMatrix #' impM <- imp0$method #' #' # multilevel imputation #' predM1 <- predM #' predM1[c("w", "y", "v"), "group"] <- -2 #' predM1["y", "x"] <- 1 # fixed x effects imputation #' impM1 <- impM #' impM1[c("y", "w", "v")] <- c("2l.pan", "2lonly.norm", "2lonly.pmm") #' #' # turn v into a categorical variable #' dfr$v <- as.factor(dfr$v) #' levels(dfr$v) <- LETTERS[1:4] #' #' # y ... imputation using pan #' # w ... imputation at level 2 using norm #' # v ... imputation at level 2 using pmm #' #' # skip imputation on solaris #' is.solaris <- function() grepl("SunOS", Sys.info()["sysname"]) #' if (!is.solaris()) { #' imp <- mice(dfr, #' m = 1, predictorMatrix = predM1, #' method = impM1, maxit = 1, paniter = 500 #' ) #' } #' @export mice.impute.2lonly.pmm <- function(y, ry, x, type, wy = NULL, ...) { .imputation.level2( y = y, ry = ry, x = x, type = type, wy = wy, method = "pmm", ... ) } # imputation function at level 2 # can be done with norm and pmm .imputation.level2 <- function(y, ry, x, type, wy, method, ...) { if (sum(type == -2L) != 1L) stop("No class variable") if (is.null(wy)) wy <- !ry # handle categorical data ylev <- NULL if (is.factor(y)) { ylev <- levels(y) y <- as.integer(y) } # extract cluster index clusterx <- x[, type == -2L] # clusters with one or more missing y's cm <- unique(clusterx[!ry]) # clusters with one or more observed y's co <- unique(clusterx[ry]) # cluster where all y's are observed cobs <- setdiff(co, cm) # clusters where some y's are missing csom <- intersect(co, cm) if (length(csom) > 0L) { stop(paste0( "Method 2lonly.", method, " found the following clusters with partially missing\n", " level-2 data: ", paste(csom, collapse = ", "), "\n", " Method 2lonly.mean can fix such inconsistencies." 
)) } # calculate aggregated values x <- cbind(1, as.matrix(x[, type %in% c(1L, 2L)])) a2 <- rowsum(cbind(x, y), clusterx, na.rm = TRUE) a2 <- a2 / rowsum(1 * cbind(!is.na(x), ry), clusterx) clusterx0 <- as.numeric(paste0(rownames(a2))) a1 <- cbind(clusterx0, a2) ry2 <- a1[, 1L] %in% cobs wy2 <- !(a1[, 1L] %in% unique(clusterx[!wy])) y2 <- a1[, ncol(a1)] x2 <- as.matrix(a1[, -c(1L:2L, ncol(a1))]) # norm imputation at level 2 if (method == "norm") { ximp2 <- mice.impute.norm( y = y2, ry = ry2, x = x2, wy = wy2, ... ) } # pmm imputation at level 2 if (method == "pmm") { ximp2 <- mice.impute.pmm( y = y2, ry = ry2, x = x2, wy = wy2, ... ) } # expland to full matrix cly2 <- a1[wy2, 1L] i1 <- match(clusterx, cly2) ximp <- (ximp2[i1])[wy] # turn back into factor if (!is.null(ylev)) { ximp <- factor(as.integer(ximp), levels = 1L:length(ylev), labels = ylev) } ximp } mice/R/mice.impute.cart.R0000644000176200001440000000750214436637016014700 0ustar liggesusers#' Imputation by classification and regression trees #' #' Imputes univariate missing data using classification and regression trees. #' #' @aliases mice.impute.cart cart #' #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @param minbucket The minimum number of observations in any terminal node used. #' See \code{\link{rpart.control}} for details. #' @param cp Complexity parameter. Any split that does not decrease the overall #' lack of fit by a factor of cp is not attempted. See \code{\link{rpart.control}} #' for details. #' @param ... Other named arguments passed down to \code{rpart()}. #' @return Numeric vector of length \code{sum(!ry)} with imputations #' @details #' Imputation of \code{y} by classification and regression trees. The procedure #' is as follows: #' \enumerate{ #' \item Fit a classification or regression tree by recursive partitioning; #' \item For each \code{ymis}, find the terminal node they end up according to the fitted tree; #' \item Make a random draw among the member in the node, and take the observed value from that #' draw as the imputation. #' } #' @seealso \code{\link{mice}}, \code{\link{mice.impute.rf}}, #' \code{\link[rpart]{rpart}}, \code{\link[rpart]{rpart.control}} #' @author Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012 #' @references #' #' Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning #' for missing data imputation in the presence of interaction Effects. #' Computational Statistics & Data Analysis, 72, 92-104. #' #' Breiman, L., Friedman, J. H., Olshen, R. A., and Stone, C. J. #' (1984), Classification and regression trees, Monterey, CA: Wadsworth & #' Brooks/Cole Advanced Books & Software. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' @family univariate imputation functions #' @examples #' imp <- mice(nhanes2, meth = "cart", minbucket = 4) #' plot(imp) #' @keywords datagen #' @export mice.impute.cart <- function(y, ry, x, wy = NULL, minbucket = 5, cp = 1e-04, ...) 
{ if (is.null(wy)) { wy <- !ry } minbucket <- max(1, minbucket) if (dim(x)[2] == 0) { x <- cbind(x, 1) dimnames(x) <- list(NULL, "int") } xobs <- data.frame(x[ry, , drop = FALSE]) xmis <- data.frame(x[wy, , drop = FALSE]) yobs <- y[ry] if (!is.factor(yobs)) { fit <- rpart::rpart(yobs ~ ., data = cbind(yobs, xobs), method = "anova", control = rpart::rpart.control(minbucket = minbucket, cp = cp, ...) ) leafnr <- floor(as.numeric(row.names(fit$frame[fit$where, ]))) fit$frame$yval <- as.numeric(row.names(fit$frame)) nodes <- predict(object = fit, newdata = xmis) donor <- lapply(nodes, function(s) yobs[leafnr == s]) impute <- vapply(seq_along(donor), function(s) sample(donor[[s]], 1), numeric(1)) } else { # escape with same impute if the dependent does not vary cat.has.all.obs <- table(yobs) == sum(ry) if (any(cat.has.all.obs)) { return(rep(levels(yobs)[cat.has.all.obs], sum(wy))) } xy <- cbind(yobs, xobs) xy <- droplevels(xy) # FIXME: rpart fails to runs on empty categories in yobs, # droplevels() removes empty levels, and this is # likely to present problems further down the road # potential problem case: table(yobs): 0 10 15, then # droplevels may forget about category 1 fit <- rpart::rpart(yobs ~ ., data = xy, method = "class", control = rpart::rpart.control(minbucket = minbucket, cp = cp, ...) ) nodes <- predict(object = fit, newdata = xmis) impute <- apply(nodes, MARGIN = 1, FUN = function(s) { sample(colnames(nodes), size = 1, prob = s ) } ) } impute } mice/R/potthoffroy.R0000644000176200001440000000403514330031606014074 0ustar liggesusers#' Potthoff-Roy data #' #' Data from Potthoff-Roy (1964) with repeated measures on dental fissures. #' #' This data set is the famous Potthoff-Roy data, used to demonstrate MANOVA on #' repeated measure data. Potthoff and Roy (1964) published classic data on a #' study in 16 boys and 11 girls, who at ages 8, 10, 12, and 14 had the distance #' (mm) from the center of the pituitary gland to the pteryomaxillary fissure #' measured. Changes in pituitary-pteryomaxillary distances during growth is #' important in orthodontic therapy. The goals of the study were to describe the #' distance in boys and girls as simple functions of age, and then to compare #' the functions for boys and girls. The data have been reanalyzed by many #' authors including Jennrich and Schluchter (1986), Little and Rubin (1987), #' Pinheiro and Bates (2000), Verbeke and Molenberghs (2000) and Molenberghs and #' Kenward (2007). See Chapter 9 of Van Buuren (2012) for a challenging #' exercise using these data. #' #' @name potthoffroy #' @docType data #' @format \code{tbs} is a data frame with 27 rows and 6 columns: #' \describe{ #' \item{id}{Person number} #' \item{sex}{Sex M/F} #' \item{d8}{Distance at age 8 years} #' \item{d10}{Distance at age 10 years} #' \item{d12}{Distance at age 12 years} #' \item{d14}{Distance at age 14 years} #' } #' @source Potthoff, R. F., Roy, S. N. (1964). A generalized multivariate #' analysis of variance model usefully especially for growth curve problems. #' \emph{Biometrika}, \emph{51}(3), 313-326. #' #' Little, R. J. A., Rubin, D. B. (1987). \emph{Statistical Analysis with #' Missing Data.} New York: John Wiley & Sons. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/ex-ch-longitudinal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
#' @keywords datasets #' @examples #' ### create missing values at age 10 as in Little and Rubin (1987) #' #' phr <- potthoffroy #' idmis <- c(3, 6, 9, 10, 13, 16, 23, 24, 27) #' phr[idmis, 4] <- NA #' phr #' #' md.pattern(phr) NULL mice/R/check.R0000644000176200001440000000402714330031606012567 0ustar liggesuserscheck.data <- function(data, method) { check.dataform(data) } check.dataform <- function(data) { if (!(is.matrix(data) || is.data.frame(data))) { stop("Data should be a matrix or data frame", call. = FALSE) } if (ncol(data) < 2) { stop("Data should contain at least two columns", call. = FALSE) } data <- as.data.frame(data) mat <- sapply(data, is.matrix) df <- sapply(data, is.data.frame) if (any(mat)) { stop( "Cannot handle columns with class matrix: ", colnames(data)[mat] ) } if (any(df)) { stop( "Cannot handle columns with class data.frame: ", colnames(data)[df] ) } dup <- duplicated(colnames(data)) if (any(dup)) { stop( "Duplicate names found: ", paste(colnames(data)[dup], collapse = ", ") ) } data } check.m <- function(m) { m <- m[1L] if (!is.numeric(m)) { stop("Argument m not numeric", call. = FALSE) } m <- floor(m) if (m < 1L) { stop("Number of imputations (m) lower than 1.", call. = FALSE) } m } check.cluster <- function(data, predictorMatrix) { # stop if the cluster variable is a factor isclassvar <- apply(predictorMatrix == -2, 2, any) for (j in colnames(predictorMatrix)) { if (isclassvar[j] && lapply(data, is.factor)[[j]]) { stop("Convert cluster variable ", j, " to integer by as.integer()") } } TRUE } check.ignore <- function(ignore, data) { if (is.null(ignore)) { return(rep(FALSE, nrow(data))) } if (!is.logical(ignore)) { stop("Argument ignore not a logical.") } if (length(ignore) != nrow(data)) { stop( "length(ignore) (", length(ignore), ") does not match nrow(data) (", nrow(data), ")." ) } if (sum(!ignore) < 10L) { warning( "Fewer than 10 rows for fitting the imputation model. Are you sure?", call. = FALSE ) } ignore } check.newdata <- function(newdata, data) { if (is.null(newdata)) { stop("No newdata found.") } if (!is.data.frame(newdata)) { stop("newdata not a data.frame.") } newdata } mice/R/mice.mids.R0000644000176200001440000001337114334522175013376 0ustar liggesusers#' Multivariate Imputation by Chained Equations (Iteration Step) #' #' Takes a \code{mids} object, and produces a new object of class \code{mids}. #' #' This function enables the user to split up the computations of the Gibbs #' sampler into smaller parts. This is useful for the following reasons: #' \itemize{ \item RAM memory may become easily exhausted if the number of #' iterations is large. Returning to prompt/session level may alleviate these #' problems. \item The user can compute customized convergence statistics at #' specific points, e.g. after each iteration, for monitoring convergence. - #' For computing a 'few extra iterations'. } Note: The imputation model itself #' is specified in the \code{mice()} function and cannot be changed with #' \code{mice.mids}. The state of the random generator is saved with the #' \code{mids} object. #' #' @param obj An object of class \code{mids}, typically produces by a previous #' call to \code{mice()} or \code{mice.mids()} #' @param newdata An optional \code{data.frame} for which multiple imputations #' are generated according to the model in \code{obj}. #' @param maxit The number of additional Gibbs sampling iterations. #' @param printFlag A Boolean flag. 
If \code{TRUE}, diagnostic information #' during the Gibbs sampling iterations will be written to the command window. #' The default is \code{TRUE}. #' @param ... Named arguments that are passed down to the univariate imputation #' functions. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{complete}}, \code{\link{mice}}, \code{\link{set.seed}}, #' \code{\link[=mids-class]{mids}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords iteration #' @examples #' imp1 <- mice(nhanes, maxit = 1, seed = 123) #' imp2 <- mice.mids(imp1) #' #' # yields the same result as #' imp <- mice(nhanes, maxit = 2, seed = 123) #' #' # verification #' identical(imp$imp, imp2$imp) #' # #' @export mice.mids <- function(obj, newdata = NULL, maxit = 1, printFlag = TRUE, ...) { if (!is.mids(obj)) { stop("Object should be of type mids.") } # Set seed to last seed after previous imputation assign(".Random.seed", obj$lastSeedValue, pos = 1) # obj contains training data, newdata contains test data # overwrite obj with combined obj + imp.newdata if (!is.null(newdata)) { ignore <- rep(FALSE, nrow(obj$data)) if (!is.null(obj$ignore)) ignore <- obj$ignore newdata <- check.newdata(newdata, obj$data) imp.newdata <- mice(newdata, m = obj$m, maxit = 0, remove.collinear = FALSE, remove.constant = FALSE ) obj <- withCallingHandlers( rbind.mids(obj, imp.newdata), warning = function(w) { if (grepl("iterations differ", w$message)) { # Catch warnings concerning iterations, these differ by design invokeRestart("muffleWarning") } } ) # ignore newdata for model building, but do impute obj$ignore <- c(ignore, rep(TRUE, nrow(newdata))) } if (maxit < 1) { return(obj) } loggedEvents <- obj$loggedEvents state <- list( it = 0, im = 0, co = 0, dep = "", meth = "", log = !is.null(loggedEvents) ) if (is.null(loggedEvents)) { loggedEvents <- data.frame( it = 0, im = 0, co = 0, dep = "", meth = "", out = "" ) } # Initialize local variables call <- match.call() imp <- obj$imp where <- obj$where if (is.null(where)) where <- is.na(obj$data) blocks <- obj$blocks if (is.null(blocks)) blocks <- make.blocks(obj$data) ## OK. Iterate. sumIt <- obj$iteration + maxit from <- obj$iteration + 1 to <- from + maxit - 1 q <- sampler( obj$data, obj$m, obj$ignore, where, imp, blocks, obj$method, obj$visitSequence, obj$predictorMatrix, obj$formulas, obj$blots, obj$post, c(from, to), printFlag, ... 
) imp <- q$imp ## combine with previous chainMean and chainVar vnames <- unique(unlist(obj$blocks)) nvis <- length(vnames) if (!is.null(obj$chainMean)) { chainMean <- chainVar <- array(0, dim = c(nvis, to, obj$m), dimnames = list( vnames, seq_len(to), paste("Chain", seq_len(obj$m)) ) ) for (j in seq_len(nvis)) { if (obj$iteration == 0) { chainMean[j, , ] <- q$chainMean[j, , ] chainVar[j, , ] <- q$chainVar[j, , ] } else { chainMean[j, seq_len(obj$iteration), ] <- obj$chainMean[j, , ] chainVar[j, seq_len(obj$iteration), ] <- obj$chainVar[j, , ] chainMean[j, from:to, ] <- q$chainMean[j, , ] chainVar[j, from:to, ] <- q$chainVar[j, , ] } } } else { chainMean <- chainVar <- NULL } if (!state$log) { loggedEvents <- NULL } if (state$log) { row.names(loggedEvents) <- seq_len(nrow(loggedEvents)) } ## save, and return midsobj <- list( data = obj$data, imp = imp, m = obj$m, where = where, blocks = obj$blocks, call = call, nmis = obj$nmis, method = obj$method, predictorMatrix = obj$predictorMatrix, visitSequence = obj$visitSequence, formulas = obj$formulas, post = obj$post, blots = obj$blots, ignore = obj$ignore, seed = obj$seed, iteration = sumIt, lastSeedValue = get(".Random.seed", envir = globalenv(), mode = "integer", inherits = FALSE ), chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" if (!is.null(newdata)) { include <- c( rep(FALSE, nrow(midsobj$data) - nrow(newdata)), rep(TRUE, nrow(newdata)) ) midsobj <- filter(midsobj, include) } midsobj } mice/R/selfreport.R0000644000176200001440000000737014330031606013703 0ustar liggesusers#' Self-reported and measured BMI #' #' Dataset containing height and weight data (measured, self-reported) from two #' studies. #' #' This dataset combines two datasets: \code{krul} data (Krul, 2010) (1257 #' persons) and the \code{mgg} data (Van Keulen 2011; Van der Klauw 2011) (803 #' persons). The \code{krul} dataset contains height and weight (both measures #' and self-reported) from 1257 Dutch adults, whereas the \code{mgg} dataset #' contains self-reported height and weight for 803 Dutch adults. Section 7.3 in #' Van Buuren (2012) shows how the missing measured data can be imputed in the #' \code{mgg} data, so corrected prevalence estimates can be calculated. #' #' @name selfreport #' @aliases selfreport mgg #' @docType data #' @format A data frame with 2060 rows and 15 variables: #' \describe{ #' \item{src}{Study, either \code{krul} or \code{mgg} (factor)} #' \item{id}{Person identification number} #' \item{pop}{Population, all \code{NL} (factor)} #' \item{age}{Age of respondent in years} #' \item{sex}{Sex of respondent (factor)} #' \item{hm}{Height measured (cm)} #' \item{wm}{Weight measured (kg)} #' \item{hr}{Height reported (cm)} #' \item{wr}{Weight reported (kg)} #' \item{prg}{Pregnancy (factor), all \code{Not pregnant}} #' \item{edu}{Educational level (factor)} #' \item{etn}{Ethnicity (factor)} #' \item{web}{Obtained through web survey (factor)} #' \item{bm}{BMI measured (kg/m2)} #' \item{br}{BMI reported (kg/m2)} #' } #' @source Krul, A., Daanen, H. A. M., Choi, H. (2010). Self-reported and #' measured weight, height and body mass index (BMI) in Italy, The Netherlands #' and North America. \emph{European Journal of Public Health}, \emph{21}(4), #' 414-419. #' #' Van Keulen, H.M.,, Chorus, A.M.J., Verheijden, M.W. (2011). 
\emph{Monitor #' Convenant Gezond Gewicht Nulmeting (determinanten van) beweeg- en eetgedrag #' van kinderen (4-11 jaar), jongeren (12-17 jaar) en volwassenen (18+ jaar)}. #' TNO/LS 2011.016. Leiden: TNO. #' #' Van der Klauw, M., Van Keulen, H.M., Verheijden, M.W. (2011). \emph{Monitor #' Convenant Gezond Gewicht Beweeg- en eetgedrag van kinderen (4-11 jaar), #' jongeren (12-17 jaar) en volwassenen (18+ jaar) in 2010 en 2011.} TNO/LS #' 2011.055. Leiden: TNO. (in Dutch) #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-prevalence.html#sec:srcdata}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets #' @examples #' md.pattern(selfreport[, c("age", "sex", "hm", "hr", "wm", "wr")]) #' #' ### FIMD Section 7.3.5 Application #' #' bmi <- function(h, w) { #' return(w / (h / 100)^2) #' } #' init <- mice(selfreport, maxit = 0) #' meth <- init$meth #' meth["bm"] <- "~bmi(hm,wm)" #' pred <- init$pred #' pred[, c("src", "id", "web", "bm", "br")] <- 0 #' imp <- mice(selfreport, pred = pred, meth = meth, seed = 66573, maxit = 2, m = 1) #' ## imp <- mice(selfreport, pred=pred, meth=meth, seed=66573, maxit=20, m=10) #' #' ### Like FIMD Figure 7.6 #' #' cd <- complete(imp, 1) #' xy <- xy.coords(cd$bm, cd$br - cd$bm) #' plot(xy, #' col = mdc(2), xlab = "Measured BMI", ylab = "Reported - Measured BMI", #' xlim = c(17, 45), ylim = c(-5, 5), type = "n", lwd = 0.7 #' ) #' polygon(x = c(30, 20, 30), y = c(0, 10, 10), col = "grey95", border = NA) #' polygon(x = c(30, 40, 30), y = c(0, -10, -10), col = "grey95", border = NA) #' abline(0, 0, lty = 2, lwd = 0.7) #' #' idx <- cd$src == "krul" #' xyc <- xy #' xyc$x <- xy$x[idx] #' xyc$y <- xy$y[idx] #' xys <- xy #' xys$x <- xy$x[!idx] #' xys$y <- xy$y[!idx] #' points(xyc, col = mdc(1), cex = 0.7) #' points(xys, col = mdc(2), cex = 0.7) #' lines(lowess(xyc), col = mdc(4), lwd = 2) #' lines(lowess(xys), col = mdc(5), lwd = 2) #' text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3) #' box(lwd = 1) NULL mice/R/stripplot.R0000644000176200001440000003115714330031647013563 0ustar liggesusers#' Stripplot of observed and imputed data #' #' Plotting methods for imputed data using \pkg{lattice}. #' \code{stripplot} produces one-dimensional #' scatterplots. The function #' automatically separates the observed and imputed data. The #' functions extend the usual features of \pkg{lattice}. #' #' The argument \code{na.groups} may be used to specify (combinations of) #' missingness in any of the variables. The argument \code{groups} can be used #' to specify groups based on the variable values themselves. Only one of both #' may be active at the same time. When both are specified, \code{na.groups} #' takes precedence over \code{groups}. #' #' Use the \code{subset} and \code{na.groups} together to plots parts of the #' data. For example, select the first imputed data set by by #' \code{subset=.imp==1}. #' #' Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #' specified in the arguments list to alter the plotting symbols. If #' \code{length(col)==2}, the color specification to define the observed and #' missing groups. \code{col[1]} is the color of the 'observed' data, #' \code{col[2]} is the color of the missing or imputed data. A convenient color #' choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #' data, and a transparent red color for the imputed data. A good choice is #' \code{col=mdc(1:2), pch=20, cex=1.5}. 
These choices can be set for the #' duration of the session by running \code{mice.theme()}. #' #' @aliases stripplot #' @param x A \code{mids} object, typically created by \code{mice()} or #' \code{mice.mids()}. #' @param data Formula that selects the data to be plotted. This argument #' follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #' variables (used for the per-panel display) and the optional conditioning #' variables (which define the subsets plotted in different panels) to be used #' in the plot. #' #' The formula is evaluated on the complete data set in the \code{long} form. #' Legal variable names for the formula include \code{names(x$data)} plus the #' two administrative factors \code{.imp} and \code{.id}. #' #' \bold{Extended formula interface:} The primary variable terms (both the LHS #' \code{y} and RHS \code{x}) may consist of multiple terms separated by a #' \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #' taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #' \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #' \emph{separate panels}. This behavior differs from standard \pkg{lattice}. #' \emph{Only combine terms of the same type}, i.e. only factors or only #' numerical variables. Mixing numerical and categorical data occasionally #' produces odds labeling of vertical axis. #' #' For convenience, in \code{stripplot()} and \code{bwplot} the formula #' \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single #' \code{y}, and does not (yet) work for \code{y1+y2~.imp}. #' #' @param na.groups An expression evaluating to a logical vector indicating #' which two groups are distinguished (e.g. using different colors) in the #' display. The environment in which this expression is evaluated in the #' response indicator \code{is.na(x$data)}. #' #' The default \code{na.group = NULL} contrasts the observed and missing data #' in the LHS \code{y} variable of the display, i.e. groups created by #' \code{is.na(y)}. The expression \code{y} creates the groups according to #' \code{is.na(y)}. The expression \code{y1 & y2} creates groups by #' \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #' \code{is.na(y1) | is.na(y2)}, and so on. #' @param groups This is the usual \code{groups} arguments in \pkg{lattice}. It #' differs from \code{na.groups} because it evaluates in the completed data #' \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #' \code{na.groups} evaluates in the response indicator. See #' \code{\link{xyplot}} for more details. When both \code{na.groups} and #' \code{groups} are specified, \code{na.groups} takes precedence, and #' \code{groups} is ignored. #' @param theme A named list containing the graphical parameters. The default #' function \code{mice.theme} produces a short list of default colors, line #' width, and so on. The extensive list may be obtained from #' \code{trellis.par.get()}. Global graphical parameters like \code{col} or #' \code{cex} in high-level calls are still honored, so first experiment with #' the global parameters. Many setting consists of a pair. For example, #' \code{mice.theme} defines two symbol colors. The first is for the observed #' data, the second for the imputed data. The theme settings only exist during #' the call, and do not affect the trellis graphical parameters. #' @param jitter.data See \code{\link[lattice:panel.xyplot]{panel.xyplot}}. 
#' @param horizontal See \code{\link[lattice:xyplot]{xyplot}}. #' @param as.table See \code{\link[lattice:xyplot]{xyplot}}. #' @param panel See \code{\link{xyplot}}. #' @param default.prepanel See \code{\link[lattice:xyplot]{xyplot}}. #' @param outer See \code{\link[lattice:xyplot]{xyplot}}. #' @param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #' @param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #' @param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #' @param subset See \code{\link[lattice:xyplot]{xyplot}}. #' @param \dots Further arguments, usually not directly processed by the #' high-level functions documented here, but instead passed on to other #' functions. #' @return The high-level functions documented here, as well as other high-level #' Lattice functions, return an object of class \code{"trellis"}. The #' \code{\link[lattice:update.trellis]{update}} method can be used to #' subsequently update components of the object, and the #' \code{\link[lattice:print.trellis]{print}} method (usually called by default) #' will plot it on an appropriate plotting device. #' @note The first two arguments (\code{x} and \code{data}) are reversed #' compared to the standard Trellis syntax implemented in \pkg{lattice}. This #' reversal was necessary in order to benefit from automatic method dispatch. #' #' In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #' in \pkg{lattice} the argument \code{x} is always a formula. #' #' In \pkg{mice} the argument \code{data} is always a formula object, whereas in #' \pkg{lattice} the argument \code{data} is usually a data frame. #' #' All other arguments have identical interpretation. #' #' @author Stef van Buuren #' @seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, #' \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the #' package, as well as \code{\link[lattice:xyplot]{stripplot}}, #' \code{\link[lattice:panel.stripplot]{panel.stripplot}}, #' \code{\link[lattice:print.trellis]{print.trellis}}, #' \code{\link[lattice:trellis.par.get]{trellis.par.set}} #' @references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #' Visualization with R}, Springer. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} #' @keywords hplot #' @examples #' imp <- mice(boys, maxit = 1) #' #' ### stripplot, all numerical variables #' \dontrun{ #' stripplot(imp) #' } #' #' ### same, but with improved display #' \dontrun{ #' stripplot(imp, col = c("grey", mdc(2)), pch = c(1, 20)) #' } #' #' ### distribution per imputation of height, weight and bmi #' ### labeled by their own missingness #' \dontrun{ #' stripplot(imp, hgt + wgt + bmi ~ .imp, #' cex = c(2, 4), pch = c(1, 20), jitter = FALSE, #' layout = c(3, 1) #' ) #' } #' #' ### same, but labeled with the missingness of wgt (just four cases) #' \dontrun{ #' stripplot(imp, hgt + wgt + bmi ~ .imp, #' na = wgt, cex = c(2, 4), pch = c(1, 20), jitter = FALSE, #' layout = c(3, 1) #' ) #' } #' #' ### distribution of age and height, labeled by missingness in height #' ### most height values are missing for those around #' ### the age of two years #' ### some additional missings occur in region WEST #' \dontrun{ #' stripplot(imp, age + hgt ~ .imp | reg, hgt, #' col = c(grDevices::hcl(0, 0, 40, 0.2), mdc(2)), pch = c(1, 20) #' ) #' } #' #' ### heavily jitted relation between two categorical variables #' ### labeled by missingness of gen #' ### aggregated over all imputed data sets #' \dontrun{ #' stripplot(imp, gen ~ phb, factor = 2, cex = c(8, 1), hor = TRUE) #' } #' #' ### circle fun #' stripplot(imp, gen ~ .imp, #' na = wgt, factor = 2, cex = c(8.6), #' hor = FALSE, outer = TRUE, scales = "free", pch = c(1, 19) #' ) #' @export stripplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.stripplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.stripplot"), jitter.data = TRUE, horizontal = FALSE, ..., subscripts = TRUE, subset = TRUE) { call <- match.call() if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object") ## unpack data and response indicator cd <- data.frame(complete(x, "long", include = TRUE)) r <- as.data.frame(is.na(x$data)) ## evaluate na.group in response indicator nagp <- eval(expr = substitute(na.groups), envir = r, enclos = parent.frame()) if (is.expression(nagp)) nagp <- eval(expr = nagp, envir = r, enclos = parent.frame()) ## evaluate groups in imputed data ngp <- eval(expr = substitute(groups), envir = cd, enclos = parent.frame()) if (is.expression(ngp)) ngp <- eval(expr = ngp, envir = cd, enclos = parent.frame()) groups <- ngp ## evaluate subset in imputed data ss <- eval(expr = substitute(subset), envir = cd, enclos = parent.frame()) if (is.expression(ss)) ss <- eval(expr = ss, envir = cd, enclos = parent.frame()) subset <- ss ## evaluate further arguments before parsing dots <- list(...) args <- list( panel = panel, default.prepanel = default.prepanel, allow.multiple = allow.multiple, outer = outer, drop.unused.levels = drop.unused.levels, subscripts = subscripts, as.table = as.table, jitter.data = jitter.data, horizontal = horizontal ) ## create formula if not given (in call$data !) 
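  ## When no formula is supplied, all non-factor variables (excluding the
  ## administrative columns .imp and .id) are plotted against the imputation
  ## number, i.e. y1 + y2 + ... ~ as.factor(.imp). An abbreviated formula
  ## without a "~" (e.g. data = wgt) is padded to wgt ~ as.factor(.imp).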
vnames <- names(cd)[-seq_len(2)] allfactors <- unlist(lapply(cd, is.factor))[-seq_len(2)] if (missing(data)) { vnames <- vnames[!allfactors] formula <- as.formula(paste0(paste0(vnames, collapse = "+"), "~ as.factor(.imp)")) } else { ## pad abbreviated formula abbrev <- !any(grepl("~", call$data)) if (abbrev) { if (length(call$data) > 1) { stop("Cannot pad extended formula.") } else { formula <- as.formula(paste(call$data, "~ as.factor(.imp)", sep = "")) } } else { formula <- data } } ## determine the y-variables form <- lattice::latticeParseFormula( model = formula, data = cd, subset = subset, groups = groups, multiple = allow.multiple, outer = outer, subscripts = TRUE, drop = drop.unused.levels ) ynames <- unlist(lapply(strsplit(form$left.name, " \\+ "), rm.whitespace)) xnames <- unlist(lapply(strsplit(form$right.name, " \\+ "), rm.whitespace)) ## calculate selection vector gp nona <- is.null(call$na.groups) if (!is.null(call$groups) && nona) { gp <- call$groups } else { if (nona) { na.df <- r[, ynames, drop = FALSE] gp <- unlist(lapply(na.df, rep, x$m + 1)) } else { gp <- rep(nagp, length(ynames) * (x$m + 1)) } } ## change axis defaults of extended formula interface if (is.null(call$xlab) && !is.na(match(".imp", xnames))) { dots$xlab <- "" if (length(xnames) == 1) dots$xlab <- "Imputation number" } if (is.null(call$ylab)) { args$ylab <- "" if (length(ynames) == 1) args$ylab <- ynames } if (is.null(call$scales)) { args$scales <- list() if (length(ynames) > 1) { args$scales <- list(x = list(relation = "free"), y = list(relation = "free")) } } ## ready args <- c( x = formula, data = list(cd), groups = list(gp), args, dots, subset = call$subset ) ## go tp <- do.call("stripplot", args) update(tp, par.settings = theme) } mice/R/ampute.continuous.R0000644000176200001440000001531014334522175015221 0ustar liggesusers#' Multivariate amputation based on continuous probability functions #' #' This function creates a missing data indicator for each pattern. The continuous #' probability distributions (Van Buuren, 2012, pp. 63, 64) will be induced on the #' weighted sum scores, calculated earlier in the multivariate amputation function #' \code{\link{ampute}}. #' #' @param P A vector containing the pattern numbers of the cases's candidacies. #' For each case, a value between 1 and #patterns is given. For example, a #' case with value 2 is candidate for missing data pattern 2. #' @param scores A list containing vectors with the candidates's weighted sum scores, #' the result of an underlying function in \code{\link{ampute}}. #' @param prop A scalar specifying the proportion of missingness. Should be a value #' between 0 and 1. Default is a missingness proportion of 0.5. #' @param type A vector of strings containing the type of missingness for each #' pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or '\code{"RIGHT"}. #' If a single missingness type is entered, all patterns will be created by the same #' type. If missingness types should differ over patterns, a vector of missingness #' types should be entered. Default is RIGHT for all patterns and is the result of #' \code{\link{ampute.default.type}}. #' @return A list containing vectors with \code{0} if a case should be made missing #' and \code{1} if a case should remain complete. The first vector refers to the #' first pattern, the second vector to the second pattern, etcetera. 
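#' @details
#' Writing \code{F(x) = exp(x) / (1 + exp(x))} for the standard logistic
#' function, a candidate with weighted sum score \code{x} is amputed with
#' probability \code{F(x - mean(x) + b)} for type \code{"RIGHT"},
#' \code{F(mean(x) - x + b)} for \code{"LEFT"},
#' \code{F(-abs(x - mean(x)) + 0.75 + b)} for \code{"MID"} and
#' \code{F(abs(x - mean(x)) - 0.75 + b)} for \code{"TAIL"}. The shift \code{b}
#' is found by a binary search such that the average probability equals
#' \code{prop}.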
#' @author Rianne Schouten [aut, cre], Gerko Vink [aut], Peter Lugtig [ctb], 2016 #' @seealso \code{\link{ampute}}, \code{\link{ampute.default.type}} #' @references #' #'Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html#sec:generateuni}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords internal #' @export ampute.continuous <- function(P, scores, prop, type) { # For a test data set, the shift of the logit function is calculated # in order to obtain the right proportion of missingness (area beneath the curve) # The set-up for this is created in subsequent lines, it is executed within # the for loop over i. testset <- scale(rnorm(n = 10000, mean = 0, sd = 1)) logit <- function(x) exp(x) / (1 + exp(x)) # An empty list is created, type argument is given the right length R <- vector(mode = "list", length = length(scores)) if (length(type) == 1) { type <- rep.int(type, length(scores)) } for (i in seq_along(scores)) { # The desired function is chosen formula <- switch(type[i], LEFT = function(x, b) logit(mean(x) - x + b), MID = function(x, b) logit(-abs(x - mean(x)) + 0.75 + b), TAIL = function(x, b) logit(abs(x - mean(x)) - 0.75 + b), function(x, b) logit(-mean(x) + x + b) ) shift <- bin.search( fun = function(shift) { sum(formula(x = testset, b = shift)) / length(testset) }, target = prop )$where if (length(shift) > 1) { shift <- shift[1] } scores.temp <- scores[[i]] # empty candidate group if (length(scores.temp) == 1 && scores.temp == 0) { R[[i]] <- 0 } else { if (length(scores.temp) == 1) { warning(paste("There is only 1 candidate for pattern", i, ",it will be amputed with probability", prop), call. = FALSE) probs <- prop } else if (length(unique(scores.temp)) == 1) { warning(paste("The weighted sum scores of all candidates in pattern", i, "are the same, they will be amputed with probability", prop), call. = FALSE) probs <- prop } else { probs <- formula(x = scores.temp, b = shift) } # Based on the probabilities, each candidate will receive a missing data # indicator 0, meaning it will be made missing or missing data indicator 1, # meaning the candidate will remain complete. R.temp <- 1 - rbinom(n = length(scores.temp), size = 1, prob = probs) R[[i]] <- replace(P, P == (i + 1), R.temp) R[[i]] <- replace(R[[i]], P != (i + 1), 1) } } R } # This is a custom adaptation of function binsearch from package gtools # (version 3.5.0) that returns the adjustment of the probability curves used # in the function ampute.continuous in ampute. 
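# The call made in ampute.continuous() has the form (illustration only):
#   shift <- bin.search(
#     fun = function(shift) sum(formula(x = testset, b = shift)) / length(testset),
#     target = prop
#   )$where
# where formula() is the type-specific logistic curve and testset is a
# standardized random sample; the returned shift makes the mean missingness
# probability equal to the requested proportion.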
bin.search <- function(fun, range = c(-8, 8), ..., target = 0, lower = ceiling(min(range)), upper = floor(max(range)), maxiter = 100, showiter = FALSE) { lo <- lower hi <- upper counter <- 0 val.lo <- round(fun(lo, ...), 3) val.hi <- round(fun(hi, ...), 3) sign <- if (val.lo > val.hi) -1 else 1 if (target * sign < val.lo * sign) { outside.range <- TRUE } else if (target * sign > val.hi * sign) { outside.range <- TRUE } else { outside.range <- FALSE } while (counter < maxiter && !outside.range) { counter <- counter + 1 if (hi - lo <= (1 / (10^3)) || lo < lower || hi > upper) { break } center <- round((hi - lo) / 2 + lo, 3) val <- round(fun(center, ...), 3) if (showiter) { cat("--------------\n") cat("Iteration #", counter, "\n") cat("lo=", lo, "\n") cat("hi=", hi, "\n") cat("center=", center, "\n") cat("fun(lo)=", val.lo, "\n") cat("fun(hi)=", val.hi, "\n") cat("fun(center)=", val, "\n") } if (val == target) { val.lo <- val.hi <- val lo <- hi <- center break } else if (sign * val < sign * target) { lo <- center val.lo <- val } else { hi <- center val.hi <- val } if (showiter) { cat("new lo=", lo, "\n") cat("new hi=", hi, "\n") cat("--------------\n") } } retval <- list(call = match.call(), numiter = counter) if (outside.range) { if (target * sign < val.lo * sign) { warning("The desired proportion of ", target, " is too small; ", val.lo, " is used instead.") retval$flag <- "Lower Boundary" retval$where <- lo retval$value <- val.lo } else { warning("The desired proportion of ", target, " is too large; ", val.hi, " is used instead.") retval$flag <- "Upper Boundary" retval$where <- hi retval$value <- val.hi } } else if (counter >= maxiter) { retval$flag <- "Maximum number of iterations reached" retval$where <- (lo + hi) / 2 retval$value <- (val.lo + val.hi) / 2 } else if (val.lo == target) { retval$flag <- "Found" retval$where <- lo retval$value <- val.lo } else if (val.hi == target) { retval$flag <- "Found" retval$where <- hi retval$value <- val.hi } else { retval$flag <- "Between Elements" retval$where <- (lo + hi) / 2 retval$value <- (val.lo + val.hi) / 2 } retval } mice/R/blots.R0000644000176200001440000000247114330031606012636 0ustar liggesusers#' Creates a \code{blots} argument #' #' This helper function creates a valid \code{blots} object. The #' \code{blots} object is an argument to the \code{mice} function. #' The name \code{blots} is a contraction of blocks-dots. #' Through \code{blots}, the user can specify any additional #' arguments that are specifically passed down to the lowest level #' imputation function. #' @param data A \code{data.frame} with the source data #' @param blocks An optional specification for blocks of variables in #' the rows. The default assigns each variable in its own block. 
#' @return A matrix #' @seealso \code{\link{make.blocks}} #' @examples #' make.predictorMatrix(nhanes) #' make.blots(nhanes, blocks = name.blocks(c("age", "hyp"), "xxx")) #' @export make.blots <- function(data, blocks = make.blocks(data)) { data <- check.dataform(data) blots <- vector("list", length(blocks)) for (i in seq_along(blots)) blots[[i]] <- alist() names(blots) <- names(blocks) blots } check.blots <- function(blots, data, blocks = NULL) { data <- check.dataform(data) if (is.null(blots)) { return(make.blots(data, blocks)) } blots <- as.list(blots) for (i in seq_along(blots)) blots[[i]] <- as.list(blots[[i]]) if (length(blots) == length(blocks) && is.null(names(blots))) { names(blots) <- names(blocks) } blots } mice/R/blocks.R0000644000176200001440000002010514347606422012776 0ustar liggesusers#' Creates a \code{blocks} argument #' #' This helper function generates a list of the type needed for #' \code{blocks} argument in the \code{[=mice]{mice}} function. #' @param data A \code{data.frame}, character vector with #' variable names, or \code{list} with variable names. #' @param partition A character vector of length 1 used to assign #' variables to blocks when \code{data} is a \code{data.frame}. Value #' \code{"scatter"} (default) will assign each column to it own #' block. Value \code{"collect"} assigns all variables to one block, #' whereas \code{"void"} produces an empty list. #' @param calltype A character vector of \code{length(block)} elements #' that indicates how the imputation model is specified. If #' \code{calltype = "type"} (the default), the underlying imputation #' model is called by means of the \code{type} argument. The #' \code{type} argument for block \code{h} is equivalent to #' row \code{h} in the \code{predictorMatrix}. #' The alternative is \code{calltype = "formula"}. This will pass #' \code{formulas[[h]]} to the underlying imputation #' function for block \code{h}, together with the current data. #' The \code{calltype} of a block is set automatically during #' initialization. Where a choice is possible, calltype #' \code{"formula"} is preferred over \code{"type"} since this is #' more flexible and extendable. However, what precisely happens #' depends also on the capabilities of the imputation #' function that is called. #' @return A named list of character vectors with variables names. #' @details Choices \code{"scatter"} and \code{"collect"} represent to two #' extreme scenarios for assigning variables to imputation blocks. #' Use \code{"scatter"} to create an imputation model based on #' \emph{fully conditionally specification} (FCS). Use \code{"collect"} to #' gather all variables to be imputed by a \emph{joint model} (JM). #' Scenario's in-between these two extremes represent #' \emph{hybrid} imputation models that combine FCS and JM. #' #' Any variable not listed in will not be imputed. #' Specification \code{"void"} represents the extreme scenario that #' skips imputation of all variables. #' #' A variable may be a member of multiple blocks. The variable will be #' re-imputed in each block, so the final imputations for variable #' will come from the last block that was executed. This scenario #' may be useful where the same complete background factors appear in #' multiple imputation blocks. #' #' A variable may appear multiple times within a given block. If a univariate #' imputation model is applied to such a block, then the variable is #' re-imputed each time as it appears in the block. 
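#'
#' As an illustration, \code{make.blocks(list(c("bmi", "hyp"), "chl"))}
#' creates two blocks: \code{B1} containing \code{bmi} and \code{hyp}, and
#' \code{chl} containing only \code{chl}, a hybrid between the
#' \code{"scatter"} and \code{"collect"} scenarios.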
#' @examples #' make.blocks(nhanes) #' make.blocks(c("age", "sex", "edu")) #' @export make.blocks <- function(data, partition = c("scatter", "collect", "void"), calltype = "type") { if (is.vector(data) && !is.list(data)) { v <- as.list(as.character(data)) names(v) <- as.character(data) ct <- rep(calltype, length(v)) names(ct) <- names(v) attr(v, "calltype") <- ct return(v) } if (is.list(data) && !is.data.frame(data)) { v <- name.blocks(data) if (length(calltype) == 1L) { ct <- rep(calltype, length(v)) names(ct) <- names(v) attr(v, "calltype") <- ct } else { ct <- calltype names(ct) <- names(v) attr(v, "calltype") <- ct } return(v) } data <- as.data.frame(data) partition <- match.arg(partition) switch(partition, scatter = { v <- as.list(names(data)) names(v) <- names(data) }, collect = { v <- list(names(data)) names(v) <- "collect" }, void = { v <- list() }, { v <- as.list(names(data)) names(v) <- names(data) } ) if (length(calltype) == 1L) { ct <- rep(calltype, length(v)) names(ct) <- names(v) attr(v, "calltype") <- ct } else { ct <- calltype names(ct) <- names(v) attr(v, "calltype") <- ct } v } #' Name imputation blocks #' #' This helper function names any unnamed elements in the \code{blocks} #' specification. This is a convenience function. #' @inheritParams mice #' @param prefix A character vector of length 1 with the prefix to #' be using for naming any unnamed blocks with two or more variables. #' @return A named list of character vectors with variables names. #' @seealso \code{\link{mice}} #' @details #' This function will name any unnamed list elements specified in #' the optional argument \code{blocks}. Unnamed blocks #' consisting of just one variable will be named after this variable. #' Unnamed blocks containing more than one variables will be named #' by the \code{prefix} argument, padded by an integer sequence #' stating at 1. #' @examples #' blocks <- list(c("hyp", "chl"), AGE = "age", c("bmi", "hyp"), "edu") #' name.blocks(blocks) #' @export name.blocks <- function(blocks, prefix = "B") { if (!is.list(blocks)) { return(make.blocks(blocks)) } if (is.null(names(blocks))) names(blocks) <- rep("", length(blocks)) inc <- 1 for (i in seq_along(blocks)) { if (names(blocks)[i] != "") next if (length(blocks[[i]]) == 1) { names(blocks)[i] <- blocks[[i]][1] } else { names(blocks)[i] <- paste0(prefix, inc) inc <- inc + 1 } } blocks } check.blocks <- function(blocks, data, calltype = "type") { data <- check.dataform(data) blocks <- name.blocks(blocks) # check that all variable names exists in data bv <- unique(unlist(blocks)) notFound <- !bv %in% colnames(data) if (any(notFound)) { stop(paste( "The following names were not found in `data`:", paste(bv[notFound], collapse = ", ") )) } if (length(calltype) == 1L) { ct <- rep(calltype, length(blocks)) names(ct) <- names(blocks) attr(blocks, "calltype") <- ct } else { ct <- calltype names(ct) <- names(blocks) attr(blocks, "calltype") <- ct } blocks } #' Construct blocks from \code{formulas} and \code{predictorMatrix} #' #' This helper function attempts to find blocks of variables in the #' specification of the \code{formulas} and/or \code{predictorMatrix} #' objects. Blocks specified by \code{formulas} may consist of #' multiple variables. Blocks specified by \code{predictorMatrix} are #' assumed to consist of single variables. Any duplicates in names are #' removed, and the formula specification is preferred. #' \code{predictorMatrix} and \code{formulas}. 
When both arguments #' specify models for the same block, the model for the #' \code{predictMatrix} is removed, and priority is given to the #' specification given in \code{formulas}. #' @inheritParams mice #' @return A \code{blocks} object. #' @seealso \code{\link{make.blocks}}, \code{\link{name.blocks}} #' @examples #' form <- name.formulas(list(bmi + hyp ~ chl + age, chl ~ bmi)) #' pred <- make.predictorMatrix(nhanes[, c("age", "chl")]) #' construct.blocks(formulas = form, pred = pred) #' @export construct.blocks <- function(formulas = NULL, predictorMatrix = NULL) { blocks.f <- blocks.p <- NULL if (!is.null(formulas)) { if (!all(sapply(formulas, is.formula))) { return(NULL) } blocks.f <- name.blocks(lapply(name.formulas(formulas), lhs)) ct <- rep("formula", length(blocks.f)) names(ct) <- names(blocks.f) attr(blocks.f, "calltype") <- ct if (is.null(predictorMatrix)) { return(blocks.f) } } if (!is.null(predictorMatrix)) { if (is.null(row.names(predictorMatrix))) { stop("No row names in predictorMatrix", call. = FALSE) } blocks.p <- name.blocks(row.names(predictorMatrix)) ct <- rep("type", length(blocks.p)) names(ct) <- names(blocks.p) attr(blocks.p, "calltype") <- ct if (is.null(formulas)) { return(blocks.p) } } # combine into unique blocks blocknames <- unique(c(names(blocks.f), names(blocks.p))) vars.f <- unlist(lapply(formulas, lhs)) keep <- setdiff(blocknames, vars.f) add.p <- blocks.p[names(blocks.p) %in% keep] blocks <- c(blocks.f, add.p) ct <- c( rep("formula", length(formulas)), rep("type", length(add.p)) ) names(ct) <- names(blocks) attr(blocks, "calltype") <- ct blocks } mice/R/rm.whitespace.R0000644000176200001440000000041714330031606014262 0ustar liggesusers## Authors: Gerko Vink, Stef van Buuren rm.whitespace <- function(string, side = "both") { side <- match.arg(side, c("left", "right", "both")) pattern <- switch(side, left = "^\\s+", right = "\\s+$", both = "^\\s+|\\s+$" ) sub(pattern, "", string) } mice/R/pops.R0000644000176200001440000000467314330031606012502 0ustar liggesusers#' Project on preterm and small for gestational age infants (POPS) #' #' Subset of data from the POPS study, a national, prospective study on preterm #' children, including all liveborn infants <32 weeks gestational age and/or <1500 #' g from 1983 (n = 1338). #' #' The data set concerns of subset of 959 children that survived up to the age #' of 19 years. #' #' Hille et al (2005) divided the 959 survivors into three groups: Full #' responders (examined at an outpatient clinic and completed the #' questionnaires, n = 596), postal responders (only completed the mailed #' questionnaires, n = 109), non-responders (did not respond to any of the #' mailed requests or telephone calls, or could not be traced, n = 254). #' #' Compared to the postal and non-responders, the full response group consists #' of more girls, contains more Dutch children, has higher educational and #' social economic levels and has fewer handicaps. The responders form a highly #' selective subgroup in the total cohort. #' #' Multiple imputation of this data set has been described in Hille et al (2007) #' and Van Buuren (2012), chapter 8. #' @note This dataset is not part of \code{mice}. #' @name pops #' @aliases pops pops.pred #' @docType data #' @format \code{pops} is a data frame with 959 rows and 86 columns. #' \code{pops.pred} is the 86 by 86 binary predictor matrix used for specifying #' the multiple imputation model. #' @source #' Hille, E. T. M., Elbertse, L., Bennebroek Gravenhorst, J., Brand, R., #' Verloove-Vanhorick, S. 
P. (2005). Nonresponse bias in a follow-up study of #' 19-year-old adolescents born as preterm infants. Pediatrics, 116(5):662-666. #' #' Hille, E. T. M., Weisglas-Kuperus, N., Van Goudoever, J. B., Jacobusse, G. #' W., Ens-Dokkum, M. H., De Groot, L., Wit, J. M., Geven, W. B., Kok, J. H., De #' Kleine, M. J. K., Kollee, L. A. A., Mulder, A. L. M., Van Straaten, H. L. M., #' De Vries, L. S., Van Weissenbruch, M. M., Verloove-Vanhorick, S. P. (2007). #' Functional outcomes and participation in young adulthood for very preterm and #' very low birth weight infants: The Dutch project on preterm and small for #' gestational age infants at 19 years of age. Pediatrics, 120(3):587-595. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-selective.html#pops-study-19-years-follow-up}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets #' @examples #' pops <- data(pops) NULL mice/R/df.residual.R0000644000176200001440000000053314330031606013710 0ustar liggesusersdf.residual.mira <- function(object, ...) { fit <- object$analyses[[1]] df.residual(fit) } df.residual.lme <- function(object, ...) { object$fixDF[["X"]][1] } df.residual.mer <- function(object, ...) { sum(object@dims[2:4] * c(1, -1, -1)) + 1 } df.residual.multinom <- function(object, ...) { nrow(object$residuals) - object$edf } mice/R/mids.R0000644000176200001440000001167614334445701012467 0ustar liggesusers#' Multiply imputed data set (\code{mids}) #' #' The \code{mids} object contains a multiply imputed data set. The \code{mids} object is #' generated by functions \code{mice()}, \code{mice.mids()}, \code{cbind.mids()}, #' \code{rbind.mids()} and \code{ibind.mids()}. #' #' The \code{mids} #' class of objects has methods for the following generic functions: #' \code{print}, \code{summary}, \code{plot}. #' #' @section Slots: #' \describe{ #' \item{\code{.Data}:}{Object of class \code{"list"} containing the #' following slots:} #' \item{\code{data}:}{Original (incomplete) data set.} #' \item{\code{imp}:}{A list of \code{ncol(data)} components with #' the generated multiple imputations. Each list component is a #' \code{data.frame} (\code{nmis[j]} by \code{m}) of imputed values #' for variable \code{j}.} #' \item{\code{m}:}{Number of imputations.} #' \item{\code{where}:}{The \code{where} argument of the #' \code{mice()} function.} #' \item{\code{blocks}:}{The \code{blocks} argument of the #' \code{mice()} function.} #' \item{\code{call}:}{Call that created the object.} #' \item{\code{nmis}:}{An array containing the number of missing #' observations per column.} #' \item{\code{method}:}{A vector of strings of \code{length(blocks)} #' specifying the imputation method per block.} #' \item{\code{predictorMatrix}:}{A numerical matrix containing #' integers specifying the predictor set.} #' \item{\code{visitSequence}:}{The sequence in which columns are visited.} #' \item{\code{formulas}:}{A named list of formulas, or expressions that #' can be converted into formulas by \code{as.formula}. List elements #' correspond to blocks. The block to which the list element applies is #' identified by its name, so list names must correspond to block names.} #' \item{\code{post}:}{A vector of strings of length \code{length(blocks)} #' with commands for post-processing.} #' \item{\code{blots}:}{"Block dots".
The \code{blots} argument to the \code{mice()} #' function.} #' \item{\code{ignore}:}{A logical vector of length \code{nrow(data)} indicating #' the rows in \code{data} used to build the imputation model. (new in \code{mice 3.12.0})} #' \item{\code{seed}:}{The seed value of the solution.} #' \item{\code{iteration}:}{Last Gibbs sampling iteration number.} #' \item{\code{lastSeedValue}:}{The most recent seed value.} #' \item{\code{chainMean}:}{A list of \code{m} components. Each #' component is a \code{length(visitSequence)} by \code{maxit} matrix #' containing the mean of the generated multiple imputations. #' The array can be used for monitoring convergence. #' Note that observed data are not present in this mean.} #' \item{\code{chainVar}:}{A list with similar structure of \code{chainMean}, #' containing the covariances of the imputed values.} #' \item{\code{loggedEvents}:}{A \code{data.frame} with five columns #' containing warnings, corrective actions, and other inside info.} #' \item{\code{version}:}{Version number of \code{mice} package that #' created the object.} #' \item{\code{date}:}{Date at which the object was created.} #' } #' #' @details #' The \code{loggedEvents} entry is a matrix with five columns containing a #' record of automatic removal actions. It is \code{NULL} is no action was #' made. At initialization the program does the following three actions: #' \describe{ #' \item{1}{A variable that contains missing values, that is not imputed #' and that is used as a predictor is removed} #' \item{2}{A constant variable is removed} #' \item{3}{A collinear variable is removed.} #' } #' During iteration, the program does the following #' actions: #' \describe{ #' \item{1}{One or more variables that are linearly dependent are removed #' (for categorical data, a 'variable' corresponds to a dummy variable)} #' \item{2}{Proportional odds regression imputation that does not converge #' and is replaced by \code{polyreg}.} #' } #' #' Explanation of elements in \code{loggedEvents}: #' \describe{ #' \item{\code{it}}{iteration number at which the record was added,} #' \item{\code{im}}{imputation number,} #' \item{\code{dep}}{name of the dependent variable,} #' \item{\code{meth}}{imputation method used,} #' \item{\code{out}}{a (possibly long) character vector with the #' names of the altered or removed predictors.} #' } #' #' @note The \code{mice} package does not use #' the S4 class definitions, and instead relies on the S3 list #' equivalent \code{oldClass(obj) <- "mids"}. #' #' @name mids-class #' @rdname mids-class #' @aliases mids-class mids #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{mice}}, \code{\link[=mira-class]{mira}}, #' \code{\link{mipo}} #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords classes NULL mice/R/mice.impute.jomoImpute.R0000644000176200001440000000662514436133166016101 0ustar liggesusers#' Multivariate multilevel imputation using \code{jomo} #' #' This function is a wrapper around the \code{jomoImpute} function #' from the \code{mitml} package so that it can be called to #' impute blocks of variables in \code{mice}. The \code{mitml::jomoImpute} #' function provides an interface to the \code{jomo} package for #' multiple imputation of multilevel data #' \url{https://CRAN.R-project.org/package=jomo}. 
#' Imputations can be generated using \code{type} or \code{formula}, #' which offer different options for model specification. #' #' @name mice.impute.jomoImpute #' @inheritParams mitml::jomoImpute #' @param data A data frame containing incomplete and auxiliary variables, #' the cluster indicator variable, and any other variables that should be #' present in the imputed datasets. #' @param type An integer vector specifying the role of each variable #' in the imputation model (see \code{\link[mitml]{jomoImpute}}) #' @param formula A formula specifying the role of each variable #' in the imputation model. The basic model is constructed #' by \code{model.matrix}, thus allowing to include derived variables #' in the imputation model using \code{I()}. See #' \code{\link[mitml]{jomoImpute}}. #' @param format A character vector specifying the type of object that should #' be returned. The default is \code{format = "list"}. No other formats are #' currently supported. #' @param ... Other named arguments: \code{n.burn}, \code{n.iter}, #' \code{group}, \code{prior}, \code{silent} and others. #' @return A list of imputations for all incomplete variables in the model, #' that can be stored in the the \code{imp} component of the \code{mids} #' object. #' @seealso \code{\link[mitml]{jomoImpute}} #' @note The number of imputations \code{m} is set to 1, and the function #' is called \code{m} times so that it fits within the \code{mice} #' iteration scheme. #' #' This is a multivariate imputation function using a joint model. #' @author Stef van Buuren, 2018, building on work of Simon Grund, #' Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) #' and Quartagno and Carpenter (authors of \code{jomo} package). #' @references #' Grund S, Luedtke O, Robitzsch A (2016). Multiple #' Imputation of Multilevel Missing Data: An Introduction to the R #' Package \code{pan}. SAGE Open. #' #' Quartagno M and Carpenter JR (2015). #' Multiple imputation for IPD meta-analysis: allowing for heterogeneity #' and studies with missing covariates. Statistics in Medicine, #' 35:2938-2954, 2015. #' #' @family multivariate-2l #' @keywords datagen #' @examples #' \dontrun{ #' # Note: Requires mitml 0.3-5.7 #' blocks <- list(c("bmi", "chl", "hyp"), "age") #' method <- c("jomoImpute", "pmm") #' ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) #' pred <- ini$pred #' pred["B1", "hyp"] <- -2 #' imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) #' } #' @export mice.impute.jomoImpute <- function(data, formula, type, m = 1, silent = TRUE, format = "imputes", ...) { install.on.demand("mitml", ...) nat <- mitml::jomoImpute( data = data, formula = formula, type = type, m = m, silent = silent, ... ) if (format == "native") { return(nat) } cmp <- mitml::mitmlComplete(nat, print = 1)[, names(data)] if (format == "complete") { return(cmp) } if (format == "imputes") { return(single2imputes(cmp, is.na(data))) } NULL } mice/R/fdgs.R0000644000176200001440000000367014335404116012445 0ustar liggesusers#' Fifth Dutch growth study 2009 #' #' Age, height, weight and region of 10030 children measured within the Fifth #' Dutch Growth Study 2009 #' #' The data set contains data from children of Dutch descent (biological parents #' are born in the Netherlands). Children with growth-related diseases were #' excluded. The data were used to construct new growth charts of children of #' Dutch descent (Schonbeck 2013), and to calculate overweight and obesity #' prevalence (Schonbeck 2011). 
#' #' Some groups were underrepresented. Multiple imputation was used to create #' synthetic cases that were used to correct for the nonresponse. See Van Buuren #' (2012), chapter 8 for details. #' #' @name fdgs #' @aliases fdgs #' @docType data #' @format \code{fdgs} is a data frame with 10030 rows and 8 columns: #' \describe{ #' \item{id}{Person number} #' \item{reg}{Region (factor, 5 levels)} #' \item{age}{Age (years)} #' \item{sex}{Sex (boy, girl)} #' \item{hgt}{Height (cm)} #' \item{wgt}{Weight (kg)} #' \item{hgt.z}{Height Z-score} #' \item{wgt.z}{Weight Z-score} #' } #' @source Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, #' S. E., Hirasing, R. A., van Buuren, S. (2011). Increase in prevalence of #' overweight in Dutch children and adolescents: A comparison of nationwide #' growth studies in 1980, 1997 and 2009. \emph{PLoS ONE}, \emph{6}(11), #' e27608. #' #' Schonbeck, Y., Talma, H., van Dommelen, P., Bakker, B., Buitendijk, S. E., #' Hirasing, R. A., van Buuren, S. (2013). The world's tallest nation has #' stopped growing taller: the height of Dutch children from 1955 to 2009. #' \emph{Pediatric Research}, \emph{73}(3), 371-377. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-nonresponse.html#fifth-dutch-growth-study}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Boca Raton, FL.: Chapman & Hall/CRC Press. #' @keywords datasets #' @examples #' #' #' data <- data(fdgs) #' summary(data) NULL mice/R/visitSequence.R0000644000176200001440000000336114334522175014353 0ustar liggesusers#' Creates a \code{visitSequence} argument #' #' This helper function creates a valid \code{visitSequence}. The #' \code{visitSequence} is an argument to the \code{mice} function that #' specifies the sequence in which blocks are imputed. #' @inheritParams mice #' @return Vector containing block names #' @seealso \code{\link{mice}} #' @examples #' make.visitSequence(nhanes) #' @export make.visitSequence <- function(data = NULL, blocks = NULL) { if (!is.null(blocks)) { blocks <- name.blocks(blocks) return(names(blocks)) } data <- check.dataform(data) blocks <- make.blocks(data) names(blocks) } check.visitSequence <- function(visitSequence = NULL, data, where = NULL, blocks) { if (is.null(names(blocks)) || any(is.na(names(blocks)))) { stop("Missing names in `blocks`.") } if (is.null(visitSequence)) { return(make.visitSequence(data, blocks)) } if (is.null(where)) where <- is.na(data) nimp <- nimp(where, blocks) if (length(nimp) == 0) visitSequence <- nimp if (length(visitSequence) == 1 && is.character(visitSequence)) { code <- match.arg(visitSequence, choices = c("roman", "arabic", "monotone", "revmonotone") ) visitSequence <- switch(code, roman = names(blocks)[nimp > 0], arabic = rev(names(blocks)[nimp > 0]), monotone = names(blocks)[order(nimp)], revmonotone = rev(names(blocks)[order(nimp)]) ) } # legacy handling if (is.numeric(visitSequence)) { visitSequence <- colnames(data)[visitSequence] } # check against names(blocks) visitSequence <- visitSequence[is.element(visitSequence, names(blocks))] # remove any blocks without missing data visitSequence <- names((nimp > 0L)[visitSequence]) visitSequence } mice/R/parse.ums.R0000644000176200001440000000252414334522175013441 0ustar liggesusersparse.ums <- function(x, ums = NULL, umx = NULL, ...) { if (is.null(ums)) stop("Unidentifiable model specification (ums) not found.") if (!is.null(umx)) x <- base::cbind(x, umx) ## Unidentifiable part # e.g. 
specified in blots as list(X = list(ums = "-3+2*bmi")) mnar0 <- gsub("-", "+-", ums) mnar0 <- unlist(strsplit(mnar0, "+", fixed = TRUE)) if (mnar0[1L] == "") mnar0 <- mnar0[-1L] if (sum(!grepl("*", mnar0, fixed = TRUE)) == 0L) { stop("An intercept (constant) term must be included in the expression") } else if (sum(!grepl("*", mnar0, fixed = TRUE)) == 1L) { mnar0[!grepl("*", mnar0, fixed = TRUE)] <- paste( mnar0[!grepl("*", mnar0, fixed = TRUE)], "*intercept", sep = "" ) } else if (sum(!grepl("*", mnar0, fixed = TRUE)) > 1L) { stop("Only one intercept term allowed") } mnar <- strsplit(mnar0, "*", fixed = TRUE) # e.g. c("-3","2") mnar.parm <- as.numeric(unlist(lapply(mnar, function(x) x[1L]))) # e.g. c("intercept","bmi") mnar.vars <- unlist(lapply(mnar, function(x) x[2L])) mnar.parm <- mnar.parm[c(which(mnar.vars == "intercept"), which(mnar.vars != "intercept"))] mnar.vars <- mnar.vars[c(which(mnar.vars == "intercept"), which(mnar.vars != "intercept"))] xmnar <- as.matrix(cbind(1, as.matrix(x[, mnar.vars[!mnar.vars == "intercept"]]))) list(delta = mnar.parm, x = xmnar) } mice/R/mice.impute.midastouch.R0000644000176200001440000001761214330031647016101 0ustar liggesusers#' Imputation by predictive mean matching with distance aided donor selection #' #' Imputes univariate missing data using predictive mean matching. #' @aliases mice.impute.midastouch #' @inheritParams mice.impute.pmm #' @param midas.kappa Scalar. If \code{NULL} (default) then the #' optimal \code{kappa} gets selected automatically. Alternatively, the user #' may specify a scalar. Siddique and Belin 2008 find \code{midas.kappa = 3} #' to be sensible. #' @param outout Logical. If \code{TRUE} (default) one model is estimated #' for each donor (leave-one-out principle). For speedup choose #' \code{outout = FALSE}, which estimates one model for all observations #' leading to in-sample predictions for the donors and out-of-sample #' predictions for the recipients. Mind the inappropriateness, though. #' @param neff FOR EXPERTS. Null or character string. The name of an existing #' environment in which the effective sample size of the donors for each #' loop (CE iterations times multiple imputations) is supposed to be written. #' The effective sample size is necessary to compute the correction for the #' total variance as originally suggested by Parzen, Lipsitz and #' Fitzmaurice 2005. The objectname is \code{midastouch.neff}. #' @param debug FOR EXPERTS. Null or character string. The name of an existing #' environment in which the input is supposed to be written. The objectname #' is \code{midastouch.inputlist}. #' @return Vector with imputed data, same type as \code{y}, and of #' length \code{sum(wy)} #' @details Imputation of \code{y} by predictive mean matching, based on #' Rubin (1987, p. 168, formulas a and b) and Siddique and Belin 2008. #' The procedure is as follows: #' \enumerate{ #' \item Draw a bootstrap sample from the donor pool. #' \item Estimate a beta matrix on the bootstrap sample by the leave one out principle. #' \item Compute type II predicted values for \code{yobs} (nobs x 1) and \code{ymis} (nmis x nobs). #' \item Calculate the distance between all \code{yobs} and the corresponding \code{ymis}. #' \item Convert the distances in drawing probabilities. #' \item For each recipient draw a donor from the entire pool while considering the probabilities from the model. #' \item Take its observed value in \code{y} as the imputation. 
#' } #' @examples #' # do default multiple imputation on a numeric matrix #' imp <- mice(nhanes, method = "midastouch") #' imp #' #' # list the actual imputations for BMI #' imp$imp$bmi #' #' # first completed data matrix #' complete(imp) #' #' # imputation on mixed data with a different method per column #' mice(nhanes2, method = c("sample", "midastouch", "logreg", "norm")) #' @author Philipp Gaffert, Florian Meinfelder, Volker Bosch 2015 #' @references #' Gaffert, P., Meinfelder, F., Bosch V. (2015) Towards an MI-proper #' Predictive Mean Matching, Discussion Paper. #' \url{https://www.uni-bamberg.de/fileadmin/uni/fakultaeten/sowi_lehrstuehle/statistik/Personen/Dateien_Florian/properPMM.pdf} #' #' Little, R.J.A. (1988), Missing data adjustments in large #' surveys (with discussion), Journal of Business Economics and #' Statistics, 6, 287--301. #' #' Parzen, M., Lipsitz, S. R., Fitzmaurice, G. M. (2005), A note on reducing #' the bias of the approximate Bayesian bootstrap imputation variance estimator. #' Biometrika \bold{92}, 4, 971--974. #' #' Rubin, D.B. (1987), Multiple imputation for nonresponse in surveys. New York: Wiley. #' #' Siddique, J., Belin, T.R. (2008), Multiple imputation using an iterative #' hot-deck with distance-based donor selection. Statistics in medicine, #' \bold{27}, 1, 83--102 #' #' Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006), #' Fully conditional specification in multivariate imputation. #' \emph{Journal of Statistical Computation and Simulation}, \bold{76}, 12, #' 1049--1064. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011), \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}, 3, 1--67. \doi{10.18637/jss.v045.i03} #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.midastouch <- function(y, ry, x, wy = NULL, ridge = 1e-05, midas.kappa = NULL, outout = TRUE, neff = NULL, debug = NULL, ...) 
{ if (is.null(wy)) { wy <- !ry } # auxiliaries if (!is.null(debug)) { midastouch.inputlist <- list(y = y, ry = ry, x = x, omega = NULL) } sminx <- .Machine$double.eps^(1 / 4) # ensure data format x <- data.matrix(x) storage.mode(x) <- "numeric" X <- cbind(1, x) y <- as.numeric(y) # get data dimensions nobs <- sum(ry) nmis <- sum(wy) n <- length(ry) obsind <- ry misind <- wy m <- ncol(X) yobs <- y[obsind] Xobs <- X[obsind, , drop = FALSE] Xmis <- X[misind, , drop = FALSE] # P-Step # bootstrap omega <- bootfunc.plain(nobs) if (!is.null(debug)) { midastouch.inputlist$omega <- omega assign( x = "midastouch.inputlist", value = midastouch.inputlist, envir = get(debug) ) } # beta estimation CX <- omega * Xobs XCX <- crossprod(Xobs, CX) if (ridge > 0) { diag(XCX) <- diag(XCX) * (1 + c(0, rep(ridge, m - 1))) } # check if any diagonal element is exactly zero diag0 <- diag(XCX) == 0 if (any(diag0)) { diag(XCX)[diag0] <- max(sminx, ridge) } Xy <- crossprod(CX, yobs) beta <- solve(XCX, Xy) yhat.obs <- c(Xobs %*% beta) # kappa estimation if (is.null(midas.kappa)) { mean.y <- as.vector(crossprod(yobs, omega) / nobs) eps <- yobs - yhat.obs r2 <- 1 - c(crossprod(omega, eps^2) / crossprod(omega, (yobs - mean.y)^2)) # slight deviation from the paper to ensure real results paper: a tiny # delta is added to the denominator R Code: min function is used, note # that this correction gets active for r2>.999 only midas.kappa <- min((50 * r2 / (1 - r2))^(3 / 8), 100) # if r2 cannot be determined (eg zero variance in yhat), use 3 as # suggested by Siddique / Belin if (is.na(midas.kappa)) { midas.kappa <- 3 } } # I-Step if (outout) { # P-step if out of sample predictions for donors # estimate one model per donor by leave-one-out XXarray_pre <- t(t(apply(X = Xobs, MARGIN = 1, FUN = tcrossprod)) * omega) ridgeind <- c(1:(m - 1)) * (m + 1) + 1 if (ridge > 0) { XXarray_pre[ridgeind, ] <- XXarray_pre[ridgeind, ] * (1 + ridge) } XXarray <- c(XCX) - XXarray_pre # check if any diagonal element is exactly zero diag0 <- XXarray[ridgeind, ] == 0 if (any(diag0)) { XXarray[ridgeind, ][diag0] <- max(sminx, ridge) } Xyarray <- c(Xy) - t(Xobs * yobs * omega) BETAarray <- apply(rbind(XXarray, Xyarray), 2, function(x, m) { solve(a = matrix(head(x, m^2), m), b = tail(x, m)) }, m = m) YHATdon <- rowSums(Xobs * t(BETAarray)) # each recipient has nobs different yhats YHATrec <- Xmis %*% BETAarray # distance calculations dist.mat <- YHATdon - t(YHATrec) } else { yhat.mis <- c(Xmis %*% beta) dist.mat <- yhat.obs - matrix( data = yhat.mis, nrow = nobs, ncol = nmis, byrow = TRUE ) } # convert distances to drawing probs // ensure real results delta.mat <- 1 / ((abs(dist.mat))^midas.kappa) delta.mat <- minmax(delta.mat) probs <- delta.mat * omega csums <- minmax(colSums(probs, na.rm = TRUE)) probs <- t(t(probs) / csums) # calculate neff if (!is.null(neff)) { if (!exists("midastouch.neff", envir = get(neff))) { assign(x = "midastouch.neff", value = list(), envir = get(neff)) } midastouch.neff <- get("midastouch.neff", envir = get(neff)) midastouch.neff[[length(midastouch.neff) + 1]] <- mean(1 / rowSums((t(delta.mat) / csums)^2)) assign(x = "midastouch.neff", value = midastouch.neff, envir = get(neff)) } # return result index <- apply(probs, 2, sample, x = nobs, size = 1, replace = FALSE) yimp <- y[obsind][index] yimp } mice/R/xyplot.mads.R0000644000176200001440000001072614330031606013777 0ustar liggesusers#' Scatterplot of amputed and non-amputed data against weighted sum scores #' #' Plotting method to investigate relation between amputed data 
and the weighted sum #' scores. Based on \code{\link{lattice}}. \code{xyplot} produces scatterplots. #' The function plots the variables against the weighted sum scores. The function #' automatically separates the amputed and non-amputed data to see the relation between #' the amputation and the weighted sum scores. #' #' @param x A \code{mads} object, typically created by \code{\link{ampute}}. #' @param data A string or vector of variable names that needs to be plotted. As #' a default, all variables will be plotted. #' @param which.pat A scalar or vector indicating which patterns need to be plotted. #' As a default, all patterns are plotted. #' @param standardized Logical. Whether the scatterplots need to be created #' from standardized data or not. Default is TRUE. #' @param layout A vector of two values indicating how the scatterplots of one #' pattern should be divided over the plot. For example, \code{c(2, 3)} indicates #' that the scatterplots of six variables need to be placed on 3 rows and 2 columns. #' There are several defaults for different #variables. Note that for more than #' 9 variables, multiple plots will be created automatically. #' @param colors A vector of two RGB values defining the colors of the non-amputed and #' amputed data respectively. RGB values can be obtained with \code{\link{hcl}}. #' @param \dots Not used, but for consistency with generic #' @return A list containing the scatterplots. Note that a new pattern #' will always be shown in a new plot. #' @note The \code{mads} object contains all the information you need to #' make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate #' Amputation using Ampute} to understand the contents of class object \code{mads}. #' @author Rianne Schouten, 2016 #' @seealso \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for #' an overview of the package, \code{\link{mads-class}} #' @export xyplot.mads <- function(x, data, which.pat = NULL, standardized = TRUE, layout = NULL, colors = mdc(1:2), ...) 
{ if (!is.mads(x)) { stop("Object is not of class mads") } if (missing(data)) data <- NULL yvar <- data if (is.null(yvar)) { varlist <- colnames(x$amp) } else { varlist <- yvar } if (is.null(which.pat)) { pat <- nrow(x$patterns) which.pat <- seq_len(pat) } else { pat <- length(which.pat) } if (standardized) { dat <- data.frame(scale(x$data)) xlab <- "Standardized values in pattern" } else { dat <- x$data xlab <- "Data values in pattern" } data <- NULL for (i in seq_len(pat)) { can <- which(x$cand == which.pat[i]) mis <- matrix(NA, nrow = length(can), ncol = 3) nc <- which(x$patterns[which.pat[i], ] == 0) if (length(nc) > 1) { mis[apply(is.na(x$amp[can, nc]), 1, all), 1] <- 1 mis[is.na(mis[, 1]), 1] <- 0 } else if (length(nc) == 1) { mis[is.na(x$amp[can, nc]), 1] <- 1 mis[is.na(mis[, 1]), 1] <- 0 } mis[, 2] <- rep.int(which.pat[i], length(can)) mis[, 3] <- unname(x$scores[[which.pat[i]]]) data <- rbind(data, cbind(mis, dat[can, ])) } colnames(data) <- c(".amp", ".pat", "scores", names(x$data)) data$.amp <- factor(data$.amp, levels = c(0, 1)) formula <- as.formula(paste0("scores ~ ", paste0(varlist, collapse = "+"))) if (is.null(layout)) { if (length(varlist) > 6) { layout <- c(3, 3) } else if (length(varlist) > 4) { layout <- c(3, 2) } else if (length(varlist) > 2) { layout <- c(2, 2) } else if (length(varlist) > 1) { layout <- c(2, 1) } } theme <- list( superpose.symbol = list(col = colors, pch = 1), plot.symbol = list(col = colors, pch = 1), strip.background = list(col = "grey95") ) key <- list( columns = 2, points = list(col = colors, pch = 1), text = list(c("Non-Amputed Data", "Amputed Data")) ) p <- stats::setNames( vector(mode = "list", length = pat), paste("Scatterplot Pattern", which.pat) ) for (i in seq_len(pat)) { p[[paste("Scatterplot Pattern", which.pat[i])]] <- xyplot( x = formula, data = data[data$.pat == which.pat[i], ], groups = data$.amp, par.settings = theme, multiple = TRUE, outer = TRUE, layout = layout, key = key, ylab = "Weighted sum scores", xlab = paste(xlab, which.pat[i]) ) } p } mice/R/barnard.rubin.R0000644000176200001440000000042514330031647014244 0ustar liggesusersbarnard.rubin <- function(m, b, t, dfcom = Inf) { lambda <- (1 + 1 / m) * b / t lambda[lambda < 1e-04] <- 1e-04 dfold <- (m - 1) / lambda^2 dfobs <- (dfcom + 1) / (dfcom + 3) * dfcom * (1 - lambda) ifelse(is.infinite(dfcom), dfold, dfold * dfobs / (dfold + dfobs)) } mice/R/zzz.R0000644000176200001440000000073214330031606012346 0ustar liggesusers#' Echoes the package version number #' #' @param pkg A character vector with the package name. #' @return A character vector containing the package name, version number and #' installed directory. #' @author Stef van Buuren, Oct 2010 #' @keywords misc #' @examples #' version() #' version("base") #' @export version <- function(pkg = "mice") { lib <- dirname(system.file(package = pkg)) d <- packageDescription(pkg) return(paste(d$Package, d$Version, d$Date, lib)) } mice/R/pool.compare.R0000644000176200001440000001576714330034531014125 0ustar liggesusers#' Compare two nested models fitted to imputed data #' #' This function is deprecated in V3. Use \code{\link{D1}} or #' \code{\link{D3}} instead. #' #' Compares two nested models after m repeated complete data analysis #' #' The function is based on the article of Meng and Rubin (1992). The #' Wald-method can be found in paragraph 2.2 and the likelihood method can be #' found in paragraph 3. One could use the Wald method for comparison of linear #' models obtained with e.g. \code{lm} (in \code{with.mids()}). 
The likelihood #' method should be used in case of logistic regression models obtained with #' \code{glm()} in \code{with.mids()}. #' #' The function assumes that \code{fit1} is the #' larger model, and that model \code{fit0} is fully contained in \code{fit1}. #' In case of \code{method='wald'}, the null hypothesis is tested that the extra #' parameters are all zero. #' #' @param fit1 An object of class 'mira', produced by \code{with.mids()}. #' @param fit0 An object of class 'mira', produced by \code{with.mids()}. The #' model in \code{fit0} is a nested fit0 of \code{fit1}. #' @param method Either \code{"wald"} or \code{"likelihood"} specifying #' the type of comparison. The default is \code{"wald"}. #' @param data No longer used. #' @return A list containing several components. Component \code{call} is #' the call to the \code{pool.compare} function. Component \code{call11} is #' the call that created \code{fit1}. Component \code{call12} is the #' call that created the imputations. Component \code{call01} is the #' call that created \code{fit0}. Component \code{call02} is the #' call that created the imputations. Components \code{method} is the #' method used to compare two models: 'Wald' or 'likelihood'. Component #' \code{nmis} is the number of missing entries for each variable. #' Component \code{m} is the number of imputations. #' Component \code{qhat1} is a matrix, containing the estimated coefficients of the #' \emph{m} repeated complete data analyses from \code{fit1}. #' Component \code{qhat0} is a matrix, containing the estimated coefficients of the #' \emph{m} repeated complete data analyses from \code{fit0}. #' Component \code{ubar1} is the mean of the variances of \code{fit1}, #' formula (3.1.3), Rubin (1987). #' Component \code{ubar0} is the mean of the variances of \code{fit0}, #' formula (3.1.3), Rubin (1987). #' Component \code{qbar1} is the pooled estimate of \code{fit1}, formula (3.1.2) Rubin #' (1987). #' Component \code{qbar0} is the pooled estimate of \code{fit0}, formula (3.1.2) Rubin #' (1987). #' Component \code{Dm} is the test statistic. #' Component \code{rm} is the relative increase in variance due to nonresponse, formula #' (3.1.7), Rubin (1987). #' Component \code{df1}: df1 = under the null hypothesis it is assumed that \code{Dm} has an F #' distribution with (df1,df2) degrees of freedom. #' Component \code{df2}: df2. #' Component \code{pvalue} is the P-value of testing whether the model \code{fit1} is #' statistically different from the smaller \code{fit0}. #' @author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 #' @seealso \code{\link{lm.mids}}, \code{\link{glm.mids}} #' @references Li, K.H., Meng, X.L., Raghunathan, T.E. and Rubin, D. B. (1991). #' Significance levels from repeated p-values with multiply-imputed data. #' Statistica Sinica, 1, 65-92. #' #' Meng, X.L. and Rubin, D.B. (1992). Performing likelihood ratio tests with #' multiple-imputed data sets. Biometrika, 79, 103-111. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' @keywords htest #' @export pool.compare <- function(fit1, fit0, method = c("wald", "likelihood"), data = NULL) { .Deprecated("D1") # Check the arguments call <- match.call() method <- match.arg(method) fits1 <- getfit(fit1) fits0 <- getfit(fit0) if (length(fits1) != length(fits0)) { stop("unequal number of imputations for 'fit1' and 'fit0'", call. 
= FALSE) } if (length(fits1) < 2L) { stop("at least two imputations are needed", call. = FALSE) } m <- length(fits1) est1 <- pool(fit1) est0 <- pool(fit0) dimQ1 <- length(getqbar(est1)) dimQ2 <- dimQ1 - length(getqbar(est0)) # Check: Only need the lm or lmer object formula1 <- formula(getfit(fit1, 1L)) formula0 <- formula(getfit(fit0, 1L)) vars1 <- est1$pooled$term vars0 <- est0$pooled$term if (is.null(vars1) || is.null(vars0)) { stop("coefficients do not have names", call. = FALSE) } if (dimQ2 < 1L) { stop("Model 'fit1' not larger than 'fit0'", call. = FALSE) } if (!setequal(vars0, intersect(vars0, vars1))) { stop("Model 'fit0' not contained in 'fit1'", call. = FALSE) } if (method == "wald") { # Reference: paragraph 2.2, Article Meng & Rubin, # Biometrika, 1992. When two objects are to be compared # we need to calculate matrix Q Q <- diag(dimQ1) where_new_vars <- which(!(vars1 %in% vars0)) Q <- Q[where_new_vars, , drop = FALSE] qbar <- Q %*% getqbar(est1) Ubar <- Q %*% diag(est1$pooled$ubar) %*% (t(Q)) Bm <- Q %*% diag(est1$pooled$b) %*% (t(Q)) rm <- (1 + 1 / m) * sum(diag(Bm %*% (solve(Ubar)))) / dimQ2 Dm <- (t(qbar)) %*% (solve(Ubar)) %*% qbar / (dimQ2 * (1 + rm)) deviances <- NULL } if (method == "likelihood") { # Calculate for each imputed dataset the deviance between the two # models with its estimated coefficients dev1.M <- lapply(fits1, glance) %>% bind_rows() %>% pull(.data$deviance) dev0.M <- lapply(fits0, glance) %>% bind_rows() %>% pull(.data$deviance) # Calculate for each imputed dataset the deviance between the two # models with the pooled coefficients qbar1 <- getqbar(pool(fits1)) mds1 <- lapply(fits1, fix.coef, beta = qbar1) dev1.L <- lapply(mds1, glance) %>% bind_rows() %>% pull(.data$deviance) qbar0 <- getqbar(pool(fits0)) mds0 <- lapply(fits0, fix.coef, beta = qbar0) dev0.L <- lapply(mds0, glance) %>% bind_rows() %>% pull(.data$deviance) deviances <- list( dev1.M = dev1.M, dev0.M = dev0.M, dev1.L = dev1.L, dev0.L = dev0.L ) dev.M <- mean(dev0.M - dev1.M) dev.L <- mean(dev0.L - dev1.L) rm <- ((m + 1) / (dimQ2 * (m - 1))) * (dev.M - dev.L) Dm <- dev.L / (dimQ2 * (1 + rm)) } # Degrees of freedom for F distribution, same for both methods v <- dimQ2 * (m - 1) if (v > 4) { # according to Li 1991 w <- 4 + (v - 4) * ((1 + (1 - 2 / v) * (1 / rm))^2) } else { w <- v * (1 + 1 / dimQ2) * ((1 + 1 / rm)^2) / 2 } statistic <- list( call = call, call11 = fit1$call, call12 = fit1$call1, call01 = fit0$call, call02 = fit0$call1, method = method, nmis = fit1$nmis, m = m, qbar1 = getqbar(est1), qbar0 = getqbar(est0), ubar1 = est1$pooled$ubar, ubar0 = est0$pooled$ubar, deviances = deviances, Dm = Dm, rm = rm, df1 = dimQ2, df2 = w, pvalue = pf(Dm, dimQ2, w, lower.tail = FALSE) ) statistic } mice/R/mice.impute.2l.bin.R0000644000176200001440000001110314334522175015017 0ustar liggesusers#' Imputation by a two-level logistic model using \code{glmer} #' #' Imputes univariate systematically and sporadically missing data #' using a two-level logistic model using \code{lme4::glmer()} #' #' Data are missing systematically if they have not been measured, e.g., in the #' case where we combine data from different sources. Data are missing sporadically #' if they have been partially observed. #' #' @inheritParams mice.impute.2l.lmer #' @param intercept Logical determining whether the intercept is automatically #' added. 
#' @param \dots Arguments passed down to \code{glmer} #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Shahab Jolani, 2015; adapted to mice, SvB, 2018 #' @references #' Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). #' Imputation of systematically missing predictors in an individual #' participant data meta-analysis: a generalized approach using MICE. #' \emph{Statistics in Medicine}, 34:1841-1863. #' @family univariate-2l #' @keywords datagen #' @examples #' library(tidyr) #' library(dplyr) #' data("toenail2") #' data <- tidyr::complete(toenail2, patientID, visit) %>% #' tidyr::fill(treatment) %>% #' dplyr::select(-time) %>% #' dplyr::mutate(patientID = as.integer(patientID)) #' \dontrun{ #' pred <- mice(data, print = FALSE, maxit = 0, seed = 1)$pred #' pred["outcome", "patientID"] <- -2 #' imp <- mice(data, method = "2l.bin", pred = pred, maxit = 1, m = 1, seed = 1) #' } #' @export mice.impute.2l.bin <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) { install.on.demand("lme4", ...) if (is.null(wy)) wy <- !ry if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) names(type)[1] <- colnames(x)[1] <- "(Intercept)" } clust <- names(type[type == -2]) rande <- names(type[type == 2]) fixe <- names(type[type > 0]) X <- x[, fixe, drop = FALSE] Z <- x[, rande, drop = FALSE] xobs <- x[ry, , drop = FALSE] yobs <- y[ry] # create formula, use [-1] to remove intercept fr <- ifelse(length(rande) > 1, paste("+ ( 1 +", paste(rande[-1L], collapse = "+")), "+ ( 1 " ) randmodel <- paste( "yobs ~ ", paste(fixe[-1L], collapse = "+"), fr, "|", clust, ")" ) suppressWarnings(fit <- try( lme4::glmer(formula(randmodel), data = data.frame(yobs, xobs), family = binomial, ... ), silent = TRUE )) if (!is.null(attr(fit, "class"))) { if (attr(fit, "class") == "try-error") { warning("glmer does not run. Simplify imputation model") return(y[wy]) } } # draw beta* beta <- lme4::fixef(fit) rv <- t(chol(vcov(fit))) beta.star <- beta + rv %*% rnorm(ncol(rv)) # calculate psi* psi.hat <- matrix(lme4::VarCorr(fit)[[1L]], nrow = dim(lme4::VarCorr(fit)[[1L]])[1L] ) s <- nrow(psi.hat) * psi.hat rancoef <- as.matrix(lme4::ranef(fit)[[1L]]) lambda <- t(rancoef) %*% rancoef temp <- lambda + s if (attr(suppressWarnings(chol(temp, pivot = TRUE)), "rank") != nrow(temp)) { warning("The cov matrix is not full rank") } temp <- MASS::ginv(temp) ev <- eigen(temp) if (mode(ev$values) == "complex") { ev$values <- suppressWarnings(as.numeric(ev$values)) ev$vectors <- suppressWarnings(matrix(as.numeric(ev$vectors), nrow = length(ev$values) )) warning("The cov matrix is complex") } if (sum(ev$values < 0) > 0) { ev$values[ev$values < 0] <- 0 temp <- ev$vectors %*% diag(ev$values, nrow = length(ev$values)) %*% t(ev$vectors) } deco <- ev$vectors %*% diag(sqrt(ev$values), nrow = length(ev$values)) temp.psi.star <- stats::rWishart( 1, nrow(rancoef) + nrow(psi.hat), diag(nrow(psi.hat)) )[, , 1L] psi.star <- MASS::ginv(deco %*% temp.psi.star %*% t(deco)) # psi.star positive definite? 
if (!isSymmetric(psi.star)) psi.star <- (psi.star + t(psi.star)) / 2 valprop <- eigen(psi.star) if (sum(valprop$values < 0) > 0) { valprop$values[valprop$values < 0] <- 0 psi.star <- valprop$vectors %*% diag(valprop$values) %*% t(valprop$vectors) } # find clusters for which we need imputes clmis <- x[wy, clust] # the main imputation task for (i in clmis) { bi.star <- t(MASS::mvrnorm( n = 1L, mu = rep(0, nrow(psi.star)), Sigma = psi.star )) idx <- wy & (x[, clust] == i) logit <- X[idx, , drop = FALSE] %*% beta.star + Z[idx, , drop = FALSE] %*% matrix(bi.star, ncol = 1) vec <- rbinom(nrow(logit), 1, as.vector(1 / (1 + exp(-logit)))) if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } y[idx] <- vec } y[wy] } mice/R/brandsma.R0000644000176200001440000000432014330031606013275 0ustar liggesusers#' Brandsma school data used Snijders and Bosker (2012) #' #' Dataset with raw data from Snijders and Bosker (2012) containing #' data from 4106 pupils attending 216 schools. This dataset #' includes all pupils and schools with missing data. #' #' @name brandsma #' @docType data #' @format \code{brandsma} is a data frame with 4106 rows and 14 columns: #' \describe{ #' \item{\code{sch}}{School number} #' \item{\code{pup}}{Pupil ID} #' \item{\code{iqv}}{IQ verbal} #' \item{\code{iqp}}{IQ performal} #' \item{\code{sex}}{Sex of pupil} #' \item{\code{ses}}{SES score of pupil} #' \item{\code{min}}{Minority member 0/1} #' \item{\code{rpg}}{Number of repeated groups, 0, 1, 2} #' \item{\code{lpr}}{language score PRE} #' \item{\code{lpo}}{language score POST} #' \item{\code{apr}}{Arithmetic score PRE} #' \item{\code{apo}}{Arithmetic score POST} #' \item{\code{den}}{Denomination classification 1-4 - at school level} #' \item{\code{ssi}}{School SES indicator - at school level} #' } #' #' @note This dataset is constructed from the raw data. There are #' a few differences with the data set used in Chapter 4 and 5 #' of Snijders and Bosker: #' \enumerate{ #' \item All schools are included, including the five school with #' missing values on \code{langpost}. #' \item Missing \code{denomina} codes are left as missing. #' \item Aggregates are undefined in the presence of missing data #' in the underlying values. #' Variables \code{ses}, \code{iqv} and \code{iqp} are in their #' original scale, and not globally centered. #' No aggregate variables at the school level are included. #' \item There is a wider selection of original variables. Note #' however that the source data contain an even wider set of #' variables. #' } #' #' @source Constructed from \code{MLbook_2nded_total_4106-99.sav} from #' \url{https://www.stats.ox.ac.uk/~snijders/mlbook.htm} by function #' \code{data-raw/R/brandsma.R} #' #' @references #' Brandsma, HP and Knuver, JWM (1989), Effects of school and #' classroom characteristics on pupil progress in language and arithmetic. #' International Journal of Educational Research, 13(7), 777 - 788. #' #' Snijders, TAB and Bosker RJ (2012). Multilevel Analysis, 2nd Ed. Sage, #' Los Angeles, 2012. #' @keywords datasets NULL mice/R/check.deprecated.R0000644000176200001440000000110214330031606014655 0ustar liggesusers# contributed by Simon Grund, #137 check.deprecated <- function(...) 
{ # print warnings for deprecated argument names nms <- names(list(...)) replace.args <- list( imputationMethod = "method", defaultImputationMethod = "defaultMethod", form = "formulas" ) wrn <- names(replace.args) %in% nms if (any(wrn)) { for (i in which(wrn)) { msg <- paste0( "The '", names(replace.args)[i], "' argument is no longer supported. Please use '", replace.args[i], "' instead." ) warning(msg) } } invisible(NULL) } mice/R/mice.impute.mnar.norm.R0000644000176200001440000001647514334522175015663 0ustar liggesusers#' Imputation under MNAR mechanism by NARFCS #' #' Imputes univariate data under a user-specified MNAR mechanism by #' linear or logistic regression and NARFCS. Sensitivity analysis under #' different model specifications may shed light on the impact of #' different MNAR assumptions on the conclusions. #' #' @rdname mice.impute.mnar #' @aliases mice.impute.mnar.norm mnar.norm #' mice.impute.mnar.logreg mnar.logreg #' @inheritParams mice.impute.pmm #' @param ums A string containing the specification of the #' unidentifiable part of the imputation model (the *unidentifiable #' model specification"), that is, the desired \eqn{\delta}-adjustment #' (offset) as a function of other variables and values for the #' corresponding deltas (sensitivity parameters). See details. #' @param umx An auxiliary data matrix containing variables that do #' not appear in the identifiable part of the imputation procedure #' but that have been specified via \code{ums} as being predictors #' in the unidentifiable part of the imputation model. See details. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' This function imputes data that are thought to be Missing Not at #' Random (MNAR) by the NARFCS method. The NARFCS procedure #' (Tompsett et al, 2018) generalises the so-called #' \eqn{\delta}-adjustment sensitivity analysis method of Van Buuren, #' Boshuizen & Knook (1999) to the case with multiple incomplete #' variables within the FCS framework. In practical terms, the #' NARFCS procedure shifts the imputations drawn at each #' iteration of \code{mice} by a user-specified quantity that can #' vary across subjects, to reflect systematic departures of the #' missing data from the data distribution imputed under MAR. #' #' Specification of the NARFCS model is done by the \code{blots} #' argument of \code{mice()}. The \code{blots} parameter is a named #' list. For each variable to be imputed by #' \code{mice.impute.mnar.norm()} or \code{mice.impute.mnar.logreg()} #' the corresponding element in \code{blots} is a list with #' at least one argument \code{ums} and, optionally, a second #' argument \code{umx}. #' For example, the high-level call might like something like #' \code{mice(nhanes[, c(2, 4)], method = c("pmm", "mnar.norm"), #' blots = list(chl = list(ums = "-3+2*bmi")))}. #' #' The \code{ums} parameter is required, and might look like this: #' \code{"-4+1*Y"}. 
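#' Read as an illustration (an informal sketch, not a recommended setting):
#' with \code{blots = list(X = list(ums = "-4+1*Y"))}, the imputations for
#' \code{X} are shifted by an offset of \eqn{-4 + 1 \times Y} relative to the
#' MAR-based draws, where \eqn{-4} and \eqn{1} are user-chosen sensitivity
#' parameters and \code{Y} must be available to the imputation model (or be
#' supplied through \code{umx}).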
The \code{ums} specifcation must have the #' following characteristics: #' \enumerate{ #' \item{A single term corresponding to the intercept (constant) term, #' not multiplied by any variable name, must be included in the #' expression;} #' \item{Each term in the expression (corresponding to the intercept #' or a predictor variable) must be separated by either a \code{"+"} #' or \code{"-"} sign, depending on the sign of the sensitivity #' parameter;} #' \item{Within each non-intercept term, the sensitivity parameter #' value comes first and the predictor variable comes second, and these #' must be separated by a \code{"*"} sign;} #' \item{For categorical predictors, for example a variable \code{Z} #' with K + 1 categories \code{("Cat0","Cat1", ...,"CatK")}, K #' category-specific terms are needed, and those not in \code{umx} #' (see below) must be specified by concatenating the variable name #' with the name of the category (e.g. \code{ZCat1}) as this is how #' they are named in the design matrix (argument \code{x}) passed #' to the univariate imputation function. An example is #' \code{"2+1*ZCat1-3*ZCat2"}.} #' } #' #' If given, the \code{umx} specification must have the following #' characteristics: #' \enumerate{ #' \item{It contains only complete variables, with no missing values;} #' \item{It is a numeric matrix. In particular, categorical variables #' must be represented as dummy indicators with names corresponding #' to what is used in \code{ums} to refer to the category-specific terms #' (see above);} #' \item{It has the same number of rows as the \code{data} argument #' passed on to the main \code{mice} function;} #' \item{It does not contain variables that were already predictors #' in the identifiable part of the model for the variable under #' imputation.} #' } #' #' Limitation: The present implementation can only condition on variables #' that appear in the identifiable part of the imputation model (\code{x}) or #' in complete auxiliary variables passed on via the \code{umx} argument. #' It is not possible to specify models where the offset depends on #' incomplete auxiliary variables. #' #' For an MNAR alternative see also \code{\link{mice.impute.ri}}. #' #' @author Margarita Moreno-Betancur, Stef van Buuren, Ian R. White, 2020. #' @references #' Tompsett, D. M., Leacy, F., Moreno-Betancur, M., Heron, J., & #' White, I. R. (2018). On the use of the not-at-random fully #' conditional specification (NARFCS) procedure in practice. #' \emph{Statistics in Medicine}, \bold{37}(15), 2338-2353. #' \doi{10.1002/sim.7643}. #' #' Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple #' imputation of missing blood pressure covariates in survival analysis. #' \emph{Statistics in Medicine}, \bold{18}, 681--694. 
#' #' @family univariate imputation functions #' @keywords datagen #' @examples #' # 1: Example with no auxiliary data: only pass unidentifiable model specification (ums) #' #' # Specify argument to pass on to mnar imputation functions via "blots" argument #' mnar.blot <- list(X = list(ums = "-4"), Y = list(ums = "2+1*ZCat1-3*ZCat2")) #' #' # Run NARFCS by using mnar imputation methods and passing argument via blots #' impNARFCS <- mice(mnar_demo_data, #' method = c("mnar.logreg", "mnar.norm", ""), #' blots = mnar.blot, seed = 234235, print = FALSE #' ) #' #' # Obtain MI results: Note they coincide with those from old version at #' # https://github.com/moreno-betancur/NARFCS #' pool(with(impNARFCS, lm(Y ~ X + Z)))$pooled$estimate #' #' # 2: Example passing also auxiliary data to MNAR procedure (umx) #' # Assumptions: #' # - Auxiliary data are complete, no missing values #' # - Auxiliary data are a numeric matrix #' # - Auxiliary data have same number of rows as x #' # - Auxiliary data have no overlapping variable names with x #' #' # Specify argument to pass on to mnar imputation functions via "blots" argument #' aux <- matrix(0:1, nrow = nrow(mnar_demo_data)) #' dimnames(aux) <- list(NULL, "even") #' mnar.blot <- list( #' X = list(ums = "-4"), #' Y = list(ums = "2+1*ZCat1-3*ZCat2+0.5*even", umx = aux) #' ) #' #' # Run NARFCS by using mnar imputation methods and passing argument via blots #' impNARFCS <- mice(mnar_demo_data, #' method = c("mnar.logreg", "mnar.norm", ""), #' blots = mnar.blot, seed = 234235, print = FALSE #' ) #' #' # Obtain MI results: As expected they differ (slightly) from those #' # from old version at https://github.com/moreno-betancur/NARFCS #' pool(with(impNARFCS, lm(Y ~ X + Z)))$pooled$estimate #' @export mice.impute.mnar.norm <- function(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) { ## Undentifiable part: u <- parse.ums(x, ums = ums, umx = umx, ...) ## Identifiable part: exactly the same as mice.impute.norm if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) parm <- .norm.draw(y, ry, x, ...) ## Draw imputations return(x[wy, , drop = FALSE] %*% parm$beta + u$x[wy, , drop = FALSE] %*% u$delta + rnorm(sum(wy)) * parm$sigma) } mice/R/mads.R0000644000176200001440000000733614330031606012444 0ustar liggesusers#' Multivariate amputed data set (\code{mads}) #' #' The \code{mads} object contains an amputed data set. The \code{mads} object is #' generated by the \code{ampute} function. The \code{mads} class of objects has #' methods for the following generic functions: \code{print}, \code{summary}, #' \code{bwplot} and \code{xyplot}. #' #' @section Contents: #' \describe{ #' \item{\code{call}:}{The function call.} #' \item{\code{prop}:}{Proportion of cases with missing values. Note: even when #' the proportion is entered as the proportion of missing cells (when #' \code{bycases == TRUE}), this object contains the proportion of missing cases.} #' \item{\code{patterns}:}{A data frame of size #patterns by #variables where \code{0} #' indicates a variable has missing values and \code{1} indicates a variable remains #' complete.} #' \item{\code{freq}:}{A vector of length #patterns containing the relative #' frequency with which the patterns occur. For example, if the vector is #' \code{c(0.4, 0.4, 0.2)}, this means that of all cases with missing values, #' 40 percent is candidate for pattern 1, 40 percent for pattern 2 and 20 #' percent for pattern 3. 
The vector sums to 1.} #' \item{\code{mech}:}{A string specifying the missingness mechanism, either #' \code{"MCAR"}, \code{"MAR"} or \code{"MNAR"}.} #' \item{\code{weights}:}{A data frame of size #patterns by #variables. It contains #' the weights that were used to calculate the weighted sum scores. The weights #' may differ between patterns and between variables.} #' \item{\code{cont}:}{Logical, whether probabilities are based on continuous logit #' functions or on discrete odds distributions.} #' \item{\code{type}:}{A vector of strings containing the type of missingness #' for each pattern. Either \code{"LEFT"}, \code{"MID"}, \code{"TAIL"} or #' \code{"RIGHT"}. The first type refers to the first pattern, the second type #' to the second pattern, etc.} #' \item{\code{odds}:}{A matrix where #patterns defines the #rows. Each row contains #' the odds of being missing for the corresponding pattern. The amount of odds values #' defines in how many quantiles the sum scores were divided. The values are #' relative probabilities: a quantile with odds value 4 will have a probability of #' being missing that is four times higher than a quantile with odds 1. The #' #quantiles may differ between patterns, NA is used for cells remaining empty.} #' \item{\code{amp}:}{A data frame containing the input data with NAs for the #' amputed values.} #' \item{\code{cand}:}{A vector that contains the pattern number for each case. #' A value between 1 and #patterns is given. For example, a case with value 2 is #' candidate for missing data pattern 2.} #' \item{\code{scores}:}{A list containing vectors with weighted sum scores of the #' candidates. The first vector refers to the candidates of the first pattern, the #' second vector refers to the candidates of the second pattern, etc. The length #' of the vectors differ because the number of candidates is different for each #' pattern.} #' \item{\code{data}:}{The complete data set that was entered in \code{ampute}.} #' } #' @note Many of the functions of the \code{mice} package do not use the S4 class #' definitions, and instead rely on the S3 list equivalent #' \code{oldClass(obj) <- "mads"}. #' @author Rianne Schouten, 2016 #' @seealso \code{\link{ampute}}, Vignette titled "Multivariate Amputation using #' Ampute". #' @export setClass("mads", representation( call = "call", prop = "numeric", patterns = "matrix", freq = "numeric", mech = "character", weights = "matrix", cont = "logical", type = "character", odds = "matrix", amp = "data.frame", cand = "integer", scores = "list", data = "data.frame" ), contains = "list" ) mice/R/mammalsleep.R0000644000176200001440000000361214330031606014006 0ustar liggesusers#' Mammal sleep data #' #' Dataset from Allison and Cicchetti (1976) of 62 mammal species on the #' interrelationship between sleep, ecological, and constitutional variables. #' The dataset contains missing values on five variables. #' #' Allison and Cicchetti (1976) investigated the interrelationship between #' sleep, ecological, and constitutional variables. They assessed these #' variables for 39 mammalian species. The authors concluded that slow-wave #' sleep is negatively associated with a factor related to body size. This #' suggests that large amounts of this sleep phase are disadvantageous in large #' species. Also, paradoxical sleep (REM sleep) was associated with a factor #' related to predatory danger, suggesting that large amounts of this sleep #' phase are disadvantageous in prey species. 
#' #' @name mammalsleep #' @aliases mammalsleep sleep #' @docType data #' @format \code{mammalsleep} is a data frame with 62 rows and 11 columns: #' \describe{ #' \item{species}{Species of animal} #' \item{bw}{Body weight (kg)} #' \item{brw}{Brain weight (g)} #' \item{sws}{Slow wave ("nondreaming") sleep (hrs/day)} #' \item{ps}{Paradoxical ("dreaming") sleep (hrs/day)} #' \item{ts}{Total sleep (hrs/day) (sum of slow wave and paradoxical sleep)} #' \item{mls}{Maximum life span (years)} #' \item{gt}{Gestation time (days)} #' \item{pi}{Predation index (1-5), 1 = least likely to be preyed upon} #' \item{sei}{Sleep exposure index (1-5), 1 = least exposed (e.g. animal sleeps in a #' well-protected den), 5 = most exposed} #' \item{odi}{Overall danger index (1-5) based on the above two indices and other information, 1 = least #' danger (from other animals), 5 = most danger (from other animals)} #' } #' @source Allison, T., Cicchetti, D.V. (1976). Sleep in Mammals: Ecological and #' Constitutional Correlates. Science, 194(4266), 732-734. #' @keywords datasets #' @examples #' sleep <- data(mammalsleep) NULL mice/R/rbind.R0000644000176200001440000001157114433400023012607 0ustar liggesusersrbind.mids <- function(x, y = NULL, ...) { call <- match.call() if (is.mids(y)) { return(rbind.mids.mids(x, y, call = call)) } # Combine y and dots into data.frame if (is.null(y)) { y <- rbind.data.frame(...) } else { y <- rbind.data.frame(y, ...) } if (is.data.frame(y)) { if (ncol(y) != ncol(x$data)) { stop("datasets have different number of columns") } } varnames <- colnames(x$data) # Call is a vector, with first argument the mice statement and second argument the call to cbind.mids. call <- c(x$call, call) # The data in x (x$data) and y are combined together. data <- rbind(x$data, y) blocks <- x$blocks # where argument: code all values as observed, including NA wy <- matrix(FALSE, nrow = nrow(y), ncol = ncol(y)) rownames(wy) <- rownames(y) where <- rbind(x$where, wy) # ignore argument: include all new values ignore <- c(x$ignore, rep(FALSE, nrow(y))) # The number of imputations in the new midsobject is equal to that in x. m <- x$m # count the number of missing data in y and add them to x$nmis. nmis <- x$nmis + colSums(is.na(y)) # The listelements method, post, predictorMatrix, visitSequence will be copied from x. method <- x$method post <- x$post formulas <- x$formulas blots <- x$blots predictorMatrix <- x$predictorMatrix visitSequence <- x$visitSequence # Only x contributes imputations imp <- x$imp # seed, lastSeedValue, number of iterations, chainMean and chainVar is taken as in mids object x. 
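  # Note (added comment): because the appended rows in y carry no imputations of
  # their own, the convergence diagnostics of x remain valid and are copied unchanged.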
seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration chainMean <- x$chainMean chainVar <- x$chainVar loggedEvents <- x$loggedEvents midsobj <- list( data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } rbind.mids.mids <- function(x, y, call) { if (!is.mids(y)) stop("argument `y` not a mids object") if (ncol(y$data) != ncol(x$data)) { stop("datasets have different number of columns") } if (!identical(colnames(x$data), colnames(y$data))) { stop("datasets have different variable names") } if (!identical(sapply(x$data, is.factor), sapply(y$data, is.factor))) { stop("datasets have different factor variables") } if (x$m != y$m) { stop("number of imputations differ") } varnames <- colnames(x$data) # Call is a vector, with first argument the mice statement and second argument the call to rbind.mids. call <- match.call() call <- c(x$call, call) # The data in x (x$data) and y are combined together. data <- rbind(x$data, y$data) # Where argument where <- rbind(x$where, y$where) # The number of imputations in the new midsobject is equal to that in x. m <- x$m # count the number of missing data in y and add them to x$nmis. nmis <- x$nmis + y$nmis # The listelements method, post, predictorMatrix, visitSequence will be copied from x. blocks <- x$blocks method <- x$method post <- x$post formulas <- x$formulas blots <- x$blots ignore <- c(x$ignore, y$ignore) predictorMatrix <- x$predictorMatrix visitSequence <- x$visitSequence # The original data of y will be binded into the multiple imputed dataset # including the imputed values of y. imp <- vector("list", ncol(x$data)) for (j in seq_len(ncol(x$data))) { if (!is.null(x$imp[[j]]) || !is.null(y$imp[[j]])) { imp[[j]] <- rbind(x$imp[[j]], y$imp[[j]]) } } names(imp) <- varnames # seed, lastSeedValue, number of iterations seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration if (x$iteration != y$iteration) { warning("iterations differ, so no convergence diagnostics calculated", call. = FALSE ) chainMean <- NULL chainVar <- NULL } else { w <- colSums(x$where) / colSums(where) chainMean <- x$chainMean * w + y$chainMean * (1 - w) chainVar <- x$chainVar * w + y$chainVar * (1 - w) } loggedEvents <- x$loggedEvents midsobj <- list( data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } mice/R/mice.theme.R0000644000176200001440000000353314330031606013531 0ustar liggesusers#' Set the theme for the plotting Trellis functions #' #' The \code{mice.theme()} function sets default choices for #' Trellis plots that are built into \pkg{mice}. #' #' @aliases mice.theme #' @param transparent A logical indicating whether alpha-transparency is #' allowed. The default is \code{TRUE}. 
#' @param alpha.fill A numerical values between 0 and 1 that indicates the #' default alpha value for fills. #' @return \code{mice.theme()} returns a named list that can be used as a theme in the functions in #' \pkg{lattice}. By default, the \code{mice.theme()} function sets #' \code{transparent <- TRUE} if the current device \code{.Device} supports #' semi-transparent colors. #' @author Stef van Buuren 2011 #' @export mice.theme <- function(transparent = TRUE, alpha.fill = 0.3) { filler <- function(transparent, alpha) { if (transparent) { return(c( grDevices::hcl(240, 100, 40, alpha), grDevices::hcl(0, 100, 40, alpha) )) } return(c(grDevices::hcl(240, 100, 40), grDevices::hcl(0, 100, 40))) } if (missing(transparent)) transparent <- supports.transparent() if (missing(alpha.fill)) alpha.fill <- ifelse(transparent, 0.3, 0) list( superpose.symbol = list( col = mdc(1:2), fill = filler(transparent, alpha.fill), pch = 1 ), superpose.line = list( col = mdc(4:5), lwd = 1 ), box.dot = list( col = mdc(1:2) ), box.rectangle = list( col = mdc(4:5) ), box.symbol = list( col = mdc(1:2) ), plot.symbol = list( col = mdc(1:2), fill = filler(transparent, alpha.fill), pch = 1 ), plot.line = list( col = mdc(4:5) ), superpose.polygon = list( col = filler(transparent, alpha.fill) ), strip.background = list( col = "grey95" ), mice = list( flag = TRUE ) ) } mice/R/ncc.R0000644000176200001440000000206114330031606012251 0ustar liggesusers#' Number of complete cases #' #' Calculates the number of complete cases. #' #' @param x An \code{R} object. Currently supported are methods for the #' following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, #' \code{x} can be a vector. #' @return Number of elements in \code{x} with complete data. #' @author Stef van Buuren, 2017 #' @seealso \code{\link{nic}}, \code{\link{cci}} #' @examples #' ncc(nhanes) # 13 complete cases #' @export ncc <- function(x) sum(cci(x)) #' Number of incomplete cases #' #' Calculates the number of incomplete cases. #' #' @param x An \code{R} object. Currently supported are methods for the #' following classes: \code{mids}, \code{data.frame} and \code{matrix}. Also, #' \code{x} can be a vector. #' @return Number of elements in \code{x} with incomplete data. 
#' @author Stef van Buuren, 2017 #' @seealso \code{\link{ncc}}, \code{\link{cci}} #' @examples #' nic(nhanes) # the remaining 12 rows #' nic(nhanes[, c("bmi", "hyp")]) # number of cases with incomplete bmi and hyp #' @export nic <- function(x) sum(ici(x)) mice/R/is.R0000644000176200001440000000230114330031606012116 0ustar liggesusers#' Check for \code{mids} object #' #' @aliases is.mids #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mids} #' @export is.mids <- function(x) { inherits(x, "mids") } #' Check for \code{mira} object #' #' @aliases is.mira #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mira} #' @export is.mira <- function(x) { inherits(x, "mira") } #' Check for \code{mipo} object #' #' @aliases is.mipo #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mipo} #' @export is.mipo <- function(x) { inherits(x, "mipo") } #' Check for \code{mitml.result} object #' #' @aliases is.mitml.result #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mitml.result} #' @export is.mitml.result <- function(x) { inherits(x, "mitml.result") } is.passive <- function(string) { "~" == substring(string, 1, 1) } #' Check for \code{mads} object #' #' @aliases is.mads #' @param x An object #' @return A logical indicating whether \code{x} is an object of class \code{mads} #' @export is.mads <- function(x) { inherits(x, "mads") } mice/R/generics.R0000644000176200001440000002004114433400023013300 0ustar liggesusers#' Combine R objects by rows and columns #' #' Functions \code{cbind()} and \code{rbind()} are defined in #' the \code{mice} package in order to #' enable dispatch to \code{cbind.mids()} and \code{rbind.mids()} #' when one of the arguments is a \code{data.frame}. #' #' The standard \code{base::cbind()} and \code{base::rbind()} #' always dispatch to #' \code{base::cbind.data.frame()} or \code{base::rbind.data.frame()} #' if one of the arguments is a #' \code{data.frame}. The versions defined in the \code{mice} #' package intercept the user command #' and test whether the first argument has class \code{"mids"}. If so, #' function calls \code{cbind.mids()}, respectively \code{rbind.mids()}. In #' all other cases, the call is forwarded to standard functions in the #' \code{base} package. #' #' @inheritDotParams base::cbind #' @details #' The \code{cbind.mids()} function combines two \code{mids} objects #' columnwise into a single #' object of class \code{mids}, or combines a single \code{mids} object with #' a \code{vector}, \code{matrix}, \code{factor} or \code{data.frame} #' columnwise into a \code{mids} object. #' #' If both arguments of \code{cbind.mids()} are \code{mids}-objects, the #' \code{data} list components should have the same number of rows. Also, the #' number of imputations (\code{m}) should be identical. #' If the second argument is a \code{matrix}, #' \code{factor} or \code{vector}, it is transformed into a #' \code{data.frame}. The number of rows should match with the \code{data} #' component of the first argument. #' #' The \code{cbind.mids()} function renames any duplicated variable or block names by #' appending \code{".1"}, \code{".2"} to duplicated names. #' #' The \code{rbind.mids()} function combines two \code{mids} objects rowwise into a single #' \code{mids} object, or combines a \code{mids} object with a vector, matrix, #' factor or data frame rowwise into a \code{mids} object. 
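#'
#' A minimal sketch of the resulting dispatch (an illustration only, using the
#' built-in \code{nhanes} data as in the examples below):
#' \preformatted{
#' imp <- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE)
#' rbind(imp, nhanes[14:25, ])             # first argument is a mids object: rbind.mids()
#' rbind(nhanes[1:13, ], nhanes[14:25, ])  # plain data frames: base::rbind()
#' }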
#' #' If both arguments of \code{rbind.mids()} are \code{mids} objects, #' then \code{rbind.mids()} requires that both have the same number of multiple #' imputations. In addition, their \code{data} components should match. #' #' If the second argument of \code{rbind.mids()} is not a \code{mids} object, #' the columns of the arguments should match. The \code{where} matrix for the #' second argument is set to \code{FALSE}, signalling that any missing values in #' that argument were not imputed. The \code{ignore} vector for the second argument is #' set to \code{FALSE}. Rows inherited from the second argument will therefore #' influence the parameter estimation of the imputation model in any future #' iterations. # #' @note #' The \code{cbind.mids()} function constructs the elements of the new \code{mids} object as follows: #' \tabular{ll}{ #' \code{data} \tab Columnwise combination of the data in \code{x} and \code{y}\cr #' \code{imp} \tab Combines the imputed values from \code{x} and \code{y}\cr #' \code{m} \tab Taken from \code{x$m}\cr #' \code{where} \tab Columnwise combination of \code{x$where} and \code{y$where}\cr #' \code{blocks} \tab Combines \code{x$blocks} and \code{y$blocks}\cr #' \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} #' is call to \code{cbind.mids()}\cr #' \code{nmis} \tab Equals \code{c(x$nmis, y$nmis)}\cr #' \code{method} \tab Combines \code{x$method} and \code{y$method}\cr #' \code{predictorMatrix} \tab Combination with zeroes on the off-diagonal blocks\cr #' \code{visitSequence} \tab Combined as \code{c(x$visitSequence, y$visitSequence)}\cr #' \code{formulas} \tab Combined as \code{c(x$formulas, y$formulas)}\cr #' \code{post} \tab Combined as \code{c(x$post, y$post)}\cr #' \code{blots} \tab Combined as \code{c(x$blots, y$blots)}\cr #' \code{ignore} \tab Taken from \code{x$ignore}\cr #' \code{seed} \tab Taken from \code{x$seed}\cr #' \code{iteration} \tab Taken from \code{x$iteration}\cr #' \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr #' \code{chainMean} \tab Combined from \code{x$chainMean} and \code{y$chainMean}\cr #' \code{chainVar} \tab Combined from \code{x$chainVar} and \code{y$chainVar}\cr #' \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr #' \code{version} \tab Current package version\cr #' \code{date} \tab Current date\cr #' } #' #' The \code{rbind.mids()} function constructs the elements of the new \code{mids} object as follows: #' \tabular{ll}{ #' \code{data} \tab Rowwise combination of the (incomplete) data in \code{x} and \code{y}\cr #' \code{imp} \tab Equals \code{rbind(x$imp[[j]], y$imp[[j]])} if \code{y} is \code{mids} object; otherwise #' the data of \code{y} will be copied\cr #' \code{m} \tab Equals \code{x$m}\cr #' \code{where} \tab Rowwise combination of \code{where} arguments\cr #' \code{blocks} \tab Equals \code{x$blocks}\cr #' \code{call} \tab Vector, \code{call[1]} creates \code{x}, \code{call[2]} is call to \code{rbind.mids}\cr #' \code{nmis} \tab \code{x$nmis} + \code{y$nmis}\cr #' \code{method} \tab Taken from \code{x$method}\cr #' \code{predictorMatrix} \tab Taken from \code{x$predictorMatrix}\cr #' \code{visitSequence} \tab Taken from \code{x$visitSequence}\cr #' \code{formulas} \tab Taken from \code{x$formulas}\cr #' \code{post} \tab Taken from \code{x$post}\cr #' \code{blots} \tab Taken from \code{x$blots}\cr #' \code{ignore} \tab Concatenate \code{x$ignore} and \code{y$ignore}\cr #' \code{seed} \tab Taken from \code{x$seed}\cr #' \code{iteration} \tab Taken from 
\code{x$iteration}\cr #' \code{lastSeedValue} \tab Taken from \code{x$lastSeedValue}\cr #' \code{chainMean} \tab Set to \code{NA}\cr #' \code{chainVar} \tab Set to \code{NA}\cr #' \code{loggedEvents} \tab Taken from \code{x$loggedEvents}\cr #' \code{version} \tab Taken from \code{x$version}\cr #' \code{date} \tab Taken from \code{x$date} #' } #' @return An S3 object of class \code{mids} #' @author Karin Groothuis-Oudshoorn, Stef van Buuren #' @seealso \code{\link[base:cbind]{cbind}}, \code{\link{ibind}}, #' \code{\link[=mids-class]{mids}} #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords manip #' @examples #' # --- cbind --- #' # impute four variables at once (default) #' imp <- mice(nhanes, m = 1, maxit = 1, print = FALSE) #' imp$predictorMatrix #' #' # impute two by two #' data1 <- nhanes[, c("age", "bmi")] #' data2 <- nhanes[, c("hyp", "chl")] #' imp1 <- mice(data1, m = 2, maxit = 1, print = FALSE) #' imp2 <- mice(data2, m = 2, maxit = 1, print = FALSE) #' #' # Append two solutions #' imp12 <- cbind(imp1, imp2) #' #' # This is a different imputation model #' imp12$predictorMatrix #' #' # Append the other way around #' imp21 <- cbind(imp2, imp1) #' imp21$predictorMatrix #' #' # Append 'forgotten' variable chl #' data3 <- nhanes[, 1:3] #' imp3 <- mice(data3, maxit = 1, m = 2, print = FALSE) #' imp4 <- cbind(imp3, chl = nhanes$chl) #' #' # Of course, chl was not imputed #' head(complete(imp4)) #' #' # Combine mids object with data frame #' imp5 <- cbind(imp3, nhanes2) #' head(complete(imp5)) #' #' # --- rbind --- #' imp1 <- mice(nhanes[1:13, ], m = 2, maxit = 1, print = FALSE) #' imp5 <- mice(nhanes[1:13, ], m = 2, maxit = 2, print = FALSE) #' mylist <- list(age = NA, bmi = NA, hyp = NA, chl = NA) #' #' nrow(complete(rbind(imp1, imp5))) #' nrow(complete(rbind(imp1, mylist))) #' #' nrow(complete(rbind(imp1, data.frame(mylist)))) #' nrow(complete(rbind(imp1, complete(imp5)))) #' @export cbind <- function(...) { if (is.null(attr(list(...)[[1]], "class"))) { return(base::cbind(...)) } if ("mids" %in% attr(list(...)[[1]], "class")) { cbind.mids(...) } else { base::cbind(...) } } #' @rdname cbind #' @export rbind <- function(...) { if (is.null(attr(list(...)[[1]], "class"))) { return(base::rbind(...)) } if ("mids" %in% attr(list(...)[[1]], "class")) { rbind.mids(...) } else { base::rbind(...) } } mice/R/getfit.R0000644000176200001440000000367714330031606013006 0ustar liggesusers#' Extract list of fitted models #' #' Function \code{getfit()} returns the list of objects containing the repeated analysis #' results, or optionally, one of these fitted objects. The function looks for #' a list element called \code{analyses}, and return this component as a list with #' \code{mira} class. If element \code{analyses} is not found in \code{x}, then #' it returns \code{x} as a \code{mira} object. #' #' No checking is done for validity of objects. The function also processes #' objects of class \code{mitml.result} from the \code{mitml} package. #' #' @param x An object of class \code{mira}, typically produced by a call #' to \code{with()}. #' @param i An integer between 1 and \code{x$m} signalling the index of the #' repeated analysis. The default \code{i= -1} return a list with all analyses. #' @param simplify Should the return value be unlisted? 
#' @return If \code{i = -1}, an object of class \code{mira} containing #' all analyses. If \code{i} selects one of the analyses, then it returns #' an object with class inherited from that element. #' @author Stef van Buuren, 2012, 2020 #' @seealso \code{\link[=mira-class]{mira}}, \code{\link{with.mids}} #' @keywords manip #' @examples #' imp <- mice(nhanes, print = FALSE, seed = 21443) #' fit <- with(imp, lm(bmi ~ chl + hyp)) #' f1 <- getfit(fit) #' class(f1) #' f2 <- getfit(fit, 2) #' class(f2) #' @export getfit <- function(x, i = -1L, simplify = FALSE) { if (is.null(x$analyses)) { ra <- x } else { ra <- x$analyses } if (i != -1L) { return(ra[[i]]) } if (simplify) ra <- unlist(ra) class(ra) <- c("mira", "list") ra } #' Extract estimate from \code{mipo} object #' #' \code{getqbar} returns a named vector of pooled estimates. #' #' @param x An object of class \code{mipo} #' @export getqbar <- function(x) { if (!is.mipo(x)) stop("Not a mipo object") qbar <- x$pooled$estimate # note: not supported: component/y.values names(qbar) <- x$pooled$term qbar } mice/R/mice.impute.lasso.select.norm.R0000644000176200001440000000623714330031647017313 0ustar liggesusers#' Imputation by indirect use of lasso linear regression #' #' Imputes univariate missing data using Bayesian linear regression following a #' preprocessing lasso variable selection step. #' #' @aliases mice.impute.lasso.select.norm lasso.select.norm #' @inheritParams mice.impute.pmm #' @param nfolds The number of folds for the cross-validation of the lasso penalty. #' The default is 10. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' The method consists of the following steps: #' \enumerate{ #' \item For a given \code{y} variable under imputation, fit a linear regression with lasso #' penalty using \code{y[ry]} as dependent variable and \code{x[ry, ]} as predictors. #' Coefficients that are not shrunk to 0 define an active set of predictors #' that will be used for imputation #' \item Define a Bayesian linear model using \code{y[ry]} as the #' dependent variable, the active set of \code{x[ry, ]} as predictors, and standard #' non-informative priors #' \item Draw parameter values for the intercept, regression weights, and error #' variance from their posterior distribution #' \item Draw imputations from the posterior predictive distribution #' } #' The user can specify a \code{predictorMatrix} in the \code{mice} call #' to define which predictors are provided to this univariate imputation method. #' The lasso regularization will select, among the variables indicated by #' the user, the ones that are important for imputation at any given iteration. #' Therefore, users may force the exclusion of a predictor from a given #' imputation model by specifying a \code{0} entry. #' However, a non-zero entry does not guarantee the variable will be used, #' as this decision is ultimately made by the lasso variable selection #' procedure. #' #' The method is based on the Indirect Use of Regularized Regression (IURR) proposed by #' Zhao & Long (2016) and Deng et al (2016). #' @author Edoardo Costantini, 2021 #' @references #' #' Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for #' general missing data patterns in the presence of high-dimensional data. #' Scientific reports, 6(1), 1-10. #' #' Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of #' high-dimensional data. Statistical Methods in Medical Research, 25(5), #' 2021-2035.
#' #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.lasso.select.norm <- function(y, ry, x, wy = NULL, nfolds = 10, ...) { install.on.demand("glmnet", ...) # Body if (is.null(wy)) wy <- !ry x_glmnet <- cbind(1, x) xobs <- x_glmnet[ry, , drop = FALSE] xmis <- x[wy, ] yobs <- y[ry] # Train imputation model # used later in the estiamtion require this. cv_lasso <- glmnet::cv.glmnet( x = xobs, y = yobs, family = "gaussian", nfolds = nfolds, alpha = 1 ) # Define Active Set glmnet_coefs <- as.matrix(coef(cv_lasso, s = "lambda.min" ))[, 1] AS <- which((glmnet_coefs != 0)[-1]) # Non-zero reg coefficinets # Perform regular norm draw from Bayesian linear model xas <- x_glmnet[, AS, drop = FALSE] vec <- mice.impute.norm( y = y, ry = ry, x = xas, wy = wy, ... ) vec } mice/R/mice.impute.sample.R0000644000176200001440000000166614330031647015224 0ustar liggesusers#' Imputation by simple random sampling #' #' Imputes a random sample from the observed \code{y} data #' #' This function takes a simple random sample from the observed values in #' \code{y}, and returns these as imputations. #' #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2017 #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords datagen #' @export mice.impute.sample <- function(y, ry, x = NULL, wy = NULL, ...) { if (is.null(wy)) { wy <- !ry } yry <- y[ry] if (length(yry) < 1) { return(rnorm(sum(wy))) } if (length(yry) == 1) yry <- rep(yry, 2) sample(yry, size = sum(wy), replace = TRUE) } mice/R/mice.impute.logreg.R0000644000176200001440000001504514334522175015223 0ustar liggesusers#' Imputation by logistic regression #' #' Imputes univariate missing data using logistic regression. #' #' @aliases mice.impute.logreg #' @inheritParams mice.impute.pmm #' @param ... Other named arguments. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Stef van Buuren, Karin Groothuis-Oudshoorn #' @details #' Imputation for binary response variables by the Bayesian logistic regression #' model (Rubin 1987, p. 169-170). The #' Bayesian method consists of the following steps: #' \enumerate{ #' \item Fit a logit, and find (bhat, V(bhat)) #' \item Draw BETA from N(bhat, V(bhat)) #' \item Compute predicted scores for m.d., i.e. logit-1(X BETA) #' \item Compare the score to a random (0,1) deviate, and impute. #' } #' The method relies on the #' standard \code{glm.fit} function. Warnings from \code{glm.fit} are #' suppressed. Perfect prediction is handled by the data augmentation #' method. #' #' @seealso \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple #' Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. #' Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN #' 90-74479-08-1. #' #' Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-Plus #' (2nd ed). Springer, Berlin. 
#' #' White, I., Daniel, R. and Royston, P (2010). Avoiding bias due to perfect #' prediction in multiple imputation of incomplete categorical variables. #' Computational Statistics and Data Analysis, 54:22672275. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.logreg <- function(y, ry, x, wy = NULL, ...) { if (is.null(wy)) wy <- !ry # augment data in order to evade perfect prediction aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit( x = x[ry, , drop = FALSE], y = y[ry], family = quasibinomial(link = logit), weights = w[ry] )) fit <- eval(expr) fit.sum <- summary.glm(fit) beta <- coef(fit) rv <- t(chol(sym(fit.sum$cov.unscaled))) beta.star <- beta + rv %*% rnorm(ncol(rv)) # draw imputations p <- 1 / (1 + exp(-(x[wy, , drop = FALSE] %*% beta.star))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } vec } #' Imputation by logistic regression using the bootstrap #' #' Imputes univariate missing data using logistic regression #' by a bootstrapped logistic regression model. #' The bootstrap method draws a simple bootstrap sample with replacement #' from the observed data \code{y[ry]} and \code{x[ry, ]}. #' #' @aliases mice.impute.logreg.boot #' @inheritParams mice.impute.pmm #' @param ... Other named arguments. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000, 2011 #' @seealso \code{\link{mice}}, \code{\link{glm}}, \code{\link{glm.fit}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-categorical.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.logreg.boot <- function(y, ry, x, wy = NULL, ...) 
{ if (is.null(wy)) wy <- !ry # draw a bootstrap sample for yobs and xobs xobs <- x[ry, , drop = FALSE] yobs <- y[ry] n1 <- sum(ry) s <- sample(n1, n1, replace = TRUE) doty <- y doty[ry] <- yobs[s] dotx <- x dotx[ry, ] <- xobs[s, , drop = FALSE] x <- dotx y <- doty # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit( x = x[ry, , drop = FALSE], y = y[ry], family = binomial(link = logit) )) fit <- suppressWarnings(eval(expr)) beta.star <- coef(fit) # draw imputations p <- 1 / (1 + exp(-(x[wy, ] %*% beta.star))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } vec } augment <- function(y, ry, x, wy, maxcat = 50) { # define augmented data for stabilizing logreg and polyreg # by the ad hoc procedure of White, Daniel & Royston, CSDA, 2010 # This function will prevent augmented data beyond the min and # the max of the data # Input: # x: numeric data.frame (n rows) # y: factor or numeric vector (lengt n) # ry: logical vector (length n) # Output: # return a list with elements y, ry, x, and w with length n+2*(ncol(x))*length(levels(y)) # SvB May 2009 icod <- sort(unique(unclass(y))) k <- length(icod) if (k > maxcat) { stop("Maximum number of categories (", maxcat, ") exceeded") } p <- ncol(x) # skip augmentation if there are no predictors if (p == 0) { return(list(y = y, ry = ry, x = x, wy = wy, w = rep(1, length(y)))) } ## skip augmentation if there is only 1 missing value 12jul2012 ## this need to be fixed 12jul2011 if (sum(!ry) == 1) { return(list(y = y, ry = ry, x = x, wy = wy, w = rep(1, length(y)))) } # calculate values to augment mean <- apply(x, 2, mean, na.rm = TRUE) sd <- sqrt(apply(x, 2, var, na.rm = TRUE)) minx <- apply(x, 2, min, na.rm = TRUE) maxx <- apply(x, 2, max, na.rm = TRUE) nr <- 2 * p * k a <- matrix(mean, nrow = nr, ncol = p, byrow = TRUE) b <- matrix(rep(c(rep.int(c(0.5, -0.5), k), rep.int(0, nr)), length = nr * p), nrow = nr, ncol = p, byrow = FALSE) c <- matrix(sd, nrow = nr, ncol = p, byrow = TRUE) d <- a + b * c d <- pmax(matrix(minx, nrow = nr, ncol = p, byrow = TRUE), d, na.rm = TRUE) d <- pmin(matrix(maxx, nrow = nr, ncol = p, byrow = TRUE), d, na.rm = TRUE) e <- rep(rep(icod, each = 2), p) dimnames(d) <- list(paste0("AUG", seq_len(nrow(d))), dimnames(x)[[2]]) xa <- rbind.data.frame(x, d) # beware, concatenation of factors ya <- if (is.factor(y)) as.factor(levels(y)[c(y, e)]) else c(y, e) rya <- c(ry, rep.int(TRUE, nr)) wya <- c(wy, rep.int(FALSE, nr)) wa <- c(rep.int(1, length(y)), rep.int((p + 1) / nr, nr)) list(y = ya, ry = rya, x = xa, w = wa, wy = wya) } mice/R/squeeze.R0000644000176200001440000000172514330031606013175 0ustar liggesusers#' Squeeze the imputed values to be within specified boundaries. #' #' This function replaces any values in \code{x} that are lower than #' \code{bounds[1]} by \code{bounds[1]}, and replaces any values higher #' than \code{bounds[2]} by \code{bounds[2]}. #' #' @aliases squeeze #' @param x A numerical vector with values #' @param bounds A numerical vector of length 2 containing the lower and upper bounds. #' By default, the bounds are to the minimum and maximum values in \code{x}. #' @param r A logical vector of length \code{length(x)} that is used to select a #' subset in \code{x} before calculating automatic bounds. #' @return A vector of length \code{length(x)}. #' @author Stef van Buuren, 2011. 
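#' @examples
#' # Minimal sketch with made-up values: truncate to the interval [1, 10]
#' squeeze(c(-3, 5, 15), bounds = c(1, 10))
#' # Automatic bounds taken from the subset selected by r
#' x <- c(1, 2, 3, 100)
#' squeeze(x, r = x < 50)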
#' @export squeeze <- function(x, bounds = c(min(x[r]), max(x[r])), r = rep.int(TRUE, length(x))) { if (length(r) != length(x)) { stop("Different length of vectors x and r") } x[x < bounds[1]] <- bounds[1] x[x > bounds[2]] <- bounds[2] x } mice/R/mice.impute.lda.R0000644000176200001440000000526014330031647014475 0ustar liggesusers#' Imputation by linear discriminant analysis #' #' Imputes univariate missing data using linear discriminant analysis #' #' @inheritParams mice.impute.pmm #' @param ... Other named arguments. Not used. #' @return Vector with imputed data, of type factor, and of length #' \code{sum(wy)} #' @details Imputation of categorical response variables by linear discriminant analysis. #' This function uses the Venables/Ripley functions \code{lda()} and #' \code{predict.lda()} to compute posterior probabilities for each incomplete #' case, and draws the imputations from this posterior. #' #' This function can be called from within the Gibbs sampler by specifying #' \code{"lda"} in the \code{method} argument of \code{mice()}. This method is usually #' faster and uses fewer resources than calling the function, but the statistical #' properties may not be as good (Brand, 1999). #' \code{\link{mice.impute.polyreg}}. #' @section Warning: The function does not incorporate the variability of the #' discriminant weight, so it is not 'proper' in the sense of Rubin. For small #' samples and rare categories in the \code{y}, variability of the imputed data #' could therefore be underestimated. #' #' Added: SvB June 2009 Tried to include bootstrap, but disabled since #' bootstrapping may easily lead to constant variables within groups. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{mice}}, \code{link{mice.impute.polyreg}}, #' \code{\link[MASS]{lda}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple #' Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. #' Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. ISBN #' 90-74479-08-1. #' #' Venables, W.N. & Ripley, B.D. (1997). Modern applied statistics with S-PLUS #' (2nd ed). Springer, Berlin. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.lda <- function(y, ry, x, wy = NULL, ...) { install.on.demand("MASS", ...) 
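  # Outline (added comment): (1) fit lda() on the observed rows, (2) compute the
  # posterior class probabilities for the rows to be imputed, and (3) draw one
  # category per row from its posterior distribution.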
if (is.null(wy)) wy <- !ry fy <- as.factor(y) nc <- length(levels(fy)) # SvB June 2009 - take bootstrap sample of training data # idx <- sample((1:length(y))[ry], size=sum(ry), replace=TRUE) # x[ry,] <- x[idx,] # y[ry] <- y[idx] # end bootstrap fit <- MASS::lda(x, fy, subset = ry) post <- predict(fit, x[wy, , drop = FALSE])$posterior un <- rep(runif(sum(wy)), each = nc) idx <- 1 + apply(un > apply(post, 1, cumsum), 2, sum) levels(fy)[idx] } mice/R/validate.arguments.R0000644000176200001440000000061714330031606015310 0ustar liggesusersvalidate.arguments <- function(y, ry, x, wy, allow.x.NULL = FALSE, allow.x.NA = FALSE) { # validate standard arguments of mice.impute functions if (!allow.x.NULL && is.null(x)) { stop("Cannot handle NULL value for `x`") } if (!allow.x.NA && anyNA(x)) { stop("Cannot handle NA in `x`") } if (!is.vector(ry)) { stop("`ry` is not a vector") } } mice/R/ibind.R0000644000176200001440000000660714433400023012602 0ustar liggesusers#' Enlarge number of imputations by combining \code{mids} objects #' #' This function combines two \code{mids} objects \code{x} and \code{y} into a #' single \code{mids} object, with the objective of increasing the number of #' imputed data sets. If the number of imputations in \code{x} and \code{y} are #' \code{m(x)} and \code{m(y)}, then the combined object will have #' \code{m(x)+m(y)} imputations. #' #' The two \code{mids} objects are required to #' have the same underlying multiple imputation model and should #' be fitted on the same data. #' #' @param x A \code{mids} object. #' @param y A \code{mids} object. #' @return An S3 object of class \code{mids} #' @author Karin Groothuis-Oudshoorn, Stef van Buuren #' @seealso \code{\link[=mids-class]{mids}} #' @keywords manip #' @examples #' data(nhanes) #' imp1 <- mice(nhanes, m = 1, maxit = 2, print = FALSE) #' imp1$m #' #' imp2 <- mice(nhanes, m = 3, maxit = 3, print = FALSE) #' imp2$m #' #' imp12 <- ibind(imp1, imp2) #' imp12$m #' plot(imp12) #' @export ibind <- function(x, y) { call <- match.call() call <- c(x$call, call) if (!is.mids(y) && !is.mids(x)) { stop("Arguments `x` and `y` not of class `mids`") } if (!identical(is.na(x$data), is.na(y$data))) { stop("Differences detected in the missing data pattern") } if (!identical(x$data[!is.na(x$data)], y$data[!is.na(y$data)])) { stop("Differences detected in the observed data") } if (!identical(x$where, y$where)) { stop("Differences detected between `x$where` and `y$where`") } if (!identical(x$blocks, y$blocks)) { stop("Differences detected between `x$blocks` and `y$blocks`") } if (!identical(x$method, y$method)) { stop("Differences detected between `x$method` and `y$method`") } if (!identical(x$predictorMatrix, y$predictorMatrix)) { stop("Differences detected between `x$predictorMatrix` and `y$predictorMatrix`") } if (!identical(x$visitSequence, y$visitSequence)) { stop("Differences detected between `x$visitSequence` and `y$visitSequence`") } if (!identical(x$post, y$post)) { stop("Differences detected between `x$post` and `y$post`") } if (!identical(x$blots, y$blots)) { stop("Differences detected between `x$blots` and `y$blots`") } visitSequence <- x$visitSequence imp <- vector("list", ncol(x$data)) names(imp) <- names(x$data) for (j in visitSequence) { imp[[j]] <- cbind(x$imp[[j]], y$imp[[j]]) } m <- (x$m + y$m) iteration <- max(x$iteration, y$iteration) chainMean <- chainVar <- initialize.chain(x$blocks, iteration, m) for (j in seq_len(x$m)) { chainMean[, seq_len(x$iteration), j] <- x$chainMean[, , j] chainVar[, seq_len(x$iteration), j] <- 
x$chainVar[, , j] } for (j in seq_len(y$m)) { chainMean[, seq_len(y$iteration), j + x$m] <- y$chainMean[, , j] chainVar[, seq_len(y$iteration), j + x$m] <- y$chainVar[, , j] } midsobj <- list( data = x$data, imp = imp, m = m, where = x$where, blocks = x$blocks, call = call, nmis = x$nmis, method = x$method, predictorMatrix = x$predictorMatrix, visitSequence = visitSequence, formulas = x$formulas, post = x$post, blots = x$blots, seed = x$seed, iteration = iteration, lastSeedValue = x$lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = x$loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } mice/R/edit.setup.R0000644000176200001440000000533014334522175013606 0ustar liggesusersedit.setup <- function(data, setup, allow.na = FALSE, remove.constant = TRUE, remove.collinear = TRUE, remove_collinear = TRUE, ...) { # legacy handling if (!remove_collinear) remove.collinear <- FALSE # edits the imputation model setup # When it detec constant or collinear variables, write in loggedEvents # and continues imputation with reduced model pred <- setup$predictorMatrix meth <- setup$method vis <- setup$visitSequence post <- setup$post # FIXME: this function is not yet adapted to blocks if (ncol(pred) != nrow(pred) || length(meth) != nrow(pred) || ncol(data) != nrow(pred)) { return(setup) } varnames <- colnames(data) # remove constant variables but leave passive variables untouched for (j in seq_len(ncol(data))) { if (!is.passive(meth[j])) { d.j <- data[, j] v <- if (is.character(d.j)) NA else var(as.numeric(d.j), na.rm = TRUE) constant <- if (allow.na) { if (is.na(v)) FALSE else v < 1000 * .Machine$double.eps } else { is.na(v) || v < 1000 * .Machine$double.eps } didlog <- FALSE if (constant && any(pred[, j] != 0) && remove.constant) { out <- varnames[j] pred[, j] <- 0 updateLog(out = out, meth = "constant") didlog <- TRUE } if (constant && meth[j] != "" && remove.constant) { out <- varnames[j] pred[j, ] <- 0 if (!didlog) { updateLog(out = out, meth = "constant") } meth[j] <- "" vis <- vis[vis != j] post[j] <- "" } } } ## remove collinear variables ispredictor <- apply(pred != 0, 2, any) if (any(ispredictor)) { droplist <- find.collinear(data[, ispredictor, drop = FALSE], ...) } else { droplist <- NULL } if (length(droplist) > 0) { for (k in seq_along(droplist)) { j <- which(varnames %in% droplist[k]) didlog <- FALSE if (any(pred[, j] != 0) && remove.collinear) { # remove as predictor out <- varnames[j] pred[, j] <- 0 updateLog(out = out, meth = "collinear") didlog <- TRUE } if (meth[j] != "" && remove.collinear) { out <- varnames[j] pred[j, ] <- 0 if (!didlog) { updateLog(out = out, meth = "collinear") } meth[j] <- "" vis <- vis[vis != j] post[j] <- "" } } } if (all(pred == 0L)) { stop("`mice` detected constant and/or collinear variables. No predictors were left after their removal.") } setup$predictorMatrix <- pred setup$visitSequence <- vis setup$post <- post setup$method <- meth setup } mice/R/mipo.R0000644000176200001440000001344014433402232012456 0ustar liggesusers#' \code{mipo}: Multiple imputation pooled object #' #' The \code{mipo} object contains the results of the pooling step. #' The function \code{\link{pool}} generates an object of class \code{mipo}. 
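#' @examples
#' # A minimal sketch of a typical workflow that yields a mipo object,
#' # using the built-in nhanes data
#' imp <- mice(nhanes, m = 5, print = FALSE)
#' fit <- with(imp, lm(chl ~ bmi + age))
#' est <- pool(fit) # 'est' has class "mipo"
#' summary(est)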
#' #' @param x An object of class \code{mipo} #' @param object An object of class \code{mipo} #' @param mira.obj An object of class \code{mira} #' @inheritParams broom::lm_tidiers #' @param z Data frame with a tidied version of a coefficient matrix #' @param conf.int Logical indicating whether to include #' a confidence interval. The default is \code{FALSE}. #' @param conf.level Confidence level of the interval, used only if #' \code{conf.int = TRUE}. Number between 0 and 1. #' @param exponentiate Flag indicating whether to exponentiate the #' coefficient estimates and confidence intervals (typical for #' logistic regression). #' @param \dots Arguments passed down #' @details An object class \code{mipo} is a \code{list} with #' elements: \code{call}, \code{m}, \code{pooled} and \code{glanced}. #' #' The \code{pooled} elements is a data frame with columns: #' \tabular{ll}{ #' \code{estimate}\tab Pooled complete data estimate\cr #' \code{ubar} \tab Within-imputation variance of \code{estimate}\cr #' \code{b} \tab Between-imputation variance of \code{estimate}\cr #' \code{t} \tab Total variance, of \code{estimate}\cr #' \code{dfcom} \tab Degrees of freedom in complete data\cr #' \code{df} \tab Degrees of freedom of $t$-statistic\cr #' \code{riv} \tab Relative increase in variance\cr #' \code{lambda} \tab Proportion attributable to the missingness\cr #' \code{fmi} \tab Fraction of missing information\cr #' } #' The names of the terms are stored as \code{row.names(pooled)}. #' #' The \code{glanced} elements is a \code{data.frame} with \code{m} rows. #' The precise composition depends on the class of the complete-data analysis. #' At least field \code{nobs} is expected to be present. #' #' The \code{process_mipo} is a helper function to process a #' tidied mipo object, and is normally not called directly. #' It adds a confidence interval, and optionally exponentiates, the result. #' @seealso \code{\link{pool}}, #' \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords classes #' @name mipo NULL #' @rdname mipo #' @export mipo <- function(mira.obj, ...) { if (!is.mira(mira.obj)) stop("`mira.obj` not of class `mira`") structure(pool(mira.obj, ...), class = c("mipo")) } #' @return The \code{summary} method returns a data frame with summary statistics of the pooled analysis. #' @rdname mipo #' @export summary.mipo <- function(object, type = c("tests", "all"), conf.int = FALSE, conf.level = .95, exponentiate = FALSE, ...) { type <- match.arg(type) m <- object$m x <- object$pooled std.error <- sqrt(x$t) statistic <- x$estimate / std.error p.value <- 2 * (pt(abs(statistic), pmax(x$df, 0.001), lower.tail = FALSE)) z <- data.frame(x, std.error = std.error, statistic = statistic, p.value = p.value ) z <- process_mipo(z, object, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate ) parnames <- names(z)[1L:(pmatch("m", names(z)) - 1L)] if (type == "tests") { out <- c("m", "riv", "lambda", "fmi", "ubar", "b", "t", "dfcom") keep <- base::setdiff(names(z), out) z <- z[, keep] } class(z) <- c("mipo.summary", "data.frame") z } #' @rdname mipo #' @export print.mipo <- function(x, ...) { cat("Class: mipo m =", x$m, "\n") print.data.frame(x$pooled, ...) invisible(x) } #' @rdname mipo #' @export print.mipo.summary <- function(x, ...) 
{ print.data.frame(x, ...) invisible(x) } #' @rdname mipo #' @keywords internal process_mipo <- function(z, x, conf.int = FALSE, conf.level = .95, exponentiate = FALSE) { if (exponentiate) { # save transformation function for use on confidence interval trans <- exp } else { trans <- identity } CI <- NULL if (conf.int) { # avoid "Waiting for profiling to be done..." message CI <- suppressMessages(confint(x, level = conf.level)) } z$estimate <- trans(z$estimate) # combine and sort columns in desired order parnames <- names(z)[1L:(pmatch("m", names(z)) - 1L)] if (!is.null(CI)) { z <- cbind( z[, parnames, drop = FALSE], z[, c("m", "estimate", "std.error", "statistic", "df", "p.value")], trans(unrowname(CI)), z[, c("riv", "lambda", "fmi", "ubar", "b", "t", "dfcom")] ) } else { z <- cbind( z[, parnames, drop = FALSE], z[, c("m", "estimate", "std.error", "statistic", "df", "p.value")], z[, c("riv", "lambda", "fmi", "ubar", "b", "t", "dfcom")] ) } z } vcov.mipo <- function(object, ...) { so <- diag(object$t) dimnames(so) <- list(object$term, object$term) so } confint.mipo <- function(object, parm, level = 0.95, ...) { pooled <- object$pooled cf <- getqbar(object) df <- pooled$df se <- sqrt(pooled$t) pnames <- names(df) <- names(se) <- names(cf) <- row.names(pooled) if (missing(parm)) { parm <- pnames } else if (is.numeric(parm)) { parm <- pnames[parm] } a <- (1 - level) / 2 a <- c(a, 1 - a) fac <- qt(a, df) pct <- fmt.perc(a, 3) ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct) ) ci[, 1] <- cf[parm] + qt(a[1], df[parm]) * se[parm] ci[, 2] <- cf[parm] + qt(a[2], df[parm]) * se[parm] ci } unrowname <- function(x) { rownames(x) <- NULL x } fmt.perc <- function(probs, digits) { paste( format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%" ) } mice/R/toenail2.R0000644000176200001440000000410314330031606013222 0ustar liggesusers#' Toenail data #' #' The toenail data come from a Multicenter study comparing two oral #' treatments for toenail infection. Patients were evaluated for the #' degree of separation of the nail. Patients were randomized into two #' treatments and were followed over seven visits - four in the first #' year and yearly thereafter. The patients have not been treated #' prior to the first visit so this should be regarded as the #' baseline. #' @name toenail2 #' @docType data #' @format A data frame with 1908 observations on the following 5 variables: #' \describe{ #' \item{\code{patientID}}{a numeric vector giving the ID of patient} #' \item{\code{outcome}}{a factor with 2 levels giving the response} #' \item{\code{treatment}}{a factor with 2 levels giving the treatment group} #' \item{\code{time}}{a numeric vector giving the time of the visit #' (not exactly monthly intervals hence not round numbers)} #' \item{\code{visit}}{an integer giving the number of the visit} #' } #' @source #' De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De #' Keyser, P. (1998). Twelve weeks of continuous oral therapy for #' toenail onychomycosis caused by dermatophytes: A double-blind #' comparative trial of terbinafine 250 mg/day versus itraconazole 200 #' mg/day. Journal of the American Academy of Dermatology, 38, 57-63. #' @references #' Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of #' quadrature points in a logistic random-effects model: An example. #' Journal of the Royal Statistical Society, Series C, 50, 325-335. #' #' G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, #' Wiley and Sons, New York, USA. 
#' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible #' Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. #' Boca Raton, FL. #' @keywords datasets #' @seealso \code{\link{toenail}} #' @details Apart from formatting, this dataset is identical to #' \code{toenail}. The formatting is taken identical to #' \code{data("toenail", package = "HSAUR3")}. NULL mice/R/mnar_demo_data.R0000644000176200001440000000035414330031606014443 0ustar liggesusers#' MNAR demo data #' #' A toy example from Margarita Moreno-Betancur for checking NARFCS. #' #' A small dataset with just three columns. #' @source \url{https://github.com/moreno-betancur/NARFCS/blob/master/datmis.csv} "mnar_demo_data" mice/R/mice.impute.polr.R0000644000176200001440000001062014334522175014712 0ustar liggesusers#' Imputation of ordered data by polytomous regression #' #' Imputes missing data in a categorical variable using polytomous regression #' @aliases mice.impute.polr #' @inheritParams mice.impute.pmm #' @param nnet.maxit Tuning parameter for \code{nnet()}. #' @param nnet.trace Tuning parameter for \code{nnet()}. #' @param nnet.MaxNWts Tuning parameter for \code{nnet()}. #' @param polr.to.loggedEvents A logical indicating whether each fallback #' to the \code{multinom()} function should be written to \code{loggedEvents}. #' The default is \code{FALSE}. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' The function \code{mice.impute.polr()} imputes for ordered categorical response #' variables by the proportional odds logistic regression (polr) model. The #' function repeatedly applies logistic regression on the successive splits. The #' model is also known as the cumulative link model. #' #' By default, ordered factors with more than two levels are imputed by #' \code{mice.impute.polr}. #' #' The algorithm of \code{mice.impute.polr} uses the function \code{polr()} from #' the \code{MASS} package. #' #' In order to avoid bias due to perfect prediction, the algorithm augment the #' data according to the method of White, Daniel and Royston (2010). #' #' The call to \code{polr} might fail, usually because the data are very sparse. #' In that case, \code{multinom} is tried as a fallback. #' If the local flag \code{polr.to.loggedEvents} is set to TRUE, #' a record is written #' to the \code{loggedEvents} component of the \code{\link{mids}} object. #' Use \code{mice(data, polr.to.loggedEvents = TRUE)} to set the flag. #' #' @note #' In December 2019 Simon White alerted that the #' \code{polr} could always fail silently. I can confirm this behaviour for #' versions \code{mice 3.0.0 - mice 3.6.6}, so any method requests #' for \code{polr} in these versions were in fact handled by \code{multinom}. #' See \url{https://github.com/amices/mice/issues/206} for details. #' #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 #' @seealso \code{\link{mice}}, \code{\link[nnet]{multinom}}, #' \code{\link[MASS]{polr}} #' @references #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' #' Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of #' multiple imputation strategies for the statistical analysis of incomplete #' data sets.} Dissertation. Rotterdam: Erasmus University. #' #' White, I.R., Daniel, R. Royston, P. (2010). 
Avoiding bias due to perfect #' prediction in multiple imputation of incomplete categorical variables. #' \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. #' #' Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with #' S-Plus (4th ed)}. Springer, Berlin. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.polr <- function(y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, polr.to.loggedEvents = FALSE, ...) { if (is.null(wy)) wy <- !ry # augment data to evade issues with perfect prediction x <- as.matrix(x) aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w xy <- cbind.data.frame(y = y, x = x) ## polr may fail on sparse data. We revert to multinom in such cases. fit <- try( suppressWarnings(MASS::polr(formula(xy), data = xy[ry, , drop = FALSE], weights = w[ry], control = list(...) )), silent = TRUE ) if (inherits(fit, "try-error")) { if (polr.to.loggedEvents) { updateLog(out = "polr falls back to multinom", frame = 6) } fit <- nnet::multinom(formula(xy), data = xy[ry, , drop = FALSE], weights = w[ry], maxit = nnet.maxit, trace = nnet.trace, MaxNWts = nnet.MaxNWts, ... ) } post <- predict(fit, xy[wy, , drop = FALSE], type = "probs") if (sum(wy) == 1) { post <- matrix(post, nrow = 1, ncol = length(post)) } fy <- as.factor(y) nc <- length(levels(fy)) un <- rep(runif(sum(wy)), each = nc) if (is.vector(post)) { post <- matrix(c(1 - post, post), ncol = 2) } draws <- un > apply(post, 1, cumsum) idx <- 1 + apply(draws, 2, sum) levels(fy)[idx] } mice/R/mice.impute.mnar.logreg.R0000644000176200001440000000211214334522175016146 0ustar liggesusers#' @rdname mice.impute.mnar #' @export mice.impute.mnar.logreg <- function(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) { ## Undentifiable part: u <- parse.ums(x, ums = ums, umx = umx, ...) 
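  # parse.ums() returns the design matrix (u$x) and the sensitivity parameters
  # (u$delta) of the unidentifiable part; below they enter the linear predictor
  # as an offset when the imputations are drawn.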
if (is.null(wy)) wy <- !ry wyold <- wy ## Identifiable part: exactly the same as mice.impute.logreg # augment data in order to evade perfect prediction aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w # fit model x <- cbind(1, as.matrix(x)) expr <- expression(glm.fit( x = x[ry, , drop = FALSE], y = y[ry], family = quasibinomial(link = logit), weights = w[ry] )) fit <- eval(expr) fit.sum <- summary.glm(fit) beta <- coef(fit) rv <- t(chol(sym(fit.sum$cov.unscaled))) beta.star <- beta + rv %*% rnorm(ncol(rv)) ## Draw imputations p <- 1 / (1 + exp(-(x[wy, , drop = FALSE] %*% beta.star + u$x[wyold, , drop = FALSE] %*% u$delta))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } vec } mice/R/initialize.imp.R0000644000176200001440000000173714330031606014444 0ustar liggesusersinitialize.imp <- function(data, m, ignore, where, blocks, visitSequence, method, nmis, data.init) { imp <- vector("list", ncol(data)) names(imp) <- names(data) r <- !is.na(data) for (h in visitSequence) { for (j in blocks[[h]]) { y <- data[, j] ry <- r[, j] & !ignore wy <- where[, j] imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m)) dimnames(imp[[j]]) <- list(row.names(data)[wy], 1:m) if (method[h] != "") { for (i in seq_len(m)) { if (nmis[j] < nrow(data) && is.null(data.init)) { imp[[j]][, i] <- mice.impute.sample(y, ry, wy = wy) } else if (!is.null(data.init)) { imp[[j]][, i] <- data.init[wy, j] } else { if (is.factor(y)) { imp[[j]][, i] <- sample(levels(y), nrow(data)) } else { imp[[j]][, i] <- rnorm(nrow(data)) } } } } } } imp } mice/R/tbc.R0000644000176200001440000000473414330031606012267 0ustar liggesusers#' Terneuzen birth cohort #' #' Data of subset of the Terneuzen Birth Cohort data on child growth. #' #' This \code{tbc} data set is a random subset of persons from a much larger #' collection of data from the Terneuzen Birth Cohort. The total cohort #' comprises of 2604 unique persons, whereas the subset in \code{tbc} covers 306 #' persons. The \code{tbc.target} is an auxiliary data set containing two #' outcomes at adult age. For more details, see De Kroon et al (2008, 2010, #' 2011). The imputation methodology is explained in Chapter 9 of Van Buuren #' (2012). #' #' @name tbc #' @aliases tbc tbc.target terneuzen #' @docType data #' @format \code{tbs} is a data frame with 3951 rows and 11 columns: #' \describe{ #' \item{id}{Person number} #' \item{occ}{Occasion number} #' \item{nocc}{Number of occasions} #' \item{first}{Is this the first record for this person? (TRUE/FALSE)} #' \item{typ}{Type of data (all observed)} #' \item{age}{Age (years)} #' \item{sex}{Sex 1=M, 2=F} #' \item{hgt.z}{Height Z-score} #' \item{wgt.z}{Weight Z-score} #' \item{bmi.z}{BMI Z-score} #' \item{ao}{Adult overweight (0=no, 1=yes)} #' } #' #' \code{tbc.target} is a data frame with 2612 rows and 3 columns: #' \describe{ #' \item{id}{Person number} #' \item{ao}{Adult overweight (0=no, 1=yes)} #' \item{bmi.z.jv}{BMI Z-score as young adult (18-29 years)} #' } #' @source De Kroon, M. L. A., Renders, C. M., Kuipers, E. C., van Wouwe, J. P., #' van Buuren, S., de Jonge, G. A., Hirasing, R. A. (2008). Identifying #' metabolic syndrome without blood tests in young adults - The Terneuzen birth #' cohort. \emph{European Journal of Public Health}, \emph{18}(6), 656-660. #' #' De Kroon, M. L. A., Renders, C. M., Van Wouwe, J. P., Van Buuren, S., #' Hirasing, R. A. (2010). 
The Terneuzen birth cohort: BMI changes between 2 #' and 6 years correlate strongest with adult overweight. \emph{PLoS ONE}, #' \emph{5}(2), e9155. #' #' De Kroon, M. L. A. (2011). \emph{The Terneuzen Birth Cohort. Detection and #' Prevention of Overweight and Cardiometabolic Risk from Infancy Onward.} #' Dissertation, Vrije Universiteit, Amsterdam. #' \url{https://research.vu.nl/en/publications/the-terneuzen-birth-cohort-detection-and-prevention-of-overweight} #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-rastering.html#terneuzen-birth-cohort}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets #' @examples #' data <- tbc #' md.pattern(data) NULL mice/R/mice.impute.quadratic.R0000644000176200001440000001250014436637243015720 0ustar liggesusers#' Imputation of quadratic terms #' #' Imputes incomplete variable that appears as both #' main effect and quadratic effect in the complete-data model. #' #' @aliases mice.impute.quadratic quadratic #' @inheritParams mice.impute.pmm #' @param quad.outcome The name of the outcome in the quadratic analysis as a #' character string. For example, if the substantive model of interest is #' \code{y ~ x + xx}, then \code{"y"} would be the \code{quad.outcome} #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' This function implements the "polynomial combination" method. #' First, the polynomial #' combination \eqn{Z = Y \beta_1 + Y^2 \beta_2} is formed. #' \eqn{Z} is imputed by #' predictive mean matching, followed by a decomposition of the imputed #' data \eqn{Z} #' into components \eqn{Y} and \eqn{Y^2}. #' See Van Buuren (2012, pp. 139-141) and Vink #' et al (2012) for more details. The method ensures that 1) the imputed data #' for \eqn{Y} and \eqn{Y^2} are mutually consistent, and 2) that provides unbiased #' estimates of the regression weights in a complete-data linear regression that #' use both \eqn{Y} and \eqn{Y^2}. #' #' @note There are two situations to consider. If only the linear term \code{Y} #' is present in the data, calculate the quadratic term \code{YY} after #' imputation. If both the linear term \code{Y} and the the quadratic term #' \code{YY} are variables in the data, then first impute \code{Y} by calling #' \code{mice.impute.quadratic()} on \code{Y}, and then impute \code{YY} by #' passive imputation as \code{meth["YY"] <- "~I(Y^2)"}. See example section #' for details. Generally, we would like \code{YY} to be present in the data if #' we need to preserve quadratic relations between \code{YY} and any third #' variables in the multivariate incomplete data that we might wish to impute. #' @author Mingyang Cai and Gerko Vink #' @seealso \code{\link{mice.impute.pmm}} #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' Vink, G., van Buuren, S. (2013). Multiple Imputation of Squared Terms. #' \emph{Sociological Methods & Research}, 42:598-607. 
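#'
#' A minimal numerical sketch (with hypothetical values) of the decomposition
#' used internally: a value z of the polynomial combination is mapped back to
#' the two candidate roots of b2 * y^2 + b1 * y = z.
#'
#' b1 <- 0.5; b2 <- 0.5; y <- 2
#' z <- b1 * y + b2 * y^2
#' (-b1 + sqrt(b1^2 + 4 * b2 * z)) / (2 * b2) # upper root, recovers y = 2
#' (-b1 - sqrt(b1^2 + 4 * b2 * z)) / (2 * b2) # lower root, -3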
#' @family univariate imputation functions #' @keywords datagen #' @examples #' # Create Data #' B1 <- .5 #' B2 <- .5 #' X <- rnorm(1000) #' XX <- X^2 #' e <- rnorm(1000, 0, 1) #' Y <- B1 * X + B2 * XX + e #' dat <- data.frame(x = X, xx = XX, y = Y) #' #' # Impose 25 percent MCAR Missingness #' dat[0 == rbinom(1000, 1, 1 - .25), 1:2] <- NA #' #' # Prepare data for imputation #' ini <- mice(dat, maxit = 0) #' meth <- c("quadratic", "~I(x^2)", "") #' pred <- ini$pred #' pred[, "xx"] <- 0 #' #' # Impute data #' imp <- mice(dat, meth = meth, pred = pred, quad.outcome = "y") #' #' # Pool results #' pool(with(imp, lm(y ~ x + xx))) #' #' # Plot results #' stripplot(imp) #' plot(dat$x, dat$xx, col = mdc(1), xlab = "x", ylab = "xx") #' cmp <- complete(imp) #' points(cmp$x[is.na(dat$x)], cmp$xx[is.na(dat$x)], col = mdc(2)) #' @export mice.impute.quadratic <- function(y, ry, x, wy = NULL, quad.outcome = NULL, ...) { if (is.null(quad.outcome)) stop("Argument 'quad.outcome' for mice.impute.quadratic has not been specified") if (!quad.outcome %in% colnames(x)) stop("The name specified for the outcome in 'quad.outcome' can not be found in the data") if (is.null(wy)) { wy <- !ry } x <- cbind(1, as.matrix(x)) # create the square of y y2 <- y^2 # create z based on B1 * y + B2 * y^2 parm <- .norm.draw(x[, quad.outcome], ry, cbind(1, y, y2)) zobs <- cbind(y, y2) %*% parm$coef[-1] # impute z zmis <- mice.impute.pmm(zobs, ry, x[, -1]) zstar <- zobs zstar[!ry] <- zmis zstar <- as.vector(zstar) # decompositions of z into roots b1 <- parm$coef[2] b2 <- parm$coef[3] y.low <- -(1 / (2 * b2)) * (sqrt(4 * b2 * zstar + b1^2) + b1) y.up <- (1 / (2 * b2)) * (sqrt(4 * b2 * zstar + b1^2) - b1) # calculate the abscissa at the parabolic minimum/maximum y.min <- -b1 / (2 * b2) # data augmentation data.augment <- data.frame( V = c((y > y.min)[ry] * 1, 1, 1, 0, 0, 1, 1, 0, 0), q = c( x[ry, quad.outcome], mean(x[ry, quad.outcome]) + sd(x[ry, quad.outcome]), mean(x[ry, quad.outcome]) - sd(x[ry, quad.outcome]), mean(x[ry, quad.outcome]) + sd(x[ry, quad.outcome]), mean(x[ry, quad.outcome]) - sd(x[ry, quad.outcome]), mean(x[ry, quad.outcome]), mean(x[ry, quad.outcome]), mean(x[ry, quad.outcome]), mean(x[ry, quad.outcome]) ), zstar = c( zstar[ry], mean(zstar[ry]), mean(zstar[ry]), mean(zstar[ry]), mean(zstar[ry]), mean(zstar[ry]) + sd(zstar[ry]), mean(zstar[ry]) - sd(zstar[ry]), mean(zstar[ry]) + sd(zstar[ry]), mean(zstar[ry]) - sd(zstar[ry]) ) ) w <- c(rep(1, nrow(data.augment) - 8), rep(3 / 8, 8)) # calculate regression parameters for vobs <- glm(V ~ q + zstar + q * zstar, family = quasibinomial, data = data.augment, weights = w ) # impute Vmis newdata <- data.frame(q = x[wy, quad.outcome], zstar = zstar[wy]) prob <- predict(vobs, newdata = newdata, type = "response", na.action = na.exclude ) idy <- rbinom(sum(wy), 1, prob = prob) # create final imputation ystar <- y.low[wy] ystar[idy == 1] <- y.up[wy][idy == 1] return(ystar) } mice/R/lm.R0000644000176200001440000000774014330031647012134 0ustar liggesusers#' Linear regression for \code{mids} object #' #' Applies \code{lm()} to multiply imputed data set #' #' This function is included for backward compatibility with V1.0. The function #' is superseded by \code{\link{with.mids}}. #' #' @param formula a formula object, with the response on the left of a ~ #' operator, and the terms, separated by + operators, on the right. See the #' documentation of \code{\link{lm}} and \code{\link{formula}} for details. 
#' @param data An object of type 'mids', which stands for 'multiply imputed data #' set', typically created by a call to function \code{mice()}. #' @param \dots Additional parameters passed to \code{\link{lm}} #' @return An objects of class \code{mira}, which stands for 'multiply imputed #' repeated analysis'. This object contains \code{data$m} distinct #' \code{lm.objects}, plus some descriptive information. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{lm}}, \code{\link[=mids-class]{mids}}, \code{\link[=mira-class]{mira}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords multivariate #' @examples #' imp <- mice(nhanes) #' fit <- lm.mids(bmi ~ hyp + chl, data = imp) #' fit #' @export lm.mids <- function(formula, data, ...) { .Deprecated("with", msg = "Use with(imp, lm(yourmodel)." ) # adapted 28/1/00 repeated complete data regression (lm) on a mids data set call <- match.call() if (!is.mids(data)) { stop("The data must have class mids") } analyses <- lapply(seq_len(data$m), function(i) lm(formula, data = complete(data, i), ...)) # return the complete data analyses as a list of length nimp object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) oldClass(object) <- c("mira", "lm") ## FEH object } #' Generalized linear model for \code{mids} object #' #' Applies \code{glm()} to a multiply imputed data set #' #' This function is included for backward compatibility with V1.0. The function #' is superseded by \code{\link{with.mids}}. #' #' @param formula a formula expression as for other regression models, of the #' form response ~ predictors. See the documentation of \code{\link{lm}} and #' \code{\link{formula}} for details. #' @param family The family of the glm model #' @param data An object of type \code{mids}, which stands for 'multiply imputed #' data set', typically created by function \code{mice()}. #' @param \dots Additional parameters passed to \code{\link{glm}}. #' @return An objects of class \code{mira}, which stands for 'multiply imputed #' repeated analysis'. This object contains \code{data$m} distinct #' \code{glm.objects}, plus some descriptive information. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{with.mids}}, \code{\link{glm}}, \code{\link[=mids-class]{mids}}, #' \code{\link[=mira-class]{mira}} #' @references Van Buuren, S., Groothuis-Oudshoorn, C.G.M. (2000) #' \emph{Multivariate Imputation by Chained Equations: MICE V1.0 User's manual.} #' Leiden: TNO Quality of Life. #' @keywords multivariate #' @examples #' #' imp <- mice(nhanes) #' #' # logistic regression on the imputed data #' fit <- glm.mids((hyp == 2) ~ bmi + chl, data = imp, family = binomial) #' fit #' @export glm.mids <- function(formula, family = gaussian, data, ...) { .Deprecated("with", msg = "Use with(imp, glm(yourmodel)." ) # adapted 04/02/00 repeated complete data regression (glm) on a mids data set call <- match.call() if (!is.mids(data)) { stop("The data must have class mids") } analyses <- lapply( seq_len(data$m), function(i) glm(formula, family = family, data = complete(data, i), ...) 
) # return the complete data analyses as a list of length nimp object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) oldClass(object) <- c("mira", "glm", "lm") ## FEH object } mice/R/mice.impute.polyreg.R0000644000176200001440000000667514330031647015431 0ustar liggesusers#' Imputation of unordered data by polytomous regression #' #' Imputes missing data in a categorical variable using polytomous regression #' #' @aliases mice.impute.polyreg #' @inheritParams mice.impute.pmm #' @param nnet.maxit Tuning parameter for \code{nnet()}. #' @param nnet.trace Tuning parameter for \code{nnet()}. #' @param nnet.MaxNWts Tuning parameter for \code{nnet()}. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000-2010 #' @details #' The function \code{mice.impute.polyreg()} imputes categorical response #' variables by the Bayesian polytomous regression model. See J.P.L. Brand #' (1999), Chapter 4, Appendix B. #' #' By default, unordered factors with more than two levels are imputed by #' \code{mice.impute.polyreg()}. #' #' The method consists of the following steps: #' \enumerate{ #' \item Fit categorical response as a multinomial model #' \item Compute predicted categories #' \item Add appropriate noise to predictions #' } #' #' The algorithm of \code{mice.impute.polyreg} uses the function #' \code{multinom()} from the \code{nnet} package. #' #' In order to avoid bias due to perfect prediction, the algorithm augment the #' data according to the method of White, Daniel and Royston (2010). #' @seealso \code{\link{mice}}, \code{\link[nnet]{multinom}}, #' \code{\link[MASS]{polr}} #' @references #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' #' Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of #' multiple imputation strategies for the statistical analysis of incomplete #' data sets.} Dissertation. Rotterdam: Erasmus University. #' #' White, I.R., Daniel, R. Royston, P. (2010). Avoiding bias due to perfect #' prediction in multiple imputation of incomplete categorical variables. #' \emph{Computational Statistics and Data Analysis}, 54, 2267-2275. #' #' Venables, W.N. & Ripley, B.D. (2002). \emph{Modern applied statistics with #' S-Plus (4th ed)}. Springer, Berlin. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.polyreg <- function(y, ry, x, wy = NULL, nnet.maxit = 100, nnet.trace = FALSE, nnet.MaxNWts = 1500, ...) { if (is.null(wy)) { wy <- !ry } # augment data to evade issues with perfect prediction x <- as.matrix(x) aug <- augment(y, ry, x, wy) x <- aug$x y <- aug$y ry <- aug$ry wy <- aug$wy w <- aug$w fy <- as.factor(y) nc <- length(levels(fy)) un <- rep(runif(sum(wy)), each = nc) xy <- cbind.data.frame(y = y, x = x) if (ncol(x) == 0L) { xy <- data.frame(xy, int = 1) } # escape with same impute if the dependent does not vary cat.has.all.obs <- table(y[ry]) == sum(ry) if (any(cat.has.all.obs)) { return(rep(levels(fy)[cat.has.all.obs], sum(wy))) } fit <- nnet::multinom(formula(xy), data = xy[ry, , drop = FALSE], weights = w[ry], maxit = nnet.maxit, trace = nnet.trace, MaxNWts = nnet.MaxNWts, ... 
) post <- predict(fit, xy[wy, , drop = FALSE], type = "probs") if (sum(wy) == 1) { post <- matrix(post, nrow = 1, ncol = length(post)) } if (is.vector(post)) { post <- matrix(c(1 - post, post), ncol = 2) } draws <- un > apply(post, 1, cumsum) idx <- 1 + apply(draws, 2, sum) levels(fy)[idx] } mice/R/mdc.R0000644000176200001440000000755314330031606012264 0ustar liggesusers#' Graphical parameter for missing data plots #' #' \code{mdc} returns colors used to distinguish observed, missing and combined #' data in plotting. \code{mice.theme} return a partial list of named objects #' that can be used as a theme in \code{stripplot}, \code{bwplot}, #' \code{densityplot} and \code{xyplot}. #' #' This function eases consistent use of colors in plots. The default follows #' the Abayomi convention, which uses blue for observed data, red for missing or #' imputed data, and black for combined data. #' #' @aliases mdc #' @param r A numerical or character vector. The numbers 1-6 request colors as #' follows: 1=\code{cso}, 2=\code{csi}, 3=\code{csc}, 4=\code{clo}, 5=\code{cli} #' and 6=\code{clc}. Alternatively, \code{r} may contain the strings #'' \code{observed}', '\code{missing}', or '\code{both}', or abbreviations #' thereof. #' @param s A character vector containing the strings '\code{symbol}' or #'' \code{line}', or abbreviations thereof. #' @param transparent A logical indicating whether alpha-transparency is #' allowed. The default is \code{TRUE}. #' @param cso The symbol color for the observed data. The default is a #' transparent blue. #' @param csi The symbol color for the missing or imputed data. The default is a #' transparent red. #' @param csc The symbol color for the combined observed and imputed data. The #' default is a grey color. #' @param clo The line color for the observed data. The default is a slightly #' darker transparent blue. #' @param cli The line color for the missing or imputed data. The default is a #' slightly darker transparent red. #' @param clc The line color for the combined observed and imputed data. The #' default is a grey color. #' @return \code{mdc()} returns a vector containing color definitions. The length #' of the output vector is calculate from the length of \code{r} and \code{s}. #' Elements of the input vectors are repeated if needed. #' @author Stef van Buuren, sept 2012. #' @seealso \code{\link{hcl}}, \code{\link{rgb}}, #' \code{\link{xyplot.mids}}, \code{\link[lattice:xyplot]{xyplot}}, #' \code{\link[lattice:trellis.par.get]{trellis.par.set}} #' @references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #' Visualization with R}, Springer. 
#' @keywords hplot #' @examples #' # all six colors #' mdc(1:6) #' #' # lines color for observed and missing data #' mdc(c("obs", "mis"), "lin") #' @export mdc <- function(r = "observed", s = "symbol", transparent = TRUE, cso = grDevices::hcl(240, 100, 40, 0.7), csi = grDevices::hcl(0, 100, 40, 0.7), csc = "gray50", clo = grDevices::hcl(240, 100, 40, 0.8), cli = grDevices::hcl(0, 100, 40, 0.8), clc = "gray50") { # cso: blue symbol color for observed data # csi: red symbol color for imputations # csc: symbol color for combined data # clo: blue line color for observed data # cli: red line color for observed data # clc: line color for combined data if (missing(transparent)) { if (!supports.transparent()) { cso <- grDevices::hcl(240, 100, 40) csi <- grDevices::hcl(0, 100, 40) csc <- "black" clo <- grDevices::hcl(240, 100, 40) cli <- grDevices::hcl(0, 100, 40) clc <- "black" } } else if (!transparent) { cso <- grDevices::hcl(240, 100, 40) csi <- grDevices::hcl(0, 100, 40) csc <- "black" clo <- grDevices::hcl(240, 100, 40) cli <- grDevices::hcl(0, 100, 40) clc <- "black" } fallback <- grDevices::palette()[1] if (is.numeric(r)) { idx <- floor(r) idx[r < 1 | r > 6] <- 7 myc <- c(cso, csi, csc, clo, cli, clc, fallback)[idx] return(myc) } rc <- pmatch(r, c("observed", "missing", "both")) sc <- pmatch(s, c("symbol", "line")) idx <- rc + (sc - 1) * 3 idx[is.na(idx)] <- 7 myc <- c(cso, csi, csc, clo, cli, clc, fallback)[idx] myc } mice/R/design.R0000644000176200001440000000022414433403072012762 0ustar liggesusersobtain.design <- function(data, formula = ~.) { mf <- model.frame(formula, data = data, na.action = na.pass) model.matrix(formula, data = mf) } mice/R/parlmice.R0000644000176200001440000001440714334522175013323 0ustar liggesusers#' Wrapper function that runs MICE in parallel #' #' This function is included for backward compatibility. The function #' is superseded by \code{\link{futuremice}}. #' #' This function relies on package \code{\link{parallel}}, which is a base #' package for R versions 2.14.0 and later. We have chosen to use parallel function #' \code{parLapply} to allow the use of \code{parlmice} on Mac, Linux and Windows #' systems. For the same reason, we use the Parallel Socket Cluster (PSOCK) type by default. #' #' On systems other than Windows, it can be hugely beneficial to change the cluster type to #' \code{FORK}, as it generally results in improved memory handling. When memory issues #' arise on a Windows system, we advise to store the multiply imputed datasets, #' clean the memory by using \code{\link{rm}} and \code{\link{gc}} and make another #' run using the same settings. #' #' This wrapper function combines the output of \code{\link{parLapply}} with #' function \code{\link{ibind}} in \code{\link{mice}}. A \code{mids} object is returned #' and can be used for further analyses. #' #' Note that if a seed value is desired, the seed should be entered to this function #' with argument \code{seed}. Seed values outside the wrapper function (in an #' R-script or passed to \code{\link{mice}}) will not result to reproducible results. #' We refer to the manual of \code{\link{parallel}} for an explanation on this matter. #' #' @aliases parlmice #' @param data A data frame or matrix containing the incomplete data. Similar to #' the first argument of \code{\link{mice}}. #' @param m The number of desired imputated datasets. By default $m=5$ as with \code{mice} #' @param seed A scalar to be used as the seed value for the mice algorithm within #' each parallel stream. 
Please note that the imputations will be the same for all #' streams and, hence, this should be used if and only if \code{n.core = 1} and #' if it is desired to obtain the same output as under \code{mice}. #' @param n.core A scalar indicating the number of cores that should be used. #' @param n.imp.core A scalar indicating the number of imputations per core. #' @param cluster.seed A scalar to be used as the seed value. It is recommended to put the #' seed value here and not outside this function, as otherwise the parallel processes #' will be performed with separate, random seeds. #' @param cl.type The cluster type. Default value is \code{"PSOCK"}. Posix machines (linux, Mac) #' generally benefit from much faster cluster computation if \code{type} is set to \code{type = "FORK"}. #' @param ... Named arguments that are passed down to function \code{\link{mice}} or #' \code{\link{makeCluster}}. #' #' @return A mids object as defined by \code{\link{mids-class}} #' #' @author Gerko Vink, Rianne Schouten #' @seealso \code{\link{parallel}}, \code{\link{parLapply}}, \code{\link{makeCluster}}, #' \code{\link{mice}}, \code{\link{mids-class}} #' @references #' Schouten, R. and Vink, G. (2017). parlmice: faster, paraleller, micer. #' \url{https://www.gerkovink.com/parlMICE/Vignette_parlMICE.html} #' #' #'Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' @examples #' # 150 imputations in dataset nhanes, performed by 3 cores #' \dontrun{ #' imp1 <- parlmice(data = nhanes, n.core = 3, n.imp.core = 50) #' # Making use of arguments in mice. #' imp2 <- parlmice(data = nhanes, method = "norm.nob", m = 100) #' imp2$method #' fit <- with(imp2, lm(bmi ~ hyp)) #' pool(fit) #' } #' #' @export parlmice <- function(data, m = 5, seed = NA, cluster.seed = NA, n.core = NULL, n.imp.core = NULL, cl.type = "PSOCK", ...) { .Deprecated("futuremice") # check form of data and m data <- check.dataform(data) m <- check.m(m) # check if data complete if (sum(is.na(data)) == 0) { stop("Data has no missing values") } # check if arguments match CPU specifications if (!is.null(n.core)) { if (n.core > parallel::detectCores()) { stop("Number of cores specified is greater than the number of logical cores in your CPU") } } # determine course of action when not all arguments specified if (!is.null(n.core) & is.null(n.imp.core)) { n.imp.core <- m warning(paste("Number of imputations per core not specified: n.imp.core = m =", m, "has been used")) } if (is.null(n.core) & !is.null(n.imp.core)) { n.core <- parallel::detectCores() - 1 warning(paste("Number of cores not specified. Based on your machine a value of n.core =", parallel::detectCores() - 1, "is chosen")) } if (is.null(n.core) & is.null(n.imp.core)) { specs <- match.cluster(n.core = parallel::detectCores() - 1, m = m) n.core <- specs$cores n.imp.core <- specs$imps } if (!is.na(seed)) { if (n.core > 1) { warning("Be careful; the specified seed is equal for all imputations. 
Please consider specifying cluster.seed instead.") } } # create arguments to export to cluster args <- match.call(mice, expand.dots = TRUE) args[[1]] <- NULL args$m <- n.imp.core # make computing cluster cl <- parallel::makeCluster(n.core, type = cl.type) parallel::clusterExport(cl, varlist = c( "data", "m", "seed", "cluster.seed", "n.core", "n.imp.core", "cl.type", ls(parent.frame()) ), envir = environment() ) parallel::clusterExport(cl, varlist = "do.call" ) parallel::clusterEvalQ(cl, library(mice)) if (!is.na(cluster.seed)) { parallel::clusterSetRNGStream(cl, cluster.seed) } # generate imputations imps <- parallel::parLapply(cl = cl, X = 1:n.core, function(x) do.call(mice, as.list(args), envir = environment())) parallel::stopCluster(cl) # postprocess clustered imputation into a mids object imp <- imps[[1]] if (length(imps) > 1) { for (i in 2:length(imps)) { imp <- ibind(imp, imps[[i]]) } } # let imputation matrix correspond to grand m for (i in 1:length(imp$imp)) { colnames(imp$imp[[i]]) <- 1:imp$m } imp } match.cluster <- function(n.core, m) { cores <- 1:n.core imps <- 1:m out <- data.frame( results = as.vector(cores %*% t(imps)), cores = cores, imps = rep(imps, each = n.core) ) which <- out[out[, "results"] == m, ] which[order(which$cores, decreasing = T), ][1, 2:3] } mice/R/mice.impute.ri.R0000755000176200001440000000436114330031606014346 0ustar liggesusers#' Imputation by the random indicator method for nonignorable data #' #' Imputes nonignorable missing data by the random indicator method. #' #' @aliases mice.impute.ri ri #' @inheritParams mice.impute.pmm #' @param ri.maxit Number of inner iterations #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Shahab Jolani (University of Utrecht) #' @details #' The random indicator method estimates an offset between the #' distribution of the observed and missing data using an algorithm #' that iterates over the response and imputation models. #' #' This routine assumes that the response model and imputation model #' have same predictors. #' #' For an MNAR alternative see also \code{\link{mice.impute.mnar.logreg}}. #' @references Jolani, S. (2012). #' \emph{Dual Imputation Strategies for Analyzing Incomplete Data}. #' Dissertation. University of Utrecht, Dec 7 2012. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.ri <- function(y, ry, x, wy = NULL, ri.maxit = 10, ...) { if (is.null(wy)) { wy <- !ry } x <- cbind(1, as.matrix(x)) xy <- x xr <- xy y.dot <- y y.dot[wy] <- mice.impute.sample(y, ry, wy = wy) for (k in seq_len(ri.maxit)) { r.dot <- .r.draw(y.dot, ry, xr, ...) y.dot <- .y.draw(y, ry, r.dot, xy, wy, ...) } y.dot[wy] } # generting a realization of the response indicator r .r.draw <- function(ydot, ry, xr, ...) { n <- length(ry) xr <- cbind(xr, ydot) expr <- expression(glm.fit(xr, ry, family = binomial(link = logit))) fit <- suppressWarnings(eval(expr)) fit.sum <- summary.glm(fit) psi <- coef(fit) rv <- t(chol(sym(fit.sum$cov.unscaled))) psi.star <- psi + rv %*% rnorm(ncol(rv)) p <- 1 / (1 + exp(-(xr %*% psi.star))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 vec[seq_len(n)] } # Imputation of y given rdot .y.draw <- function(y, ry, rdot, xy, wy, ...) { parm <- .norm.draw(y, ry, cbind(xy, rdot), ...) 
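  # The last element of parm$coef belongs to the drawn response indicator
  # rdot; below it acts as the offset between the observed-data and
  # missing-data distributions, and is zeroed when rdot does not vary among
  # the observed cases.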
if (all(rdot[ry] == 1) || all(rdot[ry] == 0)) parm$coef[length(parm$coef)] <- 0 ydot <- y rydot <- as.logical(rdot) ydot[wy] <- xy[wy, , drop = FALSE] %*% parm$beta[-length(parm$coef), ] + rnorm(sum(wy)) * parm$sigma ydot[wy & !rydot] <- ydot[wy & !rydot] - parm$coef[length(parm$coef)] ydot } mice/R/xyplot.R0000644000176200001440000002372414330031647013063 0ustar liggesusers#' Scatterplot of observed and imputed data #' #' Plotting methods for imputed data using \pkg{lattice}. #' \code{xyplot()} produces a conditional scatterplots. The function #' automatically separates the observed (blue) and imputed (red) data. The #' function extends the usual features of \pkg{lattice}. #' #' The argument \code{na.groups} may be used to specify (combinations of) #' missingness in any of the variables. The argument \code{groups} can be used #' to specify groups based on the variable values themselves. Only one of both #' may be active at the same time. When both are specified, \code{na.groups} #' takes precedence over \code{groups}. #' #' Use the \code{subset} and \code{na.groups} together to plots parts of the #' data. For example, select the first imputed data set by by #' \code{subset=.imp==1}. #' #' Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #' specified in the arguments list to alter the plotting symbols. If #' \code{length(col)==2}, the color specification to define the observed and #' missing groups. \code{col[1]} is the color of the 'observed' data, #' \code{col[2]} is the color of the missing or imputed data. A convenient color #' choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #' data, and a transparent red color for the imputed data. A good choice is #' \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the #' duration of the session by running \code{mice.theme()}. #' #' @aliases xyplot #' @param x A \code{mids} object, typically created by \code{mice()} or #' \code{mice.mids()}. #' @param data Formula that selects the data to be plotted. This argument #' follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #' variables (used for the per-panel display) and the optional conditioning #' variables (which define the subsets plotted in different panels) to be used #' in the plot. #' #' The formula is evaluated on the complete data set in the \code{long} form. #' Legal variable names for the formula include \code{names(x$data)} plus the #' two administrative factors \code{.imp} and \code{.id}. #' #' \bold{Extended formula interface:} The primary variable terms (both the LHS #' \code{y} and RHS \code{x}) may consist of multiple terms separated by a #' \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #' taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #' \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #' \emph{separate panels}. This behavior differs from standard \pkg{lattice}. #' \emph{Only combine terms of the same type}, i.e. only factors or only #' numerical variables. Mixing numerical and categorical data occasionally #' produces odds labeling of vertical axis. #' #' @param na.groups An expression evaluating to a logical vector indicating #' which two groups are distinguished (e.g. using different colors) in the #' display. The environment in which this expression is evaluated in the #' response indicator \code{is.na(x$data)}. 
#' #' The default \code{na.group = NULL} contrasts the observed and missing data #' in the LHS \code{y} variable of the display, i.e. groups created by #' \code{is.na(y)}. The expression \code{y} creates the groups according to #' \code{is.na(y)}. The expression \code{y1 & y2} creates groups by #' \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #' \code{is.na(y1) | is.na(y2)}, and so on. #' @param groups This is the usual \code{groups} arguments in \pkg{lattice}. It #' differs from \code{na.groups} because it evaluates in the completed data #' \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #' \code{na.groups} evaluates in the response indicator. See #' \code{\link{xyplot}} for more details. When both \code{na.groups} and #' \code{groups} are specified, \code{na.groups} takes precedence, and #' \code{groups} is ignored. #' @param theme A named list containing the graphical parameters. The default #' function \code{mice.theme} produces a short list of default colors, line #' width, and so on. The extensive list may be obtained from #' \code{trellis.par.get()}. Global graphical parameters like \code{col} or #' \code{cex} in high-level calls are still honored, so first experiment with #' the global parameters. Many setting consists of a pair. For example, #' \code{mice.theme} defines two symbol colors. The first is for the observed #' data, the second for the imputed data. The theme settings only exist during #' the call, and do not affect the trellis graphical parameters. #' @param as.table See \code{\link[lattice:xyplot]{xyplot}}. #' @param outer See \code{\link[lattice:xyplot]{xyplot}}. #' @param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #' @param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #' @param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #' @param subset See \code{\link[lattice:xyplot]{xyplot}}. #' @param \dots Further arguments, usually not directly processed by the #' high-level functions documented here, but instead passed on to other #' functions. #' @return The high-level functions documented here, as well as other high-level #' Lattice functions, return an object of class \code{"trellis"}. The #' \code{\link[lattice:update.trellis]{update}} method can be used to #' subsequently update components of the object, and the #' \code{\link[lattice:print.trellis]{print}} method (usually called by default) #' will plot it on an appropriate plotting device. #' @note The first two arguments (\code{x} and \code{data}) are reversed #' compared to the standard Trellis syntax implemented in \pkg{lattice}. This #' reversal was necessary in order to benefit from automatic method dispatch. #' #' In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #' in \pkg{lattice} the argument \code{x} is always a formula. #' #' In \pkg{mice} the argument \code{data} is always a formula object, whereas in #' \pkg{lattice} the argument \code{data} is usually a data frame. #' #' All other arguments have identical interpretation. 
#' #' @author Stef van Buuren #' @seealso \code{\link{mice}}, \code{\link{stripplot}}, \code{\link{densityplot}}, #' \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the #' package, as well as \code{\link[lattice:xyplot]{xyplot}}, #' \code{\link[lattice:panel.xyplot]{panel.xyplot}}, #' \code{\link[lattice:print.trellis]{print.trellis}}, #' \code{\link[lattice:trellis.par.get]{trellis.par.set}} #' @references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #' Visualization with R}, Springer. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' @keywords hplot #' @examples #' imp <- mice(boys, maxit = 1) #' #' # xyplot: scatterplot by imputation number #' # observe the erroneous outlying imputed values #' # (caused by imputing hgt from bmi) #' xyplot(imp, hgt ~ age | .imp, pch = c(1, 20), cex = c(1, 1.5)) #' #' # same, but label with missingness of wgt (four cases) #' xyplot(imp, hgt ~ age | .imp, na.group = wgt, pch = c(1, 20), cex = c(1, 1.5)) #' @export xyplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE) { call <- match.call() if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object") if (missing(data)) stop("Missing formula") formula <- data ## unpack data and response indicator cd <- data.frame(complete(x, "long", include = TRUE)) r <- as.data.frame(is.na(x$data)) ## evaluate na.group in response indicator nagp <- eval(expr = substitute(na.groups), envir = r, enclos = parent.frame()) if (is.expression(nagp)) nagp <- eval(expr = nagp, envir = r, enclos = parent.frame()) ## evaluate groups in imputed data ngp <- eval(expr = substitute(groups), envir = cd, enclos = parent.frame()) if (is.expression(ngp)) ngp <- eval(expr = ngp, envir = cd, enclos = parent.frame()) groups <- ngp ## evaluate subset in imputed data ss <- eval(expr = substitute(subset), envir = cd, enclos = parent.frame()) if (is.expression(ss)) ss <- eval(expr = ss, envir = cd, enclos = parent.frame()) subset <- ss ## evaluate further arguments before parsing dots <- list(...) 
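  # any remaining arguments are kept in dots and appended to the argument
  # list of the lattice call assembled below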
args <- list( allow.multiple = allow.multiple, outer = outer, drop.unused.levels = drop.unused.levels, subscripts = subscripts, as.table = as.table ) ## determine the y-variables form <- lattice::latticeParseFormula( model = formula, data = cd, subset = subset, groups = groups, multiple = allow.multiple, outer = outer, subscripts = TRUE, drop = drop.unused.levels ) ynames <- unlist(lapply(strsplit(form$left.name, " \\+ "), rm.whitespace)) ## calculate selection vector gp nona <- is.null(call$na.groups) if (!is.null(call$groups) && nona) { gp <- call$groups } else { if (nona) { na.df <- r[, ynames, drop = FALSE] gp <- unlist(lapply(na.df, rep.int, x$m + 1)) } else { gp <- rep.int(nagp, length(ynames) * (x$m + 1)) } } ## change axis defaults of extended formula interface if (is.null(call$ylab)) { args$ylab <- "" if (length(ynames) == 1) args$ylab <- ynames } if (is.null(call$scales)) { args$scales <- list() if (length(ynames) > 1) { args$scales <- list( x = list(relation = "free"), y = list(relation = "free") ) } } ## ready args <- c( x = formula, data = list(cd), groups = list(gp), args, dots, subset = call$subset ) ## go tp <- do.call("xyplot", args) update(tp, par.settings = theme) } mice/R/install.on.demand.R0000644000176200001440000000072014330031647015023 0ustar liggesusersinstall.on.demand <- function(pkg, quiet = FALSE, ...) { # internal function that checks whether package pkg is # in the library. If not found, it asks the user permission # to install from CRAN. if (requireNamespace(pkg, quietly = TRUE)) { return() } if (interactive()) { answer <- askYesNo(paste("Package", pkg, "needed. Install from CRAN?")) if (answer) install.packages(pkg, repos = "https://cloud.r-project.org/", quiet = quiet) } } mice/R/popmis.R0000644000176200001440000000156514330031606013025 0ustar liggesusers#' Hox pupil popularity data with missing popularity scores #' #' Hox pupil popularity data with some missing popularity scores #' #' The original, complete dataset was generated by Joop Hox as an example of #' well-behaved multilevel data set. The distributed data contains missing data #' in pupil popularity. #' #' @name popmis #' @docType data #' @format A data frame with 2000 rows and 7 columns: #' \describe{ #' \item{pupil}{Pupil number within school} #' \item{school}{School number} #' \item{popular}{Pupil popularity with 848 missing entries} #' \item{sex}{Pupil gender} #' \item{texp}{Teacher experience (years)} #' \item{const}{Constant intercept term} #' \item{teachpop}{Teacher popularity} } #' @source Hox, J. J. (2002) \emph{Multilevel analysis. Techniques and #' applications.} Mahwah, NJ: Lawrence Erlbaum. #' @keywords datasets #' @examples #' #' popmis[1:3, ] NULL mice/R/mice.impute.2lonly.mean.R0000644000176200001440000000610214330031606016062 0ustar liggesusers#' Imputation of most likely value within the class #' #' Method \code{2lonly.mean} replicates the most likely value within #' a class of a second-level variable. It works for numeric and #' factor data. The function is primarily useful as a quick fixup for #' data in which the second-level variable is inconsistent. #' #' @aliases 2lonly.mean #' @inheritParams mice.impute.pmm #' @param type Vector of length \code{ncol(x)} identifying random and class #' variables. The class variable (only one is allowed) is coded as \code{-2}. #' @param ... Other named arguments. 
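#' @examples
#' # A minimal sketch with hypothetical toy data: a second-level variable y
#' # that is constant within each class but partially missing. The direct
#' # call below mimics what mice() does internally for this method.
#' y <- c(10, NA, 10, 20, NA, 20)
#' ry <- !is.na(y)
#' x <- data.frame(class = c(1, 1, 1, 2, 2, 2))
#' # type = -2 flags the class variable
#' mice.impute.2lonly.mean(y, ry, x, type = -2)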
#' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' Observed values in \code{y} are averaged within the class, and #' replicated to the missing \code{y} within that class. #' This function is primarily useful for repairing incomplete data #' that are constant within the class, but vary over classes. #' #' For numeric variables, \code{mice.impute.2lonly.mean()} imputes the #' class mean of \code{y}. If \code{y} is a second-level variable, then #' conventionally all observed \code{y} will be identical within the #' class, and the function just provides a quick fix for any #' missing \code{y} by filling in the class mean. #' #' For factor variables, \code{mice.impute.2lonly.mean()} imputes the #' most frequently occuring category within the class. #' #' If there are no observed \code{y} in the class, all entries of the #' class are set to \code{NA}. Note that this may produce problems #' later on in \code{mice} if imputation routines are called that #' expects predictor data to be complete. Methods designed for #' imputing this type of second-level variables include #' \code{\link{mice.impute.2lonly.norm}} and #' \code{\link{mice.impute.2lonly.pmm}}. #' #' @references #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Boca Raton, FL.: Chapman & Hall/CRC Press. #' @author Gerko Vink, Stef van Buuren, 2019 #' @family univariate-2lonly #' @keywords datagen #' @export mice.impute.2lonly.mean <- function(y, ry, x, type, wy = NULL, ...) { if (all(ry)) { return(numeric(0)) } if (is.null(wy)) { wy <- !ry } yobs <- y[ry] class <- x[, type == -2] if (length(class) == 0) { stop("No class variable") } classobs <- class[ry] classmis <- class[wy] # deal with empty classes (will be NaN) empty.classes <- class[!class %in% classobs] classobs <- c(classobs, empty.classes) yobs <- c(yobs, rep.int(NA, length(empty.classes))) # factor: return class levels corresponding to class median if (is.factor(y)) { ym <- aggregate(yobs, list(classobs), median, na.rm = TRUE) ym$x <- as.integer(ym$x) return(apply(as.matrix(classmis), 1, function(z, y, lev) lev[y[z == y[, 1], 2]], y = ym, lev = levels(y), ... )) } # otherwise: return the class means ym <- aggregate(yobs, list(classobs), mean, na.rm = TRUE) z <- apply(as.matrix(classmis), 1, function(z, y) y[z == y[, 1], 2], y = ym, ... ) z[is.nan(z)] <- NA z } mice/R/formula.R0000644000176200001440000001743714430511431013170 0ustar liggesusers#' Creates a \code{formulas} argument #' #' This helper function creates a valid \code{formulas} object. The #' \code{formulas} object is an argument to the \code{mice} function. #' It is a list of formula's that specifies the target variables and #' the predictors by means of the standard \code{~} operator. #' @param data A \code{data.frame} with the source data #' @param blocks An optional specification for blocks of variables in #' the rows. The default assigns each variable in its own block. #' @param predictorMatrix A \code{predictorMatrix} specified by the user. #' @return A list of formula's. 
#' @seealso \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} #' @examples #' f1 <- make.formulas(nhanes) #' f1 #' f2 <- make.formulas(nhanes, blocks = make.blocks(nhanes, "collect")) #' f2 #' #' # for editing, it may be easier to work with the character vector #' c1 <- as.character(f1) #' c1 #' #' # fold it back into a formula list #' f3 <- name.formulas(lapply(c1, as.formula)) #' f3 #' @export make.formulas <- function(data, blocks = make.blocks(data), predictorMatrix = NULL) { data <- check.dataform(data) formulas <- as.list(rep("~ 0", length(blocks))) names(formulas) <- names(blocks) for (h in names(blocks)) { y <- blocks[[h]] if (is.null(predictorMatrix)) { predictors <- colnames(data) } else { type <- predictorMatrix[h, ] predictors <- names(type)[type != 0] } x <- setdiff(predictors, y) if (length(x) == 0) { x <- "0" } formulas[[h]] <- paste( paste(y, collapse = "+"), "~", paste(x, collapse = "+") ) } formulas <- lapply(formulas, as.formula) formulas } #' Name formula list elements #' #' This helper function names any unnamed elements in the \code{formula} #' list. This is a convenience function. #' @inheritParams mice #' @param prefix A character vector of length 1 with the prefix to #' be using for naming any unnamed blocks with two or more variables. #' @return Named list of formulas #' @seealso \code{\link{mice}} #' @details #' This function will name any unnamed list elements specified in #' the optional argument \code{formula}. Unnamed formula's #' consisting with just one response variable will be named #' after this variable. Unnamed formula's containing more #' than one variable will be named by the \code{prefix} #' argument, padded by an integer sequence stating at 1. #' @examples #' # fully conditionally specified main effects model #' form1 <- list( #' bmi ~ age + chl + hyp, #' hyp ~ age + bmi + chl, #' chl ~ age + bmi + hyp #' ) #' form1 <- name.formulas(form1) #' imp1 <- mice(nhanes, formulas = form1, print = FALSE, m = 1, seed = 12199) #' #' # same model using dot notation #' form2 <- list(bmi ~ ., hyp ~ ., chl ~ .) #' form2 <- name.formulas(form2) #' imp2 <- mice(nhanes, formulas = form2, print = FALSE, m = 1, seed = 12199) #' identical(complete(imp1), complete(imp2)) #' #' # same model using repeated multivariate imputation #' form3 <- name.blocks(list(all = bmi + hyp + chl ~ .)) #' imp3 <- mice(nhanes, formulas = form3, print = FALSE, m = 1, seed = 12199) #' cmp3 <- complete(imp3) #' identical(complete(imp1), complete(imp3)) #' #' # same model using predictorMatrix #' imp4 <- mice(nhanes, print = FALSE, m = 1, seed = 12199, auxiliary = TRUE) #' identical(complete(imp1), complete(imp4)) #' #' # different model: multivariate imputation for chl and bmi #' form5 <- list(chl + bmi ~ ., hyp ~ bmi + age) #' form5 <- name.formulas(form5) #' imp5 <- mice(nhanes, formulas = form5, print = FALSE, m = 1, seed = 71712) #' @export name.formulas <- function(formulas, prefix = "F") { if (!is.list(formulas)) { stop("Argument `formulas` not a list", call. = FALSE) } if (!all(sapply(formulas, is.formula) | sapply(formulas, is.list))) { stop("Not all elements in `formulas` are a formula or a list") } if (is.null(names(formulas))) names(formulas) <- rep("", length(formulas)) inc <- 1 for (i in seq_along(formulas)) { if (names(formulas)[i] != "") next # if (hasdot(formulas[[i]]) && is.null(data)) # stop("Formula with dot requires `data` argument", call. 
= FALSE) y <- lhs(formulas[[i]]) if (length(y) == 1) { names(formulas)[i] <- y } else { names(formulas)[i] <- paste0(prefix, inc) inc <- inc + 1 } } formulas } check.formulas <- function(formulas, data) { formulas <- name.formulas(formulas) formulas <- handle.oldstyle.formulas(formulas, data) formulas <- lapply(formulas, expand.dots, data) # escape if formula is list of two formula's if (any(sapply(formulas, is.list))) { return(formulas) } formulas <- lapply(formulas, as.formula) formulas } #' Extends formula's with predictor matrix settings #' #' @inheritParams mice #' @return A list of formula's #' @param auxiliary A logical that indicates whether the variables #' listed in \code{predictors} should be added to the formula as main #' effects. The default is \code{TRUE}. #' @param include.intercept A logical that indicated whether the intercept #' should be included in the result. #' @keywords internal extend.formulas <- function(formulas, data, blocks, predictorMatrix = NULL, auxiliary = TRUE, include.intercept = FALSE, ...) { # Extend formulas with predictorMatrix if (is.null(predictorMatrix)) { return(formulas) } for (h in names(blocks)) { type <- predictorMatrix[h, ] predictors <- names(type)[type != 0] ff <- extend.formula( formula = formulas[[h]], predictors = predictors, auxiliary = auxiliary, include.intercept = include.intercept ) formulas[[h]] <- ff } formulas } #' Extends a formula with predictors #' #' @param formula A formula. If it is #' not a formula, the formula is internally reset to \code{~0}. #' @param predictors A character vector of variable names. #' @param auxiliary A logical that indicates whether the variables #' listed in \code{predictors} should be added to the formula as main #' effects. The default is \code{TRUE}. #' @param include.intercept A logical that indicated whether the intercept #' should be included in the result. #' @return A formula #' @keywords internal extend.formula <- function(formula = ~0, predictors = NULL, auxiliary = TRUE, include.intercept = FALSE, ...) { if (!is.formula(formula)) formula <- ~0 # handle dot in RHS if (hasdot(formula)) { if (length(predictors) > 1) { fr <- as.formula(c("~", paste(predictors, collapse = "+"))) } else { fr <- ~0 } } else { fr <- reformulate(c(".", predictors)) } if (auxiliary) formula <- update(formula, fr, ...) if (include.intercept) formula <- update(formula, ~ . + 1, ...) formula } handle.oldstyle.formulas <- function(formulas, data) { # converts old-style character vector to formula list oldstyle <- length(formulas) == ncol(data) && is.vector(formulas) && is.character(formulas) if (!oldstyle) { return(formulas) } formulas[formulas != ""] <- "~ 0" fl <- as.list(formulas) names(fl) <- names(formulas) fl } is.empty.model.data <- function(x, data) { tt <- terms(x, data = data) (length(attr(tt, "factors")) == 0L) & (attr(tt, "intercept") == 0L) } lhs <- function(x) all.vars(update(x, . ~ 1)) is.formula <- function(x) { inherits(x, "formula") } hasdot <- function(f) { if (is.recursive(f)) { return(any(sapply(as.list(f), hasdot))) } else { f == as.symbol(".") } } expand.dots <- function(formula, data) { if (!is.formula(formula)) { return(formula) } if (!hasdot(formula)) { return(formula) } y <- lhs(formula) x <- setdiff(colnames(data), y) fs <- paste(paste(y, collapse = "+"), "~", paste(x, collapse = "+")) as.formula(fs) } mice/R/post.R0000644000176200001440000000146414330031606012501 0ustar liggesusers#' Creates a \code{post} argument #' #' This helper function creates a valid \code{post} vector. 
The #' \code{post} vector is an argument to the \code{mice} function that #' specifies post-processing for a variable after each iteration of imputation. #' @inheritParams mice #' @return Character vector of \code{ncol(data)} element #' @seealso \code{\link{mice}} #' @examples #' make.post(nhanes2) #' @export make.post <- function(data) { post <- vector("character", length = ncol(data)) names(post) <- colnames(data) post } check.post <- function(post, data) { if (is.null(post)) { return(make.post(data)) } # check if (length(post) != ncol(data)) { stop("length(post) does not match ncol(data)", call. = FALSE) } # change if (is.null(names(post))) names(post) <- colnames(data) post } mice/R/D2.R0000644000176200001440000000413414436133147011770 0ustar liggesusers#' Compare two nested models using D2-statistic #' #' The D2-statistic pools test statistics from the repeated analyses. #' The method is less powerful than the D1- and D3-statistics. #' #' @inheritParams D1 #' @inheritParams mitml::testModels #' @note Warning: `D2()` assumes that the order of the variables is the #' same in different models. See #' \url{https://github.com/amices/mice/issues/420} for details. #' @references #' Li, K. H., X. L. Meng, T. E. Raghunathan, and D. B. Rubin. 1991. #' Significance Levels from Repeated p-Values with Multiply-Imputed Data. #' \emph{Statistica Sinica} 1 (1): 65–92. #' #' \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:chi} #' @examples #' # Compare two linear models: #' imp <- mice(nhanes2, seed = 51009, print = FALSE) #' mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) #' mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) #' D2(mi1, mi0) #' \dontrun{ #' # Compare two logistic regression models #' imp <- mice(boys, maxit = 2, print = FALSE) #' fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) #' fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) #' D2(fit1, fit0) #' } #' @seealso \code{\link[mitml]{testModels}} #' @export D2 <- function(fit1, fit0 = NULL, use = "wald") { install.on.demand("mitml") # fit1: a fitlist or mira-object # fit0: named numerical vector, character vector, or list fit1 <- as.mitml.result(fit1) est1 <- pool(fit1) qbar1 <- getqbar(est1) if (is.null(fit0)) { # test all estimates equal to zero beta <- rep(0, length(qbar1)) names(beta) <- names(qbar1) fit0 <- lapply(fit1, fix.coef, beta = beta) fit0 <- as.mitml.result(fit0) } else if (is.mira(fit0)) { fit0 <- as.mitml.result(fit0) } tmr <- mitml::testModels(fit1, fit0, method = "D2", use = use) out <- list( call = match.call(), result = tmr$test, formulas = list( `1` = formula(fit1[[1L]]), `2` = formula(fit0[[1L]]) ), m = tmr$m, method = "D2", use = use, dfcom = NA ) class(out) <- c("mice.anova", class(fit1)) out } mice/R/RcppExports.R0000644000176200001440000000646514436642366014035 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 legendre <- function(x, p) { .Call(`_mice_legendre`, x, p) } matcher <- function(obs, mis, k) { .Call(`_mice_matcher`, obs, mis, k) } #' Find index of matched donor units #' #' @param d Numeric vector with values from donor cases. #' @param t Numeric vector with values from target cases. #' @param k Integer, number of unique donors from which a random draw is made. #' For \code{k = 1} the function returns the index in \code{d} corresponding #' to the closest unit. 
For multiple imputation, the #' advice is to set values in the range of \code{k = 5} to \code{k = 10}. #' @return An integer vector with \code{length(t)} elements. Each #' element is an index in the array \code{d}. #' @details #' For each element in \code{t}, the method finds the \code{k} nearest #' neighbours in \code{d}, randomly draws one of these neighbours, and #' returns its position in vector \code{d}. #' #' Fast predictive mean matching algorithm in seven steps: #' #' 1. Shuffle records to remove effects of ties #' #' 2. Obtain sorting order on shuffled data #' #' 3. Calculate index on input data and sort it #' #' 4. Pre-sample vector \code{h} with values between 1 and \code{k} #' #' For each of the \code{n0} elements in \code{t}: #' #' 5. find the two adjacent neighbours #' #' 6. find the \code{h_i}'th nearest neighbour #' #' 7. store the index of that neighbour #' #' Return vector of \code{n0} positions in \code{d}. #' #' We may use the function to perform predictive mean matching under a given #' predictive model. To do so, specify both \code{d} and \code{t} as #' predictions from the same model. Suppose that \code{y} contains the observed #' outcomes of the donor cases (in the same sequence as \code{d}), then #' \code{y[matchindex(d, t)]} returns one matched outcome for every #' target case. #' #' See \url{https://github.com/amices/mice/issues/236}. #' This function is a replacement for the \code{matcher()} function that has #' been in default in \code{mice} since version \code{2.22} (June 2014). #' @examples #' set.seed(1) #' #' # Inputs need not be sorted #' d <- c(-5, 5, 0, 10, 12) #' t <- c(-6, -4, 0, 2, 4, -2, 6) #' #' # Index (in vector a) of closest match #' idx <- matchindex(d, t, 1) #' idx #' #' # To check: show values of closest match #' #' # Random draw among indices of the 5 closest predictors #' matchindex(d, t) #' #' # An example #' train <- mtcars[1:20, ] #' test <- mtcars[21:32, ] #' fit <- lm(mpg ~ disp + cyl, data = train) #' d <- fitted.values(fit) #' t <- predict(fit, newdata = test) # note: not using mpg #' idx <- matchindex(d, t) #' #' # Borrow values from train to produce 12 synthetic values for mpg in test. #' # Synthetic values are plausible values that could have been observed if #' # they had been measured. #' train$mpg[idx] #' #' # Exercise: Create a distribution of 1000 plausible values for each of the #' # twelve mpg entries in test, and count how many times the true value #' # (which we know here) is located within the inter-quartile range of each #' # distribution. Is your count anywhere close to 500? Why? Why not? #' @author Stef van Buuren, Nasinski Maciej, Alexander Robitzsch #' @export matchindex <- function(d, t, k = 5L) { .Call(`_mice_matchindex`, d, t, k) } mice/R/futuremice.R0000644000176200001440000001654014422737141013676 0ustar liggesusers#' Wrapper function that runs MICE in parallel #' #' This is a wrapper function for \code{\link{mice}}, using multiple cores to #' execute \code{\link{mice}} in parallel. As a result, the imputation #' procedure can be sped up, which may be useful in general. By default, #' \code{\link{futuremice}} distributes the number of imputations \code{m} #' about equally over the cores. #' #' This function relies on package \code{\link[furrr]{furrr}}, which is a #' package for R versions 3.2.0 and later. We have chosen to use furrr function #' \code{future_map} to allow the use of \code{futuremice} on Mac, Linux and #' Windows systems. 
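#'
#' The \code{future.plan} argument is passed on to \code{\link[future]{plan}}.
#' On Unix-alikes one may, for example, request forked workers instead of the
#' default background sessions (an illustrative call, not a recommendation):
#' \code{futuremice(data = nhanes, m = 10, future.plan = "multicore")}.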
#' #' #' This wrapper function combines the output of \code{\link[furrr]{future_map}} with #' function \code{\link{ibind}} from the \code{\link{mice}} package. A #' \code{mids} object is returned and can be used for further analyses. #' #' A seed value can be specified in the global environment, which will yield #' reproducible results. A seed value can also be specified within the #' \code{\link{futuremice}} call, through specifying the argument #' \code{parallelseed}. If \code{parallelseed} is not specified, a seed value is #' drawn randomly by default, and accessible through \code{$parallelseed} in the #' output object. Hence, results will always be reproducible, regardless of #' whether the seed is specified in the global environment, or by setting the #' same seed within the function (potentially by extracting the seed from the #' \code{futuremice} output object. #' #' @aliases futuremice #' @param data A data frame or matrix containing the incomplete data. Similar to #' the first argument of \code{\link{mice}}. #' @param m The number of desired imputated datasets. By default $m=5$ as with #' \code{mice} #' @param parallelseed A scalar to be used to obtain reproducible results over #' the futures. The default \code{parallelseed = NA} will result in a seed value #' that is randomly drawn between -999999999 and 999999999. #' @param n.core A scalar indicating the number of cores that should be used. #' @param seed A scalar to be used as the seed value for the mice algorithm #' within each parallel stream. Please note that the imputations will be the #' same for all streams and, hence, this should be used if and only if #' \code{n.core = 1} and if it is desired to obtain the same output as under #' \code{mice}. #' @param use.logical A logical indicating whether logical (\code{TRUE}) or #' physical (\code{FALSE}) CPU's on machine should be used. #' @param future.plan A character indicating how \code{future}s are resolved. #' The default \code{multisession} resolves futures asynchronously (in parallel) #' in separate \code{R} sessions running in the background. See #' \code{\link[future]{plan}} for more information on future plans. #' @param packages A character vector with additional packages to be used in #' \code{mice} (e.g., for using external imputation functions). #' @param globals A character string with additional functions to be exported to #' each future (e.g., user-written imputation functions). #' @param ... Named arguments that are passed down to function \code{\link{mice}}. #' #' @return A mids object as defined by \code{\link{mids-class}} #' #' @author Thom Benjamin Volker, Gerko Vink #' @seealso \code{\link[future]{future}}, \code{\link[furrr]{furrr}}, \code{\link[furrr]{future_map}}, #' \code{\link[future]{plan}}, \code{\link{mice}}, \code{\link{mids-class}} #' @references #' Volker, T.B. and Vink, G. (2022). futuremice: The future starts today. #' \url{https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html} #' #' #'Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' @examples #' # 150 imputations in dataset nhanes, performed by 3 cores #' \dontrun{ #' imp1 <- futuremice(data = nhanes, m = 150, n.core = 3) #' # Making use of arguments in mice. 
#' imp2 <- futuremice(data = nhanes, m = 100, method = "norm.nob") #' imp2$method #' fit <- with(imp2, lm(bmi ~ hyp)) #' pool(fit) #' } #' #' @export futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA, use.logical = TRUE, future.plan = "multisession", packages = NULL, globals = NULL, ...) { # check if pacakages available install.on.demand("parallelly", ...) install.on.demand("furrr", ...) install.on.demand("future", ...) # check form of data and m data <- check.dataform(data) m <- check.m(m) # check if data complete if (sum(is.na(data)) == 0) { stop("Data has no missing values") } # number of available cores available <- parallelly::availableCores(logical = use.logical) # set the number of cores n.core <- check.cores(n.core, available, m) if (n.core > 1) { dist.core <- cut(1:m, n.core, labels = paste0("core", 1:n.core)) } else { dist.core <- rep("core1", m) } n.imp.core <- as.vector(table(dist.core)) if (!is.na(seed)) { if (n.core > 1) { if (interactive()) { msg <- "Be careful; specifying seed rather than parallelseed results in duplicate imputations.\nDo you want to continue?\n" ask <- askYesNo(msg, prompts = getOption("askYesNo", gettext(c("Yes", "No, ignore seed", "Cancel")))) if (isTRUE(ask)) { seed <- seed warning("Be careful; the imputations will be the same over the cores.") } else if (isFALSE(ask)) { seed <- NA message("Parallelseed is specified for you, and is accessible in the output object under $parallelseed.") } else if (is.na(ask)) { stop("You stopped futuremice. To obtain unique, but reproducible imputations, specify parallelseed.") } } else { warning("Be careful; the imputations will be identical over the cores. Perhaps you want to specify parallelseed, for unique, but reproducible results.") } } } if (!is.na(parallelseed)) { set.seed(parallelseed) } else { if(!exists(".Random.seed")) { set.seed(NULL) } parallelseed <- get( ".Random.seed", envir = globalenv(), mode = "integer", inherits = FALSE ) } # start multisession future::plan(future.plan, workers = n.core ) # begin future imps <- furrr::future_map( n.imp.core, function(x) { mice(data = data, m = x, printFlag = FALSE, seed = seed, ... )}, .options = furrr::furrr_options( seed = TRUE, globals = globals, packages = packages ) ) # end multisession future::plan(future::sequential) # stitch future into mids # postprocess clustered imputation into a mids object imp <- imps[[1]] if (length(imps) > 1) { for (i in 2:length(imps)) { imp <- ibind(imp, imps[[i]]) } } # let imputation matrix correspond to grand m for (i in 1:length(imp$imp)) { colnames(imp$imp[[i]]) <- 1:imp$m } imp$parallelseed <- parallelseed return(imp) } check.cores <- function(n.core, available, m) { if (is.null(n.core)) { n.core <- min(available - 1, m) } else { if (n.core > available | n.core > m) { warning(paste("'n.core' exceeds the maximum number of available cores on your machine or the number of imputations, and is set to", min(available - 1, m))) } n.core <- min(available - 1, m, n.core) } n.core }mice/R/anova.R0000644000176200001440000000336414330031606012621 0ustar liggesusers#' Compare several nested models #' #' @rdname anova #' @param object Two or more objects of class \code{mira} #' @param method Either \code{"D1"}, \code{"D2"} or \code{"D3"} #' @param use An character indicating the test statistic #' @param ... Other parameters passed down to \code{D1()}, \code{D2()}, #' \code{D3()} and \code{mitml::testModels}. 
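# Editor's addition -- a hedged usage sketch for anova.mira(), modelled
# on the D1()/D2() examples elsewhere in this package; the seed value is
# arbitrary. Fit a sequence of nested models on a mids object and
# compare them pairwise (method = "D1" is the default).
imp <- mice(nhanes2, print = FALSE, seed = 71242)
fit2 <- with(imp, lm(bmi ~ age + hyp + chl))
fit1 <- with(imp, lm(bmi ~ age + hyp))
fit0 <- with(imp, lm(bmi ~ age))
# anova(fit2, fit1, fit0)                 # default method = "D1"
# anova(fit2, fit1, fit0, method = "D2")  # pool the test statistics instead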
#' @return Object of class \code{mice.anova} #' @export anova.mira <- function(object, ..., method = "D1", use = "wald") { modlist <- list(object, ...) first <- lapply(modlist, getfit, 1L) %>% sapply(glance) if (is.null(names(modlist))) { names(modlist) <- names(first) <- 1L:length(modlist) } else { names(first) <- names(modlist) } # order by model complexity dfcom <- rep(NA, ncol(first)) for (j in 1:ncol(first)) dfcom[j] <- get.dfcom(modlist[[j]]) idx <- order(dfcom, decreasing = FALSE) modlist <- modlist[idx] dfcom <- dfcom[idx] names(dfcom) <- names(modlist) # get model formulas formulas <- lapply(modlist, getfit, 1L) %>% lapply(formula) names(formulas) <- names(modlist) # test successive models nm <- length(modlist) out <- vector("list", nm - 1L) names(out) <- paste(names(modlist), lead(names(modlist)), sep = " ~~ ")[-nm] for (j in seq_along(out)) { if (method == "D2") { args <- alist(fit1 = modlist[[j]], fit0 = modlist[[j + 1L]], use = use) } else { args <- alist( fit1 = modlist[[j]], fit0 = modlist[[j + 1L]], dfcom = as.numeric(unlist(dfcom[j])) ) } out[[j]] <- do.call(method, args = args) } obj <- list( call = match.call(), out = out, formulas = formulas, m = length(getfit(modlist[[1L]])), method = method, use = use ) class(obj) <- c("mice.anova", class(first)) obj } mice/R/fix.coef.R0000644000176200001440000000642714433402442013225 0ustar liggesusers#' Fix coefficients and update model #' #' Refits a model with a specified set of coefficients. #' #' @param model An R model, e.g., produced by \code{lm} or \code{glm} #' @param beta A numeric vector with \code{length(coef)} model coefficients. #' If the vector is not named, the coefficients should be #' given in the same order as in \code{coef(model)}. If the vector is named, #' the procedure attempts to match on names. #' @return An updated R model object #' @author Stef van Buuren, 2018 #' @details #' The function calculates the linear predictor using the new coefficients, #' and reformulates the model using the \code{offset} #' argument. The linear predictor is called #' \code{offset}, and its coefficient will be \code{1} by definition. #' The new model only fits the intercept, which should be \code{0} #' if we set \code{beta = coef(model)}. #' @examples #' model0 <- lm(Volume ~ Girth + Height, data = trees) #' formula(model0) #' coef(model0) #' deviance(model0) #' #' # refit same model #' model1 <- fix.coef(model0) #' formula(model1) #' coef(model1) #' deviance(model1) #' #' # change the beta's #' model2 <- fix.coef(model0, beta = c(-50, 5, 1)) #' coef(model2) #' deviance(model2) #' #' # compare predictions #' plot(predict(model0), predict(model1)) #' abline(0, 1) #' plot(predict(model0), predict(model2)) #' abline(0, 1) #' #' # compare proportion explained variance #' cor(predict(model0), predict(model0) + residuals(model0))^2 #' cor(predict(model1), predict(model1) + residuals(model1))^2 #' cor(predict(model2), predict(model2) + residuals(model2))^2 #' #' # extract offset from constrained model #' summary(model2$offset) #' #' # it also works with factors and missing data #' model0 <- lm(bmi ~ age + hyp + chl, data = nhanes2) #' model1 <- fix.coef(model0) #' model2 <- fix.coef(model0, beta = c(15, -8, -8, 2, 0.2)) #' @export fix.coef <- function(model, beta = NULL) { oldcoef <- clean.coef(model) if (is.null(beta)) beta <- oldcoef if (length(oldcoef) != length(beta)) { stop("incorrect length of 'beta'", call. 
= FALSE) } # handle naming if (is.null(names(oldcoef))) { names(oldcoef) <- make.names(seq_along(oldcoef)) } if (is.null(names(beta))) { names(beta) <- names(oldcoef) } else { diff <- setdiff(names(oldcoef), names(beta)) if (length(diff) > 0) { stop("names not found in 'beta': ", diff, call. = FALSE) } diff <- setdiff(names(beta), names(oldcoef)) if (length(diff) > 0) { stop("names not found in 'coef(model)': ", diff, call. = FALSE) } } beta <- beta[names(oldcoef)] # re-calculate model for new beta's data <- model.frame(formula = formula(model), data = model.frame(model)) mm <- model.matrix(formula(model, fixed.only = TRUE), data = data) # Problem: offset cannot be calculated for the Cox model because that does # not include the intercept if (inherits(model, "coxph")) { stop("D3 does not support the Cox model.", call. = FALSE) } offset <- as.vector(mm %*% beta) uf <- . ~ 1 if (inherits(model, "merMod")) uf <- formula(model, random.only = TRUE) upd <- update(model, formula. = uf, data = cbind(data, offset = offset), offset = offset ) upd } clean.coef <- function(model) { est <- tidy(model, effects = "fixed") coef <- est$estimate names(coef) <- est$term coef } mice/R/nelsonaalen.R0000644000176200001440000000425614436637751014041 0ustar liggesusers#' Cumulative hazard rate or Nelson-Aalen estimator #' #' Calculates the cumulative hazard rate (Nelson-Aalen estimator) #' #' This function is useful for imputing variables that depend on survival time. #' White and Royston (2009) suggested using the cumulative hazard to the #' survival time H0(T) rather than T or log(T) as a predictor in imputation #' models. See section 7.1 of Van Buuren (2012) for an example. #' #' @aliases nelsonaalen hazard #' @param data A data frame containing the data. #' @param timevar The name of the time variable in \code{data}. #' @param statusvar The name of the event variable, e.g. death in \code{data}. #' @return A vector with \code{nrow(data)} elements containing the Nelson-Aalen #' estimates of the cumulative hazard function. #' @author Stef van Buuren, 2012 #' @references White, I. R., Royston, P. (2009). Imputing missing covariate #' values for the Cox model. \emph{Statistics in Medicine}, \emph{28}(15), #' 1982-1998. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-toomany.html#a-further-improvement-survival-as-predictor-variable}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
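# Editor's addition -- a hedged sketch of the use case described above:
# add the cumulative hazard as an extra column so that it can serve as a
# predictor in the imputation model (White & Royston, 2009). The leuk
# data from MASS are reused from the example below; with real data,
# replace time/status by your own survival variables.
leuk2 <- MASS::leuk
leuk2$status <- 1 # no censoring occurs in leuk
leuk2$haz <- nelsonaalen(leuk2, time, status)
# imp <- mice(leuk2, print = FALSE) # haz is now available as a predictor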
#' @keywords misc #' @examples #' require(MASS) #' #' leuk$status <- 1 ## no censoring occurs in leuk data (MASS) #' ch <- nelsonaalen(leuk, time, status) #' plot(x = leuk$time, y = ch, ylab = "Cumulative hazard", xlab = "Time") #' #' ### See example on http://www.engineeredsoftware.com/lmar/pe_cum_hazard_function.htm #' time <- c(43, 67, 92, 94, 149, rep(149, 7)) #' status <- c(rep(1, 5), rep(0, 7)) #' eng <- data.frame(time, status) #' ch <- nelsonaalen(eng, time, status) #' plot(x = time, y = ch, ylab = "Cumulative hazard", xlab = "Time") #' @export nelsonaalen <- function(data, timevar, statusvar) { if (!is.data.frame(data)) { stop("Data must be a data frame") } timevar <- as.character(substitute(timevar)) statusvar <- as.character(substitute(statusvar)) time <- data[, timevar, drop = TRUE] status <- data[, statusvar, drop = TRUE] hazard <- survival::basehaz(survival::coxph(survival::Surv(time, status) ~ 1)) idx <- match(time, hazard[, "time"]) hazard[idx, "hazard"] } mice/R/flux.R0000644000176200001440000002140214330031606012464 0ustar liggesusers#' Influx and outflux of multivariate missing data patterns #' #' Influx and outflux are statistics of the missing data pattern. These #' statistics are useful in selecting predictors that should go into the #' imputation model. #' #' Infux and outflux have been proposed by Van Buuren (2018), chapter 4. #' #' Influx is equal to the number of variable pairs \code{(Yj , Yk)} with #' \code{Yj} missing and \code{Yk} observed, divided by the total number of #' observed data cells. Influx depends on the proportion of missing data of the #' variable. Influx of a completely observed variable is equal to 0, whereas for #' completely missing variables we have influx = 1. For two variables with the #' same proportion of missing data, the variable with higher influx is better #' connected to the observed data, and might thus be easier to impute. #' #' Outflux is equal to the number of variable pairs with \code{Yj} observed and #' \code{Yk} missing, divided by the total number of incomplete data cells. #' Outflux is an indicator of the potential usefulness of \code{Yj} for imputing #' other variables. Outflux depends on the proportion of missing data of the #' variable. Outflux of a completely observed variable is equal to 1, whereas #' outflux of a completely missing variable is equal to 0. For two variables #' having the same proportion of missing data, the variable with higher outflux #' is better connected to the missing data, and thus potentially more useful for #' imputing other variables. #' #' FICO is an outbound statistic defined by the fraction of incomplete cases #' among cases with \code{Yj} observed (White and Carlin, 2010). #' #' @aliases flux #' @param data A data frame or a matrix containing the incomplete data. Missing #' values are coded as NA's. #' @param local A vector of names of columns of \code{data}. The default is to #' include all columns in the calculations. #' @return A data frame with \code{ncol(data)} rows and six columns: #' pobs = Proportion observed, #' influx = Influx #' outflux = Outflux #' ainb = Average inbound statistic #' aout = Average outbound statistic #' fico = Fraction of incomplete cases among cases with \code{Yj} observed #' @seealso \code{\link{fluxplot}}, \code{\link{md.pattern}}, \code{\link{fico}} #' @author Stef van Buuren, 2012 #' @references #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. 
Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation #' compared with complete-case analysis for missing covariate values. #' \emph{Statistics in Medicine}, \emph{29}, 2920-2931. #' @keywords misc #' @export flux <- function(data, local = names(data)) { .avg <- function(row) sum(row, na.rm = TRUE) / (length(row) - 1) ## calculates influx and outflux statistics ## of the missing data pattern x <- colMeans(!is.na(data)) pat <- md.pairs(data) pat$rr <- pat$rr[local, , drop = FALSE] pat$rm <- pat$rm[local, , drop = FALSE] pat$mr <- pat$mr[local, , drop = FALSE] pat$mm <- pat$mm[local, , drop = FALSE] ainb <- apply(pat$mr / (pat$mr + pat$mm), 1, .avg) aout <- apply(pat$rm / (pat$rm + pat$rr), 1, .avg) fico <- fico(data) outflux <- rowSums(pat$rm) / (rowSums(pat$rm + pat$mm)) influx <- rowSums(pat$mr) / (rowSums(pat$mr + pat$rr)) data.frame(pobs = x, influx = influx, outflux = outflux, ainb = ainb, aout = aout, fico = fico) } #' Fluxplot of the missing data pattern #' #' Influx and outflux are statistics of the missing data pattern. These #' statistics are useful in selecting predictors that should go into the #' imputation model. #' #' Infux and outflux have been proposed by Van Buuren (2012), chapter 4. #' #' Influx is equal to the number of variable pairs \code{(Yj , Yk)} with #' \code{Yj} missing and \code{Yk} observed, divided by the total number of #' observed data cells. Influx depends on the proportion of missing data of the #' variable. Influx of a completely observed variable is equal to 0, whereas for #' completely missing variables we have influx = 1. For two variables with the #' same proportion of missing data, the variable with higher influx is better #' connected to the observed data, and might thus be easier to impute. #' #' Outflux is equal to the number of variable pairs with \code{Yj} observed and #' \code{Yk} missing, divided by the total number of incomplete data cells. #' Outflux is an indicator of the potential usefulness of \code{Yj} for imputing #' other variables. Outflux depends on the proportion of missing data of the #' variable. Outflux of a completely observed variable is equal to 1, whereas #' outflux of a completely missing variable is equal to 0. For two variables #' having the same proportion of missing data, the variable with higher outflux #' is better connected to the missing data, and thus potentially more useful for #' imputing other variables. #' #' @aliases fluxplot #' @param data A data frame or a matrix containing the incomplete data. Missing #' values are coded as NA's. #' @param local A vector of names of columns of \code{data}. The default is to #' include all columns in the calculations. #' @param plot Should a graph be produced? #' @param labels Should the points be labeled? #' @param xlim See \code{par}. #' @param ylim See \code{par}. #' @param las See \code{par}. #' @param xlab See \code{par}. #' @param ylab See \code{par}. #' @param main See \code{par}. #' @param eqscplot Should a square plot be produced? #' @param pty See \code{par}. #' @param lwd See \code{par}. Controls axis line thickness and diagonal #' @param \dots Further arguments passed to \code{plot()} or \code{eqscplot()}. 
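# Editor's addition -- a hedged sketch of predictor selection with the
# outflux statistic discussed above; the 0.5 threshold is illustrative,
# not a package recommendation.
fx <- flux(boys)
weak <- row.names(fx)[fx$outflux < 0.5]
pred <- make.predictorMatrix(boys)
pred[, weak] <- 0 # do not use weakly connected variables as predictors
# imp <- mice(boys, predictorMatrix = pred, print = FALSE)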
#' @return An invisible data frame with \code{ncol(data)} rows and six columns: #' pobs = Proportion observed, #' influx = Influx #' outflux = Outflux #' ainb = Average inbound statistic #' aout = Average outbound statistic #' fico = Fraction of incomplete cases among cases with \code{Yj} observed #' @seealso \code{\link{flux}}, \code{\link{md.pattern}}, \code{\link{fico}} #' @author Stef van Buuren, 2012 #' @references #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation #' compared with complete-case analysis for missing covariate values. #' \emph{Statistics in Medicine}, \emph{29}, 2920-2931. #' @keywords misc #' @export fluxplot <- function(data, local = names(data), plot = TRUE, labels = TRUE, xlim = c(0, 1), ylim = c(0, 1), las = 1, xlab = "Influx", ylab = "Outflux", main = paste("Influx-outflux pattern for", deparse(substitute(data))), eqscplot = TRUE, pty = "s", lwd = 1, ...) { f <- flux(data, local) if (plot) { if (eqscplot) { MASS::eqscplot( x = f$influx, y = f$outflux, type = "n", main = main, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, pty = pty, lwd = lwd, axes = FALSE, ... ) } else { plot( x = f$influx, y = f$outflux, type = "n", main = main, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, pty = pty, lwd = lwd, axes = FALSE, ... ) } axis(1, lwd = lwd, las = las) axis(2, lwd = lwd, las = las) abline(1, -1, lty = 2, lwd = lwd) if (labels) { text(x = f$influx, y = f$outflux, label = names(data), ...) } else { points(x = f$influx, y = f$outflux, ...) } box(lwd = lwd) } invisible(data.frame(f)) } #' Fraction of incomplete cases among cases with observed #' #' FICO is an outbound statistic defined by the fraction of incomplete cases #' among cases with \code{Yj} observed (White and Carlin, 2010). #' #' @aliases fico #' @param data A data frame or a matrix containing the incomplete data. Missing #' values are coded as NA's. #' @return A vector of length \code{ncol(data)} of FICO statistics. #' @seealso \code{\link{fluxplot}}, \code{\link{flux}}, \code{\link{md.pattern}} #' @author Stef van Buuren, 2012 #' @references #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html#sec:flux}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' White, I.R., Carlin, J.B. (2010). Bias and efficiency of multiple imputation #' compared with complete-case analysis for missing covariate values. #' \emph{Statistics in Medicine}, \emph{29}, 2920-2931. #' @keywords misc #' @export fico <- function(data) { ic <- ici(data) unlist(lapply(data, FUN = function(x) sum((!is.na(x)) & ic) / sum(!is.na(x)))) } mice/R/fdd.R0000644000176200001440000001022214330031606012241 0ustar liggesusers#' SE Fireworks disaster data #' #' Multiple outcomes of a randomized study to reduce post-traumatic stress. #' #' Data from a randomized experiment to reduce post-traumatic stress by two #' treatments: Eye Movement Desensitization and Reprocessing (EMDR) #' (experimental treatment), and cognitive behavioral therapy (CBT) (control #' treatment). 52 children were randomized to one of these two treatments. #' Outcomes were measured at three time points: at baseline (pre-treatment, T1), #' post-treatment (T2, 4-8 weeks), and at follow-up (T3, 3 months). For more #' details, see de Roos et al (2011). 
Some person covariates were reshuffled. #' The imputation methodology is explained in Chapter 9 of van Buuren (2012). #' #' @name fdd #' @aliases fdd fdd.pred #' @docType data #' @format \code{fdd} is a data frame with 52 rows and 65 columns: #' \describe{ #' \item{id}{Client number} #' \item{trt}{Treatment (E=EMDR, C=CBT)} #' \item{pp}{Per protocol (Y/N)} #' \item{trtp}{Number of parental treatments} #' \item{sex}{Sex: M/F} #' \item{etn}{Ethnicity: NL/OTHER} #' \item{age}{Age (years)} #' \item{trauma}{Trauma count (1-5)} #' \item{prop1}{PROPS total score T1} #' \item{prop2}{PROPS total score T2} #' \item{prop3}{PROPS total score T3} #' \item{crop1}{CROPS total score T1} #' \item{crop2}{CROPS total score T2} #' \item{crop3}{CROPS total score T3} #' \item{masc1}{MASC score T1} #' \item{masc2}{MASC score T2} #' \item{masc3}{MASC score T3} #' \item{cbcl1}{CBCL T1} #' \item{cbcl3}{CBCL T3} #' \item{prs1}{PRS total score T1} #' \item{prs2}{PRS total score T2} #' \item{prs3}{PRS total score T3} #' \item{ypa1}{PTSD-RI B intrusive recollection parent T1} #' \item{ypb1}{PTSD-RI C avoidant/numbing parent T1} #' \item{ypc1}{PTSD-RI D hyper-arousal parent T1} #' \item{yp1}{PTSD-RI B+C+D parent T1} #' \item{ypa2}{PTSD-RI B intrusive recollection parent T2} #' \item{ypb2}{PTSD-RI C avoidant/numbing parent T2} #' \item{ypc2}{PTSD-RI D hyper-arousal parent T2} #' \item{yp2}{PTSD-RI B+C+D parent T1} #' \item{ypa3}{PTSD-RI B intrusive recollection parent T3} #' \item{ypb3}{PTSD-RI C avoidant/numbing parent T3} #' \item{ypc3}{PTSD-RI D hyper-arousal parent T3} #' \item{yp3}{PTSD-RI B+C+D parent T3} #' \item{yca1}{PTSD-RI B intrusive recollection child T1} #' \item{ycb1}{PTSD-RI C avoidant/numbing child T1} #' \item{ycc1}{PTSD-RI D hyper-arousal child T1} #' \item{yc1}{PTSD-RI B+C+D child T1} #' \item{yca2}{PTSD-RI B intrusive recollection child T2} #' \item{ycb2}{PTSD-RI C avoidant/numbing child T2} #' \item{ycc2}{PTSD-RI D hyper-arousal child T2} #' \item{yc2}{PTSD-RI B+C+D child T2} #' \item{yca3}{PTSD-RI B intrusive recollection child T3} #' \item{ycb3}{PTSD-RI C avoidant/numbing child T3} #' \item{ycc3}{PTSD-RI D hyper-arousal child T3} #' \item{yc3}{PTSD-RI B+C+D child T3} #' \item{ypf1}{PTSD-RI parent full T1} #' \item{ypf2}{PTSD-RI parent full T2} #' \item{ypf3}{PTSD-RI parent full T3} #' \item{ypp1}{PTSD parent partial T1} #' \item{ypp2}{PTSD parent partial T2} #' \item{ypp3}{PTSD parent partial T3} #' \item{ycf1}{PTSD child full T1} #' \item{ycf2}{PTSD child full T2} #' \item{ycf3}{PTSD child full T3} #' \item{ycp1}{PTSD child partial T1} #' \item{ycp2}{PTSD child partial T2} #' \item{ycp3}{PTSD child partial T3} #' \item{cbin1}{CBCL Internalizing T1} #' \item{cbin3}{CBCL Internalizing T3} #' \item{cbex1}{CBCL Externalizing T1} #' \item{cbex3}{CBCL Externalizing T3} #' \item{bir1}{Birlison T1} #' \item{bir2}{Birlison T2} #' \item{bir3}{Birlison T3} #' } #' \code{fdd.pred} is the 65 by 65 binary #' predictor matrix used to impute \code{fdd}. #' @source de Roos, C., Greenwald, R., den Hollander-Gijsman, M., Noorthoorn, #' E., van Buuren, S., de Jong, A. (2011). A Randomised Comparison of Cognitive #' Behavioral Therapy (CBT) and Eye Movement Desensitisation and Reprocessing #' (EMDR) in disaster-exposed children. \emph{European Journal of #' Psychotraumatology}, \emph{2}, 5694. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-fdd.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
#' Boca Raton, FL.: Chapman & Hall/CRC Press. #' @keywords datasets #' @examples #' #' #' data <- fdd #' md.pattern(fdd) NULL mice/R/method.R0000644000176200001440000001011114330031606012761 0ustar liggesusers#' Creates a \code{method} argument #' #' This helper function creates a valid \code{method} vector. The #' \code{method} vector is an argument to the \code{mice} function that #' specifies the method for each block. #' @inheritParams mice #' @return Vector of \code{length(blocks)} element with method names #' @seealso \code{\link{mice}} #' @examples #' make.method(nhanes2) #' @export make.method <- function(data, where = make.where(data), blocks = make.blocks(data), defaultMethod = c("pmm", "logreg", "polyreg", "polr")) { method <- rep("", length(blocks)) names(method) <- names(blocks) for (j in names(blocks)) { yvar <- blocks[[j]] y <- data[, yvar] def <- sapply(y, assign.method) k <- ifelse(all(diff(def) == 0), k <- def[1], 1) method[j] <- defaultMethod[k] } nimp <- nimp(where, blocks) method[nimp == 0] <- "" method } check.method <- function(method, data, where, blocks, defaultMethod) { if (is.null(method)) { return(make.method( data = data, where = where, blocks = blocks, defaultMethod = defaultMethod )) } nimp <- nimp(where, blocks) # expand user's imputation method to all visited columns # single string supplied by user (implicit assumption of two columns) if (length(method) == 1) { if (is.passive(method)) { stop("Cannot have a passive imputation method for every column.") } method <- rep(method, length(blocks)) method[nimp == 0] <- "" } # check the length of the argument if (length(method) != length(blocks)) { stop("Length of method differs from number of blocks", call. = FALSE) } # add names to method names(method) <- names(blocks) # check whether the requested imputation methods are on the search path active.check <- !is.passive(method) & nimp > 0 & method != "" passive.check <- is.passive(method) & nimp > 0 & method != "" check <- all(active.check) & any(passive.check) if (check) { fullNames <- rep.int("mice.impute.passive", length(method[passive.check])) } else { fullNames <- paste("mice.impute", method[active.check], sep = ".") if (length(method[active.check]) == 0) fullNames <- character(0) } # type checks on built-in imputation methods for (j in names(blocks)) { vname <- blocks[[j]] y <- data[, vname, drop = FALSE] mj <- method[j] mlist <- list( m1 = c("logreg", "logreg.boot", "polyreg", "lda", "polr"), m2 = c( "norm", "norm.nob", "norm.predict", "norm.boot", "mean", "2l.norm", "2l.pan", "2lonly.norm", "2lonly.pan", "quadratic", "ri" ), m3 = c( "norm", "norm.nob", "norm.predict", "norm.boot", "mean", "2l.norm", "2l.pan", "2lonly.norm", "2lonly.pan", "quadratic", "logreg", "logreg.boot" ) ) cond1 <- sapply(y, is.numeric) cond2 <- sapply(y, is.factor) & sapply(y, nlevels) == 2 cond3 <- sapply(y, is.factor) & sapply(y, nlevels) > 2 if (any(cond1) && mj %in% mlist$m1) { warning("Type mismatch for variable(s): ", paste(vname[cond1], collapse = ", "), "\nImputation method ", mj, " is for categorical data.", call. = FALSE ) } if (any(cond2) && mj %in% mlist$m2) { warning("Type mismatch for variable(s): ", paste(vname[cond2], collapse = ", "), "\nImputation method ", mj, " is not for factors.", call. = FALSE ) } if (any(cond3) && mj %in% mlist$m3) { warning("Type mismatch for variable(s): ", paste(vname[cond3], collapse = ", "), "\nImputation method ", mj, " is not for factors with >2 levels.", call. 
= FALSE ) } } method[nimp == 0] <- "" unlist(method) } # assign methods based on type, # use method 1 if there is no single method within the block assign.method <- function(y) { if (is.numeric(y)) { return(1) } if (nlevels(y) == 2) { return(2) } if (is.ordered(y) && nlevels(y) > 2) { return(4) } if (nlevels(y) > 2) { return(3) } if (is.logical(y)) { return(2) } 1 } mice/R/toenail.R0000644000176200001440000000411114330031606013137 0ustar liggesusers#' Toenail data #' #' The toenail data come from a Multicenter study comparing two oral #' treatments for toenail infection. Patients were evaluated for the #' degree of separation of the nail. Patients were randomized into two #' treatments and were followed over seven visits - four in the first #' year and yearly thereafter. The patients have not been treated #' prior to the first visit so this should be regarded as the #' baseline. #' @name toenail #' @docType data #' @format A data frame with 1908 observations on the following 5 variables: #' \describe{ #' \item{\code{ID}}{a numeric vector giving the ID of patient} #' \item{\code{outcome}}{a numeric vector giving the response #' (0=none or mild seperation, 1=moderate or severe)} #' \item{\code{treatment}}{a numeric vector giving the treatment group} #' \item{\code{month}}{a numeric vector giving the time of the visit #' (not exactly monthly intervals hence not round numbers)} #' \item{\code{visit}}{a numeric vector giving the number of the visit} #' } #' @source #' De Backer, M., De Vroey, C., Lesaffre, E., Scheys, I., and De #' Keyser, P. (1998). Twelve weeks of continuous oral therapy for #' toenail onychomycosis caused by dermatophytes: A double-blind #' comparative trial of terbinafine 250 mg/day versus itraconazole 200 #' mg/day. Journal of the American Academy of Dermatology, 38, 57-63. #' @references #' Lesaffre, E. and Spiessens, B. (2001). On the effect of the number of #' quadrature points in a logistic random-effects model: An example. #' Journal of the Royal Statistical Society, Series C, 50, 325-335. #' #' G. Fitzmaurice, N. Laird and J. Ware (2004) Applied Longitudinal Analysis, #' Wiley and Sons, New York, USA. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-catoutcome.html#example}{\emph{Flexible #' Imputation of Missing Data. Second Edition.}} Chapman & Hall/CRC. #' Boca Raton, FL. #' @keywords datasets #' @seealso \code{\link{toenail2}} #' @details This dataset was copied from the \code{DPpackage}, which is #' scheduled to be discontinued from CRAN in August 2019. NULL mice/R/mice.impute.pmm.R0000644000176200001440000002215414335404116014527 0ustar liggesusers#' Imputation by predictive mean matching #' #' @aliases mice.impute.pmm pmm #' @param y Vector to be imputed #' @param ry Logical vector of length \code{length(y)} indicating the #' the subset \code{y[ry]} of elements in \code{y} to which the imputation #' model is fitted. The \code{ry} generally distinguishes the observed #' (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}. #' @param x Numeric design matrix with \code{length(y)} rows with predictors for #' \code{y}. Matrix \code{x} may have no missing values. #' @param exclude Value or vector of values to exclude from the imputation donor pool in \code{y} #' @param wy Logical vector of length \code{length(y)}. A \code{TRUE} value #' indicates locations in \code{y} for which imputations are created. #' @param donors The size of the donor pool among which a draw is made. #' The default is \code{donors = 5L}. 
Setting \code{donors = 1L} always selects #' the closest match, but is not recommended. Values between 3L and 10L #' provide the best results in most cases (Morris et al, 2015). #' @param matchtype Type of matching distance. The default choice #' (\code{matchtype = 1L}) calculates the distance between #' the \emph{predicted} value of \code{yobs} and #' the \emph{drawn} values of \code{ymis} (called type-1 matching). #' Other choices are \code{matchtype = 0L} #' (distance between predicted values) and \code{matchtype = 2L} #' (distance between drawn values). #' @param ridge The ridge penalty used in \code{.norm.draw()} to prevent #' problems with multicollinearity. The default is \code{ridge = 1e-05}, #' which means that 0.01 percent of the diagonal is added to the cross-product. #' Larger ridges may result in more biased estimates. For highly noisy data #' (e.g. many junk variables), set \code{ridge = 1e-06} or even lower to #' reduce bias. For highly collinear data, set \code{ridge = 1e-04} or higher. #' @param use.matcher Logical. Set \code{use.matcher = TRUE} to specify #' the C function \code{matcher()}, the now deprecated matching function that #' was default in versions #' \code{2.22} (June 2014) to \code{3.11.7} (Oct 2020). Since version \code{3.12.0} #' \code{mice()} uses the much faster \code{matchindex} C function. Use #' the deprecated \code{matcher} function only for exact reproduction. #' @param \dots Other named arguments. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn #' @details #' Imputation of \code{y} by predictive mean matching, based on #' van Buuren (2012, p. 73). The procedure is as follows: #' #' \enumerate{ #' \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} #' \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge #' parameter \eqn{\kappa}.} #' \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}} #' \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} #' \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} #' \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} #' \item{Calculate \eqn{\dot\eta(i,j)=|X_{{obs},[i]|}\hat\beta-X_{{mis},[j]}\dot\beta} #' with \eqn{i=1,\dots,n_1} and \eqn{j=1,\dots,n_0}.} #' \item{Construct \eqn{n_0} sets \eqn{Z_j}, each containing \eqn{d} candidate donors, from Y_{obs} such that \eqn{\sum_d\dot\eta(i,j)} is minimum for all \eqn{j=1,\dots,n_0}. Break ties randomly.} #' \item{Draw one donor \eqn{i_j} from \eqn{Z_j} randomly for \eqn{j=1,\dots,n_0}.} #' \item{Calculate imputations \eqn{\dot y_j = y_{i_j}} for \eqn{j=1,\dots,n_0}.} #' } #' #' The name \emph{predictive mean matching} was proposed by Little (1988). #' #' @references Little, R.J.A. (1988), Missing data adjustments in large surveys #' (with discussion), Journal of Business Economics and Statistics, 6, 287--301. #' #' Morris TP, White IR, Royston P (2015). Tuning multiple imputation by predictive #' mean matching and local residual draws. BMC Med Res Methodol. ;14:75. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-pmm.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} #' @family univariate imputation functions #' @keywords datagen #' @examples #' # We normally call mice.impute.pmm() from within mice() #' # But we may call it directly as follows (not recommended) #' #' set.seed(53177) #' xname <- c("age", "hgt", "wgt") #' r <- stats::complete.cases(boys[, xname]) #' x <- boys[r, xname] #' y <- boys[r, "tv"] #' ry <- !is.na(y) #' table(ry) #' #' # percentage of missing data in tv #' sum(!ry) / length(ry) #' #' # Impute missing tv data #' yimp <- mice.impute.pmm(y, ry, x) #' length(yimp) #' hist(yimp, xlab = "Imputed missing tv") #' #' # Impute all tv data #' yimp <- mice.impute.pmm(y, ry, x, wy = rep(TRUE, length(y))) #' length(yimp) #' hist(yimp, xlab = "Imputed missing and observed tv") #' plot(jitter(y), jitter(yimp), #' main = "Predictive mean matching on age, height and weight", #' xlab = "Observed tv (n = 224)", #' ylab = "Imputed tv (n = 224)" #' ) #' abline(0, 1) #' cor(y, yimp, use = "pair") #' #' # Use blots to exclude different values per column #' # Create blots object #' blots <- make.blots(boys) #' # Exclude ml 1 through 5 from tv donor pool #' blots$tv$exclude <- c(1:5) #' # Exclude 100 random observed heights from tv donor pool #' blots$hgt$exclude <- sample(unique(boys$hgt), 100) #' imp <- mice(boys, method = "pmm", print = FALSE, blots = blots, seed=123) #' blots$hgt$exclude %in% unlist(c(imp$imp$hgt)) # MUST be all FALSE #' blots$tv$exclude %in% unlist(c(imp$imp$tv)) # MUST be all FALSE #' @export mice.impute.pmm <- function(y, ry, x, wy = NULL, donors = 5L, matchtype = 1L, exclude = -99999999, ridge = 1e-05, use.matcher = FALSE, ...) { id.ex <- !ry | !y %in% exclude # id vector for exclusion y <- y[id.ex] # leave out the exclude vector y's # allow for one-dimensional x-space if(!is.null(dim(x))){ x <- x[id.ex, ] } else { x <- x[id.ex] } # leave out the exclude vector x's ry <- ry[id.ex] # leave out the exclude vector indicator { if (is.null(wy)) { wy <- !ry } else { wy <- wy[id.ex] # if applicable adjust wy to match exclude } } x <- cbind(1, as.matrix(x)) ynum <- y if (is.factor(y)) { ynum <- as.integer(y) } parm <- .norm.draw(ynum, ry, x, ridge = ridge, ...) if (matchtype == 0L) { yhatobs <- x[ry, , drop = FALSE] %*% parm$coef yhatmis <- x[wy, , drop = FALSE] %*% parm$coef } if (matchtype == 1L) { yhatobs <- x[ry, , drop = FALSE] %*% parm$coef yhatmis <- x[wy, , drop = FALSE] %*% parm$beta } if (matchtype == 2L) { yhatobs <- x[ry, , drop = FALSE] %*% parm$beta yhatmis <- x[wy, , drop = FALSE] %*% parm$beta } if (use.matcher) { idx <- matcher(yhatobs, yhatmis, k = donors) } else { idx <- matchindex(yhatobs, yhatmis, donors) } return(y[ry][idx]) } #' Finds an imputed value from matches in the predictive metric (deprecated) #' #' This function finds matches among the observed data in the predictive #' mean metric. It selects the \code{donors} closest matches, randomly #' samples one of the donors, and returns the observed value of the #' match. #' #' This function is included for backward compatibility. It was #' used up to \code{mice 2.21}. The current \code{mice.impute.pmm()} #' function calls the faster \code{C} function \code{matcher} instead of #' \code{.pmm.match()}. #' #' @aliases .pmm.match #' @param z A scalar containing the predicted value for the current case #' to be imputed. #' @param yhat A vector containing the predicted values for all cases with an observed #' outcome. 
#' @param y A vector of \code{length(yhat)} elements containing the observed outcome #' @param donors The size of the donor pool among which a draw is made. The default is #' \code{donors = 5}. Setting \code{donors = 1} always selects the closest match. Values #' between 3 and 10 provide the best results. Note: This setting was changed from #' 3 to 5 in version 2.19, based on simulation work by Tim Morris (UCL). #' @param \dots Other parameters (not used). #' @return A scalar containing the observed value of the selected donor. #' @author Stef van Buuren #' @rdname pmm.match #' @references #' Schenker N & Taylor JMG (1996) Partially parametric techniques #' for multiple imputation. \emph{Computational Statistics and Data Analysis}, 22, 425-446. #' #' Little RJA (1988) Missing-data adjustments in large surveys (with discussion). #' \emph{Journal of Business Economics and Statistics}, 6, 287-301. #' #' @export .pmm.match <- function(z, yhat = yhat, y = y, donors = 5, ...) { d <- abs(yhat - z) f <- d > 0 a1 <- ifelse(any(f), min(d[f]), 1) d <- d + runif(length(d), 0, a1 / 10^10) if (donors == 1) { return(y[which.min(d)]) } donors <- min(donors, length(d)) donors <- max(donors, 1) ds <- sort.int(d, partial = donors) m <- sample(y[d <= ds[donors]], 1) return(m) } mice/R/pattern1.R0000644000176200001440000000325214436640305013260 0ustar liggesusers#' Datasets with various missing data patterns #' #' Four simple datasets with various missing data patterns #' #' Van Buuren (2012) uses these four artificial datasets to illustrate various #' missing data patterns. #' #' @name pattern #' @aliases pattern1 pattern2 pattern3 pattern4 #' @docType data #' @format \describe{ \item{list("pattern1")}{Data with a univariate missing #' data pattern} \item{list("pattern2")}{Data with a monotone missing data #' pattern} \item{list("pattern3")}{Data with a file matching missing data #' pattern} \item{list("pattern4")}{Data with a general missing data pattern} } #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/missing-data-pattern.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets #' @examples #' pattern4 #' #' data <- rbind(pattern1, pattern2, pattern3, pattern4) #' mdpat <- cbind(expand.grid(rec = 8:1, pat = 1:4, var = 1:3), r = as.numeric(as.vector(is.na(data)))) #' #' types <- c("Univariate", "Monotone", "File matching", "General") #' tp41 <- lattice::levelplot(r ~ var + rec | as.factor(pat), #' data = mdpat, #' as.table = TRUE, aspect = "iso", #' shrink = c(0.9), #' col.regions = mdc(1:2), #' colorkey = FALSE, #' scales = list(draw = FALSE), #' xlab = "", ylab = "", #' between = list(x = 1, y = 0), #' strip = lattice::strip.custom( #' bg = "grey95", style = 1, #' factor.levels = types #' ) #' ) #' print(tp41) #' #' md.pattern(pattern4) #' p <- md.pairs(pattern4) #' p #' #' ### proportion of usable cases #' p$mr / (p$mr + p$mm) #' #' ### outbound statistics #' p$rm / (p$rm + p$rr) #' #' #' fluxplot(pattern2) NULL mice/R/quickpred.R0000644000176200001440000001327214330031647013510 0ustar liggesusers#' Quick selection of predictors from the data #' #' Selects predictors according to simple statistics #' #' This function creates a predictor matrix using the variable selection #' procedure described in Van Buuren et al.~(1999, p.~687--688). The function is #' designed to aid in setting up a good imputation model for data with many #' variables. #' #' Basic workings: The procedure calculates for each variable pair (i.e. 
#' target-predictor pair) two correlations using all available cases per pair. #' The first correlation uses the values of the target and the predictor #' directly. The second correlation uses the (binary) response indicator of the #' target and the values of the predictor. If the largest (in absolute value) of #' these correlations exceeds \code{mincor}, the predictor will be added to the #' imputation set. The default value for \code{mincor} is 0.1. #' #' In addition, the procedure eliminates predictors whose proportion of usable #' cases fails to meet the minimum specified by \code{minpuc}. The default value #' is 0, so predictors are retained even if they have no usable case. #' #' Finally, the procedure includes any predictors named in the \code{include} #' argument (which is useful for background variables like age and sex) and #' eliminates any predictor named in the \code{exclude} argument. If a variable #' is listed in both \code{include} and \code{exclude} arguments, the #' \code{include} argument takes precedence. #' #' Advanced topic: \code{mincor} and \code{minpuc} are typically specified as #' scalars, but vectors and squares matrices of appropriate size will also work. #' Each element of the vector corresponds to a row of the predictor matrix, so #' the procedure can effectively differentiate between different target #' variables. Setting a high values for can be useful for auxiliary, less #' important, variables. The set of predictor for those variables can remain #' relatively small. Using a square matrix extends the idea to the columns, so #' that one can also apply cellwise thresholds. #' #' @note \code{quickpred()} uses \code{\link[base]{data.matrix}} to convert #' factors to numbers through their internal codes. Especially for unordered #' factors the resulting quantification may not make sense. #' #' @param data Matrix or data frame with incomplete data. #' @param mincor A scalar, numeric vector (of size \code{ncol(data))} or numeric #' matrix (square, of size \code{ncol(data)} specifying the minimum #' threshold(s) against which the absolute correlation in the data is compared. #' @param minpuc A scalar, vector (of size \code{ncol(data))} or matrix (square, #' of size \code{ncol(data)} specifying the minimum threshold(s) for the #' proportion of usable cases. #' @param include A string or a vector of strings containing one or more #' variable names from \code{names(data)}. Variables specified are always #' included as a predictor. #' @param exclude A string or a vector of strings containing one or more #' variable names from \code{names(data)}. Variables specified are always #' excluded as a predictor. #' @param method A string specifying the type of correlation. Use #' \code{'pearson'} (default), \code{'kendall'} or \code{'spearman'}. Can be #' abbreviated. #' @return A square binary matrix of size \code{ncol(data)}. #' @author Stef van Buuren, Aug 2009 #' @seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}} #' @references van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple #' imputation of missing blood pressure covariates in survival analysis. #' \emph{Statistics in Medicine}, \bold{18}, 681--694. #' #' van Buuren, S. and Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} #' @keywords misc #' @examples #' # default: include all predictors with absolute correlation over 0.1 #' quickpred(nhanes) #' #' # all predictors with absolute correlation over 0.4 #' quickpred(nhanes, mincor = 0.4) #' #' # include age and bmi, exclude chl #' quickpred(nhanes, mincor = 0.4, inc = c("age", "bmi"), exc = "chl") #' #' # only include predictors with at least 30% usable cases #' quickpred(nhanes, minpuc = 0.3) #' #' # use low threshold for bmi, and high thresholds for hyp and chl #' pred <- quickpred(nhanes, mincor = c(0, 0.1, 0.5, 0.5)) #' pred #' #' # use it directly from mice #' imp <- mice(nhanes, pred = quickpred(nhanes, minpuc = 0.25, include = "age")) #' @export quickpred <- function(data, mincor = 0.1, minpuc = 0, include = "", exclude = "", method = "pearson") { data <- check.dataform(data) # initialize nvar <- ncol(data) predictorMatrix <- matrix(0, nrow = nvar, ncol = nvar, dimnames = list(names(data), names(data))) x <- data.matrix(data) r <- !is.na(x) # include predictors with # 1) pairwise correlation among data # 2) pairwise correlation of data with response indicator higher than mincor suppressWarnings(v <- abs(cor(x, use = "pairwise.complete.obs", method = method))) v[is.na(v)] <- 0 suppressWarnings(u <- abs(cor(y = x, x = r, use = "pairwise.complete.obs", method = method))) u[is.na(u)] <- 0 maxc <- pmax(v, u) predictorMatrix[maxc > mincor] <- 1 # exclude predictors with a percentage usable cases below minpuc p <- md.pairs(data) puc <- p$mr / (p$mr + p$mm) predictorMatrix[puc < minpuc] <- 0 # exclude predictors listed in the exclude argument yz <- pmatch(exclude, names(data)) predictorMatrix[, yz] <- 0 # include predictors listed in the include argument yz <- pmatch(include, names(data)) predictorMatrix[, yz] <- 1 # some final processing diag(predictorMatrix) <- 0 predictorMatrix[colSums(!r) == 0, ] <- 0 predictorMatrix } mice/R/cc.R0000644000176200001440000000417314330031606012101 0ustar liggesusers#' Select complete cases #' #' Extracts the complete cases, also known as \emph{listwise deletion}. #' \code{cc(x)} is similar to #' \code{na.omit(x)}, but returns an object of the same class #' as the input data. Dimensions are not dropped. For extracting #' incomplete cases, use \code{\link{ici}}. #' #' @param x An \code{R} object. Methods are available for classes #' \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} #' could be a vector. #' @return A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the complete cases. #' @author Stef van Buuren, 2017. #' @seealso \code{\link{na.omit}}, \code{\link{cci}}, \code{\link{ici}} #' @keywords univar #' @examples #' #' # cc(nhanes) # get the 13 complete cases #' # cc(nhanes$bmi) # extract complete bmi #' @export cc <- function(x) UseMethod("cc", x) #' @export cc.mids <- function(x) { x$data[cci(x), , drop = FALSE] } #' @export cc.matrix <- function(x) { x[cci(x), , drop = FALSE] } #' @export cc.data.frame <- function(x) { x[cci(x), , drop = FALSE] } #' @export cc.default <- function(x) { x[cci(x)] } #' Select incomplete cases #' #' Extracts incomplete cases from a data set. #' The companion function for selecting the complete cases is \code{\link{cc}}. #' #' @param x An \code{R} object. Methods are available for classes #' \code{mids}, \code{data.frame} and \code{matrix}. Also, \code{x} #' could be a vector. #' @return A \code{vector}, \code{matrix} or \code{data.frame} containing the data of the complete cases. #' @author Stef van Buuren, 2017. 
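# Editor's addition -- a hedged check that cc() and ic() partition the
# rows of a data frame: the complete and incomplete cases together make
# up the full data set.
nrow(cc(nhanes)) + nrow(ic(nhanes)) == nrow(nhanes) # TRUE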
#' @seealso \code{\link{cc}}, \code{\link{ici}} #' @keywords univar #' @examples #' #' ic(nhanes) # get the 12 rows with incomplete cases #' ic(nhanes[1:10, ]) # incomplete cases within the first ten rows #' ic(nhanes[, c("bmi", "hyp")]) # restrict extraction to variables bmi and hyp #' @export ic <- function(x) UseMethod("ic", x) #' @export ic.mids <- function(x) { x$data[ici(x), , drop = FALSE] } #' @export ic.matrix <- function(x) { x[ici(x), , drop = FALSE] } #' @export ic.data.frame <- function(x) { x[ici(x), , drop = FALSE] } #' @export ic.default <- function(x) { x[ici(x)] } mice/R/pool.r.squared.R0000644000176200001440000000736314330031647014401 0ustar liggesusers#' Pools R^2 of m models fitted to multiply-imputed data #' #' The function pools the coefficients of determination R^2 or the adjusted #' coefficients of determination (R^2_a) obtained with the \code{lm} modeling #' function. For pooling it uses the Fisher \emph{z}-transformation. #' #' @param object An object of class 'mira' or 'mipo', produced by \code{lm.mids}, #' \code{with.mids}, or \code{pool} with \code{lm} as modeling function. #' @param adjusted A logical value. If adjusted=TRUE then the adjusted R^2 is #' calculated. The default value is FALSE. #' @return Returns a 1x4 table with components. Component \code{est} is the #' pooled R^2 estimate. Component \code{lo95} is the 95 \% lower bound of the pooled R^2. #' Component \code{hi95} is the 95 \% upper bound of the pooled R^2. #' Component \code{fmi} is the fraction of missing information due to nonresponse. #' @author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009 #' @seealso \code{\link{pool}},\code{\link{pool.scalar}} #' @references Harel, O (2009). The estimation of R^2 and adjusted R^2 in #' incomplete data sets using multiple imputation, Journal of Applied Statistics, #' 36:1109-1118. #' #' Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New #' York: John Wiley and Sons. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' #' @keywords htest #' @examples #' imp <- mice(nhanes, print = FALSE, seed = 16117) #' fit <- with(imp, lm(chl ~ age + hyp + bmi)) #' #' # input: mira object #' pool.r.squared(fit) #' pool.r.squared(fit, adjusted = TRUE) #' #' # input: mipo object #' est <- pool(fit) #' pool.r.squared(est) #' pool.r.squared(est, adjusted = TRUE) #' @export pool.r.squared <- function(object, adjusted = FALSE) { call <- match.call() if (!is.mira(object) & !is.mipo(object)) { stop("The object must have class 'mira' or 'mipo'") } if (is.mira(object)) { if ((m <- length(object$analyses)) < 2) { stop("At least two imputations are needed for pooling.\n") } if (class((object$analyses[[1]]))[1] != "lm") { stop("r^2 can only be calculated for results of the 'lm' modeling function") } glanced <- summary(object, type = "glance") } if (is.mipo(object)) { if (nrow(object$glanced) < 2) { stop("At least two imputations are needed for pooling.\n") } if (!"r.squared" %in% colnames(object$glanced)) { stop("r^2 can only be calculated for results of the 'lm' modeling function") } glanced <- object$glanced } # Set up array r2 to store R2 values, Fisher z-transformations of R2 values and its variance. 
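  # Editor's note (added comments, no change in behaviour): the pooling
  # below works on r = sqrt(R^2). Each r is mapped to the Fisher z scale,
  # z = 0.5 * log((1 + r) / (1 - r)), with approximate variance 1 / (n - 3).
  # The z values are pooled with pool.scalar(), and the pooled estimate is
  # back-transformed via r = (exp(2 * z) - 1) / (exp(2 * z) + 1) and then
  # squared to yield the pooled R^2 reported in the table.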
m <- nrow(glanced) r2 <- matrix(NA, nrow = m, ncol = 3, dimnames = list(seq_len(m), c("R^2", "Fisher trans F^2", "se()"))) # Fill arrays for (i in seq_len(m)) { r2[i, 1] <- if (!adjusted) sqrt(glanced$r.squared[i]) else sqrt(glanced$adj.r.squared[i]) r2[i, 2] <- 0.5 * log((r2[i, 1] + 1) / (1 - r2[i, 1])) r2[i, 3] <- 1 / (glanced$nobs[i] - 3) } # Compute within, between and total variances following Rubin's rules with function pool.scalar(). fit <- pool.scalar(r2[, 2], r2[, 3]) # Make table with results. qbar <- fit$qbar table <- array(((exp(2 * qbar) - 1) / (1 + exp(2 * qbar)))^2, dim = c(1, 4) ) dimnames(table) <- if (!adjusted) { list("R^2", c("est", "lo 95", "hi 95", "fmi")) } else { list("adj R^2", c("est", "lo 95", "hi 95", "fmi")) } table[, 2] <- ((exp(2 * (qbar - 1.96 * sqrt(fit$t))) - 1) / (1 + exp(2 * (qbar - 1.96 * sqrt(fit$t)))))^2 table[, 3] <- ((exp(2 * (qbar + 1.96 * sqrt(fit$t))) - 1) / (1 + exp(2 * (qbar + 1.96 * sqrt(fit$t)))))^2 table[, 4] <- fit$f table } mice/R/pool.R0000644000176200001440000002512314334522175012475 0ustar liggesusers#' Combine estimates by pooling rules #' #' The \code{pool()} function combines the estimates from \code{m} #' repeated complete data analyses. The typical sequence of steps to #' perform a multiple imputation analysis is: #' \enumerate{ #' \item Impute the missing data by the \code{mice()} function, resulting in #' a multiple imputed data set (class \code{mids}); #' \item Fit the model of interest (scientific model) on each imputed data set #' by the \code{with()} function, resulting an object of class \code{mira}; #' \item Pool the estimates from each model into a single set of estimates #' and standard errors, resulting in an object of class \code{mipo}; #' \item Optionally, compare pooled estimates from different scientific models #' by the \code{D1()} or \code{D3()} functions. #' } #' A common error is to reverse steps 2 and 3, i.e., to pool the #' multiply-imputed data instead of the estimates. Doing so may severely bias #' the estimates of scientific interest and yield incorrect statistical #' intervals and p-values. The \code{pool()} function will detect #' this case. #' #' @details #' The \code{pool()} function averages the estimates of the complete #' data model, computes the total variance over the repeated analyses #' by Rubin's rules (Rubin, 1987, p. 76), and computes the following #' diagnostic statistics per estimate: #' \enumerate{ #' \item Relative increase in variance due to nonresponse {\code{r}}; #' \item Residual degrees of freedom for hypothesis testing {\code{df}}; #' \item Proportion of total variance due to missingness {\code{lambda}}; #' \item Fraction of missing information {\code{fmi}}. #' } #' The degrees of freedom calculation for the pooled estimates uses the #' Barnard-Rubin adjustment for small samples (Barnard and Rubin, 1999). #' #' The \code{pool.syn()} function combines estimates by Reiter's partially #' synthetic data pooling rules (Reiter, 2003). This combination rule #' assumes that the data that is synthesised is completely observed. #' Pooling differs from Rubin's method in the calculation of the total #' variance and the degrees of freedom. #' #' Pooling requires the following input from each fitted model: #' \enumerate{ #' \item the estimates of the model; #' \item the standard error of each estimate; #' \item the residual degrees of freedom of the model. 
#' } #' The \code{pool()} and \code{pool.syn()} functions rely on the #' \code{broom::tidy} and \code{broom::glance} functions to extract these #' parameters. #' #' Since \code{mice 3.0+}, the \code{broom} #' package takes care of filtering out the relevant parts of the #' complete-data analysis. You may see messages #' like \code{Error: No tidy method for objects of class ...} or #' \code{Error: No glance method for objects of class ...}. The message #' means that your complete-data method used in \code{with(imp, ...)} has #' no \code{tidy} or \code{glance} method defined in the \code{broom} package. #' #' The \code{broom.mixed} package contains \code{tidy} and \code{glance} methods #' for mixed models. If you are using a mixed model, first run #' \code{library(broom.mixed)} before calling \code{pool()}. #' #' If no \code{tidy} or \code{glance} methods are defined for your analysis, #' tabulate the \code{m} parameter estimates and their variance #' estimates (the square of the standard errors) from the \code{m} fitted #' models stored in \code{fit$analyses}. For each parameter, run #' \code{\link{pool.scalar}} to obtain the pooled parameter estimate, its variance, the #' degrees of freedom, the relative increase in variance and the fraction of missing #' information. #' #' An alternative is to write your own \code{glance()} and \code{tidy()} #' methods and add these to \code{broom} according to the specifications #' given in \url{https://broom.tidymodels.org}. #' In versions prior to \code{mice 3.0}, pooling required that #' \code{coef()} and \code{vcov()} methods were available for fitted #' objects. \emph{This feature is no longer supported}. The reason is that #' \code{vcov()} methods are inconsistent across packages, leading to #' buggy behaviour of the \code{pool()} function. #' #' Since \code{mice 3.13.2}, \code{pool()} uses the robust #' standard error estimate for pooling when it can extract #' \code{robust.se} from the \code{tidy()} object. #' #' @param object An object of class \code{mira} (produced by \code{with.mids()} #' or \code{as.mira()}), or a \code{list} with model fits. #' @param dfcom A positive number representing the degrees of freedom in the #' complete-data analysis. Normally, this would be the number of independent #' observations minus the number of fitted parameters. The default #' (\code{dfcom = NULL}) extracts this information in the following #' order: 1) the component #' \code{residual.df} returned by \code{glance()} if a \code{glance()} #' function is found, 2) the result of \code{df.residual()} applied to #' the first fitted model, and 3) as \code{999999}. #' In the last case, the warning \code{"Large sample assumed"} is printed. #' If the degrees of freedom are incorrect, specify the appropriate value #' manually. #' @param rule A string indicating the pooling rule. Currently supported are #' \code{"rubin1987"} (default, for missing data) and \code{"reiter2003"} #' (for synthetic data created from a complete data set). #' @param custom.t A custom character string to be parsed as a calculation rule #' for the total variance \code{t}. The custom rule can use the other calculated #' pooling statistics, which must be referenced with the \code{.data$} prefix. The #' default \code{t} calculation has the form #' \code{".data$ubar + (1 + 1 / .data$m) * .data$b"}. #' See the examples for an illustration. #' @return An object of class \code{mipo}, which stands for 'multiple imputation #' pooled outcome'.
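# A sketch of the manual pooling workflow described above, for the case where no
# tidy()/glance() methods exist: tabulate the m estimates and squared standard
# errors yourself and feed them to pool.scalar(). The model and the coefficient
# picked out below ("hyp") are illustrative choices only.
library(mice)
imp <- mice(nhanes, m = 5, maxit = 2, seed = 1, print = FALSE)
fit <- with(imp, lm(bmi ~ hyp + chl))
Q <- sapply(fit$analyses, function(f) coef(f)["hyp"])        # m point estimates
U <- sapply(fit$analyses, function(f) vcov(f)["hyp", "hyp"]) # m sampling variances
manual <- pool.scalar(Q, U, n = nrow(nhanes))
manual$qbar  # pooled estimate
manual$t     # total variance by Rubin's rules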
#' For rule \code{"reiter2003"} values for \code{lambda} and \code{fmi} are #' set to `NA`, as these statistics do not apply for data synthesised from #' fully observed data. #' @seealso \code{\link{with.mids}}, \code{\link{as.mira}}, \code{\link{pool.scalar}}, #' \code{\link[broom:reexports]{glance}}, \code{\link[broom:reexports]{tidy}} #' \url{https://github.com/amices/mice/issues/142}, #' \url{https://github.com/amices/mice/issues/274} #' @references #' Barnard, J. and Rubin, D.B. (1999). Small sample degrees of #' freedom with multiple imputation. \emph{Biometrika}, 86, 948-955. #' #' Rubin, D.B. (1987). \emph{Multiple Imputation for Nonresponse in Surveys}. #' New York: John Wiley and Sons. #' #' Reiter, J.P. (2003). Inference for Partially Synthetic, #' Public Use Microdata Sets. \emph{Survey Methodology}, \bold{29}, 181-189. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' @examples #' # impute missing data, analyse and pool using the classic MICE workflow #' imp <- mice(nhanes, maxit = 2, m = 2) #' fit <- with(data = imp, exp = lm(bmi ~ hyp + chl)) #' summary(pool(fit)) #' #' # generate fully synthetic data, analyse and pool #' imp <- mice(cars, #' maxit = 2, m = 2, #' where = matrix(TRUE, nrow(cars), ncol(cars)) #' ) #' fit <- with(data = imp, exp = lm(speed ~ dist)) #' summary(pool.syn(fit)) #' #' # use a custom pooling rule for the total variance about the estimate #' # e.g. use t = b + b/m instead of t = ubar + b + b/m #' imp <- mice(nhanes, maxit = 2, m = 2) #' fit <- with(data = imp, exp = lm(bmi ~ hyp + chl)) #' pool(fit, custom.t = ".data$b + .data$b / .data$m") #' #' @export pool <- function(object, dfcom = NULL, rule = NULL, custom.t = NULL) { call <- match.call() if (!is.list(object)) stop("Argument 'object' not a list", call. = FALSE) object <- as.mira(object) m <- length(object$analyses) if (m == 1) { warning("Number of multiple imputations m = 1. 
No pooling done.") return(getfit(object, 1)) } dfcom <- get.dfcom(object, dfcom) pooled <- pool.fitlist(getfit(object), dfcom = dfcom, rule = rule, custom.t = custom.t) # mipo object rr <- list( call = call, m = m, pooled = pooled, glanced = get.glanced(object) ) class(rr) <- c("mipo", "data.frame") rr } pool.fitlist <- function(fitlist, dfcom = NULL, rule = c("rubin1987", "reiter2003"), custom.t = NULL) { # rubin1987: Rubin's rules for scalar estimates # reiter2003: Reiter's rules for partially synthetic data rule <- match.arg(rule) w <- summary(fitlist, type = "tidy", exponentiate = FALSE) grp <- intersect(names(w), c("parameter", "term", "contrast", "y.level", "component")) # Note: group_by() changes the order of the terms, which is undesirable # We convert any parameter terms to factor to preserve ordering if ("term" %in% names(w)) w$term <- factor(w$term, levels = unique(w$term)) if ("contrast" %in% names(w)) w$contrast <- factor(w$contrast, levels = unique(w$contrast)) if ("y.level" %in% names(w)) w$y.level <- factor(w$y.level, levels = unique(w$y.level)) if ("component" %in% names(w)) w$component <- factor(w$component, levels = unique(w$component)) # https://github.com/amices/mice/issues/310 # Prefer using robust.se when tidy object contains it if ("robust.se" %in% names(w)) w$std.error <- w$robust.se if (rule == "rubin1987") { pooled <- w %>% group_by(!!!syms(grp)) %>% summarize( m = n(), qbar = mean(.data$estimate), ubar = mean(.data$std.error^2), b = var(.data$estimate), t = ifelse(is.null(custom.t), .data$ubar + (1 + 1 / .data$m) * .data$b, eval(parse(text = custom.t)) ), dfcom = dfcom, df = barnard.rubin(.data$m, .data$b, .data$t, .data$dfcom), riv = (1 + 1 / .data$m) * .data$b / .data$ubar, lambda = (1 + 1 / .data$m) * .data$b / .data$t, fmi = (.data$riv + 2 / (.data$df + 3)) / (.data$riv + 1) ) } if (rule == "reiter2003") { pooled <- w %>% group_by(!!!syms(grp)) %>% summarize( m = n(), qbar = mean(.data$estimate), ubar = mean(.data$std.error^2), b = var(.data$estimate), t = ifelse(is.null(custom.t), .data$ubar + (1 / .data$m) * .data$b, eval(parse(text = custom.t)) ), dfcom = dfcom, df = (.data$m - 1) * (1 + (.data$ubar / (.data$b / .data$m)))^2, riv = (1 + 1 / .data$m) * .data$b / .data$ubar, lambda = NA_real_, fmi = NA_real_ ) } pooled <- data.frame(pooled) names(pooled)[names(pooled) == "qbar"] <- "estimate" pooled } #' @rdname pool #' @export pool.syn <- function(object, dfcom = NULL, rule = "reiter2003") { pool(object = object, dfcom = dfcom, rule = rule) } mice/R/mice.impute.norm.boot.R0000644000176200001440000000223514330031647015651 0ustar liggesusers#' Imputation by linear regression, bootstrap method #' #' Imputes univariate missing data using linear regression with bootstrap #' #' @aliases mice.impute.norm.boot norm.boot #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' Draws a bootstrap sample from \code{x[ry,]} and \code{y[ry]}, calculates #' regression weights and imputes with normal residuals. #' @author Gerko Vink, Stef van Buuren, 2018 #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.norm.boot <- function(y, ry, x, wy = NULL, ...) 
{ if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) n1 <- sum(ry) s <- sample(n1, n1, replace = TRUE) ss <- s dotxobs <- x[ry, , drop = FALSE][s, ] dotyobs <- y[ry][s] p <- estimice(dotxobs, dotyobs, ...) sigma <- sqrt((sum(p$r^2)) / (n1 - ncol(x) - 1)) x[wy, ] %*% p$c + rnorm(sum(wy)) * sigma } mice/R/bwplot.mads.R0000644000176200001440000001301014330031606013734 0ustar liggesusers#' Box-and-whisker plot of amputed and non-amputed data #' #' Plotting method to investigate the relation between the data variables and #' the amputed data. The function shows how the amputed values are related #' to the variable values. #' #' @param x A \code{mads} (\code{\link{mads-class}}) object, typically created by #' \code{\link{ampute}}. #' @param data A string or vector of variable names that needs to be plotted. As #' a default, all variables will be plotted. #' @param which.pat A scalar or vector indicating which patterns need to be plotted. #' As a default, all patterns are plotted. #' @param standardized Logical. Whether the box-and-whisker plots need to be created #' from standardized data or not. Default is TRUE. #' @param descriptives Logical. Whether the mean, variance and n of the variables #' need to be printed. This is useful to examine the effect of the amputation. #' Default is TRUE. #' @param layout A vector of two values indicating how the boxplots of one pattern #' should be divided over the plot. For example, \code{c(2, 3)} indicates that the #' boxplots of six variables need to be placed on 3 rows and 2 columns. Default #' is 1 row and an amount of columns equal to #variables. Note that for more than #' 6 variables, multiple plots will be created automatically. #' @param \dots Not used, but for consistency with generic #' @return A list containing the box-and-whisker plots. Note that a new pattern #' will always be shown in a new plot. #' @note The \code{mads} object contains all the information you need to #' make any desired plots. Check \code{\link{mads-class}} or the vignette \emph{Multivariate #' Amputation using Ampute} to understand the contents of class object \code{mads}. #' @author Rianne Schouten, 2016 #' @seealso \code{\link{ampute}}, \code{\link{bwplot}}, \code{\link{Lattice}} for #' an overview of the package, \code{\link{mads-class}} #' @export bwplot.mads <- function(x, data, which.pat = NULL, standardized = TRUE, descriptives = TRUE, layout = NULL, ...) 
{ if (!is.mads(x)) { stop("Object is not of class mads") } if (missing(data)) data <- NULL yvar <- data if (is.null(yvar)) { varlist <- colnames(x$amp) } else { varlist <- yvar } if (is.null(which.pat)) { pat <- nrow(x$patterns) which.pat <- seq_len(pat) } else { pat <- length(which.pat) } formula <- as.formula(paste0(paste0(varlist, collapse = "+"), "~ factor(.amp)")) data <- NULL if (standardized) { dat <- data.frame(scale(x$data)) } else { dat <- x$data } if (is.null(layout)) { if (ceiling(length(varlist) / 2) > 6) { layout <- c(6, 1) } else { layout <- c(length(varlist), 1) } } for (i in seq_len(pat)) { can <- which(x$cand == which.pat[i]) mis <- matrix(NA, nrow = length(can), ncol = 2) nc <- which(x$patterns[which.pat[i], ] == 0) if (length(nc) > 1) { mis[apply(is.na(x$amp[can, nc]), 1, all), 1] <- "Amp" mis[is.na(mis[, 1]), 1] <- "Non-Amp" } else if (length(nc) == 1) { mis[is.na(x$amp[can, nc]), 1] <- "Amp" mis[is.na(mis[, 1]), 1] <- "Non-Amp" } mis[, 2] <- rep.int(which.pat[i], length(can)) data <- rbind(data, cbind(mis, dat[can, ])) } colnames(data) <- c(".amp", ".pat", varlist) p <- list() vec1 <- c() vec3 <- c() for (i in seq_along(which.pat)) { vec1[((i * 2) - 1):(i * 2)] <- rep.int(paste(which.pat[i]), 2) } vec3 <- paste("", varlist) var <- length(varlist) if (descriptives) { desc <- array(NA, dim = c(2 * length(which.pat), 4, var), dimnames = list( Pattern = vec1, Descriptives = c("Amp", "Mean", "Var", "N"), Variable = vec3 ) ) desc[, 1, ] <- rep.int(rep.int(c(1, 0), length(which.pat)), var) for (i in seq_along(which.pat)) { wp <- which.pat[i] desc[(i * 2) - 1, 2, ] <- round(vapply(varlist, function(x) { mean(data[data$.pat == wp & data$.amp == "Amp", x]) }, numeric(1)), 5) desc[(i * 2), 2, ] <- round(vapply(varlist, function(x) { mean(data[data$.pat == wp & data$.amp == "Non-Amp", x]) }, numeric(1)), 5) desc[(i * 2) - 1, 3, ] <- round(vapply(varlist, function(x) { var(data[data$.pat == wp & data$.amp == "Amp", x]) }, numeric(1)), 5) desc[(i * 2), 3, ] <- round(vapply(varlist, function(x) { var(data[data$.pat == wp & data$.amp == "Non-Amp", x]) }, numeric(1)), 5) desc[(i * 2) - 1, 4, ] <- vapply(varlist, function(x) { length(data[data$.pat == wp & data$.amp == "Amp", x]) }, numeric(1)) desc[(i * 2), 4, ] <- vapply(varlist, function(x) { length(data[data$.pat == wp & data$.amp == "Non-Amp", x]) }, numeric(1)) } p[["Descriptives"]] <- desc } theme <- list( superpose.symbol = list(col = "black", pch = 1), superpose.line = list(col = "black", lwd = 1), box.dot = list(col = "black"), box.rectangle = list(col = "black"), box.umbrella = list(col = "black"), box.symbol = list(col = "black"), plot.symbol = list(col = "black", pch = 1), plot.line = list(col = "black"), strip.background = list(col = "grey95") ) for (i in seq_len(pat)) { p[[paste("Boxplot pattern", which.pat[i])]] <- bwplot( x = formula, data = data[data$.pat == which.pat[i], ], multiple = TRUE, outer = TRUE, layout = layout, ylab = "", par.settings = theme, xlab = paste("Data distributions in pattern", which.pat[i]) ) } p } mice/R/mice.impute.passive.R0000644000176200001440000000234314330031647015406 0ustar liggesusers#' Passive imputation #' #' Calculate new variable during imputation #' #' @param data A data frame #' @param func A \code{formula} specifying the transformations on data #' @return The result of applying \code{formula} #' @details #' Passive imputation is a special internal imputation function. 
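# A minimal sketch of the passive imputation facility documented in this block,
# assuming the 'boys' data shipped with mice (bmi = wgt / (hgt / 100)^2). The
# predictorMatrix change prevents bmi from feeding back into its own components.
library(mice)
meth <- make.method(boys)
meth["bmi"] <- "~ I(wgt / (hgt / 100)^2)"
pred <- make.predictorMatrix(boys)
pred[c("wgt", "hgt"), "bmi"] <- 0
imp <- mice(boys, method = meth, predictorMatrix = pred, m = 2, maxit = 2,
            print = FALSE, seed = 1)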
Using this #' facility, the user can specify, at any point in the \code{mice} Gibbs #' sampling algorithm, a function on the imputed data. This is useful, for #' example, to compute a cubic version of a variable, a transformation like #' \code{Q = W/H^2} based on two variables, or a mean variable like #' \code{(x_1+x_2+x_3)/3}. The derived variables can be used in other #' places in the imputation model. The function allows the user to dynamically derive #' virtually any function of the imputed data at virtually any time. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{mice}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords datagen #' @export mice.impute.passive <- function(data, func) { model.frame(func, data) } mice/R/mice.impute.lasso.norm.R0000644000176200001440000000470014330031647016026 0ustar liggesusers#' Imputation by direct use of lasso linear regression #' #' Imputes univariate missing normal data using lasso linear regression with bootstrap. #' #' @aliases mice.impute.lasso.norm lasso.norm #' @inheritParams mice.impute.norm.boot #' @param nfolds The number of folds for the cross-validation of the lasso penalty. #' The default is 10. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' The method consists of the following steps: #' \enumerate{ #' \item For a given y variable under imputation, draw a bootstrap version y* #' with replacement from the observed cases \code{y[ry]}, and store in x* the #' corresponding values from \code{x[ry, ]}. #' \item Fit a regularised (lasso) linear regression with y* as the outcome, #' and x* as predictors. #' A vector of regression coefficients bhat is obtained. #' All of these coefficients are considered random draws from the posterior #' distribution of the imputation model parameters. #' Some of these coefficients will be shrunk to 0. #' \item Draw the imputed values from the predictive distribution defined by #' the original (non-bootstrap) data, bhat, and the estimated error variance. #' } #' The method is based on the Direct Use of Regularized Regression (DURR) proposed by #' Zhao & Long (2016) and Deng et al (2016). #' @author Edoardo Costantini, 2021 #' @references #' #' Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for #' general missing data patterns in the presence of high-dimensional data. #' Scientific reports, 6(1), 1-10. #' #' Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of #' high-dimensional data. Statistical Methods in Medical Research, 25(5), #' 2021-2035. #' #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.lasso.norm <- function(y, ry, x, wy = NULL, nfolds = 10, ...) { install.on.demand("glmnet", ...)
# Bootstrap sample if (is.null(wy)) wy <- !ry n1 <- sum(ry) s <- sample(n1, n1, replace = TRUE) x_glmnet <- cbind(1, x) dotxobs <- x_glmnet[ry, , drop = FALSE][s, , drop = FALSE] dotyobs <- y[ry][s] # Train imputation model cv_lasso <- glmnet::cv.glmnet( x = dotxobs, y = dotyobs, family = "gaussian", nfolds = nfolds, alpha = 1 ) # Obtain imputations s2hat <- mean((predict(cv_lasso, dotxobs, s = "lambda.min") - dotyobs)^2) as.vector(predict(cv_lasso, x_glmnet[wy, ], s = "lambda.min")) + rnorm(sum(wy), 0, sqrt(s2hat)) } mice/R/supports.transparent.R0000644000176200001440000000136114330031606015747 0ustar liggesusers#' Supports semi-transparent foreground colors? #' #' This function is used by \code{mdc()} to find out whether the current device #' supports semi-transparent foreground colors. #' #' The function calls the function \code{dev.capabilities()} from the package #' \code{grDevices}. The function return \code{FALSE} if the status of the #' current device is unknown. #' #' @aliases supports.transparent transparent #' @return \code{TRUE} or \code{FALSE} #' @seealso \code{\link{mdc}} \code{\link{dev.capabilities}} #' @keywords hplot #' @examples #' #' supports.transparent() #' @export supports.transparent <- function() { query <- grDevices::dev.capabilities("semiTransparency")$semiTransparency if (is.na(query)) { query <- FALSE } query } mice/R/complete.R0000644000176200001440000001252014334522175013331 0ustar liggesusers#' Extracts the completed data from a \code{mids} object #' #' Takes an object of class \code{mids}, fills in the missing data, and returns #' the completed data in a specified format. #' #' @aliases complete #' @param data An object of class \code{mids} as created by the function #' \code{mice()}. #' @param action A numeric vector or a keyword. Numeric #' values between 1 and \code{data$m} return the data with #' imputation number \code{action} filled in. The value of \code{action = 0} #' return the original data, with missing values. \code{action} can #' also be one of the following keywords: \code{"all"}, \code{"long"}, #' \code{"broad"} and \code{"repeated"}. See the Details section #' for the interpretation. #' The default is \code{action = 1L} returns the first imputed data set. #' @param include A logical to indicate whether the original data with the missing #' values should be included. #' @param mild A logical indicating whether the return value should #' always be an object of class \code{mild}. Setting \code{mild = TRUE} #' overrides \code{action} keywords \code{"long"}, \code{"broad"} #' and \code{"repeated"}. The default is \code{FALSE}. #' @param \dots Additional arguments. Not used. #' @return Complete data set with missing values replaced by imputations. #' A \code{data.frame}, or a list of data frames of class \code{mild}. #' @details #' The argument \code{action} can be length-1 character, which is #' matched to one of the following keywords: #' \describe{ #' \item{\code{"all"}}{produces a \code{mild} object of imputed data sets. When #' \code{include = TRUE}, then the original data are appended as the first list #' element;} #' \item{\code{"long"}}{ produces a data set where imputed data sets #' are stacked vertically. 
The columns are added: 1) \code{.imp}, integer, #' referring the imputation number, and 2) \code{.id}, character, the row #' names of \code{data$data};} #' \item{\code{"stacked"}}{ same as \code{"long"} but without the two #' additional columns;} #' \item{\code{"broad"}}{ produces a data set with where imputed data sets #' are stacked horizontally. Columns are ordered as in the original data. #' The imputation number is appended to each column name;} #' \item{\code{"repeated"}}{ same as \code{"broad"}, but with #' columns in a different order.} #' } #' @note #' Technical note: \code{mice 3.7.5} renamed the \code{complete()} function #' to \code{complete.mids()} and exported it as an S3 method of the #' generic \code{tidyr::complete()}. Name clashes between #' \code{mice::complete()} and \code{tidyr::complete()} should no #' longer occur. #' @seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}} #' @keywords manip #' @examples #' #' # obtain first imputed data set #' sum(is.na(nhanes2)) #' imp <- mice(nhanes2, print = FALSE, maxit = 1) #' dat <- complete(imp) #' sum(is.na(dat)) #' #' # obtain stacked third and fifth imputation #' dat <- complete(imp, c(3, 5)) #' #' # obtain all datasets, with additional identifiers #' head(complete(imp, "long")) #' #' # same, but now as list, mild object #' dslist <- complete(imp, "all") #' length(dslist) #' #' # same, but also include the original data #' dslist <- complete(imp, "all", include = TRUE) #' length(dslist) #' #' # select original + 3 + 5, store as mild #' dslist <- complete(imp, c(0, 3, 5), mild = TRUE) #' names(dslist) #' @export complete.mids <- function(data, action = 1L, include = FALSE, mild = FALSE, ...) { if (!is.mids(data)) stop("'data' not of class 'mids'") m <- as.integer(data$m) if (is.numeric(action)) { action <- as.integer(action) idx <- action[action >= 0L & action <= m] if (include && all(idx != 0L)) idx <- c(0L, idx) shape <- ifelse(mild, "mild", "stacked") } else if (is.character(action)) { if (include) idx <- 0L:m else idx <- 1L:m shape <- match.arg(action, c("all", "long", "broad", "repeated", "stacked")) shape <- ifelse(shape == "all" || mild, "mild", shape) } else { stop("'action' not recognized") } mylist <- vector("list", length = length(idx)) for (j in seq_along(idx)) { mylist[[j]] <- single.complete(data$data, data$where, data$imp, idx[j]) } if (shape == "stacked") { return(bind_rows(mylist)) } if (shape == "mild") { names(mylist) <- as.character(idx) class(mylist) <- c("mild", "list") return(mylist) } if (shape == "long") { cmp <- bind_rows(mylist) cmp <- data.frame( .imp = rep(idx, each = nrow(data$data)), .id = rep.int(1L:nrow(data$data), length(idx)), cmp ) if (is.integer(attr(data$data, "row.names"))) { row.names(cmp) <- seq_len(nrow(cmp)) } else { row.names(cmp) <- as.character(seq_len(nrow(cmp))) } return(cmp) } # must be broad or repeated cmp <- bind_cols(mylist) names(cmp) <- paste(rep.int(names(data$data), m), rep.int(idx, rep.int(ncol(data$data), length(idx))), sep = "." 
) if (shape == "broad") { return(cmp) } else { return(cmp[, order(rep.int(seq_len(ncol(data$data)), length(idx)))]) } } single.complete <- function(data, where, imp, ell) { if (ell == 0L) { return(data) } if (is.null(where)) { where <- is.na(data) } idx <- seq_len(ncol(data))[apply(where, 2, any)] for (j in idx) { if (is.null(imp[[j]])) { data[where[, j], j] <- NA } else { data[where[, j], j] <- imp[[j]][, ell] } } data } mice/R/ampute.default.R0000644000176200001440000001067014330031606014431 0ustar liggesusers#' Default \code{patterns} in \code{ampute} #' #' This function creates a default pattern matrix for the multivariate #' amputation function \code{ampute()}. #' #' @param n A scalar specifying the number of variables in the data. #' @return A square matrix of size \code{n} where \code{0} indicates a variable # should have missing values and \code{1} indicates a variable should remain # complete. Each pattern has missingness on one variable only. #' @seealso \code{\link{ampute}}, \code{\link{md.pattern}} #' @author Rianne Schouten, 2016 #' @keywords internal #' @export ampute.default.patterns <- function(n) { patterns.list <- lapply( seq_len(n), function(i) c(rep.int(1, i - 1), 0, rep.int(1, n - i)) ) do.call(rbind, patterns.list) } #' Default \code{freq} in \code{ampute} #' #' Defines the default relative frequency vector for the multivariate #' amputation function \code{ampute}. #' #' @param patterns A matrix of size #patterns by #variables where \code{0} indicates #' a variable should have missing values and \code{1} indicates a variable should #' remain complete. Could be the result of \code{\link{ampute.default.patterns}}. #' @return A vector of length #patterns containing the relative frequencies with #' which the patterns should occur. An equal probability is given to each pattern. #' @seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}} #' @author Rianne Schouten, 2016 #' @keywords internal #' @export ampute.default.freq <- function(patterns) { rep.int(1 / nrow(patterns), nrow(patterns)) } #' Default \code{weights} in \code{ampute} #' #' Defines the default weights matrix for the multivariate amputation function #' \code{ampute}. #' #' @param patterns A matrix of size #patterns by #variables where \code{0} indicates #' a variable should have missing values and \code{1} indicates a variable should #' remain complete. Could be the result of \code{\link{ampute.default.patterns}}. #' @param mech A string specifying the missingness mechanism. #' @return A matrix of size #patterns by #variables containing the weights that #' will be used to calculate the weighted sum scores. Equal weights are given to #' all variables. When mechanism is MAR, variables that will be amputed will be #' weighted with \code{0}. If it is MNAR, variables that will be observed #' will be weighted with \code{0}. If mechanism is MCAR, the weights matrix will #' not be used. A default MAR matrix will be returned. 
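# A short sketch of what these amputation defaults produce for three variables;
# only exported helpers are used, purely as an illustration.
library(mice)
pat <- ampute.default.patterns(n = 3)                   # one incomplete variable per pattern
pat
ampute.default.freq(patterns = pat)                     # equal relative frequencies
ampute.default.weights(patterns = pat, mech = "MAR")    # zero weight on the amputed variable
ampute.default.weights(patterns = pat, mech = "MNAR")   # weight only on the amputed variable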
#' @seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}} #' @author Rianne Schouten, 2016 #' @keywords internal #' @export ampute.default.weights <- function(patterns, mech) { weights <- matrix(data = 1, nrow = nrow(patterns), ncol = ncol(patterns)) if (mech != "MNAR") { weights <- matrix(data = 1, nrow = nrow(patterns), ncol = ncol(patterns)) weights[patterns == 0] <- 0 } else { weights <- matrix(data = 0, nrow = nrow(patterns), ncol = ncol(patterns)) weights[patterns == 0] <- 1 } weights } #' Default \code{type} in \code{ampute()} #' #' Defines the default type vector for the multivariate amputation function #' \code{ampute}. #' #' @param patterns A matrix of size #patterns by #variables where 0 indicates a #' variable should have missing values and 1 indicates a variable should remain #' complete. Could be the result of \code{\link{ampute.default.patterns}}. #' @return A string vector of length #patterns containing the missingness types. #' Each pattern will be amputed with a "RIGHT" missingness. #' @seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}} #' @author Rianne Schouten, 2016 #' @keywords internal #' @export ampute.default.type <- function(patterns) { rep.int("RIGHT", nrow(patterns)) } #' Default \code{odds} in \code{ampute()} #' #' Defines the default odds matrix for the multivariate amputation function #' \code{ampute}. #' #' @param patterns A matrix of size #patterns by #variables where 0 indicates a #' variable should have missing values and 1 indicates a variable should remain #' complete. Could be the result of \code{\link{ampute.default.patterns}}. #' @return A matrix where #rows equals #patterns. Default is 4 quantiles with odds #' values 1, 2, 3 and 4, for each pattern, imitating a RIGHT type of missingness. #' @seealso \code{\link{ampute}}, \code{\link{ampute.default.patterns}} #' @author Rianne Schouten, 2016 #' @keywords internal #' @export ampute.default.odds <- function(patterns) { matrix(seq_len(4), nrow = nrow(patterns), ncol = 4, byrow = TRUE) } mice/R/bwplot.R0000644000176200001440000002772414330031647013037 0ustar liggesusers#' Box-and-whisker plot of observed and imputed data #' #' Plotting methods for imputed data using \pkg{lattice}. \code{bwplot} #' produces box-and-whisker plots. The function #' automatically separates the observed and imputed data. The #' functions extend the usual features of \pkg{lattice}. #' #' The argument \code{na.groups} may be used to specify (combinations of) #' missingness in any of the variables. The argument \code{groups} can be used #' to specify groups based on the variable values themselves. Only one of both #' may be active at the same time. When both are specified, \code{na.groups} #' takes precedence over \code{groups}. #' #' Use the \code{subset} and \code{na.groups} together to plots parts of the #' data. For example, select the first imputed data set by by #' \code{subset=.imp==1}. #' #' Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #' specified in the arguments list to alter the plotting symbols. If #' \code{length(col)==2}, the color specification to define the observed and #' missing groups. \code{col[1]} is the color of the 'observed' data, #' \code{col[2]} is the color of the missing or imputed data. A convenient color #' choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #' data, and a transparent red color for the imputed data. A good choice is #' \code{col=mdc(1:2), pch=20, cex=1.5}. 
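# A sketch of the colour convention just described: transparent blue for observed
# values, transparent red for imputed values, shown here with stripplot(). The
# imputation call is a minimal illustrative setup, not taken from the examples below.
library(mice)
imp <- mice(nhanes, m = 3, maxit = 2, print = FALSE, seed = 1)
stripplot(imp, bmi ~ .imp, col = mdc(1:2), pch = 20, cex = 1.5)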
These choices can be set for the #' duration of the session by running \code{mice.theme()}. #' #' @aliases bwplot #' @param x A \code{mids} object, typically created by \code{mice()} or #' \code{mice.mids()}. #' @param data Formula that selects the data to be plotted. This argument #' follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #' variables (used for the per-panel display) and the optional conditioning #' variables (which define the subsets plotted in different panels) to be used #' in the plot. #' #' The formula is evaluated on the complete data set in the \code{long} form. #' Legal variable names for the formula include \code{names(x$data)} plus the #' two administrative factors \code{.imp} and \code{.id}. #' #' \bold{Extended formula interface:} The primary variable terms (both the LHS #' \code{y} and RHS \code{x}) may consist of multiple terms separated by a #' \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #' taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #' \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #' \emph{separate panels}. This behavior differs from standard \pkg{lattice}. #' \emph{Only combine terms of the same type}, i.e. only factors or only #' numerical variables. Mixing numerical and categorical data occasionally #' produces odds labeling of vertical axis. #' #' For convenience, in \code{stripplot()} and \code{bwplot} the formula #' \code{y~.imp} may be abbreviated as \code{y}. This applies only to a single #' \code{y}, and does not (yet) work for \code{y1+y2~.imp}. #' #' @param na.groups An expression evaluating to a logical vector indicating #' which two groups are distinguished (e.g. using different colors) in the #' display. The environment in which this expression is evaluated in the #' response indicator \code{is.na(x$data)}. #' #' The default \code{na.group = NULL} contrasts the observed and missing data #' in the LHS \code{y} variable of the display, i.e. groups created by #' \code{is.na(y)}. The expression \code{y} creates the groups according to #' \code{is.na(y)}. The expression \code{y1 & y2} creates groups by #' \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #' \code{is.na(y1) | is.na(y2)}, and so on. #' @param groups This is the usual \code{groups} arguments in \pkg{lattice}. It #' differs from \code{na.groups} because it evaluates in the completed data #' \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #' \code{na.groups} evaluates in the response indicator. See #' \code{\link{xyplot}} for more details. When both \code{na.groups} and #' \code{groups} are specified, \code{na.groups} takes precedence, and #' \code{groups} is ignored. #' @param theme A named list containing the graphical parameters. The default #' function \code{mice.theme} produces a short list of default colors, line #' width, and so on. The extensive list may be obtained from #' \code{trellis.par.get()}. Global graphical parameters like \code{col} or #' \code{cex} in high-level calls are still honored, so first experiment with #' the global parameters. Many setting consists of a pair. For example, #' \code{mice.theme} defines two symbol colors. The first is for the observed #' data, the second for the imputed data. The theme settings only exist during #' the call, and do not affect the trellis graphical parameters. #' @param mayreplicate A logical indicating whether color, line widths, and so #' on, may be replicated. 
The graphical functions attempt to choose #' "intelligent" graphical parameters. For example, the same color can be #' replicated for different element, e.g. use all reds for the imputed data. #' Replication may be switched off by setting the flag to \code{FALSE}, in order #' to allow the user to gain full control. #' @param as.table See \code{\link[lattice:xyplot]{xyplot}}. #' @param outer See \code{\link[lattice:xyplot]{xyplot}}. #' @param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #' @param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #' @param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #' @param subset See \code{\link[lattice:xyplot]{xyplot}}. #' @param \dots Further arguments, usually not directly processed by the #' high-level functions documented here, but instead passed on to other #' functions. #' @return The high-level functions documented here, as well as other high-level #' Lattice functions, return an object of class \code{"trellis"}. The #' \code{\link[lattice:update.trellis]{update}} method can be used to #' subsequently update components of the object, and the #' \code{\link[lattice:print.trellis]{print}} method (usually called by default) #' will plot it on an appropriate plotting device. #' @note The first two arguments (\code{x} and \code{data}) are reversed #' compared to the standard Trellis syntax implemented in \pkg{lattice}. This #' reversal was necessary in order to benefit from automatic method dispatch. #' #' In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #' in \pkg{lattice} the argument \code{x} is always a formula. #' #' In \pkg{mice} the argument \code{data} is always a formula object, whereas in #' \pkg{lattice} the argument \code{data} is usually a data frame. #' #' All other arguments have identical interpretation. #' #' @author Stef van Buuren #' @seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{densityplot}}, #' \code{\link{stripplot}}, \code{\link{lattice}} for an overview of the #' package, as well as \code{\link[lattice:xyplot]{bwplot}}, #' \code{\link[lattice:panel.xyplot]{panel.bwplot}}, #' \code{\link[lattice:print.trellis]{print.trellis}}, #' \code{\link[lattice:trellis.par.get]{trellis.par.set}} #' @references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #' Visualization with R}, Springer. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} #' @keywords hplot #' @examples #' #' imp <- mice(boys, maxit = 1) #' #' ### box-and-whisker plot per imputation of all numerical variables #' bwplot(imp) #' #' ### tv (testicular volume), conditional on region #' bwplot(imp, tv ~ .imp | reg) #' #' ### same data, organized in a different way #' bwplot(imp, tv ~ reg | .imp, theme = list()) #' @export bwplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE, theme = mice.theme(), mayreplicate = TRUE, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), ..., subscripts = TRUE, subset = TRUE) { call <- match.call() if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object") ## unpack data and response indicator cd <- data.frame(complete(x, "long", include = TRUE)) cd$.imp <- as.factor(cd$.imp) r <- as.data.frame(is.na(x$data)) ## evaluate na.group in response indicator nagp <- eval(expr = substitute(na.groups), envir = r, enclos = parent.frame()) if (is.expression(nagp)) nagp <- eval(expr = nagp, envir = r, enclos = parent.frame()) ## evaluate groups in imputed data ngp <- eval(expr = substitute(groups), envir = cd, enclos = parent.frame()) if (is.expression(ngp)) ngp <- eval(expr = ngp, envir = cd, enclos = parent.frame()) groups <- ngp ## evaluate subset in imputed data ss <- eval(expr = substitute(subset), envir = cd, enclos = parent.frame()) if (is.expression(ss)) ss <- eval(expr = ss, envir = cd, enclos = parent.frame()) subset <- ss ## evaluate further arguments before parsing dots <- list(...) args <- list( allow.multiple = allow.multiple, outer = outer, drop.unused.levels = drop.unused.levels, subscripts = subscripts, as.table = as.table ) ## create formula if not given (in call$data !) vnames <- names(cd)[-seq_len(2)] allfactors <- unlist(lapply(cd, is.factor))[-seq_len(2)] if (missing(data)) { vnames <- vnames[!allfactors] formula <- as.formula(paste(paste(vnames, collapse = "+", sep = ""), "~.imp", sep = "")) } else { ## pad abbreviated formula abbrev <- length(grep("~", call$data)) == 0 if (abbrev) { if (length(call$data) > 1) { stop("Cannot pad extended formula.") } else { formula <- as.formula(paste(call$data, "~.imp", sep = "")) } } else { formula <- data } } ## determine the y-variables form <- lattice::latticeParseFormula( model = formula, data = cd, subset = subset, groups = groups, multiple = allow.multiple, outer = outer, subscripts = TRUE, drop = drop.unused.levels ) ynames <- unlist(lapply(strsplit(form$left.name, " \\+ "), rm.whitespace)) xnames <- unlist(lapply(strsplit(form$right.name, " \\+ "), rm.whitespace)) ## groups is not useful in bwplot ## in order to force subgroup analysis, ## make the observed data in .imp>0 missing data nona <- is.null(call$na.groups) if (!is.null(call$groups) & nona) { gp <- call$groups } else { if (nona) { for (i in seq_along(ynames)) { yvar <- ynames[i] select <- cd$.imp != 0 & !r[, yvar] cd[select, yvar] <- NA } } else { for (i in seq_along(ynames)) { yvar <- ynames[i] select <- cd$.imp != 0 & !nagp cd[select, yvar] <- NA } } } ## replicate color 2 if .imp is part of xnames mustreplicate <- !is.na(match(".imp", xnames)) & mayreplicate if (mustreplicate) { theme$box.dot$col <- rep(theme$box.dot$col[1:2], c(1, x$m)) theme$box.rectangle$col <- rep(theme$box.rectangle$col[1:2], c(1, x$m)) theme$box.umbrella$col <- rep(theme$box.rectangle$col[1:2], c(1, x$m)) theme$plot.symbol$col <- mdc(3) theme$plot.symbol$pch <- 1 } ## change axis defaults of extended formula interface if 
(is.null(call$xlab) && !is.na(match(".imp", xnames))) { dots$xlab <- "" if (length(xnames) == 1) dots$xlab <- "Imputation number" } if (is.null(call$ylab)) { args$ylab <- "" if (length(ynames) == 1) args$ylab <- ynames } if (is.null(call$scales)) { args$scales <- list() if (length(ynames) > 1) { args$scales <- list(x = list(relation = "free"), y = list(relation = "free")) } } ## ready args <- c( x = formula, data = list(cd), groups = list(groups), args, dots, subset = call$subset ) ## go tp <- do.call("bwplot", args) update(tp, par.settings = theme) } mice/R/sampler.R0000644000176200001440000001700114347612445013167 0ustar liggesusers# The sampler controls the actual Gibbs sampling iteration scheme. # This function is called by mice and mice.mids sampler <- function(data, m, ignore, where, imp, blocks, method, visitSequence, predictorMatrix, formulas, blots, post, fromto, printFlag, ...) { from <- fromto[1] to <- fromto[2] maxit <- to - from + 1 r <- !is.na(data) # set up array for convergence checking chainMean <- chainVar <- initialize.chain(blocks, maxit, m) ## THE MAIN LOOP: GIBBS SAMPLER ## if (maxit < 1) iteration <- 0 if (maxit >= 1) { if (printFlag) { cat("\n iter imp variable") } for (k in from:to) { # begin k loop : main iteration loop iteration <- k for (i in seq_len(m)) { # begin i loop: repeated imputation loop if (printFlag) { cat("\n ", iteration, " ", i) } # prepare the i'th imputation # do not overwrite any observed data for (h in visitSequence) { for (j in blocks[[h]]) { y <- data[, j] ry <- r[, j] wy <- where[, j] data[(!ry) & wy, j] <- imp[[j]][(!ry)[wy], i] } } # impute block-by-block for (h in visitSequence) { ct <- attr(blocks, "calltype") calltype <- ifelse(length(ct) == 1, ct[1], ct[h]) b <- blocks[[h]] if (calltype == "formula") ff <- formulas[[h]] else ff <- NULL type <- predictorMatrix[h, ] user <- blots[[h]] # univariate/multivariate logic theMethod <- method[h] empt <- theMethod == "" univ <- !empt && !is.passive(theMethod) && !handles.format(paste0("mice.impute.", theMethod)) mult <- !empt && !is.passive(theMethod) && handles.format(paste0("mice.impute.", theMethod)) pass <- !empt && is.passive(theMethod) && length(blocks[[h]]) == 1 if (printFlag & !empt) cat(" ", b) ## store current state oldstate <- get("state", pos = parent.frame()) newstate <- list( it = k, im = i, dep = h, meth = theMethod, log = oldstate$log ) assign("state", newstate, pos = parent.frame(), inherits = TRUE) # (repeated) univariate imputation - type method if (univ) { for (j in b) { imp[[j]][, i] <- sampler.univ( data = data, r = r, where = where, type = type, formula = ff, method = theMethod, yname = j, k = k, calltype = calltype, user = user, ignore = ignore, ... ) data[(!r[, j]) & where[, j], j] <- imp[[j]][(!r[, j])[where[, j]], i] # optional post-processing cmd <- post[j] if (cmd != "") { eval(parse(text = cmd)) data[(!r[, j]) & where[, j], j] <- imp[[j]][(!r[, j])[where[, j]], i] } } } # multivariate imputation - type and formula if (mult) { mis <- !r mis[, setdiff(colnames(data), b)] <- FALSE data[mis] <- NA fm <- paste("mice.impute", theMethod, sep = ".") if (calltype == "formula") { imputes <- do.call(fm, args = list( data = data, formula = ff, ... )) } else if (calltype == "type") { imputes <- do.call(fm, args = list( data = data, type = type, ... )) } else { stop("Cannot call function of type ", calltype, call. = FALSE ) } if (is.null(imputes)) { stop("No imputations from ", theMethod, h, call. 
= FALSE ) } for (j in names(imputes)) { imp[[j]][, i] <- imputes[[j]] data[!r[, j], j] <- imp[[j]][, i] } } # passive imputation # applies to all rows, so no ignore needed if (pass) { for (j in b) { wy <- where[, j] ry <- r[, j] imp[[j]][, i] <- model.frame(as.formula(theMethod), data[wy, ], na.action = na.pass ) data[(!ry) & wy, j] <- imp[[j]][(!ry)[wy], i] } } } # end h loop (blocks) } # end i loop (imputation number) # store means and sd of m imputes k2 <- k - from + 1L if (length(visitSequence) > 0L) { for (h in visitSequence) { for (j in blocks[[h]]) { if (!is.factor(data[, j])) { chainVar[j, k2, ] <- apply(imp[[j]], 2L, var, na.rm = TRUE) chainMean[j, k2, ] <- colMeans(as.matrix(imp[[j]]), na.rm = TRUE) } if (is.factor(data[, j])) { for (mm in seq_len(m)) { nc <- as.integer(factor(imp[[j]][, mm], levels = levels(data[, j]))) chainVar[j, k2, mm] <- var(nc, na.rm = TRUE) chainMean[j, k2, mm] <- mean(nc, na.rm = TRUE) } } } } } } # end main iteration if (printFlag) { r <- get("loggedEvents", parent.frame(1)) ridge.used <- any(grepl("A ridge penalty", r$out)) if (ridge.used) { cat("\n * Please inspect the loggedEvents \n") } else { cat("\n") } } } list(iteration = maxit, imp = imp, chainMean = chainMean, chainVar = chainVar) } sampler.univ <- function(data, r, where, type, formula, method, yname, k, calltype = "type", user, ignore, ...) { j <- yname[1L] if (calltype == "type") { vars <- colnames(data)[type != 0] pred <- setdiff(vars, j) if (length(pred) > 0L) { formula <- reformulate(pred, response = j) formula <- update(formula, ". ~ . ") } else { formula <- as.formula(paste0(j, " ~ 1")) } } if (calltype == "formula") { # move terms other than j from lhs to rhs ymove <- setdiff(lhs(formula), j) formula <- update(formula, paste(j, " ~ . ")) if (length(ymove) > 0L) { formula <- update(formula, paste("~ . + ", paste(ymove, collapse = "+"))) } } # get the model matrix x <- obtain.design(data, formula) # expand type vector to model matrix, remove intercept if (calltype == "type") { type <- type[labels(terms(formula))][attr(x, "assign")] x <- x[, -1L, drop = FALSE] names(type) <- colnames(x) } if (calltype == "formula") { x <- x[, -1L, drop = FALSE] type <- rep(1L, length = ncol(x)) names(type) <- colnames(x) } # define y, ry and wy y <- data[, j] ry <- complete.cases(x, y) & r[, j] & !ignore wy <- complete.cases(x) & where[, j] # nothing to impute if (all(!wy)) { return(numeric(0)) } cc <- wy[where[, j]] if (k == 1L) check.df(x, y, ry) # remove linear dependencies keep <- remove.lindep(x, y, ry, ...) x <- x[, keep, drop = FALSE] type <- type[keep] if (ncol(x) != length(type)) { stop("Internal error: length(type) != number of predictors") } # here we go f <- paste("mice.impute", method, sep = ".") imputes <- data[wy, j] imputes[!cc] <- NA args <- c(list(y = y, ry = ry, x = x, wy = wy, type = type), user, list(...)) imputes[cc] <- do.call(f, args = args) imputes } mice/R/tidiers.R0000644000176200001440000000455014330035710013156 0ustar liggesusers#' @importFrom generics tidy #' @export generics::tidy #' @importFrom generics glance #' @export generics::glance #' Tidy method to extract results from a `mipo` object #' #' @param x An object of class \code{mipo} #' @param conf.int Logical. Should confidence intervals be returned? #' @param conf.level Confidence level for intervals. Defaults to .95 #' @param ... 
extra arguments (not used) #' @export #' @keywords internal #' @return A dataframe withh these columns: #' \itemize{ #' \item term #' \item estimate #' \item ubar #' \item b #' \item t #' \item dfcom #' \item df #' \item riv #' \item lambda #' \item fmi #' \item p.value #' \item conf.low (if called with conf.int = TRUE) #' \item conf.high (if called with conf.int = TRUE) #' } tidy.mipo <- function(x, conf.int = FALSE, conf.level = .95, ...) { out <- summary(x, type = "all", conf.int = conf.int, conf.level = conf.level, ... ) if ("term" %in% names(out)) out$term <- as.character(out$term) if ("contrast" %in% names(out)) out$contrast <- as.character(out$contrast) # needed for broom <= 0.5.6 # rename variables if present idx <- grepl("%", names(out)) names(out)[idx] <- c("conf.low", "conf.high") idx <- names(out) == "t" names(out)[idx] <- "statistic" # order columns cols_a <- c( "term", "estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high" ) cols_a <- base::intersect(cols_a, colnames(out)) cols_b <- sort(base::setdiff(colnames(out), cols_a)) out[, c(cols_a, cols_b)] } #' Glance method to extract information from a `mipo` object #' #' @param x An object with multiply-imputed models from `mice` (class: `mipo`) #' @param ... extra arguments (not used) #' @return a dataframe with one row and the following columns: #' \itemize{ #' \item nimp #' \item nobs #' } #' @note If x contains `lm` models, R2 and Adj.R2 are included in the output #' @export #' @keywords internal #' @family tidiers glance.mipo <- function(x, ...) { out <- data.frame(nimp = nrow(x$glanced)) out$nobs <- tryCatch(x$glanced$nobs[1], error = function(e) NULL ) # R2 in lm models out$r.squared <- tryCatch(pool.r.squared(x, adjusted = FALSE)[1], error = function(e) NULL ) out$adj.r.squared <- tryCatch(pool.r.squared(x, adjusted = TRUE)[1], error = function(e) NULL ) out } mice/R/initialize.chain.R0000644000176200001440000000035614330031606014735 0ustar liggesusersinitialize.chain <- function(blocks, maxit, m) { vars <- unique(unlist(blocks)) chain <- array(NA, dim = c(length(vars), maxit, m)) dimnames(chain) <- list( vars, seq_len(maxit), paste("Chain", seq_len(m)) ) chain } mice/R/mice.impute.2l.lmer.R0000644000176200001440000001424314334522175015216 0ustar liggesusers#' Imputation by a two-level normal model using \code{lmer} #' #' Imputes univariate systematically and sporadically missing data using a #' two-level normal model using \code{lme4::lmer()}. #' #' Data are missing systematically if they have not been measured, e.g., in the #' case where we combine data from different sources. Data are missing sporadically #' if they have been partially observed. #' #' While the method is fully Bayesian, it may fix parameters of the #' variance-covariance matrix or the random effects to their estimated #' value in cases where creating draws from the posterior is not #' possible. The procedure throws a warning when this happens. #' #' If \code{lme4::lmer()} fails, the procedure prints the warning #' \code{"lmer does not run. Simplify imputation model"} and returns the #' current imputation. If that happens we see flat lines in the #' trace line plots. Thus, the appearance of flat trace lines should be taken #' as an additional alert to a problem with imputation model fitting. #' @name mice.impute.2l.lmer #' @inheritParams mice.impute.pmm #' @param type Vector of length \code{ncol(x)} identifying random and class #' variables. Random variables are identified by a '2'. 
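# A sketch of the predictorMatrix coding used by this method: -2 marks the class
# (cluster) variable, 2 a random effect, 1 a fixed effect. The data and variable
# names below are made up for illustration; lme4 must be installed.
library(mice)
set.seed(1)
d <- data.frame(
  school = rep(1:10, each = 20),
  x1 = rnorm(200),
  x2 = rnorm(200),
  y = rnorm(200)
)
d$y[sample(200, 50)] <- NA
pred <- make.predictorMatrix(d)
pred["y", ] <- c(-2, 2, 1, 0)   # school = class, x1 = random slope, x2 = fixed only
meth <- make.method(d)
meth["y"] <- "2l.lmer"
imp <- mice(d, method = meth, predictorMatrix = pred, m = 2, maxit = 1,
            print = FALSE)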
The class variable #' (only one is allowed) is coded as '-2'. Fixed effects are indicated by #' a '1'. #' @param intercept Logical determining whether the intercept is automatically #' added. #' @param \dots Arguments passed down to \code{lmer} #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Shahab Jolani, 2017 #' @references #' Jolani S. (2017) Hierarchical imputation of systematically and #' sporadically missing data: An approximate Bayesian approach using #' chained equations. Forthcoming. #' #' Jolani S., Debray T.P.A., Koffijberg H., van Buuren S., Moons K.G.M. (2015). #' Imputation of systematically missing predictors in an individual #' participant data meta-analysis: a generalized approach using MICE. #' \emph{Statistics in Medicine}, 34:1841-1863. #' #' Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. #' and and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel #' Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. #' @family univariate-2l #' @keywords datagen #' @export mice.impute.2l.lmer <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) { install.on.demand("lme4", ...) if (is.null(wy)) wy <- !ry if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) names(type)[1] <- colnames(x)[1] <- "(Intercept)" } clust <- names(type[type == -2]) rande <- names(type[type == 2]) fixe <- names(type[type > 0]) lev <- unique(x[, clust]) X <- x[, fixe, drop = FALSE] Z <- x[, rande, drop = FALSE] xobs <- x[ry, , drop = FALSE] yobs <- y[ry] Xobs <- X[ry, , drop = FALSE] Zobs <- Z[ry, , drop = FALSE] # create formula fr <- ifelse(length(rande) > 1, paste("+ ( 1 +", paste(rande[-1L], collapse = "+")), "+ ( 1 " ) randmodel <- paste( "yobs ~ ", paste(fixe[-1L], collapse = "+"), fr, "|", clust, ")" ) suppressWarnings(fit <- try( lme4::lmer(formula(randmodel), data = data.frame(yobs, xobs), ... ), silent = TRUE )) if (inherits(fit, "try-error")) { warning("lmer does not run. Simplify imputation model") return(y[wy]) } # taken from lme4 sigma <- function(object, ...) 
{ dc <- object@devcomp dd <- dc$dims if (dd[["useSc"]]) { dc$cmp[[if (dd[["REML"]]) { "sigmaREML" } else { "sigmaML" }]] } else { 1 } } # draw sigma* sigmahat <- sigma(fit) df <- nrow(fit@frame) - length(fit@beta) sigma2star <- df * sigmahat^2 / rchisq(1, df) # draw beta* beta <- lme4::fixef(fit) RX <- lme4::getME(fit, "RX") # cov-matrix, i.e., vcov(fit) covmat <- sigma2star * chol2inv(RX) rv <- t(chol(covmat)) beta.star <- beta + rv %*% rnorm(ncol(rv)) # draw psi* # applying the standard Wishart prior rancoef <- as.matrix(lme4::ranef(fit)[[1]]) lambda <- t(rancoef) %*% rancoef df.psi <- nrow(rancoef) temp.psi.star <- stats::rWishart(1, df.psi, diag(nrow(lambda)))[, , 1] temp <- MASS::ginv(lambda) ev <- eigen(temp) if (sum(ev$values > 0) == length(ev$values)) { deco <- ev$vectors %*% diag(sqrt(ev$values), nrow = length(ev$values)) psi.star <- MASS::ginv(deco %*% temp.psi.star %*% t(deco)) } else { try(temp.svd <- svd(lambda)) if (!inherits(temp.svd, "try-error")) { deco <- temp.svd$u %*% diag(sqrt(temp.svd$d), nrow = length(temp.svd$d)) psi.star <- MASS::ginv(deco %*% temp.psi.star %*% t(deco)) } else { psi.star <- temp warning("psi fixed to estimate") } } # Calculate myi, vyi and drawing bi per cluster for (jj in lev) { if (jj %in% unique(xobs[, clust])) { Xi <- Xobs[xobs[, clust] == jj, ] Zi <- as.matrix(Zobs[xobs[, clust] == jj, ]) yi <- yobs[xobs[, clust] == jj] sigma2 <- diag(sigma2star, nrow = nrow(Zi)) Mi <- psi.star %*% t(Zi) %*% MASS::ginv(Zi %*% psi.star %*% t(Zi) + sigma2) myi <- Mi %*% (yi - Xi %*% beta.star) vyi <- psi.star - Mi %*% Zi %*% psi.star } else { myi <- matrix(0, nrow = nrow(psi.star), ncol = 1) vyi <- psi.star } vyi <- vyi - upper.tri(vyi) * vyi + t(lower.tri(vyi) * vyi) # generating bi.star using eigenvalues deco1 <- eigen(vyi) if (sum(deco1$values > 0) == length(deco1$values)) { A <- deco1$vectors %*% sqrt(diag(deco1$values, nrow = length(deco1$values))) bi.star <- myi + A %*% rnorm(length(myi)) } else { # generating bi.star using svd try(deco1 <- svd(vyi)) if (!inherits(deco1, "try-error")) { A <- deco1$u %*% sqrt(diag(deco1$d, nrow = length(deco1$d))) bi.star <- myi + A %*% rnorm(length(myi)) } else { bi.star <- myi warning("b_", jj, " fixed to estimate") } } # imputation y[wy & x[, clust] == jj] <- as.vector( as.matrix(X[wy & x[, clust] == jj, , drop = FALSE]) %*% beta.star + as.matrix(Z[wy & x[, clust] == jj, , drop = FALSE]) %*% as.matrix(bi.star) + rnorm(sum(wy & x[, clust] == jj)) * sqrt(sigma2star) ) } y[wy] } mice/R/mice.impute.rf.R0000644000176200001440000001125714436640532014355 0ustar liggesusers#' Imputation by random forests #' #' Imputes univariate missing data using random forests. #' #' @aliases mice.impute.rf #' @inheritParams mice.impute.pmm #' @param ntree The number of trees to grow. The default is 10. #' @param rfPackage A single string specifying the backend for estimating the #' random forest. The default backend is the \code{ranger} package. The only #' alternative currently implemented is the \code{randomForest} package, which #' used to be the default in mice 3.13.10 and earlier. #' @param \dots Other named arguments passed down to #' \code{mice:::install.on.demand()}, \code{randomForest::randomForest()} and #' \code{randomForest:::randomForest.default()}. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' Imputation of \code{y} by random forests. 
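# A sketch of switching between the two random forest backends described above;
# each call assumes the corresponding package (ranger or randomForest) is installed.
library(mice)
imp_ranger <- mice(nhanes, method = "rf", ntree = 5, m = 2, maxit = 1,
                   print = FALSE, seed = 1)
imp_rf <- mice(nhanes, method = "rf", rfPackage = "randomForest", ntree = 5,
               m = 2, maxit = 1, print = FALSE, seed = 1)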
The method #' calls \code{randomForrest()} which implements Breiman's random forest #' algorithm (based on Breiman and Cutler's original Fortran code) #' for classification and regression. See Appendix A.1 of Doove et al. #' (2014) for the definition of the algorithm used. #' @note An alternative implementation was independently #' developed by Shah et al (2014). This were available as #' functions \code{CALIBERrfimpute::mice.impute.rfcat} and #' \code{CALIBERrfimpute::mice.impute.rfcont} (now archived). #' Simulations by Shah (Feb 13, 2014) suggested that #' the quality of the imputation for 10 and 100 trees was identical, #' so mice 2.22 changed the default number of trees from \code{ntree = 100} to #' \code{ntree = 10}. #' @author Lisa Doove, Stef van Buuren, Elise Dusseldorp, 2012; Patrick Rockenschaub, 2021 #' @references #' #' Doove, L.L., van Buuren, S., Dusseldorp, E. (2014), Recursive partitioning #' for missing data imputation in the presence of interaction Effects. #' Computational Statistics & Data Analysis, 72, 92-104. #' #' Shah, A.D., Bartlett, J.W., Carpenter, J., Nicholas, O., Hemingway, H. (2014), #' Comparison of random forest and parametric imputation models for #' imputing missing data using MICE: A CALIBER study. American Journal #' of Epidemiology, \doi{10.1093/aje/kwt312}. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-cart.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @seealso \code{\link{mice}}, \code{\link{mice.impute.cart}}, #' \code{\link[randomForest]{randomForest}} #' \code{\link[ranger]{ranger}} #' @family univariate imputation functions #' @keywords datagen #' @examples #' \dontrun{ #' imp <- mice(nhanes2, meth = "rf", ntree = 3) #' plot(imp) #' } #' @export mice.impute.rf <- function(y, ry, x, wy = NULL, ntree = 10, rfPackage = c("ranger", "randomForest"), ...) { rfPackage <- match.arg(rfPackage) if (is.null(wy)) wy <- !ry ntree <- max(1, ntree) # safety nmis <- sum(wy) xobs <- x[ry, , drop = FALSE] xmis <- x[wy, , drop = FALSE] yobs <- y[ry] # Find eligible donors f <- switch(rfPackage, randomForest = .randomForest.donors, ranger = .ranger.donors ) forest <- f(xobs, xmis, yobs, ntree, ...) # Sample from donors if (nmis == 1) forest <- array(forest, dim = c(1, ntree)) apply(forest, MARGIN = 1, FUN = function(s) sample(unlist(s), 1)) } # Find eligible donors using the randomForest package (default) .randomForest.donors <- function(xobs, xmis, yobs, ntree, ...) { install.on.demand("randomForest", ...) onetree <- function(xobs, xmis, yobs, ...) { # Function to fit a single tree fit <- randomForest::randomForest( x = xobs, y = yobs, ntree = 1, ... ) leafnr <- predict(object = fit, newdata = xobs, nodes = TRUE) leafnr <- as.vector(attr(leafnr, "nodes")) nodes <- predict(object = fit, newdata = xmis, nodes = TRUE) nodes <- as.vector(attr(nodes, "nodes")) donor <- lapply(nodes, function(s) yobs[leafnr == s]) return(donor) } sapply(seq_len(ntree), FUN = function(s) onetree(xobs, xmis, yobs, ...)) } # Find eligible donors using the ranger package .ranger.donors <- function(xobs, xmis, yobs, ntree, ...) { install.on.demand("ranger", ...) 
# Fit all trees at once fit <- ranger::ranger(x = xobs, y = yobs, num.trees = ntree) nodes <- predict( object = fit, data = rbind(xobs, xmis), type = "terminalNodes", predict.all = TRUE ) nodes <- ranger::predictions(nodes) nodes_obs <- nodes[1:nrow(xobs), , drop = FALSE] nodes_mis <- nodes[(nrow(xobs) + 1):nrow(nodes), , drop = FALSE] select_donors <- function(i) { # Function to extract all eligible donors for each missing value donors <- split(yobs, nodes_obs[, i]) donors[as.character(nodes_mis[, i])] } sapply(seq_len(ntree), FUN = select_donors) } mice/R/walking.R0000644000176200001440000000457014330031606013151 0ustar liggesusers#' Walking disability data #' #' Two items YA and YB measuring walking disability in samples A, B and E. #' #' Example dataset to demonstrate imputation of two items (YA and YB). Item YA #' is administered to sample A and sample E, item YB is administered to sample B #' and sample E, so sample E acts as a bridge study. Imputation using a bridge #' study is better than simple equating or than imputation under independence. #' #' Item YA corresponds to the HAQ8 item, and item YB corresponds to the GAR9 #' items from Van Buuren et al (2005). Sample E (as well as sample B) is the #' Euridiss study (n=292), sample A is the ERGOPLUS study (n=306). #' #' See Van Buuren (2018) section 9.4 for more details on the imputation #' methodology. #' #' @name walking #' @aliases walking #' @docType data #' @format A data frame with 890 rows on the following 5 variables: #' \describe{ #' \item{sex}{Sex of respondent (factor)} #' \item{age}{Age of respondent} #' \item{YA}{Item administered in samples A and E (factor)} #' \item{YB}{Item administered in samples B and E (factor)} #' \item{src}{Source: Sample A, B or E (factor)} #' } #' @references van Buuren, S., Eyres, S., Tennant, A., Hopman-Rock, M. (2005). #' Improving comparability of existing data by Response Conversion. #' \emph{Journal of Official Statistics}, \bold{21}(1), 53-72. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-codingsystems.html#sec:impbridge}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @keywords datasets #' @examples #' md.pattern(walking) #' #' micemill <- function(n) { #' for (i in 1:n) { #' imp <<- mice.mids(imp) # global assignment #' cors <- with(imp, cor(as.numeric(YA), #' as.numeric(YB), #' method = "kendall" #' )) #' tau <<- rbind(tau, getfit(cors, s = TRUE)) # global assignment #' } #' } #' #' plotit <- function() { #' matplot( #' x = 1:nrow(tau), y = tau, #' ylab = expression(paste("Kendall's ", tau)), #' xlab = "Iteration", type = "l", lwd = 1, #' lty = 1:10, col = "black" #' ) #' } #' #' tau <- NULL #' imp <- mice(walking, max = 0, m = 10, seed = 92786) #' pred <- imp$pred #' pred[, c("src", "age", "sex")] <- 0 #' imp <- mice(walking, max = 0, m = 3, seed = 92786, pred = pred) #' micemill(5) #' plotit() #' #' ### to get figure 9.8 van Buuren (2018) use m=10 and micemill(20) NULL mice/R/mice.impute.lasso.logreg.R0000644000176200001440000000470414330031647016336 0ustar liggesusers#' Imputation by direct use of lasso logistic regression #' #' Imputes univariate missing binary data using lasso logistic regression with bootstrap. #' #' @aliases mice.impute.lasso.logreg lasso.logreg #' @inheritParams mice.impute.pmm #' @param nfolds The number of folds for the cross-validation of the lasso penalty. #' The default is 10. 
#' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' The method consists of the following steps: #' \enumerate{ #' \item For a given y variable under imputation, draw a bootstrap version y* #' with replacement from the observed cases \code{y[ry]}, and store in x* the #' corresponding values from \code{x[ry, ]}. #' \item Fit a regularised (lasso) logistic regression with y* as the outcome, #' and x* as predictors. #' A vector of regression coefficients bhat is obtained. #' All of these coefficients are considered random draws from the posterior #' distribution of the imputation model parameters. #' Some of these coefficients will be shrunk to 0. #' \item Compute predicted scores for the missing entries, i.e. the inverse logit of X bhat. #' \item Compare each score to a random (0,1) deviate, and impute. #' } #' The method is based on the Direct Use of Regularized Regression (DURR) proposed by #' Zhao & Long (2016) and Deng et al (2016). #' @author Edoardo Costantini, 2021 #' @references #' #' Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for #' general missing data patterns in the presence of high-dimensional data. #' Scientific Reports, 6(1), 1-10. #' #' Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of #' high-dimensional data. Statistical Methods in Medical Research, 25(5), #' 2021-2035. #' #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.lasso.logreg <- function(y, ry, x, wy = NULL, nfolds = 10, ...) { install.on.demand("glmnet", ...) if (is.null(wy)) wy <- !ry # Bootstrap sample n1 <- sum(ry) s <- sample(n1, n1, replace = TRUE) x_glmnet <- cbind(1, x) dotxobs <- x_glmnet[ry, , drop = FALSE][s, , drop = FALSE] dotyobs <- y[ry][s] # Train imputation model cv_lasso <- glmnet::cv.glmnet( x = dotxobs, y = dotyobs, family = "binomial", nfolds = nfolds, alpha = 1 ) # Obtain imputation p <- 1 / (1 + exp(-predict(cv_lasso, x_glmnet[wy, ], s = "lambda.min"))) vec <- (runif(nrow(p)) <= p) vec[vec] <- 1 if (is.factor(y)) { vec <- factor(vec, c(0, 1), levels(y)) } vec } mice/R/mice.impute.2lonly.norm.R0000644000176200001440000001205414330031606016120 0ustar liggesusers#' Imputation at level 2 by Bayesian linear regression #' #' Imputes univariate missing data at level 2 using Bayesian linear regression #' analysis. Variables at level 1 are aggregated at level 2. The group #' identifier at level 2 must be indicated by \code{type = -2} in the #' \code{predictorMatrix}. #' #' @aliases 2lonly.norm #' @inheritParams mice.impute.pmm #' @param type Group identifier must be specified by '-2'. Predictors must be #' specified by '1'. #' @param ... Other named arguments. #' @return A vector of length \code{nmis} with imputations. #' @author Alexander Robitzsch (IPN - Leibniz Institute for Science and #' Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de} #' @seealso \code{\link{mice.impute.norm}}, #' \code{\link{mice.impute.2lonly.pmm}}, \code{\link{mice.impute.2l.pan}}, #' \code{\link{mice.impute.2lonly.mean}} #' @details #' This function allows, in combination with \code{\link{mice.impute.2l.pan}}, #' switching regression imputation between level 1 and level 2 as described in #' Yucel (2008) or Gelman and Hill (2007, p. 541). #' #' The function checks for partial missing level-2 data. Level-2 data #' are assumed to be constant within the same cluster.
If one or more #' entries are missing, then the procedure aborts with an error #' message that identifies the cluster with incomplete level-2 data. #' In such cases, one may first fill in the cluster mean (or mode) by #' the \code{2lonly.mean} method to remove inconsistencies. #' #' @references Gelman, A. and Hill, J. (2007). \emph{Data analysis using #' regression and multilevel/hierarchical models}. Cambridge, Cambridge #' University Press. #' #' Yucel, RM (2008). Multiple imputation inference for multivariate multilevel #' continuous data with ignorable non-response. \emph{Philosophical #' Transactions of the Royal Society A}, \bold{366}, 2389-2404. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-level2pred.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @family univariate-2lonly #' @note #' For a more general approach, see #' \code{miceadds::mice.impute.2lonly.function()}. #' @examples #' # simulate some data #' # x,y ... level 1 variables #' # v,w ... level 2 variables #' #' G <- 250 # number of groups #' n <- 20 # number of persons #' beta <- .3 # regression coefficient #' rho <- .30 # residual intraclass correlation #' rho.miss <- .10 # correlation with missing response #' missrate <- .50 # missing proportion #' y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) #' w <- rep(round(rnorm(G), 2), each = n) #' v <- rep(round(runif(G, 0, 3)), each = n) #' x <- rnorm(G * n) #' y <- y1 + beta * x + .2 * w + .1 * v #' dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y, "w" = w, "v" = v) #' dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA #' dfr[rep(rnorm(G), each = n) < qnorm(missrate), "w"] <- NA #' dfr[rep(rnorm(G), each = n) < qnorm(missrate), "v"] <- NA #' #' # empty mice imputation #' imp0 <- mice(as.matrix(dfr), maxit = 0) #' predM <- imp0$predictorMatrix #' impM <- imp0$method #' #' # multilevel imputation #' predM1 <- predM #' predM1[c("w", "y", "v"), "group"] <- -2 #' predM1["y", "x"] <- 1 # fixed x effects imputation #' impM1 <- impM #' impM1[c("y", "w", "v")] <- c("2l.pan", "2lonly.norm", "2lonly.pmm") #' #' # y ... imputation using pan #' # w ... imputation at level 2 using norm #' # v ... imputation at level 2 using pmm #' #' imp1 <- mice(as.matrix(dfr), #' m = 1, predictorMatrix = predM1, #' method = impM1, maxit = 1, paniter = 500 #' ) #' #' # Demonstration that 2lonly.norm aborts for partial missing data. #' # Better use 2lonly.mean for repair. #' data <- data.frame( #' patid = rep(1:4, each = 5), #' sex = rep(c(1, 2, 1, 2), each = 5), #' crp = c( #' 68, 78, 93, NA, 143, #' 5, 7, 9, 13, NA, #' 97, NA, 56, 52, 34, #' 22, 30, NA, NA, 45 #' ) #' ) #' pred <- make.predictorMatrix(data) #' pred[, "patid"] <- -2 #' # only missing value (out of five) for patid == 1 #' data[3, "sex"] <- NA #' \dontrun{ #' # The following fails because 2lonly.norm found partially missing #' # level-2 data #' # imp <- mice(data, method = c("", "2lonly.norm", "2l.pan"), #' # predictorMatrix = pred, maxit = 1, m = 2) #' # > iter imp variable #' # > 1 1 sex crpError in .imputation.level2(y = y, ... : #' # > Method 2lonly.norm found the following clusters with partially missing #' # > level-2 data: 1 #' # > Method 2lonly.mean can fix such inconsistencies. #' } #' #' # In contrast, if all sex values are missing for patid == 1, it runs fine, #' # except on r-patched-solaris-x86. I used dontrun to evade CRAN errors. 
#' \dontrun{ #' data[1:5, "sex"] <- NA #' imp <- mice(data, #' method = c("", "2lonly.norm", "2l.pan"), #' predictorMatrix = pred, maxit = 1, m = 2 #' ) #' } #' @export mice.impute.2lonly.norm <- function(y, ry, x, type, wy = NULL, ...) { imp <- .imputation.level2( y = y, ry = ry, x = x, type = type, wy = wy, method = "norm", ... ) imp } mice/R/summary.R0000644000176200001440000000552014334522175013220 0ustar liggesusers#' Summary of a \code{mira} object #' #' @rdname summary #' @param object A \code{mira} object #' @param type A length-1 character vector indicating the #' type of summary. There are three choices: \code{type = "tidy"} #' return the parameters estimates of each analyses as a data frame. #' \code{type = "glance"} return the fit statistics of each analysis #' as a data frame. \code{type = "summary"} returns a list of #' length \code{m} with the analysis results. The default is #' \code{"tidy"}. #' @param ... Other parameters passed down to \code{print()} and \code{summary()} #' @return \code{NULL} #' @seealso \code{\link[=mira-class]{mira}} #' @method summary mira #' @export summary.mira <- function(object, type = c("tidy", "glance", "summary"), ...) { type <- match.arg(type) fitlist <- getfit(object) if (type == "tidy") { v <- lapply(fitlist, tidy, effects = "fixed", parametric = TRUE, ...) %>% bind_rows() } if (type == "glance") { v <- lapply(fitlist, glance, ...) %>% bind_rows() } # nobs is needed for pool.r.squared # not supplied by broom <= 0.5.6 if (!"nobs" %in% colnames(v)) { v$nobs <- tryCatch(length(stats::residuals(getfit(object)[[1]])), error = function(e) NULL ) } if (type == "summary") { v <- lapply(fitlist, summary, ...) } v } #' Summary of a \code{mids} object #' #' @rdname summary #' @return \code{NULL} #' @seealso \code{\link[=mids-class]{mids}} #' @method summary mids #' @export summary.mids <- function(object, ...) { print(object, ...) invisible(object) } #' Summary of a \code{mads} object #' #' @rdname summary #' @return \code{NULL} #' @seealso \code{\link[=mads-class]{mads}} #' @export summary.mads <- function(object, ...) { print(object, ...) invisible(object) } #' Print a \code{mice.anova} object #' #' @rdname summary #' @return \code{NULL} #' @seealso \code{\link{mipo}} #' @method summary mice.anova #' @export summary.mice.anova <- function(object, ...) { # handle objects from anova out <- object$out # handle objects from D1, D2 and D3 if (is.null(out)) { out <- list(`1 ~~ 2` = list( result = object$result, dfcom = object$dfcom )) } test <- names(out) dfcom <- vapply(out, function(x) x$dfcom, numeric(1)) results <- t(vapply(out, function(x) x$result, numeric(5))) rf <- data.frame( test = test, statistic = results[, 1], df1 = results[, 2], df2 = results[, 3], dfcom = dfcom, p.value = results[, 4], riv = results[, 5], row.names = NULL ) formulas <- object$formulas ff <- data.frame( model = names(formulas), formula = as.character(formulas) ) structure( list( models = ff, comparisons = rf, m = object$m, method = object$method, use = object$use ), class = c("mice.anova.summary", class(object)) ) } mice/R/plot.R0000644000176200001440000001031314330031606012463 0ustar liggesusers#' Plot the trace lines of the MICE algorithm #' #' Trace line plots portray the value of an estimate #' against the iteration number. The estimate can be anything that you can calculate, but #' typically are chosen as parameter of scientific interest. 
The \code{plot} method for #' a \code{mids} object plots the mean and standard deviation of the imputed (not observed) #' values against the iteration number for each of the \code{m} replications. By default, #' the function plots the development of the mean and standard deviation for each incomplete #' variable. On convergence, the streams should intermingle and be free of any trend. #' #' @param x An object of class \code{mids} #' @param y A formula that specifies which variables, streams and iterations are plotted. #' If omitted, all streams, variables and iterations are plotted. #' @param theme The trellis theme to be applied to the graphs. The default is \code{mice.theme()}. #' @param layout A vector of length 2 giving the number of columns and rows in the plot. #' The default is \code{c(2, 3)}. #' @param type Parameter \code{type} of \code{\link{panel.xyplot}}. #' @param col Parameter \code{col} of \code{\link{panel.xyplot}}. #' @param lty Parameter \code{lty} of \code{\link{panel.xyplot}}. #' @param ... Extra arguments for \code{\link{xyplot}}. #' @return An object of class \code{"trellis"}. #' @author Stef van Buuren, 2011 #' @seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}}, #' \code{\link{xyplot}} #' @method plot mids #' @examples #' imp <- mice(nhanes, print = FALSE) #' plot(imp, bmi + chl ~ .it | .ms, layout = c(2, 1)) #' @export plot.mids <- function(x, y = NULL, theme = mice.theme(), layout = c(2, 3), type = "l", col = 1:10, lty = 1, ...) { strip.combined <- function(which.given, which.panel, factor.levels, ...) { if (which.given == 1) { lattice::panel.rect(0, 0, 1, 1, col = theme$strip.background$col, border = 1 ) lattice::panel.text( x = 0, y = 0.5, pos = 4, lab = factor.levels[which.panel[which.given]] ) } if (which.given == 2) { lattice::panel.text( x = 1, y = 0.5, pos = 2, lab = factor.levels[which.panel[which.given]] ) } } call <- match.call() if (!is.mids(x)) { stop("argument 'x' must be a 'mids' object", call. = FALSE) } if (is.null(x$chainMean)) { stop("no convergence diagnostics found", call.
= FALSE) } mn <- x$chainMean sm <- sqrt(x$chainVar) # select subset of nonmissing entries obs <- apply(!(is.nan(mn) | is.na(mn)), 1, all) varlist <- names(obs)[obs] ## create formula if not given in y if (missing(y)) { formula <- as.formula(paste0( paste0(varlist, collapse = "+"), "~.it|.ms" )) } else { formula <- NULL if (is.null(y)) { formula <- as.formula(paste0( paste0(varlist, collapse = "+"), "~.it|.ms" )) } if (is.character(y)) { formula <- if (length(y) == 1) { as.formula(paste0(y, "~.it|.ms")) } else { as.formula(paste0(paste0(y, collapse = "+"), "~.it|.ms")) } } if (is.integer(y) || is.logical(y)) { vars <- varlist[y] formula <- if (length(vars) == 1) { as.formula(paste0(vars, "~.it|.ms")) } else { as.formula(paste0(paste0(vars, collapse = "+"), "~.it|.ms")) } } if (is.null(formula)) { formula <- as.formula(y) } } m <- x$m it <- x$iteration mn <- matrix(aperm(mn[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) sm <- matrix(aperm(sm[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) adm <- expand.grid(seq_len(it), seq_len(m), c("mean", "sd")) data <- cbind(adm, rbind(mn, sm)) colnames(data) <- c(".it", ".m", ".ms", varlist) ## Dummy to trick R CMD check .m <- NULL rm(.m) tp <- xyplot( x = formula, data = data, groups = .m, type = type, lty = lty, col = col, layout = layout, scales = list( y = list(relation = "free"), x = list(alternating = FALSE) ), as.table = TRUE, xlab = "Iteration", ylab = "", strip = strip.combined, par.strip.text = list(lines = 0.5), ... ) update(tp, par.settings = theme) } mice/R/nimp.R0000644000176200001440000000135614330031606012457 0ustar liggesusers#' Number of imputations per block #' #' Calculates the number of cells within a block for which imputation #' is requested. #' @inheritParams mice #' @return A numeric vector of length \code{length(blocks)} containing #' the number of cells that need to be imputed within a block. #' @seealso \code{\link{mice}} #' @export #' @examples #' where <- is.na(nhanes) #' #' # standard FCS #' nimp(where) #' #' # user-defined blocks #' nimp(where, blocks = name.blocks(list(c("bmi", "hyp"), "age", "chl"))) nimp <- function(where, blocks = make.blocks(where)) { nwhere <- apply(where, 2, sum) nimp <- vector("integer", length = length(blocks)) names(nimp) <- names(blocks) for (i in seq_along(blocks)) nimp[i] <- sum(nwhere[blocks[[i]]]) nimp } mice/R/mice.impute.panImpute.R0000644000176200001440000000660114436064333015704 0ustar liggesusers#' Impute multilevel missing data using \code{pan} #' #' This function is a wrapper around the \code{panImpute} function #' from the \code{mitml} package so that it can be called to #' impute blocks of variables in \code{mice}. The \code{mitml::panImpute} #' function provides an interface to the \code{pan} package for #' multiple imputation of multilevel data (Schafer & Yucel, 2002). #' Imputations can be generated using \code{type} or \code{formula}, #' which offer different options for model specification. #' #' @name mice.impute.panImpute #' @inheritParams mitml::panImpute #' @param data A data frame containing incomplete and auxiliary variables, #' the cluster indicator variable, and any other variables that should be #' present in the imputed datasets. #' @param type An integer vector specifying the role of each variable #' in the imputation model (see \code{\link[mitml]{panImpute}}) #' @param formula A formula specifying the role of each variable #' in the imputation model. 
The basic model is constructed #' by \code{model.matrix}, thus allowing to include derived variables #' in the imputation model using \code{I()}. See #' \code{\link[mitml]{panImpute}}. #' @param format A character vector specifying the type of object that should #' be returned. The default is \code{format = "list"}. No other formats are #' currently supported. #' @param ... Other named arguments: \code{n.burn}, \code{n.iter}, #' \code{group}, \code{prior}, \code{silent} and others. #' @return A list of imputations for all incomplete variables in the model, #' that can be stored in the the \code{imp} component of the \code{mids} #' object. #' @seealso \code{\link[mitml]{panImpute}} #' @note The number of imputations \code{m} is set to 1, and the function #' is called \code{m} times so that it fits within the \code{mice} #' iteration scheme. #' #' This is a multivariate imputation function using a joint model. #' @author Stef van Buuren, 2018, building on work of Simon Grund, #' Alexander Robitzsch and Oliver Luedtke (authors of \code{mitml} package) #' and Joe Schafer (author of \code{pan} package). #' @references #' Grund S, Luedtke O, Robitzsch A (2016). Multiple #' Imputation of Multilevel Missing Data: An Introduction to the R #' Package \code{pan}. SAGE Open. #' #' Schafer JL (1997). Analysis of Incomplete Multivariate Data. London: #' Chapman & Hall. #' #' Schafer JL, and Yucel RM (2002). Computational strategies for #' multivariate linear mixed-effects models with missing values. #' Journal of Computational and Graphical Statistics, 11, 437-457. #' @family multivariate-2l #' @keywords datagen #' @examples #' blocks <- list(c("bmi", "chl", "hyp"), "age") #' method <- c("panImpute", "pmm") #' ini <- mice(nhanes, blocks = blocks, method = method, maxit = 0) #' pred <- ini$pred #' pred["B1", "hyp"] <- -2 #' imp <- mice(nhanes, blocks = blocks, method = method, pred = pred, maxit = 1) #' @export mice.impute.panImpute <- function(data, formula, type, m = 1, silent = TRUE, format = "imputes", ...) { install.on.demand("mitml", ...) nat <- mitml::panImpute( data = data, formula = formula, type = type, m = m, silent = silent, ... ) if (format == "native") { return(nat) } cmp <- mitml::mitmlComplete(nat, print = 1)[, names(data)] if (format == "complete") { return(cmp) } if (format == "imputes") { return(single2imputes(cmp, is.na(data))) } NULL } mice/R/mice.impute.mean.R0000644000176200001440000000225614330031647014657 0ustar liggesusers#' Imputation by the mean #' #' Imputes the arithmetic mean of the observed data #' #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @section Warning: Imputing the mean of a variable is almost never #' appropriate. See Little and Rubin (2002, p. 61-62) or #' Van Buuren (2012, p. 10-11) #' @seealso \code{\link{mice}}, \code{\link{mean}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing #' Data. New York: John Wiley and Sons. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-simplesolutions.html#sec:meanimp}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
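#' @examples
#' # A minimal sketch, for illustration of the call only: single mean
#' # imputation of the numeric 'nhanes' data. See the Warning section for
#' # why mean imputation is almost never appropriate in practice.
#' \dontrun{
#' imp <- mice(nhanes, method = "mean", m = 1, maxit = 1, print = FALSE)
#' head(complete(imp))
#' }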
#' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.mean <- function(y, ry, x = NULL, wy = NULL, ...) { if (is.null(wy)) { wy <- !ry } rep.int(mean(y[ry]), times = sum(wy)) } mice/R/D1.R0000644000176200001440000000560514436064333011773 0ustar liggesusers#' Compare two nested models using D1-statistic #' #' The D1-statistics is the multivariate Wald test. #' #' @param fit1 An object of class \code{mira}, produced by \code{with()}. #' @param fit0 An object of class \code{mira}, produced by \code{with()}. The #' model in \code{fit0} is a nested within \code{fit1}. The default null #' model \code{fit0 = NULL} compares \code{fit1} to the intercept-only model. #' @param dfcom A single number denoting the #' complete-data degrees of freedom of model \code{fit1}. If not specified, #' it is set equal to \code{df.residual} of model \code{fit1}. If that cannot #' be done, the procedure assumes (perhaps incorrectly) a large sample. #' @param df.com Deprecated #' @note Warning: `D1()` assumes that the order of the variables is the #' same in different models. See #' \url{https://github.com/amices/mice/issues/420} for details. #' @references #' Li, K. H., T. E. Raghunathan, and D. B. Rubin. 1991. #' Large-Sample Significance Levels from Multiply Imputed Data Using #' Moment-Based Statistics and an F Reference Distribution. #' \emph{Journal of the American Statistical Association}, 86(416): 1065–73. #' #' \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:wald} #' @examples #' # Compare two linear models: #' imp <- mice(nhanes2, seed = 51009, print = FALSE) #' mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) #' mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) #' D1(mi1, mi0) #' \dontrun{ #' # Compare two logistic regression models #' imp <- mice(boys, maxit = 2, print = FALSE) #' fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) #' fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) #' D1(fit1, fit0) #' } #' @seealso \code{\link[mitml]{testModels}} #' @export D1 <- function(fit1, fit0 = NULL, dfcom = NULL, df.com = NULL) { install.on.demand("mitml") # legacy handling if (!missing(df.com)) { warning("argument df.com is deprecated; please use dfcom instead.", call. = FALSE ) dfcom <- df.com } dfcom <- get.dfcom(fit1, dfcom) # fit1: a fitlist or mira-object # fit0: named numerical vector, character vector, or list fit1 <- as.mitml.result(fit1) est1 <- pool(fit1, dfcom = dfcom) qbar1 <- getqbar(est1) if (is.null(fit0)) { # test all estimates equal to zero, except intercept beta <- rep(0, length(qbar1)) names(beta) <- names(qbar1) fit0 <- lapply(fit1, fix.coef, beta = beta) fit0 <- as.mitml.result(fit0) } else if (is.mira(fit0)) { fit0 <- as.mitml.result(fit0) } tmr <- mitml::testModels(fit1, fit0, method = "D1", df.com = dfcom) out <- list( call = match.call(), result = tmr$test, formulas = list( `1` = formula(fit1[[1L]]), `2` = formula(fit0[[1L]]) ), m = tmr$m, method = "D1", use = NULL, dfcom = tmr$df.com ) class(out) <- c("mice.anova", class(fit1)) out } mice/R/employee.R0000644000176200001440000000234714330031606013334 0ustar liggesusers#' Employee selection data #' #' A toy example from Craig Enders. #' #' Enders describes these data as follows: #' I designed these data to mimic an employee selection scenario in #' which prospective employees complete an IQ test and a #' psychological well-being questionnaire during their interview. 
#' The company subsequently hires the applications that score in the #' upper half of the IQ distribution, and a supervisor rates their #' job performance following a 6-month probationary period. #' Note that the job performance scores are missing at random (MAR) #' (i.e. individuals in the lower half of the IQ distribution were #' never hired, and thus have no performance rating). In addition, #' I randomly deleted three of the well-being scores in order to #' mimic a situation where the applicant's well-being questionnaire #' is inadvertently lost. #' #' A larger version of this data set in present as #' \code{\link[miceadds:data.enders]{data.enders.employee}}. #' #' @format A data frame with 20 rows and 3 variables: #' \describe{ #' \item{IQ}{candidate IQ score} #' \item{wbeing}{candidate well-being score} #' \item{jobperf}{candidate job performance score} #' } #' @source Enders (2010), Applied Missing Data Analysis, p. 218 "employee" mice/R/cci.R0000644000176200001440000000334014330031606012245 0ustar liggesusers#' Complete case indicator #' #' #' The complete case indicator is useful for extracting the subset of complete cases. The function #' \code{cci(x)} calls \code{complete.cases(x)}. #' The companion function \code{ici()} selects the incomplete cases. #' #' @name cci #' @param x An \code{R} object. Currently supported are methods for the #' following classes: \code{mids}. #' @return Logical vector indicating the complete cases. #' @author Stef van Buuren, 2017. #' @seealso \code{\link{complete.cases}}, \code{\link{ici}}, \code{\link{cc}} #' @keywords univar #' @examples #' cci(nhanes) # indicator for 13 complete cases #' cci(mice(nhanes, maxit = 0)) #' f <- cci(nhanes[, c("bmi", "hyp")]) # complete data for bmi and hyp #' nhanes[f, ] # obtain all data from those with complete bmi and hyp #' @export cci <- function(x) UseMethod("cci", x) #' @export cci.mids <- function(x) { complete.cases(x$data) } #' @export cci.default <- function(x) { complete.cases(x) } #' Incomplete case indicator #' #' This array is useful for extracting the subset of incomplete cases. #' The companion function \code{cci()} selects the complete cases. #' #' @name ici #' @aliases ici ici,data.frame-method ici,matrix-method ici,mids-method #' @param x An \code{R} object. Currently supported are methods for the #' following classes: \code{mids}. #' @return Logical vector indicating the incomplete cases, #' @author Stef van Buuren, 2017. #' @seealso \code{\link{cci}}, \code{\link{ic}} #' @keywords univar #' @examples #' #' ici(nhanes) # indicator for 12 rows with incomplete cases #' @export ici <- function(x) UseMethod("ici", x) #' @export ici.mids <- function(x) { !complete.cases(x$data) } #' @export ici.default <- function(x) { !complete.cases(x) } mice/R/mice.impute.norm.nob.R0000644000176200001440000000431314330031647015463 0ustar liggesusers#' Imputation by linear regression without parameter uncertainty #' #' Imputes univariate missing data using linear regression analysis without #' accounting for the uncertainty of the model parameters. #' #' @aliases mice.impute.norm.nob norm.nob #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' This function creates imputations using the spread around the #' fitted linear regression line of \code{y} given \code{x}, as #' fitted on the observed data. 
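#' In the implementation below, the imputed values are computed as the linear
#' predictor \code{x[wy, ] \%*\% beta} plus noise \code{rnorm(sum(wy)) * sigma},
#' where \code{beta} and \code{sigma} are least-squares estimates obtained from
#' the observed cases only (see \code{.norm.fix()}).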
#' #' This function is provided mainly to allow comparison between proper (e.g., #' as implemented in \code{mice.impute.norm} and improper (this function) #' normal imputation methods. #' #' For large data, having many rows, differences between proper and improper #' methods are small, and in those cases one may opt for speed by using #' \code{mice.impute.norm.nob}. #' @section Warning: The function does not incorporate the variability of the #' regression weights, so it is not 'proper' in the sense of Rubin. For small #' samples, variability of the imputed data is therefore underestimated. #' @author Gerko Vink, Stef van Buuren, Karin Groothuis-Oudshoorn, 2018 #' @seealso \code{\link{mice}}, \code{\link{mice.impute.norm}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Brand, J.P.L. (1999). Development, Implementation and Evaluation of Multiple #' Imputation Strategies for the Statistical Analysis of Incomplete Data Sets. #' Ph.D. Thesis, TNO Prevention and Health/Erasmus University Rotterdam. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.norm.nob <- function(y, ry, x, wy = NULL, ...) { if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) parm <- .norm.fix(y, ry, x, ...) x[wy, ] %*% parm$beta + rnorm(sum(wy)) * parm$sigma } .norm.fix <- function(y, ry, x, ...) { p <- estimice(x[ry, , drop = FALSE], y[ry], ...) sigma <- sqrt((sum(p$r^2)) / (sum(ry) - ncol(x) - 1)) parm <- list(p$c, sigma) names(parm) <- c("beta", "sigma") parm } mice/R/cbind.R0000644000176200001440000002025514433400023012567 0ustar liggesuserscbind.mids <- function(x, y = NULL, ...) { call <- match.call() dots <- list(...) if (is.mids(y)) { return(cbind.mids.mids(x, y, call = call)) } if ((is.null(y) || length(y) == 0L) && length(dots) == 0L) { return(x) } n <- nrow(x$data) if (length(y) == 1L) y <- rep(y, n) if (length(y) == 0L && length(dots) > 0L) { y <- cbind.data.frame(dots) } else if (length(y) > 0L && length(dots) == 0L) { y <- cbind.data.frame(y) } else { y <- cbind.data.frame(y, dots) } # Call is a vector, with first argument the mice statement # and second argument the call to cbind.mids. call <- c(x$call, call) if (nrow(y) != nrow(x$data)) { stop( "arguments imply differing number of rows: ", c(nrow(x$data), ", ", nrow(y)) ) } data <- cbind(x$data, y) varnames <- make.unique(colnames(data)) colnames(data) <- varnames # where argument where <- cbind(x$where, matrix(FALSE, nrow = nrow(x$where), ncol = ncol(y))) colnames(where) <- varnames # blocks: no renaming needed because all block definition will # refer to varnames[1:ncol(x$data)] only, and are hence unique # but we do need to rename duplicate block names yblocks <- vector("list", length = ncol(y)) blocks <- c(x$blocks, yblocks) xynames <- c(names(x$blocks), colnames(y)) blocknames <- make.unique(xynames) names(blocknames) <- xynames names(blocks) <- blocknames ct <- c(attr(x$blocks, "calltype"), rep("type", ncol(y))) names(ct) <- blocknames attr(blocks, "calltype") <- ct m <- x$m # count the number of missing data in y nmis <- c(x$nmis, colSums(is.na(y))) names(nmis) <- varnames # imp: original data of y will be copied into the multiple imputed dataset, # including the missing values of y. 
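  # Each new entry of imp is an all-NA data frame with one row per missing
  # value in the corresponding column of y and one column per imputation,
  # so the missing values in y remain unimputed after cbind().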
r <- (!is.na(y)) f <- function(j) { m <- matrix(NA, nrow = sum(!r[, j]), ncol = x$m, dimnames = list(row.names(y)[!r[, j]], seq_len(m)) ) as.data.frame(m) } imp <- lapply(seq_len(ncol(y)), f) imp <- c(x$imp, imp) names(imp) <- varnames # The imputation method for (columns in) y will be set to ''. method <- c(x$method, rep.int("", ncol(y))) names(method) <- blocknames # The variable(s) in y are included in the predictorMatrix. # y is not used as predictor as well as not imputed. predictorMatrix <- rbind( x$predictorMatrix, matrix(0, ncol = ncol(x$predictorMatrix), nrow = ncol(y) ) ) predictorMatrix <- cbind( predictorMatrix, matrix(0, ncol = ncol(y), nrow = nrow(x$predictorMatrix) + ncol(y) ) ) rownames(predictorMatrix) <- blocknames colnames(predictorMatrix) <- varnames visitSequence <- x$visitSequence formulas <- x$formulas post <- c(x$post, rep.int("", ncol(y))) names(post) <- varnames blots <- x$blots ignore <- x$ignore # seed, lastSeedValue, number of iterations, chainMean and chainVar # is taken as in mids object x. seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration chainMean <- x$chainMean chainVar <- x$chainVar loggedEvents <- x$loggedEvents ## save, and return midsobj <- list( data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } cbind.mids.mids <- function(x, y, call) { if (!is.mids(y)) stop("Argument `y` not a mids object") if (nrow(y$data) != nrow(x$data)) { stop("The two datasets do not have the same length\n") } if (x$m != y$m) { stop("The two mids objects should have the same number of imputations\n") } # Call is a vector, with first argument the mice statement # and second argument the call to cbind.mids. call <- c(x$call, call) # The data in x$data and y$data are combined together. # make variable names unique data <- cbind(x$data, y$data) xynames <- c(colnames(x$data), colnames(y$data)) varnames <- make.unique(xynames) names(varnames) <- xynames names(data) <- varnames where <- cbind(x$where, y$where) colnames(where) <- varnames # rename variable names within each x$blocks and y$blocks xnew <- varnames[1:ncol(x$data)] ynew <- varnames[-(1:ncol(x$data))] xblocks <- x$blocks yblocks <- y$blocks for (i in names(xblocks)) xblocks[[i]] <- unname(xnew[xblocks[[i]]]) for (i in names(yblocks)) yblocks[[i]] <- unname(ynew[yblocks[[i]]]) blocks <- c(xblocks, yblocks) xynames <- c(names(xblocks), names(yblocks)) blocknames <- make.unique(xynames) names(blocknames) <- xynames names(blocks) <- blocknames ct <- c(attr(xblocks, "calltype"), attr(yblocks, "calltype")) names(ct) <- blocknames attr(blocks, "calltype") <- ct m <- x$m nmis <- c(x$nmis, y$nmis) names(nmis) <- varnames imp <- c(x$imp, y$imp) names(imp) <- varnames method <- c(x$method, y$method) names(method) <- blocknames # The predictorMatrices of x and y are combined with zero matrices # on the off diagonal blocks. 
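  # The result is block-diagonal: rows (blocks) of x keep their original
  # entries for the columns (variables) of x, rows of y keep theirs for the
  # columns of y, and all cross entries are zero, so variables from x and y
  # never predict each other after cbind().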
predictorMatrix <- rbind( x$predictorMatrix, matrix(0, ncol = ncol(x$predictorMatrix), nrow = nrow(y$predictorMatrix) ) ) predictorMatrix <- cbind( predictorMatrix, rbind( matrix(0, ncol = ncol(y$predictorMatrix), nrow = nrow(x$predictorMatrix) ), y$predictorMatrix ) ) rownames(predictorMatrix) <- blocknames colnames(predictorMatrix) <- varnames # As visitSequence is taken first the order for x and after that from y. # take care that duplicate names need to be renamed xnew <- blocknames[1:length(x$blocks)] ynew <- blocknames[-(1:length(x$blocks))] visitSequence <- unname(c(xnew[x$visitSequence], ynew[y$visitSequence])) formulas <- c(x$formulas, y$formulas) names(formulas) <- blocknames post <- c(x$post, y$post) names(post) <- varnames blots <- c(x$blots, y$blots) names(blots) <- blocknames ignore <- x$ignore # For the elements seed, lastSeedValue and iteration the values # from midsobject x are copied. seed <- x$seed lastSeedValue <- x$lastSeedValue iteration <- x$iteration # the chainMean and chainVar vectors for x and y are combined. chainMean <- array( data = NA, dim = c(dim(x$chainMean)[1] + dim(y$chainMean)[1], iteration, m), dimnames = list( c( dimnames(x$chainMean)[[1]], dimnames(y$chainMean)[[1]] ), dimnames(x$chainMean)[[2]], dimnames(x$chainMean)[[3]] ) ) chainMean[seq_len(dim(x$chainMean)[1]), , ] <- x$chainMean if (iteration <= dim(y$chainMean)[2]) { chainMean[(dim(x$chainMean)[1] + 1):dim(chainMean)[1], , ] <- y$chainMean[, seq_len(iteration), ] } else { chainMean[(dim(x$chainMean)[1] + 1):dim(chainMean)[1], seq_len(dim(y$chainMean)[2]), ] <- y$chainMean } chainVar <- array( data = NA, dim = c(dim(x$chainVar)[1] + dim(y$chainVar)[1], iteration, m), dimnames = list( c( dimnames(x$chainVar)[[1]], dimnames(y$chainVar)[[1]] ), dimnames(x$chainVar)[[2]], dimnames(x$chainVar)[[3]] ) ) chainVar[seq_len(dim(x$chainVar)[1]), , ] <- x$chainVar if (iteration <= dim(y$chainVar)[2]) { chainVar[(dim(x$chainVar)[1] + 1):dim(chainVar)[1], , ] <- y$chainVar[, seq_len(iteration), ] } else { chainVar[(dim(x$chainVar)[1] + 1):dim(chainVar)[1], seq_len(dim(y$chainVar)[2]), ] <- y$chainVar } loggedEvents <- x$loggedEvents midsobj <- list( data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } mice/R/mice.impute.2l.pan.R0000644000176200001440000001730114436064333015033 0ustar liggesusers# Usage is an extension of 2l.norm # -2 ... group identifier # 1 ... fixed effects # 2 ... fixed and random effects # 3 ... introduce aggregated effects (i.e. group means) # 4 ... fixed, random and aggregated effects #' Imputation by a two-level normal model using \code{pan} #' #' Imputes univariate missing data using a two-level normal model with #' homogeneous within group variances. Aggregated group effects (i.e. group #' means) can be automatically created and included as predictors in the #' two-level regression (see argument \code{type}). This function needs the #' \code{pan} package. #' #' Implements the Gibbs sampler for the linear two-level model with homogeneous #' within group variances which is a special case of a multivariate linear mixed #' effects model (Schafer & Yucel, 2002). 
For a two-level imputation with #' heterogeneous within-group variances see \code{\link{mice.impute.2l.norm}}. % #' The random intercept is automatically added in % #' \code{mice.impute.2l.norm()}. #' #' @aliases mice.impute.2l.pan 2l.pan #' @author Alexander Robitzsch (IPN - Leibniz Institute for Science and #' Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de} #' @name mice.impute.2l.pan #' @param y Incomplete data vector of length \code{n} #' @param ry Vector of missing data pattern (\code{FALSE}=missing, #' \code{TRUE}=observed) #' @param x Matrix (\code{n} x \code{p}) of complete covariates. #' @param type Vector of length \code{ncol(x)} identifying random and class #' variables. Random effects are identified by a '2'. The group variable (only #' one is allowed) is coded as '-2'. Random effects also include the fixed #' effect. If for a covariates X1 group means shall be calculated and included #' as further fixed effects choose '3'. In addition to the effects in '3', #' specification '4' also includes random effects of X1. #' @param intercept Logical determining whether the intercept is automatically #' added. #' @param paniter Number of iterations in \code{pan}. Default is 500. #' @param groupcenter.slope If \code{TRUE}, in case of group means (\code{type} #' is '3' or'4') group mean centering for these predictors are conducted before #' doing imputations. Default is \code{FALSE}. #' @param ... Other named arguments. #' @return A vector of length \code{nmis} with imputations. #' @author Alexander Robitzsch (IPN - Leibniz Institute for Science and #' Mathematics Education, Kiel, Germany), \email{robitzsch@@ipn.uni-kiel.de}. #' @note This function does not implement the \code{where} functionality. It #' always produces \code{nmis} imputation, irrespective of the \code{where} #' argument of the \code{mice} function. #' @family univariate-2l #' @references #' #' Schafer J L, Yucel RM (2002). Computational strategies for multivariate #' linear mixed-effects models with missing values. \emph{Journal of #' Computational and Graphical Statistics}. \bold{11}, 437-457. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. 
\doi{10.18637/jss.v045.i03} #' @examples #' # simulate some data #' # two-level regression model with fixed slope #' #' # number of groups #' G <- 250 #' # number of persons #' n <- 20 #' # regression parameter #' beta <- .3 #' # intraclass correlation #' rho <- .30 #' # correlation with missing response #' rho.miss <- .10 #' # missing proportion #' missrate <- .50 #' y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho)) #' x <- rnorm(G * n) #' y <- y1 + beta * x #' dfr0 <- dfr <- data.frame("group" = rep(1:G, each = n), "x" = x, "y" = y) #' dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss)) < qnorm(missrate), "y"] <- NA #' #' # empty imputation in mice #' imp0 <- mice(as.matrix(dfr), maxit = 0) #' predM <- imp0$predictorMatrix #' impM <- imp0$method #' #' # specify predictor matrix and method #' predM1 <- predM #' predM1["y", "group"] <- -2 #' predM1["y", "x"] <- 1 # fixed x effects imputation #' impM1 <- impM #' impM1["y"] <- "2l.pan" #' #' # multilevel imputation #' imp1 <- mice(as.matrix(dfr), #' m = 1, predictorMatrix = predM1, #' method = impM1, maxit = 1 #' ) #' #' # multilevel analysis #' library(lme4) #' mod <- lmer(y ~ (1 + x | group) + x, data = complete(imp1)) #' summary(mod) #' #' # Examples of predictorMatrix specification #' #' # random x effects #' # predM1["y","x"] <- 2 #' #' # fixed x effects and group mean of x #' # predM1["y","x"] <- 3 #' #' # random x effects and group mean of x #' # predM1["y","x"] <- 4 #' @export mice.impute.2l.pan <- function(y, ry, x, type, intercept = TRUE, paniter = 500, groupcenter.slope = FALSE, ...) { install.on.demand("pan", ...) ## append intercept if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) } # add groupmeans in the regression model if (any(type %in% c(3, 4))) { x0 <- as.matrix(cbind(x[, type == -2], x[, type %in% c(3, 4)])) colnames(x0) <- c(colnames(x)[type == -2], colnames(x)[type %in% c(3, 4)]) type0 <- c(-2, rep.int(1, ncol(x0) - 1)) x0.aggr <- as.matrix(.mice.impute.2l.groupmean( y = y, ry = ry, x = x0, type = type0, grmeanwarning = FALSE, ... )) colnames(x0.aggr) <- paste0("M.", colnames(x0)[-1]) # groupcentering if (groupcenter.slope) { x0.aggr1 <- as.matrix(x0.aggr) colnames(x0.aggr1) <- colnames(x0)[-1] x0cent <- x0[, -1] - x0.aggr1 x[, colnames(x0cent)] <- x0cent } # combine covariate matrix x <- cbind(x, x0.aggr) # add type type1 <- c(type, rep.int(1, ncol(x0.aggr))) names(type1) <- c(names(type), colnames(x0.aggr)) type1[type1 == 3] <- 1 type1[type1 == 4] <- 2 type <- type1 } ############################# # pan imputation # define cluster group <- x[, type == -2] subj <- match(group, unique(group)) # is group resorting necessary? 
(need this for pan) sortgroups <- any(diff(subj) < 0) if (sortgroups) { dfr <- data.frame( "group" = group, "ry" = ry, "index" = seq(1, length(ry)) ) dfr <- dfr[order(dfr$group), ] group <- group[dfr$index] y <- y[dfr$index] x <- x[dfr$index, ] ry <- ry[dfr$index] subj <- subj[dfr$index] # stop( "Sort group identifiers in increasing order!\n") } y1 <- matrix(as.numeric(y), ncol = 1) y1[!ry, 1] <- NA # specify predictors pred <- x[, type != -2, drop = FALSE] ## fixed SvB 1feb2013 # columns fixed effects xcol <- seq(1, ncol(pred)) type1 <- type[type != -2] zcol <- which(type1 == 2) # noninformative priors prior <- list( a = ncol(y1), Binv = diag(rep(1, ncol(y1))), c = ncol(y1) * length(zcol), Dinv = diag(rep(1, ncol(y1) * length(zcol))) ) if (length(subj) != nrow(y1)) stop("No class variable") # pan imputation ii <- 0 while (ii == 0) { s1 <- round(runif(1, 1, 10^7)) imput <- pan::pan(y1, subj, pred, xcol, zcol, prior, seed = s1, iter = paniter) res <- imput$y ii <- 1 - any(is.na(res)) # check for invalid imputations: pan occasionally produces NaNs } if (sortgroups) { dfr <- cbind(res, dfr) dfr <- dfr[order(dfr$index), ] res <- dfr[!dfr$ry, "res"] } else { res <- res[!ry] } flush.console() res } # compute cluster groupmean .mice.impute.2l.groupmean <- function(y, ry, x, type, grmeanwarning = TRUE, ...) { if ((ncol(x) > 2) & grmeanwarning) warning("\nMore than one variable is requested to be aggregated.\n") # calculate aggregated values a1 <- aggregate(x[, type %in% c(1, 2)], list(x[, type == -2]), mean, na.rm = TRUE) i1 <- match(x[, type == -2], a1[, 1]) ximp <- as.matrix(a1[i1, -1]) colnames(ximp) <- paste(names(type)[type %in% c(1, 2)], names(type)[type == -2], sep = ".") return(ximp) } mice/R/md.pairs.R0000644000176200001440000000362614330031647013240 0ustar liggesusers#' Missing data pattern by variable pairs #' #' Number of observations per variable pair. #' #' The four components in the output value is have the following interpretation: #' \describe{ \item{list('rr')}{response-response, both variables are observed} #' \item{list('rm')}{response-missing, row observed, column missing} #' \item{list('mr')}{missing -response, row missing, column observed} #' \item{list('mm')}{missing -missing, both variables are missing} } #' #' @param data A data frame or a matrix containing the incomplete data. Missing #' values are coded as \code{NA}. #' @return A list of four components named \code{rr}, \code{rm}, \code{mr} and #' \code{mm}. Each component is square numerical matrix containing the number #' observations within four missing data pattern. #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2009 #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. 
#' \doi{10.18637/jss.v045.i03} #' @keywords univar #' @examples #' pat <- md.pairs(nhanes) #' pat #' #' # show that these four matrices decompose the total sample size #' # for each pair #' pat$rr + pat$rm + pat$mr + pat$mm #' #' # percentage of usable cases to impute row variable from column variable #' round(100 * pat$mr / (pat$mr + pat$mm)) #' @export md.pairs <- function(data) { # calculates pairwise missing data statistics # rr: response-response pairs # rm: response-missing pairs # mr: missing-response pairs # mm: missing-missing pairs if (!(is.matrix(data) || is.data.frame(data))) { stop("Data should be a matrix or dataframe") } if (ncol(data) < 2) { stop("Data should have at least two columns") } r <- !is.na(data) rr <- t(r) %*% r mm <- t(!r) %*% (!r) mr <- t(!r) %*% r rm <- t(r) %*% (!r) list(rr = rr, rm = rm, mr = mr, mm = mm) } mice/R/pool.scalar.R0000644000176200001440000000764514334522175013752 0ustar liggesusers#' Multiple imputation pooling: univariate version #' #' Pools univariate estimates of m repeated complete data analysis #' #' The function averages the univariate estimates of the complete data model, #' computes the total variance over the repeated analyses, and computes the #' relative increase in variance due to missing data or data synthesisation #' and the fraction of missing information. #' #' @param Q A vector of univariate estimates of \code{m} repeated complete data #' analyses. #' @param U A vector containing the corresponding \code{m} variances of the univariate #' estimates. #' @param n A number providing the sample size. If nothing is specified, #' an infinite sample \code{n = Inf} is assumed. #' @param k A number indicating the number of parameters to be estimated. #' By default, \code{k = 1} is assumed. #' @inheritParams pool #' @return Returns a list with components. #' \describe{ #' \item{\code{m}:}{Number of imputations.} #' \item{\code{qhat}:}{The \code{m} univariate estimates of repeated complete-data analyses.} #' \item{\code{u}:}{The corresponding \code{m} variances of the univariate estimates.} #' \item{\code{qbar}:}{The pooled univariate estimate, formula (3.1.2) Rubin (1987).} #' \item{\code{ubar}:}{The mean of the variances (i.e. the pooled within-imputation variance), #' formula (3.1.3) Rubin (1987).} #' \item{\code{b}:}{The between-imputation variance, formula (3.1.4) Rubin (1987).} #' \item{\code{t}:}{The total variance of the pooled estimated, formula (3.1.5) #' Rubin (1987).} #' \item{\code{r}:}{The relative increase in variance due to nonresponse, formula #' (3.1.7) Rubin (1987).} #' \item{\code{df}:}{The degrees of freedom for t reference distribution by the #' method of Barnard-Rubin (1999).} #' \item{\code{fmi}:}{The fraction missing information due to nonresponse, #' formula (3.1.10) Rubin (1987). (Not defined for synthetic data.)} #' } #' @author Karin Groothuis-Oudshoorn and Stef van Buuren, 2009; Thom Volker, 2021 #' @seealso \code{\link{pool}} #' @references #' Rubin, D.B. (1987). Multiple Imputation for Nonresponse in #' Surveys. New York: John Wiley and Sons. #' #' Reiter, J.P. (2003). Inference for Partially Synthetic, #' Public Use Microdata Sets. \emph{Survey Methodology}, \bold{29}, 181-189. 
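#' @details
#' With the default \code{rule = "rubin1987"}, the pooled quantities are, in
#' \code{R} notation, \code{qbar <- mean(Q)}, \code{ubar <- mean(U)},
#' \code{b <- var(Q)}, \code{t <- ubar + (1 + 1/m) * b} and
#' \code{r <- (1 + 1/m) * b / ubar}, corresponding to formulas
#' (3.1.2)-(3.1.7) of Rubin (1987).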
#' @examples #' # missing data imputation with with manual pooling #' imp <- mice(nhanes, maxit = 2, m = 2, print = FALSE, seed = 18210) #' fit <- with(data = imp, lm(bmi ~ age)) #' #' # manual pooling #' summary(fit$analyses[[1]]) #' summary(fit$analyses[[2]]) #' pool.scalar(Q = c(-1.5457, -1.428), U = c(0.9723^2, 1.041^2), n = 25, k = 2) #' #' # check: automatic pooling using broom #' pool(fit) #' #' # manual pooling for synthetic data created from complete data #' imp <- mice(cars, #' maxit = 2, m = 2, print = FALSE, seed = 18210, #' where = matrix(TRUE, nrow(cars), ncol(cars)) #' ) #' fit <- with(data = imp, lm(speed ~ dist)) #' #' # manual pooling: extract Q and U #' summary(fit$analyses[[1]]) #' summary(fit$analyses[[2]]) #' pool.scalar.syn(Q = c(0.12182, 0.13209), U = c(0.02121^2, 0.02516^2), n = 50, k = 2) #' #' # check: automatic pooling using broom #' pool.syn(fit) #' @export pool.scalar <- function(Q, U, n = Inf, k = 1, rule = c("rubin1987", "reiter2003")) { rule <- match.arg(rule) m <- length(Q) qbar <- mean(Q) ubar <- mean(U) b <- var(Q) r <- (1 + 1 / m) * b / ubar if (rule == "rubin1987") { t <- ubar + (m + 1) * b / m df <- barnard.rubin(m, b, t, dfcom = n - k) fmi <- (r + 2 / (df + 3)) / (r + 1) } if (rule == "reiter2003") { t <- ubar + b / m df <- (m - 1) * (1 + (ubar / (b / m)))^2 fmi <- NA_real_ } list( m = m, qhat = Q, u = U, qbar = qbar, ubar = ubar, b = b, t = t, df = df, r = r, fmi = fmi ) } #' @rdname pool.scalar #' @export pool.scalar.syn <- function(Q, U, n = Inf, k = 1, rule = "reiter2003") { pool.scalar(Q, U, n = Inf, k = 1, rule = rule) } mice/R/windspeed.R0000644000176200001440000000254114330031606013473 0ustar liggesusers#' Subset of Irish wind speed data #' #' Subset of Irish wind speed data #' #' The original data set is much larger and was analyzed in detail by Haslett #' and Raftery (1989). Van Buuren et al (2006) used this subset to investigate #' the influence of extreme MAR mechanisms on the quality of imputation. #' #' @name windspeed #' @docType data #' @format A data frame with 433 rows and 6 columns containing the daily average #' wind speeds within the period 1961-1978 at meteorological stations in the #' Republic of Ireland. The data are a random sample from a larger data set. #' \describe{ #' \item{RochePt}{Roche Point} #' \item{Rosslare}{Rosslare} #' \item{Shannon}{Shannon} #' \item{Dublin}{Dublin} #' \item{Clones}{Clones} #' \item{MalinHead}{Malin Head} } #' @references Haslett, J. and Raftery, A. E. (1989). \emph{Space-time #' Modeling with Long-memory Dependence: Assessing Ireland's Wind Power #' Resource (with Discussion)}. Applied Statistics 38, 1-50. #' \url{http://lib.stat.cmu.edu/datasets/wind.desc} and #' \url{http://lib.stat.cmu.edu/datasets/wind.data} #' #' van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #' Fully conditional specification in multivariate imputation. \emph{Journal of #' Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #' @keywords datasets #' @examples #' #' windspeed[1:3, ] NULL mice/R/mice.impute.2l.norm.R0000644000176200001440000001236014334522175015230 0ustar liggesusers#' Imputation by a two-level normal model #' #' Imputes univariate missing data using a two-level normal model #' #' Implements the Gibbs sampler for the linear multilevel model with #' heterogeneous with-class variance (Kasim and Raudenbush, 1998). Imputations #' are drawn as an extra step to the algorithm. For simulation work see Van #' Buuren (2011). 
#' #' The random intercept is automatically added in \code{mice.impute.2l.norm()}. #' A model without a random intercept can be specified by \code{mice(..., #' intercept = FALSE)}. #' #' @name mice.impute.2l.norm #' @inheritParams mice.impute.pmm #' @param type Vector of length \code{ncol(x)} identifying random and class #' variables. Random variables are identified by a '2'. The class variable #' (only one is allowed) is coded as '-2'. Random variables also include the #' fixed effect. #' @param intercept Logical determining whether the intercept is automatically #' added. #' @param ... Other named arguments. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @note Added June 25, 2012: The currently implemented algorithm does not #' handle predictors that are specified as fixed effects (type=1). When using #' \code{mice.impute.2l.norm()}, the current advice is to specify all predictors #' as random effects (type=2). #' #' Warning: The assumption of heterogeneous variances requires that in every #' class at least one observation has a response in \code{y}. #' @author Roel de Jong, 2008 #' @references #' #' Kasim RM, Raudenbush SW. (1998). Application of Gibbs sampling to nested #' variance components models with heterogeneous within-group variance. Journal #' of Educational and Behavioral Statistics, 23(2), 93--116. #' #' Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' #' Van Buuren, S. (2011) Multiple imputation of multilevel data. In Hox, J.J. #' and Roberts, J.K. (Eds.), \emph{The Handbook of Advanced Multilevel #' Analysis}, Chapter 10, pp. 173--196. Milton Park, UK: Routledge. #' @family univariate-2l #' @keywords datagen #' @export mice.impute.2l.norm <- function(y, ry, x, type, wy = NULL, intercept = TRUE, ...) { if (intercept) { x <- cbind(1, as.matrix(x)) type <- c(2, type) } ## Initialize n.iter <- 100 if (is.null(wy)) wy <- !ry n.class <- length(unique(x[, type == -2])) if (n.class == 0) stop("No class variable") gf.full <- factor(x[, type == -2], labels = seq_len(n.class)) gf <- gf.full[ry] XG <- split.data.frame(as.matrix(x[ry, type == 2]), gf) X.SS <- lapply(XG, crossprod) yg <- split(as.vector(y[ry]), gf) n.g <- tabulate(gf) n.rc <- ncol(XG[[1]]) bees <- matrix(0, nrow = n.class, ncol = n.rc) ss <- vector(mode = "numeric", length = n.class) mu <- rep.int(0, n.rc) inv.psi <- diag(1, n.rc, n.rc) inv.sigma2 <- rep.int(1, n.class) sigma2.0 <- 1 theta <- 1 ## Execute Gibbs sampler for (iter in seq_len(n.iter)) { ## Draw bees for (class in seq_len(n.class)) { vv <- symridge(inv.sigma2[class] * X.SS[[class]] + inv.psi, ...)
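      # vv is the (ridge-stabilised) posterior precision of the class-specific
      # coefficients; its inverse, computed next via chol2inv(chol(vv)), is the
      # posterior covariance used to draw bees[class, ].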
bees.var <- chol2inv(chol(vv)) bees[class, ] <- drop(bees.var %*% (crossprod(inv.sigma2[class] * XG[[class]], yg[[class]]) + inv.psi %*% mu)) + drop(rnorm(n = n.rc) %*% chol(symridge(bees.var, ...))) ss[class] <- crossprod(yg[[class]] - XG[[class]] %*% bees[class, ]) } ## Draw mu mu <- colMeans(bees) + drop(rnorm(n = n.rc) %*% chol(chol2inv(chol(symridge(inv.psi, ...))) / n.class)) ## Draw psi inv.psi <- rwishart( df = n.class - n.rc - 1, SqrtSigma = chol(chol2inv(chol(symridge(crossprod(t(t(bees) - mu)), ...)))) ) ## Draw sigma2 inv.sigma2 <- rgamma(n.class, n.g / 2 + 1 / (2 * theta), scale = 2 * theta / (ss * theta + sigma2.0)) ## Draw sigma2.0 H <- 1 / mean(inv.sigma2) # Harmonic mean sigma2.0 <- rgamma(1, n.class / (2 * theta) + 1, scale = 2 * theta * H / n.class) ## Draw theta G <- exp(mean(log(1 / inv.sigma2))) # Geometric mean theta <- 1 / rgamma(1, n.class / 2 - 1, scale = 2 / (n.class * (sigma2.0 / H - log(sigma2.0) + log(G) - 1))) } ## Generate imputations imps <- rnorm(n = sum(wy), sd = sqrt(1 / inv.sigma2[gf.full[wy]])) + rowSums(as.matrix(x[wy, type == 2, drop = FALSE]) * bees[gf.full[wy], ]) imps } rwishart <- function(df, p = nrow(SqrtSigma), SqrtSigma = diag(p)) { ## rwishart, written by Bill Venables Z <- matrix(0, p, p) diag(Z) <- sqrt(rchisq(p, df:(df - p + 1))) if (p > 1) { pseq <- seq_len(p - 1) Z[rep(p * pseq, pseq) + unlist(lapply(pseq, seq))] <- rnorm(p * (p - 1) / 2) } crossprod(Z %*% SqrtSigma) } force.chol <- function(x, warn = TRUE) { z <- 0 repeat { lambda <- 0.1 * z XT <- x + diag(x = lambda, nrow = nrow(x)) XT <- (XT + t(XT)) / 2 s <- try(expr = chol(XT), silent = TRUE) if (!inherits(s, "try-error")) { break } z <- z + 1 } attr(s, "forced") <- (z > 0) if (warn && z > 0) { warning("Cholesky decomposition had to be forced", call. = FALSE) } s } symridge <- function(x, ridge = 0.0001, ...) { x <- (x + t(x)) / 2 if (nrow(x) == 1L) { return(x) } x + diag(diag(x) * ridge) } mice/R/where.R0000644000176200001440000000420414334522175012633 0ustar liggesusers#' Creates a \code{where} argument #' #' This helper function creates a valid \code{where} matrix. The #' \code{where} matrix is an argument to the \code{mice} function. #' It has the same size as \code{data} and specifies which values #' are to be imputed (\code{TRUE}) or nor (\code{FALSE}). #' @param data A \code{data.frame} with the source data #' @param keyword An optional keyword, one of \code{"missing"} (missing #' values are imputed), \code{"observed"} (observed values are imputed), #' \code{"all"} and \code{"none"}. 
The default #' is \code{keyword = "missing"} #' @return A matrix with logical #' @seealso \code{\link{make.blocks}}, \code{\link{make.predictorMatrix}} #' @examples #' head(make.where(nhanes), 3) #' #' # create & analyse synthetic data #' where <- make.where(nhanes2, "all") #' imp <- mice(nhanes2, #' m = 10, where = where, #' print = FALSE, seed = 123 #' ) #' fit <- with(imp, lm(chl ~ bmi + age + hyp)) #' summary(pool.syn(fit)) #' @export make.where <- function(data, keyword = c("missing", "all", "none", "observed")) { keyword <- match.arg(keyword) data <- check.dataform(data) where <- switch(keyword, missing = is.na(data), all = matrix(TRUE, nrow = nrow(data), ncol = ncol(data)), none = matrix(FALSE, nrow = nrow(data), ncol = ncol(data)), observed = !is.na(data) ) dimnames(where) <- dimnames(data) where } check.where <- function(where, data, blocks) { if (is.null(where)) { where <- make.where(data, keyword = "missing") } if (!(is.matrix(where) || is.data.frame(where))) { if (is.character(where)) { return(make.where(data, keyword = where)) } else { stop("Argument `where` not a matrix or data frame", call. = FALSE) } } if (!all(dim(data) == dim(where))) { stop("Arguments `data` and `where` not of same size", call. = FALSE) } where <- as.logical(as.matrix(where)) if (anyNA(where)) { stop("Argument `where` contains missing values", call. = FALSE) } where <- matrix(where, nrow = nrow(data), ncol = ncol(data)) dimnames(where) <- dimnames(data) where[, !colnames(where) %in% unlist(blocks)] <- FALSE where } mice/R/ampute.discrete.R0000644000176200001440000001072714334522175014624 0ustar liggesusers#' Multivariate amputation based on discrete probability functions #' #' This function creates a missing data indicator for each pattern. Odds probabilities #' (Brand, 1999, pp. 110-113) will be induced on the weighted sum scores, calculated earlier #' in the multivariate amputation function \code{\link{ampute}}. #' #' @param P A vector containing the pattern numbers of candidates. #' For each case, a value between 1 and #patterns is given. For example, a #' case with value 2 is candidate for missing data pattern 2. #' @param scores A list containing vectors with the candidates's weighted sum scores, #' the result of an underlying function in \code{\link{ampute}}. #' @param prop A scalar specifying the proportion of missingness. Should be a value #' between 0 and 1. Default is a missingness proportion of 0.5. #' @param odds A matrix where #patterns defines the #rows. Each row should contain #' the odds of being missing for the corresponding pattern. The amount of odds values #' defines in how many quantiles the sum scores will be divided. The values are #' relative probabilities: a quantile with odds value 4 will have a probability of #' being missing that is four times higher than a quantile with odds 1. The #' #quantiles may differ between the patterns, specify NA for cells remaining empty. #' Default is 4 quantiles with odds values 1, 2, 3 and 4, the result of #' \code{\link{ampute.default.odds}}. #' @return A list containing vectors with \code{0} if a case should be made missing #' and \code{1} if a case should remain complete. The first vector refers to the #' first pattern, the second vector to the second pattern, etcetera. #' @author Rianne Schouten, 2016 #' @seealso \code{\link{ampute}}, \code{\link{ampute.default.odds}} #' @references Brand, J.P.L. (1999). 
\emph{Development, implementation and #' evaluation of multiple imputation strategies for the statistical analysis of #' incomplete data sets.} Dissertation. Rotterdam: Erasmus University. #' @keywords internal #' @export ampute.discrete <- function(P, scores, prop, odds) { R <- vector(mode = "list", length = nrow(odds)) for (i in seq_len(nrow(odds))) { if (scores[[i]][[1]] == 0) { R[[i]] <- 0 } else { # The scores are divided into quantiles # Specify #quantiles by #odds values ng <- length(odds[i, ][!is.na(odds[i, ])]) quantiles <- quantile(scores[[i]], probs = seq.int(0, 1, by = 1 / ng)) if (anyDuplicated(quantiles) || anyNA(quantiles)) { stop("Division of sum scores into quantiles did not succeed. Possibly the sum scores contain too few different observations (in case of categorical or dummy variables). Try using more variables to calculate the sum scores or diminish the number of quantiles in the odds matrix", call. = FALSE) } # For each candidate the quantile number is specified R.temp <- rep.int(NA, length(scores[[i]])) for (k in seq_len(ng)) { R.temp <- replace(R.temp, scores[[i]] >= quantiles[k] & scores[[i]] <= quantiles[k + 1], k) } # For each candidate, a random value between 0 and 1 is compared with the # odds probability of being missing. If random value <= prob, the candidate # will receive missing data indicator 0, meaning he will be made missing # according the pattern; if random value > prob, the candidate will receive # missing data indicator 1, meaning the candidate will remain complete. for (l in seq_len(ng)) { prob <- (ng * prop * odds[i, l]) / sum(odds[i, ], na.rm = TRUE) if (prob >= 1.0) { warning("Combination of odds matrix and desired proportion of missingness results to small quantile groups, probably decreasing the obtained proportion of missingness", call. = FALSE ) } gn <- length(R.temp[R.temp == l]) if (gn != 0) { random <- runif(n = gn, min = 0, max = 1) Q <- c() for (m in seq_len(gn)) { if (random[m] <= prob) { Q[m] <- 0 # Candidate will be made missing } else { Q[m] <- 1 # Candidate will be kept complete } } # Give the result to the right candidate R.temp <- replace(R.temp, R.temp == l, Q) } } # Give the result to the right cases in the data R[[i]] <- replace(P, P == (i + 1), R.temp) R[[i]] <- replace(R[[i]], P != (i + 1), 1) } } R } mice/R/nhanes.R0000644000176200001440000000155614330031606012772 0ustar liggesusers#' NHANES example - all variables numerical #' #' A small data set with non-monotone missing values. #' #' A small data set with all numerical variables. The data set \code{nhanes2} is #' the same data set, but with \code{age} and \code{hyp} treated as factors. #' #' @name nhanes #' @docType data #' @format A data frame with 25 observations on the following 4 variables. #' \describe{ #' \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} #' \item{bmi}{Body mass index (kg/m**2)} #' \item{hyp}{Hypertensive (1=no,2=yes)} #' \item{chl}{Total serum cholesterol (mg/dL)} } #' @seealso \code{\link{nhanes2}} #' @source Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate #' Data.} London: Chapman & Hall. Table 6.14. 
#' @keywords datasets #' @examples #' # create 5 imputed data sets #' imp <- mice(nhanes) #' #' # print the first imputed data set #' complete(imp) NULL mice/R/convergence.R0000644000176200001440000001123214433400023014001 0ustar liggesusers#' Computes convergence diagnostics for a \code{mids} object #' #' Takes an object of class \code{mids}, computes the autocorrelation #' and/or potential scale reduction factor, and returns a \code{data.frame} #' with the specified diagnostic(s) per iteration. #' #' @param data An object of class \code{mids} as created by the function #' \code{mice()}. #' @param diagnostic A keyword. One of the following keywords: \code{"ac"}, #' \code{"all"}, \code{"gr"} and \code{"psrf"}. See the Details section #' for the interpretation. #' The default is \code{diagnostic = "all"} which returns both the #' autocorrelation and potential scale reduction factor per iteration. #' @param parameter A keyword. One of the following keywords: \code{"mean"} #' or \code{"sd"} to evaluate chain means or chain standard deviations, #' respectively. #' @param \dots Additional arguments. Not used. #' @return A \code{data.frame} with the autocorrelation and/or potential #' scale reduction factor per iteration of the MICE algorithm. #' @details #' The argument \code{diagnostic} can be length-1 character, which is #' matched to one of the following keywords: #' \describe{ #' \item{\code{"all"}}{computes both the lag-1 autocorrelation as well as #' the potential scale reduction factor (cf. Vehtari et al., 2021) per #' iteration of the MICE algorithm;} #' \item{\code{"ac"}}{computes only the autocorrelation per iteration;} #' \item{\code{"psrf"}}{computes only the potential scale reduction factor #' per iteration;} #' \item{\code{"gr"}}{same as \code{psrf}, the potential scale reduction #' factor is colloquially called the Gelman-Rubin diagnostic.} #' } #' In the unlikely event of perfect convergence, the autocorrelation equals #' zero and the potential scale reduction factor equals one. To interpret #' the convergence diagnostic(s) in the output of the function, it is #' recommended to plot the diagnostics (ac and/or psrf) against the #' iteration number (.it) per imputed variable (vrb). A persistently #' decreasing trend across iterations indicates potential non-convergence. #' #' @seealso \code{\link{mice}}, \code{\link[=mids-class]{mids}} #' @keywords none #' @references Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., & Burkner, #' P.-C. (2021). Rank-Normalization, Folding, and Localization: An Improved #' R for Assessing Convergence of MCMC. Bayesian Analysis, 1(1), 1-38. #' https://doi.org/10.1214/20-BA1221 #' @examples #' \dontrun{ #' # obtain imputed data set #' imp <- mice(nhanes2, print = FALSE) #' # compute convergence diagnostics #' convergence(imp) #' } #' @export convergence <- function(data, diagnostic = "all", parameter = "mean", ...) { # process inputs install.on.demand("rstan", ...) install.on.demand("purrr", ...) if (!is.mids(data)) { stop("'data' not of class 'mids'") } if (data$m < 2) { stop("the number of imputations should be at least two (m > 1)") } if (data$iteration < 3) { stop("the number of iterations should be at least three (maxit > 2)") } if (is.null(data$chainMean)) { stop("no convergence diagnostics found", call. 
= FALSE) } if (parameter != "mean" & parameter != "sd") { stop("'parameter' not recognized, please use 'mean' or 'sd'") } vrbs <- names(data$data) p <- length(vrbs) m <- as.integer(data$m) t <- as.integer(data$iteration) out <- expand.grid(.it = seq_len(t), vrb = vrbs) # extract chain means or chain standard deviations if (parameter == "mean") { param <- lapply(seq(p), function(x) { aperm(data$chainMean[vrbs, , , drop = FALSE], c(2, 3, 1))[, , x] }) } if (parameter == "sd") { param <- lapply(seq(p), function(x) { aperm(sqrt(data$chainVar)[vrbs, , , drop = FALSE], c(2, 3, 1))[, , x] }) } names(param) <- vrbs # compute autocorrelation if (diagnostic == "all" | diagnostic == "ac") { ac <- purrr::map(vrbs, function(.vrb) { c(NA, dplyr::cummean(dplyr::coalesce( purrr::map_dbl(2:t, function(.itr) { suppressWarnings(stats::cor( param[[.vrb]][.itr - 1, ], param[[.vrb]][.itr, ], use = "pairwise.complete.obs" )) }), 0 ))) + 0 * param[[.vrb]][, 1] }) out <- base::cbind(out, ac = unlist(ac)) } # compute potential scale reduction factor if (diagnostic == "all" | diagnostic == "psrf" | diagnostic == "gr") { psrf <- purrr::map_dfr(param, ~ { purrr::map_dfr(1:t, function(.itr) { data.frame(psrf = rstan::Rhat(.[1:.itr, ])) }) }) out <- base::cbind(out, psrf) } out[is.nan(out)] <- NA return(out) } # function to extend is.nan() to data.frame objects is.nan.data.frame <- function(x) do.call(cbind, lapply(x, is.nan)) mice/R/get.df.R0000644000176200001440000000245614330031606012665 0ustar liggesusersget.dfcom <- function(object, dfcom = NULL) { # residual degrees of freedom of model fitted on hypothetically complete data # assumed to be the same across imputations if (!is.null(dfcom)) { return(max(dfcom, 1L)) } glanced <- get.glanced(object) # try to extract from df.residual if (!is.null(glanced)) { if ("df.residual" %in% colnames(glanced)) { return(glanced$df.residual[1L]) } } # try n - p (or nevent - p for Cox model) if (!is.null(glanced)) { if ("nobs" %in% colnames(glanced)) { model <- getfit(object, 1L) if (inherits(model, "coxph")) { return(max(model$nevent - length(coef(model)), 1L)) } return(max(glanced$nobs[1L] - length(coef(model)), 1L)) } } # not found warning("Infinite sample size assumed.") Inf } get.glanced <- function(object) { if (!is.list(object)) stop("Argument 'object' not a list", call. = FALSE) object <- as.mira(object) glanced <- try(data.frame(summary(getfit(object), type = "glance")), silent = TRUE) if (inherits(glanced, "data.frame")) { # nobs is needed for pool.r.squared # broom <= 0.5.6 does not supply it if (!"nobs" %in% colnames(glanced)) { glanced$nobs <- length(stats::residuals(object$analyses[[1]])) } } else { glanced <- NULL } glanced } mice/R/mids2spss.R0000644000176200001440000000627514330031606013450 0ustar liggesusers#' Export \code{mids} object to SPSS #' #' Converts a \code{mids} object into a format recognized by SPSS, and writes #' the data and the SPSS syntax files. #' #' This function automates most of the work needed to export a \code{mids} #' object to SPSS. It uses \code{haven::write_sav()} to facilitate the export to an #' SPSS \code{.sav} or \code{.zsav} file. #' #' Below are some things to pay attention to. #' #' The \code{SPSS} syntax file has the proper file names and separators set, so #' in principle it should run and read the data without alteration. \code{SPSS} #' is more strict than \code{R} with respect to the paths. Always use the full #' path, otherwise \code{SPSS} may not be able to find the data file. 
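#'
#' For example, a call along the following lines (the path is hypothetical,
#' and \code{imp} is assumed to be a \code{mids} object created by
#' \code{mice()}) writes the stacked imputations to a fixed location that
#' SPSS can find:
#' \code{mids2spss(imp, filename = "nhanes_mi", path = "C:/mydata")}.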
#' #' Factors in \code{R} translate into categorical variables in \code{SPSS}. The #' internal coding of factor levels used in \code{R} is exported. This is #' generally acceptable for \code{SPSS}. However, when the data are to be #' combined with existing \code{SPSS} data, watch out for any changes in the #' factor levels codes. #' #' \code{SPSS} will recognize the data set as a multiply imputed data set, and #' do automatic pooling in procedures where that is supported. Note however that #' pooling is an extra option only available to those who license the #' \code{MISSING VALUES} module. Without this license, \code{SPSS} will still #' recognize the structure of the data, but it will not pool the multiply imputed #' estimates into a single inference. #' #' @param imp The \code{imp} argument is an object of class \code{mids}, #' typically produced by the \code{mice()} function. #' @param filename A character string describing the name of the output data #' file and its extension. #' @param path A character string containing the path of the output file. The #' value in \code{path} is appended to \code{filedat}. By #' default, files are written to the current \code{R} working directory. If #' \code{path=NULL} then no file path appending is done. #' @param compress A logical flag stating whether the resulting SPSS set should #' be a compressed \code{.zsav} file. #' @param silent A logical flag stating whether the location of the saved file should be #' printed. #' @return The return value is \code{NULL}. #' @author Gerko Vink, dec 2020. #' @seealso \code{\link[=mids-class]{mids}} #' @keywords manip #' @export mids2spss <- function(imp, filename = "midsdata", path = getwd(), compress = FALSE, silent = FALSE) { .id <- NULL # avoid empty global variable binding install.on.demand("haven") # extract a completed dataset (long format - all imputations stacked) # rename the .imp variable to imputation_, such that SPSS can identify a multiply imputed dataset out <- imp %>% complete(action = "long", include = TRUE) %>% dplyr::select(-.id) %>% dplyr::rename("Imputation_" = ".imp") # write the data to a .sav file with package haven and print (optional) the saved location if (!compress) { whereto <- paste(path, "/", filename, ".sav", sep = "") } else { whereto <- paste(path, "/", filename, ".zsav", sep = "") } haven::write_sav(data = out, path = whereto, compress = compress) if (!silent) { cat("SPSS file written to", whereto, "\n") } } mice/R/densityplot.R0000644000176200001440000003124314330031647014075 0ustar liggesusers#' Density plot of observed and imputed data #' #' Plotting methods for imputed data using \pkg{lattice}. \code{densityplot} #' produces plots of the densities. The function #' automatically separates the observed and imputed data. The #' functions extend the usual features of \pkg{lattice}. #' #' The argument \code{na.groups} may be used to specify (combinations of) #' missingness in any of the variables. The argument \code{groups} can be used #' to specify groups based on the variable values themselves. Only one of both #' may be active at the same time. When both are specified, \code{na.groups} #' takes precedence over \code{groups}. #' #' Use the \code{subset} and \code{na.groups} together to plots parts of the #' data. For example, select the first imputed data set by by #' \code{subset=.imp==1}. #' #' Graphical parameters like \code{col}, \code{pch} and \code{cex} can be #' specified in the arguments list to alter the plotting symbols. 
If #' \code{length(col)==2}, the color specification to define the observed and #' missing groups. \code{col[1]} is the color of the 'observed' data, #' \code{col[2]} is the color of the missing or imputed data. A convenient color #' choice is \code{col=mdc(1:2)}, a transparent blue color for the observed #' data, and a transparent red color for the imputed data. A good choice is #' \code{col=mdc(1:2), pch=20, cex=1.5}. These choices can be set for the #' duration of the session by running \code{mice.theme()}. #' #' @aliases densityplot #' @param x A \code{mids} object, typically created by \code{mice()} or #' \code{mice.mids()}. #' @param data Formula that selects the data to be plotted. This argument #' follows the \pkg{lattice} rules for \emph{formulas}, describing the primary #' variables (used for the per-panel display) and the optional conditioning #' variables (which define the subsets plotted in different panels) to be used #' in the plot. #' #' The formula is evaluated on the complete data set in the \code{long} form. #' Legal variable names for the formula include \code{names(x$data)} plus the #' two administrative factors \code{.imp} and \code{.id}. #' #' \bold{Extended formula interface:} The primary variable terms (both the LHS #' \code{y} and RHS \code{x}) may consist of multiple terms separated by a #' \sQuote{+} sign, e.g., \code{y1 + y2 ~ x | a * b}. This formula would be #' taken to mean that the user wants to plot both \code{y1 ~ x | a * b} and #' \code{y2 ~ x | a * b}, but with the \code{y1 ~ x} and \code{y2 ~ x} in #' \emph{separate panels}. This behavior differs from standard \pkg{lattice}. #' \emph{Only combine terms of the same type}, i.e. only factors or only #' numerical variables. Mixing numerical and categorical data occasionally #' produces odds labeling of vertical axis. #' #' The function \code{densityplot} does not use the \code{y} terms in the #' formula. Density plots for \code{x1} and \code{x2} are requested as \code{~ #' x1 + x2}. #' @param na.groups An expression evaluating to a logical vector indicating #' which two groups are distinguished (e.g. using different colors) in the #' display. The environment in which this expression is evaluated in the #' response indicator \code{is.na(x$data)}. #' #' The default \code{na.group = NULL} contrasts the observed and missing data #' in the LHS \code{y} variable of the display, i.e. groups created by #' \code{is.na(y)}. The expression \code{y} creates the groups according to #' \code{is.na(y)}. The expression \code{y1 & y2} creates groups by #' \code{is.na(y1) & is.na(y2)}, and \code{y1 | y2} creates groups as #' \code{is.na(y1) | is.na(y2)}, and so on. #' @param groups This is the usual \code{groups} arguments in \pkg{lattice}. It #' differs from \code{na.groups} because it evaluates in the completed data #' \code{data.frame(complete(x, "long", inc=TRUE))} (as usual), whereas #' \code{na.groups} evaluates in the response indicator. See #' \code{\link{xyplot}} for more details. When both \code{na.groups} and #' \code{groups} are specified, \code{na.groups} takes precedence, and #' \code{groups} is ignored. #' @param plot.points A logical used in \code{densityplot} that signals whether #' the points should be plotted. #' @param theme A named list containing the graphical parameters. The default #' function \code{mice.theme} produces a short list of default colors, line #' width, and so on. The extensive list may be obtained from #' \code{trellis.par.get()}. 
Global graphical parameters like \code{col} or #' \code{cex} in high-level calls are still honored, so first experiment with #' the global parameters. Many setting consists of a pair. For example, #' \code{mice.theme} defines two symbol colors. The first is for the observed #' data, the second for the imputed data. The theme settings only exist during #' the call, and do not affect the trellis graphical parameters. #' @param mayreplicate A logical indicating whether color, line widths, and so #' on, may be replicated. The graphical functions attempt to choose #' "intelligent" graphical parameters. For example, the same color can be #' replicated for different element, e.g. use all reds for the imputed data. #' Replication may be switched off by setting the flag to \code{FALSE}, in order #' to allow the user to gain full control. #' @param thicker Used in \code{densityplot}. Multiplication factor of the line #' width of the observed density. \code{thicker=1} uses the same thickness for #' the observed and imputed data. #' @param as.table See \code{\link[lattice:xyplot]{xyplot}}. #' @param panel See \code{\link{xyplot}}. #' @param default.prepanel See \code{\link[lattice:xyplot]{xyplot}}. #' @param outer See \code{\link[lattice:xyplot]{xyplot}}. #' @param allow.multiple See \code{\link[lattice:xyplot]{xyplot}}. #' @param drop.unused.levels See \code{\link[lattice:xyplot]{xyplot}}. #' @param subscripts See \code{\link[lattice:xyplot]{xyplot}}. #' @param subset See \code{\link[lattice:xyplot]{xyplot}}. #' @param \dots Further arguments, usually not directly processed by the #' high-level functions documented here, but instead passed on to other #' functions. #' @return The high-level functions documented here, as well as other high-level #' Lattice functions, return an object of class \code{"trellis"}. The #' \code{\link[lattice:update.trellis]{update}} method can be used to #' subsequently update components of the object, and the #' \code{\link[lattice:print.trellis]{print}} method (usually called by default) #' will plot it on an appropriate plotting device. #' @note The first two arguments (\code{x} and \code{data}) are reversed #' compared to the standard Trellis syntax implemented in \pkg{lattice}. This #' reversal was necessary in order to benefit from automatic method dispatch. #' #' In \pkg{mice} the argument \code{x} is always a \code{mids} object, whereas #' in \pkg{lattice} the argument \code{x} is always a formula. #' #' In \pkg{mice} the argument \code{data} is always a formula object, whereas in #' \pkg{lattice} the argument \code{data} is usually a data frame. #' #' All other arguments have identical interpretation. #' #' \code{densityplot} errs on empty groups, which occurs if all observations in #' the subgroup contain \code{NA}. The relevant error message is: \code{Error in #' density.default: ... need at least 2 points to select a bandwidth #' automatically}. There is yet no workaround for this problem. Use the more #' robust \code{bwplot} or \code{stripplot} as a replacement. 
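#' For instance, when a call such as \code{densityplot(imp, ~ x1 | .imp)}
#' stops with this error (\code{x1} standing in for a sparsely observed
#' variable), a box-and-whisker display of the same variable, for example
#' \code{bwplot(imp, x1 ~ .imp)}, is usually a workable alternative.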
#' @author Stef van Buuren #' @seealso \code{\link{mice}}, \code{\link{xyplot}}, \code{\link{stripplot}}, #' \code{\link{bwplot}}, \code{\link{lattice}} for an overview of the #' package, as well as \code{\link[lattice:histogram]{densityplot}}, #' \code{\link[lattice:panel.densityplot]{panel.densityplot}}, #' \code{\link[lattice:print.trellis]{print.trellis}}, #' \code{\link[lattice:trellis.par.get]{trellis.par.set}} #' @references Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data #' Visualization with R}, Springer. #' #' van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: Multivariate #' Imputation by Chained Equations in \code{R}. \emph{Journal of Statistical #' Software}, \bold{45}(3), 1-67. \doi{10.18637/jss.v045.i03} #' @keywords hplot #' @examples #' imp <- mice(boys, maxit = 1) #' #' ### density plot of head circumference per imputation #' ### blue is observed, red is imputed #' densityplot(imp, ~ hc | .imp) #' #' ### All combined in one panel. #' densityplot(imp, ~hc) #' @export densityplot.mids <- function(x, data, na.groups = NULL, groups = NULL, as.table = TRUE, plot.points = FALSE, theme = mice.theme(), mayreplicate = TRUE, thicker = 2.5, allow.multiple = TRUE, outer = TRUE, drop.unused.levels = lattice::lattice.getOption("drop.unused.levels"), panel = lattice::lattice.getOption("panel.densityplot"), default.prepanel = lattice::lattice.getOption("prepanel.default.densityplot"), ..., subscripts = TRUE, subset = TRUE) { call <- match.call() if (!is.mids(x)) stop("Argument 'x' must be a 'mids' object") ## unpack data and response indicator cd <- data.frame(complete(x, "long", include = TRUE)) r <- as.data.frame(is.na(x$data)) ## evaluate na.group in response indicator nagp <- eval(expr = substitute(na.groups), envir = r, enclos = parent.frame()) if (is.expression(nagp)) nagp <- eval(expr = nagp, envir = r, enclos = parent.frame()) ## evaluate groups in imputed data ngp <- eval(expr = substitute(groups), envir = cd, enclos = parent.frame()) if (is.expression(ngp)) ngp <- eval(expr = ngp, envir = cd, enclos = parent.frame()) groups <- ngp ## evaluate subset in imputed data ss <- eval(expr = substitute(subset), envir = cd, enclos = parent.frame()) if (is.expression(ss)) ss <- eval(expr = ss, envir = cd, enclos = parent.frame()) subset <- ss ## evaluate further arguments before parsing dots <- list(...) args <- list( panel = panel, default.prepanel = default.prepanel, allow.multiple = allow.multiple, outer = outer, drop.unused.levels = drop.unused.levels, subscripts = subscripts, as.table = as.table, plot.points = plot.points ) ## create formula if not given (in call$data !) 
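  ## The default formula keeps only non-factor variables with more than two
  ## missing values and at least two observed values; the other variables are
  ## dropped because too few points are available to estimate a density for
  ## the observed or the imputed group.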
vnames <- names(cd)[-seq_len(2)] allfactors <- vapply(cd, is.factor, logical(1))[-seq_len(2)] if (missing(data)) { vnames <- vnames[!allfactors & x$nmis > 2 & x$nmis < nrow(x$data) - 1] formula <- as.formula(paste("~", paste(vnames, collapse = "+", sep = ""), sep = "")) } else { formula <- data } ## determine the y-variables form <- lattice::latticeParseFormula( model = formula, data = cd, subset = subset, groups = groups, multiple = allow.multiple, outer = outer, subscripts = TRUE, drop = drop.unused.levels ) xnames <- unlist(lapply(strsplit(form$right.name, " \\+ "), rm.whitespace)) ## Jul2011 ## calculate selection vector gp nona <- is.null(call$na.groups) if (!is.null(call$groups) && nona) { gp <- call$groups } else { if (nona) { for (i in seq_along(xnames)) { xvar <- xnames[i] select <- cd$.imp != 0 & !r[, xvar] cd[select, xvar] <- NA } gp <- rep.int(cd$.imp, length(xnames)) } else { for (i in seq_along(xnames)) { xvar <- xnames[i] select <- cd$.imp != 0 & !nagp cd[select, xvar] <- NA } gp <- rep.int(cd$.imp, length(xnames)) } } ## replicate color 2 if group=.imp is part of xnames mustreplicate <- !(!is.null(call$groups) && nona) && mayreplicate if (mustreplicate) { theme$superpose.line$col <- rep(theme$superpose.line$col[seq_len(2)], c(1, x$m)) theme$superpose.line$lwd <- rep(c(theme$superpose.line$lwd[1] * thicker, theme$superpose.line$lwd[1]), c(1, x$m)) theme$superpose.symbol$col <- rep(theme$superpose.symbol$col[seq_len(2)], c(1, x$m)) theme$superpose.symbol$pch <- c(NA, 49:(49 + x$m - 1)) } ## change axis defaults of extended formula interface if (is.null(call$xlab)) { args$xlab <- "" if (length(xnames) == 1) args$xlab <- xnames } if (is.null(call$scales)) { args$scales <- list() if (length(xnames) > 1) { args$scales <- list(x = list(relation = "free"), y = list(relation = "free")) } } ## ready args <- c( x = formula, data = list(cd), groups = list(gp), args, dots, subset = call$subset ) ## go tp <- do.call("densityplot", args) tp <- update(tp, par.settings = theme) return(tp) } mice/R/filter.R0000644000176200001440000001115614334447167013021 0ustar liggesusers#' @importFrom dplyr filter #' @export dplyr::filter #' Subset rows of a \code{mids} object #' #' This function takes a \code{mids} object and returns a new #' \code{mids} object that pertains to the subset of the data #' identified by the expression in \dots. The expression may use #' column values from the incomplete data in \code{.data$data}. #' #' @param .data A \code{mids} object. #' @param ... Expressions that return a #' logical value, and are defined in terms of the variables in \code{.data$data}. #' If multiple expressions are specified, they are combined with the \code{&} operator. #' Only rows for which all conditions evaluate to \code{TRUE} are kept. #' @inheritParams dplyr::filter #' @seealso \code{\link[dplyr]{filter}} #' @return An S3 object of class \code{mids} #' @note The function calculates a logical vector \code{include} of length \code{nrow(.data$data)}. 
#' The function constructs the elements of the filtered \code{mids} object as follows: #' \tabular{ll}{ #' \code{data} \tab Select rows in \code{.data$data} for which \code{include == TRUE}\cr #' \code{imp} \tab Select rows each imputation \code{data.frame} in \code{.data$imp} for which \code{include == TRUE}\cr #' \code{m} \tab Equals \code{.data$m}\cr #' \code{where} \tab Select rows in \code{.data$where} for which \code{include == TRUE}\cr #' \code{blocks} \tab Equals \code{.data$blocks}\cr #' \code{call} \tab Equals \code{.data$call}\cr #' \code{nmis} \tab Recalculate \code{nmis} based on the selected \code{data} rows\cr #' \code{method} \tab Equals \code{.data$method}\cr #' \code{predictorMatrix} \tab Equals \code{.data$predictorMatrix}\cr #' \code{visitSequence} \tab Equals \code{.data$visitSequence}\cr #' \code{formulas} \tab Equals \code{.data$formulas}\cr #' \code{post} \tab Equals \code{.data$post}\cr #' \code{blots} \tab Equals \code{.data$blots}\cr #' \code{ignore} \tab Select positions in \code{.data$ignore} for which \code{include == TRUE}\cr #' \code{seed} \tab Equals \code{.data$seed}\cr #' \code{iteration} \tab Equals \code{.data$iteration}\cr #' \code{lastSeedValue} \tab Equals \code{.data$lastSeedValue}\cr #' \code{chainMean} \tab Set to \code{NULL}\cr #' \code{chainVar} \tab Set to \code{NULL}\cr #' \code{loggedEvents} \tab Equals \code{.data$loggedEvents}\cr #' \code{version} \tab Replaced with current version\cr #' \code{date} \tab Replaced with current date #' } #' @author Patrick Rockenschaub #' @keywords manip #' @examples #' imp <- mice(nhanes, m = 2, maxit = 1, print = FALSE) #' #' # example with external logical vector #' imp_f <- filter(imp, c(rep(TRUE, 13), rep(FALSE, 12))) #' #' nrow(complete(imp)) #' nrow(complete(imp_f)) #' #' # example with calculated include vector #' imp_f2 <- filter(imp, age >= 2 & hyp == 1) #' nrow(complete(imp_f2)) # should be 5 #' @export filter.mids <- function(.data, ..., .preserve = FALSE) { if (!is.mids(.data)) { stop("Argument `.data` should be of class mids.") } rows <- .data$data %>% mutate(.rownumber = row_number()) %>% filter(...) 
%>% pull(".rownumber") include <- 1L:nrow(.data$data) %in% rows # Components that stay the same after filtering m <- .data$m call <- .data$call blocks <- .data$blocks method <- .data$method predictorMatrix <- .data$predictorMatrix visitSequence <- .data$visitSequence formulas <- .data$formulas blots <- .data$blots post <- .data$post seed <- .data$seed iteration <- .data$iteration lastSeedValue <- .data$lastSeedValue loggedEvents <- .data$loggedEvents # Components that need to be subset data <- .data$data[include, ] ignore <- .data$ignore[include] where <- .data$where[include, ] imp <- vector("list", length(.data$imp)) names(imp) <- names(.data$imp) for (i in names(.data$imp)) { wy <- .data$where[, i] iy <- .data$where[, i] & include imp[[i]] <- .data$imp[[i]][iy[wy], , drop = FALSE] } # Components that need to be recalculated/reset nmis <- colSums(is.na(data)) chainMean <- NULL chainVar <- NULL # Create subset mids object midsobj <- list( data = data, imp = imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = iteration, lastSeedValue = lastSeedValue, chainMean = chainMean, chainVar = chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" midsobj } mice/R/internal.R0000644000176200001440000000737414334522175013350 0ustar liggesuserskeep.in.model <- function(y, ry, x, wy) { (complete.cases(y, x) & ry) | (complete.cases(x) & wy) } impute.with.na <- function(x, wy) !complete.cases(x) & wy check.df <- function(x, y, ry) { # if needed, writes the df warning message to the log df <- sum(ry) - ncol(x) - 1 mess <- paste("df set to 1. # observed cases:", sum(ry), " # predictors:", ncol(x) + 1) if (df < 1 && sum(ry) > 0) { updateLog(out = mess, frame = 4) } } remove.lindep <- function(x, y, ry, eps = 1e-04, maxcor = 0.99, allow.na = TRUE, frame = 4, ...) { # returns a logical vector of length ncol(x) if (ncol(x) == 0) { return(NULL) } # setting eps = 0 bypasses remove.lindep() if (eps == 0) { return(rep.int(TRUE, ncol(x))) } if (eps < 0) { stop("\n Argument 'eps' must be positive.") } # Keep all predictors if we allow imputation of fully missing y if (allow.na && sum(ry) == 0) { return(rep.int(TRUE, ncol(x))) } xobs <- x[ry, , drop = FALSE] yobs <- as.numeric(y[ry]) if (var(yobs) < eps) { return(rep(FALSE, ncol(xobs))) } keep <- unlist(apply(xobs, 2, var) > eps) keep[is.na(keep)] <- FALSE highcor <- suppressWarnings(unlist(apply(xobs, 2, cor, yobs) < maxcor)) keep <- keep & highcor if (all(!keep)) { updateLog( out = "All predictors are constant or have too high correlation.", frame = frame ) } # no need to calculate correlations, so return k <- sum(keep) if (k <= 1L) { return(keep) } # at most one TRUE # correlation between x's cx <- cor(xobs[, keep, drop = FALSE], use = "all.obs") eig <- eigen(cx, symmetric = TRUE) ncx <- cx while (eig$values[k] / eig$values[1] < eps) { j <- seq_len(k)[order(abs(eig$vectors[, k]), decreasing = TRUE)[1]] keep[keep][j] <- FALSE ncx <- cx[keep[keep], keep[keep], drop = FALSE] k <- k - 1 eig <- eigen(ncx) } if (!all(keep)) { out <- paste(dimnames(x)[[2]][!keep], collapse = ", ") updateLog(out = out, frame = frame) } return(keep) } ## make list of collinear variables to remove find.collinear <- function(x, threshold = 0.999, ...) 
{ nvar <- ncol(x) x <- data.matrix(x) r <- !is.na(x) nr <- apply(r, 2, sum, na.rm = TRUE) ord <- order(nr, decreasing = TRUE) xo <- x[, ord, drop = FALSE] ## SvB 24mar2011 varnames <- dimnames(xo)[[2]] z <- suppressWarnings(cor(xo, use = "pairwise.complete.obs")) hit <- outer(seq_len(nvar), seq_len(nvar), "<") & (abs(z) >= threshold) out <- apply(hit, 2, any, na.rm = TRUE) return(varnames[out]) } updateLog <- function(out = NULL, meth = NULL, frame = 1) { # find structures defined a mice() level pos_state <- ma_exists("state", frame)$pos pos_loggedEvents <- ma_exists("loggedEvents", frame)$pos s <- get("state", pos_state) r <- get("loggedEvents", pos_loggedEvents) rec <- data.frame( it = s$it, im = s$im, dep = s$dep, meth = if (is.null(meth)) s$meth else meth, out = if (is.null(out)) "" else out ) if (s$log) { rec <- rbind(r, rec) } s$log <- TRUE assign("state", s, pos = pos_state, inherits = TRUE) assign("loggedEvents", rec, pos = pos_loggedEvents, inherits = TRUE) return() } sym <- function(x) { (x + t(x)) / 2 } # This helper function was copied from # https://github.com/alexanderrobitzsch/miceadds/blob/master/R/ma_exists.R ma_exists <- function(x, pos, n_index = 1:8) { n_index <- n_index + 1 is_there <- exists(x, where = pos) obj <- NULL if (is_there) { obj <- get(x, pos) } if (!is_there) { for (nn in n_index) { pos <- parent.frame(n = nn) is_there <- exists(x, where = pos) if (is_there) { obj <- get(x, pos) break } } } #--- output res <- list(is_there = is_there, obj = obj, pos = pos) return(res) } mice/R/mice.impute.norm.R0000644000176200001440000002005014330031606014675 0ustar liggesusers#' Imputation by Bayesian linear regression #' #' Calculates imputations for univariate missing data by Bayesian linear #' regression, also known as the normal model. #' #' @aliases mice.impute.norm norm #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @author Stef van Buuren, Karin Groothuis-Oudshoorn #' @details #' Imputation of \code{y} by the normal model by the method defined by #' Rubin (1987, p. 167). The procedure is as follows: #' #' \enumerate{ #' \item{Calculate the cross-product matrix \eqn{S=X_{obs}'X_{obs}}.} #' \item{Calculate \eqn{V = (S+{diag}(S)\kappa)^{-1}}, with some small ridge #' parameter \eqn{\kappa}.} #' \item{Calculate regression weights \eqn{\hat\beta = VX_{obs}'y_{obs}.}} #' \item{Draw a random variable \eqn{\dot g \sim \chi^2_\nu} with \eqn{\nu=n_1 - q}.} #' \item{Calculate \eqn{\dot\sigma^2 = (y_{obs} - X_{obs}\hat\beta)'(y_{obs} - X_{obs}\hat\beta)/\dot g.}} #' \item{Draw \eqn{q} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_1}.} #' \item{Calculate \eqn{V^{1/2}} by Cholesky decomposition.} #' \item{Calculate \eqn{\dot\beta = \hat\beta + \dot\sigma\dot z_1 V^{1/2}}.} #' \item{Draw \eqn{n_0} independent \eqn{N(0,1)} variates in vector \eqn{\dot z_2}.} #' \item{Calculate the \eqn{n_0} values \eqn{y_{imp} = X_{mis}\dot\beta + \dot z_2\dot\sigma}.} #' } #' #' Using \code{mice.impute.norm} for all columns emulates Schafer's NORM method (Schafer, 1997). #' @references #' Rubin, D.B (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley & Sons. #' #' Schafer, J.L. (1997). Analysis of incomplete multivariate data. London: Chapman & Hall. #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.norm <- function(y, ry, x, wy = NULL, ...) { if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) parm <- .norm.draw(y, ry, x, ...) 
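  # impute: linear predictor with the drawn coefficients plus normal noise
  # scaled by the drawn sigma, evaluated for the rows flagged in wy
  # (steps 9 and 10 of the procedure described above)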
x[wy, ] %*% parm$beta + rnorm(sum(wy)) * parm$sigma } #' Draws values of beta and sigma by Bayesian linear regression #' #' This function draws random values of beta and sigma under the Bayesian #' linear regression model as described in Rubin (1987, p. 167). This function #' can be called by user-specified imputation functions. #' #' @aliases norm.draw .norm.draw #' @param y Incomplete data vector of length \code{n} #' @param ry Vector of missing data pattern (\code{FALSE}=missing, #' \code{TRUE}=observed) #' @param x Matrix (\code{n} x \code{p}) of complete covariates. #' @param rank.adjust Argument that specifies whether \code{NA}'s in the #' coefficients need to be set to zero. Only relevant when \code{ls.meth = "qr"} #' AND the predictor matrix is rank-deficient. #' @param ... Other named arguments. #' @return A \code{list} containing components \code{coef} (least squares estimate), #' \code{beta} (drawn regression weights) and \code{sigma} (drawn value of the #' residual standard deviation). #' @references #' Rubin, D.B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York: Wiley. #' @author Gerko Vink, 2018, for this version, based on earlier versions written #' by Stef van Buuren, Karin Groothuis-Oudshoorn, 2017 #' @export norm.draw <- function(y, ry, x, rank.adjust = TRUE, ...) { return(.norm.draw(y, ry, x, rank.adjust = TRUE, ...)) } ###' @rdname norm.draw ###' @export .norm.draw <- function(y, ry, x, rank.adjust = TRUE, ...) { p <- estimice(x[ry, , drop = FALSE], y[ry], ...) sigma.star <- sqrt(sum((p$r)^2) / rchisq(1, p$df)) beta.star <- p$c + (t(chol(sym(p$v))) %*% rnorm(ncol(x))) * sigma.star parm <- list(p$c, beta.star, sigma.star, p$ls.meth) names(parm) <- c("coef", "beta", "sigma", "estimation") if (any(is.na(parm$coef)) & rank.adjust) { parm$coef[is.na(parm$coef)] <- 0 parm$beta[is.na(parm$beta)] <- 0 } parm } #' Computes least squares parameters #' #' This function computes least squares estimates, variance/covariance matrices, #' residuals and degrees of freedom according to ridge regression, QR decomposition #' or Singular Value Decomposition. This function is internally called by .norm.draw(), #' but can be called by any user-specified imputation function. #' #' When calculating the inverse of the crossproduct of the predictor matrix, #' problems may arise. For example, taking the inverse is not possible when the #' predictor matrix is rank deficient, or when the estimation problem is #' computationally singular. This function detects such error cases and #' automatically falls back to adding a ridge penalty to the diagonal of the #' crossproduct to allow for proper calculation of the inverse. #' #' @note #' This functions adds a star to variable names in the mice iteration #' history to signal that a ridge penalty was added. In that case, it #' also adds an entry to \code{loggedEvents}. #' #' @aliases estimice #' @param x Matrix (\code{n} x \code{p}) of complete covariates. #' @param y Incomplete data vector of length \code{n} #' @param ls.meth the method to use for obtaining the least squares estimates. By #' default parameters are drawn by means of QR decomposition. #' @param ridge A small numerical value specifying the size of the ridge used. #' The default value \code{ridge = 1e-05} represents a compromise between stability #' and unbiasedness. Decrease \code{ridge} if the data contain many junk variables. #' Increase \code{ridge} for highly collinear data. #' @param ... Other named arguments. 
#' @return A \code{list} containing components \code{c} (least squares estimate), #' \code{r} (residuals), \code{v} (variance/covariance matrix) and \code{df} #' (degrees of freedom). #' @author Gerko Vink, 2018 #' @export estimice <- function(x, y, ls.meth = "qr", ridge = 1e-05, ...) { df <- max(length(y) - ncol(x), 1) if (ls.meth == "qr") { qr <- lm.fit(x = x, y = y) c <- t(qr$coef) f <- qr$fitted.values r <- t(qr$residuals) v <- try(solve(as.matrix(crossprod(qr.R(qr$qr)))), silent = TRUE) if (inherits(v, "try-error")) { xtx <- as.matrix(crossprod(qr.R(qr$qr))) # calculate ridge penalty pen <- diag(xtx) * ridge # add ridge penalty to allow inverse of v v <- solve(xtx + diag(pen)) mess <- paste0( "mice detected that your data are (nearly) multi-collinear.\n", "It applied a ridge penalty to continue calculations, but the results can be unstable.\n", "Does your dataset contain duplicates, linear transformation, or factors with unique respondent names?" ) updateLog(out = mess, frame = 6) if (get.printFlag()) { cat("*") } # indicator of added ridge penalty in the printed iteration history } return(list(c = t(c), r = t(r), v = v, df = df, ls.meth = ls.meth)) } if (ls.meth == "ridge") { xtx <- crossprod(x) pen <- ridge * diag(xtx) if (length(pen) == 1) { pen <- matrix(pen) } v <- solve(xtx + diag(pen)) c <- t(y) %*% x %*% v r <- y - x %*% t(c) return(list(c = t(c), r = r, v = v, df = df, ls.meth = ls.meth)) } if (ls.meth == "svd") { s <- svd(x) c <- s$v %*% ((t(s$u) %*% y) / s$d) f <- x %*% c r <- f - y v <- try(solve(s$v %*% diag(s$d)^2 %*% t(s$v)), silent = TRUE) if (inherits(v, "try-error")) { xtx <- s$v %*% diag(s$d)^2 %*% t(s$v) # calculate ridge penalty pen <- diag(xtx) * ridge # add ridge penalty to allow inverse of v v <- solve(xtx + diag(pen)) mess <- paste0( "mice detected that your data are (nearly) multi-collinear.\n", "It applied a ridge penalty to continue calculations, but the results can be unstable.\n", "Does your dataset contain duplicates, linear transformation, or factors with unique respondent names?" ) updateLog(out = mess, frame = 6) if (get.printFlag()) { cat("*") } # indicator of added ridge penalty in the printed iteration history } return(list(c = c, r = r, v = v, df = df, ls.meth = ls.meth)) } } get.printFlag <- function(start = 4) { while (inherits( try(get("printFlag", parent.frame(start)), silent = TRUE), "try-error" )) { start <- start + 1 } get("printFlag", parent.frame(start)) } mice/R/mids2mplus.R0000644000176200001440000000533614330031606013615 0ustar liggesusers#' Export \code{mids} object to Mplus #' #' Converts a \code{mids} object into a format recognized by Mplus, and writes #' the data and the Mplus input files #' #' This function automates most of the work needed to export a \code{mids} #' object to \code{Mplus}. The function writes the multiple imputation datasets, #' the file that contains the names of the multiple imputation data sets and an #' \code{Mplus} input file. The \code{Mplus} input file has the proper file #' names, so in principle it should run and read the data without alteration. #' \code{Mplus} will recognize the data set as a multiply imputed data set, and #' do automatic pooling in procedures where that is supported. #' #' @param imp The \code{imp} argument is an object of class \code{mids}, #' typically produced by the \code{mice()} function. #' @param file.prefix A character string describing the prefix of the output #' data files. #' @param path A character string containing the path of the output file. 
By #' default, files are written to the current \code{R} working directory. #' @param sep The separator between the data fields. #' @param dec The decimal separator for numerical data. #' @param silent A logical flag stating whether the names of the files should be #' printed. #' @return The return value is \code{NULL}. #' @author Gerko Vink, 2011. #' @seealso \code{\link[=mids-class]{mids}}, \code{\link{mids2spss}} #' @keywords manip #' @export mids2mplus <- function(imp, file.prefix = "imp", path = getwd(), sep = "\t", dec = ".", silent = FALSE) { m <- imp$m file.list <- matrix(0, m, 1) script <- matrix(0, 3, 1) for (i in seq_len(m)) { write.table(complete(imp, i), file = file.path(path, paste0(file.prefix, i, ".dat")), sep = sep, dec = dec, col.names = FALSE, row.names = FALSE ) file.list[i, ] <- paste0(file.prefix, i, ".dat") } write.table(file.list, file = file.path(path, paste0(file.prefix, "list.dat")), sep = sep, dec = dec, col.names = FALSE, row.names = FALSE, quote = FALSE ) names <- paste(colnames(complete(imp, 1)), collapse = " ") script[1, ] <- paste0("DATA: FILE IS ", file.prefix, "list.dat;") script[2, ] <- "TYPE = IMPUTATION;" script[3, ] <- paste0("VARIABLE: NAMES ARE ", names, ";") write.table(script, file = file.path(path, paste0(file.prefix, "list.inp")), sep = sep, dec = dec, col.names = FALSE, row.names = FALSE, quote = FALSE ) if (!silent) { cat( "Data values written to", file.path(path, paste0(file.prefix, 1, ".dat")), "through", paste0(file.prefix, m, ".dat"), "\n" ) cat("Data names written to", file.path(path, paste0(file.prefix, "list.dat")), "\n") cat("Mplus code written to", file.path(path, paste0(file.prefix, "list.inp")), "\n") } } mice/R/predictorMatrix.R0000644000176200001440000001171414347615172014711 0ustar liggesusers#' Creates a \code{predictorMatrix} argument #' #' This helper function creates a valid \code{predictMatrix}. The #' \code{predictorMatrix} is an argument to the \code{mice} function. #' It specifies the target variable or block in the rows, and the #' predictor variables on the columns. An entry of \code{0} means that #' the column variable is NOT used to impute the row variable or block. #' A nonzero value indicates that it is used. #' @param data A \code{data.frame} with the source data #' @param blocks An optional specification for blocks of variables in #' the rows. The default assigns each variable in its own block. #' @param predictorMatrix A predictor matrix from which rows with the same #' names are copied into the output predictor matrix. 
#' @return A matrix #' @seealso \code{\link{make.blocks}} #' @examples #' make.predictorMatrix(nhanes) #' make.predictorMatrix(nhanes, blocks = make.blocks(nhanes, "collect")) #' @export make.predictorMatrix <- function(data, blocks = make.blocks(data), predictorMatrix = NULL) { input.predictorMatrix <- predictorMatrix data <- check.dataform(data) predictorMatrix <- matrix(1, nrow = length(blocks), ncol = ncol(data)) dimnames(predictorMatrix) <- list(names(blocks), colnames(data)) for (i in row.names(predictorMatrix)) { predictorMatrix[i, colnames(predictorMatrix) %in% i] <- 0 } # preserve any user setting in predictorMatrix specification if (!is.null(input.predictorMatrix)) { for (i in row.names(predictorMatrix)) { if (i %in% row.names(input.predictorMatrix)) { predictorMatrix[i, ] <- input.predictorMatrix[i, ] } } } predictorMatrix } check.predictorMatrix <- function(predictorMatrix, data, blocks = NULL) { data <- check.dataform(data) if (!is.matrix(predictorMatrix)) { stop("predictorMatrix not a matrix", call. = FALSE) } if (any(dim(predictorMatrix) == 0L)) { stop("predictorMatrix has no rows or columns", call. = FALSE) } # if we have no blocks, restrict to square predictorMatrix if (is.null(blocks)) { if (nrow(predictorMatrix) != ncol(predictorMatrix)) { stop( paste( "If no blocks are specified, predictorMatrix must", "have same number of rows and columns" ), call. = FALSE ) } if (is.null(dimnames(predictorMatrix))) { if (ncol(predictorMatrix) == ncol(data)) { dimnames(predictorMatrix) <- list(colnames(data), colnames(data)) } else { stop("Missing row/column names in predictorMatrix", call. = FALSE) } } for (i in row.names(predictorMatrix)) { predictorMatrix[i, grep(paste0("^", i, "$"), colnames(predictorMatrix))] <- 0 } return(predictorMatrix) } # check conforming arguments if (nrow(predictorMatrix) > length(blocks)) { stop( paste0( "predictorMatrix has more rows (", nrow(predictorMatrix), ") than blocks (", length(blocks), ")" ), call. = FALSE ) } # borrow rownames from blocks if needed if (is.null(rownames(predictorMatrix)) && nrow(predictorMatrix) == length(blocks)) { rownames(predictorMatrix) <- names(blocks) } if (is.null(rownames(predictorMatrix))) { stop("Unable to set row names of predictorMatrix", call. = FALSE) } # borrow blocknames from predictorMatrix if needed if (is.null(names(blocks)) && nrow(predictorMatrix) == length(blocks)) { names(blocks) <- rownames(predictorMatrix) } if (is.null(names(blocks))) { stop("Unable to set names of blocks", call. = FALSE) } # check existence of row names in blocks found <- rownames(predictorMatrix) %in% names(blocks) if (!all(found)) { stop("Names not found in blocks: ", paste(rownames(predictorMatrix)[!found], collapse = ", "), call. = FALSE ) } # borrow colnames from data if needed if (is.null(colnames(predictorMatrix)) && ncol(predictorMatrix) == ncol(data)) { colnames(predictorMatrix) <- names(data) } if (is.null(colnames(predictorMatrix))) { stop("Unable to set column names of predictorMatrix", call. = FALSE) } # check existence of variable names on data found <- colnames(predictorMatrix) %in% names(data) if (!all(found)) { stop("Names not found in data: ", paste(colnames(predictorMatrix)[!found], collapse = ", "), call. 
= FALSE ) } list( predictorMatrix = predictorMatrix, blocks = blocks ) } edit.predictorMatrix <- function(predictorMatrix, visitSequence, user.visitSequence, maxit) { # edit predictorMatrix to a monotone pattern if (maxit == 1L && !is.null(user.visitSequence) && user.visitSequence == "monotone") { for (i in 1L:length(visitSequence)) { predictorMatrix[visitSequence[i], visitSequence[i:length(visitSequence)]] <- 0 } } predictorMatrix } mice/R/mira.R0000644000176200001440000000513014330031647012443 0ustar liggesusers#' Multiply imputed repeated analyses (\code{mira}) #' #' The \code{mira} object is generated by the \code{with.mids()} function. #' The \code{as.mira()} #' function takes the results of repeated complete-data analysis stored as a #' list, and turns it into a \code{mira} object that can be pooled. #' #' @section Slots: #' \describe{ #' #' \item{\code{.Data}:}{Object of class \code{"list"} containing the #' following slots:} #' \item{\code{call}:}{The call that created the object.} #' \item{\code{call1}:}{The call that created the \code{mids} object that was used #' in \code{call}.} #' \item{\code{nmis}:}{An array containing the number of missing observations per #' column.} #' \item{\code{analyses}:}{A list of \code{m} components containing the individual #' fit objects from each of the \code{m} complete data analyses.} #' } #' #' @details #' In versions prior to \code{mice 3.0} pooling required only that #' \code{coef()} and \code{vcov()} methods were available for fitted #' objects. \emph{This feature is no longer supported}. The reason is that \code{vcov()} #' methods are inconsistent across packages, leading to buggy behaviour #' of the \code{pool()} function. Since \code{mice 3.0+}, the \code{broom} #' package takes care of filtering out the relevant parts of the #' complete-data analysis. It may happen that you'll see the messages #' like \code{No method for tidying an S3 object of class ...} or #' \code{Error: No glance method for objects of class ...}. The royal #' way to solve this problem is to write your own \code{glance()} and \code{tidy()} #' methods and add these to \code{broom} according to the specifications #' given in \url{https://broom.tidymodels.org}. #' #' #'The \code{mira} class of objects has methods for the #' following generic functions: \code{print}, \code{summary}. #' #' Many of the functions of the \code{mice} package do not use the #' S4 class definitions, and instead rely on the S3 list equivalent #' \code{oldClass(obj) <- "mira"}. #' #' @name mira-class #' @rdname mira-class #' @aliases mira-class mira #' @author Stef van Buuren, Karin Groothuis-Oudshoorn, 2000 #' @seealso \code{\link{with.mids}}, \code{\link[=mids-class]{mids}}, \code{\link{mipo}} #' @references van Buuren S and Groothuis-Oudshoorn K (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' @keywords classes #' @export setClass("mira", representation( call = "call", call1 = "call", nmis = "integer", analyses = "list" ), contains = "list" ) mice/R/mice.impute.mpmm.R0000644000176200001440000000651214347334322014710 0ustar liggesusers#' Imputation by multivariate predictive mean matching #' #' Imputes multivariate incomplete data among which there are specific relations, #' for instance, polynomials, interactions, range restrictions and sum scores. 
#' @aliases mice.impute.mpmm mpmm #' @param data matrix with exactly two missing data patterns #' @param format A character vector specifying the type of object that should #' be returned. The default is \code{format = "imputes"}. #' @param ... Other named arguments. #' @return A matrix with imputed data, which has \code{ncol(y)} columns and #' \code{sum(wy)} rows. #' @details #' This function implements the predictive mean matching and applies canonical #' regression analysis to select donors fora set of missing variables. In general, #' canonical regressionanalysis looks for a linear combination of covariates that #' predicts a linear combination of outcomes (a set of missing variables) #' optimally in a least-square sense (Israels, 1987). The predicted #' value of the linear combination of the set of missing variables #' would be applied to perform predictive mean matching. #' #' @note #' The function requires variables in the block have the same missingness pattern. #' If there are more than one missingness pattern, the function will return #' a warning. #' @author Mingyang Cai and Gerko Vink # @author Mingyang Cai (University of Utrecht), \email{g.vink#uu.nl} #' @seealso \code{\link{mice.impute.pmm}} #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-knowledge.html#sec:quadratic}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' @family univariate imputation functions #' @keywords datagen #' @examples #' # simulate data #' beta2 <- beta1 <- .5 #' x <- rnorm(1000) #' e <- rnorm(1000, 0, 1) #' y <- beta1 * x + beta2 * x^2 + e #' dat <- data.frame(y = y, x = x, x2 = x^2) #' m <- as.logical(rbinom(1000, 1, 0.25)) #' dat[m, c("x", "x2")] <- NA #' #' # impute #' blk <- list("y", c("x", "x2")) #' meth <- c("", "mpmm") #' imp <- mice(dat, blocks = blk, method = meth, print = FALSE, #' m = 2, maxit = 2) #' #' # analyse and check #' summary(pool(with(imp, lm(y ~ x + x2)))) #' with(dat, plot(x, x2, col = mdc(1))) #' with(complete(imp), points(x[m], x2[m], col = mdc(2))) #' @export #' mice.impute.mpmm <- function(data, format = "imputes", ...) { order <- dimnames(data)[[1]] res <- mpmm.impute(data, ...) return(single2imputes(res[order, ], is.na(data))) } mpmm.impute <- function(data, ...) { data <- as.data.frame(data) r <- !is.na(data) mpat <- apply(r, 1, function(x) paste(as.numeric(x), collapse = "")) nmpat <- length(unique(mpat)) if (nmpat != 2) stop("There are more than one missingness patterns") r <- unique(r) r <- r[rowSums(r) < ncol(r), ] y <- data[, which(r == FALSE), drop = FALSE] ry <- !is.na(y)[, 1] x <- data[, which(r == TRUE), drop = FALSE] wy <- !ry ES <- eigen(solve(cov(y[ry, , drop = FALSE], y[ry, , drop = FALSE])) %*% cov(y[ry, , drop = FALSE], x[ry, , drop = FALSE]) %*% solve(cov(x[ry, , drop = FALSE], x[ry, , drop = FALSE])) %*% cov(x[ry, , drop = FALSE], y[ry, , drop = FALSE])) parm <- as.matrix(ES$vectors[, 1]) z <- as.matrix(y) %*% parm imp <- mice.impute.pmm(z, ry, x) zstar <- as.matrix(imp) y[wy, ] <- y[ry, , drop = FALSE][match(zstar, z[ry]), ] data[colnames(y)] <- y return(data) } mice/R/nhanes2.R0000644000176200001440000000162114330031606013045 0ustar liggesusers#' NHANES example - mixed numerical and discrete variables #' #' A small data set with non-monotone missing values. #' #' A small data set with missing data and mixed numerical and discrete #' variables. The data set \code{nhanes} is the same data set, but with all data #' treated as numerical. 
#' #' @name nhanes2 #' @docType data #' @format A data frame with 25 observations on the following 4 variables. #' \describe{ #' \item{age}{Age group (1=20-39, 2=40-59, 3=60+)} #' \item{bmi}{Body mass index (kg/m**2)} #' \item{hyp}{Hypertensive (1=no,2=yes)} #' \item{chl}{Total serum cholesterol (mg/dL)} } #' @seealso \code{\link{nhanes}} #' @source Schafer, J.L. (1997). \emph{Analysis of Incomplete Multivariate #' Data.} London: Chapman & Hall. Table 6.14. #' @keywords datasets #' @examples #' # create 5 imputed data sets #' imp <- mice(nhanes2) #' #' # print the first imputed data set #' complete(imp) NULL mice/R/as.R0000644000176200001440000001466514330031625012127 0ustar liggesusers#' Converts an imputed dataset (long format) into a \code{mids} object #' #' This function converts imputed data stored in long format into #' an object of class \code{mids}. The original incomplete dataset #' needs to be available so that we know where the missing data are. #' The function is useful to convert back operations applied to #' the imputed data back in a \code{mids} object. It may also be #' used to store multiply imputed data sets from other software #' into the format used by \code{mice}. #' @note The function expects the input data \code{long} to be sorted by #' imputation number (variable \code{".imp"} by default), and in the #' same sequence within each imputation block. #' @param long A multiply imputed data set in long format, for example #' produced by a call to \code{complete(..., action = 'long', include = TRUE)}, #' or by other software. #' @param .imp An optional column number or column name in \code{long}, #' indicating the imputation index. The values are assumed to be consecutive #' integers between 0 and \code{m}. Values \code{1} through \code{m} #' correspond to the imputation index, value \code{0} indicates #' the original data (with missings). #' By default, the procedure will search for a variable named \code{".imp"}. #' @param .id An optional column number or column name in \code{long}, #' indicating the subject identification. If not specified, then the #' function searches for a variable named \code{".id"}. If this variable #' is found, the values in the column will define the row names in #' the \code{data} element of the resulting \code{mids} object. 
#' @inheritParams mice #' @return An object of class \code{mids} #' @author Gerko Vink #' @examples #' # impute the nhanes dataset #' imp <- mice(nhanes, print = FALSE) #' # extract the data in long format #' X <- complete(imp, action = "long", include = TRUE) #' # create dataset with .imp variable as numeric #' X2 <- X #' #' # nhanes example without .id #' test1 <- as.mids(X) #' is.mids(test1) #' identical(complete(test1, action = "long", include = TRUE), X) #' #' # nhanes example without .id where .imp is numeric #' test2 <- as.mids(X2) #' is.mids(test2) #' identical(complete(test2, action = "long", include = TRUE), X) #' #' # nhanes example, where we explicitly specify .id as column 2 #' test3 <- as.mids(X, .id = ".id") #' is.mids(test3) #' identical(complete(test3, action = "long", include = TRUE), X) #' #' # nhanes example with .id where .imp is numeric #' test4 <- as.mids(X2, .id = 2) #' is.mids(test4) #' identical(complete(test4, action = "long", include = TRUE), X) #' #' # example without an .id variable #' # variable .id not preserved #' X3 <- X[, -2] #' test5 <- as.mids(X3) #' is.mids(test5) #' identical(complete(test5, action = "long", include = TRUE)[, -2], X[, -2]) #' #' # as() syntax has fewer options #' test7 <- as(X, "mids") #' test8 <- as(X2, "mids") #' test9 <- as(X2[, -2], "mids") #' rev <- ncol(X):1 #' test10 <- as(X[, rev], "mids") #' #' # where argument copies also observed data into $imp element #' where <- matrix(TRUE, nrow = nrow(nhanes), ncol = ncol(nhanes)) #' colnames(where) <- colnames(nhanes) #' test11 <- as.mids(X, where = where) #' identical(complete(test11, action = "long", include = TRUE), X) #' @keywords mids #' @export as.mids <- function(long, where = NULL, .imp = ".imp", .id = ".id") { if (is.numeric(.imp)) .imp <- names(long)[.imp] if (is.numeric(.id)) .id <- names(long)[.id] if (!.imp %in% names(long)) stop("Imputation index `.imp` not found") # no missings allowed in .imp imps <- unlist(long[, .imp], use.names = FALSE) if (anyNA(imps)) stop("Missing values in imputation index `.imp`") # number of records within .imp should be the same if (any(diff(table(imps))) != 0) { stop("Unequal group sizes in imputation index `.imp`") } # get original data part keep <- setdiff(names(long), na.omit(c(.imp, .id))) data <- long[imps == 0, keep, drop = FALSE] n <- nrow(data) if (n == 0) { stop("Original data not found.\n Use `complete(..., action = 'long', include = TRUE)` to save original data.") } # determine m m <- length(unique(imps)) - 1 # use mice to get info on data if (is.null(where)) where <- is.na(data) ini <- mice(data, m = m, where = where, maxit = 0, remove.collinear = FALSE, allow.na = TRUE ) # store any .id as row names if (!is.na(.id)) { rownames(ini$data) <- unlist(long[imps == 0, .id], use.names = FALSE) } # copy imputations from long into proper ini$imp elements names <- names(ini$imp) for (i in seq_along(names)) { varname <- names[i] if (!is.null(ini$imp[[varname]])) { for (j in seq_len(m)) { idx <- imps == j & where[, varname] ini$imp[[varname]][j] <- long[idx, varname] } } } ini } #' Create a \code{mira} object from repeated analyses #' #' The \code{as.mira()} function takes the results of repeated #' complete-data analysis stored as a list, and turns it #' into a \code{mira} object that can be pooled. #' @param fitlist A list containing $m$ fitted analysis objects #' @return An S3 object of class \code{mira}. 
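# Illustrative sketch (not part of the package source) of the workflow that
# as.mira() below supports: run the same analysis on every completed data set
# yourself, coerce the list of fits to a mira object, and pool. The lm()
# formula is only an example; any fitter with broom tidy()/glance() methods
# would do.
imp <- mice(nhanes, print = FALSE, seed = 1)
fits <- lapply(complete(imp, action = "all"),
               function(d) lm(chl ~ bmi + age, data = d))
summary(pool(as.mira(fits)))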
#' @seealso \code{\link[=mira-class]{mira}} #' @author Stef van Buuren #' @export as.mira <- function(fitlist) { if (is.mira(fitlist)) { return(fitlist) } if (is.mids(fitlist)) { stop("as.mira() cannot convert class 'mids' into 'mira'. Use with() instead.") } call <- match.call() if (!is.list(fitlist)) { stop("Argument 'fitlist' is not a list") } class(fitlist) <- "list" object <- list(call = call, call1 = NULL, nmis = NULL, analyses = fitlist) oldClass(object) <- c("mira", "matrix") object } #' Converts into a \code{mitml.result} object #' #' The \code{as.mitml.result()} function takes the results of repeated #' complete-data analysis stored as a list, and turns it #' into an object of class \code{mitml.result}. #' @param x An object of class \code{mira} #' @return An S3 object of class \code{mitml.result}, a list #' containing $m$ fitted analysis objects. #' @seealso \code{\link[mitml]{with.mitml.list}} #' @author Stef van Buuren #' @export as.mitml.result <- function(x) { if (inherits(x, "mitml.result")) { return(x) } z <- NULL if (is.mira(x)) { z <- getfit(x) } else if (is.list(x)) z <- x class(z) <- c("mitml.result", "list") z } setOldClass(c("mids", "mira")) setAs( from = "data.frame", to = "mids", def = function(from) { as.mids(from) } ) setAs( from = "list", to = "mira", def = function(from) { as.mira(from) } ) mice/R/mice-package.R0000644000176200001440000001156314330031647014030 0ustar liggesusers#' \pkg{mice}: Multivariate Imputation by Chained Equations #' #' The \pkg{mice} package implements a method to deal with missing data. #' The package creates multiple imputations (replacement values) for #' multivariate missing data. The method is based on Fully Conditional #' Specification, where each incomplete variable is imputed by a separate #' model. The MICE algorithm can impute mixes of continuous, binary, #' unordered categorical and ordered categorical data. In addition, MICE #' can impute continuous two-level data, and maintain consistency between #' imputations by means of passive imputation. Many diagnostic plots are #' implemented to inspect the quality of the imputations. #' #' The \pkg{mice} package contains functions to #' \itemize{ #' \item Inspect the missing data pattern #' \item Impute the missing data \emph{m} times, resulting in \emph{m} completed data sets #' \item Diagnose the quality of the imputed values #' \item Analyze each completed data set #' \item Pool the results of the repeated analyses #' \item Store and export the imputed data in various formats #' \item Generate simulated incomplete data #' \item Incorporate custom imputation methods #' } #' #' @section Functions: #' #' The main functions are: #' \tabular{ll}{ #' \code{mice()} \tab Impute the missing data *m* times\cr #' \code{with()} \tab Analyze completed data sets\cr #' \code{pool()} \tab Combine parameter estimates\cr #' \code{complete()} \tab Export imputed data\cr #' \code{ampute()} \tab Generate missing data\cr} #' #' @section Vignettes: #' #' There is a detailed series of #' six online vignettes that walk you through solving realistic inference #' problems with mice. 
#' #' We suggest going through these vignettes in the following order #' \enumerate{ #' \item \href{https://www.gerkovink.com/miceVignettes/Ad_hoc_and_mice/Ad_hoc_methods.html}{Ad hoc methods and the MICE algorithm} #' \item \href{https://www.gerkovink.com/miceVignettes/Convergence_pooling/Convergence_and_pooling.html}{Convergence and pooling} #' \item \href{https://www.gerkovink.com/miceVignettes/Missingness_inspection/Missingness_inspection.html}{Inspecting how the observed data and missingness are related} #' \item \href{https://www.gerkovink.com/miceVignettes/Passive_Post_processing/Passive_imputation_post_processing.html}{Passive imputation and post-processing} #' \item \href{https://www.gerkovink.com/miceVignettes/Multi_level/Multi_level_data.html}{Imputing multilevel data} #' \item \href{https://www.gerkovink.com/miceVignettes/Sensitivity_analysis/Sensitivity_analysis.html}{Sensitivity analysis with \pkg{mice}} #' } #' #' #'Van Buuren, S. (2018). #' Boca Raton, FL.: Chapman & Hall/CRC Press. #' The book #' \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' contains a lot of \href{https://github.com/stefvanbuuren/fimdbook/tree/master/R}{example code}. #' #' @section Methodology: #' #' The \pkg{mice} software was published in the {Journal of Statistical Software} (Van Buuren and Groothuis-Oudshoorn, 2011). \doi{10.18637/jss.v045.i03} #' The first application of the method #' concerned missing blood pressure data (Van Buuren et. al., 1999). #' The term \emph{Fully Conditional Specification} was introduced in 2006 to describe a general class of methods that specify imputations model for multivariate data as a set of conditional distributions (Van Buuren et. al., 2006). Further details on mixes of variables and applications can be found in the book #' \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' @section Enhanced linear algebra: #' #' Updating the BLAS can improve speed of R, sometime considerably. The details #' depend on the operating system. See the discussion in the #' "R Installation and Administration" guide for further information. #' #' @docType package #' @name mice #' @seealso \code{\link{mice}}, \code{\link{with.mids}}, #' \code{\link{pool}}, \code{\link{complete}}, \code{\link{ampute}} #' @references #' van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple #' imputation of missing blood pressure covariates in survival analysis. #' \emph{Statistics in Medicine}, \bold{18}, 681--694. #' #' van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #' Fully conditional specification in multivariate imputation. \emph{Journal of #' Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #' #' van Buuren, S., Groothuis-Oudshoorn, K. (2011). {\code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1--67. \doi{10.18637/jss.v045.i03} #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
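# A compact workflow sketch (illustrative only; the nhanes data and the lm()
# model are just examples) tying together the main functions listed above:
# impute, analyse each completed data set, pool, and export.
imp <- mice(nhanes, m = 5, seed = 123, print = FALSE) # impute m times
fit <- with(imp, lm(chl ~ bmi + age))                 # analyse each completed set
summary(pool(fit))                                    # pool with Rubin's rules
head(complete(imp, 1))                                # export the first completed set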
#' @useDynLib mice, .registration = TRUE NULL mice/R/mice.impute.lasso.select.logreg.R0000644000176200001440000000603014330031647017610 0ustar liggesusers#' Imputation by indirect use of lasso logistic regression #' #' Imputes univariate missing data using logistic regression following a #' preprocessing lasso variable selection step. #' #' @aliases mice.impute.lasso.select.logreg lasso.select.logreg #' @inheritParams mice.impute.pmm #' @param nfolds The number of folds for the cross-validation of the lasso penalty. #' The default is 10. #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' The method consists of the following steps: #' \enumerate{ #' \item For a given \code{y} variable under imputation, fit a logistic regression with lasso #' penalty using \code{y[ry]} as dependent variable and \code{x[ry, ]} as predictors. #' The coefficients that are not shrunk to 0 define the active set of predictors #' that will be used for imputation. #' \item Fit a logit with the active set of predictors, and find (bhat, V(bhat)) #' \item Draw BETA from N(bhat, V(bhat)) #' \item Compute predicted scores for the missing data, i.e., logit^-1(X BETA) #' \item Compare the score to a random (0,1) deviate, and impute. #' } #' The user can specify a \code{predictorMatrix} in the \code{mice} call #' to define which predictors are provided to this univariate imputation method. #' The lasso regularization will select, among the variables indicated by #' the user, the ones that are important for imputation at any given iteration. #' Therefore, users may force the exclusion of a predictor from a given #' imputation model by specifying a \code{0} entry. #' However, a non-zero entry does not guarantee the variable will be used, #' as this decision is ultimately made by the lasso variable selection #' procedure. #' #' The method is based on the Indirect Use of Regularized Regression (IURR) proposed by #' Zhao & Long (2016) and Deng et al. (2016). #' @author Edoardo Costantini, 2021 #' @references #' #' Deng, Y., Chang, C., Ido, M. S., & Long, Q. (2016). Multiple imputation for #' general missing data patterns in the presence of high-dimensional data. #' Scientific reports, 6(1), 1-10. #' #' Zhao, Y., & Long, Q. (2016). Multiple imputation in the presence of #' high-dimensional data. Statistical Methods in Medical Research, 25(5), #' 2021-2035. #' #' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.lasso.select.logreg <- function(y, ry, x, wy = NULL, nfolds = 10, ...) { install.on.demand("glmnet", ...) # Body if (is.null(wy)) wy <- !ry x_glmnet <- cbind(1, x) xobs <- x_glmnet[ry, , drop = FALSE] xmis <- x[wy, ] yobs <- y[ry] # Train imputation model # (objects used later in the estimation require this) cv_lasso <- glmnet::cv.glmnet( x = xobs, y = yobs, family = "binomial", nfolds = nfolds, alpha = 1 ) # Define Active Set glmnet_coefs <- as.matrix(coef(cv_lasso, s = "lambda.min" ))[, 1] AS <- which((glmnet_coefs != 0)[-1]) # Non-zero regression coefficients # Perform regular logreg draw xas <- x_glmnet[, AS, drop = FALSE] vec <- mice.impute.logreg( y = y, ry = ry, x = xas, wy = wy, ...
) vec } mice/R/imports.R0000644000176200001440000000274614436636423013234 0ustar liggesusers#' @import methods #' @importFrom broom glance tidy #' @importFrom dplyr %>% bind_cols bind_rows filter group_by lead #' mutate n pull row_number select summarize #' @importFrom glmnet cv.glmnet #' @importFrom graphics abline axis box par plot plot.new plot.window #' points rect text #' @importFrom lattice bwplot densityplot stripplot xyplot #' @importFrom mitml jomoImpute mitmlComplete panImpute testModels #' @importFrom nnet multinom #' @importFrom Rcpp evalCpp #' @importFrom rlang .data syms #' @importFrom rpart rpart rpart.control #' @importFrom stats C aggregate as.formula binomial coef #' complete.cases confint #' contr.treatment cor df.residual fitted #' formula gaussian getCall #' glm is.empty.model lm lm.fit #' median model.frame model.matrix #' na.exclude na.omit na.pass #' pf predict pt qt quantile quasibinomial #' rbinom rchisq reformulate rgamma rnorm runif #' sd summary.glm terms update var vcov #' @importFrom tidyr complete #' @importFrom utils askYesNo flush.console head install.packages methods #' packageDescription packageVersion #' tail write.table #' @export bwplot #' @export densityplot #' @export stripplot #' @export xyplot #' @export complete NULL mice/R/boys.R0000644000176200001440000000562514330031606012473 0ustar liggesusers#' Growth of Dutch boys #' #' Height, weight, head circumference and puberty of 748 Dutch boys. #' #' Random sample of 10\% from the cross-sectional data used to construct the #' Dutch growth references 1997. Variables \code{gen} and \code{phb} are ordered #' factors. \code{reg} is a factor. #' #' @name boys #' @docType data #' @format A data frame with 748 rows on the following 9 variables: \describe{ #' \item{age}{Decimal age (0-21 years)} #' \item{hgt}{Height (cm)} #' \item{wgt}{Weight (kg)} #' \item{bmi}{Body mass index} #' \item{hc}{Head circumference (cm)} #' \item{gen}{Genital Tanner stage (G1-G5)} #' \item{phb}{Pubic hair (Tanner P1-P6)} #' \item{tv}{Testicular volume (ml)} #' \item{reg}{Region (north, east, west, south, city)} } #' @source Fredriks, A.M,, van Buuren, S., Burgmeijer, R.J., Meulmeester JF, #' Beuker, R.J., Brugman, E., Roede, M.J., Verloove-Vanhorick, S.P., Wit, J.M. #' (2000) Continuing positive secular growth change in The Netherlands #' 1955-1997. \emph{Pediatric Research}, \bold{47}, 316-323. #' #' Fredriks, A.M., van Buuren, S., Wit, J.M., Verloove-Vanhorick, S.P. (2000). #' Body index measurements in 1996-7 compared with 1980. \emph{Archives of #' Disease in Childhood}, \bold{82}, 107-112. 
#' @keywords datasets #' @examples #' #' # create two imputed data sets #' imp <- mice(boys, m = 1, maxit = 2) #' z <- complete(imp, 1) #' #' # create imputations for age <8yrs #' plot(z$age, z$gen, #' col = mdc(1:2)[1 + is.na(boys$gen)], #' xlab = "Age (years)", ylab = "Tanner Stage Genital" #' ) #' #' # figure to show that the default imputation method does not impute BMI #' # consistently #' plot(z$bmi, z$wgt / (z$hgt / 100)^2, #' col = mdc(1:2)[1 + is.na(boys$bmi)], #' xlab = "Imputed BMI", ylab = "Calculated BMI" #' ) #' #' # also, BMI distributions are somewhat different #' oldpar <- par(mfrow = c(1, 2)) #' MASS::truehist(z$bmi[!is.na(boys$bmi)], #' h = 1, xlim = c(10, 30), ymax = 0.25, #' col = mdc(1), xlab = "BMI observed" #' ) #' MASS::truehist(z$bmi[is.na(boys$bmi)], #' h = 1, xlim = c(10, 30), ymax = 0.25, #' col = mdc(2), xlab = "BMI imputed" #' ) #' par(oldpar) #' #' # repair the inconsistency problem by passive imputation #' meth <- imp$meth #' meth["bmi"] <- "~I(wgt/(hgt/100)^2)" #' pred <- imp$predictorMatrix #' pred["hgt", "bmi"] <- 0 #' pred["wgt", "bmi"] <- 0 #' imp2 <- mice(boys, m = 1, maxit = 2, meth = meth, pred = pred) #' z2 <- complete(imp2, 1) #' #' # show that new imputations are consistent #' plot(z2$bmi, z2$wgt / (z2$hgt / 100)^2, #' col = mdc(1:2)[1 + is.na(boys$bmi)], #' ylab = "Calculated BMI" #' ) #' #' # and compare distributions #' oldpar <- par(mfrow = c(1, 2)) #' MASS::truehist(z2$bmi[!is.na(boys$bmi)], #' h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(1), #' xlab = "BMI observed" #' ) #' MASS::truehist(z2$bmi[is.na(boys$bmi)], #' h = 1, xlim = c(10, 30), ymax = 0.25, col = mdc(2), #' xlab = "BMI imputed" #' ) #' par(oldpar) NULL mice/R/mice.impute.norm.predict.R0000644000176200001440000000363614330031606016341 0ustar liggesusers#' Imputation by linear regression through prediction #' #' Imputes the "best value" according to the linear regression model, also #' known as \emph{regression imputation}. #' #' @aliases mice.impute.norm.predict norm.predict #' @inheritParams mice.impute.pmm #' @return Vector with imputed data, same type as \code{y}, and of length #' \code{sum(wy)} #' @details #' Calculates regression weights from the observed data and returns predicted #' values to as imputations. This #' method is known as \emph{regression imputation}. #' @section Warning: THIS METHOD SHOULD NOT BE USED FOR DATA ANALYSIS. #' This method is seductive because it imputes the most #' likely value according to the model. However, it ignores the uncertainty #' of the missing values and artificially #' amplifies the relations between the columns of the data. Application of #' richer models having more parameters does not help to evade these issues. #' Stochastic regression methods, like \code{\link{mice.impute.pmm}} or #' \code{\link{mice.impute.norm}}, are generally preferred. #' #' At best, prediction can give reasonable estimates of the mean, especially #' if normality assumptions are plausible. See Little and Rubin (2002, p. 62-64) #' or Van Buuren (2012, p. 11-13, p. 45-46) for a discussion of this method. #' @author Gerko Vink, Stef van Buuren, 2018 #' @references #' Little, R.J.A. and Rubin, D.B. (2002). Statistical Analysis with Missing #' Data. New York: John Wiley and Sons. #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-linearnormal.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. 
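# Illustrative sketch (not part of the package source) of the warning above:
# because norm.predict places every imputation on the regression line, the
# imputed values typically show less spread than those drawn by pmm.
imp.rp  <- mice(nhanes, method = "norm.predict", m = 1, print = FALSE, seed = 1)
imp.pmm <- mice(nhanes, method = "pmm", m = 1, print = FALSE, seed = 1)
sd(unlist(imp.rp$imp$chl))   # typically smaller than ...
sd(unlist(imp.pmm$imp$chl))  # ... the spread under pmm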
#' @family univariate imputation functions #' @keywords datagen #' @export mice.impute.norm.predict <- function(y, ry, x, wy = NULL, ...) { if (is.null(wy)) wy <- !ry x <- cbind(1, as.matrix(x)) p <- estimice(x[ry, , drop = FALSE], y[ry], ...) x[wy, , drop = FALSE] %*% p$c } mice/R/auxiliary.R0000644000176200001440000000571314344615604013537 0ustar liggesusers#' Conditional imputation helper #' #' Sorry, the \code{ifdo()} function is not yet implemented. #' @aliases ifdo #' @param cond a condition #' @param action the action to do #' @return Currently returns an error message. #' @author Stef van Buuren, 2012 #' @keywords internal ifdo <- function(cond, action) { cat("Function ifdo() not yet implemented.\n") } #' Appends specified break to the data #' #' A custom function to insert rows in long data with new pseudo-observations #' that are being done on the specified break ages. There should be a #' column called \code{first} in \code{data} with logical data that codes whether #' the current row is the first for subject \code{id}. Furthermore, #' the function assumes that columns \code{age}, \code{occ}, #' \code{hgt.z}, \code{wgt.z} and #' \code{bmi.z} are available. This function is used on the \code{tbc} #' data in FIMD chapter 9. Check that out to see it in action. #' @aliases appendbreak #' @param data A data frame in the long long format #' @param brk A vector of break ages #' @param warp.model A time warping model #' @param id The subject identifier #' @param typ Label to signal that this is a newly added observation #' @return A long data frame with additional rows for the break ages #' @export appendbreak <- function(data, brk, warp.model = warp.model, id = NULL, typ = "pred") { k <- length(brk) app <- data[data$first, ] if (!is.null(id)) { idx <- app$id %in% id app <- app[idx, ] } nap <- nrow(app) ## update administrative variables app$first <- FALSE app$typ <- typ app$occ <- NA app <- app[rep.int(seq_len(nap), length(brk)), ] ## update age variables app$age <- rep(brk, each = nap) app$age2 <- predict(warp.model, newdata = app) X <- splines::bs(app$age, knots = brk, Boundary.knots = c(brk[1], brk[k] + 0.0001), degree = 1 ) X <- X[, -(k + 1)] app[, paste0("x", seq_len(ncol(X)))] <- X ## update outcome variable (set to missing) app[, c("hgt.z", "wgt.z", "bmi.z")] <- NA app <- rbind(data, app) app[order(app$id, app$age), ] } #' Extract broken stick estimates from a \code{lmer} object #' #' @param fit An object of class \code{lmer} #' @return A matrix containing broken stick estimates #' @author Stef van Buuren, 2012 #' @export extractBS <- function(fit) { siz <- t(lme4::ranef(fit)[[1]]) + lme4::fixef(fit) matrix(siz, nrow = nrow(siz) * ncol(siz), ncol = 1) } ## used by mice.impute.midastouch bootfunc.plain <- function(n) { random <- sample.int(n, replace = TRUE) as.numeric(table(factor(random, levels = seq_len(n)))) } minmax <- function(x, domin = TRUE, domax = TRUE) { maxx <- sqrt(.Machine$double.xmax) minx <- sqrt(.Machine$double.eps) if (domin) { x <- pmin(x, maxx) } if (domax) { x <- pmax(x, minx) } x } single2imputes <- function(single, mis) { nmis <- colSums(mis) vars <- names(single)[nmis > 0] z <- vector("list", length(vars)) names(z) <- vars for (j in vars) z[[j]] <- single[mis[, j], j] z } mice/R/mice.R0000644000176200001440000005653214430511431012437 0ustar liggesusers#' Multivariate Imputation by Chained Equations (MICE) #' #' Generates Multivariate Imputations by Chained Equations (MICE) #' #' Generates multiple imputations for incomplete multivariate data by Gibbs #' 
sampling. Missing data can occur anywhere in the data. The algorithm imputes #' an incomplete column (the target column) by generating 'plausible' synthetic #' values given other columns in the data. Each incomplete column must act as a #' target column, and has its own specific set of predictors. The default set of #' predictors for a given target consists of all other columns in the data. For #' predictors that are incomplete themselves, the most recently generated #' imputations are used to complete the predictors prior to imputation of the #' target column. #' #' A separate univariate imputation model can be specified for each column. The #' default imputation method depends on the measurement level of the target #' column. In addition to these, several other methods are provided. You can #' also write their own imputation functions, and call these from within the #' algorithm. #' #' The data may contain categorical variables that are used in a regressions on #' other variables. The algorithm creates dummy variables for the categories of #' these variables, and imputes these from the corresponding categorical #' variable. #' #' Built-in univariate imputation methods are: #' #' \tabular{lll}{ #' \code{pmm} \tab any \tab Predictive mean matching\cr #' \code{midastouch} \tab any \tab Weighted predictive mean matching\cr #' \code{sample} \tab any \tab Random sample from observed values\cr #' \code{cart} \tab any \tab Classification and regression trees\cr #' \code{rf} \tab any \tab Random forest imputations\cr #' \code{mean} \tab numeric \tab Unconditional mean imputation\cr #' \code{norm} \tab numeric \tab Bayesian linear regression\cr #' \code{norm.nob} \tab numeric \tab Linear regression ignoring model error\cr #' \code{norm.boot} \tab numeric \tab Linear regression using bootstrap\cr #' \code{norm.predict} \tab numeric \tab Linear regression, predicted values\cr #' \code{lasso.norm} \tab numeric \tab Lasso linear regression\cr #' \code{lasso.select.norm} \tab numeric \tab Lasso select + linear regression\cr #' \code{quadratic} \tab numeric \tab Imputation of quadratic terms\cr #' \code{ri} \tab numeric \tab Random indicator for nonignorable data\cr #' \code{logreg} \tab binary \tab Logistic regression\cr #' \code{logreg.boot} \tab binary \tab Logistic regression with bootstrap\cr #' \code{lasso.logreg} \tab binary \tab Lasso logistic regression\cr #' \code{lasso.select.logreg}\tab binary \tab Lasso select + logistic regression\cr #' \code{polr} \tab ordered \tab Proportional odds model\cr #' \code{polyreg} \tab unordered\tab Polytomous logistic regression\cr #' \code{lda} \tab unordered\tab Linear discriminant analysis\cr #' \code{2l.norm} \tab numeric \tab Level-1 normal heteroscedastic\cr #' \code{2l.lmer} \tab numeric \tab Level-1 normal homoscedastic, lmer\cr #' \code{2l.pan} \tab numeric \tab Level-1 normal homoscedastic, pan\cr #' \code{2l.bin} \tab binary \tab Level-1 logistic, glmer\cr #' \code{2lonly.mean} \tab numeric \tab Level-2 class mean\cr #' \code{2lonly.norm} \tab numeric \tab Level-2 class normal\cr #' \code{2lonly.pmm} \tab any \tab Level-2 class predictive mean matching #' } #' #' These corresponding functions are coded in the \code{mice} library under #' names \code{mice.impute.method}, where \code{method} is a string with the #' name of the univariate imputation method name, for example \code{norm}. The #' \code{method} argument specifies the methods to be used. 
For the \code{j}'th #' column, \code{mice()} calls the first occurrence of #' \code{paste('mice.impute.', method[j], sep = '')} in the search path. The #' mechanism allows users to write customized imputation functions, #' \code{mice.impute.myfunc}. To call it for all columns specify #' \code{method='myfunc'}. To call it only for, say, column 2 specify #' \code{method=c('norm','myfunc','logreg',\dots{})}. #' #' \emph{Skipping imputation:} The user may skip imputation of a column by #' setting its entry to the empty method: \code{""}. For complete columns without #' missing data \code{mice} will automatically set the empty method. Setting the #' empty method does not produce imputations for the column, so any missing #' cells remain \code{NA}. If column A contains \code{NA}'s and is used as a #' predictor in the imputation model for column B, then \code{mice} produces no #' imputations for the rows in B where A is missing. The imputed data #' for B may thus contain \code{NA}'s. The remedy is to remove column A from #' the imputation model for the other columns in the data. This can be done #' by setting the entire column for variable A in the \code{predictorMatrix} #' equal to zero. #' #' \emph{Passive imputation:} \code{mice()} supports a special built-in method, #' called passive imputation. This method can be used to ensure that a data #' transform always depends on the most recently generated imputations. In some #' cases, an imputation model may need transformed data in addition to the #' original data (e.g., log, quadratic, recodes, interaction, sum scores, and so #' on). #' #' Passive imputation maintains consistency among different transformations of #' the same data. Passive imputation is invoked if \code{~} is specified as the #' first character of the string that specifies the univariate method. #' \code{mice()} interprets the entire string, including the \code{~} character, #' as the formula argument in a call to \code{model.frame(formula, #' data[!r[,j],])}. This provides a simple mechanism for specifying deterministic #' dependencies among the columns. For example, suppose that the missing entries #' in variables \code{data$height} and \code{data$weight} are imputed. The body #' mass index (BMI) can be calculated within \code{mice} by specifying the #' string \code{'~I(weight/height^2)'} as the univariate imputation method for #' the target column \code{data$bmi}. Note that the \code{~} mechanism works #' only on those entries which have missing values in the target column. You #' should make sure that the combined observed and imputed parts of the target #' column make sense. An easy way to create consistency is by coding all entries #' in the target as \code{NA}, but for large data sets, this could be #' inefficient. Note that you may also need to adapt the default #' \code{predictorMatrix} to evade linear dependencies among the predictors that #' could cause errors like \code{Error in solve.default()} or \code{Error: #' system is exactly singular}. Though not strictly needed, it is often useful #' to specify \code{visitSequence} such that the column that is imputed by the #' \code{~} mechanism is visited each time after one of its predictors was #' visited. In that way, deterministic relations between columns will always be #' synchronized. #' #' #' A new argument \code{ls.meth} can be passed to the lower level #' \code{.norm.draw} to specify the method for generating the least squares #' estimates and any subsequently derived estimates.
Argument \code{ls.meth} #' takes one of three inputs: \code{"qr"} for QR-decomposition, \code{"svd"} for #' singular value decomposition and \code{"ridge"} for ridge regression. #' \code{ls.meth} defaults to \code{ls.meth = "qr"}. #' #' \emph{Auxiliary predictors in formulas specification: } #' For a given block, the \code{formulas} specification takes precedence over #' the corresponding row in the \code{predictorMatrix} argument. This #' precedence is, however, restricted to the subset of variables #' specified in the terms of the block formula. Any #' variables not specified by \code{formulas} are imputed #' according to the \code{predictorMatrix} specification. Variables with #' non-zero \code{type} values in the \code{predictorMatrix} will #' be added as main effects to the \code{formulas}, which will #' act as supplementary covariates in the imputation model. It is possible #' to turn off this behavior by specifying the #' argument \code{auxiliary = FALSE}. #' #' @param data A data frame or a matrix containing the incomplete data. Missing #' values are coded as \code{NA}. #' @param m Number of multiple imputations. The default is \code{m=5}. #' @param method Can be either a single string, or a vector of strings with #' length \code{length(blocks)}, specifying the imputation method to be #' used for each column in data. If specified as a single string, the same #' method will be used for all blocks. The default imputation method (when no #' argument is specified) depends on the measurement level of the target column, #' as regulated by the \code{defaultMethod} argument. Columns that need #' not be imputed have the empty method \code{""}. See details. #' @param predictorMatrix A numeric matrix of \code{length(blocks)} rows #' and \code{ncol(data)} columns, containing 0/1 data specifying #' the set of predictors to be used for each target column. #' Each row corresponds to a variable block, i.e., a set of variables #' to be imputed. A value of \code{1} means that the column #' variable is used as a predictor for the target block (in the rows). #' By default, the \code{predictorMatrix} is a square matrix of \code{ncol(data)} #' rows and columns with all 1's, except for the diagonal. #' Note: For two-level imputation models (which have \code{"2l"} in their names) #' other codes (e.g., \code{2} or \code{-2}) are also allowed. #' @param ignore A logical vector of \code{nrow(data)} elements indicating #' which rows are ignored when creating the imputation model. The default #' \code{NULL} includes all rows that have an observed value of the variable #' to be imputed. Rows with \code{ignore} set to \code{TRUE} do not influence the #' parameters of the imputation model, but are still imputed. We may use the #' \code{ignore} argument to split \code{data} into a training set (on which the #' imputation model is built) and a test set (that does not influence the #' imputation model estimates). #' Note: Multivariate imputation methods, like \code{mice.impute.jomoImpute()} #' or \code{mice.impute.panImpute()}, do not honour the \code{ignore} argument. #' @param where A data frame or matrix with logicals of the same dimensions #' as \code{data} indicating where in the data the imputations should be #' created. The default, \code{where = is.na(data)}, specifies that the #' missing data should be imputed. The \code{where} argument may be used to #' overimpute observed data, or to skip imputations for selected missing values.
#' Note: Imputation methods that generate imptutations outside of #' \code{mice}, like \code{mice.impute.panImpute()} may depend on a complete #' predictor space. In that case, a custom \code{where} matrix can not be #' specified. #' @param blocks List of vectors with variable names per block. List elements #' may be named to identify blocks. Variables within a block are #' imputed by a multivariate imputation method #' (see \code{method} argument). By default each variable is placed #' into its own block, which is effectively #' fully conditional specification (FCS) by univariate models #' (variable-by-variable imputation). Only variables whose names appear in #' \code{blocks} are imputed. The relevant columns in the \code{where} #' matrix are set to \code{FALSE} of variables that are not block members. #' A variable may appear in multiple blocks. In that case, it is #' effectively re-imputed each time that it is visited. #' @param visitSequence A vector of block names of arbitrary length, specifying the #' sequence of blocks that are imputed during one iteration of the Gibbs #' sampler. A block is a collection of variables. All variables that are #' members of the same block are imputed #' when the block is visited. A variable that is a member of multiple blocks #' is re-imputed within the same iteration. #' The default \code{visitSequence = "roman"} visits the blocks (left to right) #' in the order in which they appear in \code{blocks}. #' One may also use one of the following keywords: \code{"arabic"} #' (right to left), \code{"monotone"} (ordered low to high proportion #' of missing data) and \code{"revmonotone"} (reverse of monotone). #' \emph{Special case}: If you specify both \code{visitSequence = "monotone"} and #' \code{maxit = 1}, then the procedure will edit the \code{predictorMatrix} #' to conform to the monotone pattern. Realize that convergence in one #' iteration is only guaranteed if the missing data pattern is actually #' monotone. The procedure does not check this. #' @param formulas A named list of formula's, or expressions that #' can be converted into formula's by \code{as.formula}. List elements #' correspond to blocks. The block to which the list element applies is #' identified by its name, so list names must correspond to block names. #' The \code{formulas} argument is an alternative to the #' \code{predictorMatrix} argument that allows for more flexibility in #' specifying imputation models, e.g., for specifying interaction terms. #' @param blots A named \code{list} of \code{alist}'s that can be used #' to pass down arguments to lower level imputation function. The entries #' of element \code{blots[[blockname]]} are passed down to the function #' called for block \code{blockname}. #' @param post A vector of strings with length \code{ncol(data)} specifying #' expressions as strings. Each string is parsed and #' executed within the \code{sampler()} function to post-process #' imputed values during the iterations. #' The default is a vector of empty strings, indicating no post-processing. #' Multivariate (block) imputation methods ignore the \code{post} parameter. #' @param defaultMethod A vector of length 4 containing the default #' imputation methods for 1) numeric data, 2) factor data with 2 levels, 3) #' factor data with > 2 unordered levels, and 4) factor data with > 2 #' ordered levels. 
By default, the method uses #' \code{pmm}, predictive mean matching (numeric data) \code{logreg}, logistic #' regression imputation (binary data, factor with 2 levels) \code{polyreg}, #' polytomous regression imputation for unordered categorical data (factor > 2 #' levels) \code{polr}, proportional odds model for (ordered, > 2 levels). #' @param maxit A scalar giving the number of iterations. The default is 5. #' @param printFlag If \code{TRUE}, \code{mice} will print history on console. #' Use \code{print=FALSE} for silent computation. #' @param seed An integer that is used as argument by the \code{set.seed()} for #' offsetting the random number generator. Default is to leave the random number #' generator alone. #' @param data.init A data frame of the same size and type as \code{data}, #' without missing data, used to initialize imputations before the start of the #' iterative process. The default \code{NULL} implies that starting imputation #' are created by a simple random draw from the data. Note that specification of #' \code{data.init} will start all \code{m} Gibbs sampling streams from the same #' imputation. #' @param \dots Named arguments that are passed down to the univariate imputation #' functions. #' #' @return Returns an S3 object of class \code{\link[=mids-class]{mids}} #' (multiply imputed data set) #' @author Stef van Buuren \email{stef.vanbuuren@@tno.nl}, Karin #' Groothuis-Oudshoorn \email{c.g.m.oudshoorn@@utwente.nl}, 2000-2010, with #' contributions of Alexander Robitzsch, Gerko Vink, Shahab Jolani, #' Roel de Jong, Jason Turner, Lisa Doove, #' John Fox, Frank E. Harrell, and Peter Malewski. #' @seealso \code{\link[=mids-class]{mids}}, \code{\link{with.mids}}, #' \code{\link{set.seed}}, \code{\link{complete}} #' @references Van Buuren, S., Groothuis-Oudshoorn, K. (2011). \code{mice}: #' Multivariate Imputation by Chained Equations in \code{R}. \emph{Journal of #' Statistical Software}, \bold{45}(3), 1-67. #' \doi{10.18637/jss.v045.i03} #' #' Van Buuren, S. (2018). #' \href{https://stefvanbuuren.name/fimd/sec-FCS.html#sec:MICE}{\emph{Flexible Imputation of Missing Data. Second Edition.}} #' Chapman & Hall/CRC. Boca Raton, FL. #' #' Van Buuren, S., Brand, J.P.L., Groothuis-Oudshoorn C.G.M., Rubin, D.B. (2006) #' Fully conditional specification in multivariate imputation. \emph{Journal of #' Statistical Computation and Simulation}, \bold{76}, 12, 1049--1064. #' #' Van Buuren, S. (2007) Multiple imputation of discrete and continuous data by #' fully conditional specification. \emph{Statistical Methods in Medical #' Research}, \bold{16}, 3, 219--242. #' #' Van Buuren, S., Boshuizen, H.C., Knook, D.L. (1999) Multiple imputation of #' missing blood pressure covariates in survival analysis. \emph{Statistics in #' Medicine}, \bold{18}, 681--694. #' #' Brand, J.P.L. (1999) \emph{Development, implementation and evaluation of #' multiple imputation strategies for the statistical analysis of incomplete #' data sets.} Dissertation. Rotterdam: Erasmus University. 
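# A minimal sketch (the particular formulas are illustrative, not prescribed by
# the package) of the formulas interface described above. Each two-sided formula
# specifies the imputation model for one block; name.formulas() is used here to
# label the list elements (assumed to name them after the response variables).
form <- name.formulas(list(
  bmi ~ age + hyp + chl,
  hyp ~ age + bmi + chl,
  chl ~ age + bmi + hyp
))
imp.f <- mice(nhanes2, formulas = form, print = FALSE, seed = 1)
imp.f$formulas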
#' @keywords iteration #' @examples #' # do default multiple imputation on a numeric matrix #' imp <- mice(nhanes) #' imp #' #' # list the actual imputations for BMI #' imp$imp$bmi #' #' # first completed data matrix #' complete(imp) #' #' # imputation on mixed data with a different method per column #' mice(nhanes2, meth = c("sample", "pmm", "logreg", "norm")) #' #' \dontrun{ #' # example where we fit the imputation model on the train data #' # and apply the model to impute the test data #' set.seed(123) #' ignore <- sample(c(TRUE, FALSE), size = 25, replace = TRUE, prob = c(0.3, 0.7)) #' #' # scenario 1: train and test in the same dataset #' imp <- mice(nhanes2, m = 2, ignore = ignore, print = FALSE, seed = 22112) #' imp.test1 <- filter(imp, ignore) #' imp.test1$data #' complete(imp.test1, 1) #' complete(imp.test1, 2) #' #' # scenario 2: train and test in separate datasets #' traindata <- nhanes2[!ignore, ] #' testdata <- nhanes2[ignore, ] #' imp.train <- mice(traindata, m = 2, print = FALSE, seed = 22112) #' imp.test2 <- mice.mids(imp.train, newdata = testdata) #' complete(imp.test2, 1) #' complete(imp.test2, 2) #' } #' @export mice <- function(data, m = 5, method = NULL, predictorMatrix, ignore = NULL, where = NULL, blocks, visitSequence = NULL, formulas, blots = NULL, post = NULL, defaultMethod = c("pmm", "logreg", "polyreg", "polr"), maxit = 5, printFlag = TRUE, seed = NA, data.init = NULL, ...) { call <- match.call() check.deprecated(...) if (!is.na(seed)) set.seed(seed) # check form of data and m data <- check.dataform(data) m <- check.m(m) # determine input combination: predictorMatrix, blocks, formulas mp <- missing(predictorMatrix) mb <- missing(blocks) mf <- missing(formulas) # case A if (mp & mb & mf) { # blocks lead blocks <- make.blocks(colnames(data)) predictorMatrix <- make.predictorMatrix(data, blocks) formulas <- make.formulas(data, blocks) } # case B if (!mp & mb & mf) { # predictorMatrix leads predictorMatrix <- check.predictorMatrix(predictorMatrix, data) blocks <- make.blocks(colnames(predictorMatrix), partition = "scatter") formulas <- make.formulas(data, blocks, predictorMatrix = predictorMatrix) } # case C if (mp & !mb & mf) { # blocks leads blocks <- check.blocks(blocks, data) predictorMatrix <- make.predictorMatrix(data, blocks) formulas <- make.formulas(data, blocks) } # case D if (mp & mb & !mf) { # formulas leads formulas <- check.formulas(formulas, data) blocks <- construct.blocks(formulas) predictorMatrix <- make.predictorMatrix(data, blocks) } # case E if (!mp & !mb & mf) { # predictor leads blocks <- check.blocks(blocks, data) z <- check.predictorMatrix(predictorMatrix, data, blocks) predictorMatrix <- z$predictorMatrix blocks <- z$blocks formulas <- make.formulas(data, blocks, predictorMatrix = predictorMatrix) } # case F if (!mp & mb & !mf) { # formulas lead formulas <- check.formulas(formulas, data) predictorMatrix <- check.predictorMatrix(predictorMatrix, data) blocks <- construct.blocks(formulas, predictorMatrix) predictorMatrix <- make.predictorMatrix(data, blocks, predictorMatrix) } # case G if (mp & !mb & !mf) { # blocks lead blocks <- check.blocks(blocks, data, calltype = "formula") formulas <- check.formulas(formulas, blocks) predictorMatrix <- make.predictorMatrix(data, blocks) } # case H if (!mp & !mb & !mf) { # blocks lead blocks <- check.blocks(blocks, data) formulas <- check.formulas(formulas, data) predictorMatrix <- check.predictorMatrix(predictorMatrix, data, blocks) } chk <- check.cluster(data, predictorMatrix) where <- 
check.where(where, data, blocks) # check visitSequence, edit predictorMatrix for monotone user.visitSequence <- visitSequence visitSequence <- check.visitSequence(visitSequence, data = data, where = where, blocks = blocks ) predictorMatrix <- edit.predictorMatrix( predictorMatrix = predictorMatrix, visitSequence = visitSequence, user.visitSequence = user.visitSequence, maxit = maxit ) method <- check.method( method = method, data = data, where = where, blocks = blocks, defaultMethod = defaultMethod ) post <- check.post(post, data) blots <- check.blots(blots, data, blocks) ignore <- check.ignore(ignore, data) # data frame for storing the event log state <- list(it = 0, im = 0, dep = "", meth = "", log = FALSE) loggedEvents <- data.frame(it = 0, im = 0, dep = "", meth = "", out = "") # edit imputation setup setup <- list( method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, post = post ) setup <- edit.setup(data, setup, ...) method <- setup$method predictorMatrix <- setup$predictorMatrix visitSequence <- setup$visitSequence post <- setup$post # initialize imputations nmis <- apply(is.na(data), 2, sum) imp <- initialize.imp( data, m, ignore, where, blocks, visitSequence, method, nmis, data.init ) # and iterate... from <- 1 to <- from + maxit - 1 q <- sampler( data, m, ignore, where, imp, blocks, method, visitSequence, predictorMatrix, formulas, blots, post, c(from, to), printFlag, ... ) if (!state$log) loggedEvents <- NULL if (state$log) row.names(loggedEvents) <- seq_len(nrow(loggedEvents)) ## save, and return midsobj <- list( data = data, imp = q$imp, m = m, where = where, blocks = blocks, call = call, nmis = nmis, method = method, predictorMatrix = predictorMatrix, visitSequence = visitSequence, formulas = formulas, post = post, blots = blots, ignore = ignore, seed = seed, iteration = q$iteration, lastSeedValue = get(".Random.seed", envir = globalenv(), mode = "integer", inherits = FALSE ), chainMean = q$chainMean, chainVar = q$chainVar, loggedEvents = loggedEvents, version = packageVersion("mice"), date = Sys.Date() ) oldClass(midsobj) <- "mids" if (!is.null(midsobj$loggedEvents)) { warning("Number of logged events: ", nrow(midsobj$loggedEvents), call. = FALSE ) } midsobj } mice/R/mcar.R0000644000176200001440000004556114334522175012456 0ustar liggesusers#' Jamshidian and Jalal's Non-Parametric MCAR Test #' #' Test whether missingness is contingent upon the observed variables, #' according to the methodology developed by Jamshidian and Jalal (2010) (see #' Details). #' @param x An object for which a method exists; usually a `data.frame`. #' @param imputed Either an object of class `mids`, as returned by #' [mice::mice()], or a list of `data.frame`s. #' @param min_n Atomic numeric, must be greater than 1. When there are missing #' data patterns with fewer than `min_n` cases, all cases with that pattern will #' be removed from `x` and `imputed`. #' @param method Atomic character. If it is known (or assumed) that data are #' either multivariate normally distributed or not, then use either #' `method = "hawkins"` or `method = "nonparametric"`, respectively. #' The default argument `method = "auto"` follows the procedure outlined in the #' Details section, and in Figure 7 of Jamshidian and Jalal (2010). #' @param replications Number of replications used to simulate the Neyman #' distribution when performing Hawkins' test. 
As this method is based on random #' sampling, use a high number of `replications` (and optionally, #' [set.seed()]) to minimize Monte Carlo error and ensure reproducibility. #' @param use_chisq Atomic integer, indicating the minimum number of cases #' within a group *k* that triggers the use of the asymptotic Chi-square #' distribution instead of the empirical distribution in the Neyman uniformity #' test, which is performed as part of Hawkins' test. #' @param alpha Atomic numeric, indicating the significance level of tests. #' @details Three types of missingness have been distinguished in the literature #' (Rubin, 1976): #' Missing completely at random (MCAR), which means that missingness is random; #' missing at random (MAR), which means that missingness is contingent on the #' *observed*; #' and missing not at random (MNAR), which means that missingness is related to #' unobserved data. #' #' Jamshidian and Jalal's non-parametric MCAR test assumes that the missing data #' are either MCAR or MAR, and tests whether the missingness is independent of #' the observed values. If so, the covariance matrices of the imputed data will #' be equal across groups with different patterns of missingness. This test #' consists of the following procedure: #' \enumerate{ #' \item Data are imputed. #' \item The imputed data are split into *k* groups according to the #' *k* missing data patterns in the original data (see #' [mice::md.pattern()]). #' \item Perform Hawkins' test for equality of covariances across the *k* #' groups. #' \item If the test is *not significant*, conclude that there is no evidence #' against multivariate normality of the data, nor against MCAR. #' \item If the test *is significant*, and multivariate normality of the data #' can be assumed, then it can be concluded that missingness is MAR. #' \item If multivariate normality cannot be assumed, then perform the #' Anderson-Darling non-parametric test for equality of covariances across the #' *k* groups. #' \item If the Anderson-Darling test is *not significant*, this is evidence #' against multivariate normality - but no evidence against MCAR. #' \item If the Anderson-Darling test *is significant*, it can be #' concluded that missingness is MAR. #' } #' #' Note that, despite its name in common parlance, an MCAR test can only #' indicate whether missingness is MCAR or MAR. The procedure cannot distinguish #' MCAR from MNAR, so a non-significant result does not rule out MNAR. #' #' This is a re-implementation of the function `TestMCARNormality`, which was #' originally published in the R package `MissMech`, which has been removed #' from CRAN. This new implementation is faster, as its backend is written in #' C++. It also enhances the functionality of the original: #' \itemize{ #' \item Multiply imputed data can now be used; the median p-value and test #' statistic across replications are then reported, as suggested by #' Eekhout, Wiel, and Heymans (2017). #' \item The printing method for an `mcar_object` gives a warning when at #' least one p-value of either test was significant. In this case, it is #' recommended to inspect the range of p-values, and consider potential #' violations of MCAR. #' \item A plotting method for an `mcar_object` is provided. #' \item A plotting method for the `$md.pattern` element of an `mcar_object` #' is provided. #' } #' #' @return An object of class `mcar_object`. #' @author Caspar J. Van Lissa #' @references #' Rubin, D. B. (1976). Inference and Missing Data. Biometrika, Vol. 63, No.
3, #' pp. 581-592. \doi{10.2307/2335739} #' #' Eekhout, I., M. A. Wiel, & M. W. Heymans (2017). Methods for Significance #' Testing of Categorical Covariates in Logistic Regression Models After #' Multiple Imputation: Power and Applicability Analysis. BMC Medical Research #' Methodology 17 (1): 129. #' #' Jamshidian, M., & Jalal, S. (2010). Tests of homoscedasticity, normality, and #' missing completely at random for incomplete multivariate data. Psychometrika, #' 75(4), 649–674. \doi{10.1007/s11336-010-9175-3} #' @keywords internal #' @examples #' res <- mcar(nhanes) #' # Examine test results #' res #' # Plot p-values across imputed data sets #' plot(res) #' # Plot md patterns used for the test #' plot(res, type = "md.pattern") #' # Note difference with the raw md.patterns: #' md.pattern(nhanes) #' @export #' @importFrom stats cov pchisq spline #' @md mcar <- function(x, imputed = mice(x, method = "norm"), min_n = 6, method = "auto", replications = 10000, use_chisq = 30, alpha = 0.05) { UseMethod("mcar", x) } #' @method mcar data.frame #' @export mcar.data.frame <- function(x, imputed = mice(x, method = "norm"), min_n = 6, method = "auto", replications = 10000, use_chisq = 30, alpha = 0.05) { anyfact <- sapply(x, inherits, what = "factor") if (any(anyfact)) { stop("Be advised that this MCAR test has not been validated for categorical variables.") } if (min_n < 1) { stop("Argument 'min_n' must be greater than 1.") } out <- list( hawk_chisq = NULL, hawk_df = NULL, hawk_p = NULL, ad_value = NULL, ad_p = NULL, alpha = alpha, method = method, md.pattern = NULL, removed_rows = NULL, removed_patterns = NULL ) if (inherits(imputed, "mids")) { imputed <- mice::complete(imputed, action = "all") } else if (inherits(imputed, "list")) { if (!inherits(imputed[[1]], "data.frame")) { imputed <- tryCatch(lapply(imputed, data.frame), error = function(e) { stop("Argument 'imputed' must be a list of data.frames or an object of class 'mids'. Could not coerce argument 'imputed' to class 'data.frame'.") }) message("Argument 'imputed' must be an object of class 'mids', or a list of 'data.frame's. Coerced argument 'imputed' to data.frame.") } } if (!all(dim(x) == dim(imputed[[1]]))) { stop("Imputed data must have the same dimensions as the incomplete data.") } if (any(is.na(imputed[[1]]))) { stop("Imputed data contain missing values.") } missings <- is.na(x) rowmis <- rowSums(missings) colmis <- colSums(missings) if (any(rowmis == ncol(x))) { warning("Note that there were some rows with all missing data. Tests may be invalid for these rows. Consider removing them.") # x <- x[!rowmis == ncol(x), , drop = FALSE] } if (any(colmis == nrow(x))) { stop("Some columns contain all missing data. This will result in invalid results.") } univals <- sapply(x, function(i) { length(unique(i)) }) if (any(univals < 2)) { stop("Some columns are constant, not variable. 
This will result in invalid results.") } newdata <- x missings <- is.na(x) pats <- mice::md.pattern(x, plot = FALSE) if (nrow(pats) < 4L) { stop("Two or more missing data patterns are required.") } remove_pats <- as.numeric(rownames(pats))[-nrow(pats)] <= min_n if (any(remove_pats)) { out$removed_patterns <- pats[remove_pats, ] pats <- pats[-nrow(pats), colnames(missings)] idmiss <- do.call(paste, as.data.frame(missings)) idpats <- do.call(paste, as.data.frame(pats == 0)) remove_these <- idmiss %in% idpats[remove_pats] out$removed_rows <- remove_these newdata <- x[!remove_these, , drop = FALSE] imputed <- lapply(imputed, `[`, i = !remove_these, j = colnames(newdata), drop = FALSE) missings <- is.na(newdata) pats <- mice::md.pattern(newdata, plot = FALSE) } class(pats) <- c("md.pattern", class(pats)) out$md.pattern <- pats pat_n <- as.numeric(rownames(pats))[-nrow(pats)] pats <- pats[-nrow(pats), colnames(missings)] idpats <- idpats[!remove_pats] idmiss <- do.call(paste, as.data.frame(missings)) which_pat <- apply(sapply(idpats, `==`, idmiss), 1, which) numpat <- length(pat_n) if (nrow(newdata) == 0) { stop("No valid rows of data left.") } if (numpat == 1) { stop("Only one missing data pattern.") } if (any(pat_n < 2)) { stop("At least 2 cases needed in each missing data pattern.") } # Perform Hawkins test ---------------------------------------------------- hawklist <- lapply(imputed, function(thisimp) { hawkins(thisimp, which_pat) }) if (method %in% c("auto", "hawkins")) { pvalsn <- sapply(hawklist, function(thisimp) { sapply(thisimp[["a"]], function(thistail) { ni <- length(thistail) pn <- p_neyman(thistail, replications, use_chisq) pn + (pn == 0) / replications }) }) out$hawk_chisq <- -2 * colSums(log(pvalsn)) out$hawk_df <- 2 * numpat out$hawk_p <- pchisq(out$hawk_chisq, out$hawk_df, lower.tail = FALSE) } # Perform Anderson-Darling test ------------------------------------------- if ((method == "auto" & any(out$hawk_p < alpha)) | method == "nonparametric") { adout <- sapply(hawklist, function(thisimp) { anderson_darling(thisimp[["fij"]]) # First row is p }) out$ad_value <- colSums(adout[-1, , drop = FALSE]) out$ad_p <- adout[1, ] } class(out) <- c("mcar_object", class(out)) out } #' @method print mcar_object #' @export print.mcar_object <- function(x, ...) { ni <- x$pat_n out <- "\nInterpretation of results:\n" cat("\nMissing data patterns:", (nrow(x$md.pattern) - 1)) if (!is.null(x$removed_patterns)) cat(" used,", nrow(x$removed_patterns), "removed.") cat("\nCases used:", sum(!x$removed_rows), "\n\n") if (!is.null(x$hawk_p)) { if (length(x$hawk_p) > 1) { hawkp <- median(x$hawk_p) cat("Hawkins' test: median chi^2 (", x$hawk_df, ") = ", median(x$hawk_chisq), ", median p = ", hawkp, sep = "") if (any(x$hawk_p < x$alpha) & !hawkp < x$alpha) cat(". 
Some p-values for Hawkins' test were significant; please inspect their values, e.g., using `plot(", deparse(substitute(x)), ")`", sep = "") } else { hawkp <- x$hawk_p cat("Hawkins' test: chi^2 (", x$hawk_df, ") = ", x$hawk_chisq, ", p = ", x$hawk_p, sep = "") } cat("\n\n") if (x$method == "auto") { out <- c( out, c( "Hawkins' test is not significant; there is no evidence to reject the assumptions of multivariate normality and MCAR.\n", "Hawkins' test is significant; if multivariate normality can be assumed, then reject the assumption that missingness is MCAR.\n" )[(hawkp < x$alpha) + 1] ) } } if (!is.null(x$ad_p)) { if (length(x$ad_p) > 1) { adp <- median(x$ad_p) cat("Anderson-Darling rank test: median T = ", median(x$ad_value), ", median p = ", median(x$ad_p), sep = "") if (any(x$ad_p < x$alpha) & !median(x$ad_p) < x$alpha) cat(". Some p-values for the Anderson-Darling test were significant; please inspect their values, e.g., using `plot(", deparse(substitute(x)), ")`", sep = "") } else { adp <- x$ad_p cat("Anderson-Darling rank test: T = ", x$ad_value, ", p = ", x$ad_p, sep = "") } cat("\n") if (x$method == "auto") { out <- c( out, c( "Anderson-Darling test is not significant. There is thus evidence against multivariate normality, but not against MCAR.\n", "Anderson-Darling test is significant. Reject the assumption that missingness is MCAR." )[(adp < x$alpha) + 1] ) } } if (x$method == "auto") { cat(out) } } #' @importFrom graphics hist #' @importFrom grDevices dev.off #' @method plot mcar_object #' @export plot.mcar_object <- function(x, y, type = NULL, ...) { if (isTRUE(type == "md.pattern")) { plot(x[["md.pattern"]]) } else { op <- par(mar = rep(0, 4)) on.exit(par(op)) dev.off() if (!is.null(x$hawk_p) & !is.null(x$ad_p)) par(mfrow = c(2, 1)) if (!is.null(x$hawk_p)) { pct <- sum(x$hawk_p < x$alpha) / length(x$hawk_p) hist(x$hawk_p, main = NULL, xlab = paste0("Hawkins p-values, ", round(pct * 100), "% significant"), ylab = NULL) abline(v = x$alpha, col = "red") } if (!is.null(x$ad_p)) { pct <- sum(x$ad_p < x$alpha) / length(x$ad_p) hist(x$ad_p, main = NULL, xlab = paste0("Anderson-Darling p-values, ", round(pct * 100), "% significant"), ylab = NULL) abline(v = x$alpha, col = "red") } } } hawkins <- function(x, grouping) { p <- ncol(x) n <- nrow(x) x <- split(x, factor(grouping)) g <- length(x) S_pooled <- lapply(x, function(i) { (nrow(i) - 1) * cov(i, use = "complete.obs") }) S_pooled <- Reduce("+", S_pooled) S_pooled <- S_pooled / (n - g) S_pooled <- solve(S_pooled) f <- lapply(x, function(i) { i_centered <- scale(i, center = TRUE, scale = FALSE) i_centered <- apply(i_centered %*% S_pooled * i_centered, 1, sum) i_centered <- i_centered * nrow(i) ((n - g - p) * i_centered) / (p * ((nrow(i) - 1) * (n - g) - i_centered)) }) a <- lapply(f, function(thisf) { pf(thisf, p, (n - g - p), lower.tail = FALSE) }) list(fij = f, a = a, ni = matrix(sapply(x, nrow), ncol = 1)) } p_neyman <- function(x, replications = 10000, use_chisq = 30) { n <- length(x) n4 <- sum(colSums(legendre(x, 4))^2) / n if (n < use_chisq) { sum(sim_neyman(n, replications) > n4) / replications } else { pchisq(n4, 4, lower.tail = FALSE) } } sim_neyman <- function(n, replications) { x <- matrix(runif(replications * n), ncol = replications) pi <- apply(x, 2, function(i) { sum(colSums(legendre(i, 4))^2) / n }) sort(pi) } anderson_darling <- function(fij) { x <- unlist(fij) ni <- sapply(fij, length) if (length(ni) < 2) { stop("At least 2 groups required for Anderson-Darling test.") } k <- length(ni) n <- length(x) x.sort <- 
sort(x)[-n] hj <- rle(x.sort)$lengths hn <- cumsum(hj) zj <- x.sort[which(!duplicated(x.sort))] adk.all <- sapply(fij, function(fi) { ni <- length(fi) combs <- expand.grid(zj, fi) b <- combs[, 1] == combs[, 2] thisfij <- rowSums(matrix(b, length(zj))) mij <- cumsum(thisfij) num <- (n * mij - ni * hn)^2 den <- hn * (n - hn) (1 / ni * sum(hj * (num / den))) }) adk <- sum(adk.all) / n adk.all <- adk.all / n j <- sum(1 / ni) h <- sum(1 / seq(1:(n - 1))) g <- sum(sapply(1:(n - 2), function(i) { (1 / (n - i)) * sum(1 / seq((i + 1), (n - 1))) })) a <- (4 * g - 6) * (k - 1) + (10 - 6 * g) * j b <- (2 * g - 4) * k^2 + 8 * h * k + (2 * g - 14 * h - 4) * j - 8 * h + 4 * g - 6 c <- (6 * h + 2 * g - 2) * k^2 + (4 * h - 4 * g + 6) * k + (2 * h - 6) * j + 4 * h d <- (2 * h + 6) * k^2 - 4 * h * k var.adk <- max(((a * n^3) + (b * n^2) + (c * n) + d) / ((n - 1) * (n - 2) * (n - 3)), 0) adk.s <- (adk - (k - 1)) / sqrt(var.adk) b0 <- c(0.675, 1.281, 1.645, 1.96, 2.326) b1 <- c(-0.245, 0.25, 0.678, 1.149, 1.822) b2 <- c(-0.105, -0.305, -0.362, -0.391, -0.396) c0 <- c( 1.09861228866811, 2.19722457733622, 2.94443897916644, 3.66356164612965, 4.59511985013459 ) qnt <- b0 + b1 / sqrt(k - 1) + b2 / (k - 1) ind <- seq(1:4) + (adk.s <= qnt[3]) yy <- spline(qnt[ind], c0[ind], xout = adk.s)$y p <- 1 / (1 + exp(yy)) c(p, adk.all) } ad <- function(fij) { x <- unlist(fij) ni <- sapply(fij, length) if (length(ni) < 2) { stop("At least 2 groups required for Anderson-Darling test.") } k <- length(ni) n <- length(x) x.sort <- sort(x)[-n] hj <- rle(x.sort)$lengths hn <- cumsum(hj) zj <- x.sort[which(!duplicated(x.sort))] adk.all <- sapply(fij, function(fi) { ni <- length(fi) combs <- expand.grid(zj, fi) b <- combs[, 1] == combs[, 2] thisfij <- rowSums(matrix(b, length(zj))) mij <- cumsum(thisfij) num <- (n * mij - ni * hn)^2 den <- hn * (n - hn) (1 / ni * sum(hj * (num / den))) }) adk <- sum(adk.all) / n adk.all <- adk.all / n j <- sum(1 / ni) h <- sum(1 / seq(1:(n - 1))) g <- sum(sapply(1:(n - 2), function(i) { (1 / (n - i)) * sum(1 / seq((i + 1), (n - 1))) })) a <- (4 * g - 6) * (k - 1) + (10 - 6 * g) * j b <- (2 * g - 4) * k^2 + 8 * h * k + (2 * g - 14 * h - 4) * j - 8 * h + 4 * g - 6 c <- (6 * h + 2 * g - 2) * k^2 + (4 * h - 4 * g + 6) * k + (2 * h - 6) * j + 4 * h d <- (2 * h + 6) * k^2 - 4 * h * k var.adk <- max(((a * n^3) + (b * n^2) + (c * n) + d) / ((n - 1) * (n - 2) * (n - 3)), 0) adk.s <- (adk - (k - 1)) / sqrt(var.adk) b0 <- c(0.675, 1.281, 1.645, 1.96, 2.326) b1 <- c(-0.245, 0.25, 0.678, 1.149, 1.822) b2 <- c(-0.105, -0.305, -0.362, -0.391, -0.396) c0 <- c( 1.09861228866811, 2.19722457733622, 2.94443897916644, 3.66356164612965, 4.59511985013459 ) qnt <- b0 + b1 / sqrt(k - 1) + b2 / (k - 1) ind <- seq(1:4) + (adk.s <= qnt[3]) yy <- spline(qnt[ind], c0[ind], xout = adk.s)$y p <- 1 / (1 + exp(yy)) list(pn = p, adk.all = adk.all, adk = adk, var.sdk = var.adk) } #' @method plot md.pattern #' @export plot.md.pattern <- function(x, y, rotate.names = FALSE, ...) 
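  # What the body below does: it draws the md.pattern matrix as a grid of
  # rectangles, colouring observed cells with mdc(1) and missing cells with
  # mdc(2) (blue and red under the default mice palette). The top margin
  # lists the variable names, the bottom margin the number of missing
  # entries per variable, the left margin the pattern frequencies, the
  # right margin the number of missing variables per pattern, and the
  # bottom-right corner the total number of missing cells.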
{ op <- par(mar = rep(0, 4)) on.exit(par(op)) plot.new() R <- x[1:nrow(x) - 1, 1:ncol(x) - 1] nmis <- x[nrow(x), 1:ncol(x) - 1] if (rotate.names) { adj <- c(0, 0.5) srt <- 90 length_of_longest_colname <- max(nchar(colnames(x))) / 2.6 plot.window( xlim = c(-1, ncol(R) + 1), ylim = c(-1, nrow(R) + length_of_longest_colname), asp = 1 ) } else { adj <- c(0.5, 0) srt <- 0 plot.window( xlim = c(-1, ncol(R) + 1), ylim = c(-1, nrow(R) + 1), asp = 1 ) } M <- cbind(c(row(R)), c(col(R))) - 1 shade <- ifelse(R[nrow(R):1, ], mdc(1), mdc(2)) rect(M[, 2], M[, 1], M[, 2] + 1, M[, 1] + 1, col = shade) for (i in 1:ncol(R)) { text(i - .5, nrow(R) + .3, colnames(x)[i], adj = adj, srt = srt ) text(i - .5, -.3, nmis[order(nmis)][i]) } for (i in 1:nrow(R)) { text(ncol(R) + .3, i - .5, x[(nrow(x) - 1):1, ncol(x)][i], adj = 0) text(-.3, i - .5, rownames(x)[(nrow(x) - 1):1][i], adj = 1) } text(ncol(R) + .3, -.3, x[nrow(x), ncol(x)]) } mice/R/handles.R0000644000176200001440000000047014330031606013136 0ustar liggesusershandles.arg <- function(f, a = "data") { # determine whether function f handles argument a if (!is.function(f)) { return(FALSE) } a %in% names(formals(f)) } handles.format <- function(fn) { # determine whether function fn handles the `format` argument f <- get(fn) handles.arg(f, "format") } mice/R/D3.R0000644000176200001440000001026614436133157011775 0ustar liggesusers#' Compare two nested models using D3-statistic #' #' The D3-statistic is a likelihood-ratio test statistic. #' #' @details #' The \code{D3()} function implements the LR-method by #' Meng and Rubin (1992). The implementation of the method relies #' on the \code{broom} package, the standard \code{update} mechanism #' for statistical models in \code{R} and the \code{offset} function. #' #' The function fits \code{m} repetitions of the full #' (or null) model and calculates the mean of the estimates of the #' (fixed) parameter coefficients \eqn{\beta}. For each #' imputed dataset, it calculates the likelihood for the model with #' the parameters constrained to \eqn{\beta}. #' #' The \code{mitml::testModels()} function offers similar functionality #' for a subset of statistical models. Results of \code{mice::D3()} and #' \code{mitml::testModels()} differ in multilevel models because #' \code{testModels()} also constrains the variance components parameters. #' For more details, see the references below. #' #' @seealso \code{\link{fix.coef}} #' @inheritParams D1 #' @return An object of class \code{mice.anova} #' @references #' Meng, X. L., and D. B. Rubin. 1992. #' Performing Likelihood Ratio Tests with Multiply-Imputed Data Sets. #' \emph{Biometrika}, 79 (1): 103–11.
#' #' \url{https://stefvanbuuren.name/fimd/sec-multiparameter.html#sec:likelihoodratio} #' #' \url{http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#setting-residual-variances-to-a-fixed-value-zero-or-other} #' @examples #' # Compare two linear models: #' imp <- mice(nhanes2, seed = 51009, print = FALSE) #' mi1 <- with(data = imp, expr = lm(bmi ~ age + hyp + chl)) #' mi0 <- with(data = imp, expr = lm(bmi ~ age + hyp)) #' D3(mi1, mi0) #' \dontrun{ #' # Compare two logistic regression models #' imp <- mice(boys, maxit = 2, print = FALSE) #' fit1 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc + reg, family = binomial)) #' fit0 <- with(imp, glm(gen > levels(gen)[1] ~ hgt + hc, family = binomial)) #' D3(fit1, fit0) #' } #' @export D3 <- function(fit1, fit0 = NULL, dfcom = NULL, df.com = NULL) { if (!missing(df.com)) { warning("argument df.com is deprecated; please use dfcom instead.", call. = FALSE ) dfcom <- df.com } dfcom <- get.dfcom(fit1, dfcom) call <- match.call() fit1 <- getfit(fit1) m <- length(fit1) est1 <- pool(fit1, dfcom = dfcom) qbar1 <- getqbar(est1) if (is.null(fit0)) { # test all estimates equal to zero beta <- rep(0, length(qbar1)) names(beta) <- names(qbar1) fit0 <- lapply(fit1, fix.coef, beta = beta) } else { fit0 <- getfit(fit0) } est0 <- pool(fit0, dfcom = dfcom) qbar0 <- getqbar(est0) k <- length(qbar1) - length(qbar0) # For each imputed dataset, calculate the deviance between the two # models as fitted dev1.M <- -2 * lapply(fit1, glance) %>% bind_rows() %>% pull(.data$logLik) dev0.M <- -2 * lapply(fit0, glance) %>% bind_rows() %>% pull(.data$logLik) # For each imputed dataset, calculate the deviance between the two # models with coefficients restricted to qbar mds1 <- lapply(fit1, fix.coef, beta = qbar1) dev1.L <- -2 * lapply(mds1, glance) %>% bind_rows() %>% pull(.data$logLik) mds0 <- lapply(fit0, fix.coef, beta = qbar0) dev0.L <- -2 * lapply(mds0, glance) %>% bind_rows() %>% pull(.data$logLik) deviances <- list( dev1.M = dev1.M, dev0.M = dev0.M, dev1.L = dev1.L, dev0.L = dev0.L ) # scaled deviance, as fitted dev.M <- mean(dev0.M - dev1.M) # scaled deviance, restricted dev.L <- mean(dev0.L - dev1.L) rm <- ((m + 1) / (k * (m - 1))) * (dev.M - dev.L) Dm <- dev.L / (k * (1 + rm)) # Degrees of freedom for F distribution v <- k * (m - 1) if (v > 4) { w <- 4 + (v - 4) * ((1 + (1 - 2 / v) * (1 / rm))^2) } else { w <- v * (1 + 1 / k) * ((1 + 1 / rm)^2) / 2 } pvalue <- pf(Dm, k, w, lower.tail = FALSE) test <- out <- list( call = match.call(), result = c(Dm, k, w, pvalue, rm), formulas = list( `1` = formula(getfit(fit1, 1L)), `2` = formula(getfit(fit0, 1L)) ), m = m, method = "D3", use = NULL, dfcom = dfcom, deviances = deviances ) class(out) <- c("mice.anova", class(fit1)) out } mice/NEWS.md0000644000176200001440000010733214433407421012275 0ustar liggesusers# mice 3.16.0 ### Major changes * Expands `futuremice()` functionality by allowing for external packages and user-written functions (#550). Contributed @thomvolker * Adds GH issue templates `bug_report`, `feature_request` and `help_wanted` (#560). Contributed @hanneoberman ### Minor changes * Removes documentation files for `rbind.mids()` and `cbind.mids()` to conform to CRAN policy * Adds `mitml` and `glmnet` to imports so that test code conforms to `_R_CHECK_DEPENDS_ONLY=true` flag in `R CMD check` * Initializes random number generator in `futuremice()` if there is no `.Random.seed` yet. 
* Updates GitHub actions for package checking and site building * Preserves user settings in `predictorMatrix` for case F by adding a `predictorMatrix` argument to `make.predictorMatrix()` * Polishes `mice.impute.mpmm()` example code ### Bug fixes * Adds proper support for factors to `mice.impute.2lonly.pmm()` (#555) * Solves function naming problems for S3 generic functions `tidy()`, `update()`, `format()` and `sum()` * Out-comments and weeds example&test code to silence `R CMD check` with `_R_CHECK_DEPENDS_ONLY=true` * Fixes small bug in `futuremice()` that throws an error when the number of cores is not specified, but the number of available cores is greater than the number of imputations. * Solves a bug in `mice.impute.mpmm()` that changed the column order of the data # mice 3.15.0 ### Major changes * Adds a function `futuremice()` with support for parallel imputation using the `future` package (#504). Contributed @thomvolker, @gerkovink * Adds multivariate predictive mean matching `mice.impute.mpmm()`. (#460). Contributed @Mingyang-Cai * Adds `convergence()` for convergence evaluation (#484). Contributed @hanneoberman * Reverts the internal seed behaviour back to `mice 3.13.10` (#515). #432 introduced new local seed in response to #426. However, various issues arose with this facility (#459, #492, #502, #505). This version restores the old behaviour using global `.Random.seed`. Contributed @gerkovink * Adds a `custom.t` argument to `pool()` that allows the advanced user to specify a custom rule for calculating the total variance $T$. Contributed @gerkovink * Adds new argument `exclude` to `mice.impute.pmm()` that excludes a user-specified vector of values from matching. Excluded values will not appear in the imputations. Since the observed values are not imputed, the user-specified values are still being used to fit the imputation model (#392, #519). Contributed @gerkovink ### Minor changes * Styles all `.R` and `.Rmd` files * Makes post-processing assignment consistent with lines 85/86 in `sampler.R` (#511) * Edit test broken on R<4 (#501). Contributed @MichaelChirico * Adds support for models reporting contrasts rather than terms (#498). Contributed @LukasWallrich * Applies edits to autocorrelation function (#491). Contributed @hanneoberman * Changes p-value calculation to more robust alternative (#494). Contributed @AndrewLawrence * Uses `inherits()` to check on class membership * Adds decprecation notices to `parlmice()` * Adapt `prop`, `patterns` and `weights` matrices for pattern with only 1's * Adds warning when patterns cannot be generated (#449, #317, #451) * Adds warning on the order of model terms in `D1()` and `D2()` (#420) * Adds example code to fit model on train data and apply to test data to `mice()` * Adds example code on synthetic data generation and analysis in `make.where()` * Adds testfile `test-mice.impute.rf.R`(#448) ### Bug fixes * Replaces `.Random.seed` reads from the `.GlobalEnv` by `get(".Random.seed", envir = globalenv(), mode = "integer", inherits = FALSE)` * Repairs capitalisation problems with `lastSeedValue` variable name * Solves `x$lastSeedValue` problem in `cbind.mids()` (#502) * Fixes problems with `ampute()` * Preserves stochastic nature of `mice()` by smarter random seed initialisation (#459) * Repairs a `drop = FALSE` buglet in `mice.impute.rf()` (#447, #448) * @str-amg reported that the new dependency on `withr` package should have version 2.4.0 (published in January 2021) or higher. 
Versions `withr 2.3.0` and before may give `Error: object 'local_seed' is not exported by 'namespace:withr'`. Either update manually, or install the patched version `mice 3.14.1` from GitHub. (#445). NOTE: `withr` is no longer needed in `mice 3.15.0` # mice 3.14.0 ### Major changes * Adds four new univariate functions using the lasso for automatic variable selection: | Function | Description | | --------------------------------- | --------------------------------- | |`mice.impute.lasso.norm()` | Lasso linear regression | |`mice.impute.lasso.logreg()` | Lasso logistic regression | |`mice.impute.lasso.select.norm()` | Lasso selector + linear regression | |`mice.impute.lasso.select.logreg()`| Lasso selector + logistic regression | Contributed by @EdoardoCostantini (#438). * Adds Jamshidian && Jalal's non-parametric MCAR test, `mice::MCAR()` and associated plot method. Contributed by @cjvanlissa (#423). * Adds two new functions `pool.syn()` and `pool.scalar.syn()` that specialise pooling estimates from synthetic data. The `"reiter2003"` pooling rule assumes that synthetic data were created from complete data. Thanks Thom Volker (#436). * Avoids changing the global `.Random.seed` (#426, #432) by implementing `withr::local_preserve_seed()` and `withr::local_seed()`. This change provides stabler behavior in complex scripts. The change does not appear to break reproducibility when `mice()` was run with a seed. Nevertheless, if you run into a reproducibility problem, install `mice 3.13.12` or before. * Improves the imputation of parabolic data in `mice.impute.quadratic()`, adds a parameter `quad.outcome` containing the name of the outcome variable in the complete-data model. Contributed @Mingyang-Cai, @gerkovink (#408) * By default, `mice.impute.rf()` now uses the faster `ranger` package as back-end instead of `randomForest` package. If you want the old behaviour specify the `rfPackage = "randomForest"` argument to the `mice(...)` call. Contributed @prockenschaub (#431). * Generalises `pool()` so that it processes the parameters from all `gamlss` sub-models. Thanks Marcio Augusto Diniz (#406, #405) * Uses the robust standard error estimate for pooling when `pool()` can extract `robust.se` from the object returned by `broom::tidy()` (#310) ### Bug fixes * Contains an emergency solution as `install.on.demand()` broke the standard CRAN workflow. mice 3.14.0 does not call `install.on.demand()` anymore for recommended packages. Also, `install.on.demand()` will not run anymore in non-interactive mode. * Repairs an error in the `mice:::barnard.rubin()` function for infinite `dfcom`. Thanks @huftis (#441). * Solves problem with `Xi <- as.matrix(...)` in `mice.impute.2l.lmer()` that occurred when a cluster contains only one observation (#384) * Edits the `predictorMatrix` to a monotone pattern if `visitSequence = "monotone"` and `maxit = 1` (#316) * Solves a problem with the plot produced by `md.pattern()` (#318, #323) * Fixes the intercept in `make.formulas()` (#305, #324) * Fixes seed when using `newdata` in `mice.mids()` (#313, #325) * Solves a problem with row names of the `where` element created in `rbind()` (#319) * Solves a bug in mnar imputation routine. Contributed by Margarita Moreno Betancur. 
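As a minimal sketch of how two of these additions are called (illustrative settings only; `nhanes` is the demo data set shipped with `mice`, and the `m`, `maxit` and `seed` values are arbitrary):

```r
library(mice)

# Jamshidian & Jalal MCAR test (exported as mcar()); with multiply imputed
# data the print method reports median test results across the imputations
res <- mcar(nhanes)
res
plot(res)  # p-values per imputed data set

# Request one of the new lasso-based univariate methods for a single variable
meth <- make.method(nhanes)
meth["chl"] <- "lasso.norm"
imp <- mice(nhanes, method = meth, m = 2, maxit = 2, seed = 1, print = FALSE)
```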
### Minor changes * Replaces URL to jstatsoft with DOI * Update reference to literature (#442) * Informs the user that `pool()` cannot take a `mids` object (#433) * Updates documentation for post-processing functionality (#387) * Adds Rcpp necessities * Solves a problem with "last resort" initialisation of factors (#410) * Documents the "flat-line behaviour" of `mice.impute.2l.lmer()` to indicate a problem in fitting the imputation model (#385) * Add reprex to test (#326) * Documents that multivariate imputation methods do not support the `post` parameter (#326) # mice 3.13.0 ### Major changes * Updated `mids2spss()` replaces the `foreign` by `haven` package. Contributed Gerko Vink (#291) ### Minor changes * Repairs an error in `tests\testhat\test-D1.R` that failed on `mitml 0.4-0` * Reverts `with.mids()` function to old version because the change in commit 4634094 broke downstream package `metafor` (#292) * Solves a glitch in `mice.impute.rf()` in finding candidate donors (#288, #289) # mice 3.12.0 ### Much faster predictive mean matching * The new `matchindex` C function makes predictive mean matching **50 to 600 times faster**. The speed of `pmm` is now on par with normal imputation (`mice.impute.norm()`) and with the `miceFast` package, without compromising on the statistical quality of the imputations. Thanks to Polkas and suggestions by Alexander Robitzsch. See #236 for more details. ### New `ignore` argument to `mice` * New `ignore` argument to `mice()`. This argument is a logical vector of `nrow(data)` elements indicating which rows are ignored when creating the imputation model. We may use the `ignore` argument to split the data into a training set (on which the imputation model is built) and a test set (that does not influence the imputation model estimates). The argument is based on the suggestion in . See #32 for more background and techniques. Crafted by Patrick Rockenschaub ### New `filter()` function for `mids` objects * New `filter()` method that subsets a `mids` object (multiply-imputed data set). The method accepts a logical vector of length `nrow(data)`, or an expression to construct such a vector from the incomplete data. (#269). Crafted by Patrick Rockenschaub. ### Changes affecting reproducibility * **Breaking change:** The `matcher` algorithm in `pmm` has changed to `matchindex` for speed improvements. If you want the old behavior, specify `mice(..., use.matcher = TRUE)`. ### Minor changes * Corrected installation problem related to `cpp11` package (#286) * Simplifies `with.mids()` by calling `eval_tidy()` on a quosure. Does not yet solve #265. 
* Improve documentation for `pool()` and `pool.scalar()` (#142, #106, #190 and others) * Makes `tidy.mipo` more flexible (#276) * Solves a problem if `nelsonaalen()` gets a `tibble` (#272) * Add explanation to how `NA`s can appear in the imputed data (#267) * Add warning to `quickpred()` documentation (#268) * Styles all sources files with styler * Improves consistency in code and documentation * Moves internally defined functions to global namespace * Solves bug in internal `sum.scores()` * Adds deprecated messages to `lm.mids()`, `glm.mids()`, `pool.compare()` * Removes `.pmm.match()` and `expandcov()` * Strips out all `return()` calls placed just before end-of-function * Remove all trailing spaces * Repairs a bug in the routine for finding the `printFlag` value (#258) * Update URL's after transfer to organisation `amices` # mice 3.11.0 ## Major changes * The Cox model does not return `df.residual`, which caused problematic behavior in the `D1()`, `D2()`, `D3()`, `anova()` and `pool()`. `mice` now extracts the relevant information from other parts of the objects returned by `survival::coxph()`, which solves long-standing issues with the integration of the Cox model (#246). * Adds missing `Rccp` dependency to work with `tidyr 1.1.1` (#248). ## Minor changes * Addresses warnings: `Non-file package-anchored link(s) in documentation object`. * Updates on `ampute` documentation (#251). * Ask user permission before installing a package from `suggests`. # mice 3.10.0 ## Major changes * New functions `tidy.mipo()` and `glance.mipo()` return standardized output that conforms to `broom` specifications. Kindly contributed by Vincent Arel Bundock (#240). ## Minor changes * Solves a problem with the `D3` testing script that produced an error on CRAN (#244). # mice 3.9.0 ## Major changes * The `D3()` function in `mice` gave incorrect results. This version solves a problem in the calculation of the `D3`-statistic. See #226 and #228 for more details. The documentation explains why results from `mice::D3()` and `mitml::testModels()` may differ. * The `pool()` function is now more forgiving when there is no `glance()` function (#233) * It is possible to bypass `remove.lindep()` by setting `eps = 0` (#225) ## Minor changes * Adds reference to Leacy's thesis * Adds an example to the `plot.mids()` documentation # mice 3.8.0 ## Major changes * This version adds two new NARFCS methods for imputing data under the *Missing Not at Random (MNAR)* assumption. NARFCS is generalised version of the so-called $\delta$-adjustment method. Margarita Moreno-Betancur and Ian White kindly contributes the functions `mice.impute.mnar.norm()` and `mice.impute.mnar.logreg()`. These functions aid in performing sensitivity analysis to investigate the impact of different MNAR assumptions on the conclusion of the study. An alternative for MNAR is the older `mice.impute.ri()` function. * Installation of `mice` is faster. External packages needed for imputation and analyses are now installed on demand. The number of dependencies as estimated by `rsconnect::appDepencies()` decreased from 132 to 83. * The name clash with the `complete()` function of `tidyr` should no longer be a problem. * There is now a more flexible `pool()` function that integrates better with the `broom` and `broom.mixed` packages. ## Bug fixes * Deprecates `pool.compare()`. 
Use `D1()` instead (#220) * Removes everything in `utils::globalVariables()` * Prevents name clashes with `tidyr` by defining `complete.mids()` as an S3 method for the `tidyr::complete()` generic (#212) * Extends the `pool()` function to deal with multiple sets of parameters. Currently supported keywords are: `term` (all `broom` functions), `component` (some `broom.mixed` functions) and `y.values` (for `multinom()` model) (#219) * Adds a new `install.on.demand()` function for lighter installation * Adds `toenail2` and remove dependency on `HSAUR3` * Solves problem with `ampute` in extreme cases (#216) * Solves problem with `pool` with `mgcv::gam` (#218) * Adds `.gitattributes` for consistent line endings # mice 3.7.0 * Solves a bug that made `polr()` always fail (#206) * Aborts if one or more columns are a `data.frame` (#208) * Update `mira-class` documentation (#207) * Remove links to deprecated package `CALIBERrfimpute` * Adds check on partial missing level-2 data to `2lonly.norm` and `2lonly.pmm` * Change calculation of `a2` to elementwise division by a matrix of observations * Extend documentation for `2lonly.norm` and `2lonly.pmm` * Repair return value from `2lonly.pmm` * Imputation method `2lonly.mean` now also works with factors * Replace deprecated `imputationMethod` argument in examples by `method` * More informative error message when stopped after pre-processing (#194) * Updated URL's in DESCRIPTION * Fix string matching in `check.predictorMatrix()` (#191) # mice 3.6.0 * Copy `toenail` data from orphaned `DPpackage` package * Remove `DPpackage` from `Suggests` field in `DESCRIPTION` * Adds support for rotated names in `md.pattern()` (#170, #177) # mice 3.5.0 * This version has some error fixes * Fixes a bug in the sampler that ignored imputed values in variables outside the active block (#175, @alexanderrobitzsch) * Adds a note to the documenation of `as.mids`() (#173) * Removes a superfluous warning from process_mipo() (#92) * Fixes an error in the degrees of freedom of the P-value calculation (#171) # mice 3.4.0 * Add a hex sticker to the mice package. Designed by Jaden M. Walters. * Specify the R3.5.0 random generator in order to pass CRAN tests * Remove test-fix.coef.R from tests * Adds a rotate.names argument to md.pattern() (#154, #160) * Fix to solve the name-matching problem (#156, #149, #147) * Fix that removes the pre-check for existence of `mice.impute.xxx()` so that `mice::mice()` works as expected (#55) * Solves a bug that crashed `mids2spss()`, thanks Edgar Schoreit (#149) * Solves a problem in the routing logic (#149) causing that passive imputation was not done when no predictors were specified. No passive imputation correctly will ignore any the specification of `predictorMatrix`. * Implements an alternative solution for #93 and #96. Instead of skipping imputation of variables without predictors, `mice 3.3.1` will impute those variables using the intercept only * Adds a routine contributed by Simon Grund that checks for deprecated arguments #137 * Improves the `nelsonaalen()` function for data where variables `time` or `status` have already been defined (#140), thanks matthieu-faron # mice 3.3.0 * Solves bug in passive imputation (#130). 
*Warning: This bug may have caused invalid imputations in `mice 3.0.0` - `mice 3.2.0` under passive imputation.* * Updates code to `broom 0.5.0` (#128) * Solves problem with `mice.impute.2l.norm()` (#129) * Use explicit foreign function calls in tests # mice 3.2.0 * Skip tests for `mice.impute.2l.norm()` (#129) * Skip tests for `D1()` (#128) * Solve problem with `md.pattern` (#126) * Evades warning in `rbind` and `cbind` (#114) * Solves `rbind` problem when `method` is a list (#113) * More efficient use of `parlmice` (#109) * Add `dfcom` argument to `pool()` (#105, #110) * Updates to `parlmice` + bugfix (#107) # mice 3.1.0 * New parallel functionality: `parlmice` (#104) * Incorporate suggestion of @JoergMBeyer to `flux` (#102) * Replace duplicate code by `estimice` (#101) * Better checking for empty methods (#99) * Remove problem with `parent.frame` (#98) * Set empty method for complete data (#93) * Add `NEWS.md`, `index.Rmd` and online package documentation * Track `.R` instead of `.r` * Patch issue with `updateLog` (#8, @alexanderrobitzsch) * Extend README * Repair issue `md.pattern` (#90) * Repair check on `m` (#89) # mice 3.0.0 Version 3.0 represents a major update that implements the following features: 1. `blocks`: The main algorithm iterates over blocks. A block is simply a collection of variables. In the common MICE algorithm each block was equivalent to one variable, which - of course - is the default; The `blocks` argument allows mixing univariate imputation method multivariate imputation methods. The `blocks` feature bridges two seemingly disparate approaches, joint modeling and fully conditional specification, into one framework; 2. `where`: The `where` argument is a logical matrix of the same size of `data` that specifies which cells should be imputed. This opens up some new analytic possibilities; 3. Multivariate tests: There are new functions `D1()`, `D2()`, `D3()` and `anova()` that perform multivariate parameter tests on the repeated analysis from on multiply-imputed data; 4. `formulas`: The old `form` argument has been redesign and is now renamed to `formulas`. This provides an alternative way to specify imputation models that exploits the full power of R's native formula's. 5. Better integration with the `tidyverse` framework, especially for packages `dplyr`, `tibble` and `broom`; 6. Improved numerical algorithms for low-level imputation function. Better handling of duplicate variables. 7. Last but not least: A brand new edition AND online version of [Flexible Imputation of Missing Data. 
Second Edition.](https://stefvanbuuren.name/fimd/) # mice 2.46.9 (2017-12-08) * simplify code for `mids` object in `mice` (thanks stephematician) (#61) * simplify code in `rbind.mids` (thanks stephematician) (#59) * repair bug in `pool.compare()` in handling factors (#60) * fixed bug in `rbind.mids` in handling `where` (#59) * add new arguments to `as.mids()`, add `as()` * update contact info * resolved problem `cart` not accepting a matrix (thanks Joerg Drechsler) * Adds generalized `pool()` to list of models * Switch to 3-digit versioning # mice 2.46 (2017-10-22) * Allow for capitals in imputation methods # mice 2.45 (2017-10-21) * Reorganized vignettes to land on GitHUB pages # mice 2.44 (2017-10-18) * Code changes for robustness, style and efficiency (Bernie Gray) # mice 2.43 (2017-07-20) * Updates the `ampute` function and vignettes (Rianne Schouten) # mice 2.42 (2017-07-11) * Rename `mice.impute.2l.sys` to `mice.impute.2l.lmer` # mice 2.41 (2017-07-10) * Add new feature: `where`argument to mice * Add new `wy` argument to imputation functions * Add `mice.impute.2l.sys()`, author Shahab Jolani * Update with many simplifications and code enhancements * Fixed broken `cbind()` function * Fixed Bug that made the pad element disappear from `mids` object # mice 2.40 (2017-07-07) * Fixed integration with `lattice` package * Updates colors in `xyplot.mads` * Add support for factors in `mice.impute.2lonly.pmm()` * Create more robust version of as.mids() * Update of `ampute()` by Rianne Schouten * Fix timestamp problem by rebuilding vignette using R 3.4.0. # mice 2.34 (2017-04-24) * Update to roxygen 6.0.1 * Stylistic changes to `mice` function (thanks Ben Ogorek) * Calls to `cbind.mids()` replaced by calls to `cbind()` # mice 2.31 (2017-02-23) * Add link to `miceVignettes` on github (thanks Gerko Vink) * Add package documentation * Add `README` for GitHub * Add new ampute functions and vignette (thanks Rianne Schouten) * Rename `ccn` --> `ncc`, `icn` --> `nic` * Change helpers `cc()`, `ncc()`, `cci()`, `ic()`, `nic()` and `ici()` use `S3` dispatch * Change issues tracker on Github - add BugReports URL #21 * Fixed `multinom` MaxNWts type fix in `polyreg` and `polr` #9 * Fix checking of nested models in `pool.compare` #12 * Fix `as.mids` if names not same as all columns #11 * Fix extension for `glmer` models #5 # mice 2.29 (2016-10-05) * Add `midastouch`: predictive mean matching for small samples (thanks Philip Gaffert, Florian Meinfelder) # mice 2.28 (2016-10-05) * Repaired dots problem in `rpart` call # mice 2.27 (2016-07-27) * Add `ridge` to `2l.norm()` * Remove `.o` files # mice 2.25 (2015-11-09) * Fix `as.mids()` bug that crashed `miceadds::mice.1chain()` # mice 2.23 (2015-11-04) * Update of example code on /doc * Remove lots of dependencies, general cleanup * Fix `impute.polyreg()` bug that bombed if there were no predictors (thanks Jan Graffelman) * Fix `as.mids()` bug that gave incorrect $m$ (several users) * Fix `pool.compare()` error for `lmer` object (thanks Claudio Bustos) * Fix error in `mice.impute.2l.norm()` if just one `NA` (thanks Jeroen Hoogland) # mice 2.22 (2014-06-11) * Add about six times faster predictive mean matching * `pool.scalar()` now can do Barnard-Rubin adjustment * `pool()` now handles class `lmerMod` from the `lme4` package * Added automatic bounds on donors in `.pmm.match()` for safety * Added donors argument to `mice.impute.pmm()` for increased visibility * Changes default number of trees in `mice.impute.rf()` from 100 to 10 (thanks Anoop Shah) * `long2mids()` 
deprecated. Use `as.mids()` instead * Put `lattice` back into DEPENDS to find generic `xyplot()` and friends * Fix error in `2lonly.pmm` (thanks Alexander Robitzsch, Gerko Vink, Judith Godin) * Fix number of imputations in `as.mids()` (thanks Tommy Nyberg, Gerko Vink) * Fix colors to `mdc()` in example `mice.impute.quadratic()` * Fix error in `mice.impute.rf()` if just one `NA` (thanks Anoop Shah) * Fix error in `summary.mipo()` when `names(x$qbar)` equals `NULL` (thanks Aiko Kuhn) * Fix improper testing in `ncol()` in `mice.impute.2lonly.mean()` # mice 2.21 02-05-2014 SvB * FIXED: compilation problem in match.cpp on solaris CC # mice 2.20 02-02-2014 SvB * ADDED: experimental fastpmm() function using Rcpp * FIXED: fixes to mice.impute.cart() and mice.impute.rf() (thanks Anoop Shah) # mice 2.19 21-01-2014 SvB * ADDED: mice.impute.rf() for random forest imputation (thanks Lisa Doove) * CHANGED: default number of donors in mice.impute.pmm() changed from 3 to 5. Use mice(..., donors = 3) to get the old behavior. * CHANGED: speedup in .norm.draw() by using crossprod() (thanks Alexander Robitzsch) * CHANGED: speedup in .imputation.level2() (thanks Alexander Robitzsch) * FIXED: define MASS, nnet, lattice as imports instead of depends * FIXED: proper handling of rare case in remove.lindep() that removed all predictors (thanks Jaap Brand) # mice 2.18 31-07-2013 SvB * ADDED: as.mids() for converting long format in a mids object (thanks Gerko Vink) * FIXED: mice.impute.logreg.boot() now properly exported (thanks Suresh Pujar) * FIXED: two bugs in rbind.mids() (thanks Gerko Vink) # mice 2.17 10-05-2013 SvB * ADDED: new form argument to mice() to specify imputation models using forms (contributed Ross Boylan) * FIXED: with.mids(), is.mids(), is.mira() and is.mipo() exported * FIXED: eliminated errors in the documentation of pool.scalar() * FIXED: error in mice.impute.ri() (thanks Shahab Jolani) # mice 2.16 27-04-2013 SvB * ADDED: random indicator imputation by mice.impute.ri() for nonignorable models (thanks Shahab Jolani) * ADDED: workhorse functions .norm.draw() and .pmm.match() are exported * FIXED: bug in 2.14 and 2.15 in mice.impute.pmm() that produced an error on factors * FIXED: bug that crashed R when the class variable was incomplete (thanks Robert Long) * FIXED: bug in 2l.pan and 2l.norm by convert a class factor to integer (thanks Robert Long) * FIXED: warning eliminated caused by character variables (thanks Robert Long) # mice 2.15 - 02-04-2013 SvB * CHANGED: complete reorganization of documentation and source files * ADDED: source published on GitHub.com * ADDED: new imputation method mice.impute.cart() (thanks Lisa Doove) * FIXED: calculation of degrees of freedom in pool.compare() (thanks Lorenz Uhlmann) * FIXED: error in DESCRIPTION file (thanks Kurt Hornik) # mice 2.14 - 11-03-2013 / SvB * ADDED: mice.impute.2l.mean() for imputing class means at level 2 * ADDED: sampler(): new checks of degrees of freedom per variable at iteration 1 * ADDED: function check.df() to throw a warning about low degrees of freedom * FIXED: tolower() added in "2l" test in sampler() * FIXED: conversion of factors that have other roles (multilevel) in padModel() * FIXED: family argument in call to glm() in glm.mids() (thanks Nicholas Horton) * FIXED: .norm.draw(): evading NaN imputed values by setting df in rchisq() to a minimum of 1 * FIXED: bug in mice.df() that prevented the classic Rubin df calculation (thanks Jean-Batiste Pingaul) * FIXED: bug fixed in mice.impute.2l.norm() (thanks Robert Long) * CHANGED: 
faster .pmm.match2() from version 2.12 renamed to default .pmm.match() # mice 2.13 - 03-07-2012 / SvB * ADDED: new multilevel functions 2l.pan(), 2lonly.norm(), 2lonly.pmm() (contributed by Alexander Robitzsch) * ADDED: new quadratic imputation function: quadratic() (contributed by Gerko Vink) * ADDED: pmm2(), five times faster than pmm() * ADDED: new argument data.init in mice() for initialization (suggested by Alexander Robitzsch) * ADDED: mice() now accepts pmm as method for (ordered) factors * ADDED: warning and a note to 2l.norm() that advises to use type=2 for the predictors * FIXED: bug that chrashed plot.mids() if there was only one incomplete variable (thanks Dennis Prangle) * FIXED: bug in sample() in .pmm.match() when donor=1 (thanks Alexander Robitzsch) * FIXED: bug in sample() in mice.impute.sample() * FIXED: fixed '?data' bug in check.method() * REMOVED: wp.twin(). Now available from the AGD package # mice 2.12 - 25-03-2012 / SvB * UPDATE: version for launch of Flexible Imputation of Missing Data (FIMD) * ADDED: code fimd1.r-fim9.r to inst/doc for calculating solutions in FIMD * FIXED: more robust version of supports.transparent() (thanks Brian Ripley) * ADDED: auxiliary functions ifdo(), long2mids(), appendbreak(), extractBS(), wp.twin() * ADDED: getfit() function * ADDED: datasets: tbc, potthoffroy, selfreport, walking, fdd, fdgs, pattern1-pattern4, mammalsleep * FIXED: as.mira() added to namespace * ADDED: functions flux(), fluxplot() and fico() for missing data patterns * ADDED: function nelsonaalen() for imputing survival data * CHANGED: rm.whitespace() shortened * FIXED: bug in pool() that crashed on nonstandard behavior of survreg() (thanks Erich Studerus) * CHANGED: pool() streamlined, warnings about incompatibility in lengths of coef() and vcov() * FIXED: mdc() bug that ignored transparent=FALSE argument, now made visible * FIXED: bug in md.pattern() for >32 variables (thanks Sascha Vieweg, Joshua Wiley) # mice 2.11 - 21-11-2011 / SvB * UPDATE: definite reference to JSS paper * ADDED: rm.whitespace() to do string manipulation (thanks Gerko Vink) * ADDED: function mids2mplus() to export data to Mplus (thanks Gerko Vink) * CHANGED: plot.mids() changed into trellis version * ADDED: code used in JSS-paper * FIXED: bug in check.method() (thanks Gerko Vink) # mice 2.10 - 14-09-2011 / SvB * FIXED: arguments dec and sep in mids2spss (thanks Nicole Haag) * FIXED: bug in keyword "monotone" in mice() (thanks Alain D) # mice 2.9 - 31-08-2011 / SvB * FIXED: appropriate trimming of ynames and xnames in Trellis plots * FIXED: exported: spss2mids(), mice.impute.2L.norm() * ADDED: mice.impute.norm.predict(), mice.impute.norm.boot(), mice.impute.logreg.boot() * ADDED: supports.transparent() to detect whether .Device can do semi-transparent colors * FIXED: stringr package is now properly loaded * ADDED: trellis version of plot.mids() * ADDED: automatic semi-transparancy detection in mdc() * FIXED: documentation of mira class (thanks Sandro Tsang) # mice 2.8 - 24-03-2011 / SvB * FIXED: bug fixed in find.collinear() that bombed when only one variable was left # mice 2.7 - 16-03-2011 / SvB * CHANGED: check.data(), remove.lindep(): fully missing variables are imputed if allow.na=TRUE (Alexander Robitzsch) * FIXED: bug in check.data(). 
Now checks collinearity in predictors only (Alexander Robitzsch) * CHANGED: abbreviations of arguments eliminated to evade linux warnings # mice 2.6 - 03-03-2011 / SvB * ADDED: bwplot(), stripplot(), densityplot() and xyplot() for creating Trellis graphs * ADDED: function mdc() and mice.theme() for graphical parameters * ADDED: argument passing from mice() to lower-level functions (requested by Juned Siddique) * FIXED: erroneous rgamma() replaced by rchisq() in .norm.draw, lowers variance a bit for small n * ADDED: with.mids() extended to handle expression objects * FIXED: reporting bug in summary.mipo() * CHANGED: df calculation in pool(), intervals may become slightly wider * ADDED: internal functions mice.df() and df.residual() * FIXED: error in rm calculation for "likelihood" in pool.compare() * CHANGED: default ridge parameter changed # mice 2.5 - 06-01-2011 / SvB * ADDED: various stability enhancements and code clean-up * ADDED: find.collinear() function * CHANGED: automatic removal of constant and collinear variables * ADDED: ridge parameter in .norm.draw() and .norm.fix() * ADDED: mice.impute.polr() for ordered factors * FIXED: chainMean and chainVar in mice.mids() * FIXED: iteration counter for mice.mids and sampler() * ADDED: component 'loggedEvents' to mids-object for logging actions * REMOVED: annoying warnings about removed predictors * ADDED: updateLog() function * CHANGED: smarter handling of model setup in mice() * CHANGED: .pmm.match() now draws from the three closest donors * ADDED: mids2spss() for shipping a mids-object to SPSS * FIXED: change in summary.mipo() to work with as.mira() * ADDED: function mice.impute.2L.norm.noint() * ADDED: function as.mira() * FIXED: global assign() removed from mice.impute.polyreg() * FIXED: improved handling of factors by complete() * FIXED: improved labeling of nhanes2 data # mice 2.4 - 17-10-2010 / SvB * ADDED: pool() now supports class 'polr' (Jean-Baptiste Pingault) * FIXED: solved problem in mice.impute.polyreg when one of the variables was named y or x * FIXED: remove.lindep: intercept prediction bug * ADDED: version() function * ADDED: cc(), cci() and ccn() convenience functions # mice 2.3 - 14-02-2010 / SvB * FIXED: check.method: logicals are now treated as binary variables (Emmanuel Charpentier) * FIXED: complete: the NULL imputation case is now properly handled * FIXED: mice.impute.pmm: now creates between imputation variability for univariate predictor * FIXED: remove.lindep: returns 'keep' vector instead of data # mice 2.2 - 13-01-2010 / SvB * ADDED: pool() now supports class 'multinom' (Jean-Baptiste Pingault) * FIXED: bug fixed in check.data for data consisting of two columns (Rogier Donders, Thomas Koepsell) * ADDED: new function remove.lindep() that removes predictors that are (almost) linearly dependent * FIXED: bug fixed in pool() that produced an (innocent) warning message (Qi Zheng) # mice 2.1 - 14-09-2009 / SvB * ADDED: pool() now also supports class 'mer' * CHANGED: nlme and lme4 are now only loaded if needed (by pool()) * FIXED: bug fixed in mice.impute.polyreg() when there was one missing entry (Emmanuel Charpentier) * FIXED: bug fixed in plot.mids() when there was one missing entry (Emmanuel Charpentier) * CHANGED: NAMESPACE expanded to allow easy access to function code * FIXED: mice() can now find mice.impute.xxx() functions in the .GlobalEnv # mice 2.0 - 26-08-2009 / SvB, KO Major upgrade for JSS manuscript * ADDED: new functions cbind.mids(), rbind.mids(), ibind() * ADDED: new argument in mice(): 'post' in 
post-processing imputations * ADDED: new functions: pool.scaler(), pool.compare(), pool.r.squared() * ADDED: new data: boys, popmis, windspeed * FIXED: function summary.mipo all(object$df) command fixed * REMOVED: data.frame.to.matrix replaced by the internal data.matrix function * ADDED: new imputation method mice.impute.2l.norm() for multilevel data * CHANGED: pool now works for any class having a vcov() method * ADDED: with.mids() provides a general complete-data analysis * ADDED: type checking in mice() to ensure appropriate imputation methods * ADDED: warning added in mice() for constant predictors * ADDED: prevention of perfect prediction in mice.impute.logreg() and mice.impute.polyreg() * CHANGED: mice.impute.norm.improper() changed into mice.impute.norm.nob() * REMOVED: mice.impute.polyreg2() deleted * ADDED: new 'include' argument in complete() * ADDED: support for the empty imputation method in mice() * ADDED: new function md.pairs() * ADDED: support for intercept imputation * ADDED: new function quickpred() * FIXED: plot.mids() bug fix when number of variables > 5 # mice 1.21 - 15/3/2009 SvB Maintainance release * FIXED: Stricter type checking on logicals in mice() to evade warnings. * CHANGED: Modernization of all help files. * FIXED: padModel: treatment changed to contr.treatment * CHANGED: Functions check.visitSequence, check.predictorMatrix, check.imputationMethod are now coded as local to mice() * FIXED: existsFunction in check.imputationMethod now works both under S-Plus and R # mice 1.16 - 6/25/2007 * FIXED: The impution function impute.logreg used convergence criteria that were too optimistic when fitting a GLM with glm.fit. Thanks to Ulrike Gromping. # mice 1.15 - 01/09/2006 * FIXED: In the lm.mids and glm.mids functions, parameters were not passed through to glm and lm. # mice 1.14R - 9/26/2005 11:44AM * FIXED: Passive imputation works again. (Roel de Jong) * CHANGED: Random seed is now left alone, UNLESS the argument "seed" is specified. This means that unless you specify identical seed values, imputations of the same dataset will be different for multiple calls to mice. 
(Roel de Jong) * FIXED: (docs): Documentation for "impute.mean" (Roel de Jong) * FIXED: Function 'summary.mids' now works (Roel de Jong) * FIXED: Imputation function 'impute.polyreg' and 'impute.lda' should now work under R # mice 1.13 * Changed function checkImputationMethod, Feb 6, 2004 # mice 1.12 * Maintainance, S-Plus 6.1 and R 1.8 unicode, January 2004 # mice 1.1 * R version (with help of Peter Malewski and Frank Harrell), Feb 2001 # mice 1.0 * Original S-PLUS release, June 14 2000 mice/MD50000644000176200001440000004733714437371703011526 0ustar liggesusers0f0b343db9c8afd4b3f1d427dd031d90 *DESCRIPTION e44846ddd323d1c3daf0d24e8f1cfa6b *NAMESPACE 24da189e358a0c1a44b3d10498045289 *NEWS.md 0bdd0882c5b8884d6ddd5692d15d21ed *R/D1.R 238926abcd6dd44a6b6d327559444abf *R/D2.R 5a7d8d1b5865ecc566fb879076c8fb0b *R/D3.R b5cc06703b04879278a44807f8b6ec85 *R/RcppExports.R cc4ba99c40a6b30cd8d9963eaa5b6fa9 *R/ampute.R 1472e25c2acd473b3fdf3af173bd6501 *R/ampute.continuous.R 206bee6c5d5c69a765d74f71a2e91037 *R/ampute.default.R 4d36dc0697f6db7974faa6cd73a30571 *R/ampute.discrete.R 14cb5c29b32bc8284fcd37f6cd0d05b4 *R/ampute.mcar.R 420608a80961fc90c04cb62ff826b82c *R/anova.R 9c6c1d4bfc49127b666151a175a3c6ca *R/as.R 1908f9993d6342e54facd567769085d0 *R/auxiliary.R 7b64978a056a0a83068938a658809207 *R/barnard.rubin.R 8a2b222edf6007172e12fe10cb5c45c6 *R/blocks.R 45770ab348eb6ad475517eb5d61cfbaa *R/blots.R 97035505647c98a6835d944b44c23535 *R/boys.R 7b3487a4b55f89e4f8de5eafb80fe949 *R/brandsma.R b73e9d18911afc61aff6383703a1be7c *R/bwplot.R cd86fa7e37bdae96cc1e991fe4dce39e *R/bwplot.mads.R e0c77dd4fb4f5be9feeba7eccf4286e0 *R/cbind.R 30a7f23278424ef0ab5acf4676fe025f *R/cc.R a38e64e5b3de8a8a76922471043b150d *R/cci.R bad262a41a567bfb3a29550d3839197a *R/check.R 7737385601d25624c35a9e5a8571277a *R/check.deprecated.R 64022b9da595000cee13691a2552d1d4 *R/complete.R 89fcb4ce7233e40466f699aa2181eb09 *R/convergence.R 0cf9f6de8d808c0c0710c6dcf442d4fd *R/densityplot.R 373391ef9f5879657cdc7f0ead575c2d *R/design.R a9aa3330517f96b5049a22766ef4c29a *R/df.residual.R 35a9680763c25d12d39081514367cec2 *R/edit.setup.R 90f34868e50e2a11506a4cb99b18894a *R/employee.R d30bce8ae1629656b440272b81ca2475 *R/fdd.R 3925a40f25f7adaef285484bb21455e2 *R/fdgs.R 325c0efc3530d18f9287bfece8a4545d *R/filter.R 6cb281f9362a44549ca882d75892fb4f *R/fix.coef.R 0d33e3a05ae622c208f3cbd16996c811 *R/flux.R f4a26be21c2dedec96fe280609c862cf *R/formula.R 298104c482ccfd99eda27c61956bc3fe *R/futuremice.R 2d57c55e9418bd8f30c76c8940a554e0 *R/generics.R 09e7f5993204d816da6462df79a13bb9 *R/get.df.R 1ebcc3be72cc6aedf6874a934373aaaf *R/getfit.R ae55f3f412cba217d80e4a03171a32d9 *R/handles.R d750a6e00487f4f9b3025d277f8befbe *R/ibind.R 373dc862fda860e494a47ce558f2a1f7 *R/imports.R 1271521f882ec91a4504f9e8b350c32d *R/initialize.chain.R 1b64267dfa4bed9ea30ecbaf0afc058e *R/initialize.imp.R 8313b00b13d8141fe5f993b77c3744dd *R/install.on.demand.R bdbe7dd8833d6221441c3236df41656e *R/internal.R 4476b18e91ade0b90673731c2bc3e3ad *R/is.R bff59e6e19ae1daa44b99e83bec357b5 *R/leiden85.R 01c43049b6b9660dece5a77e97ede695 *R/lm.R 82673225d6ee4db0e95c51c69f1d3834 *R/mads.R 251893819224e83f29fa2f71c02ef147 *R/mammalsleep.R 44eaf944d971a40eaba47380b9575557 *R/mcar.R bf3dd1c065a5b85bc595bcf74f3f4855 *R/md.pairs.R f76bcfc98b8529dfee05be40fc75c173 *R/md.pattern.R 542265f6cdf62e92b76033e41445a06b *R/mdc.R 5f576ca7b5e3c63e889a137f0db8dabf *R/method.R 0bf5f7311cb2b7911c9f697667ae860f *R/mice-package.R e608cd17271d2312fa47481136479dd7 *R/mice.R 690fcd6f6589b783b3e1fd8ebf542233 *R/mice.impute.2l.bin.R 
b8f920d40e97b93e668e4a96731eea51 *R/mice.impute.2l.lmer.R b87355b88fcd5132bde7d860a722a385 *R/mice.impute.2l.norm.R 41a578fa62aef082a37c6b8f8338fe07 *R/mice.impute.2l.pan.R 06e4b437a5a2103963a9aa4c43600d1d *R/mice.impute.2lonly.mean.R d9713588dee2c92e58f33557d3fe758d *R/mice.impute.2lonly.norm.R 5432563fbb7f9a4f9d38d9c14da067d0 *R/mice.impute.2lonly.pmm.R 5fa658561fe13a76b51fb14a7757c678 *R/mice.impute.cart.R 16858b1b842ded2f3c076b1454959b44 *R/mice.impute.jomoImpute.R 80354de2221015ea914ecf6e516e0c69 *R/mice.impute.lasso.logreg.R e63c09f89a8bc62264e418bffb90cf7d *R/mice.impute.lasso.norm.R 9d41c1b3209353e1604e03172f680e55 *R/mice.impute.lasso.select.logreg.R c8753589509095c3a478916e401b7581 *R/mice.impute.lasso.select.norm.R 828efe745f14614bbb947b22e0446d41 *R/mice.impute.lda.R b164b22e605d9a172e470064ba911774 *R/mice.impute.logreg.R fae446a704050a2ba4fea1e445da4fe5 *R/mice.impute.mean.R 619fdbb99a50968bce16825424eaf87b *R/mice.impute.midastouch.R ef3429d647d6285cf3aaf515207ec443 *R/mice.impute.mnar.logreg.R ac7ce822527aa5dde30904b081e8e5c5 *R/mice.impute.mnar.norm.R 2bcc76dd5fbb8cd4242cf56d350c2e09 *R/mice.impute.mpmm.R c20c7494009c68ba3fa2c3ca353c9ab5 *R/mice.impute.norm.R 908a750382ef72f7913e58cd21aef1f0 *R/mice.impute.norm.boot.R edb637356c1871f51956ab3640189f8c *R/mice.impute.norm.nob.R be4039296f2d6e6cadc1821d47cb8822 *R/mice.impute.norm.predict.R 775d0a8ab2cce4c7c28962ac81805658 *R/mice.impute.panImpute.R 944e1a39786075e8fcefae876516d9fd *R/mice.impute.passive.R 241d5c2c8383359a18bb639f90538dc0 *R/mice.impute.pmm.R 712112b402c2576b066c22b2e586ce7d *R/mice.impute.polr.R 7b3a0856b7e147d470cfed3314f166ae *R/mice.impute.polyreg.R 3ef052c278d41902da70833421cfeaf7 *R/mice.impute.quadratic.R c32bbfd29cc158526c9c7d15720403ef *R/mice.impute.rf.R bb1e4188ed4ab7f5537f0245391f2e94 *R/mice.impute.ri.R c00eadb55e130b37fa2a32c792866c58 *R/mice.impute.sample.R 66460e0dd0cac9e3a966bdfbb926973f *R/mice.mids.R 5b802783a7d4d45282e65b87503b6c0e *R/mice.theme.R e176181cddabb72a60e995e5e520b9a9 *R/mids.R 5e1cdf8d0d80fc792981ddd6827a0f13 *R/mids2mplus.R 47b7c469868571883893aaa35dc1c8a1 *R/mids2spss.R 9e0e0da48418d1dfae908eb7f5c64254 *R/mipo.R 9088c86811c30e1fb6465f03c6a37f62 *R/mira.R e0c86c37951768c69727639c4f197f71 *R/mnar_demo_data.R 4615d3b5260da87239de00685aa9231a *R/ncc.R 2b706f58008482dc8b5fe166f0300dfe *R/nelsonaalen.R 8848f5546c87b9363cfa295e0eb76350 *R/nhanes.R c2cb94a73c50806f9ff5580be4b6048b *R/nhanes2.R cef0d370806adce791e857814154f1b4 *R/nimp.R 67c32ffa33b4f58bfd44b15ed9bd8377 *R/parlmice.R c13c09405c479fb736e5a8c6686852ca *R/parse.ums.R a6caa9134c2e584e12fdbdbbb9ba7b74 *R/pattern1.R 5b64cc7fef0045dc5bcb693260af425a *R/plot.R 5ec7d16239f568302a7a2619767663b0 *R/pool.R 28c743cbd2e0893392e24607ed3b7fbd *R/pool.compare.R 665ff9846c9e4fd52df6362ddb574fdf *R/pool.r.squared.R 73cc0ed9dbe238abe9e9a331b45929ed *R/pool.scalar.R 602460e02a329cb58b37d8e0cae93066 *R/popmis.R f5040d4a553c3850c1a9cd1ca675fe3a *R/pops.R 96bec656b5ae1a2d5980b082b09b59ef *R/post.R a31dfdaec938106a2c5670c193c0d148 *R/potthoffroy.R 9f0dfce969baf949dcf846cf358eed7b *R/predictorMatrix.R 010feda86f7d27e46540f3037dc02c4f *R/print.R 1412561b6448e2aa7471ee891098667d *R/quickpred.R b09f417ea386fb1d93fb13e318f6814f *R/rbind.R 94a0166d4a631e445118b66386a85536 *R/rm.whitespace.R 205a08109e88b58d08c276cfcf868555 *R/sampler.R 734bfd56d3e056ac5e92c076b8fea93d *R/selfreport.R 1131751fe364b6bf4d1580b3d62a09b6 *R/squeeze.R 9bb7f6dde26d98ee20e6d4d5a854dc4e *R/stripplot.R 2c6501e82b857e0433acbb90c50cdf46 *R/summary.R 
15586bbad2d6548b92dd8fa8ac062c56 *R/supports.transparent.R f6243d70f93af814b342e7d4a719bfb6 *R/tbc.R a41e5f72713896eede7dfaf850f7284e *R/tidiers.R 066f7b2ad688e676a84d897395ae9279 *R/toenail.R cd276d1ded1405e2eecf8fe891ac0ce0 *R/toenail2.R 870b84862433c38c1858eec5e05c90c1 *R/validate.arguments.R 5245689ac2a9df8863fb7d1dfca5bbfc *R/visitSequence.R fc79d28c21bbee900232e228976979a0 *R/walking.R 8388d5ea407d8a4417788ec274438900 *R/where.R 40a380716b3cb99641a11e89d0bc0439 *R/windspeed.R 7c17ff4ca7133c4c661c2adaa59665a2 *R/with.R 19a09cb529902e35c2cb61953915c002 *R/xyplot.R 9cd97c74d57104fcd9f29bb68c4d44d7 *R/xyplot.mads.R 3db8671b36c1c326bb3e0122e346df65 *R/zzz.R f281617c3595cdaae39a6fb4f96386ce *README.md 439bf689fa27cf9affd0335332142165 *build/partial.rdb 3709c798ba6427dfc28b9e8c47b165e8 *data/boys.rda 32cb50043192073624d7a3f7215ad801 *data/brandsma.rda ee4bdbb28f21c4e95b7a449245eb620c *data/employee.rda 8ce389a211ad979d60676e039df7319f *data/fdd.pred.rda 972229ddd3e452cfec73ccbb6c31892c *data/fdd.rda 5ea54eea59179e3ab0dec78a373f8459 *data/fdgs.rda 365f612df41418b00e12e954bf501414 *data/leiden85.rda 0d0cd0c7d3a1fc652daf88dc5fff39d7 *data/mammalsleep.rda 3089353b6ad68b780988d157b85ffb1e *data/mnar_demo_data.rda 2ab63b9932cfe39c847cc2eac344848b *data/nhanes.rda b2fe3e7c172f48efd23e0cc94fd0d203 *data/nhanes2.rda 4d264592bd199a63030472eccd6fd87d *data/pattern1.rda 66af0bc4eeb77a3524948906746fcf55 *data/pattern2.rda 487cb144a4a9185dd1c0f9fd3007973d *data/pattern3.rda 5cdd5b75e7c03bb3585eae04b33567a2 *data/pattern4.rda 0b2ffd42fa531abed362b545dba051d6 *data/popmis.rda 55e2f8d9a4dcd0a6dcd23bcd7d048201 *data/pops.pred.rda 40825f787646b160277a2a7e209b129b *data/pops.rda 4f9667b7d0e46602a22516e1ee845442 *data/potthoffroy.rda 6b5ef38755a87c6e6a60896a0a922ef2 *data/selfreport.rda d3b54e3ae3665be970c8648c524118d8 *data/tbc.rda 6fd8c89895806f89236d3856339cf2eb *data/tbc.target.rda 02c7a5dde878a13a1c6d17ca0bf8f02d *data/toenail.rda 5135c2ab6bc3d23abe90caefa3bb2b78 *data/toenail2.rda c1a7281864590a4890fa4b20ba991a20 *data/walking.rda 50833f651067e6300f4b1940f94f9503 *data/windspeed.rda cb067e5e8a55719915e08c19b043d4b1 *inst/CITATION bbedc3bef12ecfbf87d80d2b61575107 *man/D1.Rd a35538e6fe5d8e3286fdb6f0c0c5fe92 *man/D2.Rd 413d01eb8e3c8888cf81f7fcc7261e82 *man/D3.Rd 1333716b656c6ceb5de41ba4323aca53 *man/ampute.Rd 6bc699661ec39898fc7f0ec626e2399c *man/ampute.continuous.Rd 84384bf303df09f826e1ec28459d6304 *man/ampute.default.freq.Rd 4becbb3c3b8cfaca2190a712206ce82a *man/ampute.default.odds.Rd 1abfcbde6a8f38e6dd08cf1c41a546ad *man/ampute.default.patterns.Rd 4100e92e914f4416ab034a495b98c4a4 *man/ampute.default.type.Rd f2f30d08529613b4c6e4fba1a8a0d5cf *man/ampute.default.weights.Rd 8f0c6ebb67497a978950d4f8f755fd30 *man/ampute.discrete.Rd 63266def0d19e62aad35631f57583c14 *man/ampute.mcar.Rd b6ef24a9a94759c4532079a0cc586b01 *man/anova.Rd 93608a37b8792ad640a57a82c21043f7 *man/appendbreak.Rd b0e5d2b9173092886ac4c87e5c5a3032 *man/as.mids.Rd f29900b41906dbce971fdb748129fb30 *man/as.mira.Rd fc507575643e3e6711c83db7cf487d7c *man/as.mitml.result.Rd c936cfd2f27a2ef6f8b257b093cf6382 *man/boys.Rd 69a2596af382dff6cc5c97da74053a63 *man/brandsma.Rd 1e98eb9f15b975beab22cc4b067a25a4 *man/bwplot.mads.Rd aa0d7e0c62aab7b0ebf57c532379a0d9 *man/bwplot.mids.Rd 3dd82f4bfc7e0b5f1597b975a631f575 *man/cbind.Rd efe2c5ab75d5e16ef4a51d3a598a800f *man/cc.Rd b9a8fd5a0b5b778132a677fc9704c181 *man/cci.Rd 097562fa8d4debf14e9bdd11a53400d1 *man/complete.mids.Rd 7c60c855dbe3f09584d49a025c3cfef3 *man/construct.blocks.Rd a611f7b3a9f678fa5de5ab29dd5504c4 
*man/convergence.Rd ce4522bfbc3373bc30965a5dd2e79e16 *man/densityplot.mids.Rd fe21c6357c40d49af35862ee9b287203 *man/employee.Rd 9f4509adaea249de9c1f507a6cabe2a0 *man/estimice.Rd 8f23d34b9d00d344ad90971356a25e10 *man/extend.formula.Rd 1cb4bf10eab8c87f36318c7158af0456 *man/extend.formulas.Rd 8c98624d0af097d231bc36c714e01665 *man/extractBS.Rd 5af4e8e865f8587d60feff43c7c4642d *man/fdd.Rd 0260f626bbaff1cfa620155b3fac5586 *man/fdgs.Rd 09c1a9bf506601397644ceb429460f73 *man/fico.Rd c526aaed478e3df2987e0b9c77eed1b4 *man/filter.mids.Rd 9dc8f43d389b66a5292fd52b434bab03 *man/fix.coef.Rd bd43c3ce98d38e6ba20a3ac555d2c787 *man/flux.Rd 2e12d0b210b78f97fbd084961efc3fe9 *man/fluxplot.Rd 50c9c8d5ae0babe3bd08aab05a71f314 *man/futuremice.Rd 61607a0e28ce925811096c65329d256b *man/getfit.Rd 2dde2beed9a1fc5f01e45dac991f006a *man/getqbar.Rd 04d75fe8e23332c13e53020e798872ae *man/glance.mipo.Rd a58e66da9b60f9916581939ab3ac2ed9 *man/glm.mids.Rd 80ffd8bc95c5496fa0bc5581948d2abe *man/ibind.Rd bf228b424dfcc660a3ef459e3de600c2 *man/ic.Rd fc1c00f3edc62c7e22dc3b5b9a3e7e32 *man/ici.Rd fa2f428cbe0167adb38f65ce85d09a7d *man/ifdo.Rd 5e3f6e4be4e37c414de7298d6a9936e7 *man/is.mads.Rd c7b112c5fabe6d6af37e22706334981e *man/is.mids.Rd 505b6d5a4d2028f39a4e3b1502c28f01 *man/is.mipo.Rd 75eaed69a60dcbd4fffd579d69926bde *man/is.mira.Rd 698720da50609f106a6440d268a02727 *man/is.mitml.result.Rd bf5e69778062f02e0f60f36a03e01c90 *man/leiden85.Rd b8d77548c59226847b4c60345752e006 *man/lm.mids.Rd f5c08da4034c1ba5e30fc2db091f097a *man/mads-class.Rd e2c247afbee546d66160c3e7c737cfba *man/make.blocks.Rd 5497cffdb97e78096789b15a5356abe8 *man/make.blots.Rd 674e93b764e73cc96595174226be5d12 *man/make.formulas.Rd 86f0b79b8ba9d0b332f69cc84a317e76 *man/make.method.Rd 475ee87af164417b427751c3b20db32d *man/make.post.Rd e47fcc19cc5ef77ce4d44cc3404c02cd *man/make.predictorMatrix.Rd 42493e00f9c266aa752d8b1228b18dc3 *man/make.visitSequence.Rd 2abb03c5101bd73c2c7a3e4a7099f2fb *man/make.where.Rd 327c3f2ed7b48b0acb6d48f9f1fc7c29 *man/mammalsleep.Rd 6921a7c1ce0ead316365873b82d9215d *man/matchindex.Rd 2f6472227d98d1ee64b065f90212aea6 *man/mcar.Rd f4297db3fdaa536f93a79857a5b8f2d1 *man/md.pairs.Rd bc2483d91e5017322fe921b12fa1c0a2 *man/md.pattern.Rd 838929d2969dfdf17aba6dc6a3b4f2f5 *man/mdc.Rd 7651adb763f1f8e5ddcc793fe6aec7d8 *man/mice.Rd e4ab40d3e94b9d0a3169f98bca47203f *man/mice.impute.2l.bin.Rd 565d066ed17c30e48cc366c5fba94854 *man/mice.impute.2l.lmer.Rd 21fa0a119bca29f9d756486e4547f110 *man/mice.impute.2l.norm.Rd 307e4957ab275df5893755aae66eece5 *man/mice.impute.2l.pan.Rd 3fe0393a0a59db2e060ceba9c30f0a83 *man/mice.impute.2lonly.mean.Rd 5cfb1f76b8c00ca2e95891661e1e1d94 *man/mice.impute.2lonly.norm.Rd 41d33d175cee5dbace2266c31ed90bfa *man/mice.impute.2lonly.pmm.Rd f01b81296ba3d7145c6d78e8304e5971 *man/mice.impute.cart.Rd 08ceccaa8c63ea9bffba41b545a9d444 *man/mice.impute.jomoImpute.Rd c64c861920623e2a5020c70660edd407 *man/mice.impute.lasso.logreg.Rd 4b96d792a5c626da2c2127c2003e67c3 *man/mice.impute.lasso.norm.Rd eae2553a5d72a4c60e1eee641c6bd2e5 *man/mice.impute.lasso.select.logreg.Rd a77821fd11fa6ca5a1560b8a95a8524d *man/mice.impute.lasso.select.norm.Rd 0440c5fece87a417bc0567284f5b15b2 *man/mice.impute.lda.Rd 7c0ec78109277381084ba07cd5bf4441 *man/mice.impute.logreg.Rd bf58678d6ef6f77cac2ae156c51d9e2d *man/mice.impute.logreg.boot.Rd 8bc24a3efd375ca13217fa1c9f3cb4c6 *man/mice.impute.mean.Rd a01da7ac38dc57b3e8f725b960dd2a2d *man/mice.impute.midastouch.Rd e27a2c65b77c60852106e7236622371a *man/mice.impute.mnar.Rd f85908d69f95261fae9341e677ae1ed6 *man/mice.impute.mpmm.Rd 
923a639d38ec3eafcec620f246e8a8a4 *man/mice.impute.norm.Rd 607725a852e4ba37731753dda1176264 *man/mice.impute.norm.boot.Rd d73692ecd21e6fc68430f3744b133932 *man/mice.impute.norm.nob.Rd 56489d4117068f8ceb64cc2936928494 *man/mice.impute.norm.predict.Rd 6167ca0f339dc050c41f68434e2cb305 *man/mice.impute.panImpute.Rd daacd7a130b7cf0716a391d4491aff36 *man/mice.impute.passive.Rd 36e860af219e8420ba5a146ed9949ccd *man/mice.impute.pmm.Rd 8b4cf0e9b1a232268af532fdb4cbc72e *man/mice.impute.polr.Rd b4684ce4783e21a4310e141093ebabc5 *man/mice.impute.polyreg.Rd a8479b715974487cb3dcf0a8dd5beaf1 *man/mice.impute.quadratic.Rd bb2a5d30cec712c839e368c4df54c539 *man/mice.impute.rf.Rd 5c3c734539957ceabaf6c262f8269590 *man/mice.impute.ri.Rd 9dd91ff9d53e05678c6863215546c38b *man/mice.impute.sample.Rd b80b5d6b875494f23f04623eee90ecd1 *man/mice.mids.Rd 87bc291aedcaf1705bff2b5cd8dfca1d *man/mice.theme.Rd 8c53f2d333cf2f3d7b954a3805cbed27 *man/mids-class.Rd 186a052848ac6e5bfd02ffbcdbc67087 *man/mids2mplus.Rd 2eb29532b1428fe4eb5b5e99af0e2312 *man/mids2spss.Rd 2693c90d4c5d19d66098b9862f841c12 *man/mipo.Rd 8e1de550904e1f21f6ce0a8cdd5fb014 *man/mira-class.Rd bfecc7809e9c6a1f7543502806358a65 *man/mnar_demo_data.Rd 6d0b99d6ef8f2818d101770615d61250 *man/name.blocks.Rd 650dfaaa15bbea43994984c27c4e190d *man/name.formulas.Rd d61a4e4aa6e73d7e9cdc65eb2ab6d641 *man/ncc.Rd 73e37dcb10a06ac4caf5f6fa00dc7219 *man/nelsonaalen.Rd 14c6e8dd4719d2c2e84a44216e406e5c *man/nhanes.Rd eadde1e044ae64aa211ffc8c59a8fc56 *man/nhanes2.Rd c281240bb8ae60bbad4ca49a72180f34 *man/nic.Rd 81f3dc665862a9f158226a8462a35c2c *man/nimp.Rd 0de9f6b29c13e795768afcd4622347ca *man/norm.draw.Rd 8993c6cc390610d9c58460a2db81f1d8 *man/parlmice.Rd 2e883389db494688f969ffa7d0fab634 *man/pattern.Rd cb2b25bb29b0f0afc562778ea6b09eb2 *man/plot.mids.Rd 4a9cc1f45b853764d494bd4873b1e6c0 *man/pmm.match.Rd 33d213fe7b8dacf90213582e27cf9e78 *man/pool.Rd d275b67400af717465e20a43782b1970 *man/pool.compare.Rd 0e52a15ac1d8c85fd2fd1f456837e6b2 *man/pool.r.squared.Rd 3c420057baf44403f68aa23fdf0b749c *man/pool.scalar.Rd c8ab16475defca2cc4849ee50a38e8da *man/popmis.Rd ba22202989dc5b33d793b588fcd88b94 *man/pops.Rd c1cdcefe99d64aa5a9624f051f788e45 *man/potthoffroy.Rd 66c7521aab3da1592ec21d7411f7eaf5 *man/print.Rd df1da421444ab898dc53ef32b7d5c7df *man/print.mads.Rd 57cf8fafe5e937f4fa5640a438b7db1f *man/quickpred.Rd 473098328539c5cda47ca6a077874745 *man/reexports.Rd 793831fac35d7b7d927d57f2b88ffec1 *man/selfreport.Rd 1a705e7b3002e6d68cc55e0d84cc6106 *man/squeeze.Rd fa533dc4b02409dd8833fd3f21049ba7 *man/stripplot.mids.Rd d5a15f57cd3c33e18b0aaf9a4c6fad96 *man/summary.Rd 17a7dc182cf3a03625a7141155c69444 *man/supports.transparent.Rd d8ebe298453bab147b4294180dacf1ac *man/tbc.Rd 1d47630ed890073021368ea013ac1ff7 *man/tidy.mipo.Rd 47022dc1c6219bbf62ce8f0030e2551b *man/toenail.Rd 2b242fa899539374771fffa28cfbafbb *man/toenail2.Rd 1098e8e09b649d3491efefd05722052b *man/version.Rd b5dfa2a6346201d4f99e20a7502fdecf *man/walking.Rd 1408cfd49636fe78f4b3f84b65e8d530 *man/windspeed.Rd 6db07dad8b24ad35cade2811f0acc56c *man/with.mids.Rd cacaa5a43065ce72f0058466f94bdef5 *man/xyplot.mads.Rd f00fc35cdaef4b00986ae1fe7f333a47 *man/xyplot.mids.Rd b3f86c20b81b51f5162874977d38784d *src/Makevars 73505498722ad4b07b0ddb3067e92523 *src/Makevars.win abf0796b9ead28a345d94562dbb1eed2 *src/RcppExports.cpp cbf1ff3af50a0baa98c0ae1e2bf5e7d4 *src/legendre.cpp 1ade0b5c10c7372904e8a26a14b98692 *src/match.cpp 5c3f7bae1a08a4533db939a8d5a17c8a *src/matchindex.cpp d392e8954a628ed6bdf7039e4655a0c3 *tests/testthat.R 
f2a3db57ddf9c367d42a65d8e70a8a11 *tests/testthat/test-D1.R ad3ce204a6a7d6150f4cfb4ba8728193 *tests/testthat/test-D3.R 79afe72eac67c1c967742972bfac44bf *tests/testthat/test-ampute.R de7acf354082e7222138e91336945b0c *tests/testthat/test-anova.R 755e60fc7d4e3e9dff7c01eb43d447ed *tests/testthat/test-as.mids.R 10f87fb824060e1c04c29c4f88a5fe32 *tests/testthat/test-blocks.R e01724468f11f4d450f6f8c7905a55f2 *tests/testthat/test-blots.R 06b51edd3ec98f4d7e432cfefa7d5335 *tests/testthat/test-cbind.R 6a492bfe7793d1cdd794c41f5be3bd79 *tests/testthat/test-check.formula.R 37e3f9b94f54b8e2165dd830eda12877 *tests/testthat/test-check.visitSequence.R 6d76abfe8110c76d650a95772e502dbf *tests/testthat/test-complete.R a3e687a49b548fa25a3b5fe0170f7c5a *tests/testthat/test-filter.R 0d267a2f2042858090cffcfc3ac9f656 *tests/testthat/test-formulas.R 6849cfe01f09ee51d91a34dc3ae54cc6 *tests/testthat/test-loggedEvents.R a0471f0419b0f85b535099d3b10f8562 *tests/testthat/test-make.predictorMatrix.R fc22d970775211e1deae6f73ad3ac1e5 *tests/testthat/test-md.pattern.R 51ad7f7455bcd2cbd4985a859ce33cde *tests/testthat/test-mice-initialize.R 36c4b55b10b46bfe23e17a3358e5c61d *tests/testthat/test-mice.R 6ed71404ccd4b6809181c539442cfe62 *tests/testthat/test-mice.impute.2l.bin.R c7e30eec51744b0fad3734d6d091a7d6 *tests/testthat/test-mice.impute.2l.lmer.R 7cc1a1b29b571f0eb65436b22c998bbc *tests/testthat/test-mice.impute.2l.norm.R 8901b9817a18a7d0439d0224633c130e *tests/testthat/test-mice.impute.2lonly.mean.R 9afaecd8d8c81518348589f2b882f8d4 *tests/testthat/test-mice.impute.2lonly.norm.R 51f860c6a1f1a033e76d13730440502f *tests/testthat/test-mice.impute.durr.logreg.R ce49aa7b7050c6aaec8dac40cec2986a *tests/testthat/test-mice.impute.durr.norm.R 039301a04bd94f4ef6db8127d3f01b0f *tests/testthat/test-mice.impute.iurr.logreg.R efcdf63d7e3befe5c62ea55988864e33 *tests/testthat/test-mice.impute.iurr.norm.R 5301a3ceb39eb1b37a729d30e31d01d4 *tests/testthat/test-mice.impute.jomoImpute.R bceb704b6f135e387eb4415943d2b65a *tests/testthat/test-mice.impute.logreg.R 772aa2fb16063e72ab14dc9b2af68e34 *tests/testthat/test-mice.impute.mpmm.R 7b3e607ae460a8ba9a4ca0ea5bd9073a *tests/testthat/test-mice.impute.norm.R 5eb30b53ece4da020e36f797fe5bf80b *tests/testthat/test-mice.impute.panImpute.R 3eaa87cd6b544c8796fd6e8f5356bb88 *tests/testthat/test-mice.impute.pmm.R 73728b9bb0c889a90cfe8b597dbecb78 *tests/testthat/test-mice.impute.polr.R 15a772d00b2e997d68ddaefc533439d3 *tests/testthat/test-mice.impute.rf.R 9bf568831dcd8cda3a09948b5550fb7d *tests/testthat/test-mira.R 31e003394e0e0707b9e231fb9a9c31b6 *tests/testthat/test-newdata.R 5013c295a64475b976a3cf3bde3b49af *tests/testthat/test-parlmice.R 613f7172b875597d399f6a5ad8634199 *tests/testthat/test-pool.R 2cd544312cdc4d4b24c46288b2e3e8e5 *tests/testthat/test-pool.r.squared.R 893477a422baac279eb57a9fa1e5856a *tests/testthat/test-rbind.R 02d1aa916a6fb30b81f16b62c0617df5 *tests/testthat/test-remove.lindep.R de1696c09367c0e7e9ec9e9aff154436 *tests/testthat/test-tidiers.R 9d2526394239b7da86fb6389c8f027b4 *tests/testthat/test-with.R mice/inst/0000755000176200001440000000000014437176012012151 5ustar liggesusersmice/inst/CITATION0000644000176200001440000000125614433400023013276 0ustar liggesuserscitHeader("To cite mice in publications use:") bibentry( bibtype = "Article", title = "{mice}: Multivariate Imputation by Chained Equations in R", author = c(person("Stef", "van Buuren"), as.person("Karin Groothuis-Oudshoorn")), journal = "Journal of Statistical Software", year = "2011", volume = "45", number = "3", pages = "1-67", 
doi = "10.18637/jss.v045.i03", textVersion = paste("Stef van Buuren, Karin Groothuis-Oudshoorn (2011).", "mice: Multivariate Imputation by Chained Equations in R.", "Journal of Statistical Software, 45(3), 1-67.", "DOI 10.18637/jss.v045.i03."))
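The CITATION file above is an R `bibentry()` declaration that R converts into a citation object for the installed package. As a minimal sketch (assuming `mice` is installed from CRAN; `citation()` and `toBibtex()` are standard `utils` functions), a user could retrieve and convert that record as follows:

```r
# Build the citation record that R derives from inst/CITATION
cit <- citation("mice")

# Human-readable form, matching the citHeader()/textVersion shown above
print(cit)

# BibTeX form, convenient for reference managers
toBibtex(cit)
```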
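The MD5 manifest listed earlier in this archive pairs a checksum with every file in the source tree, which is what allows an unpacked copy to be verified against the released tarball. The sketch below is illustrative only and not part of the package; `verify_md5()` and the path `path/to/mice` are hypothetical, while `tools::md5sum()` is the standard R helper for computing the checksums.

```r
# Recompute the checksum of every file named in the MD5 manifest and
# compare it with the recorded value (hypothetical helper, not in mice).
verify_md5 <- function(pkg_dir) {
  # Manifest lines have the form "<md5> *<relative/file/path>";
  # assumes file names contain no spaces, as in this package.
  manifest <- read.table(file.path(pkg_dir, "MD5"),
                         header = FALSE,
                         col.names = c("md5", "file"),
                         stringsAsFactors = FALSE)
  manifest$file <- sub("^\\*", "", manifest$file)  # drop the leading '*'
  observed <- tools::md5sum(file.path(pkg_dir, manifest$file))
  data.frame(file = manifest$file,
             ok   = unname(observed) == manifest$md5)
}

# Usage (hypothetical path to an unpacked source tree):
# verify_md5("path/to/mice")
```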