## ---- file: lavaan/inst/CITATION ----

citHeader("To cite lavaan in publications use:")

citEntry(entry = "Article",
  title   = "{lavaan}: An {R} Package for Structural Equation Modeling",
  author  = as.person("Yves Rosseel"),
  journal = "Journal of Statistical Software",
  year    = "2012",
  volume  = "48",
  number  = "2",
  pages   = "1--36",
  url     = "http://www.jstatsoft.org/v48/i02/",

  textVersion =
  paste("Yves Rosseel (2012).",
        "lavaan: An R Package for Structural Equation Modeling.",
        "Journal of Statistical Software, 48(2), 1-36.",
        "URL http://www.jstatsoft.org/v48/i02/.")
)

## ---- file: lavaan/tests/testthat.R ----

library(testthat)
library(lavaan)

# run tests
test_check("lavaan")

## ---- file: lavaan/tests/testthat/helper-skip_level.R ----

skip_level <- function(test_lvl) {
    lvl <- if (nzchar(s <- Sys.getenv("LAV_TEST_LEVEL")) &&
               is.finite(s <- as.numeric(s))) s else 1
    if (test_lvl > lvl) testthat::skip(paste("test level", test_lvl, ">", lvl))
}
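# Usage sketch (not run): tests guarded by skip_level(k) are executed only
# when the LAV_TEST_LEVEL environment variable is set to k or higher, e.g.
# Sys.setenv(LAV_TEST_LEVEL = "2")
# testthat::test_dir("tests/testthat")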
## ---- file: lavaan/tests/testthat/test-skip_example.R ----

context("skip only")

test_that("skip test", {
  skip_level(2)
  expect_identical(TRUE, FALSE)
})

## ---- file: lavaan/tests/testthat/test-lav_mvnorm.R ----

### Terrence D. Jorgensen
### Last updated: 25 January 2017
### test lav_mvnorm_* functions

context("lav_mvnorm_*")

## complete data
varnames <- paste("x", 1:9, sep = "")
H9 <- HolzingerSwineford1939[ , varnames]
## impose missingness
H9miss <- H9
H9miss$x5 <- ifelse(H9miss$x1 <= quantile(H9miss$x1, .3), NA, H9miss$x5)
H9miss$x9 <- ifelse(H9miss$x4 <= quantile(H9miss$x4, .3), NA, H9miss$x9)
## fit model to complete and incomplete data
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

#########################
## Test Complete cases ##
#########################

cfit <- cfa(HS.model, data = H9, meanstructure = TRUE)

## save summary statistics
cM <- lavInspect(cfit, "sampstat", add.class = FALSE)$mean # matches round(colMeans(H9), 3)
cS <- lavInspect(cfit, "sampstat", add.class = FALSE)$cov  # matches round(cov(H9)*300/301, 3)
## model-implied moments
cMu <- lavInspect(cfit, "mean.ov", add.class = FALSE)
cSigma <- lavInspect(cfit, "cov.ov", add.class = FALSE)

## sum casewise log-likelihoods under saturated model
cLL1 <- fitMeasures(cfit)[["unrestricted.logl"]]
cLL2 <- sum(mnormt::dmnorm(H9, mean = cM, varcov = cS, log = TRUE))
#cLL3 <- sum(mvtnorm::dmvnorm(H9, mean = cM, sigma = cS, log = TRUE))
## functions of actual interest
cLL4 <- sum(lav_mvnorm_dmvnorm(Y = as.matrix(H9), Mu = cM, Sigma = cS))
cLL5 <- lav_mvnorm_h1_loglik_data(as.matrix(H9), casewise = FALSE)
cLL6 <- sum(lav_mvnorm_h1_loglik_data(as.matrix(H9), casewise = TRUE))

test_that("6 saturated log-likelihoods match for complete data", {
  expect_equal(cLL1, cLL2)
  # expect_equal(cLL1, cLL3)
  expect_equal(cLL1, cLL4)
  expect_equal(cLL1, cLL5)
  expect_equal(cLL1, cLL6)
})
rm(cLL1, cLL2, #cLL3,
   cLL4, cLL5, cLL6)

## sum casewise log-likelihoods under target model
cLL1 <- fitMeasures(cfit)[["logl"]]
cLL2 <- sum(mnormt::dmnorm(H9, mean = cMu, varcov = cSigma, log = TRUE))
#cLL3 <- sum(mvtnorm::dmvnorm(H9, mean = cMu, sigma = cSigma, log = TRUE))
cLL4 <- sum(lav_mvnorm_dmvnorm(Y = as.matrix(H9), Mu = cMu, Sigma = cSigma))
cLL5 <- lav_mvnorm_loglik_samplestats(sample.mean = cM, sample.cov = cS,
                                      sample.nobs = nobs(cfit),
                                      Mu = cMu, Sigma = cSigma)

test_that("5 target-model log-likelihoods match for complete data", {
  expect_equal(cLL1, cLL2)
  # expect_equal(cLL1, cLL3)
  expect_equal(cLL1, cLL4)
  expect_equal(cLL1, cLL5)
})
rm(cLL1, cLL2, #cLL3,
   cLL4, cLL5)

##################
## Missing Data ##
##################

mfit <- cfa(HS.model, data = H9miss, meanstructure = TRUE, missing = "fiml")

## list per missing-data pattern
lavInspect(mfit, "coverage")
pattern <- lavInspect(mfit, "pattern")
H9logic <- !is.na(H9miss)
## indicators for which pattern each row belongs to
# lav_data_missing_patterns(H9miss)$case.idx
indPatterns <- sapply(1:4, function(pp) {
  apply(H9logic, 1, function(x) all(x == pattern[pp, ]))
})
all(rowSums(indPatterns) == 1) # check exactly 1 pattern per person
## lists of sample stats per pattern
# (mN <- colSums(indPatterns)) # N per pattern
# mM <- lapply(1:4, function(pp) {
#   colMeans(H9miss[indPatterns[,pp], varnames[ pattern[pp,] ] ])
# })
# mS <- lapply(1:4, function(pp) {
#   cov(H9miss[indPatterns[,pp], varnames[pattern[pp,]]]) * (mN[pp] - 1) / mN[pp]
# })
## lists of model-implied moments
mMu <- lavInspect(mfit, "mean.ov", add.class = FALSE)
mSigma <- lavInspect(mfit, "cov.ov", add.class = FALSE)

## sum casewise log-likelihoods under target model for each pattern
mLL1 <- fitMeasures(mfit)[["logl"]]
mLL2 <- sum(sapply(1:4, function(pp) {
  sum(apply(H9miss[indPatterns[,pp], varnames[pattern[pp,]]], 1,
            mnormt::dmnorm, mean = mMu[varnames[pattern[pp,]]],
            varcov = mSigma[varnames[pattern[pp,]], varnames[pattern[pp,]]],
            log = TRUE))
}))
#mLL3 <- sum(sapply(1:4, function(pp) {
#  sum(apply(H9miss[indPatterns[,pp], varnames[pattern[pp,]]], 1,
#            mvtnorm::dmvnorm, mean = mMu[varnames[pattern[pp,]]],
#            sigma = mSigma[varnames[pattern[pp,]], varnames[pattern[pp,]]],
#            log = TRUE))
#}))
## functions of actual interest
mLL4 <- lav_mvnorm_missing_loglik_data(H9miss, mMu, mSigma, pattern = FALSE)
mLL5 <- lav_mvnorm_missing_loglik_data(H9miss, mMu, mSigma, pattern = TRUE)
## from sample stats
mLL6 <- lav_mvnorm_missing_loglik_samplestats(mfit@SampleStats@missing[[1]],
                                              mMu, mSigma)

test_that("6 target-model log-likelihoods match for missing data", {
  expect_equal(mLL1, mLL2)
  # expect_equal(mLL1, mLL3)
  expect_equal(mLL1, mLL4)
  expect_equal(mLL1, mLL5)
  expect_equal(mLL1, mLL6)
})
rm(mLL1, mLL2, #mLL3,
   mLL4, mLL5, mLL6)

#########################
## run tests in this file
# test_file("tests/testthat/test-lav_mvnorm.R")
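## A minimal by-hand sketch (not part of the test suite) of the closed-form
## saturated log-likelihood the comparisons above verify, assuming complete
## data and ML (divide-by-n) moments: at the saturated MLEs the average
## Mahalanobis distance equals p, so sum_i log phi(y_i; ybar, S) reduces to
## -n/2 * (p*log(2*pi) + log(det(S)) + p).
# Y <- as.matrix(H9); n <- nrow(Y); p <- ncol(Y)
# S <- cov(Y) * (n - 1) / n
# ll.by.hand <- -n/2 * (p * log(2 * pi) + log(det(S)) + p)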
## ---- file: lavaan/tests/testthat/test-lav_matrix.R ----

context("lav_matrix")

A <- matrix(1:16, nrow=2)
A_sqr <- matrix(1:16, nrow=4)
A_sym <- matrix(1:9, nrow=3); A_sym[upper.tri(A_sym)] <- t(A_sym)[upper.tri(A_sym)]

test_that("lav_matrix_vech matches using lower.tri", {
  expect_identical( lav_matrix_vech(A), A[lower.tri(A, diag = TRUE)] )
})

test_that("lav_matrix_vech without diagonal matches using lower.tri", {
  expect_identical( lav_matrix_vech(A, diagonal = FALSE), A[lower.tri(A)])
})

test_that("lav_matrix_vech and lav_matrix_vechru are identical on a symmetric matrix", {
  for (diagonal in c(TRUE, FALSE))
    expect_identical(lav_matrix_vech(A_sym, diagonal),
                     lav_matrix_vechru(A_sym, diagonal))
})

test_that("lav_matrix_vechr and lav_matrix_vechu are identical on a symmetric matrix", {
  for (diagonal in c(TRUE, FALSE))
    expect_identical(lav_matrix_vechr(A_sym, diagonal),
                     lav_matrix_vechu(A_sym, diagonal))
})
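## Illustration (not run), following the lower.tri() comparison above:
## vech() stacks the lower triangle column by column, so for the 3 x 3
## symmetric A_sym, lav_matrix_vech(A_sym) should equal
## c(A_sym[1,1], A_sym[2,1], A_sym[3,1], A_sym[2,2], A_sym[3,2], A_sym[3,3])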
"lav_partable_unrestricted", "lav_partable_npar", "lav_partable_ndat", "lav_partable_df", "lav_partable_labels", "lav_partable_from_lm", "lav_partable_complete", "lav_partable_attributes", "lav_partable_merge", "lav_partable_constraints_def", "lav_partable_constraints_ceq", "lav_partable_constraints_ciq", # lav_constraints "lav_constraints_parse", # lav_func "lav_func_gradient_complex", "lav_func_gradient_simple", "lav_func_jacobian_complex", "lav_func_jacobian_simple", # used by semTools! # lav_model "lav_model_get_parameters", "lav_model_set_parameters", "lav_model_implied", "lav_model_vcov_se", # deprecated functions "vech", "vech.reverse", "vechru", "vechru.reverse", "lower2full", "vechr", "vechr.reverse", "vechu", "vechu.reverse", "upper2full", "duplicationMatrix", "commutationMatrix", "sqrtSymmetricMatrix" ) # export Classes exportClasses( "lavaan" #, #"lavData" ) # export Methods exportMethods( "predict", "update", "anova", "coef", "residuals", "resid", "vcov", "logLik", "fitted.values", "fitted", "summary" ) S3method(print, lavaan.data.frame) S3method(print, lavaan.list) S3method(print, lavaan.matrix) S3method(print, lavaan.matrix.symmetric) S3method(print, lavaan.vector) S3method(print, lavaan.parameterEstimates) S3method(print, prelav) S3method(print, lavaan.fsr) S3method(summary, lavaan.fsr) S3method(pairs, lavaan) S3method(print, InformativeTesting) S3method(plot, InformativeTesting) # S3method(print, lavaan.tables.fit.Cf) # S3method(print, lavaan.tables.fit.Cp) # S3method(print, lavaan.tables.fit.Cm) S3method(inspect, lavaan) S3method(inspect, lavaanList) S3method(lavInspect, lavaan) S3method(lavTech, lavaan) S3method(lavInspect, lavaanList) S3method(lavTech, lavaanList) lavaan/data/0000755000176200001440000000000012104004704012415 5ustar liggesuserslavaan/data/FacialBurns.rda0000644000176200001440000000134112104004704015275 0ustar liggesusersV]oAvK T,kX&/`n֘К@>?_|I|pf{̝ zUJ (äeW@Y*B|ĽxN>ήJI q)Lp5BEL#Gv aFF=ԡ[VҧSE~:Zr+Os頮 r)}rXP~6|pޅm_ҭ&1C;A00! 0 jX3u,FUX1y|Ib_}@B8#s>=$RwڂzO =UwF|'P^]:9 s-C z!D{!tn>g'}{7.kż3\;` \;*8?1W#(QSy0x |L6'fo\}Ɉ|yy?#uyt?}}]%c:xc}]&qy\u}d?MyҏMm#2"(gsyg}yM&eB!o{qַE^[!w ^W[HN7+Dף 7GgSwp0ֽq[3 #ZlMߵqK_ /qK lavaan/data/HolzingerSwineford1939.rda0000644000176200001440000001420212104004704017254 0ustar liggesusersyeuu}L֯uϞ3,ʐA\&H1#, 2q4B!lFB,LK,T$#(h,CNs{wz*e:{={߽yɯ?vCI@zcoI2hms[v{ٳ^{͞5m1'wbmvhԘ\X̯|FQQyiF# kUjF2F&&ZFSFFK-7Zah/1:kKF3eFvet1:8^ht:#1ьQfb^bt)F/5zˍ7^iUFitѫ^cZFetN7m3`Fo4=MFo2:hѬYFgctm1jtќFm3hћ.2-F0C?2zF0w]jtџѻmF]atUFW]co]kCF6+6G>fqO]gF44-F}F_1va_F_3.1F{i[F6^gtw04!1zGF1zFq'4zB OjuZVGQjuZVGQjZ-VK|Y-VKRjZ-VKRjZ-VKRjZ-VKRjZ-VKJ#)zJYMVSTj5ZMVSTjZ=VOSjZ=VO Rj*J_5Jj+͌R*JR*JR*JRj*JRuFVWUjuZ]VWUjuV:_Ww«}y:TV*<:w(nMo7ۋ|;\Lb%vv([4֥1&Jnө_̾ve|,7y1kayszzrtZîkcې8쟉Cr-s(Cy>3O,zr>,pA%Va1ןa=:3ېb3<5۟`Nx17a.I$IqL|+km̍[$iùx"[sx<Ω\ ^pIWsUt,=KwgYZڮohvgvmt#O;>Mn}6YwMqo|gV\i%?3_?|ވG}uyjI>|5~V$/?G?%;-;[W|H_vǸȧ_K_nܨ!OpD~׺v5>hg?OŊC/T ;~bGCpȩIj'ΈǀO7%|W>k('%uٷ4ߊOz&eS_/5) ? iu[%Io_?!_*O >'CǻG^Um;|k"-uQ>=`.Ys]I_6{C~YƗz5dډuxbrWҧ^&eqn̯ݯؿ&⇼̳n%v?#>Ke2oJuYC\Zi3C?*<~젞 J}2\S|i}2y_|z*%-q͓*?4NҲq'~qWU7+yאWCZOyz*/ ?Mej>^%Z^+RfuR+?q>=.)^8~=§z㉟oB;s~6W3z ~ď<#O~;9)++:Ծ.>?.؉ꂽ;?) 
## ---- file: lavaan/data/ ----
## (binary R data files; contents omitted here:
##  FacialBurns.rda, HolzingerSwineford1939.rda,
##  PoliticalDemocracy.rda, Demo.growth.rda)
## ---- file: lavaan/R/ctr_pairwise_table.R ----

# this function is written by Myrsini Katsikatsou

############################## pairwiseTables FUNCTION ########################
# This function can be public. It gets as input a raw data set of ordinal
# variables and it returns a list of all pairwise frequency tables.
#
# The input arguments of the function:
# data : matrix or data frame containing the data. The rows correspond to
#        different observations and the columns to different observed
#        categorical (ordinal or nominal) variables. No continuous variables
#        or covariates should be contained in data. If the variables contained
#        in the data are distinguished into indicators of exogenous latent
#        variables (lv) and indicators of endogenous latent variables, those
#        for exogenous lv should be presented first (in the first columns of
#        data), followed by the indicators for endogenous lv.
# var.levels : NULL or vector or list, specifies the levels (response
#        categories) for each categorical variable contained in data.
#        If NULL, the levels encountered in data are used. If a response
#        category is not observed in the data, then var.levels should be
#        defined.
#        If vector, that implies that all variables have the same levels as
#        given in the vector.
#        If list, the components of the list are vectors, as many as the
#        number of variables in data. Each vector gives the levels of
#        the corresponding categorical variable in data.
# no.x : NULL or integer, gives the number of indicators for exogenous lv.
#        The default value is NULL, indicating that data contains only
#        indicators of exogenous latent variables.
# perc : TRUE/FALSE. If FALSE, the observed frequencies are reported;
#        otherwise the observed percentages are given.
# na.exclude : TRUE/FALSE. If TRUE, listwise deletion is applied to data.
#        Otherwise, cases with missing values are preserved and an extra
#        level with label NA is included in the tables.
# The output of the function:
# It is a list of three components: $pairTables, $VarLevels and $Ncases_del.
# pairTables : a list of as many tables as the number of variable pairs formed
#        by data. If there are indicators of both exogenous and endogenous
#        variables, then first all the matrices referring to pairs of
#        indicators of exogenous lv are reported, followed by all the
#        matrices referring to pairs of indicators of endogenous lv, which
#        in turn are followed by all the matrices of pairs: one indicator of
#        an exogenous lv - one indicator of an endogenous lv.
# VarLevels : a list of as many vectors as the number of variables in the
#        data. Each vector gives the levels / response categories of each
#        variable.
# Ncases_del : an integer reporting the number of cases deleted from data
#        because of missing values (listwise deletion) when na.exclude=TRUE.

pairwiseTables <- function(data, var.levels=NULL, no.x=NULL, perc=FALSE,
                           na.exclude=TRUE) {
    # data in right format?
    if ( (!is.matrix(data)) & (!is.data.frame(data)) ) {
        stop("data is neither a matrix nor a data.frame")
    }

    # at least two variables
    no.var <- dim(data)[2]
    if(no.var < 2) {
        stop("there are less than 2 variables")
    }

    # no.x < no.var ? (only checked when no.x is given)
    if(!is.null(no.x) && no.x > no.var) {
        stop("number of indicators for exogenous latent variables is larger than the total number of variables in data")
    }

    # if data is a matrix, transform to data.frame
    if(is.matrix(data)) {
        data <- as.data.frame(data)
    }

    # listwise deletion
    if(na.exclude) {
        old.data <- data
        data <- na.omit(data)
    }

    # all columns of the data.frame should be of class factor so that the
    # function levels() can be applied
    if(!all(sapply(data, class) == "factor")) {
        if(nrow(data) > 1) {
            data <- data.frame( sapply(data, factor) )
        } else {
            data <- apply(data, 2, factor)
            data <- as.data.frame( matrix(data, nrow=1) )
        }
    }

    # the levels observed for each variable, obs.levels is a list
    obs.levels <- lapply(data, levels)

    # number of variables in data same as number of vectors in var.levels?
    if(is.list(var.levels) && no.var != length(var.levels) ) {
        stop("the length of var.levels does not match the number of variables of the given data set")
    }

    # create var.levels if a list is not given
    old.var.levels <- var.levels
    if(!is.list(old.var.levels)) {
        if(is.null(old.var.levels) ) {
            var.levels <- obs.levels
        } else {
            var.levels <- vector("list", no.var)
            var.levels <- lapply(var.levels, function(x) { x <- old.var.levels } )
        }
    }
    names(var.levels) <- names(data)

    # also check that obs.levels exist in the object var.levels given by the
    # user, i.e. old.var.levels
    if(is.list(old.var.levels)) {
        for(i in 1:no.var) {
            if(!all( obs.levels[[i]] %in% old.var.levels[[i]]))
                stop("levels observed in data are not mentioned in var.levels")
        }
    } else if (is.vector(old.var.levels)) {
        if(!all(apply(na.omit(data), 2, function(x) { x %in% old.var.levels })))
            stop("levels observed in data are not mentioned in var.levels")
    }
    no.given.levels <- sapply(var.levels, length)

    # assign the right levels for each variable as given in object var.levels
    # if that is not already the case; it is not the case when the observed
    # levels are a subgroup of the var.levels given
    if(!is.null(old.var.levels)) {
        no.obs.levels <- sapply(obs.levels, length)
        if(!all(no.obs.levels == no.given.levels) ) {
            index <- c(1:no.var)[no.obs.levels != no.given.levels]
            for(i in index) {
                data[,i] <- factor(data[,i], levels=var.levels[[i]])
            }
        }
    }

    # compute the bivariate frequency tables
    # split first into two cases: a) only indicators of exogenous latent
    # variables, b) otherwise
    if(is.null(no.x) || no.x == no.var) {
        pairs.index <- utils::combn(no.var, 2)
        no.pairs <- dim(pairs.index)[2]
        res <- vector("list", no.pairs)
        for(i in 1:no.pairs ) {
            res[[i]] <- table( data[, pairs.index[,i] ], useNA="ifany" )
        }
    } else {
        no.y <- no.var - no.x
        pairs.xixj.index <- utils::combn(no.x, 2)
        # row 1 gives i index, row 2 j index, j runs faster than i
        pairs.yiyj.index <- utils::combn(no.y, 2)
        pairs.xiyj.index <- expand.grid(1:no.y, 1:no.x)
        pairs.xiyj.index <- rbind( pairs.xiyj.index[,2], pairs.xiyj.index[,1] )
        # row 1 gives i index, row 2 j index, j runs faster than i
        no.pairs.xixj <- dim(pairs.xixj.index)[2]
        no.pairs.yiyj <- dim(pairs.yiyj.index)[2]
        no.pairs.xiyj <- dim(pairs.xiyj.index)[2]
        no.all.pairs <- no.pairs.xixj + no.pairs.yiyj + no.pairs.xiyj

        data.x <- data[, 1:no.x]
        data.y <- data[, (no.x+1):no.var]

        res <- vector("list", no.all.pairs)
        for(i in 1:no.pairs.xixj) {
            res[[i]] <- table(data.x[, pairs.xixj.index[,i]], useNA="ifany" )
        }
        j <- 0
        for(i in (no.pairs.xixj+1):(no.pairs.xixj+no.pairs.yiyj) ) {
            j <- j+1
            res[[i]] <- table(data.y[, pairs.yiyj.index[,j]], useNA="ifany" )
        }
        j <- 0
        for(i in (no.pairs.xixj+no.pairs.yiyj+1):no.all.pairs ) {
            j <- j+1
            res[[i]] <- table(cbind(data.x[, pairs.xiyj.index[1,j], drop=FALSE],
                                    data.y[, pairs.xiyj.index[2,j], drop=FALSE]),
                              useNA="ifany" )
        }
    }

    # if percentages are asked
    if (perc) {
        Nobs <- dim(data)[1]
        res <- lapply(res, function(x) { x/Nobs } )
    }

    # Ncases_del = the number of cases deleted because they had missing values
    if (na.exclude) {
        Ncases_deleted <- dim(old.data)[1] - dim(data)[1]
    } else {
        Ncases_deleted <- 0
    }

    list(pairTables = res, VarLevels = var.levels, Ncases_del = Ncases_deleted)
}
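# Usage sketch (hypothetical data, not part of the package): two ordinal
# indicators of an exogenous lv followed by two of an endogenous lv, so
# no.x = 2; all names below are made up for illustration.
# set.seed(1)
# dat <- data.frame(x1 = sample(1:3, 100, replace = TRUE),
#                   x2 = sample(1:3, 100, replace = TRUE),
#                   y1 = sample(1:4, 100, replace = TRUE),
#                   y2 = sample(1:4, 100, replace = TRUE))
# out <- pairwiseTables(dat, no.x = 2)
# out$pairTables[[1]]   # 3 x 3 table of (x1, x2); x-x pairs come first
# out$Ncases_del        # 0, since there are no missing values here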
## ---- file: lavaan/R/ctr_pml_utils.R ----

# contributed by Myrsini Katsikatsou (March 2016)

# The function pc_lik_PL_with_cov gives the value of the bivariate likelihood
# for a specific pair of ordinal variables, casewise, when covariates are
# present and estimator == "PML" (the bivariate likelihood is essentially the
# bivariate probability of the observed response pattern of two ordinal
# variables).
# Input arguments:
# Y1 is a vector containing the observed values of the first variable for all
#    cases/units; Y1 is ordinal.
# Y2 is similar to Y1.
# Rho is the polychoric correlation of Y1 and Y2.
# th.y1 is the vector of the thresholds for Y1*, excluding the first and
#    the last thresholds, which are -Inf and Inf.
# th.y2 is similar to th.y1.
# eXo is the data for the covariates in matrix format, where nrows = no of
#    cases and ncols = no of covariates.
# PI.y1 is a vector containing the regression coefficients of the covariates
#    for the first variable, Y1; the length of the vector is the no of
#    covariates; to obtain this vector apply the function
#    lavaan:::computePI()[row_corresponding_to_Y1, ].
# PI.y2 is similar to PI.y1.
# missing.ind is of "character" value, taking the values listwise, pairwise,
#    available_cases; to obtain a value use lavdata@missing.
# Output:
# It is a vector, length = no of cases, giving the bivariate likelihood for
# each case.

pc_lik_PL_with_cov <- function(Y1, Y2, Rho, th.y1, th.y2,
                               eXo, PI.y1, PI.y2, missing.ind ) {
    th.y1 <- c(-100, th.y1, 100)
    th.y2 <- c(-100, th.y2, 100)
    pred.y1 <- c( eXo %*% PI.y1 )
    pred.y2 <- c( eXo %*% PI.y2 )
    th.y1.upper <- th.y1[Y1 + 1L] - pred.y1
    th.y1.lower <- th.y1[Y1     ] - pred.y1
    th.y2.upper <- th.y2[Y2 + 1L] - pred.y2
    th.y2.lower <- th.y2[Y2     ] - pred.y2

    if (missing.ind == "listwise") { # I guess this is the default, which
                                     # also handles the case of complete data
        biv_prob <- pbivnorm(th.y1.upper, th.y2.upper, rho = Rho) -
                    pbivnorm(th.y1.lower, th.y2.upper, rho = Rho) -
                    pbivnorm(th.y1.upper, th.y2.lower, rho = Rho) +
                    pbivnorm(th.y1.lower, th.y2.lower, rho = Rho)
        lik <- biv_prob
    } else if (missing.ind %in% c("pairwise", "available.cases",
                                  "available_cases")) {
        # index of cases with complete pairs
        CP.idx <- which( complete.cases( cbind(Y1, Y2) ) )
        th.y1.upper <- th.y1.upper[CP.idx]
        th.y1.lower <- th.y1.lower[CP.idx]
        th.y2.upper <- th.y2.upper[CP.idx]
        th.y2.lower <- th.y2.lower[CP.idx]
        biv_prob <- pbivnorm(th.y1.upper, th.y2.upper, rho = Rho) -
                    pbivnorm(th.y1.lower, th.y2.upper, rho = Rho) -
                    pbivnorm(th.y1.upper, th.y2.lower, rho = Rho) +
                    pbivnorm(th.y1.lower, th.y2.lower, rho = Rho)
        #lik <- numeric( length(Y1) )
        lik <- rep(as.numeric(NA), length(Y1))
        lik[CP.idx] <- biv_prob
    }

    lik
}
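# Usage sketch (hypothetical values, not from the package): two 3-category
# ordinal variables, one covariate, complete data.
# n   <- 5
# Y1  <- c(1, 2, 3, 2, 1); Y2 <- c(2, 2, 3, 1, 1)
# eXo <- matrix(rnorm(n), n, 1)
# lik <- pc_lik_PL_with_cov(Y1 = Y1, Y2 = Y2, Rho = 0.3,
#                           th.y1 = c(-0.5, 0.5), th.y2 = c(-1, 1),
#                           eXo = eXo, PI.y1 = 0.2, PI.y2 = -0.1,
#                           missing.ind = "listwise")
# lik is then a vector of 5 bivariate probabilities, one per case.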
#################################################################
# The function uni_lik gives the value of the univariate likelihood for a
# specific ordinal variable, casewise (which is essentially the probability
# of the observed response category for each case).
# The input arguments are explained before the function pc_lik_PL_with_cov
# above.
# Output:
# It is a vector, length = no of cases, giving the univariate likelihood for
# each case.

uni_lik <- function(Y1, th.y1, eXo=NULL, PI.y1=NULL) {
    th.y1 <- c(-100, th.y1, 100)
    if(!is.null(eXo)) {
        pred.y1 <- c( eXo %*% PI.y1 )
    }

    if(is.null(eXo)) {
        th.y1.upper <- th.y1[Y1 + 1L]
        th.y1.lower <- th.y1[Y1     ]
    } else {
        th.y1.upper <- th.y1[Y1 + 1L] - pred.y1
        th.y1.lower <- th.y1[Y1     ] - pred.y1
    }

    uni_lik <- pnorm(th.y1.upper) - pnorm(th.y1.lower)
    uni_lik[is.na(uni_lik)] <- 0
    # return the vector explicitly: a subassignment as the last expression
    # would return its right-hand side (0), not the updated vector
    uni_lik
}
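# Usage sketch (hypothetical values): without covariates, uni_lik() is just
# the difference of two normal CDFs at the thresholds bracketing each response:
# uni_lik(Y1 = c(1, 2, 3), th.y1 = c(-0.5, 0.5))
# # equals pnorm(c(-0.5, 0.5, 100)) - pnorm(c(-100, -0.5, 0.5))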
#################################################################
# The function lav_tables_univariate_freq_cell computes the univariate
# (one-way) frequency tables. The function closely follows the "logic" of the
# lavaan function lav_tables_pairwise_freq_cell.
# The output is either a list or a data.frame, depending on the value of the
# logical input argument as.data.frame. Either way, the same information is
# contained, namely:
# a) the observed (univariate) frequencies f_ia, i = 1,...,p (variables),
#    a = 1,...,ci (response categories), with the a index running faster than
#    the i index.
# b) an index vector with the name varb which indicates which variable each
#    frequency refers to.
# c) an index vector with the name group which indicates which group each
#    frequency refers to in a multi-group analysis.
# d) an index vector with the name level which indicates which level within
#    each ordinal variable each frequency refers to.
# e) a vector nobs which gives how many cases were considered to compute the
#    corresponding frequency. Since we use the available data for each
#    variable when missing == "available_cases", we expect these numbers to
#    differ when missing values are present.
# f) an index vector with the name id indexing each univariate table:
#    1 goes to the first variable in the first group, 2 to the 2nd variable
#    in the first group, and so on. The last table has index equal to
#    (no of groups)*(no of variables).

lav_tables_univariate_freq_cell <- function(lavdata = NULL,
                                            as.data.frame. = TRUE) {
    # shortcuts
    vartable <- as.data.frame(lavdata@ov, stringsAsFactors = FALSE)
    X        <- lavdata@X
    ov.names <- lavdata@ov.names
    ngroups  <- lavdata@ngroups

    # identify 'categorical' variables
    cat.idx <- which(vartable$type %in% c("ordered", "factor"))

    # do we have any categorical variables?
    if(length(cat.idx) == 0L) {
        stop("lavaan ERROR: no categorical variables are found")
    }

    # univariate tables
    univariate.tables <- vartable$name[cat.idx]
    univariate.tables <- rbind(univariate.tables,
                               seq_len(length(univariate.tables)),
                               deparse.level = 0 )
    ntables <- ncol(univariate.tables)

    # for each group, for each univariate table, collect information
    UNI_TABLES <- vector("list", length = ngroups)
    for(g in 1:ngroups) {
        UNI_TABLES[[g]] <- apply(univariate.tables, MARGIN = 2,
                                 FUN = function(x) {
            idx1  <- which(vartable$name == x[1])
            id    <- (g-1)*ntables + as.numeric(x[2])
            ncell <- vartable$nlev[idx1]
            # compute one-way observed frequencies
            Y1 <- X[[g]][, idx1]
            UNI_FREQ <- tabulate(Y1, nbins = max(Y1, na.rm = TRUE) )
            list( id       = rep.int(id, ncell),
                  varb     = rep.int(x[1], ncell),
                  group    = rep.int(g, ncell),
                  nobs     = rep.int(sum(UNI_FREQ), ncell),
                  level    = seq_len(ncell),
                  obs.freq = UNI_FREQ )
        })
    }

    if(as.data.frame.) {
        for(g in 1:ngroups) {
            UNI_TABLE <- UNI_TABLES[[g]]
            UNI_TABLE <- lapply(UNI_TABLE, as.data.frame,
                                stringsAsFactors = FALSE)
            if(g == 1) {
                out <- do.call(rbind, UNI_TABLE)
            } else {
                out <- rbind(out, do.call(rbind, UNI_TABLE))
            }
        }
        if(g == 1) {
            # remove group column
            out$group <- NULL
        }
    } else {
        if(ngroups == 1L) {
            out <- UNI_TABLES[[1]]
        } else {
            out <- UNI_TABLES
        }
    }

    out
}

#################################################################
# The function univariateExpProbVec gives the model-based univariate
# probabilities for all ordinal indicators and for all of their response
# categories, i.e. pi(xi = a), where a = 1,...,ci and i = 1,...,p, with the
# a index running faster than the i index.
# Input arguments:
# TH is a vector giving the thresholds for all variables, tau_ia, with a
#    running faster than i (the first and the last thresholds, which are
#    -Inf and Inf, are not included). TH can be given by the lavaan function
#    computeTH.
# th.idx is a vector of the same length as TH which gives the value of the
#    i index, namely which variable each threshold refers to. This can be
#    obtained by lavmodel@th.idx.
# Output:
# It is a vector, length = Sum_i(ci), i.e. the sum of the numbers of response
# categories of all ordinal variables. The vector contains the model-based
# univariate probabilities pi(xi = a).

univariateExpProbVec <- function(TH = TH, th.idx = th.idx) {
    TH.split <- split(TH, th.idx)
    TH.lower <- unlist( lapply(TH.split, function(x) { c(-100, x) }),
                        use.names = FALSE )
    TH.upper <- unlist( lapply(TH.split, function(x) { c(x, 100) }),
                        use.names = FALSE )
    prob <- pnorm(TH.upper) - pnorm(TH.lower)
    # to avoid NaN/-Inf
    prob[prob < .Machine$double.eps] <- .Machine$double.eps
    prob
}
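# Usage sketch (hypothetical values): two variables, with 3 and 2 response
# categories; within each variable the returned probabilities sum to
# (approximately) 1, since +/-100 stand in for +/-Inf.
# TH     <- c(-0.5, 0.5, 0)   # thresholds, variable index runs slowest
# th.idx <- c( 1,   1,   2)
# p <- univariateExpProbVec(TH = TH, th.idx = th.idx)
# sum(p[1:3]); sum(p[4:5])    # both close to 1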
#############################################################################
# The function pc_cor_scores_PL_with_cov computes the derivatives of a
# bivariate log-likelihood of two ordinal variables, casewise, with respect
# to the thresholds, the slopes (reduced-form regression coefficients for
# the covariates), and the polychoric correlation.
# The function dbinorm of lavaan is used.
# The function gives the right result for both listwise and pairwise
# deletion, and for the case of complete data.
# Input arguments are explained before the function pc_lik_PL_with_cov
# defined above. The only difference is that PI.y1 and PI.y2 are
# (accidentally) renamed here as sl.y1 and sl.y2.
# Output:
# It is a list containing the following:
# a) the derivatives w.r.t. the thresholds of the first variable, casewise.
#    This is a matrix, nrows = no of cases, ncols = no of thresholds of
#    variable 1.
# b) the derivatives w.r.t. the thresholds of the second variable, casewise.
#    This is a matrix, nrows = no of cases, ncols = no of thresholds of
#    variable 2.
# c) the derivatives w.r.t. the slopes for variable 1. This is a matrix,
#    where nrows = no of cases, ncols = no of covariates.
# d) the derivatives w.r.t. the slopes for variable 2. This is a matrix,
#    where nrows = no of cases, ncols = no of covariates.
# e) the derivative w.r.t. the polychoric correlation of the two variables.
#    This is a vector of length = no of cases.

pc_cor_scores_PL_with_cov <- function(Y1, Y2, eXo, Rho, th.y1, th.y2,
                                      sl.y1, sl.y2, missing.ind) {
    nth.y1 <- length(th.y1)
    nth.y2 <- length(th.y2)
    start.th.y1 <- th.y1
    start.th.y2 <- th.y2
    Nobs <- length(Y1)
    R <- sqrt(1 - Rho*Rho)

    th.y1 <- c(-100, th.y1, 100)
    th.y2 <- c(-100, th.y2, 100)
    pred.y1 <- c( eXo %*% sl.y1 )
    pred.y2 <- c( eXo %*% sl.y2 )
    th.y1.z1 <- th.y1[Y1 + 1L] - pred.y1
    th.y1.z2 <- th.y1[Y1     ] - pred.y1
    th.y2.z1 <- th.y2[Y2 + 1L] - pred.y2
    th.y2.z2 <- th.y2[Y2     ] - pred.y2

    # lik, i.e. the bivariate probability case-wise
    lik <- pc_lik_PL_with_cov(Y1 = Y1, Y2 = Y2, Rho = Rho,
                              th.y1 = start.th.y1, th.y2 = start.th.y2,
                              eXo = eXo, PI.y1 = sl.y1, PI.y2 = sl.y2,
                              missing.ind = missing.ind )

    # w.r.t. th.y1, mean tau tilde
    # derivative of the bivariate prob w.r.t. tau^xi_ci, see formula in
    # paper 2012
    y1.Z1 <- dnorm(th.y1.z1) * ( pnorm( (th.y2.z1 - Rho*th.y1.z1)/R ) -
                                 pnorm( (th.y2.z2 - Rho*th.y1.z1)/R ) )
    # derivative of the bivariate prob w.r.t. tau^xi_(ci-1)
    y1.Z2 <- (-1)*( dnorm(th.y1.z2) *
                    ( pnorm( (th.y2.z1 - Rho*th.y1.z2)/R ) -
                      pnorm( (th.y2.z2 - Rho*th.y1.z2)/R ) ) )
    # allocate the derivatives at the right column, casewise
    idx.y1.z1 <- matrix(1:nth.y1, nrow=Nobs, ncol=nth.y1, byrow=TRUE) == Y1
    idx.y1.z2 <- matrix(1:nth.y1, nrow=Nobs, ncol=nth.y1, byrow=TRUE) == (Y1 - 1L)
    der.table.y1 <- idx.y1.z1 * y1.Z1 + idx.y1.z2 * y1.Z2
    # der of pl w.r.t. th.y1
    dx.th.tilde.y1 <- der.table.y1/lik
    dx.th.tilde.y1[is.na(dx.th.tilde.y1)] <- 0

    # w.r.t. th.y2, mean tau tilde
    # derivative of the bivariate prob w.r.t. tau^xi_ci, see formula in
    # paper 2012
    y2.Z1 <- dnorm(th.y2.z1) * ( pnorm( (th.y1.z1 - Rho*th.y2.z1)/R ) -
                                 pnorm( (th.y1.z2 - Rho*th.y2.z1)/R ) )
    # derivative of the bivariate prob w.r.t. tau^xi_(ci-1)
    y2.Z2 <- (-1)*( dnorm(th.y2.z2) *
                    ( pnorm( (th.y1.z1 - Rho*th.y2.z2)/R ) -
                      pnorm( (th.y1.z2 - Rho*th.y2.z2)/R ) ) )
    # allocate the derivatives at the right column, casewise
    idx.y2.z1 <- matrix(1:nth.y2, nrow=Nobs, ncol=nth.y2, byrow=TRUE) == Y2
    idx.y2.z2 <- matrix(1:nth.y2, nrow=Nobs, ncol=nth.y2, byrow=TRUE) == (Y2 - 1L)
    der.table.y2 <- idx.y2.z1 * y2.Z1 + idx.y2.z2 * y2.Z2
    # der of pl w.r.t. th.y2
    dx.th.tilde.y2 <- der.table.y2/lik
    dx.th.tilde.y2[is.na(dx.th.tilde.y2)] <- 0

    # w.r.t. rho
    # derivative of the bivariate prob w.r.t. rho, see formula in paper 2012
    dbivprob.wrt.rho <- ( dbinorm(th.y1.z1, th.y2.z1, Rho) -
                          dbinorm(th.y1.z2, th.y2.z1, Rho) -
                          dbinorm(th.y1.z1, th.y2.z2, Rho) +
                          dbinorm(th.y1.z2, th.y2.z2, Rho) )
    # der of pl w.r.t. rho
    dx.rho <- dbivprob.wrt.rho/lik
    dx.rho[is.na(dx.rho)] <- 0

    # der of pl w.r.t. slopes (also referred to as PI, obtained by the
    # computePI function)
    row.sums.y1 <- rowSums(dx.th.tilde.y1)
    row.sums.y2 <- rowSums(dx.th.tilde.y2)
    dx.sl.y1 <- (-1)*eXo*row.sums.y1
    dx.sl.y2 <- (-1)*eXo*row.sums.y2

    list(dx.th.y1 = dx.th.tilde.y1, # note that dx.th.tilde = dx.th
         dx.th.y2 = dx.th.tilde.y2,
         dx.sl.y1 = dx.sl.y1,
         dx.sl.y2 = dx.sl.y2,
         dx.rho   = dx.rho)
}
###############################################################
# The function uni_scores gives, casewise, the derivative of a univariate
# log-likelihood w.r.t. the thresholds and the slopes (if present), weighted
# by the casewise uni-weights as those defined in AC-PL (essentially the
# number of missing values per case).
# The function closely follows the "logic" of the function
# pc_cor_scores_PL_with_cov defined above.
# Input arguments are as before, plus: weights.casewise, given by
# lavcache$uniweights.casewise.
# Output:
# A list including the following:
# a) the derivatives w.r.t. the thresholds of the variable. This is a matrix,
#    nrows = no of cases, ncols = no of thresholds of variable 1.
# b) the derivatives w.r.t. the slopes for the variable. If covariates are
#    present, this is a matrix, nrows = no of cases, ncols = no of
#    covariates. Otherwise it takes the value NULL.

uni_scores <- function(Y1, th.y1, eXo=NULL, sl.y1=NULL, weights.casewise) {
    nth.y1 <- length(th.y1)
    start.th.y1 <- th.y1
    Nobs <- length(Y1)
    th.y1 <- c(-100, th.y1, 100)
    if(is.null(eXo)) {
        th.y1.z1 <- th.y1[Y1 + 1L]
        th.y1.z2 <- th.y1[Y1     ]
    } else {
        pred.y1 <- c( eXo %*% sl.y1 )
        th.y1.z1 <- th.y1[Y1 + 1L] - pred.y1
        th.y1.z2 <- th.y1[Y1     ] - pred.y1
    }

    # lik, i.e. the univariate probability case-wise
    # (note: pass the original thresholds, start.th.y1; uni_lik adds the
    #  -100/100 end points itself)
    lik <- uni_lik( #Y1 = X[,i],
                    Y1 = Y1,
                    #th.y1 = TH[th.idx==i],
                    th.y1 = start.th.y1,
                    eXo = eXo,
                    #PI.y1 = PI[i,])
                    PI.y1 = sl.y1)

    # w.r.t. th.y1
    # derivative of the univariate prob w.r.t. the upper limit threshold
    y1.Z1 <- dnorm(th.y1.z1)
    # derivative of the univariate prob w.r.t. the lower limit threshold
    y1.Z2 <- (-1)*dnorm(th.y1.z2)
    # allocate the derivatives at the right column, casewise
    idx.y1.z1 <- matrix(1:nth.y1, nrow=Nobs, ncol=nth.y1, byrow=TRUE) == Y1
    idx.y1.z2 <- matrix(1:nth.y1, nrow=Nobs, ncol=nth.y1, byrow=TRUE) == (Y1 - 1L)
    der.table.y1 <- idx.y1.z1 * y1.Z1 + idx.y1.z2 * y1.Z2
    # der of pl w.r.t. th.y1
    dx.th.tilde.y1 <- der.table.y1 * (weights.casewise/lik)
    dx.th.tilde.y1[is.na(dx.th.tilde.y1)] <- 0

    # der of pl w.r.t. slopes (also referred to as PI, obtained by the
    # computePI function)
    dx.sl.y1 <- NULL
    if(!is.null(eXo)) {
        row.sums.y1 <- rowSums(dx.th.tilde.y1)
        dx.sl.y1 <- (-1)*eXo*row.sums.y1
    }

    list(dx.th.y1 = dx.th.tilde.y1, # note that dx.th.tilde = dx.th
         dx.sl.y1 = dx.sl.y1)
}
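# Verification sketch (not run; Y1, Y2, eXo, th.y1, th.y2, PI.y1, PI.y2 as in
# the pc_lik_PL_with_cov usage sketch above): the analytic casewise
# derivative w.r.t. rho returned by pc_cor_scores_PL_with_cov() can be
# checked against a central-difference numerical derivative of the casewise
# log-likelihood:
# h  <- 1e-6
# l1 <- log(pc_lik_PL_with_cov(Y1, Y2, 0.3 + h, th.y1, th.y2,
#                              eXo, PI.y1, PI.y2, "listwise"))
# l0 <- log(pc_lik_PL_with_cov(Y1, Y2, 0.3 - h, th.y1, th.y2,
#                              eXo, PI.y1, PI.y2, "listwise"))
# (l1 - l0) / (2 * h)   # should match the dx.rho component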
## ---- file: lavaan/R/lav_export.R ----

# export `lavaan' lav model description to third-party software
#
lavExport <- function(object, target = "lavaan", prefix = "sem",
                      dir.name = "lavExport", export = TRUE) {
    stopifnot(inherits(object, "lavaan"))
    target <- tolower(target)

    # check for conditional.x = TRUE
    if(object@Model@conditional.x) {
        stop("lavaan ERROR: this function is not (yet) available if conditional.x = TRUE")
    }

    ngroups <- object@Data@ngroups
    if(ngroups > 1L) {
        group.label2 <- paste(".", object@Data@group.label, sep = "")
    } else {
        group.label2 <- ""
    }
    data.file <- paste(prefix, group.label2, ".", target, ".raw", sep = "")

    # 2. create syntax file
    if(target == "lavaan") {
        header <- ""
        syntax <- lav2lavaan(object)
        footer <- ""
        out <- paste(header, syntax, footer, sep = "")
    } else if(target == "mplus") {
        header <- lav_mplus_header(data.file = data.file,
                                   group.label = object@Data@group.label,
                                   ov.names = vnames(object@ParTable, "ov"),
                                   ov.ord.names = vnames(object@ParTable, "ov.ord"),
                                   estimator = lav_mplus_estimator(object),
                                   data.type = object@Data@data.type,
                                   nobs = object@Data@nobs[[1L]]
                                  )
        syntax <- lav2mplus(object, group.label = object@Data@group.label)
        footer <- paste("OUTPUT:\n  sampstat standardized tech1;\n")
        out <- paste(header, syntax, footer, sep = "")
    } else if(target == "lisrel") {
        syntax <- lav2lisrel(object)
    } else if(target == "eqs") {
        syntax <- lav2eqs(object)
    } else if(target == "sem") {
        syntax <- lav2sem(object)
    } else if(target == "openmx") {
        syntax <- lav2openmx(object)
    } else {
        stop("lavaan ERROR: target ", target, " has not been implemented yet")
    }

    # export to file?
    if(export) {
        dir.create(path = dir.name)
        input.file <- paste(dir.name, "/", prefix, ".", target, ".in", sep = "")
        cat(out, file = input.file, sep = "")

        # write data (if available)
        if(identical(object@Data@data.type, "full")) {
            for(g in 1:ngroups) {
                if(is.null(object@Data@eXo[[g]])) {
                    DATA <- object@Data@X[[g]]
                } else {
                    DATA <- cbind(object@Data@X[[g]], object@Data@eXo[[g]])
                }
                write.table(DATA,
                            file = paste(dir.name, "/", data.file[g], sep = ""),
                            na = "-999999",
                            col.names = FALSE, row.names = FALSE, quote = FALSE)
            }
        } else if(identical(object@Data@data.type, "moment")) {
            for(g in 1:ngroups) {
                DATA <- object@SampleStats@cov[[g]]
                write.table(DATA,
                            file = paste(dir.name, "/", data.file[g], sep = ""),
                            na = "-999999",
                            col.names = FALSE, row.names = FALSE, quote = FALSE)
            }
        } else {
            warning("lavaan WARNING: no data available")
        }

        return(invisible(out))
    } else {
        # just return the syntax file for inspection
        class(out) <- c("lavaan.character", "character")
    }

    out
}

lav2check <- function(lav) {
    if(inherits(lav, "lavaan")) {
        lav <- lav@ParTable
    } else if(is.list(lav)) {
        # nothing to do
    } else {
        stop("lavaan ERROR: lav must be of class `lavaan' or a parTable")
    }

    # check syntax
    if(is.null(lav$ustart)) lav$ustart <- lav$est
    # check if free is missing
    if(is.null(lav$free)) lav$free <- rep(0L, length(lav$ustart))
    # check if label is missing
    if(is.null(lav$label)) lav$label <- rep("", length(lav$ustart))
    # check if group is missing
    if(is.null(lav$group)) lav$group <- rep(1L, length(lav$ustart))

    # if eq.id not all zero, create labels instead
    #if(!is.null(lav$eq.id) && !all(lav$eq.id == 0L)) {
    #    lav$label <- paste("p", as.character(lav$eq.id), sep = "")
    #    lav$label[lav$label == "p0"] <- ""
    #}

    lav
}

## FIXME: this is completely UNFINISHED (just used to quickly get something)
lav2lavaan <- lav2lav <- function(lav) {
    lav <- lav2check(lav)

    header <- "# this model syntax is autogenerated by lavExport\n"
    footer <- "\n"

    # intercepts
    int.idx <- which(lav$op == "~1")
    lav$op[int.idx] <- "~"
    lav$rhs[int.idx] <- "1"

    # spacing around operator
    lav$op <- paste(" ", lav$op, " ", sep = "")

    lav2 <- ifelse(lav$free != 0L,
                   ifelse(lav$label == "",
                          paste(lav$lhs, lav$op, lav$rhs, sep = ""),
                          paste(lav$lhs, lav$op, lav$label, "*", lav$rhs,
                                sep = "")
                         ),
                   ifelse(lav$label == "",
                          paste(lav$lhs, lav$op, lav$ustart, "*", lav$rhs,
                                sep = ""),
                          paste(lav$lhs, lav$op, lav$ustart, "*", lav$rhs,
                                "+", lav$label, "*", lav$rhs, sep = "")
                         )
                  )
    body <- paste(lav2, collapse = "\n")

    out <- paste(header, body, footer, sep = "")
    class(out) <- c("lavaan.character", "character")
    out
}

lav2lisrel <- function(lav) {
    lav <- lav2check(lav)
    stop("this function needs revision")
}

lav2eqs <- function(lav) {
    lav <- lav2check(lav)
    stop("this function needs revision")
}

lav2sem <- function(lav) {
    lav <- lav2check(lav)
    stop("this function needs revision")
}

lav2openmx <- function(lav) {
    lav <- lav2check(lav)
    stop("this function needs revision")
}
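# Usage sketch for lavExport() above (writes files to ./lavExport/; not run):
# HS.model <- ' visual =~ x1 + x2 + x3 '
# fit <- cfa(HS.model, data = HolzingerSwineford1939)
# lavExport(fit, target = "mplus")                   # Mplus input + raw data
# lavExport(fit, target = "lavaan", export = FALSE)  # just inspect the syntax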
## ---- file: lavaan/R/lav_standardize.R ----

standardize.est.lv.x <- function(x, lavobject, partable = NULL,
                                 cov.std = TRUE) {
    # embed x in est
    est <- lav_object_inspect_est(lavobject)
    free.idx <- which(lavobject@ParTable$free > 0L)
    stopifnot(length(x) == length(free.idx))
    est[free.idx] <- x

    # take care of setResidualElements...
    lavmodel <- lav_model_set_parameters(lavmodel = lavobject@Model, x = x)
    GLIST <- lavmodel@GLIST

    standardize.est.lv(lavobject = lavobject, partable = partable, est = est,
                       GLIST = GLIST, cov.std = cov.std)
}

standardize.est.all.x <- function(x, lavobject, partable = NULL,
                                  cov.std = TRUE) {
    # embed x in est
    est <- lav_object_inspect_est(lavobject)
    free.idx <- which(lavobject@ParTable$free > 0L)
    stopifnot(length(x) == length(free.idx))
    est[free.idx] <- x

    # take care of setResidualElements...
    lavmodel <- lav_model_set_parameters(lavmodel = lavobject@Model, x = x)
    GLIST <- lavmodel@GLIST

    standardize.est.all(lavobject = lavobject, partable = partable, est = est,
                        est.std = NULL, GLIST = GLIST, cov.std = cov.std)
}

standardize.est.all.nox.x <- function(x, lavobject, partable = NULL,
                                      cov.std = TRUE) {
    # embed x in est
    est <- lav_object_inspect_est(lavobject)
    free.idx <- which(lavobject@ParTable$free > 0L)
    stopifnot(length(x) == length(free.idx))
    est[free.idx] <- x

    # take care of setResidualElements...
    lavmodel <- lav_model_set_parameters(lavmodel = lavobject@Model, x = x)
    GLIST <- lavmodel@GLIST

    standardize.est.all.nox(lavobject = lavobject, partable = partable,
                            est = est, est.std = NULL,
                            GLIST = GLIST, cov.std = cov.std)
}

unstandardize.est.ov.x <- function(x, lavobject) {
    partable <- lavobject@ParTable
    partable$ustart <- x
    unstandardize.est.ov(partable = partable,
                         ov.var = lavobject@SampleStats@var, cov.std = TRUE)
}

standardize.est.lv <- function(lavobject, partable=NULL, est=NULL, GLIST=NULL,
                               cov.std = TRUE) {
    if(is.null(partable)) partable <- lavobject@ParTable
    if(is.null(est))      est      <- lav_object_inspect_est(lavobject)
    if(is.null(GLIST))    GLIST    <- lavobject@Model@GLIST
    if("SampleStats" %in% slotNames(lavobject)) {
        lavsamplestats = lavobject@SampleStats
    } else {
        lavsamplestats = NULL
    }

    out <- est; N <- length(est)
    stopifnot(N == length(partable$lhs))
    nmat <- lavobject@Model@nmat

    # compute ETA
    LV.ETA <- computeVETA(lavmodel = lavobject@Model, GLIST = GLIST,
                          lavsamplestats = lavsamplestats)

    for(g in 1:lavobject@Model@nblocks) {
        ov.names <- vnames(lavobject@ParTable, "ov", block=g) # not user,
                                                 # which may be incomplete
        lv.names <- vnames(lavobject@ParTable, "lv", block=g)

        # shortcut: no latents in this block, nothing to do
        if(length(lv.names) == 0L) next

        # which mm belong to block g?
        mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g]
        MLIST <- GLIST[ mm.in.group ]

        ETA2 <- diag(LV.ETA[[g]])
        ETA  <- sqrt(ETA2)

        # 1a. "=~" regular indicators
        idx <- which(partable$op == "=~" & !(partable$rhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- out[idx] * ETA[ match(partable$lhs[idx], lv.names) ]

        # 1b. "=~" regular higher-order lv indicators
        idx <- which(partable$op == "=~" & !(partable$rhs %in% ov.names) &
                     partable$block == g)
        out[idx] <- ( out[idx] * ETA[ match(partable$lhs[idx], lv.names) ]
                               / ETA[ match(partable$rhs[idx], lv.names) ] )

        # 1c. "=~" indicators that are both in ov and lv
        #idx <- which(partable$op == "=~" & partable$rhs %in% ov.names
        #             & partable$rhs %in% lv.names &
        #             partable$block == g)

        # 2. "~" regressions (and "<~")
        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$lhs %in% lv.names &
                     partable$block == g)
        out[idx] <- out[idx] / ETA[ match(partable$lhs[idx], lv.names) ]

        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$rhs %in% lv.names &
                     partable$block == g)
        out[idx] <- out[idx] * ETA[ match(partable$rhs[idx], lv.names) ]

        # 3a. "~~" ov
        #idx <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) &
        #             partable$block == g)

        # 3b. "~~" lv
        # ATTENTION: in Mplus 4.1, the off-diagonal residual covariances
        #            were computed by the formula cov(i,j) / sqrt(i.var*j.var)
        #            where i.var and j.var were diagonal elements of ETA
        #
        #            in Mplus 6.1 (but also AMOS and EQS), the i.var and j.var
        #            elements are the 'PSI' diagonal elements!!

        # variances
        rv.idx <- which(partable$op == "~~" & partable$rhs %in% lv.names &
                        partable$lhs == partable$rhs &
                        partable$block == g)
        out[rv.idx] <- ( out[rv.idx]
                         / ETA[ match(partable$lhs[rv.idx], lv.names) ]
                         / ETA[ match(partable$rhs[rv.idx], lv.names) ] )

        # covariances lv
        # three types:
        #  - only lhs is LV (and fixed.x = FALSE)
        #  - only rhs is LV (and fixed.x = FALSE)
        #  - both lhs and rhs are LV (regular case)
        if(cov.std) {
            if(!is.complex(est[rv.idx])) {
                RV <- sqrt(abs(est[rv.idx])) # abs in case of heywood cases
            } else {
                RV <- sqrt( est[rv.idx] )
            }
            rv.names <- partable$lhs[rv.idx]
        }

        # left
        idx.lhs <- which(partable$op == "~~" &
                         partable$lhs %in% lv.names &
                         partable$lhs != partable$rhs &
                         partable$block == g)
        if(length(idx.lhs) > 0L) {
            if(cov.std == FALSE) {
                out[idx.lhs] <-
                    (out[idx.lhs] / ETA[ match(partable$lhs[idx.lhs], lv.names)])
            } else {
                out[idx.lhs] <-
                    (out[idx.lhs] / RV[ match(partable$lhs[idx.lhs], rv.names)])
            }
        }

        # right
        idx.rhs <- which(partable$op == "~~" &
                         partable$rhs %in% lv.names &
                         partable$lhs != partable$rhs &
                         partable$block == g)
        if(length(idx.rhs) > 0L) {
            if(cov.std == FALSE) {
                out[idx.rhs] <-
                    (out[idx.rhs] / ETA[ match(partable$rhs[idx.rhs], lv.names)])
            } else {
                out[idx.rhs] <-
                    (out[idx.rhs] / RV[ match(partable$rhs[idx.rhs], rv.names)])
            }
        }

        # 4a. "~1" ov
        #idx <- which(partable$op == "~1" & !(partable$lhs %in% lv.names) &
        #             partable$block == g)

        # 4b. "~1" lv
        idx <- which(partable$op == "~1" & partable$lhs %in% lv.names &
                     partable$block == g)
        out[idx] <- out[idx] / ETA[ match(partable$lhs[idx], lv.names) ]
    }

    # 5a ":="
    idx <- which(partable$op == ":=")
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@def.function(x)
    }

    # 5b "=="
    idx <- which(partable$op == "==")
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@ceq.function(x)
    }

    # 5c. "<" or ">"
    idx <- which((partable$op == "<" | partable$op == ">"))
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@cin.function(x)
    }

    out
}

standardize.est.all <- function(lavobject, partable=NULL, est=NULL,
                                est.std=NULL, GLIST = NULL, cov.std = TRUE) {
    if(is.null(partable)) partable <- lavobject@ParTable
    if(is.null(est))      est      <- lav_object_inspect_est(lavobject)
    if(is.null(est.std)) {
        est.std <- standardize.est.lv(lavobject, partable = partable,
                                      est = est, GLIST = GLIST,
                                      cov.std = cov.std)
    }
    if(is.null(GLIST)) GLIST <- lavobject@Model@GLIST
    if("SampleStats" %in% slotNames(lavobject)) {
        lavsamplestats = lavobject@SampleStats
    } else {
        lavsamplestats = NULL
    }

    out <- est.std; N <- length(est.std)
    stopifnot(N == length(partable$lhs))

    VY <- computeVY(lavmodel = lavobject@Model, GLIST = GLIST,
                    lavsamplestats = lavsamplestats, diagonal.only = TRUE)

    for(g in 1:lavobject@Model@nblocks) {
        ov.names <- vnames(lavobject@ParTable, "ov", block = g) # not user
        lv.names <- vnames(lavobject@ParTable, "lv", block = g)

        OV <- sqrt(VY[[g]])

        if(lavobject@Model@conditional.x) {
            # extend OV with ov.names.x
            ov.names.x <- vnames(lavobject@ParTable, "ov.x", block = g)
            ov.names <- c(ov.names, ov.names.x)
            OV <- c(OV, sqrt(diag(lavobject@SampleStats@cov.x[[g]])))
        }

        # 1a. "=~" regular indicators
        idx <- which(partable$op == "=~" & !(partable$rhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$rhs[idx], ov.names) ]

        # 1b. "=~" regular higher-order lv indicators

        # 1c. "=~" indicators that are both in ov and lv
        #idx <- which(partable$op == "=~" & partable$rhs %in% ov.names
        #             & partable$rhs %in% lv.names &
        #             partable$block == g)

        # 2. "~" regressions (and "<~")
        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$lhs %in% ov.names &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ]

        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$rhs %in% ov.names &
                     partable$block == g)
        out[idx] <- out[idx] * OV[ match(partable$rhs[idx], ov.names) ]

        # 3a. "~~" ov
        # ATTENTION: in Mplus 4.1, the off-diagonal residual covariances
        #            were computed by the formula cov(i,j) / sqrt(i.var*j.var)
        #            where i.var and j.var were diagonal elements of OV
        #
        #            in Mplus 6.1 (but also AMOS and EQS), the i.var and j.var
        #            elements are the 'THETA' diagonal elements!!

        # variances
        rv.idx <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) &
                        partable$lhs == partable$rhs &
                        partable$block == g)
        out[rv.idx] <- ( out[rv.idx]
                         / OV[ match(partable$lhs[rv.idx], ov.names) ]
                         / OV[ match(partable$rhs[rv.idx], ov.names) ] )

        # covariances ov
        # three types:
        #  - only lhs is OV (and fixed.x = FALSE)
        #  - only rhs is OV (and fixed.x = FALSE)
        #  - both lhs and rhs are OV (regular case)
        if(cov.std) {
            if(!is.complex(est[rv.idx])) {
                RV <- sqrt(abs(est[rv.idx]))
            } else {
                RV <- sqrt( est[rv.idx] )
            }
            rv.names <- partable$lhs[rv.idx]
        }

        # left
        idx.lhs <- which(partable$op == "~~" &
                         !(partable$lhs %in% lv.names) &
                         partable$lhs != partable$rhs &
                         partable$block == g)
        if(length(idx.lhs) > 0L) {
            if(cov.std == FALSE) {
                out[idx.lhs] <-
                    (out[idx.lhs] / OV[ match(partable$lhs[idx.lhs], ov.names)])
            } else {
                out[idx.lhs] <-
                    (out[idx.lhs] / RV[ match(partable$lhs[idx.lhs], rv.names)])
            }
        }

        # right
        idx.rhs <- which(partable$op == "~~" &
                         !(partable$rhs %in% lv.names) &
                         partable$lhs != partable$rhs &
                         partable$block == g)
        if(length(idx.rhs) > 0L) {
            if(cov.std == FALSE) {
                out[idx.rhs] <-
                    (out[idx.rhs] / OV[ match(partable$rhs[idx.rhs], ov.names)])
            } else {
                out[idx.rhs] <-
                    (out[idx.rhs] / RV[ match(partable$rhs[idx.rhs], rv.names)])
            }
        }

        # 3b. "~~" lv
        #idx <- which(partable$op == "~~" & partable$rhs %in% lv.names &
        #             partable$block == g)

        # 4a. "~1" ov
        idx <- which(partable$op == "~1" & !(partable$lhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ]

        # 4b. "~1" lv
        #idx <- which(partable$op == "~1" & partable$lhs %in% lv.names &
        #             partable$block == g)

        # 4c. "|" thresholds
        idx <- which(partable$op == "|" & !(partable$lhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ]

        # 4d. "~*~" scales
        idx <- which(partable$op == "~*~" & !(partable$lhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- 1.0
    }

    # 5a ":="
    idx <- which(partable$op == ":=")
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@def.function(x)
    }

    # 5b "=="
    idx <- which(partable$op == "==")
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@ceq.function(x)
    }

    # 5c. "<" or ">"
    idx <- which((partable$op == "<" | partable$op == ">"))
    if(length(idx) > 0L) {
        x <- out[ partable$free & !duplicated(partable$free) ]
        out[idx] <- lavobject@Model@cin.function(x)
    }

    out
}

standardize.est.all.nox <- function(lavobject, partable=NULL, est=NULL,
                                    est.std=NULL, GLIST = NULL,
                                    cov.std = TRUE) {
    if(is.null(partable)) partable <- lavobject@ParTable
    if(is.null(est))      est      <- lav_object_inspect_est(lavobject)
    if(is.null(est.std)) {
        est.std <- standardize.est.lv(lavobject, partable = partable,
                                      est = est, GLIST = GLIST,
                                      cov.std = cov.std)
    }
    if(is.null(GLIST)) GLIST <- lavobject@Model@GLIST
    if("SampleStats" %in% slotNames(lavobject)) {
        lavsamplestats = lavobject@SampleStats
    } else {
        lavsamplestats = NULL
    }

    out <- est.std; N <- length(est.std)
    stopifnot(N == length(partable$lhs))

    VY <- computeVY(lavmodel = lavobject@Model, GLIST = GLIST,
                    lavsamplestats = lavsamplestats, diagonal.only = TRUE)

    for(g in 1:lavobject@Model@nblocks) {
        ov.names     <- vnames(lavobject@ParTable, "ov",     block = g)
        ov.names.x   <- vnames(lavobject@ParTable, "ov.x",   block = g)
        ov.names.nox <- vnames(lavobject@ParTable, "ov.nox", block = g)
        lv.names     <- vnames(lavobject@ParTable, "lv",     block = g)

        OV <- sqrt(VY[[g]])

        if(lavobject@Model@conditional.x) {
            # extend OV with ov.names.x
            ov.names.x <- vnames(lavobject@ParTable, "ov.x", block = g)
            ov.names <- c(ov.names, ov.names.x)
            OV <- c(OV, sqrt(diag(lavobject@SampleStats@cov.x[[g]])))
        }

        # 1a. "=~" regular indicators
        idx <- which(partable$op == "=~" & !(partable$rhs %in% lv.names) &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$rhs[idx], ov.names) ]

        # 1b. "=~" regular higher-order lv indicators

        # 1c. "=~" indicators that are both in ov and lv
        #idx <- which(partable$op == "=~" & partable$rhs %in% ov.names
        #             & partable$rhs %in% lv.names &
        #             partable$block == g)

        # 2. "~" regressions (and "<~")
        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$lhs %in% ov.names &
                     partable$block == g)
        out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ]

        idx <- which((partable$op == "~" | partable$op == "<~") &
                     partable$rhs %in% ov.names.nox &
                     partable$block == g)
        out[idx] <- out[idx] * OV[ match(partable$rhs[idx], ov.names.nox) ]

        # 3a. "~~" ov
        # ATTENTION: in Mplus 4.1, the off-diagonal residual covariances
        #            were computed by the formula cov(i,j) / sqrt(i.var*j.var)
        #            where i.var and j.var were diagonal elements of OV
        #
        #            in Mplus 6.1 (but also AMOS and EQS), the i.var and j.var
        #            elements are the 'THETA' diagonal elements!!
# variances rv.idx <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) & !(partable$lhs %in% ov.names.x) & partable$lhs == partable$rhs & partable$block == g) out[rv.idx] <- ( out[rv.idx] / OV[ match(partable$lhs[rv.idx], ov.names) ] / OV[ match(partable$rhs[rv.idx], ov.names) ] ) # covariances ov # three types: # - only lhs is OV (and fixed.x = FALSE) # - only rhs is OV (and fixed.x = FALSE) # - both lhs and rhs are OV (regular case) if(cov.std) { if(!is.complex(est[rv.idx])) { RV <- sqrt(abs(est[rv.idx])) } else { RV <- sqrt( est[rv.idx] ) } rv.names <- partable$lhs[rv.idx] } # left idx.lhs <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) & !(partable$lhs %in% ov.names.x) & partable$lhs != partable$rhs & partable$block == g) if(length(idx.lhs) > 0L) { if(cov.std == FALSE) { out[idx.lhs] <- (out[idx.lhs] / OV[ match(partable$lhs[idx.lhs], ov.names)]) } else { out[idx.lhs] <- (out[idx.lhs] / RV[ match(partable$lhs[idx.lhs], rv.names)]) } } # right idx.rhs <- which(partable$op == "~~" & !(partable$rhs %in% lv.names) & !(partable$rhs %in% ov.names.x) & partable$lhs != partable$rhs & partable$block == g) if(length(idx.rhs) > 0L) { if(cov.std == FALSE) { out[idx.rhs] <- (out[idx.rhs] / OV[ match(partable$rhs[idx.rhs], ov.names)]) } else { out[idx.rhs] <- (out[idx.rhs] / RV[ match(partable$rhs[idx.rhs], rv.names)]) } } # 3b. "~~" lv #idx <- which(partable$op == "~~" & partable$rhs %in% lv.names & # partable$block == g) # 4a. "~1" ov idx <- which(partable$op == "~1" & !(partable$lhs %in% lv.names) & !(partable$lhs %in% ov.names.x) & partable$block == g) out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ] # 4b. "~1" lv #idx <- which(partable$op == "~1" & partable$lhs %in% lv.names & # partable$block == g) # 4c. "|" thresholds idx <- which(partable$op == "|" & !(partable$lhs %in% lv.names) & partable$block == g) out[idx] <- out[idx] / OV[ match(partable$lhs[idx], ov.names) ] # 4d. "~*~" scales idx <- which(partable$op == "~*~" & !(partable$lhs %in% lv.names) & partable$block == g) out[idx] <- 1.0 } # 5a ":=" idx <- which(partable$op == ":=") if(length(idx) > 0L) { x <- out[ partable$free & !duplicated(partable$free) ] out[idx] <- lavobject@Model@def.function(x) } # 5b "==" idx <- which(partable$op == "==") if(length(idx) > 0L) { x <- out[ partable$free & !duplicated(partable$free) ] out[idx] <- lavobject@Model@ceq.function(x) } # 5c. "<" or ">" idx <- which((partable$op == "<" | partable$op == ">")) if(length(idx) > 0L) { x <- out[ partable$free & !duplicated(partable$free) ] out[idx] <- lavobject@Model@cin.function(x) } out } unstandardize.est.ov <- function(partable, ov.var=NULL, cov.std=TRUE) { # check if ustart is missing; if so, look for est if(is.null(partable$ustart)) partable$ustart <- partable$est # check if block is missing if(is.null(partable$block)) { partable$block <- rep(1L, length(partable$ustart)) } stopifnot(!any(is.na(partable$ustart))) est <- out <- partable$ustart N <- length(est) # nblocks nblocks <- lav_partable_nblocks(partable) # if ov.var is NOT a list, make a list if(!is.list(ov.var)) { tmp <- ov.var ov.var <- vector("list", length=nblocks) ov.var[1:nblocks] <- list(tmp) } for(g in 1:nblocks) { ov.names <- vnames(partable, "ov", block = g) # not user lv.names <- vnames(partable, "lv", block = g) OV <- sqrt(ov.var[[g]]) # 1a. "=~" regular indicators idx <- which(partable$op == "=~" & !(partable$rhs %in% lv.names) & partable$block == g) out[idx] <- out[idx] * OV[ match(partable$rhs[idx], ov.names) ] # 1b. 
"=~" regular higher-order lv indicators # 1c. "=~" indicators that are both in ov and lv #idx <- which(partable$op == "=~" & partable$rhs %in% ov.names # & partable$rhs %in% lv.names & # partable$block == g) # 2. "~" regressions (and "<~") idx <- which((partable$op == "~" | partable$op == "<~") & partable$lhs %in% ov.names & partable$block == g) out[idx] <- out[idx] * OV[ match(partable$lhs[idx], ov.names) ] idx <- which((partable$op == "~" | partable$op == "<~") & partable$rhs %in% ov.names & partable$block == g) out[idx] <- out[idx] / OV[ match(partable$rhs[idx], ov.names) ] # 3a. "~~" ov # ATTENTION: in Mplus 4.1, the off-diagonal residual covariances # were computed by the formula cov(i,j) / sqrt(i.var*j.var) # were i.var and j.var where diagonal elements of OV # # in Mplus 6.1 (but also AMOS and EQS), the i.var and j.var # elements are the 'THETA' diagonal elements!! # variances rv.idx <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) & partable$lhs == partable$rhs & partable$block == g) out[rv.idx] <- ( out[rv.idx] * OV[ match(partable$lhs[rv.idx], ov.names) ] * OV[ match(partable$rhs[rv.idx], ov.names) ] ) # covariances idx <- which(partable$op == "~~" & !(partable$lhs %in% lv.names) & partable$lhs != partable$rhs & partable$block == g) if(length(idx) > 0L) { if(cov.std == FALSE) { out[idx] <- ( out[idx] * OV[ match(partable$lhs[idx], ov.names) ] * OV[ match(partable$rhs[idx], ov.names) ] ) } else { if(!is.complex(out[rv.idx])) { RV <- sqrt(abs(out[rv.idx])) } else { RV <- sqrt( out[rv.idx] ) } rv.names <- partable$lhs[rv.idx] out[idx] <- ( out[idx] * RV[ match(partable$lhs[idx], rv.names) ] * RV[ match(partable$rhs[idx], rv.names) ] ) } } # 3b. "~~" lv #idx <- which(partable$op == "~~" & partable$rhs %in% lv.names & # partable$block == g) # 4a. "~1" ov idx <- which(partable$op == "~1" & !(partable$lhs %in% lv.names) & partable$block == g) out[idx] <- out[idx] * OV[ match(partable$lhs[idx], ov.names) ] # 4b. "~1" lv #idx <- which(partable$op == "~1" & partable$lhs %in% lv.names & # partable$block == g) } # 5a ":=" # 5b "==" # 5c. "<" or ">" out } lavaan/R/lav_partable_subset.R0000644000176200001440000001225213051523100016051 0ustar liggesusers# YR 11 feb 2017: initial version # given a parameter table (PT), extract a part of the model: # eg.: # - only the measurement model (with saturated latent variables # - only the stuctural part # - a single measurement model (1 factor only) # ... lav_partable_subset_measurement_model <- function(PT = NULL, lavpta = NULL, lv.names = NULL) { # PT PT <- as.data.frame(PT, stringsAsFactors = FALSE) # lavpta if(is.null(lavpta)) { lavpta <- lav_partable_attributes(PT) } # ngroups ngroups <- lavpta$ngroups # lv.names: list with element per group if(is.null(lv.names)) { lv.names <- lavpta$vnames$lv.regular } else if(!is.list(lv.names)) { lv.names <- list(lv.names) } # which latent variables should we remove? 
lv.names.rm <- lapply(1:ngroups, function(g) { lavpta$vnames$lv.regular[[g]][ !lavpta$vnames$lv.regular[[g]] %in% lv.names[[g]] ] }) # remove rows idx rm.idx <- integer(0L) # remove not-needed measurement models for(g in 1:ngroups) { # indicators for not-needed latent variables IND.idx <- which( PT$op == "=~" & !PT$lhs %in% lv.names[[g]] & PT$group == g ) IND <- PT$rhs[ IND.idx ] # remove =~ rm.idx <- c(rm.idx, IND.idx) # remove ~~ VAR.idx <- which( PT$op == "~~" & ( PT$lhs %in% IND | PT$rhs %in% IND | PT$lhs %in% lv.names.rm[[g]] | PT$rhs %in% lv.names.rm[[g]] ) & PT$group == g ) rm.idx <- c(rm.idx, VAR.idx) # regressions, involving a latent variable LV.EQS.idx <- which( PT$op == "~" & ( PT$lhs %in% lavpta$vnames$lv.regular[[g]] | PT$rhs %in% lavpta$vnames$lv.regular[[g]] ) & PT$group == g ) rm.idx <- c(rm.idx, LV.EQS.idx) # regressions, involving indicators OV.EQS.idx <- which( PT$op == "~" & ( PT$lhs %in% IND | PT$rhs %in% IND ) & PT$group == g ) rm.idx <- c(rm.idx, OV.EQS.idx) # intercepts indicators OV.INT.idx <- which( PT$op == "~1" & PT$lhs %in% IND & PT$group == g ) rm.idx <- c(rm.idx, OV.INT.idx) # intercepts latent variables LV.INT.idx <- which( PT$op == "~1" & PT$lhs %in% lv.names.rm[[g]] & PT$group == g ) rm.idx <- c(rm.idx, LV.INT.idx) # thresholds TH.idx <- which( PT$op == "|" & PT$lhs %in% IND & PT$group == g ) rm.idx <- c(rm.idx, TH.idx) # scaling factors SC.idx <- which( PT$op == "~*~" & PT$lhs %in% IND & PT$group == g ) rm.idx <- c(rm.idx, SC.idx) # FIXME: ==, :=, <, >, == involving IND... } if(length(rm.idx) > 0L) { PT <- PT[-rm.idx,,drop = FALSE] } # clean up PT <- lav_partable_complete(PT) # check if we have enough indicators? # TODO PT } lav_partable_subset_structural_model <- function(PT = NULL, lavpta = NULL) { # PT PT <- as.data.frame(PT, stringsAsFactors = FALSE) # lavpta if(is.null(lavpta)) { lavpta <- lav_partable_attributes(PT) } # ngroups ngroups <- lavpta$ngroups # eqs.names eqs.x.names <- lavpta$vnames$eqs.x eqs.y.names <- lavpta$vnames$eqs.y # keep rows idx keep.idx <- integer(0L) # remove not-needed measurement models for(g in 1:ngroups) { # eqs.names eqs.names <- unique( c(lavpta$vnames$eqs.x[[g]], lavpta$vnames$eqs.y[[g]]) ) # regressions reg.idx <- which(PT$op == "~" & PT$group == g & PT$lhs %in% eqs.names & PT$rhs %in% eqs.names) # the variances var.idx <- which(PT$op == "~~" & PT$group == g & PT$lhs %in% eqs.names & PT$rhs %in% eqs.names & PT$lhs == PT$rhs) # optionally covariances (exo!) 
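    # (i.e., also keep the lhs != rhs rows among the eqs variables, so that
    # covariances among exogenous variables in the structural part survive)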
cov.idx <- which(PT$op == "~~" & PT$group == g & PT$lhs %in% eqs.names & PT$rhs %in% eqs.names & PT$lhs != PT$rhs) # means/intercepts int.idx <- which(PT$op == "~1" & PT$group == g & PT$lhs %in% eqs.names) keep.idx <- c(keep.idx, reg.idx, var.idx, cov.idx, int.idx) } PT <- PT[keep.idx, , drop = FALSE] # clean up PT <- lav_partable_complete(PT) PT } lavaan/R/lav_partable_flat.R0000644000176200001440000005007513052604111015502 0ustar liggesusers lav_partable_flat <- function(FLAT = NULL, blocks = "group", block.id = NULL, meanstructure = FALSE, int.ov.free = FALSE, int.lv.free = FALSE, orthogonal = FALSE, std.lv = FALSE, conditional.x = FALSE, fixed.x = TRUE, parameterization = "delta", auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = FALSE, auto.cov.lv.x = FALSE, auto.cov.y = FALSE, auto.th = FALSE, auto.delta = FALSE, varTable = NULL, group.equal = NULL, group.w.free = FALSE, ngroups = 1L) { categorical <- FALSE ### DEFAULT elements: parameters that are typically not specified by ### users, but should typically be considered, ### either free or fixed # extract `names' of various types of variables: lv.names <- lav_partable_vnames(FLAT, type="lv") # latent variables #lv.names.r <- lav_partable_vnames(FLAT, type="lv.regular") # regular latent variables lv.names.f <- lav_partable_vnames(FLAT, type="lv.formative") # formative latent variables ov.names <- lav_partable_vnames(FLAT, type="ov") # observed variables ov.names.x <- lav_partable_vnames(FLAT, type="ov.x") # exogenous x covariates ov.names.nox <- lav_partable_vnames(FLAT, type="ov.nox") lv.names.x <- lav_partable_vnames(FLAT, type="lv.x") # exogenous lv ov.names.y <- lav_partable_vnames(FLAT, type="ov.y") # dependent ov lv.names.y <- lav_partable_vnames(FLAT, type="lv.y") # dependent lv #lvov.names.y <- c(ov.names.y, lv.names.y) lvov.names.y <- c(lv.names.y, ov.names.y) # get 'ordered' variables, either from FLAT or varTable ov.names.ord1 <- lav_partable_vnames(FLAT, type="ov.ord") # check if we have "|" for exogenous variables if(length(ov.names.ord1) > 0L) { idx <- which(ov.names.ord1 %in% ov.names.x) if(length(idx) > 0L) { warning("lavaan WARNING: thresholds are defined for exogenous variables: ", paste(ov.names.ord1[idx], collapse=" ")) } } if(!is.null(varTable)) { ov.names.ord2 <- as.character(varTable$name[ varTable$type == "ordered" ]) # remove fixed.x variables idx <- which(ov.names.ord2 %in% ov.names.x) if(length(idx) > 0L) { ov.names.ord2 <- ov.names.ord2[-idx] } # remove those that do appear in the model syntax idx <- which(!ov.names.ord2 %in% ov.names) if(length(idx) > 0L) { ov.names.ord2 <- ov.names.ord2[-idx] } } else { ov.names.ord2 <- character(0) } #### FIXME!!!!! ORDER! ov.names.ord <- unique(c(ov.names.ord1, ov.names.ord2)) # if we have the "|" in the model syntax, check the number of thresholds if(!is.null(varTable) && length(ov.names.ord1) > 0L) { for(o in ov.names.ord1) { nth <- varTable$nlev[ varTable$name == o ] - 1L nth.in.partable <- sum(FLAT$op == "|" & FLAT$lhs == o) if(nth != nth.in.partable) { stop("lavaan ERROR: expected ", nth, " threshold(s) for variable ", sQuote(o), "; syntax contains ", nth.in.partable, "\n") } } } if(length(ov.names.ord) > 0L) categorical <- TRUE lhs <- rhs <- character(0) # 1. 
THRESHOLDS (based on varTable) # NOTE: - new in 0.5-18: ALWAYS include threshold parameters in partable, # but only free them if auto.th = TRUE # - only ov.names.ord2, because ov.names.ord1 are already in USER # and we only need to add 'default' parameters here nth <- 0L #if(auto.th && length(ov.names.ord2) > 0L) { if(length(ov.names.ord2) > 0L) { for(o in ov.names.ord2) { nth <- varTable$nlev[ varTable$name == o ] - 1L if(nth < 1L) next lhs <- c(lhs, rep(o, nth)) rhs <- c(rhs, paste("t", seq_len(nth), sep="")) } nth <- length(lhs) } # 2. default (residual) variances and covariances # a) (residual) VARIANCES (all ov's except exo, and all lv's) # NOTE: change since 0.5-17: we ALWAYS include the vars in the # parameter table; but only if auto.var = TRUE, we set them free #if(auto.var) { ov.var <- ov.names.nox # auto-remove ordinal variables #idx <- match(ov.names.ord, ov.var) #if(length(idx)) ov.var <- ov.var[-idx] lhs <- c(lhs, ov.var, lv.names) rhs <- c(rhs, ov.var, lv.names) #} # b) `independent` latent variable COVARIANCES (lv.names.x) if(auto.cov.lv.x && length(lv.names.x) > 1L) { tmp <- utils::combn(lv.names.x, 2) lhs <- c(lhs, tmp[1,]) # to fill upper.tri rhs <- c(rhs, tmp[2,]) } # c) `dependent` latent variables COVARIANCES (lv.y.idx + ov.y.lv.idx) if(auto.cov.y && length(lvov.names.y) > 1L) { tmp <- utils::combn(lvov.names.y, 2L) lhs <- c(lhs, tmp[1,]) # to fill upper.tri rhs <- c(rhs, tmp[2,]) } # d) exogenous x covariates: VARIANCES + COVARIANCES if(!conditional.x && (nx <- length(ov.names.x)) > 0L) { idx <- lower.tri(matrix(0, nx, nx), diag=TRUE) lhs <- c(lhs, rep(ov.names.x, each=nx)[idx]) # fill upper.tri rhs <- c(rhs, rep(ov.names.x, times=nx)[idx]) } # create 'op' (thresholds come first, then variances) op <- rep("~~", length(lhs)); op[seq_len(nth)] <- "|" # LATENT RESPONSE SCALES (DELTA) # NOTE: - new in 0.5-19: ALWAYS include scaling parameters in partable, # but only free them if auto.delta = TRUE (and parameterization # is "delta" #if(auto.delta && auto.th && length(ov.names.ord) > 0L && # # length(lv.names) > 0L && # (ngroups > 1L || any(FLAT$op == "~*~") || parameterization == "theta")) { if(length(ov.names.ord) > 0L) { lhs <- c(lhs, ov.names.ord) rhs <- c(rhs, ov.names.ord) op <- c(op, rep("~*~", length(ov.names.ord))) } # 3. INTERCEPTS if(meanstructure) { if(conditional.x) { ov.int <- ov.names.nox } else { ov.int <- ov.names } # auto-remove ordinal variables #idx <- which(ov.int %in% ov.names.ord) #if(length(idx)) ov.int <- ov.int[-idx] int.lhs <- c(ov.int, lv.names) lhs <- c(lhs, int.lhs) rhs <- c(rhs, rep("", length(int.lhs))) op <- c(op, rep("~1", length(int.lhs))) } # free group weights if(group.w.free) { lhs <- c(lhs, "group") rhs <- c(rhs, "w") op <- c(op, "%") } DEFAULT <- data.frame(lhs=lhs, op=op, rhs=rhs, mod.idx=rep(0L, length(lhs)), stringsAsFactors=FALSE) # 4. USER: user-specified elements lhs <- FLAT$lhs op <- FLAT$op rhs <- FLAT$rhs mod.idx <- FLAT$mod.idx lv.names <- lav_partable_vnames(FLAT, type="lv") # latent variables ov.names <- lav_partable_vnames(FLAT, type="ov") # observed variables # check order of covariances: we only fill the upper.tri! cov.idx <- which(op == "~~" & lhs != rhs) for(i in cov.idx) { lv.ov.names <- c(lv.names, ov.names) ### FIXME!!! OK?? lv.idx <- match(c(lhs[i], rhs[i]), lv.ov.names) if(lv.idx[1] > lv.idx[2]) { # swap! tmp <- lhs[i]; lhs[i] <- rhs[i]; rhs[i] <- tmp } if(lhs[i] %in% lv.names && rhs[i] %in% lv.names) { lv.idx <- match(c(lhs[i], rhs[i]), lv.names) if(lv.idx[1] > lv.idx[2]) { # swap! 
tmp <- lhs[i]; lhs[i] <- rhs[i]; rhs[i] <- tmp } } else if(lhs[i] %in% ov.names && rhs[i] %in% ov.names) { ov.idx <- match(c(lhs[i], rhs[i]), ov.names) if(ov.idx[1] > ov.idx[2]) { # swap! tmp <- lhs[i]; lhs[i] <- rhs[i]; rhs[i] <- tmp } } else { # mixed!! # we allow this since 0.4-10 lv.ov.names <- c(lv.names, ov.names) ### FIXME!!! OK?? lv.idx <- match(c(lhs[i], rhs[i]), lv.ov.names) if(lv.idx[1] > lv.idx[2]) { # swap! tmp <- lhs[i]; lhs[i] <- rhs[i]; rhs[i] <- tmp } } } USER <- data.frame(lhs=lhs, op=op, rhs=rhs, mod.idx=mod.idx, stringsAsFactors=FALSE) # check for duplicated elements in USER TMP <- USER[,1:3] idx <- which(duplicated(TMP)) if(length(idx) > 0L) { txt <- sapply(1:length(idx), function(i) { paste(" ", TMP[idx[i],"lhs"], TMP[idx[i], "op"], TMP[idx[i],"rhs"]) }) warning("duplicated elements in model syntax have been ignored:\n", paste(txt, collapse = "\n")) USER <- USER[-idx,] } # check for duplicated elements in DEFAULT # - FIXME: can we not avoid this somehow?? # - for example, if the user model includes 'x1 ~~ x1' # or 'x1 ~ 1' # - remove them from DEFAULT TMP <- rbind(DEFAULT[,1:3], USER[,1:3]) idx <- which(duplicated(TMP, fromLast=TRUE)) # idx should be in DEFAULT if(length(idx)) { for(i in idx) { flat.idx <- which(USER$lhs == DEFAULT$lhs[i] & USER$op == DEFAULT$op[i] & USER$rhs == DEFAULT$rhs[i]) if(length(flat.idx) != 1L) { cat("[lavaan DEBUG] idx in TMP: i = ", i, "\n"); print(TMP[i,]) cat("[lavaan DEBUG] idx in DEFAULT: i = ", i, "\n"); print(DEFAULT[i,]) cat("[lavaan DEBUG] flat.idx:"); print(flat.idx) } } DEFAULT <- DEFAULT[-idx,] } # now that we have removed all duplicated elements, we can construct # the LIST for a single group/block lhs <- c(USER$lhs, DEFAULT$lhs) op <- c(USER$op, DEFAULT$op) rhs <- c(USER$rhs, DEFAULT$rhs) user <- c(rep(1L, length(USER$lhs)), rep(0L, length(DEFAULT$lhs))) mod.idx <- c(USER$mod.idx, DEFAULT$mod.idx) free <- rep(1L, length(lhs)) ustart <- rep(as.numeric(NA), length(lhs)) #label <- paste(lhs, op, rhs, sep="") label <- rep(character(1), length(lhs)) exo <- rep(0L, length(lhs)) # 0a. if auto.th = FALSE, set fix the thresholds if(!auto.th) { th.idx <- which(op == "|" & user == 0L) free[th.idx] <- 0L } # 0b. if auto.var = FALSE, set the unspecified variances to zero if(!auto.var) { var.idx <- which(op == "~~" & lhs == rhs & user == 0L) ustart[var.idx] <- 0.0 free[var.idx] <- 0L } else { # 'formative' (residual) variances are set to zero by default var.idx <- which(op == "~~" & lhs == rhs & lhs %in% lv.names.f & user == 0L) ustart[var.idx] <- 0.0 free[var.idx] <- 0L } # 1. fix metric of regular latent variables if(std.lv) { # fix metric by fixing the variance of the latent variable lv.var.idx <- which(op == "~~" & lhs %in% lv.names & lhs == rhs) ustart[lv.var.idx] <- 1.0 free[lv.var.idx] <- 0L } if(auto.fix.first) { # fix metric by fixing the loading of the first indicator mm.idx <- which(op == "=~") first.idx <- mm.idx[which(!duplicated(lhs[mm.idx]))] ustart[first.idx] <- 1.0 free[first.idx] <- 0L } # 2. fix residual variance of single indicators to zero if(auto.var && auto.fix.single) { mm.idx <- which(op == "=~") T <- table(lhs[mm.idx]) if(any(T == 1L)) { # ok, we have a LV with only a single indicator lv.names.single <- names(T)[T == 1L] # get corresponding indicator if unique lhs.mm <- lhs[mm.idx]; rhs.mm <- rhs[mm.idx] single.ind <- rhs.mm[which(lhs.mm %in% lv.names.single & !(duplicated(rhs.mm) | duplicated(rhs.mm, fromLast=TRUE)))] # is the indicator unique? 
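    # (an indicator counts as 'unique' here if it belongs to a
    # single-indicator factor and appears only once among all "=~" rows;
    # only then is its residual variance fixed to zero below)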
if(length(single.ind)) { var.idx <- which(op == "~~" & lhs %in% single.ind & rhs %in% single.ind & lhs == rhs & user == 0L) ustart[var.idx] <- 0.0 free[var.idx] <- 0L } } } # 3. orthogonal=TRUE? if(orthogonal) { # FIXME: only lv.x.idx for now lv.cov.idx <- which(op == "~~" & lhs %in% lv.names & lhs != rhs & user == 0L) ustart[lv.cov.idx] <- 0.0 free[lv.cov.idx] <- 0L } # 4. intercepts if(meanstructure) { if(categorical) { # zero intercepts/means ordinal variables ov.int.idx <- which(op == "~1" & lhs %in% ov.names.ord & user == 0L) ustart[ov.int.idx] <- 0.0 free[ov.int.idx] <- 0L } if(int.ov.free == FALSE) { # zero intercepts/means observed variables ov.int.idx <- which(op == "~1" & lhs %in% ov.names & user == 0L) ustart[ov.int.idx] <- 0.0 free[ov.int.idx] <- 0L } if(int.lv.free == FALSE) { # zero intercepts/means latent variables lv.int.idx <- which(op == "~1" & lhs %in% lv.names & user == 0L) ustart[lv.int.idx] <- 0.0 free[lv.int.idx] <- 0L } } # 5. handle exogenous `x' covariates if(length(ov.names.x) > 0) { # 1. variances/covariances exo.var.idx <- which(op == "~~" & rhs %in% ov.names.x & user == 0L) if(fixed.x) { ustart[exo.var.idx] <- as.numeric(NA) # should be overriden later! free[exo.var.idx] <- 0L exo[exo.var.idx] <- 1L } else if(conditional.x) { exo[exo.var.idx] <- 1L } # 2. intercepts exo.int.idx <- which(op == "~1" & lhs %in% ov.names.x & user == 0L) if(fixed.x) { ustart[exo.int.idx] <- as.numeric(NA) # should be overriden later! free[exo.int.idx] <- 0L exo[exo.int.idx] <- 1L } else if(conditional.x) { exo[exo.int.idx] <- 1L } # 3. regressions ov + lv exo.reg.idx <- which(op == "~" & lhs %in% c(lv.names, ov.names.nox) & rhs %in% ov.names.x) if(conditional.x) { exo[exo.reg.idx] <- 1L } } # 5b. residual variances of ordinal variables? if(length(ov.names.ord) > 0L) { ord.idx <- which(lhs %in% ov.names.ord & op == "~~" & lhs == rhs) ustart[ord.idx] <- 1L ## FIXME!! or 0?? (0 breaks ex3.12) free[ord.idx] <- 0L } # 5c latent response scales of ordinal variables? # by default, all fixed to 1.0 if(length(ov.names.ord) > 0L) { delta.idx <- which(op == "~*~") ustart[delta.idx] <- 1.0 free[delta.idx] <- 0L } # group proportions (group 1L) if(group.w.free) { group.idx <- which(lhs == "group" & op == "%") #if(ngroups > 1L) { free[ group.idx ] <- 1L ustart[ group.idx ] <- as.numeric(NA) #} else { # free[ group.idx ] <- 0L # ustart[ group.idx ] <- 0.0 # last group #} } # 6. multiple groups? 
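    # (the single-block LIST built above is replicated once per group below;
    # e.g. with ngroups = 2 and 10 rows, rows 11-20 become the group-2 copy,
    # after which some parameters are freed/fixed again per group)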
group <- rep(1L, length(lhs)) if(ngroups > 1) { group <- rep(1:ngroups, each=length(lhs)) user <- rep(user, times=ngroups) lhs <- rep(lhs, times=ngroups) op <- rep(op, times=ngroups) rhs <- rep(rhs, times=ngroups) free <- rep(free, times=ngroups) ustart <- rep(ustart, times=ngroups) mod.idx <- rep(mod.idx, times=ngroups) label <- rep(label, times=ngroups) exo <- rep(exo, times=ngroups) # specific changes per group for(g in 2:ngroups) { # label # label[group == g] <- paste(label[group == 1], ".g", g, sep="") # free/fix intercepts if(meanstructure) { int.idx <- which(op == "~1" & lhs %in% lv.names & user == 0L & group == g) if(int.lv.free == FALSE && g > 1 && ("intercepts" %in% group.equal || "thresholds" %in% group.equal) && !("means" %in% group.equal) ) { free[ int.idx ] <- 1L ustart[ int.idx ] <- as.numeric(NA) } } # latent response scaling if(auto.delta && parameterization == "delta") { if(any(op == "~*~" & group == g) && ("thresholds" %in% group.equal)) { delta.idx <- which(op == "~*~" & group == g) free[ delta.idx ] <- 1L ustart[ delta.idx ] <- as.numeric(NA) } } else if(parameterization == "theta") { if(any(op == "~*~" & group == g) && ("thresholds" %in% group.equal)) { var.ord.idx <- which(op == "~~" & group == g & lhs %in% ov.names.ord & lhs == rhs) free[ var.ord.idx ] <- 1L ustart[ var.ord.idx ] <- as.numeric(NA) } } # group proportions if(group.w.free) { group.idx <- which(lhs == "group" & op == "%" & group == g) #if(g == ngroups) { # free[ group.idx ] <- 0L # ustart[ group.idx ] <- 0.0 # last group #} else { free[ group.idx ] <- 1L ustart[ group.idx ] <- as.numeric(NA) #} } } # g } # ngroups # construct LIST LIST <- list( id = seq_along(lhs), lhs = lhs, op = op, rhs = rhs, user = user) # add block column (before group/level columns) if(!is.null(block.id)) { # only one block LIST$block <- rep(block.id, length(lhs)) } else { # block is a combination of at least group, level, ... # for now, only group LIST$block <- group } # block columns (typically only group) for(block in blocks) { if(block == "group") { LIST[[ block ]] <- group } else { LIST[[block]] <- rep(0L, length(lhs)) } } # other columns LIST2 <- list(mod.idx = mod.idx, free = free, ustart = ustart, exo = exo, label = label) LIST <- c(LIST, LIST2) } lavaan/R/lav_samplestats.R0000644000176200001440000012037013054023606015244 0ustar liggesusers# constructor for the 'lavSampleStats' class # # initial version: YR 25/03/2009 # major revision: YR 5/11/2011: separate data.obs and sample statistics # YR 5/01/2016: add rescov, resvar, ... 
if conditional.x = TRUE lav_samplestats_from_data <- function(lavdata = NULL, DataX = NULL, DataeXo = NULL, DataOvnames = NULL, DataOvnamesx = NULL, DataOv = NULL, missing = "listwise", rescale = FALSE, missing.h1 = TRUE, estimator = "ML", mimic = "lavaan", meanstructure = FALSE, conditional.x = FALSE, fixed.x = FALSE, group.w.free = FALSE, WLS.V = NULL, NACOV = NULL, se = "standard", information = "expected", ridge = 1e-5, optim.method = "nlminb", zero.add = c(0.5, 0.0), zero.keep.margins = TRUE, zero.cell.warn = TRUE, debug = FALSE, verbose = FALSE) { # ridge default ridge.eps <- 0.0 # get X and Mp if(!is.null(lavdata)) { X <- lavdata@X; Mp <- lavdata@Mp ngroups <- lavdata@ngroups nlevels <- lavdata@nlevels nobs <- lavdata@nobs ov.names <- lavdata@ov.names ov.names.x <- lavdata@ov.names.x DataOv <- lavdata@ov eXo <- lavdata@eXo } else if(!is.null(DataX)) { stopifnot(is.list(DataX), is.matrix(DataX[[1L]])) X <- DataX eXo <- DataeXo ngroups <- length(X) nlevels <- 1L # for now Mp <- vector("list", length = ngroups) nobs <- vector("list", length = ngroups) for(g in 1:ngroups) { if(missing != "listwise") { Mp[[g]] <- lav_data_missing_patterns(X[[g]], sort.freq = FALSE, coverage = FALSE) } nobs[[g]] <- nrow(X[[g]]) } ov.names <- DataOvnames ov.names.x <- DataOvnamesx } else { stop("both lavdata and DataX argument are NULL") } # sample statistics per group # joint (y,x) cov <- vector("list", length = ngroups) var <- vector("list", length = ngroups) mean <- vector("list", length = ngroups) th <- vector("list", length = ngroups) th.idx <- vector("list", length = ngroups) th.names <- vector("list", length = ngroups) # residual (y | x) res.cov <- vector("list", length = ngroups) res.var <- vector("list", length = ngroups) res.th <- vector("list", length = ngroups) res.th.nox <- vector("list", length = ngroups) res.slopes <- vector("list", length = ngroups) res.int <- vector("list", length = ngroups) # fixed.x mean.x <- vector("list", length = ngroups) cov.x <- vector("list", length = ngroups) # binary/ordinal bifreq <- vector("list", length = ngroups) # extra sample statistics per group icov <- vector("list", length = ngroups) cov.log.det <- vector("list", length = ngroups) res.icov <- vector("list", length = ngroups) res.cov.log.det <- vector("list", length = ngroups) WLS.obs <- vector("list", length = ngroups) missing. <- vector("list", length = ngroups) missing.h1. <- vector("list", length = ngroups) missing.flag. <- FALSE zero.cell.tables <- vector("list", length = ngroups) YLp <- vector("list", length = ngroups) # group weights group.w <- vector("list", length = ngroups) # convenience? # FIXME! x.idx <- vector("list", length = ngroups) WLS.VD <- vector("list", length = ngroups) if(is.null(WLS.V)) { WLS.V <- vector("list", length = ngroups) WLS.V.user <- FALSE } else { if(!is.list(WLS.V)) { if(ngroups == 1L) { WLS.V <- list(WLS.V) } else { stop("lavaan ERROR: WLS.V argument should be a list of length ", ngroups) } } else { if(length(WLS.V) != ngroups) stop("lavaan ERROR: WLS.V assumes ", length(WLS.V), " groups; data contains ", ngroups, " groups") } # is WLS.V full? check first if(is.null(dim(WLS.V[[1]]))) { # we will assume it is the diagonal only WLS.VD <- WLS.V WLS.V <- lapply(WLS.VD, diag) } else { # create WLS.VD WLS.VD <- lapply(WLS.V, diag) } WLS.V.user <- TRUE # FIXME: check dimension of WLS.V!! 
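        # a minimal sketch of such a check (commented out; note that the
        # expected dimension equals length(WLS.obs[[g]]), which is only
        # computed further below, so the check would have to be done there):
        # for(g in seq_len(ngroups)) {
        #     exp.dim <- length(WLS.obs[[g]])
        #     if(!all(dim(WLS.V[[g]]) == exp.dim)) {
        #         stop("lavaan ERROR: dimension of WLS.V[[", g,
        #              "]] does not match the number of sample statistics")
        #     }
        # }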
} NACOV.compute <- TRUE if(is.null(NACOV)) { NACOV <- vector("list", length = ngroups) NACOV.user <- FALSE } else if(is.logical(NACOV)) { if(!NACOV) { NACOV.compute <- FALSE } else { NACOV.compute <- TRUE } NACOV.user <- FALSE NACOV <- vector("list", length = ngroups) } else { NACOV.compute <- FALSE if(!is.list(NACOV)) { if(ngroups == 1L) { NACOV <- list(NACOV) } else { stop("lavaan ERROR: NACOV argument should be a list of length ", ngroups) } } else { if(length(NACOV) != ngroups) stop("lavaan ERROR: NACOV assumes ", length(NACOV), " groups; data contains ", ngroups, " groups") } NACOV.user <- TRUE # FIXME: check dimension of NACOV!! } # compute some sample statistics per group for(g in 1:ngroups) { # check nobs if(nobs[[g]] < 2L) { if(nobs[[g]] == 0L) { stop("lavaan ERROR: data contains no observations", ifelse(ngroups > 1L, paste(" in group ", g, sep=""), "")) } else { stop("lavaan ERROR: data contains only a single observation", ifelse(ngroups > 1L, paste(" in group ", g, sep=""), "")) } } # exogenous x? nexo <- length(ov.names.x[[g]]) if(nexo) { stopifnot( nexo == NCOL(eXo[[g]]) ) # two cases: ov.names contains 'x' variables, or not if(conditional.x) { # ov.names.x are NOT in ov.names x.idx[[g]] <- length(ov.names[[g]]) + seq_len(nexo) } else { if(fixed.x) { # ov.names.x are a subset of ov.names x.idx[[g]] <- match(ov.names.x[[g]], ov.names[[g]]) stopifnot( !anyNA(x.idx[[g]]) ) } else { x.idx[[g]] <- integer(0L) } } } else { x.idx[[g]] <- integer(0L) conditional.x <- FALSE fixed.x <- FALSE } # group weight group.w[[g]] <- nobs[[g]] / sum(unlist(nobs)) # check if we have categorical data in this group categorical <- FALSE ov.types <- DataOv$type[ match(ov.names[[g]], DataOv$name) ] ov.levels <- DataOv$nlev[ match(ov.names[[g]], DataOv$name) ] CAT <- list() if("ordered" %in% ov.types) { if(nlevels > 1L) { stop("lavaan ERROR: multilevel + categorical not supported yet.") } else { categorical <- TRUE } } if(categorical) { if(estimator %in% c("ML","REML","PML","FML","MML","none")) { WLS.W <- FALSE } else { WLS.W <- TRUE } if(verbose) { cat("Estimating sample thresholds and correlations ... ") } CAT <- muthen1984(Data=X[[g]], ov.names=ov.names[[g]], ov.types=ov.types, ov.levels=ov.levels, ov.names.x=ov.names.x[[g]], eXo=eXo[[g]], group = g, # for error messages only missing = missing, # listwise or pairwise? 
WLS.W = WLS.W, optim.method = optim.method, zero.add = zero.add, zero.keep.margins = zero.keep.margins, zero.cell.warn = FALSE, zero.cell.tables = TRUE, verbose=debug) # empty cell tables zero.cell.tables[[g]] <- CAT$zero.cell.tables if(verbose) cat("done\n") } # fill in the other slots if(!is.null(eXo[[g]])) { cov.x[[g]] <- cov(eXo[[g]], use="pairwise") if(rescale) { # we 'transform' the sample cov (divided by n-1) # to a sample cov divided by 'n' cov.x[[g]] <- (nobs[[g]]-1)/nobs[[g]] * cov.x[[g]] } mean.x[[g]] <- colMeans(eXo[[g]]) } if(categorical) { # convenience th.idx[[g]] <- unlist(CAT$TH.IDX) th.names[[g]] <- unlist(CAT$TH.NAMES) if(conditional.x) { # residual var/cov res.var[[g]] <- unlist(CAT$VAR) res.cov[[g]] <- unname(CAT$COV) # th also contains the means of numeric variables res.th[[g]] <- unlist(CAT$TH) res.th.nox[[g]] <- unlist(CAT$TH.NOX) # for convenience, we store the intercept of numeric # variables in res.int NVAR <- NCOL(res.cov[[g]]) mean[[g]] <- res.int[[g]] <- numeric(NVAR) num.idx <- which(!seq_len(NVAR) %in% th.idx[[g]]) if(length(num.idx) > 0L) { NUM.idx <- which(th.idx[[g]] == 0L) mean[[g]][num.idx] <- res.th.nox[[g]][NUM.idx] res.int[[g]][num.idx] <- res.th[[g]][NUM.idx] } # slopes res.slopes[[g]] <- CAT$SLOPES } else { # var/cov var[[g]] <- unlist(CAT$VAR) cov[[g]] <- unname(CAT$COV) # th also contains the means of numeric variables th[[g]] <- unlist(CAT$TH) # mean (numeric only) NVAR <- NCOL(cov[[g]]) mean[[g]] <- numeric(NVAR) num.idx <- which(!seq_len(NVAR) %in% th.idx[[g]]) if(length(num.idx) > 0L) { NUM.idx <- which(th.idx[[g]] == 0L) mean[[g]][num.idx] <- th[[g]][NUM.idx] } } } else if(nlevels > 1L) { # continuous, multilevel setting # overwrite later with within cov? -- used for starting values cov[[g]] <- stats::cov(X[[g]], use = "pairwise") var[[g]] <- diag(cov[[g]]) # rescale cov by (N-1)/N? (only COV!) if(rescale) { # we 'transform' the sample cov (divided by n-1) # to a sample cov divided by 'n' cov[[g]] <- (nobs[[g]]-1)/nobs[[g]] * cov[[g]] } mean[[g]] <- colMeans(X[[g]], na.rm = TRUE) #YLp[[g]] <- lav_samplestats_cluster_patterns(Y = X[[g]], # Lp = Lp[[g]]) } else { # continuous, single-level case if(conditional.x) { # residual covariances! # FIXME: how to handle missing data here? Y <- cbind(X[[g]], eXo[[g]]) COV <- unname( stats::cov(Y, use="pairwise")) MEAN <- unname( colMeans(Y, na.rm=TRUE) ) var[[g]] <- diag(COV) cov[[g]] <- COV # rescale cov by (N-1)/N? (only COV!) if(rescale) { # we 'transform' the sample cov (divided by n-1) # to a sample cov divided by 'n' COV <- (nobs[[g]]-1)/nobs[[g]] * COV } cov[[g]] <- COV mean[[g]] <- MEAN A <- COV[-x.idx[[g]], -x.idx[[g]], drop=FALSE] B <- COV[-x.idx[[g]], x.idx[[g]], drop=FALSE] C <- COV[ x.idx[[g]], x.idx[[g]], drop=FALSE] # FIXME: make robust against singular C!!! res.cov[[g]] <- A - B %*% solve(C) %*% t(B) res.var[[g]] <- diag( cov[[g]] ) MY <- MEAN[-x.idx[[g]]]; MX <- MEAN[x.idx[[g]]] C3 <- rbind(c(1,MX), cbind(MX, C + tcrossprod(MX))) B3 <- cbind(MY, B + tcrossprod(MY,MX)) COEF <- unname(solve(C3, t(B3))) res.int[[g]] <- COEF[1,] # intercepts res.slopes[[g]] <- t(COEF[-1,,drop = FALSE]) # slopes } else if(missing == "two.stage" || missing == "robust.two.stage") { stopifnot(!conditional.x) # for now missing.flag. <- FALSE #!!! 
just use sample statistics missing.[[g]] <- lav_samplestats_missing_patterns(Y = X[[g]], Mp = Mp[[g]]) out <- lav_mvnorm_missing_h1_estimate_moments(Y = X[[g]], Mp = Mp[[g]], Yp = missing.[[g]], verbose = verbose) missing.h1.[[g]]$sigma <- out$Sigma missing.h1.[[g]]$mu <- out$Mu missing.h1.[[g]]$h1 <- out$fx # here, sample statistics == EM estimates cov[[g]] <- missing.h1.[[g]]$sigma var[[g]] <- diag(cov[[g]]) mean[[g]] <- missing.h1.[[g]]$mu } else { cov[[g]] <- stats::cov(X[[g]], use = "pairwise") var[[g]] <- diag(cov[[g]]) # rescale cov by (N-1)/N? (only COV!) if(rescale) { # we 'transform' the sample cov (divided by n-1) # to a sample cov divided by 'n' cov[[g]] <- (nobs[[g]]-1)/nobs[[g]] * cov[[g]] } mean[[g]] <- colMeans(X[[g]], na.rm=TRUE) } # icov and cov.log.det (but not if missing) if(missing != "ml") { out <- lav_samplestats_icov(COV = cov[[g]], ridge = ridge, x.idx = x.idx[[g]], ngroups = ngroups, g = g, warn = TRUE) icov[[g]] <- out$icov cov.log.det[[g]] <- out$cov.log.det # the same for res.cov if conditional.x = TRUE if(conditional.x) { out <- lav_samplestats_icov(COV = res.cov[[g]], ridge=ridge, x.idx = x.idx[[g]], ngroups = ngroups, g = g, warn = TRUE) res.icov[[g]] <- out$icov res.cov.log.det[[g]] <- out$cov.log.det } } } # WLS.obs if(nlevels == 1L) { WLS.obs[[g]] <- lav_samplestats_wls_obs(mean.g = mean[[g]], cov.g = cov[[g]], var.g = var[[g]], th.g = th[[g]], th.idx.g = th.idx[[g]], res.int.g = res.int[[g]], res.cov.g = res.cov[[g]], res.var.g = res.var[[g]], res.th.g = res.th[[g]], res.slopes.g = res.slopes[[g]], group.w.g = group.w[[g]], categorical = categorical, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x, group.w.free = group.w.free) } # if missing = "fiml", sample statistics per pattern if(missing == "ml") { if(nlevels > 1L) { stop("lavaan ERROR: multilevel + fiml not supported yet") } if(conditional.x) { stop("lavaan ERROR: multilevel + conditional.x not supported yet") } stopifnot(!conditional.x) # for now missing.flag. <- TRUE missing.[[g]] <- lav_samplestats_missing_patterns(Y = X[[g]], Mp = Mp[[g]]) #cat("missing.h1 = "); print(missing.h1); cat("\n") if(missing.h1) { # estimate moments unrestricted model out <- lav_mvnorm_missing_h1_estimate_moments(Y = X[[g]], Mp = Mp[[g]], Yp = missing.[[g]], verbose = verbose) missing.h1.[[g]]$sigma <- out$Sigma missing.h1.[[g]]$mu <- out$Mu missing.h1.[[g]]$h1 <- out$fx } } # NACOV (=GAMMA) if(!NACOV.user && nlevels == 1L) { if(estimator == "ML" && !missing.flag. && NACOV.compute) { if(conditional.x) { Y <- Y } else { Y <- X[[g]] } NACOV[[g]] <- lav_samplestats_Gamma(Y = Y, x.idx = x.idx[[g]], # FALSE for now, until we use to # compute SB #fixed.x = fixed.x, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x, Mplus.WLS = FALSE) } else if(estimator %in% c("WLS","DWLS","ULS")) { if(!categorical) { # sample size large enough? 
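                    # (the count below: p* = nvar*(nvar+1)/2 unique elements
                    # in the covariance matrix, + nvar means if meanstructure,
                    # + nvar*nexo slopes if conditional.x; e.g. nvar = 9 with
                    # meanstructure gives p* = 45 + 9 = 54, so Gamma needs at
                    # least 54 observations to be non-singular)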
nvar <- ncol(X[[g]]) #if(conditional.x && nexo > 0L) { # nvar <- nvar - nexo #} pstar <- nvar*(nvar+1)/2 if(meanstructure) pstar <- pstar + nvar if(conditional.x && nexo > 0L) { pstar <- pstar + (nvar * nexo) } if(nrow(X[[g]]) < pstar) { if(ngroups > 1L) { txt <- paste(" in group: ", g, "\n", sep="") } else { txt <- "\n" } warning("lavaan WARNING: number of observations (", nrow(X[[g]]), ") too small to compute Gamma", txt) } if(conditional.x) { Y <- Y } else { Y <- X[[g]] } NACOV[[g]] <- lav_samplestats_Gamma(Y = Y, x.idx = x.idx[[g]], fixed.x = fixed.x, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x, Mplus.WLS = (mimic=="Mplus")) } else { # categorical case NACOV[[g]] <- CAT$WLS.W * (nobs[[g]] - 1L) } } else if(estimator == "PML") { # no NACOV ... for now } } # WLS.V if(!WLS.V.user && nlevels == 1L) { if(estimator == "GLS") { # Note: we need the 'original' COV/MEAN/ICOV # sample statistics; not the 'residual' version WLS.V[[g]] <- lav_samplestats_Gamma_inverse_NT( ICOV = icov[[g]], COV = cov[[g]], MEAN = mean[[g]], x.idx = x.idx[[g]], fixed.x = fixed.x, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x) if(mimic == "Mplus" && !conditional.x && meanstructure) { # bug in Mplus? V11 rescaled by nobs[[g]]/(nobs[[g]]-1) nvar <- NCOL(cov[[g]]) WLS.V[[g]][1:nvar, 1:nvar] <- WLS.V[[g]][1:nvar, 1:nvar, drop = FALSE] * nobs[[g]]/(nobs[[g]]-1) } } else if(estimator == "ML") { # no WLS.V here, since function of model-implied moments } else if(estimator %in% c("WLS","DWLS","ULS")) { if(!categorical) { if(estimator == "WLS") { if(!fixed.x) { # Gamma should be po before we invert ev <- eigen(NACOV[[g]], # symmetric=FALSE, only.values=TRUE)$values if(is.complex(ev) || any(Re(ev) < 0)) { stop("lavaan ERROR: Gamma (NACOV) matrix is not positive-definite") } WLS.V[[g]] <- inv.chol(NACOV[[g]]) } else { # fixed.x: we have zero cols/rows # ginv does the trick, but perhaps this is overkill # just removing the zero rows/cols, invert, and # fill back in the zero rows/cols would do it WLS.V[[g]] <- MASS::ginv(NACOV[[g]]) } } else if(estimator == "DWLS") { dacov <- diag(NACOV[[g]]) if(!all(is.finite(dacov))) { stop("lavaan ERROR: diagonal of Gamma (NACOV) contains non finite values") } if(fixed.x) { # structural zeroes! zero.idx <- which(dacov == 0.0) idacov <- 1/dacov idacov[zero.idx] <- 0.0 } else { idacov <- 1/dacov } WLS.V[[g]] <- diag(idacov, nrow=NROW(NACOV[[g]]), ncol=NCOL(NACOV[[g]])) WLS.VD[[g]] <- idacov } else if(estimator == "ULS") { #WLS.V[[g]] <- diag(length(WLS.obs[[g]])) WLS.VD[[g]] <- rep(1, length(WLS.obs[[g]])) } } else { if(estimator == "WLS") { WLS.V[[g]] <- inv.chol(CAT$WLS.W * nobs[[g]]) } else if(estimator == "DWLS") { dacov <- diag(CAT$WLS.W * nobs[[g]]) #WLS.V[[g]] <- diag(1/dacov, nrow=NROW(CAT$WLS.W), # ncol=NCOL(CAT$WLS.W)) WLS.VD[[g]] <- 1/dacov } else if(estimator == "ULS") { #WLS.V[[g]] <- diag(length(WLS.obs[[g]])) WLS.VD[[g]] <- rep(1, length(WLS.obs[[g]])) } } } else if(estimator == "PML" || estimator == "FML") { # no WLS.V here } # group.w.free if(!is.null(WLS.V[[g]]) && group.w.free) { # unweight!! #a <- group.w[[g]] * sum(unlist(nobs)) / nobs[[g]] # always 1!!! ### FIXME: this is consistent with expected information ### but why not group.w[[g]] * (1 - group.w[[g]])? 
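            # (under multinomial sampling, Var(n_g/N) = pi_g*(1 - pi_g)/N,
            # which is where the group.w[[g]] * (1 - group.w[[g]]) factor
            # below comes from)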
#a <- 1 a <- group.w[[g]] * (1 - group.w[[g]]) * sum(unlist(nobs)) / nobs[[g]] # invert a <- 1/a WLS.V[[g]] <- lav_matrix_bdiag( matrix(a, 1, 1), WLS.V[[g]] ) } } } # ngroups # remove 'CAT', unless debug -- this is to save memory if(!debug) { CAT <- list() } # construct SampleStats object lavSampleStats <- new("lavSampleStats", # sample moments th = th, th.idx = th.idx, th.names = th.names, mean = mean, cov = cov, var = var, # residual (y | x) res.cov = res.cov, res.var = res.var, res.th = res.th, res.th.nox = res.th.nox, res.slopes = res.slopes, res.int = res.int, mean.x = mean.x, cov.x = cov.x, bifreq = bifreq, group.w = group.w, # convenience nobs = nobs, ntotal = sum(unlist(nobs)), ngroups = ngroups, x.idx = x.idx, # extra sample statistics icov = icov, cov.log.det = cov.log.det, res.icov = res.icov, res.cov.log.det = res.cov.log.det, ridge = ridge.eps, WLS.obs = WLS.obs, WLS.V = WLS.V, WLS.VD = WLS.VD, NACOV = NACOV, NACOV.user = NACOV.user, # cluster/levels YLp = YLp, # missingness missing.flag = missing.flag., missing = missing., missing.h1 = missing.h1., zero.cell.tables = zero.cell.tables ) # just a SINGLE warning if we have empty cells if(categorical && zero.cell.warn && any(sapply(zero.cell.tables, nrow) > 0L)) { nempty <- sum(sapply(zero.cell.tables, nrow)) warning("lavaan WARNING: ", nempty, " bivariate tables have empty cells; to see them, use:\n", " lavInspect(fit, \"zero.cell.tables\")") } lavSampleStats } lav_samplestats_from_moments <- function(sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, rescale = FALSE, ov.names = NULL, estimator = "ML", mimic = "lavaan", WLS.V = NULL, NACOV = NULL, ridge = 1e-5, meanstructure = FALSE, group.w.free = FALSE) { # ridge default ridge.eps <- 0.0 # matrix -> list if(!is.list(sample.cov)) sample.cov <- list(sample.cov) if(!is.null(sample.mean) && !is.list(sample.mean)) { # check if sample.mean is string (between single quotes) if(is.character(sample.mean)) { sample.mean <- char2num(sample.mean) } sample.mean <- list(sample.mean) } # number of groups ngroups <- length(sample.cov) # sample statistics per group cov <- vector("list", length = ngroups) var <- vector("list", length = ngroups) mean <- vector("list", length = ngroups) th <- vector("list", length = ngroups) th.idx <- vector("list", length = ngroups) th.names <- vector("list", length = ngroups) # residual (y | x) res.cov <- vector("list", length = ngroups) res.var <- vector("list", length = ngroups) res.th <- vector("list", length = ngroups) res.th.nox <- vector("list", length = ngroups) res.slopes <- vector("list", length = ngroups) res.int <- vector("list", length = ngroups) # fixed.x mean.x <- vector("list", length = ngroups) cov.x <- vector("list", length = ngroups) bifreq <- vector("list", length = ngroups) # extra sample statistics per group icov <- vector("list", length = ngroups) cov.log.det <- vector("list", length = ngroups) res.icov <- vector("list", length = ngroups) res.cov.log.det <- vector("list", length = ngroups) WLS.obs <- vector("list", length = ngroups) missing. <- vector("list", length = ngroups) missing.h1. <- vector("list", length = ngroups) missing.flag. 
<- FALSE zero.cell.tables <- vector("list", length = ngroups) YLp <- vector("list", length = ngroups) # group weights group.w <- vector("list", length = ngroups) x.idx <- vector("list", length = ngroups) # for now, we do NOT support categorical data (using moments only), # fixed.x, and conditional.x categorical <- FALSE fixed.x <- FALSE conditional.x <- FALSE WLS.VD <- vector("list", length = ngroups) if(is.null(WLS.V)) { WLS.V <- vector("list", length = ngroups) WLS.V.user <- FALSE } else { if(!is.list(WLS.V)) { if(ngroups == 1L) { WLS.V <- list(WLS.V) } else { stop("lavaan ERROR: WLS.V argument should be a list of length ", ngroups) } } else { if(length(WLS.V) != ngroups) stop("lavaan ERROR: WLS.V assumes ", length(WLS.V), " groups; data contains ", ngroups, " groups") } # is WLS.V full? check first if(is.null(dim(WLS.V[[1]]))) { # we will assume it is the diagonal only WLS.VD <- WLS.V WLS.V <- lapply(WLS.VD, diag) } else { # create WLS.VD WLS.VD <- lapply(WLS.V, diag) } WLS.V.user <- TRUE # FIXME: check dimension of WLS.V!! } if(is.null(NACOV)) { NACOV <- vector("list", length = ngroups) NACOV.user <- FALSE } else { if(!is.list(NACOV)) { if(ngroups == 1L) { NACOV <- list(NACOV) } else { stop("lavaan ERROR: NACOV argument should be a list of length ", ngroups) } } else { if(length(NACOV) != ngroups) stop("lavaan ERROR: NACOV assumes ", length(NACOV), " groups; data contains ", ngroups, " groups") } NACOV.user <- TRUE # FIXME: check dimension of NACOV!! } nobs <- as.list(as.integer(sample.nobs)) for(g in 1:ngroups) { # FIXME: if the user provides x.idx, we could use this!!! # exogenous x? x.idx[[g]] <- integer(0L) conditional.x <- FALSE fixed.x <- FALSE # group weight group.w[[g]] <- nobs[[g]] / sum(unlist(nobs)) tmp.cov <- sample.cov[[g]] # make sure that the matrix is fully symmetric (NEEDED?) T <- t(tmp.cov) tmp.cov[upper.tri(tmp.cov)] <- T[upper.tri(T)] # check dimnames if(!is.null(rownames(tmp.cov))) { cov.names <- rownames(tmp.cov) } else if(!is.null(colnames(tmp.cov))) { cov.names <- colnames(tmp.cov) } else { stop("lavaan ERROR: please provide row/col names ", "for the covariance matrix!\n") } # extract only the part we need (using ov.names) idx <- match(ov.names[[g]], cov.names) if(any(is.na(idx))) { cat("found: ", cov.names, "\n") cat("expected: ", ov.names[[g]], "\n") stop("lavaan ERROR: rownames of covariance matrix do not match ", "the model!\n", " found: ", paste(cov.names, collapse=" "), "\n", " expected: ", paste(ov.names[[g]], collapse=" "), "\n") } else { tmp.cov <- tmp.cov[idx,idx,drop=FALSE] } # strip dimnames dimnames(tmp.cov) <- NULL if(is.null(sample.mean)) { # assume zero mean vector tmp.mean <- numeric(ncol(tmp.cov)) } else { # extract only the part we need tmp.mean <- as.numeric(sample.mean[[g]][idx]) } cov[[g]] <- tmp.cov var[[g]] <- diag(tmp.cov) mean[[g]] <- tmp.mean # rescale cov by (N-1)/N? if(rescale) { # we 'transform' the sample cov (divided by n-1) # to a sample cov divided by 'n' cov[[g]] <- (nobs[[g]]-1)/nobs[[g]] * cov[[g]] } # FIXME: create res.cov if conditional.x = TRUE!!! 
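        # a hedged sketch of what that would involve (mirrors the raw-data
        # branch in lav_samplestats_from_data; it assumes x.idx[[g]] were
        # known -- here it is always integer(0), so the branch is currently
        # unreachable):
        # A <- cov[[g]][-x.idx[[g]], -x.idx[[g]], drop = FALSE]
        # B <- cov[[g]][-x.idx[[g]],  x.idx[[g]], drop = FALSE]
        # C <- cov[[g]][ x.idx[[g]],  x.idx[[g]], drop = FALSE]
        # res.cov[[g]] <- A - B %*% solve(C) %*% t(B)   # Schur complement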
stopifnot(!conditional.x) # icov and cov.log.det out <- lav_samplestats_icov(COV = cov[[g]], ridge = ridge, x.idx = x.idx[[g]], ngroups = ngroups, g = g, warn = TRUE) icov[[g]] <- out$icov; cov.log.det[[g]] <- out$cov.log.det # the same for res.cov if conditional.x = TRUE if(conditional.x) { out <- lav_samplestats_icov(COV = res.cov[[g]], ridge = ridge, x.idx = x.idx[[g]], ngroups = ngroups, g = g, warn = TRUE) res.icov[[g]] <- out$icov res.cov.log.det[[g]] <- out$cov.log.det } # WLS.obs WLS.obs[[g]] <- lav_samplestats_wls_obs(mean.g = mean[[g]], cov.g = cov[[g]], var.g = var[[g]], th.g = th[[g]], th.idx.g = th.idx[[g]], res.int.g = res.int[[g]], res.cov.g = res.cov[[g]], res.var.g = res.var[[g]], res.th.g = res.th[[g]], res.slopes.g = res.slopes[[g]], group.w.g = group.w[[g]], categorical = categorical, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x, group.w.free = group.w.free) # NACOV # NACOV (=GAMMA) #if(!NACOV.user) { # nothing to do here; only used if provided by user #} # WLS.V if(!WLS.V.user) { if(estimator == "GLS") { # FIXME: in <0.5-21, we had #V11 <- icov[[g]] # if(mimic == "Mplus") { # is this a bug in Mplus? # V11 <- V11 * nobs[[g]]/(nobs[[g]]-1) # } WLS.V[[g]] <- lav_samplestats_Gamma_inverse_NT(ICOV = icov[[g]], COV = cov[[g]], MEAN = mean[[g]], x.idx = x.idx[[g]], fixed.x = fixed.x, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = conditional.x) } else if(estimator == "ULS") { WLS.V[[g]] <- diag(length(WLS.obs[[g]])) WLS.VD[[g]] <- rep(1, length(WLS.obs[[g]])) } else if(estimator == "WLS" || estimator == "DWLS") { if(is.null(WLS.V[[g]])) stop("lavaan ERROR: the (D)WLS estimator is only available with full data or with a user-provided WLS.V") } # group.w.free if(!is.null(WLS.V[[g]]) && group.w.free) { # FIXME!!! 
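            # (note: unlike the raw-data constructor above, the group-weight
            # block is padded here with a fixed 1, not with the inverse
            # multinomial variance -- presumably what the FIXME refers to)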
WLS.V[[g]] <- lav_matrix_bdiag( matrix(1, 1, 1), WLS.V[[g]] ) } } } # ngroups # construct SampleStats object lavSampleStats <- new("lavSampleStats", # sample moments th = th, th.idx = th.idx, th.names = th.names, mean = mean, cov = cov, var = var, # residual (y | x) res.cov = res.cov, res.var = res.var, res.th = res.th, res.th.nox = res.th.nox, res.slopes = res.slopes, res.int = res.int, # fixed.x mean.x = mean.x, cov.x = cov.x, # other bifreq = bifreq, group.w = group.w, # convenience nobs = nobs, ntotal = sum(unlist(nobs)), ngroups = ngroups, x.idx = x.idx, # extra sample statistics icov = icov, cov.log.det = cov.log.det, res.icov = res.icov, res.cov.log.det = res.cov.log.det, ridge = ridge.eps, WLS.obs = WLS.obs, WLS.V = WLS.V, WLS.VD = WLS.VD, NACOV = NACOV, NACOV.user = NACOV.user, # cluster/level YLp = YLp, # missingness missing.flag = missing.flag., missing = missing., missing.h1 = missing.h1., zero.cell.tables = zero.cell.tables ) lavSampleStats } # compute sample statistics, per missing pattern lav_samplestats_missing_patterns <- function(Y = NULL, Mp = NULL) { # coerce Y to matrix Y <- as.matrix(Y) if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y, sort.freq = FALSE, coverage = FALSE) } Yp <- vector("list", length = Mp$npatterns) # fill in pattern statistics for(p in seq_len(Mp$npatterns)) { # extract raw data for these cases RAW <- Y[Mp$case.idx[[p]], Mp$pat[p, ], drop = FALSE] # more than one case if (Mp$freq[p] > 1L) { MY <- colMeans(RAW) SY <- crossprod(RAW)/Mp$freq[p] - tcrossprod(MY) } # only a single observation else { SY <- 0 MY <- as.numeric(RAW) } # store sample statistics, var.idx and freq Yp[[p]] <- list(SY = SY, MY = MY, var.idx = Mp$pat[p,], freq = Mp$freq[p]) } Yp } # compute sample statistics, per cluster lav_samplestats_cluster_patterns <- function(Y = NULL, Lp = NULL) { # coerce Y to matrix Y <- as.matrix(Y) if(is.null(Lp)) { stop("lavaan ERROR: Lp is NULL") } YLp <- vector("list", length = length(Lp$cluster)) YLp } lavaan/R/lav_func_deriv.R0000644000176200001440000001510712616435254015042 0ustar liggesusers# numerical derivatives using complex numbers # see Squire & Trapp 1998, siam rev 40(1) 110-112 # or Ridout, MS (2009), the american statistician 63(1) 66-74 # it would seem that you can choose h to be fairly small, without # sacrifycing accuracy due to rounding errors # YR 17 July 2012 lav_func_gradient_complex <- function(func, x, h = .Machine$double.eps, ... , check.scalar = TRUE, fallback.simple = TRUE) { # check current point, see if it is a scalar function if(check.scalar) { f0 <- try(func(x*(0+1i), ...), silent = TRUE) if(inherits(f0, "try-error")) { if(fallback.simple) { dx <- lav_func_gradient_simple(func = func, x = x, h = sqrt(h), check.scalar = check.scalar, ...) return(dx) } else { stop("function does not support non-numeric (complex) argument") } } if(length(f0) != 1L) { stop("function is not scalar and returns more than one element") } } nvar <- length(x) # determine 'h' per element of x h <- pmax(h, abs(h*x)) # get exact h, per x tmp <- x + h h <- (tmp - x) # simple 'forward' method dx <- rep(as.numeric(NA), nvar) for(p in seq_len(nvar)) { dx[p] <- Im(func(x + h*1i*(seq.int(nvar) == p),...))/h[p] } dx } # as a backup, if func() is not happy about non-numeric arguments lav_func_gradient_simple <- function(func, x, h = sqrt(.Machine$double.eps), ... , check.scalar = TRUE) { # check current point, see if it is a scalar function if(check.scalar) { f0 <- func(x, ...) 
if(length(f0) != 1L) { stop("function is not scalar and returns more than one element") } } nvar <- length(x) # determine 'h' per element of x h <- pmax(h, abs(h*x)) # get exact h, per x tmp <- x + h h <- (tmp - x) # simple 'forward' method dx <- rep(as.numeric(NA), nvar) for(p in seq_len(nvar)) { dx[p] <- (func(x + h*(seq.int(nvar) == p), ...) - func(x,...))/h[p] } dx } lav_func_jacobian_complex <- function(func, x, h = .Machine$double.eps, ... , fallback.simple = TRUE) { f0 <- try(func(x*(0+1i), ...), silent = TRUE) if(inherits(f0, "try-error")) { if(fallback.simple) { dx <- lav_func_jacobian_simple(func = func, x = x, h = sqrt(h), ...) return(dx) } else { stop("function does not support non-numeric (complex) argument") } } nres <- length(f0) nvar <- length(x) # determine 'h' per element of x h <- pmax(h, abs(h*x)) # get exact h, per x tmp <- x + h h <- (tmp - x) # simple 'forward' method dx <- matrix(as.numeric(NA), nres, nvar) for(p in seq_len(nvar)) { dx[,p] <- Im(func(x + h*1i*(seq.int(nvar) == p), ...))/h[p] } dx } lav_func_jacobian_simple <- function(func, x, h = sqrt(.Machine$double.eps), ...) { f0 <- func(x, ...) nres <- length(f0) nvar <- length(x) # determine 'h' per element of x h <- pmax(h, abs(h*x)) # get exact h, per x tmp <- x + h h <- (tmp - x) # simple 'forward' method dx <- matrix(as.numeric(NA), nres, nvar) for(p in seq_len(nvar)) { dx[,p] <- (func(x + h*(seq.int(nvar) == p), ...) - func(x,...))/h[p] } dx } # this is based on the Ridout (2009) paper, and the code snippet for 'h4' lav_func_hessian_complex <- function(func, x, h = .Machine$double.eps, ... , check.scalar = TRUE) { # check current point, see if it is a scalar function if(check.scalar) { f0 <- try(func(x*(0+1i), ...), silent = TRUE) if(inherits(f0, "try-error")) { stop("function does not support non-numeric (complex) argument") } if(length(f0) != 1L) { stop("function is not scalar and returns more than one element") } } nvar <- length(x) # determine 'h' per element of x #delta1 <- pmax(h^(1/3), abs(h^(1/3)*x)) #delta2 <- pmax(h^(1/5), abs(h^(1/5)*x)) delta1 <- h^(1/3) delta2 <- h^(1/5) H <- matrix(as.numeric(NA), nvar, nvar) for(i in seq_len(nvar)) { for(j in 1:i) { if(i == j) { delta <- delta2 } else { delta <- delta1 } H[i,j] <- H[j,i] <- Im(func(x + delta*1i*(seq.int(nvar) == i)*x + delta*(seq.int(nvar) == j)*x, ...) - func(x + delta*1i*(seq.int(nvar) == i)*x - delta*(seq.int(nvar) == j)*x, ...)) / (2*delta*delta*x[i]*x[j]) } } H } # quick and dirty (FIXME!!!) way to get # surely there must be a more elegant way? 
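# illustration of the complex-step idea used in the gradient functions above
# (a sketch for the curious reader, not package code; wrapped in if(FALSE)
# so it is never executed at build/load time):
if(FALSE) {
    f  <- function(x) sum(exp(x))   # analytic gradient: exp(x)
    x0 <- c(0.5, -1.2)
    # complex step: accurate up to machine precision (~1e-16)
    max(abs(lav_func_gradient_complex(f, x0) - exp(x0)))
    # forward difference: only ~sqrt(.Machine$double.eps) accurate (~1e-8)
    max(abs(lav_func_gradient_simple(f, x0) - exp(x0)))
}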
# dCor/dCov lav_deriv_cov2cor <- function(COV = NULL, num.idx = NULL) { # dCor/dvar1 = - cov / (2*var1 * sqrt(var1) * sqrt(var2)) # dCor/dvar2 = - cov / (2*var2 * sqrt(var1) * sqrt(var2)) # dCor/dcov = 1/(sqrt(var1) * sqrt(var2)) # diagonal: diag(lav_matrix_vech(tcrossprod(1/delta))) nvar <- ncol(COV); pstar <- nvar*(nvar+1)/2 delta <- sqrt(diag(COV)) if(length(num.idx) > 0L) { delta[num.idx] <- 1.0 } A <- COV * -1/( 2*delta*delta*tcrossprod(delta) ) if(length(num.idx) > 0L) { A[num.idx,] <- 0; A[cbind(num.idx, num.idx)] <- 1 } A2 <- diag(nvar) %x% t(A) OUT <- diag( pstar ) diag(OUT) <- lav_matrix_vech(tcrossprod(1/delta)) var.idx <- lav_matrix_diagh_idx(nvar) DUP <- lav_matrix_duplication(nvar) OUT[,var.idx] <- t(DUP) %*% A2[,lav_matrix_diag_idx(nvar)] if(length(num.idx) > 0L) { var.idx <- var.idx[-num.idx] } OUT[var.idx, var.idx] <- 0 OUT } lav_deriv_cov2cor_numerical <- function(COV, num.idx=integer(0)) { compute.R <- function(x) { S <- lav_matrix_vech_reverse(x) diagS <- diag(S); delta <- 1/sqrt(diagS) if(length(num.idx) > 0L) { delta[num.idx] <- 1.0 } R <- diag(delta) %*% S %*% diag(delta) #R <- cov2cor(S) R.vec <- lav_matrix_vech(R, diagonal = TRUE) R.vec } x <- lav_matrix_vech(COV, diagonal = TRUE) dx <- lav_func_jacobian_complex(func=compute.R, x=x) dx } lavaan/R/lav_test_satorra_bentler.R0000644000176200001440000003466613042173466017154 0ustar liggesusers## ## FIXME: does not work yet if WLS.V is diagonal/numeric!!! ## lav_test_satorra_bentler <- function(lavobject = NULL, lavsamplestats = NULL, lavmodel = NULL, lavoptions = NULL, lavdata = NULL, TEST.unscaled = NULL, E.inv = NULL, Delta = NULL, WLS.V = NULL, Gamma = NULL, test = "satorra.bentler", mimic = "lavaan", method = "default", return.ugamma = FALSE) { TEST <- list() if(!is.null(lavobject)) { lavsamplestats <- lavobject@SampleStats lavmodel <- lavobject@Model lavoptions <- lavobject@Options lavpartable <- lavobject@ParTable lavdata <- lavobject@Data TEST$standard <- lavobject@test[[1]] } # check test if(!all(test %in% c("satorra.bentler", "scaled.shifted", "mean.var.adjusted"))) { warning("lavaan WARNING: test must be one of `satorra.bentler', `scaled.shifted' or `mean.var.adjusted'; will use `satorra.bentler' only") test <- "satorra.benter" } # check method if(method == "default") { method <- "original" } else if(!all(method %in% c("original", "orthogonal.complement", "ABA"))) { warning("lavaan WARNING: method must be one of `original', `orthogonal.complement'; will use `original'") method <- "original" } # do we have E.inv, Delta, WLS.V? 
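    # if not, compute the expected information below and recover Delta
    # (the model derivatives) and WLS.V (the weight matrix) from its
    # attributes; E.inv is the inverse of the (augmented) information.
    # these are the ingredients of the scaling factor computed further on:
    #     U = V - V %*% Delta %*% E.inv %*% t(Delta) %*% V
    #     c = tr(U %*% Gamma) / df,    T.scaled = T / c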
if(is.null(E.inv) || is.null(Delta) || is.null(WLS.V)) { if(mimic == "Mplus" && lavoptions$estimator == "ML") { E <- lav_model_information_expected_MLM(lavmodel = lavmodel, augmented = FALSE, inverted = FALSE, lavsamplestats=lavsamplestats, extra = TRUE) } else { E <- lav_model_information_expected(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, extra = TRUE) } E.inv <- try(lav_model_information_augment_invert(lavmodel, information = E, inverted = TRUE), silent=TRUE) if(inherits(E.inv, "try-error")) { TEST <- list(test = test, stat = as.numeric(NA), stat.group = rep(as.numeric(NA), lavsamplestats@ngroups), df = TEST.unscaled$df, refdistr = TEST.unscaled$refdistr, pvalue = as.numeric(NA), scaling.factor = as.numeric(NA)) warning("lavaan WARNING: could not invert information matrix\n") return(TEST) } Delta <- attr(E, "Delta") WLS.V <- attr(E, "WLS.V") } stopifnot(is.matrix(WLS.V[[1]])) # Gamma if(is.null(Gamma)) { Gamma <- lavsamplestats@NACOV } # ngroups ngroups <- lavsamplestats@ngroups # mean and variance adjusted? Satterthwaite <- FALSE if(any(test %in% c("mean.var.adjusted", "scaled.shifted"))) { Satterthwaite <- TRUE } if(method == "original") { out <- lav_test_satorra_bentler_trace_original(Gamma = Gamma, Delta = Delta, WLS.V = WLS.V, E.inv = E.inv, ngroups = ngroups, nobs = lavsamplestats@nobs, ntotal = lavsamplestats@ntotal, return.ugamma = return.ugamma, Satterthwaite = Satterthwaite) } else if(method == "orthogonal.complement") { out <- lav_test_satorra_bentler_trace_complement(Gamma = Gamma, Delta = Delta, WLS.V = WLS.V, lavmodel = lavmodel, ngroups = ngroups, nobs = lavsamplestats@nobs, ntotal = lavsamplestats@ntotal, return.ugamma = return.ugamma, Satterthwaite = Satterthwaite) } else if(method == "ABA") { out <- lav_test_satorra_bentler_trace_ABA(Gamma = Gamma, Delta = Delta, WLS.V = WLS.V, E.inv = E.inv, ngroups = ngroups, nobs = lavsamplestats@nobs, ntotal = lavsamplestats@ntotal, return.ugamma = return.ugamma, Satterthwaite = Satterthwaite) } else { stop("lavaan ERROR: method `", method, "' not supported") } trace.UGamma <- out$trace.UGamma trace.UGamma2 <- out$trace.UGamma2 if("satorra.bentler" %in% test) { # same df df.scaled <- TEST$standard$df # scaling factor scaling.factor <- trace.UGamma/df.scaled if(scaling.factor < 0) scaling.factor <- as.numeric(NA) # scaled test statistic per group stat.group <- TEST$standard$stat.group / scaling.factor # scaled test statistic global stat <- sum(stat.group) TEST$satorra.benter <- list(test = "satorra.bentler", stat = stat, stat.group = stat.group, df = df.scaled, pvalue = 1 - pchisq(stat, df.scaled), trace.UGamma = trace.UGamma, scaling.factor = scaling.factor) } if("mean.var.adjusted" %in% test) { if(mimic == "Mplus") { df.scaled <- floor(trace.UGamma^2/trace.UGamma2 + 0.5) } else { # more precise, fractional df df.scaled <- trace.UGamma^2 / trace.UGamma2 } # scaling factor scaling.factor <- trace.UGamma/df.scaled if(scaling.factor < 0) scaling.factor <- as.numeric(NA) # scaled test statistic per group stat.group <- TEST$standard$stat.group / scaling.factor # scaled test statistic global stat <- sum(stat.group) TEST$mean.var.adjusted <- list(test = "mean.var.adjusted", stat = stat, stat.group = stat.group, df = df.scaled, pvalue = 1 - pchisq(stat, df.scaled), trace.UGamma = trace.UGamma, trace.UGamma2 = trace.UGamma2, scaling.factor = scaling.factor) } if("scaled.shifted" %in% test) { # this is the T3 statistic as used by Mplus 6 and higher # see 'Simple Second Order Chi-Square Correction' 2010 # 
www.statmodel.com # however, for multiple groups, Mplus reports something else # YR. 30 Aug 2012 -- after much trial and error, it turns out # that the shift-parameter (b) is weighted (while a is not)?? # however, the chisq.square per group are different; only # the sum seems ok?? # same df df.scaled <- TEST$standard$df # scaling factor fg <- unlist(lavsamplestats@nobs)/lavsamplestats@ntotal a <- sqrt(df.scaled/trace.UGamma2) shift.parameter <- fg * (df.scaled - a*trace.UGamma) scaling.factor <- 1/a if(scaling.factor < 0) scaling.factor <- as.numeric(NA) # # scaled test statistic per group stat.group <- (TEST$standard$stat.group * a + shift.parameter) # scaled test statistic global stat <- sum(stat.group) TEST$scaled.shifted <- list(test = "scaled.shifted", stat = stat, stat.group = stat.group, df = df.scaled, pvalue = 1 - pchisq(stat, df.scaled), trace.UGamma = trace.UGamma, trace.UGamma2 = trace.UGamma2, scaling.factor = scaling.factor, shift.parameter = shift.parameter) } if(return.ugamma) { TEST$UGamma <- out$UGamma } TEST } # using the `classical' formula # UG = Gamma * [V - V Delta E.inv Delta' V'] lav_test_satorra_bentler_trace_original <- function(Gamma = NULL, Delta = NULL, WLS.V = NULL, E.inv = NULL, ngroups = NULL, nobs = NULL, ntotal = NULL, return.ugamma = FALSE, Satterthwaite = FALSE) { # trace of UGamma per group trace.UGamma <- trace.UGamma2 <- rep(as.numeric(NA), ngroups) # per group for(g in 1:ngroups) { fg <- nobs[[g]]/ntotal Gamma.g <- Gamma[[g]] / fg ## ?? check this Delta.g <- Delta[[g]] WLS.Vg <- WLS.V[[g]] * fg U <- (WLS.Vg - WLS.Vg %*% Delta[[g]] %*% E.inv %*% t(Delta[[g]]) %*% WLS.Vg) trace.UGamma[g] <- sum(U * Gamma.g) UG <- NULL if(Satterthwaite || return.ugamma) { UG <- U %*% Gamma.g trace.UGamma2[g] <- sum(UG * t(UG)) } } # sum over groups trace.UGamma <- sum(trace.UGamma) trace.UGamma2 <- sum(trace.UGamma2) list(trace.UGamma = trace.UGamma, trace.UGamma2 = trace.UGamma2, UGamma = UG) } # using the orthogonal complement of Delta: Delta.c # UG = [ (Delta.c' W Delta.c)^{-1} (Delta.c' Gamma Delta.c) lav_test_satorra_bentler_trace_complement <- function(Gamma = NULL, Delta = NULL, WLS.V = NULL, lavmodel = NULL, ngroups = NULL, nobs = NULL, ntotal = NULL, return.ugamma = FALSE, Satterthwaite = FALSE) { # trace of UGamma per group trace.UGamma <- trace.UGamma2 <- rep(as.numeric(NA), ngroups) # per group for(g in 1:ngroups) { fg <- nobs[[g]]/ntotal Gamma.g <- Gamma[[g]] / fg ## ?? 
check this Delta.g <- Delta[[g]] WLS.Vg <- WLS.V[[g]] * fg # handle equality constraints if(lavmodel@eq.constraints) { Delta.g <- Delta.g %*% lavmodel@eq.constraints.K } # orthogonal complement of Delta.g Delta.c <- lav_matrix_orthogonal_complement(Delta.g) ### FIXME: compute WLS.W directly, instead of using solve(WLS.V) tmp1 <- solve(t(Delta.c) %*% solve(WLS.Vg) %*% Delta.c) tmp2 <- t(Delta.c) %*% Gamma.g %*% Delta.c trace.UGamma[g] <- sum(tmp1 * tmp2) UG <- NULL if(Satterthwaite || return.ugamma) { UG <- tmp1 %*% tmp2 trace.UGamma2[g] <- sum(UG * t(UG)) } } # sum over groups trace.UGamma <- sum(trace.UGamma) trace.UGamma2 <- sum(trace.UGamma2) list(trace.UGamma = trace.UGamma, trace.UGamma2 = trace.UGamma2, UGamma = UG) } # using the ABA form # UG = Gamma %*% [V - V %*% Delta %*% E.inv %*% tDelta %*% V] # = Gamma %*% V - Gamma %*% V %*% Delta %*% E.inv %*% tDelta %*% V # = Gamma %*% A1 - Gamma %*% A1 %*% Delta %*% E.inv %*% tDelta %*% A1 # (B1 = A1 %*% Gamma %*% A1) # = B1 %*% A1.inv - B1 %*% A1.inv %*% Delta %*% E.inv %*% tDelta %*% A1 # # if only the trace is needed, we can use reduce the rhs (after the minus) # to B1 %*% Delta %*% E.inv %*% tDelta (eliminating A1 and A1.inv) # we write it like this to allow for fixed.x covariates which affect A1 # and B1 lav_test_satorra_bentler_trace_ABA <- function(Gamma = NULL, Delta = NULL, WLS.V = NULL, E.inv = NULL, ngroups = NULL, nobs = NULL, ntotal = NULL, return.ugamma = FALSE, Satterthwaite = FALSE) { # trace of UGamma per group trace.UGamma <- trace.UGamma2 <- rep(as.numeric(NA), ngroups) # per group for(g in 1:ngroups) { fg <- nobs[[g]]/ntotal Gamma.g <- Gamma[[g]] / fg ## ?? check this Delta.g <- Delta[[g]] WLS.Vg <- WLS.V[[g]] * fg # diagonal WLS.V? we check for this since 0.5-17 diagonal <- FALSE if(is.matrix(WLS.V[[g]])) { A1 <- WLS.V[[g]] * fg B1 <- A1 %*% Gamma.g %*% A1 } else { diagonal <- TRUE a1 <- WLS.V[[g]] * fg # numeric vector! B1 <- Gamma.g * tcrossprod(a1) } # mask independent 'fixed-x' variables # note: this only affects the saturated H1 model #if(length(x.idx[[g]]) > 0L) { # nvar <- ncol(lavsamplestats@cov[[g]]) # idx <- eliminate.pstar.idx(nvar=nvar, el.idx=x.idx[[g]], # meanstructure=TRUE, type="all") # if(diagonal) { # a1 <- a1[idx] # } else { # A1 <- A1[idx,idx] # } # B1 <- B1[idx,idx] # Delta.g <- Delta.g[idx,] #} if(diagonal) { UG <- t((1/a1) * B1) - (B1 %*% Delta.g %*% tcrossprod(E.inv, Delta.g)) } else { A1.inv <- solve(A1) UG <- (B1 %*% A1.inv) - (B1 %*% Delta.g %*% tcrossprod(E.inv, Delta.g)) } trace.UGamma[g] <- sum(diag(UG)) if(Satterthwaite) { trace.UGamma2[g] <- sum(UG * t(UG)) } } # sum over groups trace.UGamma <- sum(trace.UGamma) trace.UGamma2 <- sum(trace.UGamma2) if(!return.ugamma) { UG <- NULL } list(trace.UGamma = trace.UGamma, trace.UGamma2 = trace.UGamma2, UGamma = UG) } lavaan/R/lav_print.R0000644000176200001440000005162513054025063014045 0ustar liggesusers## NOTE: ## round(1.2355, 3) = 1.236 ## but ## round(1.2345, 3) = 1.234 ## ## perhaps we should add 0.0005 or something to avoid this? print.lavaan.data.frame <- function(x, ..., nd=3) { ROW.NAMES <- rownames(x) y <- as.data.frame(lapply(x, function(x) { if(is.numeric(x)) round(x, nd) else x})) rownames(y) <- ROW.NAMES if(!is.null(attr(x, "header"))) { cat("\n", attr(x, "header"), "\n\n", sep = "") } print(y, ...) invisible(x) } print.lavaan.list <- function(x, ...) 
{ y <- unclass(x) attr(y, "header") <- NULL header <- attr(x, "header") if(!is.null(header)) { if(is.character(header)) { cat("\n", header, "\n\n", sep = "") } else { print(header); cat("\n") } } print(y, ...) invisible(x) } # prints only lower triangle of a symmetric matrix print.lavaan.matrix.symmetric <- function(x, ..., nd=3) { # print only lower triangle of a symmetric matrix # this function was inspired by the `print.correlation' function # in package nlme y <- x; y <- unclass(y) ll <- lower.tri(x, diag=TRUE) y[ll] <- format(round(x[ll], digits=nd)); y[!ll] <- "" if (!is.null(colnames(x))) { colnames(y) <- abbreviate(colnames(x), minlength = nd + 3) } print(y, ..., quote = FALSE) invisible(x) } print.lavaan.matrix <- function(x, ..., nd=3) { y <- unclass(x) if (!is.null(colnames(x))) { colnames(y) <- abbreviate(colnames(x), minlength = nd + 3) } print( round(y, nd), ... ) invisible(x) } print.lavaan.vector <- function(x, ..., nd=3) { y <- unclass(x) #if(!is.null(names(x))) { # names(y) <- abbreviate(names(x), minlength = nd + 3) #} print( round(y, nd), ... ) invisible(x) } print.lavaan.character <- function(x) { cat(x) invisible(x) } print.lavaan.parameterEstimates <- function(x, ..., nd = 3L) { # format for numeric values num.format <- paste("%", max(8, nd + 5), ".", nd, "f", sep = "") char.format <- paste("%", max(8, nd + 5), "s", sep="") # output sections GSECTIONS <- c("Latent Variables", "Composites", "Regressions", "Covariances", "Intercepts", "Thresholds", "Variances", "Scales y*", "Group Weight", "R-Square") ASECTIONS <- c("Defined Parameters", "Constraints") cat("\nParameter Estimates:\n\n") # info about standard errors (if we have x$se only) # 1. information # 2. se # 3. bootstrap requested/successful draws if(!is.null(x$se)) { # 1. t0.txt <- sprintf(" %-40s", "Information") tmp.txt <- attr(x, "information") t1.txt <- sprintf(" %10s", paste(toupper(substring(tmp.txt,1,1)), substring(tmp.txt,2), sep="")) cat(t0.txt, t1.txt, "\n", sep="") # 2. t0.txt <- sprintf(" %-31s", "Standard Errors") tmp.txt <- attr(x, "se") t1.txt <- sprintf(" %19s", paste(toupper(substring(tmp.txt,1,1)), substring(tmp.txt,2), sep="")) cat(t0.txt, t1.txt, "\n", sep="") # 3. if(attr(x, "se") == "bootstrap" && !is.null(attr(x, "bootstrap"))) { t0.txt <- sprintf(" %-40s", "Number of requested bootstrap draws") t1.txt <- sprintf(" %10i", attr(x, "bootstrap")) cat(t0.txt, t1.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Number of successful bootstrap draws") t1.txt <- sprintf(" %10i", attr(x, "bootstrap.successful")) cat(t0.txt, t1.txt, "\n", sep="") } # 4. 
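    # two-stage missing data: also report how the information matrix of
    # the saturated (h1) model was obtained; the same sprintf() pattern as
    # above (a left-justified "%-35s" label plus a right-justified value)
    # keeps the header block aligned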
if(attr(x, "missing") %in% c("two.stage", "robust.two.stage")) { t0.txt <- sprintf(" %-35s", "Information saturated (h1) model") tmp.txt <- attr(x, "h1.information") t1.txt <- sprintf(" %15s", paste(toupper(substring(tmp.txt,1,1)), substring(tmp.txt,2), sep="")) cat(t0.txt, t1.txt, "\n", sep="") if(attr(x, "information") == "observed") { t0.txt <- sprintf(" %-35s", "Observed information based on") tmp.txt <- attr(x, "observed.information") t1.txt <- sprintf(" %15s", paste(toupper(substring(tmp.txt,1,1)), substring(tmp.txt,2), sep="")) cat(t0.txt, t1.txt, "\n", sep="") } } } # number of groups if(is.null(x$group)) { ngroups <- 1L x$group <- rep(1L, length(x$lhs)) } else { ngroups <- lav_partable_ngroups(x) } # number of levels if(is.null(x$level)) { nlevels <- 1L x$level <- rep(1L, length(x$lhs)) } else { nlevels <- lav_partable_nlevels(x) } # block column if(is.null(x$block)) { x$block <- rep(1L, length(x$lhs)) } # round to 3 digits after the decimal point y <- as.data.frame( lapply(x, function(x) { if(is.numeric(x)) { sprintf(num.format, x) } else { x } }), stringsAsFactors = FALSE) # always remove /block/level/group/op/rhs/label/exo columns y$op <- y$group <- y$rhs <- y$label <- y$exo <- NULL y$block <- y$level <- NULL # if standardized, remove std.nox column (space reasons only) y$std.nox <- NULL # convert to character matrix m <- as.matrix(format.data.frame(y, na.encode = FALSE, justify = "right")) # use empty row names rownames(m) <- rep("", nrow(m)) # handle se == 0.0 if(!is.null(x$se)) { se.idx <- which(x$se == 0) if(length(se.idx) > 0L) { m[se.idx, "se"] <- "" if(!is.null(x$z)) { m[se.idx, "z"] <- "" } if(!is.null(x$pvalue)) { m[se.idx, "pvalue"] <- "" } } # handle se == NA se.idx <- which(is.na(x$se)) if(length(se.idx) > 0L) { if(!is.null(x$z)) { m[se.idx, "z"] <- "" } if(!is.null(x$pvalue)) { m[se.idx, "pvalue"] <- "" } } } # handle fmi if(!is.null(x$fmi)) { se.idx <- which(x$se == 0) if(length(se.idx) > 0L) { m[se.idx, "fmi"] <- "" } not.idx <- which(x$op %in% c(":=", "<", ">", "==")) if(length(not.idx) > 0L) { if(!is.null(x$fmi)) { m[not.idx, "fmi"] <- "" } } } # for blavaan, handle Post.SD and PSRF if(!is.null(x$Post.SD)) { se.idx <- which(x$Post.SD == 0) if(length(se.idx) > 0L) { m[se.idx, "Post.SD"] <- "" if(!is.null(x$psrf)) { m[se.idx, "psrf"] <- "" } if(!is.null(x$PSRF)) { m[se.idx, "PSRF"] <- "" } } # handle psrf for defined parameters not.idx <- which(x$op %in% c(":=", "<", ">", "==")) if(length(not.idx) > 0L) { if(!is.null(x$psrf)) { m[not.idx, "psrf"] <- "" } if(!is.null(x$PSRF)) { m[not.idx, "PSRF"] <- "" } } } # rename some column names colnames(m)[ colnames(m) == "lhs" ] <- "" colnames(m)[ colnames(m) == "op" ] <- "" colnames(m)[ colnames(m) == "rhs" ] <- "" colnames(m)[ colnames(m) == "est" ] <- "Estimate" colnames(m)[ colnames(m) == "se" ] <- "Std.Err" colnames(m)[ colnames(m) == "z" ] <- "z-value" colnames(m)[ colnames(m) == "pvalue" ] <- "P(>|z|)" colnames(m)[ colnames(m) == "std.lv" ] <- "Std.lv" colnames(m)[ colnames(m) == "std.all"] <- "Std.all" colnames(m)[ colnames(m) == "std.nox"] <- "Std.nox" colnames(m)[ colnames(m) == "prior" ] <- "Prior" colnames(m)[ colnames(m) == "fmi" ] <- "FMI" # format column names colnames(m) <- sprintf(char.format, colnames(m)) # exceptions for blavaan: Post.Mean (width = 9), Prior (width = 14) if(!is.null(x$Post.Mean)) { tmp <- gsub("[ \t]+", "", colnames(m), perl=TRUE) # reformat "Post.Mean" column col.idx <- which(tmp == "Post.Mean") if(length(col.idx) > 0L) { tmp.format <- paste("%", max(9, nd + 5), "s", sep="") 
colnames(m)[col.idx] <- sprintf(tmp.format, colnames(m)[col.idx]) m[,col.idx] <- sprintf(tmp.format, m[,col.idx]) } # reformat "Prior" column col.idx <- which(tmp == "Prior") if(length(col.idx) > 0L) { MAX <- max( nchar( m[,col.idx] ) ) + 1L tmp.format <- paste("%", max(MAX, nd + 5), "s", sep="") colnames(m)[col.idx] <- sprintf(tmp.format, colnames(m)[col.idx]) m[,col.idx] <- sprintf(tmp.format, m[,col.idx]) } } b <- 0L # group-specific sections for(g in 1:ngroups) { # block number b <- b + 1L # ov/lv names ov.names <- lavNames(x, "ov", block = b) lv.names <- lavNames(x, "lv", block = b) # group header if(ngroups > 1L) { group.label <- attr(x, "group.label") cat("\n\n") cat("Group ", g, " [", group.label[g], "]:\n", sep="") } for(l in 1:nlevels) { # level header if(nlevels > 1L) { level.label <- attr(x, "level.label") cat("\n\n") cat("Level ", l, " [", level.label[l], "]:\n", sep="") } # group-specific sections for(s in GSECTIONS) { if(s == "Latent Variables") { row.idx <- which( x$op == "=~" & !x$lhs %in% ov.names & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "Composites") { row.idx <- which( x$op == "<~" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "Regressions") { row.idx <- which( x$op == "~" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "Covariances") { row.idx <- which(x$op == "~~" & x$lhs != x$rhs & !x$exo & x$block == b) if(length(row.idx) == 0L) next # make distinction between residual and plain y.names <- unique( c(lavNames(x, "eqs.y"), lavNames(x, "ov.ind")) ) PREFIX <- rep("", length(row.idx)) PREFIX[ x$rhs[row.idx] %in% y.names ] <- " ." m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx], PREFIX = PREFIX) #m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "Intercepts") { row.idx <- which(x$op == "~1" & !x$exo & x$block == b) if(length(row.idx) == 0L) next # make distinction between intercepts and means y.names <- unique( c(lavNames(x, "eqs.y"), lavNames(x, "ov.ind")) ) PREFIX <- rep("", length(row.idx)) PREFIX[ x$lhs[row.idx] %in% y.names ] <- " ." m[row.idx,1] <- .makeNames(x$lhs[row.idx], x$label[row.idx], PREFIX = PREFIX) #m[row.idx,1] <- .makeNames(x$lhs[row.idx], x$label[row.idx]) } else if(s == "Thresholds") { row.idx <- which(x$op == "|" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(paste(x$lhs[row.idx], "|", x$rhs[row.idx], sep=""), x$label[row.idx]) } else if(s == "Variances") { row.idx <- which(x$op == "~~" & x$lhs == x$rhs & !x$exo & x$block == b) if(length(row.idx) == 0L) next # make distinction between residual and plain y.names <- unique( c(lavNames(x, "eqs.y"), lavNames(x, "ov.ind")) ) PREFIX <- rep("", length(row.idx)) PREFIX[ x$rhs[row.idx] %in% y.names ] <- " ." 
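                # dependent variables (regression outcomes and factor
                # indicators) get a leading "." to flag the estimate as a
                # residual variance (printed as e.g. ".x1"); variances of
                # exogenous variables are printed without the prefix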
m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx], PREFIX = PREFIX) } else if(s == "Scales y*") { row.idx <- which(x$op == "~*~" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "Group Weight") { row.idx <- which(x$lhs == "group" & x$op == "%" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else if(s == "R-Square") { row.idx <- which(x$op == "r2" & x$block == b) if(length(row.idx) == 0L) next m[row.idx,1] <- .makeNames(x$rhs[row.idx], x$label[row.idx]) } else { row.idx <- integer(0L) } # do we need special formatting for this section? # three types: # - regular (nothing to do, except row/colnames) # - R-square # - Latent Variables (and Composites), Regressions and Covariances # 'bundle' the output per lhs element # bundling if(s %in% c("Latent Variables", "Composites", "Regressions", "Covariances")) { nel <- length(row.idx) M <- matrix("", nrow = nel*2, ncol = ncol(m)) colnames(M) <- colnames(m) rownames(M) <- rep("", NROW(M)) #colnames(M)[1] <- sprintf("%-17s", paste(s, ":", sep = "")) LHS <- paste(x$lhs[row.idx], x$op[row.idx]) lhs.idx <- seq(1, nel*2L, 2L) rhs.idx <- seq(1, nel*2L, 2L) + 1L if(s == "Covariances") { # make distinction between residual and plain y.names <- unique( c(lavNames(x, "eqs.y"), lavNames(x, "ov.ind")) ) PREFIX <- rep("", length(row.idx)) PREFIX[ x$lhs[row.idx] %in% y.names ] <- "." } else { PREFIX <- rep("", length(LHS)) } M[lhs.idx, 1] <- sprintf("%1s%-15s", PREFIX, LHS) M[rhs.idx, ] <- m[row.idx,] # avoid duplicated LHS labels if(nel > 1L) { del.idx <- integer(0) old.lhs <- "" for(i in 1:nel) { if(LHS[i] == old.lhs) { del.idx <- c(del.idx, lhs.idx[i]) } old.lhs <- LHS[i] } if(length(del.idx) > 0L) { M <- M[-del.idx,,drop=FALSE] } } cat("\n", s, ":\n", sep = "") #cat("\n") print(M, quote = FALSE) # R-square } else if(s == "R-Square") { M <- m[row.idx,1:2,drop=FALSE] colnames(M) <- colnames(m)[1:2] rownames(M) <- rep("", NROW(M)) #colnames(M)[1] <- sprintf("%-17s", paste(s, ":", sep = "")) cat("\n", s, ":\n", sep = "") #cat("\n") print(M, quote = FALSE) # Regular } else { #M <- rbind(matrix("", nrow = 1L, ncol = ncol(m)), # m[row.idx,]) M <- m[row.idx,,drop=FALSE] colnames(M) <- colnames(m) rownames(M) <- rep("", NROW(M)) #colnames(M)[1] <- sprintf("%-17s", paste(s, ":", sep = "")) cat("\n", s, ":\n", sep = "") #cat("\n") print(M, quote = FALSE) } } } # groups } # levels # asections for(s in ASECTIONS) { if(s == "Defined Parameters") { row.idx <- which(x$op == ":=") m[row.idx,1] <- .makeNames(x$lhs[row.idx], "") M <- m[row.idx,,drop=FALSE] colnames(M) <- colnames(m) } else if(s == "Constraints") { row.idx <- which(x$op %in% c("==", "<", ">")) if(length(row.idx) == 0) next m[row.idx,1] <- .makeConNames(x$lhs[row.idx], x$op[row.idx], x$rhs[row.idx], nd = nd) m[row.idx,2] <- sprintf(num.format, abs(x$est[row.idx])) M <- m[row.idx,1:2,drop=FALSE] colnames(M) <- c("", sprintf(char.format, "|Slack|")) } else { row.idx <- integer(0L) } if(length(row.idx) == 0L) { next } rownames(M) <- rep("", NROW(M)) #colnames(M)[1] <- sprintf("%-17s", paste(s, ":", sep = "")) #cat("\n") cat("\n", s, ":\n", sep = "") print(M, quote = FALSE) } cat("\n") invisible(m) } .makeNames <- function(NAMES, LABELS, PREFIX = NULL) { W <- 14 if(is.null(PREFIX)) { PREFIX <- rep("", length(NAMES)) } multiB <- FALSE if(any(nchar(NAMES) != nchar(NAMES, "bytes"))) multiB <- TRUE if(any(nchar(LABELS) != nchar(LABELS, "bytes"))) multiB <- TRUE # labels? 
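    # append any non-empty (abbreviated) parameter labels to the names;
    # nchar() counts characters while nchar(type = "bytes") counts bytes,
    # so the multiB flag above detects multi-byte (e.g. UTF-8) names, for
    # which abbreviate() is not safe and plain padding is used instead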
l.idx <- which(nchar(LABELS) > 0L) if(length(l.idx) > 0L) { if(!multiB) { LABELS <- abbreviate(LABELS, 4) LABELS[l.idx] <- paste(" (", LABELS[l.idx], ")", sep="") MAX.L <- max(nchar(LABELS)) NAMES <- abbreviate(NAMES, minlength = (W - MAX.L), strict = TRUE) } else { # do not abbreviate anything (eg in multi-byte locales) MAX.L <- 4L } NAMES <- sprintf(paste("%-", (W - MAX.L), "s%", MAX.L, "s", sep=""), NAMES, LABELS) } else { if(!multiB) { NAMES <- abbreviate(NAMES, minlength = W, strict = TRUE) } else { NAMES <- sprintf(paste("%-", W, "s", sep=""), NAMES) } } char.format <- paste("%3s%-", W, "s", sep = "") sprintf(char.format, PREFIX, NAMES) } .makeConNames <- function(lhs, op, rhs, nd) { nd <- max(nd, 3) W <- 41 + (nd - 3)*3 nel <- length(lhs) if(length(nel) == 0L) return(character(0)) NAMES <- character(nel) for(i in 1:nel) { if(rhs[i] == "0" && op[i] == ">") { con.string <- paste(lhs[i], " - 0", sep="") } else if(rhs[i] == "0" && op[i] == "<") { con.string <- paste(rhs[i], " - (", lhs[i], ")", sep="") } else if(rhs[i] != "0" && op[i] == ">") { con.string <- paste(lhs[i], " - (", rhs[i], ")", sep="") } else if(rhs[i] != "0" && op[i] == "<") { con.string <- paste(rhs[i], " - (", lhs[i], ")", sep="") } else if(rhs[i] == "0" && op[i] == "==") { con.string <- paste(lhs[i], " - 0", sep="") } else if(rhs[i] != "0" && op[i] == "==") { con.string <- paste(lhs[i], " - (", rhs[i], ")", sep="") } con.string <- abbreviate(con.string, W, strict = TRUE) char.format <- paste(" %-", W, "s", sep = "") NAMES[i] <- sprintf(char.format, con.string) } NAMES } summary.lavaan.fsr <- function(object, ...) { dotdotdot <- list(...) if(!is.null(dotdotdot$nd)) { nd <- dotdotdot$nd } else { nd <- 3L } print.lavaan.fsr(x = object, nd = nd) } print.lavaan.fsr <- function(x, ..., nd = 3L) { y <- unclass(x) # print header if(!is.null(y$header)) { cat(y$header) cat("\n") } # print PE print.lavaan.parameterEstimates(y$PE, ..., nd = nd) invisible(y) } lavaan/R/lav_partable_utils.R0000644000176200001440000001122013053256625015717 0ustar liggesusers# guess number of blocks from a partable lav_partable_nblocks <- function(partable) { if(is.null(partable$block)) { nblocks <- 1L } else { # always integers tmp <- partable$block[ partable$block > 0L ] # non-zero only nblocks <- length(unique(na.omit(tmp))) # all combinations } nblocks } # what are the block values (not necessarly 1..nb) lav_partable_block_values <- function(partable) { if(is.null(partable$block)) { block.values <- 1L } else { # always integers tmp <- partable$block[ partable$block > 0L ] # non-zero only block.values <- unique(na.omit(tmp)) # could be, eg, '2' only } block.values } # guess number of groups from a partable lav_partable_ngroups <- function(partable) { if(is.null(partable$group)) { ngroups <- 1L } else if(is.numeric(partable$group)) { tmp <- partable$group[ partable$group > 0L ] ngroups <- length(unique(na.omit(tmp))) } else { # character tmp <- partable$group[nchar(partable$group) > 0L] ngroups <- length(unique(na.omit(tmp))) } ngroups } # guess number of levels from a partable lav_partable_nlevels <- function(partable) { if(is.null(partable$level)) { nlevels. <- 1L } else if(is.numeric(partable$level)) { tmp <- partable$level[ partable$level > 0L ] nlevels. <- length(unique(na.omit(tmp))) } else { # character tmp <- partable$level[nchar(partable$level) > 0L] nlevels. <- length(unique(na.omit(tmp))) } nlevels. 
} # number of sample statistics per block lav_partable_ndat <- function(partable) { # global meanstructure <- any(partable$op == "~1") fixed.x <- any(partable$exo > 0L & partable$free == 0L) conditional.x <- any(partable$exo > 0L & partable$op == "~") categorical <- any(partable$op == "|") if(categorical) { meanstructure <- TRUE } # blocks nblocks <- lav_partable_nblocks(partable) ndat <- integer(nblocks) for(b in seq_len(nblocks)) { # how many observed variables in this block? if(conditional.x) { ov.names <- lav_partable_vnames(partable, "ov.nox", block = b) } else { ov.names <- lav_partable_vnames(partable, "ov", block = b) } nvar <- length(ov.names) # pstar pstar <- nvar*(nvar+1)/2 if(meanstructure) { pstar <- pstar + nvar } ndat[b] <- pstar # correction for fixed.x? if(!conditional.x && fixed.x) { ov.names.x <- lav_partable_vnames(partable, "ov.x", block = b) nvar.x <- length(ov.names.x) pstar.x <- nvar.x * (nvar.x + 1) / 2 if(meanstructure) { pstar.x <- pstar.x + nvar.x } ndat[b] <- ndat[b] - pstar.x } # correction for ordinal data? if(categorical) { ov.names.x <- lav_partable_vnames(partable, "ov.x", block = b) nexo <- length(ov.names.x) ov.ord <- lav_partable_vnames(partable, "ov.ord", block = b) nvar.ord <- length(ov.ord) th <- lav_partable_vnames(partable, "th", block = b) nth <- length(th) # no variances ndat[b] <- ndat[b] - nvar.ord # no means ndat[b] <- ndat[b] - nvar.ord # but additional thresholds ndat[b] <- ndat[b] + nth # add slopes ndat[b] <- ndat[b] + (nvar * nexo) } # correction for conditional.x not categorical if(conditional.x && !categorical) { ov.names.x <- lav_partable_vnames(partable, "ov.x", block = b) nexo <- length(ov.names.x) # add slopes ndat[b] <- ndat[b] + (nvar * nexo) } # correction for group proportions? group.idx <- which(partable$lhs == "group" & partable$op == "%" & partable$block == b) if(length(group.idx) > 0L) { # ndat <- ndat + (length(group.idx) - 1L) # G - 1 (sum to one) ndat[b] <- ndat[b] + 1L # poisson: each cell a parameter } } # blocks # sum over all blocks sum(ndat) } # total number of free parameters lav_partable_npar <- function(partable) { # we only assume non-zero values npar <- length( which(partable$free > 0L) ) npar } # global degrees of freedom: ndat - npar lav_partable_df <- function(partable) { npar <- lav_partable_npar(partable) ndat <- lav_partable_ndat(partable) # degrees of freedom df <- ndat - npar as.integer(df) } lavaan/R/lav_dataframe.R0000644000176200001440000001021612465075714014641 0ustar liggesusers# data.frame utilities, or how to avoid copying (parts) of the data # Y.R. 
11 April 2013 # this is to replace sapply(frame, function(x) class(x)[1]) # try (in R3.0.0): # N <- 100000 # frame <- data.frame(a=factor(sample(1:5, size=N, replace=TRUE)), # b=factor(sample(1:5, size=N, replace=TRUE)), # c=rnorm(N)) # system.time(replicate(1000, sapply(frame, function(x) class(x)[1]))) # # user system elapsed # # 1.223 0.000 1.222 # system.time(replicate(1000, lav_dataframe_check_vartype(frame))) # # user system elapsed # # 0.093 0.000 0.093 lav_dataframe_check_vartype <- function(frame = NULL, ov.names = character(0)) { if(missing(ov.names)) { var.idx <- seq_len(ncol(frame)) } else { var.idx <- match(unlist(ov.names, use.names = FALSE), names(frame)) } nvar <- length(var.idx) out <- character(nvar) for(i in seq_len(nvar)) { out[i] <- class(frame[[var.idx[i]]])[1L] # watch out for matrix type with 1 column if(out[i] == "matrix" && ncol(frame[[var.idx[i]]]) == 1L) { out[i] <- "numeric" } } out } # check if any of the variables in frame are ordered or not lav_dataframe_check_ordered <- function(frame = NULL, ov.names = character(0)) { if(missing(ov.names)) { var.idx <- seq_len(ncol(frame)) } else { var.idx <- match(unlist(ov.names, use.names = FALSE), names(frame)) } nvar <- length(var.idx) for(i in seq_len(nvar)) { if(class(frame[[var.idx[i]]])[1L] == "ordered") return(TRUE) } FALSE } # construct vartable, but allow 'ordered/factor' argument to intervene # we do NOT change the data.frame lav_dataframe_vartable <- function(frame = NULL, ov.names = NULL, ov.names.x = NULL, ordered = NULL, factor = NULL, as.data.frame. = FALSE) { if(missing(ov.names)) { var.names <- names(frame) } else { ov.names <- unlist(ov.names, use.names=FALSE) ov.names.x <- unlist(ov.names.x, use.names=FALSE) var.names <- unique(c(ov.names, ov.names.x)) } nvar <- length(var.names) var.idx <- match(var.names, names(frame)) nobs <- integer(nvar) type <- character(nvar) user <- integer(nvar) exo <- ifelse(var.names %in% ov.names.x, 1L, 0L) mean <- numeric(nvar); var <- numeric(nvar) nlev <- integer(nvar); lnam <- character(nvar) for(i in seq_len(nvar)) { x <- frame[[var.idx[i]]] type.x <- class(x)[1L] # correct for matrix with 1 column if(type.x == "matrix" && ncol(x) == 1L) { type.x <- "numeric" } # correct for integers if(type.x == "integer") { type.x <- "numeric" } # handle ordered/factor if(!is.null(ordered) && var.names[i] %in% ordered) { type.x <- "ordered" lev <- sort(unique(x)) # we assume integers! nlev[i] <- length(lev) lnam[i] <- paste(lev, collapse="|") user[i] <- 1L } else if(!is.null(factor) && var.names[i] %in% factor) { type.x <- "factor" lev <- sort(unique(x)) # we assume integers! nlev[i] <- length(lev) lnam[i] <- paste(lev, collapse="|") user[i] <- 1L } else { nlev[i] <- nlevels(x) lnam[i] <- paste(levels(x), collapse="|") } type[i] <- type.x nobs[i] <- sum(!is.na(x)) mean[i] <- ifelse(type.x == "numeric", mean(x, na.rm=TRUE), as.numeric(NA)) var[i] <- ifelse(type.x == "numeric", var(x, na.rm=TRUE), as.numeric(NA)) } VAR <- list(name=var.names, idx=var.idx, nobs=nobs, type=type, exo=exo, user=user, mean=mean, var=var, nlev=nlev, lnam=lnam) if(as.data.frame.) 
{ VAR <- as.data.frame(VAR, stringsAsFactors=FALSE, row.names=1:length(VAR$name)) class(VAR) <- c("lavaan.data.frame", "data.frame") } VAR } lavaan/R/xxx_prelav.R0000644000176200001440000000642013044156012014236 0ustar liggesusers# prelav # # a program not unlike prelis (unfinished) # # YR 19 Sept 2013 prelav <- function(object = NULL, ordered = NULL, ov.names.x = NULL, group = NULL, missing = "pairwise", output = list(MA ="none", # moment matrix SR = FALSE, # transformed raw data RA = FALSE, # transformed raw data SA = FALSE, # asymptotic covariance matrix AC = FALSE, # asymptotic covariance matrix SV = FALSE, # asymptotic variances TH = FALSE, # thresholds ME = FALSE, # means ND = 3L, # number of decimals PK = FALSE, # Mardia's (1970) mult kurtosis WP = FALSE, # wide print XB = FALSE, # omit bivariate freq tables XT = FALSE, # omit omit test statistics XM = FALSE # omit tests of mult normality ), mimic="LISREL") { # empty output? if(length(output) == 0L) { return( list() ) } else { OU <- toupper(substr(names(output), 1, 2)) } # parse data NAMES <- names(object) if(!is.null(group)) { NAMES <- NAMES[- match(group, NAMES)] } lav.data <- lavData(data = object, group = group, ov.names = NAMES, ordered = ordered, ov.names.x = ov.names.x, lavoptions = list(missing = missing)) lav.stats <- lav_samplestats_from_data(lavdata = lav.data, missing = missing, rescale = FALSE, estimator = "ML", mimic = mimic, meanstructure = TRUE, conditional.x = FALSE, group.w.free = FALSE, missing.h1 = FALSE, WLS.V = NULL, NACOV = NULL, ridge = 1e-5, debug = FALSE, verbose = FALSE) out <- list(lav.data=lav.data, lav.stats=lav.stats, OU=OU) # output class(out) <- c("prelav", "list") out } # S3 method print.prelav <- function(x, ..., nd=3) { # shorcuts lav.data <- x$lav.data lav.stats <- x$lav.stats ngroups <- lav.data@ngroups # header version <- read.dcf(file=system.file("DESCRIPTION", package="lavaan"), fields="Version") cat("This is prelav ", version, ".\n", sep="") # data information + missingness cat("\n") print(lav.data) # univariate information cat("\n") cat("Univariate information:\n") # varTable! print(as.data.frame(lav.data@ov)) invisible(x) } lavaan/R/zzz_OLDNAMES.R0000644000176200001440000000073512642701437014172 0ustar liggesusers# keep 'old' names for some function names that have been used # (or are still being used) by external packages lavJacobianD <- lav_func_jacobian_simple lavJacobianC <- lav_func_jacobian_complex lavGradientC <- lav_func_gradient_complex # Myrsini getHessian <- lav_object_inspect_hessian getVariability <- lav_object_inspect_firstorder # rsem computeExpectedInformation <- lav_model_information_expected # only for simsem .... 
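# (kept because simsem calls getParameterLabels(); the alias below simply
#  points to lav_partable_labels and can be retired once downstream code
#  has migrated)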
getParameterLabels <- lav_partable_labels lavaan/R/lav_pearson.R0000644000176200001440000001207112505525446014362 0ustar liggesusers# pearson product moment correlation: both Y1 and Y2 are NUMERIC # (summed) loglikelihood pp_logl <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { lik <- pp_lik(Y1=Y1, Y2=Y2, eXo=eXo, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) if(all(lik > 0, na.rm = TRUE)) logl <- sum(log(lik), na.rm = TRUE) else logl <- -Inf logl } # individual likelihoods pp_lik <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavOLS(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y var.y1 <- fit.y1$theta[fit.y1$var.idx] var.y2 <- fit.y2$theta[fit.y2$var.idx] eta.y1 <- fit.y1$yhat eta.y2 <- fit.y2$yhat # lik cov.y12 <- rho*sqrt(var.y1)*sqrt(var.y2) sigma <- matrix(c(var.y1,cov.y12,cov.y12,var.y2), 2L, 2L) #lik <- numeric(length(Y1)) #for(i in 1:length(Y1)) # lik[i] <- dmvnorm(c(Y1[i],Y2[i]), mean=c(eta.y1[i], eta.y2[i]), # sigma=sigma) lik <- dmnorm( cbind(Y1,Y2), mean=cbind(eta.y1, eta.y2), varcov=sigma) lik } # loglikelihood (x-version) pp_logl_x <- function(x, Y1, Y2, eXo=NULL) { nexo <- ifelse(is.null(eXo), 0L, ncol(eXo)); S <- seq_len stopifnot(length(x) == (5L + 2*nexo)) rho = x[1L] mu.y1 = x[2L] var.y1 = x[3L] mu.y2 = x[4L] var.y2 = x[5L] sl.y1 = x[5L + S(nexo)] sl.y2 = x[5L + nexo + S(nexo)] fit.y1 <- lavOLS(y=Y1, X=eXo) fit.y1$theta[fit.y1$int.idx] <- mu.y1 fit.y1$theta[fit.y1$var.idx] <- var.y1 fit.y1$theta[fit.y1$slope.idx] <- sl.y1 fit.y1$lik() fit.y2 <- lavOLS(y=Y2, X=eXo) fit.y2$theta[fit.y2$int.idx] <- mu.y2 fit.y2$theta[fit.y2$var.idx] <- var.y2 fit.y2$theta[fit.y2$slope.idx] <- sl.y2 fit.y2$lik() pp_logl(Y1=Y1, Y2=Y2, eXo=eXo, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) } # pearson correlation (just for fun); solution is cor(Y1,Y2) or cor(e1,e2) pp_cor_TS <- function(Y1, Y2, eXo=NULL, fit.y1=NULL, fit.y2=NULL, method="nlminb", verbose=FALSE) { stopifnot(method == "nlminb") if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavOLS(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y if(missing(eXo) && length(fit.y1$slope.idx) > 0L) eXo <- fit.y2$X[,-1] Y1c <- Y1 - fit.y1$yhat Y2c <- Y2 - fit.y2$yhat var.y1 <- fit.y1$theta[fit.y1$var.idx]; sd.y1 <- sqrt(var.y1) var.y2 <- fit.y2$theta[fit.y2$var.idx]; sd.y2 <- sqrt(var.y2) objectiveFunction <- function(x) { rho = tanh(x[1L]) logl <- pp_logl(rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) -logl } gradientFunction <- function(x) { rho = tanh(x[1L]) R <- (1 - rho*rho) z <- (Y1c*Y1c)/var.y1 - 2*rho*Y1c*Y2c/(sd.y1*sd.y2) + (Y2c*Y2c)/var.y2 dx.rho <- sum( rho/R + (Y1c*Y2c/(sd.y1*sd.y2*R) - z*rho/(R*R)), na.rm = TRUE ) # tanh + minimize -dx.rho * 1/(cosh(x)*cosh(x)) } # FIXME:: TODO!!! 
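    # no analytic hessian is implemented yet; the empty stub below is a
    # placeholder. a numerical fallback -- a sketch, using the jacobian of
    # the analytic gradient defined above -- could be:
    #     lav_func_jacobian_simple(func = gradientFunction,
    #                              x = atanh(rho.init))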
hessianFunction <- function(x) { } # starting value (just to see if the gradient works) rho.init <- 0 # minimize out <- nlminb(start=atanh(rho.init), objective=objectiveFunction, gradient=gradientFunction, scale=10, control=list(trace=ifelse(verbose,1L,0L), rel.tol=1e-10)) if(out$convergence != 0L) warning("no convergence") rho <- tanh(out$par) rho } pp_cor_scores <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavOLS(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y if(missing(eXo) && length(fit.y1$slope.idx) > 0L) eXo <- fit.y2$X[,-1] R <- (1 - rho*rho) Y1c <- Y1 - fit.y1$yhat Y2c <- Y2 - fit.y2$yhat var.y1 <- fit.y1$theta[fit.y1$var.idx]; sd.y1 <- sqrt(var.y1) var.y2 <- fit.y2$theta[fit.y2$var.idx]; sd.y2 <- sqrt(var.y2) # mu.y1 dx.mu.y1 <- (2*Y1c/var.y1 - 2*rho*Y2c/(sd.y1*sd.y2))/(2*R) # mu.y2 dx.mu.y2 <- - (2*rho*Y1c/(sd.y1*sd.y2) - 2*Y2c/var.y2)/(2*R) # var.y1 dx.var.y1 <- - (0.5/var.y1 - ((Y1c*Y1c)/(var.y1*var.y1) - rho*Y1c*Y2c/(var.y1*sd.y1*sd.y2))/(2*R)) # var.y2 dx.var.y2 <- -(0.5/var.y2 + (rho*Y1c*Y2c/(var.y2*sd.y1*sd.y2) - (Y2c*Y2c)/(var.y2*var.y2))/(2*R)) # sl.y1 dx.sl.y1 <- NULL if(length(fit.y1$slope.idx) > 0L) dx.sl.y1 <- dx.mu.y1 * eXo # sl.y2 dx.sl.y2 <- NULL if(length(fit.y2$slope.idx) > 0L) dx.sl.y2 <- dx.mu.y2 * eXo # rho z <- (Y1c*Y1c)/var.y1 - 2*rho*Y1c*Y2c/(sd.y1*sd.y2) + (Y2c*Y2c)/var.y2 dx.rho <- rho/R + (Y1c*Y2c/(sd.y1*sd.y2*R) - z*rho/(R*R)) list(dx.mu.y1=dx.mu.y1, dx.var.y1=dx.var.y1, dx.mu.y2=dx.mu.y2, dx.var.y2=dx.var.y2, dx.sl.y1=dx.sl.y1, dx.sl.y2=dx.sl.y2, dx.rho=dx.rho) } lavaan/R/lav_model_objective.R0000644000176200001440000003137513054013425016042 0ustar liggesusers# model objective lav_model_objective <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, verbose = FALSE, forcePD = TRUE, debug = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST # shortcut for data.type == "none" or estimator == "none" if(lavmodel@estimator == "none" || length(lavsamplestats@cov) == 0L) { fx <- as.numeric(NA) attr(fx, "fx.group") <- rep(as.numeric(NA), lavsamplestats@ngroups) return(fx) } meanstructure <- lavmodel@meanstructure estimator <- lavmodel@estimator categorical <- lavmodel@categorical group.w.free <- lavmodel@group.w.free fixed.x <- lavmodel@fixed.x conditional.x <- lavmodel@conditional.x num.idx <- lavmodel@num.idx th.idx <- lavmodel@th.idx # do we need WLS.est? if(estimator %in% c("ULS", "GLS", "WLS", "DWLS", "NTRLS")) { WLS.est <- lav_model_wls_est(lavmodel = lavmodel, GLIST = GLIST) #, #cov.x = lavsamplestats@cov.x) if(estimator == "NTRLS") { Sigma.hat <- computeSigmaHat(lavmodel = lavmodel, GLIST = GLIST, extra = TRUE) Mu.hat <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST) } if(debug) print(WLS.est) } else if(estimator %in% c("ML", "PML", "FML", "REML")) { # compute moments for all groups #if(conditional.x) { # Sigma.hat <- computeSigmaHatJoint(lavmodel = lavmodel, # GLIST = GLIST, lavsamplestats = lavsamplestats, # extra = (estimator %in% c("ML", "REML","NTRLS"))) #} else { Sigma.hat <- computeSigmaHat(lavmodel = lavmodel, GLIST = GLIST, extra = (estimator %in% c("ML", "REML","NTRLS"))) #} if(estimator == "REML") { LAMBDA <- computeLAMBDA(lavmodel = lavmodel, GLIST = GLIST) } # ridge? 
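        # if a ridge constant was added to the diagonal of the observed
        # sample covariance matrix (to make it positive definite), add the
        # same constant to the model-implied covariance, so that both
        # sides of the discrepancy function remain comparable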
if( lavsamplestats@ridge > 0.0 ) { for(g in 1:lavsamplestats@ngroups) { diag(Sigma.hat[[g]]) <- diag(Sigma.hat[[g]]) + lavsamplestats@ridge } } if(debug) print(Sigma.hat) if(meanstructure && !categorical) { #if(conditional.x) { # Mu.hat <- computeMuHatJoint(lavmodel = lavmodel, GLIST = GLIST, # lavsamplestats = lavsamplestats) #} else { Mu.hat <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST) #} } else if(categorical) { TH <- computeTH(lavmodel = lavmodel, GLIST = GLIST) } if(conditional.x) { PI <- computePI(lavmodel = lavmodel, GLIST = GLIST) } if(estimator == "PML") { if(lavmodel@nexo > 0L) { PI <- computePI(lavmodel = lavmodel) } else { PI <- vector("list", length = lavsamplestats@ngroups) } } if(group.w.free) { GW <- computeGW(lavmodel = lavmodel, GLIST = GLIST) } } else if(estimator == "MML") { TH <- computeTH( lavmodel = lavmodel, GLIST = GLIST) THETA <- computeTHETA(lavmodel = lavmodel, GLIST = GLIST) GW <- computeGW( lavmodel = lavmodel, GLIST = GLIST) } fx <- 0.0 fx.group <- numeric( lavsamplestats@ngroups ) logl.group <- rep(as.numeric(NA), lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { # incomplete data and fiml? if(lavsamplestats@missing.flag && estimator != "Bayes") { if(estimator == "ML") { # FIML if(!attr(Sigma.hat[[g]], "po")) return(Inf) group.fx <- estimator.FIML(Sigma.hat=Sigma.hat[[g]], Mu.hat=Mu.hat[[g]], Yp=lavsamplestats@missing[[g]], h1=lavsamplestats@missing.h1[[g]]$h1, N=lavsamplestats@nobs[[g]]) } else { stop("this estimator: `", estimator, "' can not be used with incomplete data and the missing=\"ml\" option") } } else if(estimator == "ML" || estimator == "Bayes") { # complete data # ML and friends if(lavdata@nlevels > 1L) { group.fx <- 0 } else if(conditional.x) { group.fx <- estimator.ML_res( Sigma.hat = Sigma.hat[[g]], Mu.hat = Mu.hat[[g]], PI = PI[[g]], res.cov = lavsamplestats@res.cov[[g]], res.int = lavsamplestats@res.int[[g]], res.slopes = lavsamplestats@res.slopes[[g]], res.cov.log.det = lavsamplestats@res.cov.log.det[[g]], cov.x = lavsamplestats@cov.x[[g]], mean.x = lavsamplestats@mean.x[[g]]) } else { group.fx <- estimator.ML( Sigma.hat = Sigma.hat[[g]], Mu.hat = Mu.hat[[g]], data.cov = lavsamplestats@cov[[g]], data.mean = lavsamplestats@mean[[g]], data.cov.log.det = lavsamplestats@cov.log.det[[g]], meanstructure = meanstructure) } } else if(estimator == "GLS" || estimator == "WLS" || estimator == "NTRLS") { # full weight matrix if(estimator == "GLS" || estimator == "WLS") { WLS.V <- lavsamplestats@WLS.V[[g]] } else if(estimator == "NTRLS") { #WLS.V <- lav_samplestats_Gamma_inverse_NT( # ICOV = attr(Sigma.hat[[g]],"inv")[,,drop=FALSE], # COV = Sigma.hat[[g]][,,drop=FALSE], # MEAN = Mu.hat[[g]], # x.idx = c(10000,10001), ### FIXME!!!! # fixed.x = fixed.x, # conditional.x = conditional.x, # meanstructure = meanstructure, # slopestructure = conditional.x) WLS.V <- lav_mvnorm_information_expected( Sigma = Sigma.hat[[g]], meanstructure = lavmodel@meanstructure) # DEBUG!!!! 
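                # for NTRLS, the weight matrix is the expected information
                # of the normal model evaluated at the *model-implied*
                # moments (Sigma.hat), rather than at the sample moments
                # as in GLS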
#WLS.V <- 2*WLS.V } group.fx <- estimator.WLS(WLS.est = WLS.est[[g]], WLS.obs = lavsamplestats@WLS.obs[[g]], WLS.V = WLS.V) attr(group.fx, "WLS.est") <- WLS.est[[g]] } else if(estimator == "DWLS" || estimator == "ULS") { # diagonal weight matrix group.fx <- estimator.DWLS(WLS.est = WLS.est[[g]], WLS.obs = lavsamplestats@WLS.obs[[g]], WLS.VD = lavsamplestats@WLS.VD[[g]]) attr(group.fx, "WLS.est") <- WLS.est[[g]] } else if(estimator == "PML") { # Pairwise maximum likelihood group.fx <- estimator.PML(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], PI = PI[[g]], th.idx = th.idx[[g]], num.idx = num.idx[[g]], X = lavdata@X[[g]], eXo = lavdata@eXo[[g]], lavcache = lavcache[[g]], missing = lavdata@missing) logl.group[g] <- attr(group.fx, "logl") } else if(estimator == "FML") { # Full maximum likelihood (underlying multivariate normal) group.fx <- estimator.FML(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = th.idx[[g]], num.idx = num.idx[[g]], X = lavdata@X[[g]], lavcache = lavcache[[g]]) } else if(estimator == "MML") { # marginal maximum likelihood group.fx <- estimator.MML(lavmodel= lavmodel, GLIST = GLIST, THETA = THETA[[g]], TH = TH[[g]], group = g, lavdata = lavdata, sample.mean = lavsamplestats@mean[[g]], sample.mean.x = lavsamplestats@mean.x[[g]], lavcache = lavcache) } else if(estimator == "REML") { # restricted/residual maximum likelihood group.fx <- estimator.REML(Sigma.hat = Sigma.hat[[g]], Mu.hat=Mu.hat[[g]], data.cov=lavsamplestats@cov[[g]], data.mean=lavsamplestats@mean[[g]], data.cov.log.det=lavsamplestats@cov.log.det[[g]], meanstructure=meanstructure, group = g, lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata) } else { stop("unsupported estimator: ", estimator) } if(estimator == "ML" || estimator == "REML" || estimator == "NTRLS") { if(lavdata@nlevels == 1L) { group.fx <- 0.5 * group.fx ## FIXME } } else if(estimator == "PML" || estimator == "FML" || estimator == "MML") { # do nothing } else { group.fx <- 0.5 * (lavsamplestats@nobs[[g]]-1)/lavsamplestats@nobs[[g]] * group.fx } fx.group[g] <- group.fx } # g if(lavsamplestats@ngroups > 1) { ## FIXME: if group.w.free, should we use group.w or nobs??? ## - if we use estimated group.w, gradient changes!!!! ## - but, if group models are misspecified, the group weights ## will be affected too... which is unwanted (I think) #if(group.w.free) { # nobs <- unlist(GW) * lavsamplestats@ntotal # nobs <- exp(unlist(GW)) #} else { if(estimator == "PML") { # no weighting needed! (since N_g is part of the logl per group) fx <- sum(fx.group) } else { nobs <- unlist(lavsamplestats@nobs) #} fx <- weighted.mean(fx.group, w=nobs) } } else { # single group fx <- fx.group[1] } # penalty for group.w + ML if(group.w.free && estimator %in% c("ML","MML","FML","PML", "REML")) { #obs.prop <- unlist(lavsamplestats@group.w) #est.prop <- unlist(GW) # if(estimator %in% c("WLS", "GLS", ...) { # # X2 style discrepancy measures (aka GLS/WLS!!) 
# fx.w <- sum ( (obs.prop-est.prop)^2/est.prop ) # } else { # # G2 style discrepancy measures (aka ML) # # deriv is here -2 * (obs.prop - est.prop) #fx.w <- sum(obs.prop * log(obs.prop/est.prop) ) # } # poisson kernel obs.freq <- unlist(lavsamplestats@group.w) * lavsamplestats@ntotal est.freq <- exp(unlist(GW)) fx.w <- -1 * sum( obs.freq * log(est.freq) - est.freq ) # divide by N (to be consistent with the rest of lavaan) fx.w <- fx.w / lavsamplestats@ntotal fx.sat <- sum( obs.freq * log(obs.freq) - obs.freq ) fx.sat <- fx.sat / lavsamplestats@ntotal # saturated - poisson #fx.w <- sum(obs.freq * log(obs.freq/est.freq)) # does not work without constraints? fx <- fx + (fx.w + fx.sat) } fx.value <- as.numeric(fx) attr(fx, "fx.group") <- fx.group if(estimator == "PML") { attr(fx, "logl.group") <- logl.group attr(fx, "fx.pml") <- fx.value } fx } lavaan/R/ctr_pml_plrt.R0000644000176200001440000003145313043377522014555 0ustar liggesusersctr_pml_plrt <- function(lavobject = NULL, lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, lavpartable = NULL, lavoptions = NULL, x = NULL, VCOV = NULL, lavcache = NULL) { if(!is.null(lavobject)) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavoptions <- lavobject@Options lavsamplestats <- lavobject@SampleStats lavcache <- lavobject@Cache lavpartable <- lavobject@ParTable } if(is.null(x)) { # compute 'fx' = objective function value # (NOTE: since 0.5-18, NOT divided by N!!) fx <- lav_model_objective(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache) H0.fx <- as.numeric(fx) H0.fx.group <- attr(fx, "fx.group") } else { H0.fx <- attr(attr(x, "fx"), "fx.pml") H0.fx.group <- attr(attr(x, "fx"), "fx.group") } # fit a saturated model 'fittedSat' ModelSat <- lav_partable_unrestricted(lavobject = NULL, lavdata = lavdata, lavoptions = lavoptions, lavsamplestats = lavsamplestats) # FIXME: se="none", test="none"?? Options <- lavoptions Options$verbose <- FALSE Options$se <- "none" Options$test <- "none" fittedSat <- lavaan(ModelSat, slotOptions = Options, slotSampleStats = lavsamplestats, slotData = lavdata, slotCache = lavcache) fx <- lav_model_objective(lavmodel = fittedSat@Model, lavsamplestats = fittedSat@SampleStats, lavdata = fittedSat@Data, lavcache = fittedSat@Cache) SAT.fx <- as.numeric(fx) SAT.fx.group <- attr(fx, "fx.group") # we also need a `saturated model', but where the moments are based # on the model-implied sample statistics under H0 ModelSat2 <- lav_partable_unrestricted(lavobject = NULL, lavdata = lavdata, lavoptions = lavoptions, lavsamplestats = NULL, sample.cov = computeSigmaHat(lavmodel), sample.mean = computeMuHat(lavmodel), sample.th = computeTH(lavmodel), sample.th.idx = lavsamplestats@th.idx) Options2 <- Options Options2$optim.method <- "none" Options2$optim.force.converged <- TRUE fittedSat2 <- lavaan(ModelSat2, slotOptions = Options2, slotSampleStats = lavsamplestats, slotData = lavdata, slotCache = lavcache) # the code below was contributed by Myrsini Katsikatsou (Jan 2015) # for now, only a single group is supported: # g = 1L ########################### The code for PLRT for overall goodness of fit # First define the number of non-redundant elements of the (fitted) # covariance/correlation matrix of the underlying variables. 
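    # for nvar underlying variables this is nvar*(nvar-1)/2 polychoric
    # correlations, plus one variance per numeric variable (cf. the
    # commented lines below); here the count is taken directly from the
    # free, non-threshold parameters of the saturated model fittedSat2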
#nvar <- lavmodel@nvar[[g]] #dSat <- nvar*(nvar-1)/2 #if(length(lavmodel@num.idx[[g]]) > 0L) { # dSat <- dSat + length(lavmodel@num.idx[[g]]) #} # select `free' parameters (excluding thresholds) from fittedSat2 model PT.Sat2 <- fittedSat2@ParTable dSat.idx <- PT.Sat2$free[ PT.Sat2$free > 0L & PT.Sat2$op != "|" ] # remove thresholds # Secondly, we need to specify the indices of the rows/columns of vcov(), hessian, and # variability matrix that refer to all SEM parameters except thresholds. PT <- lavpartable index.par <- PT$free[PT$free > 0L & PT$op != "|"] # Thirdly, specify the sample size. # nsize <- lavdata@nobs[[g]] nsize <- lavsamplestats@ntotal # Now we can proceed to the computation of the quantities needed for PLRT. # Briefly, to say that PLRT is equal to the difference of two quadratic forms. # To compute the first and second moment adjusted PLRT we should compute # the asymptotic mean and variance of each quadratic quantity as well as # their asymptotic covariance. ##### Section 1. Compute the asymptotic mean and variance of the first quadratic quantity # Below I assume that lavobject is the output of lavaan function. I guess # vcov(lavobject) can be substituted by VCOV object insed lavaan function # defined at lines 703 -708. But what is the object inside lavaan function # for getHessian(lavobject)? if(is.null(VCOV)) { lavoptions$se <- "robust.huber.white" VCOV <- lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavoptions = lavoptions, lavdata = lavdata, lavpartable = lavpartable, lavcache = lavcache) } InvG_to_psipsi_attheta0 <- (lavsamplestats@ntotal * VCOV )[index.par, index.par] #G^psipsi(theta0) #below the lavaan function getHessian is used #Hattheta0 <- (-1) * H0.Hessian #Hattheta0 <- H0.Hessian #InvHattheta0 <- solve(Hattheta0) InvHattheta0 <- attr(VCOV, "E.inv") InvH_to_psipsi_attheta0 <- InvHattheta0[index.par, index.par] #H^psipsi(theta0) if(lavmodel@eq.constraints) { IN <- InvH_to_psipsi_attheta0 IN.npar <- ncol(IN) # create `bordered' matrix if(nrow(lavmodel@con.jac) > 0L) { H <- lavmodel@con.jac[, index.par, drop = FALSE] inactive.idx <- attr(H, "inactive.idx") lambda <- lavmodel@con.lambda # lagrangean coefs if(length(inactive.idx) > 0L) { H <- H[-inactive.idx,,drop=FALSE] lambda <- lambda[-inactive.idx] } if(nrow(H) > 0L) { H0 <- matrix(0,nrow(H),nrow(H)) H10 <- matrix(0, ncol(IN), nrow(H)) DL <- 2*diag(lambda, nrow(H), nrow(H)) # FIXME: better include inactive + slacks?? E3 <- rbind( cbind( IN, H10, t(H)), cbind( t(H10), DL, H0), cbind( H, H0, H0) ) Inv_of_InvH_to_psipsi_attheta0 <- MASS::ginv(IN)[1:IN.npar, 1:IN.npar, drop = FALSE] } else { Inv_of_InvH_to_psipsi_attheta0 <- solve(IN) } } } else { Inv_of_InvH_to_psipsi_attheta0 <- solve(InvH_to_psipsi_attheta0) #[H^psipsi(theta0)]^(-1) } H0tmp_prod1 <- Inv_of_InvH_to_psipsi_attheta0 %*% InvG_to_psipsi_attheta0 H0tmp_prod2 <- H0tmp_prod1 %*% H0tmp_prod1 E_tww <- sum(diag(H0tmp_prod1)) #expected mean of the first quadratic quantity var_tww <- 2* sum(diag(H0tmp_prod2)) #variance of the first quadratic quantity ##### Section 2: Compute the asymptotic mean and variance of the second quadratic quantity. # Now we need to evaluate the fitted (polychoric) correlation/ covariance matrix # using the estimates of SEM parameters derived under the fitted model # which is the model of the null hypothesis. We also need to compute the # vcov matrix of these estimates (estimates of polychoric correlations) # as well as the related hessian and variability matrix. 
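    # fittedSat2 (constructed above with optim.method = "none") holds
    # exactly those H0-implied moments as its "estimates", so its vcov and
    # inverse information refer to the model-implied thresholds and
    # polychoric correlations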
tmp.options <- fittedSat2@Options tmp.options$se <- lavoptions$se VCOV.Sat2 <- lav_model_vcov(lavmodel = fittedSat2@Model, lavsamplestats = fittedSat2@SampleStats, lavoptions = tmp.options, lavdata = fittedSat2@Data, lavpartable = fittedSat2@ParTable, lavcache = fittedSat2@Cache, use.ginv = TRUE) InvG_to_sigmasigma_attheta0 <- lavsamplestats@ntotal * VCOV.Sat2[dSat.idx, dSat.idx, drop = FALSE] #G^sigmasigma(theta0) #Hattheta0 <- (-1)* getHessian(fittedSat2) #Hattheta0 <- getHessian(fittedSat2) #InvHattheta0 <- solve(Hattheta0) InvHattheta0 <- attr(VCOV.Sat2, "E.inv") InvH_to_sigmasigma_attheta0 <- InvHattheta0[dSat.idx, dSat.idx, drop = FALSE] #H^sigmasigma(theta0) #Inv_of_InvH_to_sigmasigma_attheta0 <- solve(InvH_to_sigmasigma_attheta0) #[H^sigmasigma(theta0)]^(-1) Inv_of_InvH_to_sigmasigma_attheta0 <- MASS::ginv(InvH_to_sigmasigma_attheta0, tol = .Machine$double.eps^(3/4)) H1tmp_prod1 <- Inv_of_InvH_to_sigmasigma_attheta0 %*% InvG_to_sigmasigma_attheta0 H1tmp_prod2 <- H1tmp_prod1 %*% H1tmp_prod1 E_tzz <- sum(diag(H1tmp_prod1)) #expected mean of the second quadratic quantity var_tzz <- 2* sum(diag(H1tmp_prod2))#variance of the second quadratic quantity ##### Section 3: Compute the asymptotic covariance of the two quadratic quantities drhodpsi_MAT <- vector("list", length = lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { delta.g <- computeDelta(lavmodel)[[g]] # [[1]] to be substituted by g? # The above gives the derivatives of thresholds and polychoric correlations # with respect to SEM param (including thresholds) evaluated under H0. # From deltamat we need to exclude the rows and columns referring to thresholds. # For this: # order of the rows: first the thresholds, then the correlations # we need to map the rows of delta.g to the rows/cols of H_at_vartheta0 # of H1 PT <- fittedSat2@ParTable PT$label <- lav_partable_labels(PT) free.idx <- which(PT$free > 0 & PT$op != "|" & PT$group == g) PARLABEL <- PT$label[free.idx] # for now, we can assume that computeDelta will always return # the thresholds first, then the correlations # # later, we should add a (working) add.labels = TRUE option to # computeDelta #th.names <- lavobject@pta$vnames$th[[g]] #ov.names <- lavobject@pta$vnames$ov[[g]] th.names <- lavNames(lavpartable, "th") ov.names <- lavNames(lavpartable, "ov.nox") tmp <- utils::combn(ov.names, 2) cor.names <- paste(tmp[1,], "~~", tmp[2,], sep = "") NAMES <- c(th.names, cor.names) if(g > 1L) { NAMES <- paste(NAMES, ".g", g, sep = "") } par.idx <- match(PARLABEL, NAMES) drhodpsi_MAT[[g]] <- delta.g[par.idx, index.par] } drhodpsi_mat <- do.call(rbind, drhodpsi_MAT) tmp_prod <- t(drhodpsi_mat) %*% Inv_of_InvH_to_sigmasigma_attheta0 %*% drhodpsi_mat %*% InvG_to_psipsi_attheta0 %*% H0tmp_prod1 cov_tzztww <- 2* sum(diag(tmp_prod)) ##### Section 4: compute the adjusted PLRT and its p-value # PLRTH0Sat <- 2*nsize*(lavfit@fx - fittedSat@Fit@fx) PLRTH0Sat <- 2*(H0.fx - SAT.fx) PLRTH0Sat.group <- 2*(H0.fx.group - SAT.fx.group) asym_mean_PLRTH0Sat <- E_tzz - E_tww # catch zero value for asym_mean_PLRTH0Sat if(asym_mean_PLRTH0Sat == 0) { asym_var_PLRTH0Sat <- 0 scaling.factor <- as.numeric(NA) FSA_PLRT_SEM <- as.numeric(NA) adjusted_df <- as.numeric(NA) pvalue <- as.numeric(NA) } else { asym_var_PLRTH0Sat <- var_tzz + var_tww -2*cov_tzztww scaling.factor <- (asym_mean_PLRTH0Sat / (asym_var_PLRTH0Sat/2) ) FSA_PLRT_SEM <- (asym_mean_PLRTH0Sat / (asym_var_PLRTH0Sat/2) )* PLRTH0Sat adjusted_df <- (asym_mean_PLRTH0Sat*asym_mean_PLRTH0Sat) / (asym_var_PLRTH0Sat/2) # In some very few cases 
        # (simulations show very few cases in small sample sizes) the
        # adjusted_df is a negative number; we should then print a warning
        # like: "The adjusted df is computed to be a negative number and
        # for this reason the first and second moment adjusted PLRT is not
        # computed."
        if(scaling.factor > 0) {
            pvalue <- 1 - pchisq(FSA_PLRT_SEM, df = adjusted_df)
        } else {
            pvalue <- as.numeric(NA)
        }
    }

    list(PLRTH0Sat       = PLRTH0Sat,
         PLRTH0Sat.group = PLRTH0Sat.group,
         stat            = FSA_PLRT_SEM,
         df              = adjusted_df,
         p.value         = pvalue,
         scaling.factor  = scaling.factor)
}

############################################################################
ctr_pml_aic_bic <- function(lavobject) {

    ########################## The code for the PL versions of AIC and BIC

    # The following is needed because it is not the pseudo-log-likelihood (PL)
    # itself that is maximized, but a fit function that is minimized. So, we
    # should recover the value of log-PL at the estimated parameters through
    # the value of the fit function.
    # The following may need to be updated if we change the fit function,
    # so that it is correct for the case of missing values as well.
    logPL <- lavobject@optim$logl
    nsize <- lavobject@SampleStats@ntotal

    # inverted observed unit information
    H.inv <- lavTech(lavobject, "inverted.information.observed")

    # first order unit information
    J <- lavTech(lavobject, "information.first.order")

    # trace (J %*% H.inv) = sum (J * t(H.inv))
    dimTheta <- sum(J * H.inv)

    # computations of PL versions of AIC and BIC
    PL_AIC <- (-2)*logPL + 2*dimTheta
    PL_BIC <- (-2)*logPL + dimTheta * log(nsize)

    list(logPL = logPL, PL_AIC = PL_AIC, PL_BIC = PL_BIC)
}
lavaan/R/ctr_mplus2lavaan.R0000644000176200001440000012231113034446062015317 0ustar liggesusers# this code is written by Michael Hallquist
#First draft of parser to convert Mplus model syntax to lavaan model syntax
#idea: build parTable and run model from mplus syntax
#then perhaps write export function: parTable2Mplus
#and/or parTable2lavaan

trimSpace <- function(string) {
    stringTrim <- sapply(string, function(x) {
        x <- sub("^\\s*", "", x, perl=TRUE)
        x <- sub("\\s*$", "", x, perl=TRUE)
        return(x)
    }, USE.NAMES=FALSE)
    return(stringTrim)
}

#small utility function to join strings in a regexp loop
joinRegexExpand <- function(cmd, argExpand, matches, iterator, matchLength="match.length") {
    if (iterator == 1 && matches[iterator] > 1) {
        pre <- substr(cmd, 1, matches[iterator] - 1)
    } else pre <- ""

    #if this is not the final match, then get the sub-string between the end
    #of this match and the beginning of the next; otherwise, match to the end
    #of the command
    post.end <- ifelse(iterator < length(matches), matches[iterator+1] - 1, nchar(cmd))
    post <- substr(cmd, matches[iterator] + attr(matches, matchLength)[iterator], post.end)

    cmd.expand <- paste(pre, argExpand, post, sep="")
    return(cmd.expand)
}

#expand Mplus hyphen syntax (will also expand constraints with hyphens)
expandCmd <- function(cmd, alphaStart=TRUE) {
    #use negative lookahead and negative lookbehind to eliminate the
    #possibility of a hyphen being used as a negative starting value (e.g., x*-1)
    #also avoid matching anything that includes a decimal point, such as a
    #floating-point starting value -10.5*x1
    #if alphaStart==TRUE, then require that the matches before and after the
    #hyphen begin with an alpha character; this is used for variable names,
    #whereas the more generic expansion works for numeric constraints and such
    #need to do a better job of this so that u1-u20* is supported...
    #I don't think the regexp below is general enough
    #if (alphaStart) {
    #    hyphens <- gregexpr("[_A-Za-z]+\\w*\\s*-\\s*[_A-Za-z]+\\w*", cmd, perl=TRUE)[[1]]
    #} else {
    #    hyphens <- gregexpr("(?!<(\\*|\\.))\\w+(?!(\\*|\\.))\\s*-\\s*(?!<(\\*|\\.))\\w+(?!(\\*|\\.))", cmd, perl=TRUE)[[1]]
    #}
    hyphens <- gregexpr("(?!<(\\*|\\.))\\w+(?!(\\*|\\.))\\s*-\\s*(?!<(\\*|\\.))\\w+(?!(\\*|\\.))", cmd, perl=TRUE)[[1]]
    if (hyphens[1L] > 0) {
        cmd.expand <- c()
        ep <- 1
        for (v in 1:length(hyphens)) {
            #match one keyword before and after the hyphen
            argsplit <- strsplit(substr(cmd, hyphens[v],
                                        hyphens[v] + attr(hyphens, "match.length")[v] - 1),
                                 "\\s*-\\s*", perl=TRUE)[[1]]
            v_pre <- argsplit[1]
            v_post <- argsplit[2]

            #the basic positive lookbehind blows up with pure numeric
            #constraints (1 - 3) because no alpha char precedes the digit;
            #we can use a non-capturing alternation grouping to allow for
            #digits only, or the final digits after alphas (as in v_post.num)
            v_pre.num <- as.integer(sub("\\w*(?<=[A-Za-z_])(\\d+)$", "\\1", v_pre, perl=TRUE))

            #use positive lookbehind to avoid greedy \w+ match -- capture all digits
            v_post.match <- regexpr("^(?:\\w*(?<=[A-Za-z_])(\\d+)|(\\d+))$", v_post, perl=TRUE)
            stopifnot(v_post.match[1L] > 0)

            #the match may be under capture[1] or capture[2] because of the alternation above
            whichCapture <- which(attr(v_post.match, "capture.start") > 0)

            v_post.num <- as.integer(substr(v_post, attr(v_post.match, "capture.start")[whichCapture],
                                            attr(v_post.match, "capture.start")[whichCapture] +
                                            attr(v_post.match, "capture.length")[whichCapture] - 1))
            v_post.prefix <- substr(v_post, 1, attr(v_post.match, "capture.start")[whichCapture] - 1)

            #just trusting that pre and post match
            if (is.na(v_pre.num) || is.na(v_post.num)) stop("Cannot expand variables: ", v_pre, ", ", v_post)

            v_expand <- paste(v_post.prefix, v_pre.num:v_post.num, sep="", collapse=" ")

            #for the first hyphen, there may be non-hyphenated syntax preceding the initial match
            cmd.expand[ep] <- joinRegexExpand(cmd, v_expand, hyphens, v)
            ep <- ep + 1
        }
        return(paste(cmd.expand, collapse=""))
    } else {
        return(cmd) #no hyphens to expand
    }
}

#handle starting values and fixed parameters on the rhs
parseFixStart <- function(cmd) {
    cmd.parse <- c()
    ep <- 1L
    if ((fixed.starts <- gregexpr("[\\w\\.-]+\\s*([@*])\\s*[\\w\\.-]+", cmd, perl=TRUE)[[1]])[1L] > 0) {
        #shouldn't it be \\*, not * ?! Come back to this.
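        # Worked example (comment only; names are illustrative): in the Mplus
        # line 'f1 BY x1@1 x2*0.8', '@' fixes a value and '*' sets a starting
        # value. The loop below rewrites it as 'f1 BY 1*x1 start(0.8)*x2',
        # which downstream becomes lavaan's 'f1 =~ 1*x1 + start(0.8)*x2'.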
        for (f in 1:length(fixed.starts)) {
            #the capture above obtains the fixed/start character (@ or *),
            #whereas the match obtains the full regex match
            opchar <- substr(cmd, attr(fixed.starts, "capture.start")[f],
                             attr(fixed.starts, "capture.start")[f] +
                             attr(fixed.starts, "capture.length")[f] - 1)

            #match arguments around asterisk/at symbol
            argsplit <- strsplit(substr(cmd, fixed.starts[f],
                                        fixed.starts[f] + attr(fixed.starts, "match.length")[f] - 1),
                                 paste0("\\s*", ifelse(opchar=="*", "\\*", opchar), "\\s*"),
                                 perl=TRUE)[[1]]
            v_pre <- argsplit[1]
            v_post <- argsplit[2]

            if (suppressWarnings(is.na(as.numeric(v_pre)))) { #fixed/start value is the post-multiplier
                var <- v_pre
                val <- v_post
            } else if (suppressWarnings(is.na(as.numeric(v_post)))) { #fixed/start value is the pre-multiplier
                var <- v_post
                val <- v_pre
            } else stop("Cannot parse Mplus fixed/starts values specification: ", v_pre, v_post)

            if (opchar == "@") {
                cmd.parse[ep] <- joinRegexExpand(cmd, paste0(val, "*", var, sep=""), fixed.starts, f)
                ep <- ep + 1L
            } else {
                cmd.parse[ep] <- joinRegexExpand(cmd, paste0("start(", val, ")*", var, sep=""), fixed.starts, f)
                ep <- ep + 1L
            }
        }
        return(paste(cmd.parse, collapse=""))
    } else {
        return(cmd)
    }
}

parseConstraints <- function(cmd) {
    #Allow cmd to have newlines embedded. In this case, split on newlines,
    #and loop over and parse each chunk.
    #Dump leading and trailing newlines, which contain no information about
    #constraints, but may add dummy elements to the vector after strsplit.
    #Maybe return LHS and RHS of the parsed command where constraints only
    #appear on the RHS, whereas the LHS contains only parameters.
    #Example: LHS is v1 v2 v3 and RHS is con1*v1 con2*v2 con3*v3

    cmd.split <- strsplit(cmd, "\n")[[1]]

    #drop empty lines (especially leading newline)
    cmd.split <- if(length(emptyPos <- which(cmd.split == "")) > 0L) {cmd.split[-1*emptyPos]} else {cmd.split}

    #Create a version of the command with no constraint specifications.
    #This is useful for constraint specs that use the params on the LHS and
    #RHS. Example: v1 ~~ conB*v1
    cmd.noconstraints <- paste0(gsub("\\s*\\([^\\)]+\\)\\s*", "", cmd.split, perl=TRUE), collapse=" ")

    cmd.tojoin <- c() #will store all chunks divided by newlines, which will be joined at the end

    #iterate over each newline segment
    for (n in 1:length(cmd.split)) {
        #in principle, now that we respect newlines, parens should only be of
        #length 1, since Mplus syntax dictates newlines for each use of
        #parentheses for constraints
        if ((parens <- gregexpr("(?<!start)\\(([^\\)]+)\\)", cmd.split[n], perl=TRUE)[[1]])[1L] > 0) { #match parentheses, but not start()
            #the syntax chunk after all parentheses have been matched
            cmd.expand <- c()

            for (p in 1:length(parens)) {
                #string within the constraint parentheses
                constraints <- substr(cmd.split[n], attr(parens, "capture.start")[p],
                                      attr(parens, "capture.start")[p] + attr(parens, "capture.length")[p] - 1)

                #Divide constraints on spaces to determine the number of
                #constraints to parse. Use trimSpace to avoid the problem of a
                #user including leading/trailing spaces within parentheses.
                con.split <- strsplit(trimSpace(constraints), "\\s+", perl=TRUE)[[1]]

                #if Mplus uses a purely numeric constraint, then add a ".con"
                #prefix to be consistent with R naming
                con.split <- sapply(con.split, function(x) { if (!
suppressWarnings(is.na(as.numeric(x)))) { make.names(paste0(".con", x)) } else { x } }) #determine the parameters that precede the parentheses (either first character for p == 1 or character after preceding parentheses) prestrStart <- ifelse(p > 1, attr(parens, "capture.start")[p-1] + attr(parens, "capture.length")[p-1] + 1, 1) #obtain the parameters that precede the parentheses, divide into arguments on spaces #use trimSpace here because first char after prestrStart for p > 1 will probably be a space precmd.split <- strsplit(trimSpace(substr(cmd.split[n], prestrStart, parens[p] - 1)), "\\s+", perl=TRUE)[[1]] #peel off any potential LHS arguments, such as F1 BY precmdLHSOp <- which(tolower(precmd.split) %in% c("by", "with", "on")) if (any(precmdLHSOp)) { lhsop <- paste0(precmd.split[1:precmdLHSOp[1L]], " ", collapse=" ") #join lhs and op as a single string, add trailing space so that paste with expanded RHS is right. rhs <- precmd.split[(precmdLHSOp+1):length(precmd.split)] } else { lhsop <- "" rhs <- precmd.split } if (length(con.split) > 1L) { #several constraints listed within parentheses. Example: F1 BY X1 X2 X3 X4 (C2 C3 C4) #thus, backwards match the constraints to parameters #restrict parameters to backwards match to be of the same length as number of constraints rhs.backmatch <- rhs[(length(rhs)-length(con.split)+1):length(rhs)] rhs.expand <- c() #check that no mean or scale markers are part of the rhs param to expand if ((preMark.match <- regexpr("^\\s*[\\[\\{]", rhs.backmatch[1L], perl=TRUE))[1L] > 0) { preMark <- substr(rhs.backmatch[1L], preMark.match[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L] - 1) rhs.backmatch[1L] <- substr(rhs.backmatch[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L], nchar(rhs.backmatch[1L])) } else { preMark <- "" } if ((postMark.match <- regexpr("[\\]\\}]\\s*$", rhs.backmatch[length(rhs.backmatch)], perl=TRUE))[1L] > 0) { postMark <- substr(rhs.backmatch[length(rhs.backmatch)], postMark.match[1L], nchar(rhs.backmatch[length(rhs.backmatch)])) rhs.backmatch[length(rhs.backmatch)] <- substr(rhs.backmatch[length(rhs.backmatch)], 1, postMark.match[1L] - 1) } else { postMark <- "" } #pre-multiply each parameter with each corresponding constraint for (i in 1:length(rhs.backmatch)) { rhs.expand[i] <- paste0(con.split[i], "*", rhs.backmatch[i]) } #join rhs as string and add back in mean/scale operator, if present rhs.expand <- paste0(preMark, paste(rhs.expand, collapse=" "), postMark) #if there were params that preceded the backwards match, then add these back to the syntax #append this syntax to the parsed command, cmd.expand if (length(rhs) - length(con.split) > 0L) { cmd.expand <- c(cmd.expand, paste(lhsop, paste(rhs[1:(length(rhs)-length(con.split))], collapse=" "), rhs.expand)) } else { cmd.expand <- c(cmd.expand, paste0(lhsop, rhs.expand)) } } else { #should be able to reduce redundancy with above #all parameters on the right hand side are to be equated #thus, pre-multiply each parameter by the constraint #check that no mean or scale markers are part of the rhs param to expand #DUPE CODE FROM ABOVE. Make Function?! 
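                    # Sketch of this branch (comment only; names are
                    # illustrative): a single label shared by several
                    # parameters, as in the Mplus line 'f1 BY x1 x2 x3 (lam)',
                    # is pre-multiplied onto every rhs element, yielding
                    # 'f1 BY lam*x1 lam*x2 lam*x3' (later converted to
                    # 'f1 =~ lam*x1 + lam*x2 + lam*x3'), i.e., an equality
                    # constraint across those loadings.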
if ((preMark.match <- regexpr("^\\s*[\\[\\{]", rhs[1L], perl=TRUE))[1L] > 0) { preMark <- substr(rhs[1L], preMark.match[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L] - 1) rhs[1L] <- substr(rhs[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L], nchar(rhs[1L])) } else { preMark <- "" } if ((postMark.match <- regexpr("[\\]\\}]\\s*$", rhs[length(rhs)], perl=TRUE))[1L] > 0) { postMark <- substr(rhs[length(rhs)], postMark.match[1L], nchar(rhs[length(rhs)])) rhs[length(rhs)] <- substr(rhs[length(rhs)], 1, postMark.match[1L] - 1) } else { postMark <- "" } rhs.expand <- c() for (i in 1:length(rhs)) { rhs.expand[i] <- paste0(con.split[1L], "*", rhs[i]) } #join rhs as string rhs.expand <- paste0(preMark, paste(rhs.expand, collapse=" "), postMark) cmd.expand <- c(cmd.expand, paste0(lhsop, rhs.expand)) } } cmd.tojoin[n] <- paste(cmd.expand, collapse=" ") } else { cmd.tojoin[n] <- cmd.split[n] } #no parens } #eliminate newlines in this function so that they don't mess up \\s+ splits downstream toReturn <- paste(cmd.tojoin, collapse=" ") attr(toReturn, "noConstraints") <- cmd.noconstraints return(toReturn) } expandGrowthCmd <- function(cmd) { #can assume that any spaces between tscore and variable were stripped by parseFixStart #verify that this is not a random slope if (any(tolower(strsplit(cmd, "\\s+", perl=TRUE)[[1]]) %in% c("on", "at"))) { stop("lavaan does not support random slopes or individually varying growth model time scores") } cmd.split <- strsplit(cmd, "\\s*\\|\\s*", perl=TRUE)[[1]] if (!length(cmd.split) == 2) stop("Unknown growth syntax: ", cmd) lhs <- cmd.split[1] lhs.split <- strsplit(lhs, "\\s+", perl=TRUE)[[1]] rhs <- cmd.split[2] rhs.split <- strsplit(rhs, "(\\*|\\s+)", perl=TRUE)[[1]] if (length(rhs.split) %% 2 != 0) stop("Number of variables and number of tscores does not match: ", rhs) tscores <- as.numeric(rhs.split[1:length(rhs.split) %% 2 != 0]) #pre-multipliers vars <- rhs.split[1:length(rhs.split) %% 2 == 0] cmd.expand <- c() for (p in 0:(length(lhs.split)-1)) { if (p == 0) { #intercept cmd.expand <- c(cmd.expand, paste(lhs.split[(p+1)], "=~", paste("1*", vars, sep="", collapse=" + "))) } else { cmd.expand <- c(cmd.expand, paste(lhs.split[(p+1)], "=~", paste(tscores^p, "*", vars, sep="", collapse=" + "))) } } return(cmd.expand) } #function to wrap long lines at a certain width, splitting on + symbols to be consistent with R syntax wrapAfterPlus <- function(cmd, width=90, exdent=5) { result <- lapply(cmd, function(line) { if (nchar(line) > width) { split <- c() spos <- 1L plusMatch <- gregexpr("+", line, fixed=TRUE)[[1]] mpos <- 1L if (plusMatch[1L] > 0L) { #split after plus symbol charsRemain <- nchar(line) while(charsRemain > 0L) { toProcess <- substr(line, nchar(line) - charsRemain + 1, nchar(line)) offset <- nchar(line) - charsRemain + 1 if (nchar(remainder <- substr(line, offset, nchar(line))) <= (width - exdent)) { #remainder of line fits within width -- no need to continue wrapping split[spos] <- remainder charsRemain <- 0 } else { wrapAt <- which(plusMatch < (width + offset - exdent)) wrapAt <- wrapAt[length(wrapAt)] #at the final + split[spos] <- substr(line, offset, plusMatch[wrapAt]) charsRemain <- charsRemain - nchar(split[spos]) spos <- spos + 1 } } #remove leading and trailing chars split <- trimSpace(split) #handle exdent split <- sapply(1:length(split), function(x) { if (x > 1) paste0(paste(rep(" ", exdent), collapse=""), split[x]) else split[x] }) return(split) } else { return(strwrap(line, width=width, exdent=exdent)) 
#convention strwrap when no + present } } else { return(line) } }) #bind together multi-line expansions into single vector return(unname(do.call(c, result))) } mplus2lavaan.constraintSyntax <- function(syntax) { #should probably pass in model syntax along with some tracking of which parameter labels are defined. #convert MODEL CONSTRAINT section to lavaan model syntax syntax <- paste(lapply(trimSpace(strsplit(syntax, "\n")), function(x) { if (length(x) == 0L && is.character(x)) "" else x}), collapse="\n") #replace ! with # for comment lines. Also strip newline and replace with semicolon syntax <- gsub("(\\s*)!(.+)\n", "\\1#\\2;", syntax, perl=TRUE) #split into vector of strings #need to peel off leading or trailing newlines -- leads to parsing confusion downstream otherwise syntax.split <- gsub("(^\n|\n$)", "", unlist( strsplit(syntax, ";") ), perl=TRUE) constraint.out <- c() #TODO: Handle PLOT and LOOP syntax for model constraints. #TODO: Handle DO loop convention #first parse new parameters defined in MODEL CONSTRAINT into a vector new.parameters <- c() #parameters that are defined in constraint section if (length(new.con.lines <- grep("^\\s*NEW\\s*\\([^\\)]+\\)", syntax.split, perl=TRUE, ignore.case=TRUE)) > 0L) { for (cmd in syntax.split[new.con.lines]) { #process new constraint definition new.con <- regexpr("^\\s*NEW\\s*\\(([^\\)]+)\\)", cmd, perl=TRUE, ignore.case=TRUE) if (new.con[1L] == -1) stop("Unable to parse names of new contraints") new.con <- substr(cmd, attr(new.con, "capture.start"), attr(new.con, "capture.start") + attr(new.con, "capture.length") - 1L) new.con <- expandCmd(new.con) #allow for hyphen expansion new.parameters <- c(new.parameters, strsplit(trimSpace(new.con), "\\s+", perl=TRUE)[[1L]]) } syntax.split <- syntax.split[-1L * new.con.lines] #drop out these lines parameters.undefined <- new.parameters #to be used below to handle ambiguity of equation versus definition } for (cmd in syntax.split) { if (grepl("^\\s*#", cmd, perl=TRUE)) { #comment line constraint.out <- c(constraint.out , gsub("\n", "", cmd, fixed=TRUE)) #drop any newlines } else if (grepl("^\\s+$", cmd, perl=TRUE)) { #do nothing, just a space line } else { #constraint proper cmd <- gsub("**", "^", cmd, fixed=TRUE) #handle exponent #lower case the math operations supported by Mplus to be consistent with R #match all math operators, then lower case each and rejoin string maths <- gregexpr("(SQRT|LOG|LOG10|EXP|ABS|SIN|COS|TAN|ASIN|ACOS|ATAN)\\s*\\(", cmd, perl=TRUE)[[1L]] if (maths[1L] > 0) { maths.replace <- c() ep <- 1 for (i in 1:length(maths)) { operator <- tolower(substr(cmd, attr(maths, "capture.start")[i], attr(maths, "capture.start")[i] + attr(maths, "capture.length")[i] - 1)) maths.replace[ep] <- joinRegexExpand(cmd, operator, maths, i, matchLength="capture.length") #only match operator, not opening ( ep <- ep + 1 } cmd <- paste(maths.replace, collapse="") } #equating some lhs and rhs: could reflect definition of new parameter if ((equals <- regexpr("=", cmd, fixed=TRUE))[1L] > 0) { lhs <- trimSpace(substr(cmd, 1, equals - 1)) rhs <- trimSpace(substr(cmd, equals + attr(equals, "match.length"), nchar(cmd))) #possibility of lhs or rhs containing the single variable to be equated if (regexpr("\\s+", lhs, perl=TRUE)[1L] > 0L) { def <- rhs body <- lhs } else if (regexpr("\\s+", rhs, perl=TRUE)[1L] > 0L) { def <- lhs body <- rhs } else { #warning("Can't figure out which side of constraint defines a parameter") #this would occur for simple rel5 = rel2 sort of syntax def <- lhs body <- rhs } #must 
decide whether this is a new parameter (:=) or equation of exising labels (==) #alternatively, could be zero, as in 0 = x + y #this is tricky, because mplus doesn't differentiate definition from equation #consequently, could confuse the issue as in ex5.20 #NEW(rel2 rel5 stan3 stan6); #rel2 = lam2**2*vf1/(lam2**2*vf1 + ve2); #rel5 = lam5**2*vf2/(lam5**2*vf2 + ve5); #rel5 = rel2; #for now, only define a new constraint if it's not already defined #otherwise equate if (def %in% new.parameters && def %in% parameters.undefined) { constraint.out <- c(constraint.out, paste(def, ":=", body)) parameters.undefined <- parameters.undefined[!parameters.undefined==def] } else { constraint.out <- c(constraint.out, paste(def, "==", body)) } } else { #inequality constraints -- paste as is constraint.out <- c(constraint.out, cmd) } } } wrap <- paste(wrapAfterPlus(constraint.out, width=90, exdent=5), collapse="\n") return(wrap) } mplus2lavaan.modelSyntax <- function(syntax) { #initial strip of leading/trailing whitespace, which can interfere with splitting on spaces #strsplit generates character(0) for empty strings, which causes problems in paste because paste actually includes it as a literal #example: paste(list(character(0), "asdf", character(0)), collapse=" ") #thus, use lapply to convert these to empty strings first syntax <- paste(lapply(trimSpace(strsplit(syntax, "\n")), function(x) { if (length(x) == 0L && is.character(x)) "" else x}), collapse="\n") #replace ! with # for comment lines. Also strip newline and replace with semicolon syntax <- gsub("(\\s*)!(.+)\n*", "\\1#\\2;", syntax, perl=TRUE) #new direction: retain newlines in parsed syntax until after constraints have been parsed #delete newlines #syntax <- gsub("\n", "", syntax, fixed=TRUE) # replace semicolons with newlines prior to split (divide into commands) #syntax <- gsub(";", "\n", syntax, fixed=TRUE) #split into vector of strings #syntax.split <- unlist( strsplit(syntax, "\n") ) syntax.split <- trimSpace(unlist( strsplit(syntax, ";") )) #format of parTable to mimic. # 'data.frame': 34 obs. of 12 variables: # $ id : int 1 2 3 4 5 6 7 8 9 10 ... # $ lhs : chr "ind60" "ind60" "ind60" "dem60" ... # $ op : chr "=~" "=~" "=~" "=~" ... # $ rhs : chr "x1" "x2" "x3" "y1" ... # $ user : int 1 1 1 1 1 1 1 1 1 1 ... # $ group : int 1 1 1 1 1 1 1 1 1 1 ... # $ free : int 0 1 2 0 3 4 5 0 6 7 ... # $ ustart: num 1 NA NA 1 NA NA NA 1 NA NA ... # $ exo : int 0 0 0 0 0 0 0 0 0 0 ... # $ label : chr "" "" "" "" ... # $ eq.id : int 0 0 0 0 0 0 0 0 0 0 ... # $ unco : int 0 1 2 0 3 4 5 0 6 7 ... 
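    # Illustrative end-to-end example (comment only; the model is made up):
    # the Mplus MODEL lines
    #   f1 BY y1-y3;
    #   f1 ON x1;
    # come out of the loop below as the lavaan lines
    #   f1 =~ y1 + y2 + y3
    #   f1 ~ x1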
#vector of lavaan syntax lavaan.out <- c() for (cmd in syntax.split) { if (grepl("^\\s*#", cmd, perl=TRUE)) { #comment line lavaan.out <- c(lavaan.out, gsub("\n", "", cmd, fixed=TRUE)) #drop any newlines (otherwise done by parseConstraints) } else if (grepl("^\\s*$", cmd, perl=TRUE)) { #do nothing, just a space or blank line } else { #hyphen expansion cmd <- expandCmd(cmd) #blow up on growth syntax for now # if (grepl("|", cmd, fixed=TRUE)) stop("Growth modeling syntax using | not supported at the moment.") #parse fixed parameters and starting values cmd <- parseFixStart(cmd) #parse any constraints here (avoid weird logic below) cmd <- parseConstraints(cmd) if ((op <- regexpr("\\s+(by|on|with|pwith)\\s+", cmd, ignore.case=TRUE, perl=TRUE))[1L] > 0) { #regressions, factors, covariances lhs <- substr(cmd, 1, op - 1) #using op takes match.start which will omit spaces before operator rhs <- substr(cmd, op + attr(op, "match.length"), nchar(cmd)) operator <- tolower(substr(cmd, attr(op, "capture.start"), attr(op, "capture.start") + attr(op, "capture.length") - 1)) if (operator == "by") { lav.operator <- "=~" } else if (operator == "with" || operator == "pwith") { lav.operator <- "~~" } else if (operator == "on") { lav.operator <- "~" } #handle parameter combinations lhs.split <- strsplit(lhs, "\\s+")[[1]] #trimSpace( #handle pwith syntax if (operator == "pwith") { #TODO: Figure out if pwith can be paired with constraints? rhs.split <- strsplit(rhs, "\\s+")[[1]] #trimSpace( if (length(lhs.split) != length(rhs.split)) { browser(); stop("PWITH command does not have the same number of arguments on the left and right sides.")} cmd <- sapply(1:length(lhs.split), function(i) paste(lhs.split[i], lav.operator, rhs.split[i])) } else { #insert plus signs on the rhs rhs <- gsub("\\s+", " + ", rhs, perl=TRUE) if (length(lhs.split) > 1L) { #expand using possible combinations cmd <- sapply(lhs.split, function(larg) { pair <- paste(larg, lav.operator, rhs) return(pair) }) } else { cmd <- paste(lhs, lav.operator, rhs) } } } else if ((means.scales <- regexpr("^\\s*([\\[\\{])([^\\]\\}]+)[\\]\\}]\\s*$", cmd, ignore.case=TRUE, perl=TRUE))[1L] > 0) { #intercepts/means or scales #first capture is the operator: [ or { operator <- substr(cmd, attr(means.scales, "capture.start")[1L], attr(means.scales, "capture.start")[1L] + attr(means.scales, "capture.length")[1L] - 1) params <- substr(cmd, attr(means.scales, "capture.start")[2L], attr(means.scales, "capture.start")[2L] + attr(means.scales, "capture.length")[2L] - 1) #obtain parameters with no constraint specification for LHS params.noconstraints <- sub("^\\s*[\\[\\{]([^\\]\\}]+)[\\]\\}]\\s*$", "\\1", attr(cmd, "noConstraints"), perl=TRUE) means.scales.split <- strsplit(params, "\\s+")[[1]] #trimSpace( means.scales.noConstraints.split <- strsplit(params.noconstraints, "\\s+")[[1]] #trimSpace( if (operator == "[") { #Tricky syntax shift (and corresponding kludge). For means, need to put constraint on RHS as pre-multiplier of 1 (e.g., x1 ~ 5*1). 
#But parseConstraints returns constraints multiplied by parameters cmd <- sapply(means.scales.split, function(v) { #shift pre-multiplier if ((premult <- regexpr("([^\\*]+)\\*([^\\*]+)", v, perl=TRUE))[1L] > 0) { modifier <- substr(v, attr(premult, "capture.start")[1L], attr(premult, "capture.start")[1L] + attr(premult, "capture.length")[1L] - 1) paramName <- substr(v, attr(premult, "capture.start")[2L], attr(premult, "capture.start")[2L] + attr(premult, "capture.length")[2L] - 1) paste0(paramName, " ~ ", modifier, "*1") } else { paste(v, "~ 1") } }) } else if (operator == "{"){ #only include constraints on RHS cmd <- sapply(1:length(means.scales.split), function(v) paste(means.scales.noConstraints.split[v], "~*~", means.scales.split[v])) } else { stop("What's the operator?!") } } else if (grepl("|", cmd, fixed=TRUE)) { #expand growth modeling language cmd <- expandGrowthCmd(cmd) } else { #no operator, no means, must be variance. #cat("assuming vars: ", cmd, "\n") vars.lhs <- strsplit(attr(cmd, "noConstraints"), "\\s+")[[1]] #trimSpace( vars.rhs <- strsplit(cmd, "\\s+")[[1]] #trimSpace( cmd <- sapply(1:length(vars.lhs), function(v) paste(vars.lhs[v], "~~", vars.rhs[v])) } #handle threshold substitution: $ -> | cmd <- gsub("$", "|", cmd, fixed=TRUE) lavaan.out <- c(lavaan.out, cmd) } } #for now, include a final trimSpace call since some arguments have leading/trailing space stripped. wrap <- paste(wrapAfterPlus(lavaan.out, width=90, exdent=5), collapse="\n") #trimSpace( return(wrap) } mplus2lavaan <- function(inpfile, run=TRUE) { stopifnot(length(inpfile) == 1L) stopifnot(grepl("\\.inp$", inpfile)) if (!file.exists(inpfile)) { stop("Could not find file: ", inpfile) } #for future consideration. For now, require a .inp file # if (length(inpfile) == 1L && grepl("\\.inp$", inpfile)) { # if (!file.exists(inpfile)) { stop("Could not find file: ", inpfile) } # inpfile.text <- scan(inpfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE) # } else { # #assume that inpfile itself is syntax (e.g., in a character vector) # inpfile.text <- inpfile # } inpfile.text <- scan(inpfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE) sections <- divideInputIntoSections(inpfile.text, inpfile) mplus.inp <- list() mplus.inp$title <- trimSpace(paste(sections$title, collapse=" ")) mplus.inp$data <- divideIntoFields(sections$data, required="file") mplus.inp$variable <- divideIntoFields(sections$variable, required="names") mplus.inp$analysis <- divideIntoFields(sections$analysis) meanstructure <- "default" #lavaan default if(!is.null(mplus.inp$analysis$model)) { if (tolower(mplus.inp$analysis$model) == "nomeanstructure") { meanstructure=FALSE } #explicitly disable mean structure } information <- "default" #lavaan default if(!is.null(mplus.inp$analysis$information)) { information <- tolower(mplus.inp$analysis$information) } estimator <- "default" if (!is.null(est <- mplus.inp$analysis$estimator)) { #no memory of what this is up to.... if (toupper(est) == "MUML") warning("Mplus does not support MUML estimator. Using default instead.") estimator <- est #march 2013: handle case where categorical data are specified, but ML-based estimator requested. #use WLSMV instead if (!is.null(mplus.inp$variable$categorical) && toupper(substr(mplus.inp$analysis$estimator, 1, 2)) == "ML") { warning("Lavaan does not yet support ML-based estimation for categorical data. 
Reverting to WLSMV") estimator <- "WLSMV" } } #expand hyphens in variable names and split into vector that will be the names for read.table mplus.inp$variable$names <- strsplit(expandCmd(mplus.inp$variable$names), "\\s+", perl=TRUE)[[1]] #expand hyphens in categorical declaration if (!is.null(mplus.inp$variable$categorical)) mplus.inp$variable$categorical <- strsplit(expandCmd(mplus.inp$variable$categorical), "\\s+", perl=TRUE)[[1]] #convert mplus syntax to lavaan syntax mplus.inp$model <- mplus2lavaan.modelSyntax(sections$model) #handle model constraint if ("model.constraint" %in% names(sections)) { mplus.inp$model.constraint <- mplus2lavaan.constraintSyntax(sections$model.constraint) mplus.inp$model <- paste(mplus.inp$model, mplus.inp$model.constraint, sep="\n") } #read mplus data (and handle missing spec) mplus.inp$data <- readMplusInputData(mplus.inp, inpfile) #handle bootstrapping specification se="default" bootstrap <- 1000L test <- "default" if (!is.null(mplus.inp$analysis$bootstrap)) { boot.type <- "standard" #check whether standard versus residual bootstrap is specified if ((boot.match <- regexpr("\\((\\w+)\\)", mplus.inp$analysis$bootstrap, perl=TRUE)) > 0L) { boot.type <- tolower(substr(mplus.inp$analysis$bootstrap, attr(boot.match, "capture.start"), attr(boot.match, "capture.start") + attr(boot.match, "capture.length") - 1L)) } if (boot.type == "residual") test <- "Bollen.Stine" se <- "bootstrap" if ((nboot.match <- regexpr("^\\s*(\\d+)", mplus.inp$analysis$bootstrap, perl=TRUE)) > 0L) { bootstrap <- as.numeric(substr(mplus.inp$analysis$bootstrap, attr(nboot.match, "capture.start"), attr(nboot.match, "capture.start") + attr(nboot.match, "capture.length") - 1L)) } } if (run) { fit <- sem(mplus.inp$model, data=mplus.inp$data, meanstructure=meanstructure, mimic="Mplus", estimator=estimator, test=test, se=se, bootstrap=bootstrap, information=information) fit@external <- list(mplus.inp=mplus.inp) } else { fit <- mplus.inp #just return the syntax outside of a lavaan object } return(fit) } divideIntoFields <- function(section.text, required) { if (is.null(section.text)) { return(NULL) } #The parser breaks down when there is a line with a trailing comment because then splitting on semicolon will combine it with the following line #Thus, trim off trailing comments before initial split section.text <- gsub("\\s*!.*$", "", section.text, perl=TRUE) section.split <- strsplit(paste(section.text, collapse=" "), ";", fixed=TRUE)[[1]] #split on semicolons section.divide <- list() for (cmd in section.split) { if (grepl("^\\s*!.*", cmd, perl=TRUE)) next #skip comment lines if (grepl("^\\s+$", cmd, perl=TRUE)) next #skip blank lines #mplus is apparently tolerant of specifications that don't include IS/ARE/= #example: usevariables x1-x10; #thus, split on spaces and assume that first element is lhs, drop second element if IS/ARE/=, and assume remainder is rhs #but if user uses equals sign, then spaces will not always be present (e.g., usevariables=x1-x10) if ( (leadingEquals <- regexpr("^\\s*[A-Za-z]+[A-Za-z_-]*\\s*(=)", cmd[1L], perl=TRUE))[1L] > 0) { cmdName <- trimSpace(substr(cmd[1L], 1, attr(leadingEquals, "capture.start") - 1)) cmdArgs <- trimSpace(substr(cmd[1L], attr(leadingEquals, "capture.start") + 1, nchar(cmd[1L]))) } else { cmd.spacesplit <- strsplit(trimSpace(cmd[1L]), "\\s+", perl=TRUE)[[1L]] if (length(cmd.spacesplit) < 2L) { #for future: make room for this function to prase things like just TECH13 (no rhs) } else { cmdName <- trimSpace(cmd.spacesplit[1L]) if (length(cmd.spacesplit) 
> 2L && tolower(cmd.spacesplit[2L]) %in% c("is", "are")) { cmdArgs <- paste(cmd.spacesplit[3L:length(cmd.spacesplit)], collapse=" ") #remainder, removing is/are } else { cmdArgs <- paste(cmd.spacesplit[2L:length(cmd.spacesplit)], collapse=" ") #is/are not used, so just join rhs } } } section.divide[[make.names(tolower(cmdName))]] <- cmdArgs } if (!missing(required)) { stopifnot(all(required %in% names(section.divide))) } return(section.divide) } #helper function splitFilePath <- function(abspath) { #function to split path into path and filename #code adapted from R.utils filePath command if (!is.character(abspath)) stop("Path not a character string") if (nchar(abspath) < 1 || is.na(abspath)) stop("Path is missing or of zero length") components <- strsplit(abspath, split="[\\/]")[[1]] lcom <- length(components) stopifnot(lcom > 0) #the file is the last element in the list. In the case of length == 1, this will extract the only element. relFilename <- components[lcom] absolute <- FALSE if (lcom == 1) { dirpart <- NA_character_ } else if (lcom > 1) { #drop the file from the list (the last element) components <- components[-lcom] dirpart <- do.call("file.path", as.list(components)) #if path begins with C:, /, //, or \\, then treat as absolute if (grepl("^([A-Z]{1}:|/|//|\\\\)+.*$", dirpart, perl=TRUE)) absolute <- TRUE } return(list(directory=dirpart, filename=relFilename, absolute=absolute)) } readMplusInputData <- function(mplus.inp, inpfile) { #handle issue of mplus2lavaan being called with an absolute path, whereas mplus has only a local data file inpfile.split <- splitFilePath(inpfile) datfile.split <- splitFilePath(mplus.inp$data$file) #if inp file target directory is non-empty, but mplus data is without directory, then append #inp file directory to mplus data. This ensures that R need not be in the working directory #to read the dat file. But if mplus data has an absolute directory, don't append #if mplus data directory is present and absolute, or if no directory in input file, just use filename as is if (!is.na(datfile.split$directory) && datfile.split$absolute) datFile <- mplus.inp$data$file #just use mplus data filename if it has absolute path else if (is.na(inpfile.split$directory)) datFile <- mplus.inp$data$file #just use mplus data filename if inp file is missing path (working dir) else datFile <- file.path(inpfile.split$directory, mplus.inp$data$file) #dat file path is relative or absent, and inp file directory is present if (!file.exists(datFile)) { warning("Cannot find data file: ", datFile) return(NULL) } #handle missing is/are: missList <- NULL if (!is.null(missSpec <- mplus.inp$variable$missing)) { expandMissVec <- function(missStr) { #sub-function to obtain a vector of all missing values within a set of parentheses missSplit <- strsplit(missStr, "\\s+")[[1L]] missVals <- c() for (f in missSplit) { if ((hyphenPos <- regexpr("\\d+(-)\\d+", f, perl=TRUE))[1L] > -1L) { #expand hyphen preHyphen <- substr(f, 1, attr(hyphenPos, "capture.start") - 1) postHyphen <- substr(f, attr(hyphenPos, "capture.start") + 1, nchar(f)) missVals <- c(missVals, as.character(seq(preHyphen, postHyphen))) } else { #append to vector missVals <- c(missVals, f) } } return(as.numeric(missVals)) } if (missSpec == "." 
|| missSpec=="*") { #case 1: MISSING ARE|=|IS .; na.strings <- missSpec } else if ((allMatch <- regexpr("\\s*ALL\\s*\\(([^\\)]+)\\)", missSpec, perl=TRUE))[1L] > -1L) { #case 2: use of ALL with parens missStr <- trimSpace(substr(missSpec, attr(allMatch, "capture.start"), attr(allMatch, "capture.start") + attr(allMatch, "capture.length") - 1L)) na.strings <- expandMissVec(missStr) } else { #case 3: specific missing values per variable #process each element missBlocks <- gregexpr("(?:(\\w+)\\s+\\(([^\\)]+)\\))+", missSpec, perl=TRUE)[[1]] missList <- list() if (missBlocks[1L] > -1L) { for (i in 1:length(missBlocks)) { vname <- substr(missSpec, attr(missBlocks, "capture.start")[i,1L], attr(missBlocks, "capture.start")[i,1L] + attr(missBlocks, "capture.length")[i,1L] - 1L) vmiss <- substr(missSpec, attr(missBlocks, "capture.start")[i,2L], attr(missBlocks, "capture.start")[i,2L] + attr(missBlocks, "capture.length")[i,2L] - 1L) vnameHyphen <- regexpr("(\\w+)-(\\w+)", vname, perl=TRUE)[1L] if (vnameHyphen > -1L) { #lookup against variable names vstart <- which(mplus.inp$variable$names == substr(vname, attr(vnameHyphen, "capture.start")[1L], attr(vnameHyphen, "capture.start")[1L] + attr(vnameHyphen, "capture.length")[1L] - 1L)) vend <- which(mplus.inp$variable$names == substr(vname, attr(vnameHyphen, "capture.start")[2L], attr(vnameHyphen, "capture.start")[2L] + attr(vnameHyphen, "capture.length")[2L] - 1L)) if (length(vstart) == 0L || length(vend) == 0L) { stop("Unable to lookup missing variable list: ", vname) } #I suppose start or finish could be mixed up if (vstart > vend) { vstart.orig <- vstart; vstart <- vend; vend <- vstart.orig } vname <- mplus.inp$variable$names[vstart:vend] } missVals <- expandMissVec(vmiss) for (j in 1:length(vname)) { missList[[ vname[j] ]] <- missVals } } } else { stop("I don't understand this missing specification: ", missSpec) } } } else { na.strings <- "NA" } if (!is.null(missList)) { dat <- read.table(datFile, header=FALSE, col.names=mplus.inp$variable$names, colClasses="numeric") #loop over variables in missList and set missing values to NA dat[,names(missList)] <- lapply(names(missList), function(vmiss) { dat[which(dat[,vmiss] %in% missList[[vmiss]]), vmiss] <- NA return(dat[,vmiss]) }) names(dat) <- mplus.inp$variable$names #loses these from the lapply } else { dat <- read.table(datFile, header=FALSE, col.names=mplus.inp$variable$names, na.strings=na.strings, colClasses="numeric") } #TODO: support covariance/mean+cov inputs #store categorical variables as ordered factors if (!is.null(mplus.inp$variable$categorical)) { dat[,c(mplus.inp$variable$categorical)] <- lapply(dat[,c(mplus.inp$variable$categorical), drop=FALSE], ordered) } return(dat) } divideInputIntoSections <- function(inpfile.text, filename) { inputHeaders <- grep("^\\s*(title:|data.*:|variable:|define:|analysis:|model.*:|output:|savedata:|plot:|montecarlo:)", inpfile.text, ignore.case=TRUE, perl=TRUE) stopifnot(length(inputHeaders) > 0L) mplus.sections <- list() for (h in 1:length(inputHeaders)) { sectionEnd <- ifelse(h < length(inputHeaders), inputHeaders[h+1] - 1, length(inpfile.text)) section <- inpfile.text[inputHeaders[h]:sectionEnd] sectionName <- trimSpace(sub("^([^:]+):.*$", "\\1", section[1L], perl=TRUE)) #obtain text before the colon #dump section name from input syntax section[1L] <- sub("^[^:]+:(.*)$", "\\1", section[1L], perl=TRUE) mplus.sections[[make.names(tolower(sectionName))]] <- section } return(mplus.sections) } 
lavaan/R/lav_samplestats_igamma.R0000644000176200001440000001056312770174436016575 0ustar liggesusers# YR 18 Dec 2015 # - functions to (directly) compute the inverse of 'Gamma' (the asymptotic # variance matrix of the sample statistics) # - often used as 'WLS.V' (the weight matrix in WLS estimation) # and when computing the expected information matrix # NOTE: # - three types: # 1) plain (conditional.x = FALSE, fixed.x = FALSE) # 2) fixed.x (conditional.x = FALSE, fixed.x = TRUE) # 3) conditional.x (conditional.x = TRUE) # - if conditional.x = TRUE, we ignore fixed.x (can be TRUE or FALSE) # NORMAL-THEORY lav_samplestats_Gamma_inverse_NT <- function(Y = NULL, COV = NULL, ICOV = NULL, MEAN = NULL, rescale = FALSE, x.idx = integer(0L), fixed.x = FALSE, conditional.x = FALSE, meanstructure = FALSE, slopestructure = FALSE) { # check arguments if(length(x.idx) == 0L) { conditional.x <- FALSE fixed.x <- FALSE } if(is.null(ICOV)) { if(is.null(COV)) { stopifnot(!is.null(Y)) # coerce to matrix Y <- unname(as.matrix(Y)); N <- nrow(Y) COV <- cov(Y) } if(rescale) { COV <- COV * (N-1) / N # ML version } ICOV <- solve(COV) } # if conditional.x, we may also need COV and MEAN if(conditional.x && length(x.idx) > 0L && (meanstructure || slopestructure)) { if(is.null(COV)) { stopifnot(!is.null(Y)) # coerce to matrix Y <- unname(as.matrix(Y)); N <- nrow(Y) COV <- cov(Y) if(rescale) { COV <- COV * (N-1) / N # ML version } } if(is.null(MEAN)) { stopifnot(!is.null(Y)) MEAN <- unname(colMeans(Y)) } } # rename S.inv <- ICOV S <- COV M <- MEAN # unconditional if(!conditional.x) { # unconditional - stochastic x if(!fixed.x) { Gamma.inv <- 0.5*lav_matrix_duplication_pre_post(S.inv %x% S.inv) if(meanstructure) { Gamma.inv <- lav_matrix_bdiag(S.inv, Gamma.inv) } # unconditional - fixed x } else { # handle fixed.x = TRUE Gamma.inv <- 0.5*lav_matrix_duplication_pre_post(S.inv %x% S.inv) # zero rows/cols corresponding with x/x combinations nvar <- NROW(ICOV); pstar <- nvar*(nvar+1)/2 M <- matrix(0, nvar, nvar) M[ lav_matrix_vech_idx(nvar) ] <- seq_len(pstar) zero.idx <- lav_matrix_vech(M[x.idx, x.idx, drop = FALSE]) Gamma.inv[zero.idx,] <- 0 Gamma.inv[,zero.idx] <- 0 if(meanstructure) { S.inv.nox <- S.inv S.inv.nox[x.idx,] <- 0; S.inv.nox[,x.idx] <- 0 Gamma.inv <- lav_matrix_bdiag(S.inv.nox, Gamma.inv) } } } else { # conditional.x # 4 possibilities: # - no meanstructure, no slopes # - meanstructure, no slopes # - no meanstructure, slopes # - meanstructure, slopes S11 <- S.inv[-x.idx, -x.idx, drop = FALSE] Gamma.inv <- 0.5*lav_matrix_duplication_pre_post(S11 %x% S11) if(meanstructure || slopestructure) { C <- S[ x.idx, x.idx, drop=FALSE] MY <- M[-x.idx]; MX <- M[x.idx] C3 <- rbind(c(1,MX), cbind(MX, C + tcrossprod(MX))) } if(meanstructure) { if(slopestructure) { A11 <- C3 %x% S11 } else { c11 <- 1 / solve(C3)[1, 1, drop=FALSE] A11 <- c11 %x% S11 } } else { if(slopestructure) { A11 <- C %x% S11 } else { A11 <- matrix(0,0,0) } } if(meanstructure || slopestructure) { Gamma.inv <- lav_matrix_bdiag(A11, Gamma.inv) } } Gamma.inv } lavaan/R/lav_object_inspect.R0000644000176200001440000021634513053065515015713 0ustar liggesusers# inspect a fitted lavaan object # backward compatibility -- wrapper around lavInspect inspect.lavaan <- function(object, what = "free", ...) { lavInspect.lavaan(object = object, what = what, add.labels = TRUE, add.class = TRUE, drop.list.single.group = TRUE) } # the `tech' version: no labels, full matrices, ... 
for further processing lavTech.lavaan <- function(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) { lavInspect.lavaan(object, what = what, add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } # the `user' version: with defaults for display only lavInspect.lavaan <- function(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) { # object must inherit from class lavaan stopifnot(inherits(object, "lavaan")) # only a single argument if(length(what) > 1) { stop("`what' arguments contains multiple arguments; only one is allowed") } # be case insensitive what <- tolower(what) #### model matrices, with different contents #### if(what == "free") { lav_object_inspect_modelmatrices(object, what = "free", type = "free", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "impute" || what == "imputed") { # just to ease the transition for semTools! object@imputed } else if(what == "partable" || what == "user") { lav_object_inspect_modelmatrices(object, what = "free", type="partable", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "se" || what == "std.err" || what == "standard.errors") { lav_object_inspect_modelmatrices(object, what = "se", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "start" || what == "starting.values") { lav_object_inspect_modelmatrices(object, what = "start", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "est" || what == "estimates" || what == "coef" || what == "coefficients" || what == "x") { lav_object_inspect_modelmatrices(object, what = "est", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, #list.by.group = FALSE, for semTools only drop.list.single.group = drop.list.single.group) } else if(what == "dx.free") { lav_object_inspect_modelmatrices(object, what = "dx.free", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "dx.all") { lav_object_inspect_modelmatrices(object, what = "dx.all", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "std" || what == "std.all" || what == "standardized") { lav_object_inspect_modelmatrices(object, what = "std.all", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "std.lv") { lav_object_inspect_modelmatrices(object, what = "std.lv", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) } else if(what == "std.nox") { lav_object_inspect_modelmatrices(object, what = "std.nox", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) #### parameter table #### } else if(what == "list") { parTable(object) #### fit indices #### } else if(what == "fit" || what == "fitmeasures" || what == "fit.measures" || what 
== "fit.indices") { fitMeasures(object) #### modification indices #### } else if(what == "mi" || what == "modindices" || what == "modification.indices") { modificationIndices(object) #### sample statistics ##### } else if(what == "obs" || what == "observed" || what == "sampstat" || what == "sampstats" || what == "samplestats" || what == "samp" || what == "sample" || what == "samplestatistics") { lav_object_inspect_sampstat(object, h1 = FALSE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "h1" || what == "missing.h1" || what == "sampstat.h1") { lav_object_inspect_sampstat(object, h1 = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### wls.est - wls.obs - wls.v #### } else if(what == "wls.est") { lav_object_inspect_wls_est(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "wls.obs") { lav_object_inspect_wls_obs(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "wls.v") { lav_object_inspect_wls_v(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### data + missingness #### } else if(what == "data") { lav_object_inspect_data(object, drop.list.single.group = drop.list.single.group) } else if(what == "case.idx") { lav_object_inspect_case_idx(object, drop.list.single.group = drop.list.single.group) } else if(what == "ngroups") { object@Data@ngroups } else if(what == "group") { object@Data@group } else if(what == "cluster") { object@Data@cluster } else if(what == "ordered") { object@Data@ordered } else if(what == "group.label") { object@Data@group.label } else if(what == "nobs") { unlist( object@Data@nobs ) } else if(what == "norig") { unlist( object@Data@norig ) } else if(what == "ntotal") { sum(unlist( object@Data@nobs )) } else if(what == "coverage") { lav_object_inspect_missing_coverage(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what %in% c("patterns", "pattern")) { lav_object_inspect_missing_patterns(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "empty.idx") { lav_object_inspect_empty_idx(object, drop.list.single.group = drop.list.single.group) #### rsquare #### } else if(what == "rsquare" || what == "r-square" || what == "r2") { lav_object_inspect_rsquare(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### model-implied sample statistics #### } else if(what == "implied" || what == "fitted") { lav_object_inspect_implied(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "resid" || what == "res" || what == "residual" || what == "residuals") { lav_object_inspect_residuals(object, h1 = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cov.lv" || what == "veta") { lav_object_inspect_cov_lv(object, correlation.metric = FALSE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cor.lv") { lav_object_inspect_cov_lv(object, correlation.metric = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == 
"mean.lv" || what == "eeta") { lav_object_inspect_mean_lv(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cov.all") { lav_object_inspect_cov_all(object, correlation.metric = FALSE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cor.all") { lav_object_inspect_cov_all(object, correlation.metric = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cov.ov" || what == "sigma" || what == "sigma.hat") { lav_object_inspect_cov_ov(object, correlation.metric = FALSE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "cor.ov") { lav_object_inspect_cov_ov(object, correlation.metric = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "mean.ov" || what == "mu" || what == "mu.hat") { lav_object_inspect_mean_ov(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "th" || what == "thresholds") { lav_object_inspect_th(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "vy") { lav_object_inspect_vy(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### specific model matrices? #### } else if(what == "theta" || what == "theta.cov") { lav_object_inspect_theta(object, correlation.metric = FALSE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) } else if(what == "theta.cor") { lav_object_inspect_theta(object, correlation.metric = TRUE, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### convergence, meanstructure, categorical #### } else if(what == "converged") { object@optim$converged } else if(what == "iterations" || what == "iter" || what == "niter") { object@optim$iterations } else if(what == "meanstructure") { object@Model@meanstructure } else if(what == "categorical") { object@Model@categorical } else if(what == "fixed.x") { object@Model@fixed.x } else if(what == "parameterization") { object@Model@parameterization #### NACOV samplestats #### } else if(what == "gamma") { lav_object_inspect_sampstat_gamma(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### gradient, Hessian, information, first.order, vcov #### } else if(what == "gradient") { lav_object_inspect_gradient(object, add.labels = add.labels, add.class = add.class) } else if(what == "hessian") { lav_object_inspect_hessian(object, add.labels = add.labels, add.class = add.class) } else if(what == "information") { lav_object_inspect_information(object, information = "default", augmented = FALSE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "information.expected") { lav_object_inspect_information(object, information = "expected", augmented = FALSE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "information.observed") { lav_object_inspect_information(object, information = "observed", augmented = FALSE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "information.first.order" || what == "first.order") { lav_object_inspect_information(object, information 
= "first.order", augmented = FALSE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "augmented.information") { lav_object_inspect_information(object, information = "default", augmented = TRUE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "augmented.information.expected") { lav_object_inspect_information(object, information = "expected", augmented = TRUE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "augmented.information.observed") { lav_object_inspect_information(object, information = "observed", augmented = TRUE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "augmented.information.first.order" || what == "augmented.first.order") { lav_object_inspect_information(object, information = "first.order", augmented = TRUE, inverted = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "inverted.information") { lav_object_inspect_information(object, information = "default", augmented = TRUE, inverted = TRUE, add.labels = add.labels, add.class = add.class) } else if(what == "inverted.information.expected") { lav_object_inspect_information(object, information = "expected", augmented = TRUE, inverted = TRUE, add.labels = add.labels, add.class = add.class) } else if(what == "inverted.information.observed") { lav_object_inspect_information(object, information = "observed", augmented = TRUE, inverted = TRUE, add.labels = add.labels, add.class = add.class) } else if(what == "inverted.information.first.order" || what == "inverted.first.order") { lav_object_inspect_information(object, information = "first.order", augmented = TRUE, inverted = TRUE, add.labels = add.labels, add.class = add.class) } else if(what == "vcov") { lav_object_inspect_vcov(object, standardized = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "vcov.std.all" || what == "vcov.standardized" || what == "vcov.std") { lav_object_inspect_vcov(object, standardized = TRUE, type = "std.all", add.labels = add.labels, add.class = add.class) } else if(what == "vcov.std.lv") { lav_object_inspect_vcov(object, standardized = TRUE, type = "std.lv", add.labels = add.labels, add.class = add.class) } else if(what == "vcov.std.nox") { lav_object_inspect_vcov(object, standardized = TRUE, type = "std.nox", add.labels = add.labels, add.class = add.class) } else if(what == "vcov.def") { lav_object_inspect_vcov_def(object, standardized = FALSE, add.labels = add.labels, add.class = add.class) } else if(what == "vcov.def.std.all" || what == "vcov.def.standardized" || what == "vcov.def.std") { lav_object_inspect_vcov_def(object, standardized = TRUE, type = "std.all", add.labels = add.labels, add.class = add.class) } else if(what == "vcov.def.std.lv") { lav_object_inspect_vcov_def(object, standardized = TRUE, type = "std.lv", add.labels = add.labels, add.class = add.class) } else if(what == "vcov.def.std.nox") { lav_object_inspect_vcov_def(object, standardized = TRUE, type = "std.nox", add.labels = add.labels, add.class = add.class) } else if(what == "ugamma" || what == "ug" || what == "u.gamma") { lav_object_inspect_UGamma(object, add.labels = add.labels, add.class = add.class) ### jacobians #### } else if(what == "delta") { lav_object_inspect_delta(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) # post-checking } else if(what == "post.check" || what == "post") { lav_object_post_check(object) # options } else 
if(what == "options" || what == "lavoptions") { object@Options # call } else if(what == "call") { as.list( object@call ) # timing } else if(what == "timing") { object@timing # optim } else if(what == "optim") { object@optim # test } else if(what == "test") { object@test # zero cell tables } else if(what == "zero.cell.tables") { lav_object_inspect_zero_cell_tables(object, add.labels = add.labels, add.class = add.class, drop.list.single.group = drop.list.single.group) #### not found #### } else { stop("unknown `what' argument in inspect function: `", what, "'") } } # helper functions (mostly to deal with older 'object' that may have # been save somewhere) lav_object_inspect_est <- function(object) { if(class(object) == "lavaan") { # from 0.5-19, they are in the partable if(!is.null(object@ParTable$est)) { OUT <- object@ParTable$est } else if("Fit" %in% slotNames(object)) { # in < 0.5-19, we should look in @Fit@est OUT <- object@Fit@est } else { PT <- parTable(object) OUT <- rep(as.numeric(NA), length(PT$lhs)) } } else { # try coef() OUT <- coef(object, type = "user") if(is.matrix(OUT)) { # lavaanList? OUT <- rowMeans(OUT) } } OUT } lav_object_inspect_se <- function(object) { # from 0.5-19, they are in the partable if(!is.null(object@ParTable$se)) { OUT <- object@ParTable$se } else if("Fit" %in% slotNames(object)) { # in < 0.5-19, we should look in @Fit@se OUT <- object@Fit@se } else { PT <- parTable(object) OUT <- rep(as.numeric(NA), length(PT$lhs)) } OUT } lav_object_inspect_start <- function(object) { # from 0.5-19, they are in the partable if(!is.null(object@ParTable$start)) { OUT <- object@ParTable$start } else { # in < 0.5-19, we should look in @Fit@start OUT <- object@Fit@start } OUT } lav_object_inspect_boot <- function(object) { # from 0.5-19. they are in a separate slot tmp <- try(slot(object,"boot"), silent = TRUE) if(inherits(tmp, "try-error")) { # older version of object? 
est <- lav_object_inspect_est(object) BOOT <- attr(est, "BOOT.COEF") } else { # 0.5-19 way BOOT <- object@boot$coef } BOOT } lav_object_inspect_modelmatrices <- function(object, what = "free", type = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) { GLIST <- object@Model@GLIST if(what == "dx.free") { DX <- lav_model_gradient(lavmodel = object@Model, GLIST = NULL, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, type = "free", verbose = FALSE, forcePD = TRUE, group.weight = TRUE, Delta = NULL) } else if(what == "dx.all") { GLIST <- lav_model_gradient(lavmodel = object@Model, GLIST = NULL, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, type = "allofthem", verbose = FALSE, forcePD = TRUE, group.weight = TRUE, Delta = NULL) names(GLIST) <- names(object@Model@GLIST) } else if(what == "std.all") { STD <- standardize.est.all(object) } else if(what == "std.lv") { STD <- standardize.est.lv(object) } else if(what == "std.nox") { STD <- standardize.est.all.nox(object) } for(mm in 1:length(GLIST)) { if(add.labels) { dimnames(GLIST[[mm]]) <- object@Model@dimNames[[mm]] } if(what == "free") { # fill in free parameter counts if(type == "free") { m.el.idx <- object@Model@m.free.idx[[mm]] x.el.idx <- object@Model@x.free.idx[[mm]] #} else if(type == "unco") { # m.el.idx <- object@Model@m.unco.idx[[mm]] # x.el.idx <- object@Model@x.unco.idx[[mm]] } else if(type == "partable") { m.el.idx <- object@Model@m.user.idx[[mm]] x.el.idx <- object@Model@x.user.idx[[mm]] } else { stop("lavaan ERROR: unknown type argument:", type, ) } # erase everything GLIST[[mm]][,] <- 0.0 GLIST[[mm]][m.el.idx] <- x.el.idx } else if(what == "se") { # fill in standard errors m.user.idx <- object@Model@m.user.idx[[mm]] x.user.idx <- object@Model@x.user.idx[[mm]] SE <- lav_object_inspect_se(object) # erase everything GLIST[[mm]][,] <- 0.0 GLIST[[mm]][m.user.idx] <- SE[x.user.idx] } else if(what == "start") { # fill in starting values m.user.idx <- object@Model@m.user.idx[[mm]] x.user.idx <- object@Model@x.user.idx[[mm]] START <- lav_object_inspect_start(object) GLIST[[mm]][m.user.idx] <- START[x.user.idx] } else if(what == "est") { # fill in estimated parameter values m.user.idx <- object@Model@m.user.idx[[mm]] x.user.idx <- object@Model@x.user.idx[[mm]] EST <- lav_object_inspect_est(object) GLIST[[mm]][m.user.idx] <- EST[x.user.idx] } else if(what == "dx.free") { # fill in derivatives free parameters m.el.idx <- object@Model@m.free.idx[[mm]] x.el.idx <- object@Model@x.free.idx[[mm]] # erase everything GLIST[[mm]][,] <- 0.0 GLIST[[mm]][m.el.idx] <- DX[x.el.idx] } else if(what %in% c("std.all", "std.lv", "std.nox")) { m.user.idx <- object@Model@m.user.idx[[mm]] x.user.idx <- object@Model@x.user.idx[[mm]] GLIST[[mm]][m.user.idx] <- STD[x.user.idx] } # class if(add.class) { if(object@Model@isSymmetric[mm]) { class(GLIST[[mm]]) <- c("lavaan.matrix.symmetric", "matrix") } else { class(GLIST[[mm]]) <- c("lavaan.matrix", "matrix") } } } # try to reflect `equality constraints' con.flag <- FALSE if(what == "free" && object@Model@eq.constraints) { # extract constraints from parameter table PT <- parTable(object) CON <- PT[PT$op %in% c("==","<",">") ,c("lhs","op","rhs")] rownames(CON) <- NULL # replace 'labels' by parameter numbers ID <- lav_partable_constraints_label_id(PT) LABEL <- names(ID) for(con in 1:nrow(CON)) { # lhs LHS.labels <- all.vars(as.formula(paste("~",CON[con,"lhs"]))) if(length(LHS.labels) > 0L) { # par 
id LHS.freeid <- ID[match(LHS.labels, LABEL)] # substitute tmp <- CON[con,"lhs"] for(pat in 1:length(LHS.labels)) { tmp <- sub(LHS.labels[pat], LHS.freeid[pat], tmp) } CON[con,"lhs"] <- tmp } # rhs RHS.labels <- all.vars(as.formula(paste("~",CON[con,"rhs"]))) if(length(RHS.labels) > 0L) { # par id RHS.freeid <- ID[match(RHS.labels, LABEL)] # substitute tmp <- CON[con,"rhs"] for(pat in 1:length(RHS.labels)) { tmp <- sub(RHS.labels[pat], RHS.freeid[pat], tmp) } CON[con,"rhs"] <- tmp } } # con # add this info at the top #GLIST <- c(constraints = list(CON), GLIST) #no, not a good idea, it does not work with list.by.group # add it as a 'header' attribute? attr(CON, "header") <- "Note: model contains equality constraints:" con.flag <- TRUE } # should we group them per group? if(list.by.group) { lavsamplestats <- object@SampleStats lavmodel <- object@Model nmat <- lavmodel@nmat OUT <- vector("list", length = lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] mm.names <- names( GLIST[mm.in.group] ) OUT[[g]] <- GLIST[mm.in.group] } if(lavsamplestats@ngroups == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } } else { OUT <- GLIST } # header if(con.flag) { attr(OUT, "header") <- CON } # lavaan.list if(add.class) { class(OUT) <- c("lavaan.list", "list") } OUT } # - fixme, should we export this function? # - since 0.5-21, conditional.x = TRUE returns residual sample statistics # for ML, we have both joint and residual cov/var/...; but for # categorical = TRUE, we only have residual cov/var...; so, we # only return residual in both cases, whenever residual lav_object_inspect_sampstat <- function(object, h1 = FALSE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups ov.names <- object@pta$vnames$ov ov.names.res <- object@pta$vnames$ov.nox ov.names.x <- object@pta$vnames$ov.x lavsamplestats <- object@SampleStats OUT <- vector("list", length=G) for(g in 1:G) { if(!object@Model@conditional.x) { # covariance matrix if(h1 && !is.null(lavsamplestats@missing.h1[[g]])) { OUT[[g]]$cov <- lavsamplestats@missing.h1[[g]]$sigma } else { OUT[[g]]$cov <- lavsamplestats@cov[[g]] } if(add.labels && !is.null(OUT[[g]]$cov)) { rownames(OUT[[g]]$cov) <- colnames(OUT[[g]]$cov) <- ov.names[[g]] } if(add.class) { class(OUT[[g]]$cov) <- c("lavaan.matrix.symmetric", "matrix") } # mean vector if(h1 && !is.null(lavsamplestats@missing.h1[[g]])) { OUT[[g]]$mean <- lavsamplestats@missing.h1[[g]]$mu } else { OUT[[g]]$mean <- as.numeric(lavsamplestats@mean[[g]]) } if(add.labels) { names(OUT[[g]]$mean) <- ov.names[[g]] } if(add.class) { class(OUT[[g]]$mean) <- c("lavaan.vector", "numeric") } # thresholds if(object@Model@categorical) { OUT[[g]]$th <- as.numeric(lavsamplestats@th[[g]]) if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) OUT[[g]]$th <- OUT[[g]]$th[ -NUM.idx ] } if(add.labels) { names(OUT[[g]]$th) <- object@pta$vnames$th[[g]] } if(add.class) { class(OUT[[g]]$th) <- c("lavaan.vector", "numeric") } } } # !conditional.x else { # if conditional.x = TRUE # residual covariance matrix OUT[[g]]$res.cov <- lavsamplestats@res.cov[[g]] if(add.labels) { rownames(OUT[[g]]$res.cov) <- colnames(OUT[[g]]$res.cov) <- ov.names.res[[g]] } if(add.class) { class(OUT[[g]]$res.cov) <- c("lavaan.matrix.symmetric", "matrix") } # intercepts if(object@Model@conditional.x) { 
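# Example (sketch, hypothetical single-group fit with conditional.x = TRUE):
#   lavInspect(fit, "sampstat")$res.int
# returns the intercepts of the regressions of the non-x variables on the
# exogenous x's, one element per variable in ov.names.res.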
OUT[[g]]$res.int <- as.numeric(lavsamplestats@res.int[[g]]) if(add.labels) { names(OUT[[g]]$res.int) <- ov.names.res[[g]] } if(add.class) { class(OUT[[g]]$res.int) <- c("lavaan.vector", "numeric") } } # thresholds if(object@Model@categorical) { OUT[[g]]$res.th <- as.numeric(lavsamplestats@res.th[[g]]) if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) OUT[[g]]$res.th <- OUT[[g]]$res.th[ -NUM.idx ] } if(add.labels) { names(OUT[[g]]$res.th) <- object@pta$vnames$th[[g]] } if(add.class) { class(OUT[[g]]$res.th) <- c("lavaan.vector", "numeric") } } # slopes if(object@Model@nexo > 0L) { OUT[[g]]$res.slopes <- lavsamplestats@res.slopes[[g]] if(add.labels) { rownames(OUT[[g]]$res.slopes) <- ov.names.res[[g]] colnames(OUT[[g]]$res.slopes) <- ov.names.x[[g]] } if(add.class) { class(OUT[[g]]$res.slopes) <- c("lavaan.matrix", "matrix") } } # cov.x if(object@Model@nexo > 0L) { OUT[[g]]$cov.x <- lavsamplestats@cov.x[[g]] if(add.labels) { rownames(OUT[[g]]$cov.x) <- ov.names.x[[g]] colnames(OUT[[g]]$cov.x) <- ov.names.x[[g]] } if(add.class) { class(OUT[[g]]$cov.x) <- c("lavaan.matrix.symmetric", "matrix") } } } # conditional.x # stochastic weights if(object@Model@group.w.free) { # to be consistent with the 'implied' values, # transform so group.w is the 'log(group.freq)' OUT[[g]]$group.w <- log(lavsamplestats@group.w[[g]] * lavsamplestats@ntotal) if(add.labels) { names(OUT[[g]]$group.w) <- "w" } if(add.class) { class(OUT[[g]]$group.w) <- c("lavaan.vector", "numeric") } } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_data <- function(object, add.labels = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups OUT <- object@Data@X if(add.labels) { for(g in 1:G) { colnames(OUT[[g]]) <- object@Data@ov.names[[g]] } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_case_idx <- function(object, drop.list.single.group = FALSE) { G <- object@Data@ngroups OUT <- object@Data@case.idx if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_rsquare <- function(object, est.std.all=NULL, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups OUT <- vector("list", length=G) if(is.null(est.std.all)) { est.std.all <- standardize.est.all(object) } partable <- object@ParTable partable$rsquare <- 1.0 - est.std.all # no values > 1.0 partable$rsquare[partable$rsquare > 1.0] <- as.numeric(NA) for(g in 1:G) { ind.names <- partable$rhs[ which(partable$op == "=~" & partable$group == g) ] eqs.y.names <- partable$lhs[ which(partable$op == "~" & partable$group == g) ] y.names <- unique( c(ind.names, eqs.y.names) ) idx <- which(partable$op == "~~" & partable$lhs %in% y.names & partable$rhs == partable$lhs & partable$group == g) tmp <- partable$rsquare[idx] if(add.labels && length(tmp) > 0L) { names(tmp) <- partable$lhs[idx] } if(add.class) { class(tmp) <- c("lavaan.vector", "numeric") } OUT[[g]] <- tmp } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } # model implied sample stats lav_object_inspect_implied <- function(object, 
add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups ov.names <- object@pta$vnames$ov ov.names.res <- object@pta$vnames$ov.nox ov.names.x <- object@pta$vnames$ov.x lavimplied <- object@implied OUT <- vector("list", length=G) for(g in 1:G) { if(!object@Model@conditional.x) { # covariance matrix OUT[[g]]$cov <- lavimplied$cov[[g]] if(add.labels && !is.null(OUT[[g]]$cov)) { rownames(OUT[[g]]$cov) <- colnames(OUT[[g]]$cov) <- ov.names[[g]] } if(add.class) { class(OUT[[g]]$cov) <- c("lavaan.matrix.symmetric", "matrix") } # mean vector OUT[[g]]$mean <- as.numeric(lavimplied$mean[[g]]) if(add.labels) { names(OUT[[g]]$mean) <- ov.names[[g]] } if(add.class) { class(OUT[[g]]$mean) <- c("lavaan.vector", "numeric") } # thresholds if(object@Model@categorical) { OUT[[g]]$th <- as.numeric(lavimplied$th[[g]]) if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) OUT[[g]]$th <- OUT[[g]]$th[ -NUM.idx ] } if(add.labels) { names(OUT[[g]]$th) <- object@pta$vnames$th[[g]] } if(add.class) { class(OUT[[g]]$th) <- c("lavaan.vector", "numeric") } } } # !conditional.x else { # if conditional.x = TRUE # residual covariance matrix OUT[[g]]$res.cov <- lavimplied$res.cov[[g]] if(add.labels) { rownames(OUT[[g]]$res.cov) <- colnames(OUT[[g]]$res.cov) <- ov.names.res[[g]] } if(add.class) { class(OUT[[g]]$res.cov) <- c("lavaan.matrix.symmetric", "matrix") } # intercepts if(object@Model@conditional.x) { OUT[[g]]$res.int <- as.numeric(lavimplied$res.int[[g]]) if(add.labels) { names(OUT[[g]]$res.int) <- ov.names.res[[g]] } if(add.class) { class(OUT[[g]]$res.int) <- c("lavaan.vector", "numeric") } } # thresholds if(object@Model@categorical) { OUT[[g]]$res.th <- as.numeric(lavimplied$res.th[[g]]) if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) OUT[[g]]$res.th <- OUT[[g]]$res.th[ -NUM.idx ] } if(add.labels) { names(OUT[[g]]$res.th) <- object@pta$vnames$th[[g]] } if(add.class) { class(OUT[[g]]$res.th) <- c("lavaan.vector", "numeric") } } # slopes if(object@Model@nexo > 0L) { OUT[[g]]$res.slopes <- lavimplied$res.slopes[[g]] if(add.labels) { rownames(OUT[[g]]$res.slopes) <- ov.names.res[[g]] colnames(OUT[[g]]$res.slopes) <- ov.names.x[[g]] } if(add.class) { class(OUT[[g]]$res.slopes) <- c("lavaan.matrix", "matrix") } } # cov.x if(object@Model@nexo > 0L) { OUT[[g]]$cov.x <- object@SampleStats@cov.x[[g]] if(add.labels) { rownames(OUT[[g]]$cov.x) <- ov.names.x[[g]] colnames(OUT[[g]]$cov.x) <- ov.names.x[[g]] } if(add.class) { class(OUT[[g]]$cov.x) <- c("lavaan.matrix.symmetric", "matrix") } } } # conditional.x # stochastic weights if(object@Model@group.w.free) { OUT[[g]]$group.w <- lavimplied$group.w[[g]] if(add.labels) { names(OUT[[g]]$group.w) <- "w" # somewhat redundant } if(add.class) { class(OUT[[g]]$group.w) <- c("lavaan.vector", "numeric") } } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } # residuals: _inspect_sampstat - _inspect_implied lav_object_inspect_residuals <- function(object, h1 = TRUE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { # unstandardized residuals obsList <- lav_object_inspect_sampstat(object, h1 = h1, add.labels = add.labels, add.class = FALSE, drop.list.single.group = FALSE) estList <- lav_object_inspect_implied(object, add.labels = add.labels, add.class = FALSE, drop.list.single.group = FALSE) # multiple groups ngroups <- 
length(obsList) resList <- vector("list", length = ngroups) for(g in 1:ngroups) { if(object@Model@conditional.x) { if(!is.null(estList[[g]]$res.cov)) { resList[[g]]$res.cov <- ( obsList[[g]]$res.cov - estList[[g]]$res.cov ) if(add.class) { class(resList[[g]]$res.cov) <- c("lavaan.matrix.symmetric", "matrix") } } if(!is.null(estList[[g]]$res.int)) { resList[[g]]$res.int <- ( obsList[[g]]$res.int - estList[[g]]$res.int ) if(add.class) { class(resList[[g]]$res.int) <- c("lavaan.vector", "numeric") } } if(!is.null(estList[[g]]$res.th)) { resList[[g]]$res.th <- ( obsList[[g]]$res.th - estList[[g]]$res.th ) if(add.class) { class(resList[[g]]$res.th) <- c("lavaan.vector", "numeric") } } if(!is.null(estList[[g]]$res.slopes)) { resList[[g]]$res.slopes <- ( obsList[[g]]$res.slopes - estList[[g]]$res.slopes ) if(add.class) { class(resList[[g]]$res.slopes) <- c("lavaan.matrix", "matrix") } } if(!is.null(estList[[g]]$cov.x)) { resList[[g]]$cov.x <- ( obsList[[g]]$cov.x - estList[[g]]$cov.x ) if(add.class) { class(resList[[g]]$cov.x) <- c("lavaan.matrix.symmetric", "matrix") } } # unconditional } else { if(!is.null(estList[[g]]$cov)) { resList[[g]]$cov <- ( obsList[[g]]$cov - estList[[g]]$cov ) if(add.class) { class(resList[[g]]$cov) <- c("lavaan.matrix.symmetric", "matrix") } } if(!is.null(estList[[g]]$mean)) { resList[[g]]$mean <- ( obsList[[g]]$mean - estList[[g]]$mean ) if(add.class) { class(resList[[g]]$mean) <- c("lavaan.vector", "numeric") } } if(!is.null(estList[[g]]$th)) { resList[[g]]$th <- ( obsList[[g]]$th - estList[[g]]$th ) if(add.class) { class(resList[[g]]$th) <- c("lavaan.vector", "numeric") } } } # free group.w if(!is.null(estList[[g]]$group.w)) { resList[[g]]$group.w <- ( obsList[[g]]$group.w - estList[[g]]$group.w ) if(add.class) { class(resList[[g]]$group.w) <- c("lavaan.vector", "numeric") } } } OUT <- resList if(ngroups == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_cov_lv <- function(object, correlation.metric = FALSE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # compute lv covar OUT <- computeVETA(lavmodel = object@Model, lavsamplestats = object@SampleStats, remove.dummy.lv = TRUE) # cor + labels + class for(g in 1:G) { if(correlation.metric && nrow(OUT[[g]]) > 1L) { # note: cov2cor fails if matrix is empty! 
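# Example (sketch): this function backs lavInspect(fit, "cov.lv"); with
# correlation.metric = TRUE it yields the latent correlation matrix, so
#   cov2cor(lavInspect(fit, "cov.lv"))
# should equal lavInspect(fit, "cor.lv") for a single-group fit.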
OUT[[g]] <- cov2cor(OUT[[g]]) } if(add.labels) { colnames(OUT[[g]]) <- rownames(OUT[[g]]) <- object@pta$vnames$lv[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_mean_lv <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # compute lv means OUT <- computeEETA(lavmodel = object@Model, lavsamplestats = object@SampleStats, remove.dummy.lv = TRUE) OUT <- lapply(OUT, as.numeric) # labels + class for(g in 1:G) { if(add.labels && length(OUT[[g]]) > 0L) { names(OUT[[g]]) <- object@pta$vnames$lv.regular[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_cov_all <- function(object, correlation.metric = FALSE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # compute extended model implied covariance matrix (both ov and lv) OUT <- computeCOV(lavmodel = object@Model, lavsamplestats = object@SampleStats, remove.dummy.lv = TRUE) # cor + labels + class for(g in 1:G) { if(correlation.metric && nrow(OUT[[g]]) > 1L) { # note: cov2cor fails if matrix is empty! OUT[[g]] <- cov2cor(OUT[[g]]) } if(add.labels) { NAMES <- c(object@pta$vnames$ov.model[[g]], object@pta$vnames$lv.regular[[g]]) colnames(OUT[[g]]) <- rownames(OUT[[g]]) <- NAMES } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_cov_ov <- function(object, correlation.metric = FALSE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # get model-implied covariance matrix observed if(object@Model@conditional.x) { OUT <- object@implied$res.cov } else { OUT <- object@implied$cov } # cor + labels + class for(g in 1:G) { if(correlation.metric && nrow(OUT[[g]]) > 1L) { # note: cov2cor fails if matrix is empty! 
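# Example (sketch): lavInspect(fit, "cov.ov") returns this model-implied
# covariance matrix of the observed variables; for a well-fitting model it
# is close to the sample counterpart lavInspect(fit, "sampstat")$cov.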
OUT[[g]] <- cov2cor(OUT[[g]]) } if(add.labels) { colnames(OUT[[g]]) <- rownames(OUT[[g]]) <- object@pta$vnames$ov.model[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_mean_ov <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # compute lv means if(object@Model@conditional.x) { OUT <- object@implied$res.int } else { OUT <- object@implied$mean } # make numeric OUT <- lapply(OUT, as.numeric) # labels + class for(g in 1:G) { if(add.labels && length(OUT[[g]]) > 0L) { names(OUT[[g]]) <- object@pta$vnames$ov.model[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_th <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # thresholds if(object@Model@conditional.x) { OUT <- object@implied$res.th } else { OUT <- object@implied$th } # make numeric OUT <- lapply(OUT, as.numeric) # labels + class for(g in 1:G) { if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) OUT[[g]] <- OUT[[g]][ -NUM.idx ] } if(add.labels && length(OUT[[g]]) > 0L) { names(OUT[[g]]) <- object@pta$vnames$th[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_vy <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # 'unconditional' model-implied variances # - same as diag(Sigma.hat) if all Y are continuous) # - 1.0 (or delta^2) if categorical # - if also Gamma, cov.x is used (only if categorical) OUT <- computeVY(lavmodel = object@Model, GLIST = NULL, lavsamplestats = object@SampleStats, diagonal.only = TRUE) # labels + class for(g in 1:G) { if(add.labels && length(OUT[[g]]) > 0L) { if(object@Model@categorical) { names(OUT[[g]]) <- object@pta$vnames$ov.nox[[g]] } else { names(OUT[[g]]) <- object@pta$vnames$ov[[g]] } } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_theta <- function(object, correlation.metric = FALSE, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # get residual covariances OUT <- computeTHETA(lavmodel = object@Model) # labels + class for(g in 1:G) { if(correlation.metric && nrow(OUT[[g]]) > 0L) { OUT[[g]] <- cov2cor(OUT[[g]]) } if(add.labels && length(OUT[[g]]) > 0L) { colnames(OUT[[g]]) <- rownames(OUT[[g]]) <- object@pta$vnames$ov.model[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_missing_coverage <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { 
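# Example (sketch): lavInspect(fit, "coverage") gives, for every pair of
# observed variables, the proportion of cases in which both are observed;
# with complete data this is simply a matrix of 1's (the fallback below).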
G <- object@Data@ngroups # get missing covarage OUT <- vector("list", G) for(g in 1:G) { if(!is.null(object@Data@Mp[[g]])) { OUT[[g]] <- object@Data@Mp[[g]]$coverage } else { nvar <- length(object@Data@ov.names[[g]]) OUT[[g]] <- matrix(1.0, nvar, nvar) } if(add.labels && length(OUT[[g]]) > 0L) { colnames(OUT[[g]]) <- rownames(OUT[[g]]) <- object@pta$vnames$ov.model[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_missing_patterns <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups # get missing covarage OUT <- vector("list", G) for(g in 1:G) { if(!is.null(object@Data@Mp[[g]])) { OUT[[g]] <- object@Data@Mp[[g]]$pat } else { nvar <- length(object@Data@ov.names[[g]]) OUT[[g]] <- matrix(TRUE, 1L, nvar) rownames(OUT[[g]]) <- object@Data@nobs[[g]] } if(add.labels && length(OUT[[g]]) > 0L) { colnames(OUT[[g]]) <- object@pta$vnames$ov.model[[g]] } if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_empty_idx <- function(object, drop.list.single.group = FALSE) { G <- object@Data@ngroups # get empty idx OUT <- vector("list", G) for(g in 1:G) { if(!is.null(object@Data@Mp[[g]])) { OUT[[g]] <- object@Data@Mp[[g]]$empty.idx } else { OUT[[g]] <- integer(0L) } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_wls_est <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups OUT <- lav_model_wls_est(object@Model) #, #cov.x = object@SampleStats@cov.x) for(g in 1:G) { if(add.labels && length(OUT[[g]]) > 0L) { #FIXME!!!! #names(OUT[[g]]) <- ?? } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_wls_obs <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { G <- object@Data@ngroups OUT <- object@SampleStats@WLS.obs for(g in 1:G) { if(add.labels && length(OUT[[g]]) > 0L) { #FIXME!!!! #names(OUT[[g]]) <- ?? } if(add.class) { class(OUT[[g]]) <- c("lavaan.vector", "numeric") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_wls_v <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { # shortcuts G <- object@Data@ngroups OUT <- lav_model_wls_v(lavmodel = object@Model, lavsamplestats = object@SampleStats, structured = TRUE, lavdata = object@Data) # if estimator == "DWLS" or "ULS", we only stored the diagonal # hence, we create a full matrix here if(object@Options$estimator %in% c("DWLS", "ULS")) { OUT <- lapply(OUT, function(x) { nr = NROW(x); diag(x, nrow=nr, ncol=nr) }) } # label + class for(g in 1:G) { if(add.labels && nrow(OUT[[g]]) > 0L) { #FIXME!!!! #names(OUT[[g]]) <- ?? 
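# (sketch) natural labels here would be the sample-statistic names, in the
# same order as the rows of lav_object_inspect_delta() below:
# group.w, thresholds, means, slopes, then the vech(cov) elements.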
} if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix", "matrix") } } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_sampstat_gamma <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { # shortcuts G <- object@Data@ngroups if(!is.null(object@SampleStats@NACOV[[1]])) { OUT <- object@SampleStats@NACOV } else { OUT <- lavGamma(object) } if(G == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_gradient <- function(object, add.labels = FALSE, add.class = FALSE) { if(object@SampleStats@missing.flag || object@Options$estimator == "PML") { group.weight <- FALSE } else { group.weight <- TRUE } OUT <- lav_model_gradient(lavmodel = object@Model, GLIST = NULL, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, type = "free", verbose = FALSE, group.weight = group.weight) # labels if(add.labels) { names(OUT) <- lav_partable_labels(object@ParTable, type="free") } # class if(add.class) { class(OUT) <- c("lavaan.vector", "numeric") } OUT } lav_object_inspect_hessian <- function(object, add.labels = FALSE, add.class = FALSE) { OUT <- lav_model_hessian(lavmodel = object@Model, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, group.weight = TRUE) # labels if(add.labels) { colnames(OUT) <- rownames(OUT) <- lav_partable_labels(object@ParTable, type="free") } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } lav_object_inspect_information <- function(object, information = "default", augmented = FALSE, inverted = FALSE, add.labels = FALSE, add.class = FALSE) { if(information == "default") { information <- object@Options$information } if(information == "expected" || information == "observed") { OUT <- lav_model_information(lavmodel = object@Model, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, information = information, augmented = augmented, inverted = inverted) } else if(information == "first.order") { B0 <- lav_model_information_firstorder(lavmodel = object@Model, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, check.pd = FALSE, augmented = augmented, inverted = inverted) attr(B0, "B0.group") <- NULL OUT <- B0 } # labels if(add.labels) { NAMES <- lav_partable_labels(object@ParTable, type="free") if(augmented) { nExtra <- nrow(OUT) - length(NAMES) if(nExtra > 0L) { NAMES <- c(NAMES, paste("aug", 1:nExtra, sep="")) } } colnames(OUT) <- rownames(OUT) <- NAMES } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } # only to provide a direct function to the old 'getVariability()' function lav_object_inspect_firstorder <- function(object, add.labels = FALSE, add.class = FALSE) { B0 <- lav_model_information_firstorder(lavmodel = object@Model, lavsamplestats = object@SampleStats, lavdata = object@Data, lavcache = object@Cache, check.pd = FALSE, augmented = FALSE, inverted = FALSE) attr(B0, "B0.group") <- NULL OUT <- B0 # labels if(add.labels) { colnames(OUT) <- rownames(OUT) <- lav_partable_labels(object@ParTable, type="free") } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } lav_object_inspect_vcov <- function(object, standardized = FALSE, type = "std.all", free.only = TRUE, 
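# (sketch) this matrix underlies the reported standard errors: for a
# hypothetical converged fit,
#   sqrt(diag(lavInspect(fit, "vcov")))
# reproduces the 'se' values of the free parameters in parTable(fit).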
add.labels = FALSE, add.class = FALSE, remove.duplicated = FALSE) { npar <- max(object@ParTable$free) if(object@optim$npar == 0) { OUT <- matrix(0,0,0) } else { # check if we already have it tmp <- try(slot(object, "vcov"), silent = TRUE) if(!inherits(tmp, "try-error") && !is.null(object@vcov$vcov)) { OUT <- object@vcov$vcov } else { # compute it again OUT <- lav_model_vcov(lavmodel = object@Model, lavsamplestats = object@SampleStats, lavoptions = object@Options, lavdata = object@Data, lavcache = object@Cache ) } } # strip attributes attr(OUT, "E.inv") <- NULL attr(OUT, "B0") <- NULL attr(OUT, "B0.group") <- NULL attr(OUT, "Delta") <- NULL attr(OUT, "WLS.V") <- NULL attr(OUT, "BOOT.COEF") <- NULL attr(OUT, "BOOT.TEST") <- NULL # standardized? if(standardized) { if(type == "std.lv") { JAC <- try(lav_func_jacobian_complex(func = standardize.est.lv.x, x = object@optim$x, lavobject = object), silent = TRUE) if(inherits(JAC, "try-error")) { # eg. pnorm() JAC <- lav_func_jacobian_simple(func = standardize.est.lv.x, x = object@optim$x, lavobject = object) } } else if(type == "std.all") { JAC <- try(lav_func_jacobian_complex(func = standardize.est.all.x, x = object@optim$x, object = object), silent = TRUE) if(inherits(JAC, "try-error")) { # eg. pnorm() JAC <- lav_func_jacobian_simple(func = standardize.est.all.x, x = object@optim$x, lavobject = object) } } else if(type == "std.nox") { JAC <- try(lav_func_jacobian_complex(func = standardize.est.all.nox.x, x = object@optim$x, lavobject = object), silent = TRUE) if(inherits(JAC, "try-error")) { # eg. pnorm() JAC <- lav_func_jacobian_simple(func = standardize.est.all.nox.x, x = object@optim$x, lavobject = object) } } # JAC contains *all* parameters in the parameter table if(free.only) { free.idx <- which(object@ParTable$free > 0L) JAC <- JAC[free.idx,, drop = FALSE] } OUT <- JAC %*% OUT %*% t(JAC) } # labels if(add.labels) { colnames(OUT) <- rownames(OUT) <- lav_partable_labels(object@ParTable, type="free") } # alias? if(remove.duplicated && object@Model@eq.constraints) { simple.flag <- lav_constraints_check_simple(object@Model) if(simple.flag) { LAB <- lav_partable_labels(object@ParTable, type="free") dup.flag <- duplicated(LAB) OUT <- OUT[!dup.flag, !dup.flag, drop = FALSE] } else { warning("lavaan WARNING: alias is TRUE, but equality constraints do not appear to be simple; returning full vcov") } } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } lav_object_inspect_vcov_def <- function(object, standardized = FALSE, type = "std.all", add.labels = FALSE, add.class = FALSE) { lavmodel <- object@Model lavpartable <- object@ParTable def.idx <- which(lavpartable$op == ":=") if(length(def.idx) == 0L) { return( matrix(0,0,0) ) } if(standardized) { # compute VCOV for "free" parameters only VCOV <- lav_object_inspect_vcov(object, standardized = TRUE, type = type, free.only = FALSE, add.labels = FALSE, add.class = FALSE) OUT <- VCOV[def.idx, def.idx, drop = FALSE] } else { # get free parameters x <- lav_model_get_parameters(lavmodel, type = "free") # bootstrap or not? 
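# (sketch) two routes for the vcov of ':=' parameters: with bootstrap
# draws, the empirical covariance of the defined functions evaluated in
# each draw; otherwise the first-order delta method,
#   Var(g(theta)) ~ J %*% VCOV %*% t(J), with J = d g / d theta,
# as computed below.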
if(!is.null(object@boot$coef)) { BOOT <- object@boot$coef BOOT.def <- apply(BOOT, 1L, lavmodel@def.function) if(length(def.idx) == 1L) { BOOT.def <- as.matrix(BOOT.def) } else { BOOT.def <- t(BOOT.def) } OUT <- cov(BOOT.def) } else { # VCOV VCOV <- lav_object_inspect_vcov(object, standardized = FALSE, type = type, free.only = TRUE, add.labels = FALSE, add.class = FALSE) # regular delta method JAC <- try(lav_func_jacobian_complex(func = lavmodel@def.function, x = x), silent=TRUE) if(inherits(JAC, "try-error")) { # eg. pnorm() JAC <- lav_func_jacobian_simple(func = lavmodel@def.function, x = x) } OUT <- JAC %*% VCOV %*% t(JAC) } } # labels if(add.labels) { LHS.names <- lavpartable$lhs[def.idx] colnames(OUT) <- rownames(OUT) <- LHS.names } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } lav_object_inspect_UGamma <- function(object, add.labels = FALSE, add.class = FALSE) { out <- lav_test_satorra_bentler(lavobject = object, return.ugamma = TRUE) OUT <- out$UGamma # labels if(add.labels) { # colnames(OUT) <- rownames(OUT) <- } # class if(add.class) { class(OUT) <- c("lavaan.matrix.symmetric", "matrix") } OUT } # Delta (jacobian: d samplestats / d free_parameters) lav_object_inspect_delta <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { OUT <- computeDelta(object@Model) # labels lavmodel <- object@Model categorical <- lavmodel@categorical conditional.x <- lavmodel@conditional.x group.w.free <- lavmodel@group.w.free nvar <- lavmodel@nvar num.idx <- lavmodel@num.idx th.idx <- lavmodel@th.idx nexo <- lavmodel@nexo nblocks <- lavmodel@nblocks if(add.labels) { PNAMES <- lav_partable_labels(object@ParTable, type="free") for(g in 1:nblocks) { colnames(OUT[[g]]) <- PNAMES if(conditional.x) { ov.names <- object@pta$vnames$ov.nox[[g]] } else { ov.names <- object@pta$vnames$ov[[g]] } ov.names.x <- object@pta$vnames$ov.x[[g]] nvar <- length(ov.names) names.cov <- names.cor <- names.var <- character(0L) names.mu <- names.pi <- names.th <- character(0L) names.gw <- character(0L) # Sigma # - if continuous: vech(Sigma) # - if categorical: first numeric variances, then tmp <- apply(expand.grid(ov.names, ov.names), 1L, paste, collapse = "~~") if(categorical) { names.cor <- tmp[lav_matrix_vech_idx(nvar, diagonal = FALSE)] names.var <- tmp[lav_matrix_diag_idx(nvar)[num.idx[[g]]]] } else { names.cov <- tmp[lav_matrix_vech_idx(nvar, diagonal = TRUE)] } # Mu if(!categorical && lavmodel@meanstructure) { names.mu <- paste(ov.names, "~1", sep = "") } # Pi if(conditional.x && lavmodel@nexo[g] > 0L) { names.pi <- apply(expand.grid(ov.names, ov.names.x), 1L, paste, collapse = "~") } # th if(categorical) { names.th <- object@pta$vnames$th[[g]] # interweave numeric intercepts, if any if(length(num.idx[[g]]) > 0L) { tmp <- character( length(th.idx[[g]]) ) tmp[ th.idx[[g]] > 0 ] <- names.th tmp[ th.idx[[g]] == 0 ] <- paste(ov.names[ num.idx[[g]] ], "~1", sep = "") names.th <- tmp } } # gw if(group.w.free) { names.gw <- "w" } rownames(OUT[[g]]) <- c(names.gw, names.th, names.mu, names.pi, names.cov, names.var, names.cor) # class if(add.class) { class(OUT[[g]]) <- c("lavaan.matrix", "matrix") } } # g } # labels if(nblocks == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } OUT } lav_object_inspect_zero_cell_tables <- function(object, add.labels = FALSE, add.class = FALSE, drop.list.single.group = FALSE) { # categorical? 
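# Example (sketch): for models with ordinal endogenous variables,
#   lavInspect(fit, "zero.cell.tables")
# lists the bivariate frequency tables that contain empty cells, built
# from lavTables(fit, dimension = 2L) below.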
if(!object@Model@categorical) { warning("lavaan WARNING: no categorical variables in fitted model") return(invisible(list())) } lavdata <- object@Data # create 2-way tables TABLE <- lavTables(object, dimension = 2L, output = "data.frame", statistic = NULL) # select tables with empty cells empty.id <- TABLE$id[which(TABLE$obs.freq == 0)] if(length(empty.id) == 0L) { # only when lavInspect() is used, give message if(add.class) { cat("(There are no tables with empty cells for this fitted model)\n") } return(invisible(list())) } else { OUT <- lav_tables_cells_format(TABLE[TABLE$id %in% empty.id,], lavdata = lavdata, drop.list.single.group = drop.list.single.group) } OUT } lavaan/R/lav_test_score.R0000644000176200001440000002050013052610721015056 0ustar liggesusers# classic score test (= Lagrange Multiplier test) # # this function can run in two modes: # # MODE 1: 'add' # add new parameters that are currently not included in the model # (aka fixed to zero), but should be released # # MODE 2: 'release' (the default) # release existing "==" constraints # lavTestScore <- function(object, add = NULL, release = NULL, univariate = TRUE, cumulative = FALSE, epc = FALSE, verbose = FALSE, warn = TRUE) { # check object stopifnot(inherits(object, "lavaan")) lavoptions <- object@Options if(object@optim$npar > 0L && !object@optim$converged) { stop("lavaan ERROR: model did not converge") } # check for inequality constraints PT <- object@ParTable if(any(PT$op == ">" | PT$op == "<")) { stop("lavaan ERROR: lavTestScore() does not handle inequality constraints (yet)") } # check arguments if(cumulative) { univariate <- TRUE } # Mode 1: ADDING new parameters if(!is.null(add) && nchar(add) > 0L) { # check release argument if(!is.null(release)) { stop("lavaan ERROR: `add' and `release' arguments cannot be used together.") } # extend model with extra set of parameters FIT <- lav_object_extended(object, add = add) score <- lavTech(FIT, "gradient") information <- lavTech(FIT, "information.expected") npar <- object@Model@nx.free nadd <- FIT@Model@nx.free - npar # R R.model <- object@Model@con.jac[,,drop = FALSE] if(nrow(R.model) > 0L) { R.model <- cbind(R.model, matrix(0, nrow(R.model), ncol = nadd)) R.add <- cbind(matrix(0, nrow = nadd, ncol = npar), diag(nadd)) R <- rbind(R.model, R.add) Z <- cbind(rbind(information, R.model), rbind(t(R.model),matrix(0,nrow(R.model),nrow(R.model)))) Z.plus <- MASS::ginv(Z) J.inv <- Z.plus[ 1:nrow(information), 1:nrow(information) ] r.idx <- seq_len(nadd) + nrow(R.model) } else { R <- cbind(matrix(0, nrow = nadd, ncol = npar), diag(nadd)) J.inv <- MASS::ginv(information) r.idx <- seq_len(nadd) } # lhs/rhs lhs <- lav_partable_labels(FIT@ParTable)[ FIT@ParTable$user == 10L ] op <- rep("==", nadd) rhs <- rep("0", nadd) Table <- data.frame(lhs = lhs, op = op, rhs = rhs) class(Table) <- c("lavaan.data.frame", "data.frame") } else { # MODE 2: releasing constraints R <- object@Model@con.jac[,,drop = FALSE] if(nrow(R) == 0L) { stop("lavaan ERROR: no equality constraints found in model.") } score <- lavTech(object, "gradient") information <- lavTech(object, "information.expected") J.inv <- MASS::ginv(information) #R <- object@Model@con.jac[,] if(is.null(release)) { # ALL constraints r.idx <- seq_len( nrow(R) ) } else if(is.numeric(release)) { r.idx <- release if(max(r.idx) > nrow(R)) { stop("lavaan ERROR: maximum constraint number (", max(r.idx), ") is larger than number of constraints (", nrow(R), ")") } # neutralize the non-needed constraints R1 <- R[-r.idx,,drop = FALSE] Z1 <- cbind( 
rbind(information, R1), rbind(t(R1), matrix(0,nrow(R1),nrow(R1))) ) Z1.plus <- MASS::ginv(Z1) J.inv <- Z1.plus[ 1:nrow(information), 1:nrow(information) ] } else if(is.character(release)) { stop("not implemented yet") } # lhs/rhs eq.idx <- which(object@ParTable$op == "==") if(length(eq.idx) > 0L) { lhs <- object@ParTable$lhs[eq.idx][r.idx] op <- rep("==", length(r.idx)) rhs <- object@ParTable$rhs[eq.idx][r.idx] } Table <- data.frame(lhs = lhs, op = op, rhs = rhs) class(Table) <- c("lavaan.data.frame", "data.frame") } N <- nobs(object) if(lavoptions$mimic == "EQS") { N <- N - 1 } if(lavoptions$se == "standard") { stat <- as.numeric(N * score %*% J.inv %*% score) } else { # generalized score test if(warn) { warning("lavaan WARNING: se is not `standard'; not implemented yet; falling back to ordinary score test") } # NOTE!!! # we can NOT use VCOV here, because it reflects the constraints, # and the whole point is to test for these constraints... stat <- as.numeric(N * score %*% J.inv %*% score) } # compute df, taking into account that some of the constraints may # be needed to identify the model (and hence information is singular) # information.plus <- information + crossprod(R) #df <- qr(R[r.idx,,drop = FALSE])$rank + # ( qr(information)$rank - qr(information.plus)$rank ) df <- nrow( R[r.idx,,drop = FALSE] ) pvalue <- 1 - pchisq(stat, df=df) # total score test TEST <- data.frame(test = "score", X2 = stat, df = df, p.value = pvalue) class(TEST) <- c("lavaan.data.frame", "data.frame") attr(TEST, "header") <- "total score test:" OUT <- list(test = TEST) if(univariate) { TS <- numeric( nrow(R) ) for(r in r.idx) { R1 <- R[-r,,drop = FALSE] Z1 <- cbind( rbind(information, R1), rbind(t(R1), matrix(0,nrow(R1),nrow(R1))) ) Z1.plus <- MASS::ginv(Z1) Z1.plus1 <- Z1.plus[ 1:nrow(information), 1:nrow(information) ] TS[r] <- as.numeric(N * t(score) %*% Z1.plus1 %*% score) } Table2 <- Table Table2$X2 <- TS[r.idx] Table2$df <- rep(1, length(r.idx)) Table2$p.value <- 1 - pchisq(Table2$X2, df = Table2$df) attr(Table2, "header") <- "univariate score tests:" OUT$uni <- Table2 } if(cumulative) { TS.order <- sort.int(TS, index.return = TRUE, decreasing = TRUE)$ix TS <- numeric( length(r.idx) ) for(r in 1:length(r.idx)) { rcumul.idx <- TS.order[1:r] R1 <- R[-rcumul.idx,,drop = FALSE] Z1 <- cbind( rbind(information, R1), rbind(t(R1), matrix(0,nrow(R1),nrow(R1))) ) Z1.plus <- MASS::ginv(Z1) Z1.plus1 <- Z1.plus[ 1:nrow(information), 1:nrow(information) ] TS[r] <- as.numeric(N * t(score) %*% Z1.plus1 %*% score) } Table3 <- Table Table3$X2 <- TS Table3$df <- seq_len( length(TS) ) Table3$p.value <- 1 - pchisq(Table3$X2, df = Table3$df) attr(Table3, "header") <- "cumulative score tests:" OUT$cumulative <- Table3 } if(epc) { #EPC <- vector("list", length = length(r.idx)) #for(i in 1:length(r.idx)) { # r <- r.idx[i] # R1 <- R[-r,,drop = FALSE] # Z1 <- cbind( rbind(information, R1), # rbind(t(R1), matrix(0,nrow(R1),nrow(R1))) ) # Z1.plus <- MASS::ginv(Z1) # Z1.plus1 <- Z1.plus[ 1:nrow(information), 1:nrow(information) ] # EPC[[i]] <- -1 * as.numeric(score %*% Z1.plus1) #} # #OUT$EPC <- EPC # alltogether R1 <- R[-r.idx,,drop = FALSE] Z1 <- cbind( rbind(information, R1), rbind(t(R1), matrix(0,nrow(R1),nrow(R1))) ) Z1.plus <- MASS::ginv(Z1) Z1.plus1 <- Z1.plus[ 1:nrow(information), 1:nrow(information) ] EPC.all <- -1 * as.numeric(score %*% Z1.plus1) # create epc table for the 'free' parameters LIST <- parTable(object) LIST <- LIST[,c("lhs","op","rhs","group","free","label","plabel")] if(lav_partable_ngroups(LIST) == 1L) { 
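# (sketch) the table built below pairs each free parameter's current
# estimate (est) with its expected change (epc) and expected value
# (epv = est + epc) if all tested constraints were released. Typical use,
# for a hypothetical fitted object 'fit':
#   out <- lavTestScore(fit, epc = TRUE)
#   out$epc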
LIST$group <- NULL } nonpar.idx <- which(LIST$op %in% c("==", ":=", "<", ">")) if(length(nonpar.idx) > 0L) { LIST <- LIST[-nonpar.idx,] } LIST$est[ LIST$free > 0 ] <- coef(object) LIST$epc <- rep(as.numeric(NA), length(LIST$lhs)) LIST$epc[ LIST$free > 0 ] <- EPC.all LIST$epv <- LIST$est + LIST$epc attr(LIST, "header") <- "expected parameter changes (epc) and expected parameter values (epv):" OUT$epc <- LIST } OUT } lavaan/R/lav_representation.R0000644000176200001440000000265413052526713015757 0ustar liggesusers# user visible function to add 'matrix' entries in the parameter table lavMatrixRepresentation <- function(partable, representation = "LISREL", add.attributes = FALSE, as.data.frame. = TRUE) { # check parameter table partable <- lav_partable_complete(partable) # get model matrices if(representation == "LISREL") { REP <- representation.LISREL(partable, target = NULL, extra = add.attributes) } else { stop("lavaan ERROR: only representation \"LISREL\" has been implemented.") } partable$mat <- REP$mat partable$row <- REP$row partable$col <- REP$col if(as.data.frame.) { partable <- as.data.frame(partable, stringsAsFactors=FALSE) class(partable) <- c("lavaan.data.frame", "data.frame") } if(add.attributes) { attr(partable, "ov.dummy.names.nox") <- attr(REP, "ov.dummy.names.nox") attr(partable, "ov.dummy.names.x") <- attr(REP, "ov.dummy.names.x") attr(partable, "mmNames") <- attr(REP, "mmNames") attr(partable, "mmNumber") <- attr(REP, "mmNumber") attr(partable, "mmRows") <- attr(REP, "mmRows") attr(partable, "mmCols") <- attr(REP, "mmCols") attr(partable, "mmDimNames") <- attr(REP, "mmDimNames") attr(partable, "mmSymmetric") <- attr(REP, "mmSymmetric") } partable } lavaan/R/lav_samplestats_step1.R0000644000176200001440000001041112631330346016354 0ustar liggesuserslav_samplestats_step1 <- function(Y, ov.names = NULL, ov.types = NULL, ov.levels = NULL, ov.names.x = character(0L), eXo = NULL, scores.flag = TRUE, # scores? 
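# (sketch) step 1 of the multi-step estimation for categorical/mixed data:
# thresholds, slopes and (residual) variances are estimated univariately
# (OLS for numeric outcomes, probit regression for ordered outcomes),
# optionally keeping the casewise score contributions needed later for
# the weight matrix.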
group = 1L) { # for error message # just in case Y is a vector Y <- as.matrix(Y) nvar <- NCOL(Y); N <- NROW(Y) nTH <- ov.levels - 1L; nTH[nTH == -1L] <- 1L nth <- sum(nTH) th.end.idx <- cumsum(nTH); th.start.idx <- th.end.idx - (nTH - 1L) # variable types; default = numeric nexo <- length(ov.names.x) if(nexo > 0L) stopifnot(NCOL(eXo) == nexo) # means/thresholds/intercepts, slopes, variances TH <- vector("list", length=nvar) TH.NOX <- vector("list", length=nvar) TH.NAMES <- vector("list", length=nvar) TH.IDX <- vector("list", length=nvar) SLOPES <- matrix(as.numeric(NA), nrow=nvar, ncol=nexo) # if conditional.x VAR <- numeric(length=nvar) # continuous variables only # SCORES SC.VAR <- matrix(0, N, nvar) SC.SL <- matrix(0, N, nvar*nexo) SC.TH <- matrix(0, N, nth) # fitted objects FIT <- vector("list", length=nvar) # stage one - TH/SLOPES/VAR only for(i in 1:nvar) { th.idx <- th.start.idx[i]:th.end.idx[i] sl.idx <- seq(i, by=nvar, length.out=nexo) if(ov.types[i] == "numeric") { fit <- lavOLS(y=Y[,i], X=eXo) if( any(is.na(fit$theta)) ) { stop("lavaan ERROR: linear regression failed for ",ov.names[i], "; X may not be of full rank in group ", group) } FIT[[i]] <- fit # compute mean and variance TH[[i]] <- TH.NOX[[i]] <- unname(fit$theta[1L]) VAR[i] <- unname(fit$theta[fit$npar]) TH.NAMES[[i]] <- ov.names[i]; TH.IDX[[i]] <- 0L if(scores.flag) { scores <- fit$scores() SC.TH[,th.idx] <- scores[,1L] SC.VAR[,i] <- scores[,fit$npar] } if(nexo > 0L) { SLOPES[i,] <- fit$theta[-c(1L, fit$npar)] if(scores.flag) { SC.SL[,sl.idx] <- scores[,-c(1L, fit$npar),drop=FALSE] } TH.NOX[[i]] <- mean(Y[,i], na.rm=TRUE) } } else if(ov.types[i] == "ordered") { # check if we have enough categories in this group # FIXME: should we more tolerant here??? y.freq <- tabulate(Y[,i], nbins=ov.levels[i]) if(length(y.freq) != ov.levels[i]) stop("lavaan ERROR: variable ", ov.names[i], " has fewer categories (", length(y.freq), ") than expected (", ov.levels[i], ") in group ", group) if(any(y.freq == 0L)) stop("lavaan ERROR: some categories of variable `", ov.names[i], "' are empty in group ", group, "; frequencies are [", paste(y.freq, collapse=" "), "]") fit <- lavProbit(y=Y[,i], X=eXo) if( any(is.na(fit$theta)) ) { stop("lavaan ERROR: probit regression failed for ",ov.names[i], "; X may not be of full rank in group ", group) } FIT[[i]] <- fit TH[[i]] <- unname(fit$theta[fit$th.idx]) TH.NOX[[i]] <- pc_th(Y=Y[,i]) if(scores.flag) { scores <- fit$scores() SC.TH[,th.idx] <- scores[,fit$th.idx,drop=FALSE] } SLOPES[i,] <- fit$theta[fit$slope.idx] if(scores.flag) { SC.SL[,sl.idx] <- scores[,fit$slope.idx,drop=FALSE] } VAR[i] <- 1.0 TH.NAMES[[i]] <- paste(ov.names[i], "|t", 1:length(TH[[i]]), sep="") TH.IDX[[i]] <- rep(i, length(TH[[i]])) } else { stop("lavaan ERROR: unknown ov.types:", ov.types[i]) } } list(FIT = FIT, VAR = VAR, SLOPES = SLOPES, TH = TH, TH.NOX = TH.NOX, TH.IDX = TH.IDX, TH.NAMES = TH.NAMES, SC.TH = SC.TH, SC.VAR = SC.VAR, SC.SL = SC.SL, th.start.idx = th.start.idx, th.end.idx = th.end.idx) } lavaan/R/lav_export_mplus.R0000644000176200001440000002107413052610235015444 0ustar liggesusers# export to Mplus syntax lav2mplus <- function(lav, group.label=NULL) { lav <- lav2check(lav) header <- " ! 
this model syntax is autogenerated by lavExport\n" footer <- "\n" lav <- as.data.frame(lav, stringsAsFactors=FALSE) ngroups <- lav_partable_ngroups(lav) lav_one_group <- function(lav) { # mplus does not like variable names with a 'dot' # replace them by an underscore '_' lav$lhs <- gsub("\\.", "_", lav$lhs) lav$rhs <- gsub("\\.", "_", lav$rhs) # remove contraints (:=, <, >, ==) here con.idx <- which(lav$op %in% c(":=", "<",">","==")) if(length(con.idx) > 0L) { lav <- lav[-con.idx,] } # remove exogenous variances/covariances/intercepts... exo.idx <- which(lav$exo == 1L) if(length(exo.idx)) { lav <- lav[-exo.idx,] } # remove intercepts for categorical variables ord.names <- unique(lav$lhs[ lav$op == "|" ]) ord.int.idx <- which(lav$op == "~1" & lav$lhs %in% ord.names) if(length(ord.int.idx)) { lav <- lav[-ord.int.idx,] } # end of line lav$eol <- rep(";", length(lav$lhs)) lav$ustart <- ifelse(is.na(lav$ustart), "", lav$ustart) lav$rhs2 <- ifelse(lav$free == 0L, paste("@",lav$ustart,sep=""), paste("*",lav$ustart,sep="")) lav$plabel <- gsub("\\.", "", lav$plabel) lav$plabel <- ifelse(lav$plabel == "", lav$plabel, paste(" (",lav$plabel,")",sep="")) # remove variances for ordered variables ov.names.ord <- vnames(lav, type="ov.ord") ord.idx <- which(lav$lhs %in% ov.names.ord & lav$op == "~~" & lav$free == 0L & lav$lhs == lav$rhs) lav$lhs[ord.idx] <- paste("! ", lav$lhs[ord.idx], sep="") lav$op[ord.idx] <- "" lav$rhs[ord.idx] <- "" # variances var.idx <- which(lav$op == "~~" & lav$rhs == lav$lhs) lav$op[var.idx] <- "" lav$rhs[var.idx] <- "" # scaling factors scal.idx <- which(lav$op == "~*~") lav$op[scal.idx] <- "" lav$rhs2[scal.idx] <- paste(lav$rhs2[scal.idx],"}",sep="") lav$lhs[scal.idx] <- "{" # intercepts - excluding categorical observed int.idx <- which(lav$op == "~1") lav$op[int.idx] <- "" lav$rhs2[int.idx] <- paste(lav$rhs2[int.idx],"]",sep="") lav$lhs[int.idx] <- paste("[", lav$lhs[int.idx],sep="") # thresholds th.idx <- which(lav$op == "|") lav$op[th.idx] <- "$" lav$rhs[th.idx] <- gsub("t", "", x=lav$rhs[th.idx]) lav$rhs2[th.idx] <- paste(lav$rhs2[th.idx],"]",sep="") lav$lhs[th.idx] <- paste("[", lav$lhs[th.idx],sep="") # replace binary operators lav$op <- ifelse(lav$op == "=~", " BY ", lav$op) lav$op <- ifelse(lav$op == "~", " ON ", lav$op) lav$op <- ifelse(lav$op == "~~", " WITH ", lav$op) lav2 <- paste(lav$lhs, lav$op, lav$rhs, lav$rhs2, lav$plabel, lav$eol, sep="") body <- paste(" ", lav2, collapse="\n") body } if(ngroups == 1L) { body <- lav_one_group(lav) } else { # group 1 body <- lav_one_group(lav[lav$group == 1,]) if(is.null(group.label) || length(group.label) == 0L) { group.label <- paste(1:ngroups) } for(g in 2:ngroups) { body <- paste(body, paste("\nMODEL ", group.label[g], ":\n", sep=""), lav_one_group(lav[lav$group == g,]), sep="") } } # constraints go to a 'MODEL CONSTRAINTS' block con.idx <- which(lav$op %in% c(":=", "<",">","==")) if(length(con.idx) > 0L) { ### FIXME: we need to convert the operator ### eg b^2 --> b**2, others?? 
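# Example (sketch): a lavaan definition 'b := a^2' is exported as
#   NEW (b);
#   b = a**2;
# inside the MODEL CONSTRAINT block assembled below.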
lav$lhs[con.idx] <- gsub("\\^","**",lav$lhs[con.idx]) lav$rhs[con.idx] <- gsub("\\^","**",lav$rhs[con.idx]) constraints <- "\nMODEL CONSTRAINT:\n" # define 'new' variables def.idx <- which(lav$op == ":=") if(length(def.idx) > 0L) { def <- paste(lav$lhs[def.idx], collapse= " ") constraints <- paste(constraints, "NEW (", def, ");") lav$op[def.idx] <- "=" } # replace '==' by '=' eq.idx <- which(lav$op == "==") if(length(eq.idx) > 0L) { lav$op[eq.idx] <- "=" } con <- paste(gsub("\\.","",lav$lhs[con.idx]), " ", lav$op[con.idx], " ", gsub("\\.","",lav$rhs[con.idx]), ";", sep="") con2 <- paste(" ", con, collapse="\n") constraints <- paste(constraints, con2, sep="\n") } else { constraints <- "" } out <- paste(header, body, constraints, footer, sep="") class(out) <- c("lavaan.character", "character") out } # helper functions lav_mplus_estimator <- function(object) { estimator <- object@Options$estimator if(estimator == "DWLS") { estimator <- "WLS" } if(estimator == "ML") { if(object@Options$test == "yuan.bentler") { estimator <- "MLR" } else if(object@Options$test == "satorra.bentler") { estimator <- "MLM" } else if(object@Options$test == "scaled.shifted") { estimator <- "MLMV" } else if(object@Options$se == "first.order") { estimator <- "MLF" } } else if(estimator %in% c("ULS","WLS")) { if(object@Options$test == "satorra.bentler") { estimator <- paste(estimator, "M", sep="") } else if(object@Options$test == "scaled.shifted") { estimator <- paste(estimator, "MV", sep="") } } else if(estimator == "MML") { estimator <- "ML" } estimator } lav_mplus_header <- function(data.file=NULL, group.label="", ov.names="", ov.ord.names="", estimator="ML", data.type="full", nobs=NULL) { # replace '.' by '_' in all variable names ov.names <- gsub("\\.", "_", ov.names) ov.ord.names <- gsub("\\.", "_", ov.ord.names) ### FIXME!! ### this is old code from lavaan 0.3-1 ### surely, this can be done better... # TITLE command c.TITLE <- "TITLE:\n" c.TITLE <- paste(c.TITLE, " [This syntax is autogenerated by lavExport]\n") # DATA command c.DATA <- "DATA:\n" ngroups <- length(data.file) if(ngroups == 1L) { c.DATA <- paste(c.DATA, " file is ", data.file, ";\n", sep="") } else { for(g in 1:ngroups) { c.DATA <- paste(c.DATA, " file (", group.label[g] ,") is ", data.file[g], ";\n", sep="") } } if(data.type == "full") { c.DATA <- paste(c.DATA, " type is individual;\n", sep="") } else if(data.type == "moment") { c.DATA <- paste(c.DATA, " type is fullcov;\n", sep="") c.DATA <- paste(c.DATA, " nobservations are ", nobs, ";\n", sep="") } else { stop("lavaan ERROR: data.type must be full or moment") } # VARIABLE command c.VARIABLE <- "VARIABLE:\n" c.VARIABLE <- paste(c.VARIABLE, " names are", sep="") nvar <- length(ov.names); tmp <- 0 for(i in 1:nvar) { if(tmp%%6 == 0) { c.VARIABLE <- paste(c.VARIABLE,"\n ", sep="") } c.VARIABLE <- paste(c.VARIABLE, ov.names[i], sep=" ") tmp <- tmp+1 } c.VARIABLE <- paste(c.VARIABLE, ";\n", sep="") # missing if(data.type == "full") { c.VARIABLE <- paste(c.VARIABLE, " missing are all (-999999);\n",sep="") } # categorical? 
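# (sketch) ordered variables are declared via a Mplus 'categorical are'
# statement, wrapped at six names per line like the 'names are' list above.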
if(length(ov.ord.names)) { c.VARIABLE <- paste(c.VARIABLE, " categorical are", sep="") nvar <- length(ov.ord.names); tmp <- 0 for(i in 1:nvar) { if(tmp%%6 == 0) { c.VARIABLE <- paste(c.VARIABLE,"\n ", sep="") } c.VARIABLE <- paste(c.VARIABLE, ov.ord.names[i]) tmp <- tmp+1 } c.VARIABLE <- paste(c.VARIABLE,";\n",sep="") } # ANALYSIS command c.ANALYSIS <- paste("ANALYSIS:\n type = general;\n", sep="") c.ANALYSIS <- paste(c.ANALYSIS, " estimator = ", toupper(estimator), ";\n", sep="") # MODEL command c.MODEL <- paste("MODEL:\n") # assemble pre-model header out <- paste(c.TITLE, c.DATA, c.VARIABLE, c.ANALYSIS, c.MODEL, sep="") out } lavaan/R/lav_samplestats_gamma.R0000644000176200001440000002772313044155742016424 0ustar liggesusers# YR 21 March 2015 # new approach to compute 'Gamma': the asymptotic variance matrix of # sqrt{N} times the # observed sample statistics (means + varcov) # # Gamma = N x ACOV[ ybar, vech(S) ] # = NACOV[ ybar, vech(S) ] # # - one single function for mean + cov # - handle 'fixed.x' exogenous covariates # - YR 3 Dec 2015: allow for conditional.x = TRUE # generic public function # input for lavGamma can be lavobject, lavdata, data.frame, or matrix lavGamma <- function(object, group = NULL, missing = "listwise", ov.names.x = NULL, fixed.x = FALSE, conditional.x = FALSE, meanstructure = FALSE, slopestructure = FALSE, Mplus.WLS = FALSE, add.labels) { if(inherits(object, "lavaan")) { lavdata <- object@Data if(missing(missing)) { missing <- object@Options$missing if(missing != "listwise") { ### FIXME!!!!! return(NULL) # for now! } } else { missing <- "listwise" } } else if(inherits(object, "lavData")) { lavdata <- object } else if(inherits(object, "data.frame") || inherits(object, "matrix")) { NAMES <- names(object) if(!is.null(NAMES) && !is.null(group)) { NAMES <- NAMES[- match(group, NAMES)] } lavdata <- lavData(data = object, group = group, ov.names = NAMES, ordered = NULL, ov.names.x = ov.names.x, lavoptions = list(warn = FALSE, missing = missing)) } else { stop("lavaan ERROR: lavGamma can not handle objects of class ", paste(class(object), collapse= " ")) } # extract data Y <- lavdata@X # x-covariates? 
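# Example (sketch): called on a plain data.frame, lavGamma() returns one
# Gamma matrix per group; with p variables and the defaults
# (meanstructure = FALSE), each Gamma is p(p+1)/2 by p(p+1)/2, e.g.
#   G <- lavGamma(HolzingerSwineford1939[, c("x1", "x2", "x3")])
#   dim(G[[1]])   # 6 6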
x.idx <- lapply(seq_len(lavdata@ngroups), function(g) match(lavdata@ov.names.x[[g]], lavdata@ov.names[[g]]) ) OUT <- lapply(seq_len(lavdata@ngroups), function(g) lav_samplestats_Gamma(Y = Y[[g]], x.idx = x.idx[[g]], fixed.x = fixed.x, conditional.x = conditional.x, meanstructure = meanstructure, slopestructure = slopestructure, Mplus.WLS = Mplus.WLS)) OUT } # NOTE: # - three types: # 1) plain (conditional.x = FALSE, fixed.x = FALSE) # 2) fixed.x (conditional.x = FALSE, fixed.x = TRUE) # 3) conditional.x (conditional.x = TRUE) # - if conditional.x = TRUE, we ignore fixed.x (can be TRUE or FALSE) # NORMAL-THEORY lav_samplestats_Gamma_NT <- function(Y = NULL, COV = NULL, MEAN = NULL, rescale = FALSE, x.idx = integer(0L), fixed.x = FALSE, conditional.x = FALSE, meanstructure = FALSE, slopestructure = FALSE) { # check arguments if(length(x.idx) == 0L) { conditional.x <- FALSE fixed.x <- FALSE } if(is.null(COV)) { stopifnot(!is.null(Y)) # coerce to matrix Y <- unname(as.matrix(Y)); N <- nrow(Y) COV <- cov(Y) } if(rescale) { COV <- COV * (N-1) / N # ML version } if(conditional.x && length(x.idx) > 0L && is.null(MEAN) && (meanstructure || slopestructure)) { stopifnot(!is.null(Y)) MEAN <- colMeans(Y) } # rename S <- COV M <- MEAN # unconditional if(!conditional.x) { # unconditional - stochastic x if(!fixed.x) { Gamma <- 2*lav_matrix_duplication_ginv_pre_post(S %x% S) if(meanstructure) { Gamma <- lav_matrix_bdiag(S, Gamma) } # unconditional - fixed x } else { # handle fixed.x = TRUE # cov(Y|X) = A - B C^{-1} B' # where A = cov(Y), B = cov(Y,X), C = cov(X) A <- S[-x.idx, -x.idx, drop=FALSE] B <- S[-x.idx, x.idx, drop=FALSE] C <- S[ x.idx, x.idx, drop=FALSE] YbarX <- A - B %*% solve(C, t(B)) # reinsert YbarX in Y+X (residual) covariance matrix YbarX.aug <- matrix(0, nrow = NROW(S), ncol = NCOL(S)) YbarX.aug[ -x.idx, -x.idx ] <- YbarX # take difference R <- S - YbarX.aug Gamma.S <- 2*lav_matrix_duplication_ginv_pre_post(S %x% S) Gamma.R <- 2*lav_matrix_duplication_ginv_pre_post(R %x% R) Gamma <- Gamma.S - Gamma.R if(meanstructure) { Gamma <- lav_matrix_bdiag(YbarX.aug, Gamma) } } } else { # conditional.x # 4 possibilities: # - no meanstructure, no slopes # - meanstructure, no slopes # - no meanstructure, slopes # - meanstructure, slopes # regress Y on X, and compute covariance of residuals 'R' A <- S[-x.idx, -x.idx, drop=FALSE] B <- S[-x.idx, x.idx, drop=FALSE] C <- S[ x.idx, x.idx, drop=FALSE] Cov.YbarX <- A - B %*% solve(C) %*% t(B) Gamma <- 2*lav_matrix_duplication_ginv_pre_post(Cov.YbarX %x% Cov.YbarX) if(meanstructure || slopestructure) { MY <- M[-x.idx]; MX <- M[x.idx] C3 <- rbind(c(1,MX), cbind(MX, C + tcrossprod(MX))) #B3 <- cbind(MY, B + tcrossprod(MY,MX)) } if(meanstructure) { if(slopestructure) { A11 <- solve(C3) %x% Cov.YbarX } else { A11 <- solve(C3)[1, 1, drop=FALSE] %x% Cov.YbarX } } else { if(slopestructure) { A11 <- solve(C3)[-1, -1, drop=FALSE] %x% Cov.YbarX } else { A11 <- matrix(0,0,0) } } Gamma <- lav_matrix_bdiag(A11, Gamma) } Gamma } # NOTE: # - three types: # 1) plain (conditional.x = FALSE, fixed.x = FALSE) # 2) fixed.x (conditional.x = FALSE, fixed.x = TRUE) # 3) conditional.x (conditional.x = TRUE) # - if conditional.x = TRUE, we ignore fixed.x (can be TRUE or FALSE) # ADF THEORY lav_samplestats_Gamma <- function(Y, x.idx = integer(0L), fixed.x = FALSE, conditional.x = FALSE, meanstructure = FALSE, slopestructure = FALSE, Mplus.WLS = FALSE, add.attributes = FALSE) { # coerce to matrix Y <- unname(as.matrix(Y)); N <- nrow(Y) # check arguments if(length(x.idx) == 0L) { 
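# (sketch) the plain ADF estimate computed below: with
#   z_i = vech( (y_i - ybar) %*% t(y_i - ybar) )
# Gamma is the empirical covariance matrix of the z_i, divided by N
# (not N - 1), i.e. crossprod(scale(Z, center = TRUE, scale = FALSE)) / N.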
conditional.x <- FALSE fixed.x <- FALSE } if(Mplus.WLS) { stopifnot(!conditional.x, !fixed.x) } if(!conditional.x) { # center only, so we can use crossprod instead of cov Yc <- base::scale(Y, center = TRUE, scale = FALSE) p <- ncol(Y) # create Z where the rows_i contain the following elements: # - intercepts (if meanstructure is TRUE) # - vech(Yc_i' %*% Yc_i) where Yc_i are the residuals idx1 <- lav_matrix_vech_col_idx(p) idx2 <- lav_matrix_vech_row_idx(p) if(meanstructure) { Z <- cbind(Yc, Yc[,idx1] * Yc[,idx2]) } else { Z <- Yc[,idx1] * Yc[,idx2] } # handle fixed.x = TRUE if(fixed.x) { YX <- Yc # here, we do not need the intercepts, data is centered QR <- qr(Yc[, x.idx, drop = FALSE]) RES <- qr.resid(QR, Yc[,-x.idx, drop = FALSE]) # substract residuals from original Yc's YX[, -x.idx] <- Yc[, -x.idx, drop = FALSE] - RES if(meanstructure) { Z2 <- cbind(YX, YX[,idx1] * YX[,idx2]) } else { Z2 <- YX[,idx1] * YX[,idx2] } # substract Z2 from original Z Z <- Z - Z2 } #Gamma = (N-1)/N * cov(Z, use = "pairwise") # we center so we can use crossprod instead of cov Zc <- base::scale(Z, center = TRUE, scale = FALSE) # note: centering is the same as substracting lav_matrix_vech(S), # where S is the sample covariance matrix (divided by N) if(anyNA(Zc)) { Gamma <- lav_matrix_crossprod(Zc) / N } else { Gamma <- base::crossprod(Zc) / N } } else { # conditional.x # 4 possibilities: # - no meanstructure, no slopes # - meanstructure, no slopes # - no meanstructure, slopes # - meanstructure, slopes # regress Y on X, and compute residuals X <- cbind(1, Y[, x.idx, drop = FALSE]) QR <- qr(X) RES <- qr.resid(QR, Y[, -x.idx, drop = FALSE]) p <- ncol(RES) idx1 <- lav_matrix_vech_col_idx(p) idx2 <- lav_matrix_vech_row_idx(p) if(meanstructure || slopestructure) { XtX.inv <- unname(solve(crossprod(X))) Xi <- (X %*% XtX.inv) * N ## FIXME, shorter way? ncX <- NCOL(X); ncY <- NCOL(RES) } if(meanstructure) { if(slopestructure) { Xi.idx <- rep(seq_len(ncX), each = ncY) Res.idx <- rep(seq_len(ncY), times = ncX) Z <- cbind( Xi[, Xi.idx, drop = FALSE] * RES[,Res.idx, drop = FALSE], RES[, idx1, drop = FALSE] * RES[, idx2, drop = FALSE] ) } else { Xi.idx <- rep(1L, each = ncY) Z <- cbind( Xi[, Xi.idx ,drop = FALSE] * RES, RES[, idx1, drop = FALSE] * RES[, idx2, drop = FALSE] ) } } else { if(slopestructure) { Xi.idx <- rep(seq_len(ncX), each = ncY) Xi.idx <- Xi.idx[ -seq_len(ncY) ] Res.idx <- rep(seq_len(ncY), times = (ncX - 1L)) Z <- cbind( Xi[, Xi.idx, drop = FALSE] * RES[,Res.idx, drop = FALSE], RES[, idx1, drop = FALSE] * RES[, idx2, drop = FALSE] ) } else { Z <- RES[,idx1,drop = FALSE] * RES[,idx2,drop = FALSE] } } Zc <- base::scale(Z, center = TRUE, scale = FALSE) if(anyNA(Zc)) { Gamma <- lav_matrix_crossprod(Zc) / N } else { Gamma <- base::crossprod(Zc) / N } } # only to mimic Mplus when estimator = "WLS" if(Mplus.WLS && !fixed.x && !conditional.x) { # adjust G_22 (the varcov part) S <- cov(Y, use = "pairwise") w <- lav_matrix_vech(S) w.biased <- (N-1)/N * w diff <- outer(w,w) - outer(w.biased, w.biased) if(meanstructure) { Gamma[-seq_len(p), -seq_len(p)] <- Gamma[-seq_len(p), -seq_len(p), drop = FALSE] - diff } else { Gamma <- Gamma - diff } if(meanstructure) { # adjust G_12/G_21 (third-order) # strange rescaling? 
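            # (presumably because Mplus keeps the unbiased (N-1) weighting in
            # the mean/covariance cross-block; this rescaling only matters
            # when reproducing Mplus WLS output exactly)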
N1 <- (N - 1) / N Gamma[seq_len(p),-seq_len(p)] <- Gamma[seq_len(p),-seq_len(p)] * N1 Gamma[-seq_len(p),seq_len(p)] <- Gamma[-seq_len(p),seq_len(p)] * N1 } } Gamma } lavaan/R/lav_partable_complete.R0000644000176200001440000001025213053002623016356 0ustar liggesusers# handle bare-minimum partables # add some additional columns lav_partable_complete <- function(partable = NULL, start = TRUE) { # check if we hava a data.frame # if so, check for columns that are 'factor' and convert them to 'character' if(is.data.frame(partable)) { fac.idx <- sapply(partable, is.factor) partable[fac.idx] <- lapply(partable[fac.idx], as.character) } # check if we have lhs, op, rhs stopifnot(!is.null(partable$lhs), !is.null(partable$op), !is.null(partable$rhs)) # number of elements N <- length(partable$lhs) if(!is.data.frame(partable)) { # check for equal column length nel <- sapply(partable, length) short.idx <- which(nel < N) long.idx <- which(nel > N) if(length(long.idx) > 0L) { warning("lavaan WARNING: partable columns have unequal length") } if(length(short.idx) > 0L) { # try to extend them in a 'natural' way for(i in short.idx) { too.short <- N - nel[i] if(is.integer(partable[[i]])) { partable[[i]] <- c(partable[[i]], integer( too.short )) } else if(is.numeric(partable[[i]])) { partable[[i]] <- c(partable[[i]], numeric( too.short )) } else { partable[[i]] <- c(partable[[i]], character( too.short )) } } } } # create new id column #if(is.null(partable$id)) { partable$id <- seq_len(N) #} # add block column if(is.null(partable$block)) { partable$block <- rep(1L, N) } else { partable$block <- as.integer(partable$block) } # add user column if(is.null(partable$user)) { partable$user <- rep(1L, N) } else { partable$user <- as.integer( partable$user ) } # add free column if(is.null(partable$free)) { partable$free <- seq_len(N) } else { # treat non-zero as 'free' free.idx <- which(as.logical(partable$free)) partable$free <- rep(0L, N) if(length(free.idx) > 0L) { partable$free[free.idx] <- seq_len(length(free.idx)) } } # add ustart column if(is.null(partable$ustart)) { # do we have something else? start? est? 
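        # precedence when ustart is absent: fall back to start, then est,
        # then NA (fixed parameters get 0 below)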
if(!is.null(partable$start)) { partable$ustart <- as.numeric(partable$start) } else if(!is.null(partable$est)) { partable$ustart <- as.numeric(partable$est) } else { partable$ustart <- rep(as.numeric(NA), N) non.free <- which(!partable$free) if(length(non.free)) { partable$ustart[non.free] <- 0 } } } else { partable$ustart <- as.numeric(partable$ustart) } # add exo column if(is.null(partable$exo)) { partable$exo <- rep(0, N) } else { partable$exo <- as.integer( partable$exo ) } # add label column if(is.null(partable$label)) { partable$label <- rep("", N) } else { partable$label <- as.character( partable$label ) } # add eq.id column #if(is.null(partable$eq.id)) { # partable$eq.id <- rep(0, N) #} # add unco column #if(is.null(partable$unco)) { # partable$unco <- partable$free #} # order them nicely: id lhs op rhs group idx <- match(c("id", "lhs","op","rhs", "block","user", "free","ustart","exo","label"), names(partable)) # order them nicely: id lhs op rhs group #idx <- match(c("id", "lhs","op","rhs", "group","user", # "free","ustart","exo","label","eq.id","unco"), # names(partable)) tmp <- partable[idx] partable <- c(tmp, partable[-idx]) # add start column if(start) { if(is.null(partable$start)) { partable$start <- lav_start(start.method = "simple", lavpartable = partable) } } partable } lavaan/R/lav_mvnorm_missing.R0000644000176200001440000011747613040424615015770 0ustar liggesusers# the multivariate normal distribution + missing values # (so-called 'FIML') # 1) loglikelihood (from raw data, or sample statitics) # 2) derivatives with respect to mu, Sigma, vech(Sigma) # 3) casewise scores with respect to mu, vech(Sigma), mu + vech(Sigma) # 4) hessian of mu + vech(Sigma) # 5) (unit) information of mu + vech(Sigma) # 5a: (unit) expected information # 5b: (unit) observed information # 5c: (unit) first.order information # 5d: lav_mvnorm_missing_information_both (both observed + first.order) # 6) inverted information h0 mu + vech(Sigma) # 6a: / # 6b: / # 6c: / # 7) ACOV h0 mu + vech(Sigma) # 7a: 1/N * inverted expected information # 7b: 1/N * inverted observed information # 7c: 1/N * inverted first-order information # 7d: sandwich acov # 10) additional functions # - lav_mvnorm_missing_impute_pattern # - lav_mvnorm_missing_estep # YR 09 Feb 2016: first version # YR 19 Mar 2017: 10) # 1) likelihood # 1a: input is raw data # - two strategies: 1) using missing patterns (pattern = TRUE) # 2) truly case per case (pattern = FALSE) # depending on the sample size, missing patterns, etc... 
one can be # (much) faster than the other lav_mvnorm_missing_loglik_data <- function(Y = NULL, Mu = NULL, Sigma = NULL, casewise = FALSE, pattern = TRUE, Sinv.method = "eigen", log2pi = TRUE, minus.two = FALSE) { if(pattern) { llik <- lav_mvnorm_missing_llik_pattern(Y = Y, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, log2pi = log2pi, minus.two = minus.two) } else { llik <- lav_mvnorm_missing_llik_casewise(Y = Y, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, log2pi = log2pi, minus.two = minus.two) } if(casewise) { loglik <- llik } else { loglik <- sum(llik, na.rm = TRUE) } loglik } # 1b: input are sample statistics (mean, cov, N) per pattern lav_mvnorm_missing_loglik_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", log2pi = TRUE, minus.two = FALSE) { LOG.2PI <- log(2*pi); pat.N <- length(Yp); P <- length(Yp[[1]]$var.idx) # global inverse + logdet Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) Sigma.logdet <- attr(Sigma.inv, "logdet") # DIST/logdet per pattern DIST <- logdet <- P.LOG.2PI <- numeric(pat.N) # for each pattern, compute sigma.inv/logdet; compute DIST for all # observations of this pattern for(p in seq_len(pat.N)) { # observed variables for this pattern var.idx <- Yp[[p]]$var.idx # missing values for this pattern na.idx <- which(!var.idx) # constant P.LOG.2PI[p] <- sum(var.idx) * LOG.2PI * Yp[[p]]$freq # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = TRUE, S.logdet = Sigma.logdet) logdet[p] <- attr(sigma.inv, "logdet") * Yp[[p]]$freq } else { sigma.inv <- Sigma.inv logdet[p] <- Sigma.logdet * Yp[[p]]$freq } TT <- Yp[[p]]$SY + tcrossprod(Yp[[p]]$MY - Mu[var.idx]) DIST[p] <- sum(sigma.inv * TT) * Yp[[p]]$freq } # loglikelihood all data if(log2pi) { loglik <- sum(-(P.LOG.2PI + logdet + DIST)/2) } else { loglik <- sum(-(logdet + DIST)/2) } if(minus.two) { loglik <- -2 * loglik } loglik } ## casewise loglikelihoods # casewise Sinv.method lav_mvnorm_missing_llik_casewise <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", log2pi = TRUE, minus.two = FALSE) { P <- NCOL(Y); N <- NROW(Y); LOG.2PI <- log(2*pi); Mu <- as.numeric(Mu) # global inverse + logdet Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) Sigma.logdet <- attr(Sigma.inv, "logdet") # subtract Mu Yc <- t( t(Y) - Mu ) # DIST/logdet per case DIST <- logdet <- P.LOG.2PI <- rep(as.numeric(NA), N) # missing pattern per case OBS <- !is.na(Y); P.i <- rowSums(OBS) # constant P.LOG.2PI <- P.i * LOG.2PI # complete cases first (only an advantage if we have mostly complete # observations) other.idx <- seq_len(N) complete.idx <- which(P.i == P) if(length(complete.idx) > 0L) { other.idx <- other.idx[-complete.idx] DIST[complete.idx] <- rowSums(Yc[complete.idx,,drop = FALSE] %*% Sigma.inv * Yc[complete.idx,,drop = FALSE]) logdet[complete.idx] <- Sigma.logdet } # non-complete cases for(i in other.idx) { na.idx <- which(!OBS[i,]) # catch empty cases if(length(na.idx) == P) next # invert Sigma for this pattern sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = TRUE, S.logdet = Sigma.logdet) logdet[i] <- attr(sigma.inv, "logdet") # distance for this case DIST[i] <- sum(sigma.inv * crossprod(Yc[i, OBS[i,], drop = FALSE])) } # compute casewise loglikelihoods if(log2pi) { llik <- -(P.LOG.2PI + logdet + DIST)/2 } else { llik <- -(logdet + DIST)/2 } # 
minus.two if(minus.two) { llik <- -2 * llik } llik } # pattern-based, but casewise loglikelihoods lav_mvnorm_missing_llik_pattern <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", log2pi = TRUE, minus.two = FALSE) { P <- NCOL(Y); N <- NROW(Y); LOG.2PI <- log(2*pi); Mu <- as.numeric(Mu) # global inverse + logdet Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) Sigma.logdet <- attr(Sigma.inv, "logdet") # subtract Mu Yc <- t( t(Y) - Mu ) # DIST/logdet per case DIST <- logdet <- P.LOG.2PI <- rep(as.numeric(NA), N) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # for each pattern, compute sigma.inv/logdet; compute DIST for all # observations of this pattern for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # identify cases with this pattern case.idx <- Mp$case.idx[[p]] # constant P.LOG.2PI[case.idx] <- sum(var.idx) * LOG.2PI # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = TRUE, S.logdet = Sigma.logdet) logdet[case.idx] <- attr(sigma.inv, "logdet") } else { sigma.inv <- Sigma.inv logdet[case.idx] <- Sigma.logdet } if(Mp$freq[p] == 1L) { DIST[case.idx] <- sum(sigma.inv * crossprod(Yc[case.idx, var.idx, drop = FALSE])) } else { DIST[case.idx] <- rowSums(Yc[case.idx, var.idx, drop = FALSE] %*% sigma.inv * Yc[case.idx, var.idx, drop = FALSE]) } } # compute casewise loglikelihoods if(log2pi) { llik <- -(P.LOG.2PI + logdet + DIST)/2 } else { llik <- -(logdet + DIST)/2 } # minus.two if(minus.two) { llik <- -2 * llik } llik } # 2. Derivatives # 2a: derivative logl with respect to mu lav_mvnorm_missing_dlogl_dmu <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { SC <- lav_mvnorm_missing_scores_mu(Y = Y, Mu = Mu, Sigma = Sigma, Sigma.inv = Sigma.inv, Sinv.method = Sinv.method) colSums(SC, na.rm = TRUE) } # 2abis: using samplestats lav_mvnorm_missing_dlogl_dmu_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { pat.N <- length(Yp); P <- length(Yp[[1]]$var.idx) if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # dmu dmu <- numeric(P) # for each pattern, compute sigma.inv for(p in seq_len(pat.N)) { # observed variables for this pattern var.idx <- Yp[[p]]$var.idx # missing values for this pattern na.idx <- which(!var.idx) # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } # dmu for this pattern dmu.pattern <- as.numeric(sigma.inv %*% (Yp[[p]]$MY - Mu[var.idx])) # update mu dmu[var.idx] <- dmu[var.idx] + (dmu.pattern * Yp[[p]]$freq) } dmu } # 2b: derivative logl with respect to Sigma (full matrix, ignoring symmetry) lav_mvnorm_missing_dlogl_dSigma <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y); N <- NROW(Y); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # subtract Mu Yc <- t( t(Y) - Mu ) # dvechSigma dSigma <- matrix(0, P, P) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # for each pattern for(p in 
seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # cases with this pattern case.idx <- Mp$case.idx[[p]] # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } if(length(case.idx) > 1L) { W.tilde <- crossprod(Yc[case.idx, var.idx, drop = FALSE])/Mp$freq[p] } else { W.tilde <- tcrossprod(Yc[case.idx, var.idx]) } # dSigma for this pattern dSigma.pattern <- matrix(0, P, P) dSigma.pattern[var.idx, var.idx] <- -(1/2) * (sigma.inv - (sigma.inv %*% W.tilde %*% sigma.inv)) # update dSigma dSigma <- dSigma + (dSigma.pattern * Mp$freq[p]) } dSigma } # 2bbis: using samplestats lav_mvnorm_missing_dlogl_dSigma_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { pat.N <- length(Yp); P <- length(Yp[[1]]$var.idx) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # dvechSigma dSigma <- matrix(0, P, P) # for each pattern for(p in seq_len(pat.N)) { # observed variables for this pattern var.idx <- Yp[[p]]$var.idx # missing values for this pattern na.idx <- which(!var.idx) # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } W.tilde <- Yp[[p]]$SY + tcrossprod(Yp[[p]]$MY - Mu[var.idx]) # dSigma for this pattern dSigma.pattern <- matrix(0, P, P) dSigma.pattern[var.idx, var.idx] <- -(1/2) * (sigma.inv - (sigma.inv %*% W.tilde %*% sigma.inv)) # update dSigma dSigma <- dSigma + (dSigma.pattern * Yp[[p]]$freq) } dSigma } # 2c: derivative logl with respect to vech(Sigma) lav_mvnorm_missing_dlogl_dvechSigma <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { FULL <- lav_mvnorm_missing_dlogl_dSigma(Y = Y, Mu = Mu, Sigma = Sigma, Sigma.inv = Sigma.inv, Sinv.method = Sinv.method) as.numeric( lav_matrix_duplication_pre( as.matrix(lav_matrix_vec(FULL)) ) ) } # 2cbis: using samplestats lav_mvnorm_missing_dlogl_dvechSigma_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { pat.N <- length(Yp); P <- length(Yp[[1]]$var.idx) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # dvechSigma dvechSigma <- numeric(P*(P+1)/2) # for each pattern for(p in seq_len(pat.N)) { # observed variables for this pattern var.idx <- Yp[[p]]$var.idx # missing values for this pattern na.idx <- which(!var.idx) # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } W.tilde <- Yp[[p]]$SY + tcrossprod(Yp[[p]]$MY - Mu[var.idx]) # dSigma for this pattern dSigma.pattern <- matrix(0, P, P) dSigma.pattern[var.idx, var.idx] <- -(1/2) * (sigma.inv - (sigma.inv %*% W.tilde %*% sigma.inv)) # convert to vechSigma dvechSigma.pattern <- as.numeric( lav_matrix_duplication_pre( as.matrix(lav_matrix_vec(dSigma.pattern)) ) ) # update dvechSigma dvechSigma <- dvechSigma + (dvechSigma.pattern * Yp[[p]]$freq) } dvechSigma } # 3. 
Casewise scores # 3a: casewise scores with respect to mu lav_mvnorm_missing_scores_mu <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y); N <- NROW(Y); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # subtract Mu Yc <- t( t(Y) - Mu ) # dmu per case dmu <- matrix(as.numeric(NA), N, P) # for each pattern, compute sigma.inv for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) case.idx <- Mp$case.idx[[p]] # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } # compute dMu for all observations of this pattern dmu[case.idx, var.idx] <- Yc[case.idx, var.idx, drop = FALSE] %*% sigma.inv } dmu } # 3b: casewise scores with respect to vech(Sigma) lav_mvnorm_missing_scores_vech_sigma <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y); N <- NROW(Y); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # for the tcrossprod idx1 <- lav_matrix_vech_col_idx(P); idx2 <- lav_matrix_vech_row_idx(P) # vech(Sigma.inv) iSigma <- lav_matrix_vech(Sigma.inv) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # subtract Mu Yc <- t( t(Y) - Mu ) # SC SC <- matrix(as.numeric(NA), nrow = N, ncol = length(iSigma)) # for each pattern for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # cases with this pattern case.idx <- Mp$case.idx[[p]] # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) tmp <- matrix(0, P, P) tmp[var.idx, var.idx] <- sigma.inv isigma <- lav_matrix_vech(tmp) } else { sigma.inv <- Sigma.inv isigma <- iSigma } # postmultiply these cases with sigma.inv Yc[case.idx, var.idx] <- Yc[case.idx, var.idx] %*% sigma.inv # tcrossprod SC[case.idx,] <- Yc[case.idx, idx1] * Yc[case.idx, idx2] # substract isigma from each row SC[case.idx,] <- t( t(SC[case.idx,,drop = FALSE]) - isigma ) } # adjust for vech SC[,lav_matrix_diagh_idx(P)] <- SC[,lav_matrix_diagh_idx(P)] / 2 SC } # 3c: casewise scores with respect to mu + vech(Sigma) lav_mvnorm_missing_scores_mu_vech_sigma <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y); N <- NROW(Y); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # for the tcrossprod idx1 <- lav_matrix_vech_col_idx(P); idx2 <- lav_matrix_vech_row_idx(P) # vech(Sigma.inv) iSigma <- lav_matrix_vech(Sigma.inv) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # subtract Mu Yc <- t( t(Y) - Mu ) # dmu per case dmu <- matrix(as.numeric(NA), N, P) # SC SC <- matrix(as.numeric(NA), nrow = N, ncol = length(iSigma)) # for each pattern, compute Yc %*% sigma.inv for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # 
missing values for this pattern na.idx <- which(!var.idx) # cases with this pattern case.idx <- Mp$case.idx[[p]] # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) tmp <- matrix(0, P, P) tmp[var.idx, var.idx] <- sigma.inv isigma <- lav_matrix_vech(tmp) } else { sigma.inv <- Sigma.inv isigma <- iSigma } # compute dMu for all observations of this pattern dmu[case.idx, var.idx] <- Yc[case.idx, var.idx, drop = FALSE] %*% sigma.inv # postmultiply these cases with sigma.inv Yc[case.idx, var.idx] <- Yc[case.idx, var.idx] %*% sigma.inv # tcrossprod SC[case.idx,] <- Yc[case.idx, idx1] * Yc[case.idx, idx2] # substract isigma from each row SC[case.idx,] <- t( t(SC[case.idx,,drop = FALSE]) - isigma ) } # adjust for vech SC[,lav_matrix_diagh_idx(P)] <- SC[,lav_matrix_diagh_idx(P)] / 2 cbind(dmu, SC) } # 4) Hessian of logl lav_mvnorm_missing_logl_hessian_data <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { # missing patterns Yp <- lav_samplestats_missing_patterns(Y = Y, Mp = Mp) lav_mvnorm_missing_logl_hessian_samplestats(Yp = Yp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) } lav_mvnorm_missing_logl_hessian_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { pat.N <- length(Yp); P <- length(Yp[[1]]$var.idx) if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } H11 <- matrix(0, P, P) H21 <- matrix(0, P*(P+1)/2, P) H22 <- matrix(0, P*(P+1)/2, P*(P+1)/2) # for each pattern, compute sigma.inv for(p in seq_len(pat.N)) { # observed variables var.idx <- Yp[[p]]$var.idx pat.freq <- Yp[[p]]$freq # missing values for this pattern na.idx <- which(!var.idx) # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } S.inv <- matrix(0, P, P) S.inv[var.idx, var.idx] <- sigma.inv tmp21 <- matrix(0,P,1) tmp21[var.idx,1] <- sigma.inv %*% (Yp[[p]]$MY - Mu[var.idx]) W.tilde <- Yp[[p]]$SY + tcrossprod(Yp[[p]]$MY - Mu[var.idx]) AAA <- ( sigma.inv %*% (2*W.tilde - Sigma[var.idx,var.idx,drop = FALSE]) %*% sigma.inv ) tmp22 <- matrix(0, P, P) tmp22[var.idx, var.idx] <- AAA i11 <- S.inv i21 <- lav_matrix_duplication_pre( tmp21 %x% S.inv ) i22 <- (1/2) * lav_matrix_duplication_pre_post(S.inv %x% tmp22) H11 <- H11 + pat.freq * i11 H21 <- H21 + pat.freq * i21 H22 <- H22 + pat.freq * i22 } H12 <- t(H21) -1 * rbind( cbind(H11, H12), cbind(H21, H22) ) } # 5) Information # 5a: expected unit information Mu and vech(Sigma) # (only useful under MCAR) # (old term: Abeta, expected) lav_mvnorm_missing_information_expected <- function(Y = NULL, Mp = NULL, Mu = NULL,# unused Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y) if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # N N <- sum(Mp$freq) # removed empty cases! 
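    # block sizes: I11 (means) is P x P, I22 (vech(Sigma)) is P*(P+1)/2
    # square; the expected information has no I21 block: it is block-diagonal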
I11 <- matrix(0, P, P) I22 <- matrix(0, P*(P+1)/2, P*(P+1)/2) # for each pattern, compute sigma.inv for(p in seq_len(Mp$npatterns)) { # observed variables var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # invert Sigma for this pattern if(length(na.idx) > 0L) { sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) } else { sigma.inv <- Sigma.inv } S.inv <- matrix(0, P, P) S.inv[var.idx, var.idx] <- sigma.inv S2.inv <- 0.5 * lav_matrix_duplication_pre_post(S.inv %x% S.inv) I11 <- I11 + Mp$freq[p] * S.inv I22 <- I22 + Mp$freq[p] * S2.inv } lav_matrix_bdiag(I11, I22)/N } # 5b: unit observed information Mu and vech(Sigma) from raw data # (old term: Abeta, observed) lav_mvnorm_missing_information_observed_data <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # N N <- sum(Mp$freq) # observed information observed <- lav_mvnorm_missing_logl_hessian_data(Y = Y, Mp = Mp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) -observed/N } # 5b-bis: unit observed information Mu and vech(Sigma) from samplestats lav_mvnorm_missing_information_observed_samplestats <- function(Yp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- sum(sapply(Yp, "[[", "freq")) # implicitly: removed empty cases! # observed information observed <- lav_mvnorm_missing_logl_hessian_samplestats(Yp = Yp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) -observed/N } # 5c: unit first-order information Mu and vech(Sigma) from raw data # (old term: Bbeta) lav_mvnorm_missing_information_firstorder <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # N N <- sum(Mp$freq) SC <- lav_mvnorm_missing_scores_mu_vech_sigma(Y = Y, Mp = Mp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) lav_matrix_crossprod(SC)/N } # 5d: both unit first-order information and expected/observed information # from raw data, in one go for efficiency lav_mvnorm_missing_information_both <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL, information = "observed") { P <- NCOL(Y); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # for the tcrossprod idx1 <- lav_matrix_vech_col_idx(P); idx2 <- lav_matrix_vech_row_idx(P) # vech(Sigma.inv) iSigma <- lav_matrix_vech(Sigma.inv) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } if(information == "observed") { Yp <- lav_samplestats_missing_patterns(Y = Y, Mp = Mp) } # N N <- sum(Mp$freq) # subtract Mu Yc <- t( t(Y) - Mu ) # dmu per case dmu <- matrix(as.numeric(NA), nrow = NROW(Y), ncol = P) # SC SC <- matrix(as.numeric(NA), nrow = NROW(Y), ncol = length(iSigma)) # expected/observed information I11 <- matrix(0, P, P) I22 <- matrix(0, P*(P+1)/2, P*(P+1)/2) if(information == "observed") { I21 <- matrix(0, P*(P+1)/2, P) } # for each pattern, compute Yc %*% sigma.inv for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # cases with this pattern case.idx <- Mp$case.idx[[p]] # invert Sigma for this pattern if(length(na.idx) > 0L) { 
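            # update the global inverse by removing the missing rows/columns;
            # cheaper than re-inverting the observed submatrix of Sigma
            # from scratch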
sigma.inv <- lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE) tmp <- matrix(0, P, P) tmp[var.idx, var.idx] <- sigma.inv isigma <- lav_matrix_vech(tmp) } else { sigma.inv <- Sigma.inv isigma <- iSigma } # information S.inv <- matrix(0, P, P) S.inv[var.idx, var.idx] <- sigma.inv if(information == "expected") { S2.inv <- 0.5 * lav_matrix_duplication_pre_post(S.inv %x% S.inv) I11 <- I11 + Mp$freq[p] * S.inv I22 <- I22 + Mp$freq[p] * S2.inv } else { pat.freq <- Yp[[p]]$freq tmp21 <- matrix(0,P,1) tmp21[var.idx,1] <- sigma.inv %*% (Yp[[p]]$MY - Mu[var.idx]) W.tilde <- Yp[[p]]$SY + tcrossprod(Yp[[p]]$MY - Mu[var.idx]) AAA <- ( sigma.inv %*% (2*W.tilde - Sigma[var.idx,var.idx,drop = FALSE]) %*% sigma.inv ) tmp22 <- matrix(0, P, P) tmp22[var.idx, var.idx] <- AAA i11 <- S.inv i21 <- lav_matrix_duplication_pre( tmp21 %x% S.inv ) i22 <- (1/2) * lav_matrix_duplication_pre_post(S.inv %x% tmp22) I11 <- I11 + pat.freq * i11 I21 <- I21 + pat.freq * i21 I22 <- I22 + pat.freq * i22 } # compute dMu for all observations of this pattern dmu[case.idx, var.idx] <- Yc[case.idx, var.idx, drop = FALSE] %*% sigma.inv # postmultiply these cases with sigma.inv Yc[case.idx, var.idx] <- Yc[case.idx, var.idx] %*% sigma.inv # tcrossprod SC[case.idx,] <- Yc[case.idx, idx1] * Yc[case.idx, idx2] # substract isigma from each row SC[case.idx,] <- t( t(SC[case.idx,,drop = FALSE]) - isigma ) } # adjust for vech SC[,lav_matrix_diagh_idx(P)] <- SC[,lav_matrix_diagh_idx(P)] / 2 # add dmu SC <- cbind(dmu, SC) # first order information Bbeta <- lav_matrix_crossprod(SC)/N # expected/observed information if(information == "expected") { Abeta <- lav_matrix_bdiag(I11, I22)/N } else { Abeta <- rbind( cbind(I11, t(I21) ), cbind(I21, I22) )/N } list(Abeta = Abeta, Bbeta = Bbeta) } # 6) inverted information h0 mu + vech(Sigma) # 6a: (unit) inverted expected information # NOT USED: is not equal to solve(expected) # (although it does converge to the same solution eventually) # lav_mvnorm_missing_inverted_information_expected <- function(Y = NULL, # Mp = NULL, # Mu = NULL,# unused # Sigma = NULL) { # P <- NCOL(Y) # # # missing patterns # if(is.null(Mp)) { # Mp <- lav_data_missing_patterns(Y) # } # # # N # N <- sum(Mp$freq) # removed empty cases! 
# # I11 <- matrix(0, P, P) # I22 <- matrix(0, P*(P+1)/2, P*(P+1)/2) # # # for each pattern # for(p in seq_len(Mp$npatterns)) { # # # observed variables # var.idx <- Mp$pat[p,] # # sigma <- matrix(0, P, P) # sigma[var.idx, var.idx] <- Sigma[var.idx, var.idx] # sigma2 <- 2 * lav_matrix_duplication_ginv_pre_post(sigma %x% sigma) # # I11 <- I11 + Mp$freq[p] * sigma # I22 <- I22 + Mp$freq[p] * sigma2 # } # # lav_matrix_bdiag(I11, I22)/N #} # 6b: / # 6c: / # 7) ACOV h0 mu + vech(Sigma) # 7a: 1/N * inverted expected information # 7b: 1/N * inverted observed information # 7c: 1/N * inverted first-order information # 7d: sandwich acov # 10) other stuff # single imputation missing cells, under the normal model, pattern-based lav_mvnorm_missing_impute_pattern <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { Mu <- as.numeric(Mu) # complete data Y.complete <- Y # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # subtract Mu Yc <- t( t(Y) - Mu ) # fill in data per pattern for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # if complete, nothing to do if(all(var.idx)) { next } # missing values for this pattern na.idx <- which(!var.idx) # extract observed data for these (centered) cases Oc <- Yc[Mp$case.idx[[p]], Mp$pat[p, ], drop = FALSE] # invert Sigma (Sigma_22, observed part only) for this pattern Sigma_22.inv <- try(lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE), silent = TRUE) if(inherits(Sigma_22.inv, "try-error")) { stop("lavaan ERROR: Sigma_22.inv cannot be inverted") } # estimate missing values in this pattern Sigma_12 <- Sigma[!var.idx, var.idx, drop=FALSE] Y.missing <- t( Sigma_12 %*% Sigma_22.inv %*% t(Oc) + Mu[!var.idx] ) # complete data for this pattern Y.complete[Mp$case.idx[[p]], !var.idx] <- Y.missing } Y.complete } # E-step: expectations of sum, sum of squares, sum of crossproducts # plus correction lav_mvnorm_missing_estep <- function(Y = NULL, Mp = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen") { P <- NCOL(Y); Mu <- as.numeric(Mu) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # T1, T2 T1 <- numeric(P) T2 <- matrix(0, P, P) # update T1 and T2 per pattern for(p in seq_len(Mp$npatterns)) { # observed values for this pattern var.idx <- Mp$pat[p,] # missing values for this pattern na.idx <- which(!var.idx) # extract observed data O <- Y[Mp$case.idx[[p]], Mp$pat[p, ], drop = FALSE] # if complete, just compute first and second moments if(all(var.idx)) { # complete pattern T1 <- T1 + colSums(O) T2 <- T2 + crossprod(O) next } # missing values for this pattern na.idx <- which(!var.idx) # partition Sigma (1=missing, 2=complete) Sigma_11 <- Sigma[!var.idx, !var.idx, drop=FALSE] Sigma_12 <- Sigma[!var.idx, var.idx, drop=FALSE] Sigma_21 <- Sigma[ var.idx, !var.idx, drop=FALSE] # invert Sigma (Sigma_22, observed part only) for this pattern Sigma_22.inv <- try(lav_matrix_symmetric_inverse_update(S.inv = Sigma.inv, rm.idx = na.idx, logdet = FALSE), silent = TRUE) if(inherits(Sigma_22.inv, "try-error")) { stop("lavaan ERROR: Sigma_22.inv cannot be inverted") } # estimate missing values in this pattern Oc <- t( t(O) - 
Mu[var.idx])
        Y.missing <- t( Sigma_12 %*% Sigma_22.inv %*% t(Oc) + Mu[!var.idx] )
        # complete data for this pattern
        Y.complete <- matrix(0, Mp$freq[[p]], P)
        Y.complete[, var.idx] <- O
        Y.complete[,!var.idx] <- Y.missing
        # 1. SUM `completed' pattern
        T1.pat <- colSums(Y.complete)
        # 2. CROSSPROD `completed' pattern
        T2.pat <- crossprod(Y.complete)
        # correction for missing cells: conditional covariances
        T2.p11 <- Sigma_11 - (Sigma_12 %*% Sigma_22.inv %*% Sigma_21)
        T2.pat[!var.idx, !var.idx] <- T2.pat[!var.idx, !var.idx] +
            (T2.p11 * Mp$freq[[p]])
        # accumulate
        T1 <- T1 + T1.pat
        T2 <- T2 + T2.pat
    }
    list(T1 = T1, T2 = T2)
}
lavaan/R/ctr_pml_doubly_robust_utils.R0000644000176200001440000004272513013536450017706 0ustar liggesusers# This code was contributed by Myrsini Katsikatsou (LSE) -- September 2016
#
# compute_uniCondProb_based_on_bivProb()
# pairwiseExpProbVec_GivenObs()
# LongVecTH.Rho.Generalised()
# pairwiseExpProbVec_GivenObs_UncMod()
compute_uniCondProb_based_on_bivProb <- function(bivProb, nvar, idx.pairs,
                                                 idx.Y1, idx.Gy2,
                                                 idx.cat.y1.split,
                                                 idx.cat.y2.split) {
    bivProb.split <- split(bivProb, idx.pairs)
    lngth <- 2*length(bivProb)
    idx.vec.el <- 1:lngth
    ProbY1Gy2 <- rep(NA, lngth)
    no.pairs <- nvar*(nvar-1)/2
    idx2.pairs <- combn(nvar,2)
    for(k in 1:no.pairs){
        y2Sums <- tapply(bivProb.split[[k]], idx.cat.y2.split[[k]], sum)
        y2Sums.mult <- y2Sums[idx.cat.y2.split[[k]] ]
        Y1Gy2 <- bivProb.split[[k]]/ y2Sums.mult
        tmp.idx.vec.el <- idx.vec.el[(idx.Y1 == idx2.pairs[1,k]) &
                                     (idx.Gy2 == idx2.pairs[2,k])]
        ProbY1Gy2[tmp.idx.vec.el] <- Y1Gy2
    }
    for(k in 1:no.pairs){
        y1Sums <- tapply(bivProb.split[[k]], idx.cat.y1.split[[k]], sum)
        y1Sums.mult <- y1Sums[idx.cat.y1.split[[k]] ]
        Y2Gy1 <- bivProb.split[[k]]/ y1Sums.mult
        reordered_Y2Gy1 <- Y2Gy1[order(idx.cat.y1.split[[k]])]
        tmp.idx.vec.el <- idx.vec.el[(idx.Y1 == idx2.pairs[2,k]) &
                                     (idx.Gy2 == idx2.pairs[1,k])]
        ProbY1Gy2[tmp.idx.vec.el] <- reordered_Y2Gy1
    }
    ProbY1Gy2
}
# The input of the function is a lavobject, which, in turn, is the output of
# the sem() function called with estimator="PML", missing="available.cases".
# The output of the function is a list of two lists: the pairwiseProbGivObs list
# and the univariateProbGivObs list. Each of the two lists consists of G matrices
# where G is the number of groups in a multigroup analysis. If G=1 each of the
# lists contains only one matrix that can be called as pairwiseProbGivObs[[1]],
# and univariateProbGivObs[[1]].
# Each of the matrices in the pairwiseProbGivObs list is of dimension:
# nrow=sample size,
# ncol=sum of the number of response categories for all pairs of variables
# (i.e. the length of the vector pxixj.ab where i<j).
# Each of the matrices in the univariateProbGivObs list is of dimension:
# nrow=sample size, ncol=sum of the number of response categories of all
# variables.
pairwiseExpProbVec_GivenObs <- function(lavobject) {
#yhat below holds the fitted values of the underlying variables;
#if lavobject@Data@ngroups>1, it is a list of G matrices
#where G the number of groups and the matrices are of dimension
#nrow=sample size and ncol=number of items.
#If lavobject@Data@ngroups=1 then yhat is a matrix.
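# usage sketch (commented out; 'fit' is assumed to be the result of
# sem(model, data = mydata, estimator = "PML", missing = "available.cases")
# as described above):
# probs <- pairwiseExpProbVec_GivenObs(fit)
# str(probs$pairwiseProbGivObs[[1]])
# str(probs$univariateProbGivObs[[1]])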
yhat <- lavPredict(object=lavobject, type = "yhat" ) #compute bivariate probabilities ngroups <- lavobject@Data@ngroups univariateProb <- vector("list", length=ngroups) pairwiseProb <- vector("list", length=ngroups) #save the indices of the Theta matrices for the groups stored in GLIST idx.ThetaMat <- which(names(lavobject@Model@GLIST)=="theta") for(g in seq_len(ngroups)) { # g<-1 if(ngroups>1L){ yhat_group <- yhat[[g]] } else { yhat_group <- yhat } nsize <- lavobject@Data@nobs[[g]] nvar <- lavobject@Model@nvar[[g]] Data <- lavobject@Data@X[[g]] TH <- lavobject@Fit@TH[[g]] th.idx <- lavobject@Model@th.idx[[g]] Theta <- lavobject@Model@GLIST[ idx.ThetaMat[g] ]$theta error.stddev <- diag(Theta)^0.5 #for the computation of the univariate probabilities nlev <- lavobject@Data@ov$nlev idx.uniy <- rep(1:nvar, times=nlev) #indices vectors for the computation of bivariate probabilities idx.pairs.yiyj <- combn(1:nvar,2) no_biv_resp_cat_yiyj <- sapply(1:ncol(idx.pairs.yiyj), function(x){ prod( nlev[idx.pairs.yiyj[,x]] ) }) idx.y1 <- unlist( mapply(rep, idx.pairs.yiyj[1,], each= no_biv_resp_cat_yiyj) ) idx.y2 <- unlist( mapply(rep, idx.pairs.yiyj[2,], each= no_biv_resp_cat_yiyj) ) univariateProb[[g]] <- matrix(0, nrow = nsize, ncol = sum(nlev) ) pairwiseProb[[g]] <- matrix(0, nrow = nsize, ncol = length(lavobject@Cache[[g]]$bifreq)) idx.MissVar.casewise <- apply(Data, 1, function(x) { which(is.na(x)) } ) for(i in 1:nsize){ idx.MissVar <- idx.MissVar.casewise[[i]] noMissVar <- length(idx.MissVar) if( noMissVar>0L ) { #compute the univariate probabilities TH.list <- split(TH,th.idx) tmp.TH <- TH.list[idx.MissVar] tmp.lowerTH <- unlist(lapply(tmp.TH, function(x){c(-Inf,x)})) tmp.upperTH <- unlist(lapply(tmp.TH, function(x){c(x,Inf) })) idx.items <- rep(c(1:noMissVar), times=nlev[idx.MissVar]) tmp.mean <- yhat_group[i,idx.MissVar] tmp.mean.extended <- tmp.mean[idx.items] tmp.stddev <- error.stddev[idx.MissVar] tmp.stddev.extended <- tmp.stddev[idx.items] tmp.uniProb <- pnorm( (tmp.upperTH - tmp.mean.extended )/ tmp.stddev.extended ) - pnorm( (tmp.lowerTH - tmp.mean.extended )/ tmp.stddev.extended ) idx.columnsUni <- which(idx.uniy %in% idx.MissVar) univariateProb[[g]][i, idx.columnsUni] <- tmp.uniProb #compute the bivariate probabilities if( noMissVar>1L ) { idx.pairsMiss <- combn(idx.MissVar ,2) no.pairs <- ncol(idx.pairsMiss) idx.pairsV2 <- combn(noMissVar, 2) idx.columns <- unlist(lapply(1:no.pairs, function(x){ which( (idx.y1 == idx.pairsMiss[1,x]) & (idx.y2 == idx.pairsMiss[2,x]) ) } ) ) if( all( Theta[t(idx.pairsMiss)]==0 ) ){ #items independence given eta tmp.uniProb.list <- split(tmp.uniProb, idx.items) pairwiseProb[[g]][i, idx.columns] <- unlist( lapply(1:no.pairs, function(x){ c( outer(tmp.uniProb.list[[ idx.pairsV2[1,x] ]] , tmp.uniProb.list[[ idx.pairsV2[2,x] ]] ) ) }) ) } else { #when correlation between measurement errors tmp.th.idx <- th.idx[th.idx %in% idx.MissVar] #recode so that it is always 1,1,..,1, 2,...,2, etc. 
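                 #(e.g. if the missing items are 2 and 5, with 3 and 2
                 # thresholds, tmp.th.idx = c(2,2,2,5,5) is recoded
                 # to c(1,1,1,2,2))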
tmp.th.idx.recoded <- rep(c(1:noMissVar), times=table(tmp.th.idx)) tmp.TH <- TH[th.idx %in% idx.MissVar] tmp.ind.vec <- LongVecInd(no.x = noMissVar, all.thres = tmp.TH, index.var.of.thres = tmp.th.idx.recoded) tmp.th.rho.vec <- LongVecTH.Rho.Generalised( no.x = noMissVar, TH = tmp.TH, th.idx = tmp.th.idx.recoded, cov.xixj = Theta[t(idx.pairsMiss)] , mean.x = yhat_group[i,idx.MissVar], stddev.x = error.stddev[idx.MissVar] ) tmp.bivProb <- pairwiseExpProbVec(ind.vec = tmp.ind.vec , th.rho.vec = tmp.th.rho.vec) pairwiseProb[[g]][i, idx.columns] <- tmp.bivProb } #end of else of if( all( Theta[t(idx.pairsMiss)]==0 ) ) # which checks item local independence } #end of if( noMissVar>1L ) #cat(i, "\n") } #end of if(noMissVar>0L) } #end of for(i in 1:nsize) } #end of for(g in seq_len(lavobject@Data@ngroups)) list(univariateProbGivObs = univariateProb, pairwiseProbGivObs = pairwiseProb) } # end of the function pairwiseExpProbVec_GivenObs ################################################################## # LongVecTH.Rho.Generalised function is defined as follows LongVecTH.Rho.Generalised <- function(no.x, TH, th.idx, cov.xixj, mean.x, stddev.x ) { all.std.thres <- (TH - mean.x[th.idx]) / stddev.x[th.idx] id.pairs <- utils::combn(no.x,2) cor.xixj <- cov.xixj /( stddev.x[id.pairs[1,]] * stddev.x[id.pairs[2,]]) LongVecTH.Rho(no.x = no.x, all.thres = all.std.thres, index.var.of.thres = th.idx, rho.xixj = cor.xixj) } # LongVecTH.Rho.Generalised is a generalisation of the function # lavaan:::LongVecTH.Rho . The latter assumes that all y* follow standard # normal so the thresholds are automatically the standardised ones. # LongVecTH.Rho.Generalised does not assume that, each of y*'s can follow # a normal distribution with mean mu and standard deviation sigma. # LongVecTH.Rho.Generalised has the following input arguments: # no.x (same as in lavaan:::LongVecTH.Rho), # TH (similar to the TH in lavaan:::LongVecTH.Rho but here they are the unstandardised thresholds, i.e. of the normal distribution with mean mu and standard deviation sigma) # th.idx (same as index.var.of.thres in lavaan:::LongVecTH.Rho) # cov.xixj which are the polychoric covariances of the pairs of underlying variables provided in a similar fashion as rho.xixj in lavaan:::LongVecTH.Rho) # mean.x is a vector including the means of y*'s provided in the order mean.x1, mean.x2, ...., mean.xp # stddev.x is a vector including the standard deviations of y*'s provided in the order stddev.x1, stddev.x2, ...., stddev.xp # The output of the new function is similar to that of lavaan:::LongVecTH.Rho############################################# #lavobject is the output of lavaan function where either the unconstrained #or a hypothesized model has been fitted pairwiseExpProbVec_GivenObs_UncMod <- function(lavobject) { ngroups <- lavobject@Data@ngroups TH <- lavobject@implied$th #these are the standardized thresholds #mean and variance of y* have been taken into account TH.IDX <- lavobject@SampleStats@th.idx Sigma.hat <- lavobject@implied$cov univariateProb <- vector("list", length=ngroups) pairwiseProb <- vector("list", length=ngroups) for(g in 1:ngroups) { Sigma.hat.g <- Sigma.hat[[g]] # is Sigma.hat always a correlation matrix? 
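    # standardize defensively: cov2cor() leaves a matrix that is already
    # a correlation matrix unchanged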
Cor.hat.g <- cov2cor(Sigma.hat.g) cors <- Cor.hat.g[lower.tri(Cor.hat.g)] if(any(abs(cors) > 1)) { warning("lavaan WARNING: some model-implied correlations are larger than 1.0") } nvar <- nrow(Sigma.hat.g) MEAN <- rep(0, nvar) TH.g <- TH[[g]] th.idx.g <- TH.IDX[[g]] nlev <- lavobject@Data@ov$nlev #create index vector to keep track which variable each column of #univariateProb matrix refers to idx.uniy <- rep(1:nvar, times=nlev) #create index vector to keep track which variables each column of #pairwiseProb matrix refers to idx.pairs.yiyj <- combn(1:nvar,2) no_biv_resp_cat_yiyj <- sapply(1:ncol(idx.pairs.yiyj), function(x){ prod( nlev[idx.pairs.yiyj[,x]] ) }) idx.y1 <- unlist( mapply(rep, idx.pairs.yiyj[1,], each= no_biv_resp_cat_yiyj) ) idx.y2 <- unlist( mapply(rep, idx.pairs.yiyj[2,], each= no_biv_resp_cat_yiyj) ) Data <- lavobject@Data@X[[g]] nsize <- nrow(Data) #create the lists of matrices univariateProb[[g]] <- matrix(0, nrow = nsize, ncol = sum(nlev) ) pairwiseProb[[g]] <- matrix(0, nrow = nsize, ncol = length(lavobject@Cache[[g]]$bifreq)) idx.MissVar.casewise <- apply(Data, 1, function(x) { which(is.na(x)) } ) for(i in 1:nsize){ idx.MissVar <- idx.MissVar.casewise[[i]] noMissVar <- length(idx.MissVar) if( noMissVar>0L ) { #compute the denominator of the conditional probability TH.VAR <- lapply(1:nvar, function(x) c(-Inf, TH.g[th.idx.g==x], +Inf)) lower <- sapply(1:nvar, function(x) TH.VAR[[x]][ Data[i,x] ]) upper <- sapply(1:nvar, function(x) TH.VAR[[x]][ Data[i,x] + 1L ]) lower.denom <- lower[-idx.MissVar] upper.denom <- upper[-idx.MissVar] MEAN.i <- MEAN[-idx.MissVar] Corhat.i <- Cor.hat.g[-idx.MissVar, -idx.MissVar, drop=FALSE] denom <- sadmvn(lower.denom, upper.denom, mean=MEAN.i, varcov=Corhat.i)[1] } #end of if( noMissVar>0L ) if( noMissVar==1L ) { #only univariate probabilities for one item #compute the numerator TH.MissVar <- c(-Inf, TH.g[th.idx.g==idx.MissVar], +Inf) #for all response categories of the missing item no.cat <- nlev[idx.MissVar] numer <- sapply(1:no.cat, function(x){ lower[idx.MissVar] <- TH.MissVar[x] upper[idx.MissVar] <- TH.MissVar[x+ 1L] sadmvn(lower, upper, mean=MEAN, varcov=Cor.hat.g)[1] }) idx.columnsUni <- which(idx.uniy %in% idx.MissVar) univariateProb[[g]][i, idx.columnsUni] <- numer / denom } #end of if( noMissVar==1L ) if( noMissVar>1L ) { #compute the bivariate probabilities and based on them #calculate the univariate ones #form all possible pairs of items with missing values idx.pairsMiss <- combn(idx.MissVar ,2) no.pairs <- ncol(idx.pairsMiss) for(j in 1:no.pairs ) { idx.Missy1y2 <- idx.pairsMiss[,j] idx.Missy1 <- idx.Missy1y2[1] idx.Missy2 <- idx.Missy1y2[2] idx.MissRestItems <- idx.MissVar[ !(idx.MissVar %in% idx.Missy1y2)] TH.Missy1 <- c(-Inf, TH.g[th.idx.g==idx.Missy1], +Inf) TH.Missy2 <- c(-Inf, TH.g[th.idx.g==idx.Missy2], +Inf) no.cat.Missy1 <- nlev[ idx.Missy1 ] no.cat.Missy2 <- nlev[ idx.Missy2 ] no.bivRespCat <- no.cat.Missy1 * no.cat.Missy2 mat_bivRespCat <- matrix(1:no.bivRespCat, nrow= no.cat.Missy1, ncol=no.cat.Missy2) numer <- sapply(1:no.bivRespCat, function(x){ idx_y1_cat <- which(mat_bivRespCat==x, arr.ind=TRUE)[1] idx_y2_cat <- which(mat_bivRespCat==x, arr.ind=TRUE)[2] lower[idx.Missy1y2] <- c( TH.Missy1[idx_y1_cat], TH.Missy2[idx_y2_cat] ) upper[idx.Missy1y2] <- c( TH.Missy1[idx_y1_cat+1L], TH.Missy2[idx_y2_cat+1L] ) lower.tmp <- lower upper.tmp <- upper MEAN.tmp <- MEAN Cor.hat.g.tmp <- Cor.hat.g if( length(idx.MissRestItems)>0 ){ lower.tmp <- lower[-idx.MissRestItems] upper.tmp <- upper[-idx.MissRestItems] MEAN.tmp <- 
MEAN[-idx.MissRestItems]
                Cor.hat.g.tmp <- Cor.hat.g[-idx.MissRestItems,
                                           -idx.MissRestItems]
            }
            sadmvn(lower.tmp, upper.tmp, mean=MEAN.tmp,
                   varcov=Cor.hat.g.tmp)[1]
        })
        idx.columns <- which( (idx.y1 == idx.Missy1) &
                              (idx.y2 == idx.Missy2) )
        tmp_biv <- numer/denom
        pairwiseProb[[g]][i, idx.columns] <- tmp_biv
        #compute the univariateProb based on the above bivariate
        # probabilities
        if(j==1L){
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy1) ] <-
                apply(mat_bivRespCat, 1, function(x){ sum( tmp_biv[x])} )
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy2) ] <-
                apply(mat_bivRespCat, 2, function(x){ sum( tmp_biv[x])} )
        }
        if(j>1L & j<noMissVar){
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy2) ] <-
                apply(mat_bivRespCat, 2, function(x){ sum( tmp_biv[x])} )
        }
    } #end of for(j in 1:no.pairs )
    } #end of if( noMissVar>1L )
    } #end of for(i in 1:nsize)
} # end of for(g in 1:ngroups)
list(univariateProbGivObs = univariateProb,
     pairwiseProbGivObs = pairwiseProb)
} #end of function
lavaan/R/lav_mvnorm_h1.R0000644000176200001440000002500113040202674014604 0ustar liggesusers# the multivariate normal distribution, unrestricted (h1)
# - everything is evaluated under the MLEs: Mu = ybar, Sigma = S
# 1) loglikelihood h1 (from raw data, or sample statistics)
# 4) hessian h1 around MLEs
# 5) information h1 (restricted Sigma/mu)
# 5a: (unit) expected information h1 (A1 = Gamma.NT^{-1})
# 5b: (unit) observed information h1 (A1 = Gamma.NT^{-1})
# 5c: (unit) first.order information h1 (B1 = A1 %*% Gamma %*% A1)
# 6) inverted information h1 mu + vech(Sigma)
# 6a: (unit) inverted expected information (A1.inv = Gamma.NT)
# 6b: (unit) inverted observed information (A1.inv = Gamma.NT)
# 6c: (unit) inverted first-order information (B1.inv)
# 7) ACOV h1 mu + vech(Sigma)
# 7a: 1/N * Gamma.NT
# 7b: 1/N * Gamma.NT
# 7c: 1/N * (Gamma.NT * Gamma^{-1} * Gamma.NT)
# 7d: 1/N * Gamma (sandwich)
# YR 25 Mar 2016: first version
# YR 19 Jan 2017: added 6) + 7)
# 1. likelihood h1
# 1a: input is raw data
lav_mvnorm_h1_loglik_data <- function(Y = NULL,
                                      casewise = FALSE,
                                      Sinv.method = "eigen") {
    P <- NCOL(Y); N <- NROW(Y)
    # sample statistics
    sample.mean <- colMeans(Y)
    sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean)
    if(casewise) {
        LOG.2PI <- log(2 * pi)
        # invert sample.cov
        if(Sinv.method == "chol") {
            cS <- chol(sample.cov); icS <- backsolve(cS, diag(P))
            Yc <- t( t(Y) - sample.mean )
            DIST <- rowSums((Yc %*% icS)^2)
            logdet <- -2 * sum(log(diag(icS)))
        } else {
            sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov,
                logdet = TRUE, Sinv.method = Sinv.method)
            logdet <- attr(sample.cov.inv, "logdet")
            # mahalanobis distance
            Yc <- t( t(Y) - sample.mean )
            DIST <- rowSums(Yc %*% sample.cov.inv * Yc)
        }
        loglik <- -(P * LOG.2PI + logdet + DIST)/2
    } else {
        # invert sample.cov
        sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov,
            logdet = TRUE, Sinv.method = Sinv.method)
        loglik <- lav_mvnorm_h1_loglik_samplestats(sample.mean = sample.mean,
            sample.cov = sample.cov, sample.nobs = N,
            sample.cov.inv = sample.cov.inv)
    }
    loglik
}
# 1b: input are sample statistics (mean, cov, N) only
lav_mvnorm_h1_loglik_samplestats <- function(sample.mean = NULL,
                                             sample.cov = NULL,
                                             sample.nobs = NULL,
                                             Sinv.method = "eigen",
                                             sample.cov.inv = NULL) {
    P <- length(sample.mean); N <- sample.nobs
    sample.mean <- as.numeric(sample.mean)
    LOG.2PI <- log(2 * pi)
    if(is.null(sample.cov.inv)) {
        sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov,
            logdet = TRUE, Sinv.method = Sinv.method)
        logdet <- attr(sample.cov.inv, "logdet")
    } else {
        logdet <- attr(sample.cov.inv, "logdet")
        if(is.null(logdet)) {
            # compute - ln|S.inv|
            ev <- eigen(sample.cov.inv, symmetric = TRUE, only.values = TRUE)
            logdet <- -1 * sum(log(ev$values))
        }
    }
    loglik <-
-N/2 * (P * LOG.2PI + logdet + P) loglik } # 4. hessian of logl (around MLEs of Mu and Sigma) # 4a: hessian logl Mu and vech(Sigma) from raw data lav_mvnorm_h1_logl_hessian_data <- function(Y = NULL, Sinv.method = "eigen", sample.cov.inv = NULL) { N <- NROW(Y) # observed information observed <- lav_mvnorm_h1_information_observed_data(Y = Y, Sinv.method = Sinv.method, sample.cov.inv = sample.cov.inv) -N*observed } # 4b: hessian Mu and vech(Sigma) from samplestats lav_mvnorm_h1_logl_hessian_samplestats <- function(sample.mean = NULL, # unused! sample.cov = NULL, sample.nobs = NULL, Sinv.method = "eigen", sample.cov.inv = NULL) { N <- sample.nobs # observed information observed <- lav_mvnorm_h1_information_observed_samplestats(sample.mean = sample.mean, sample.cov = sample.cov, Sinv.method = Sinv.method, sample.cov.inv = sample.cov.inv) -N*observed } # 5) Information h1 (not expected == observed if data is complete!) # 5a: unit expected information h1 lav_mvnorm_h1_information_expected <- function(Y = NULL, sample.cov = NULL, Sinv.method = "eigen", sample.cov.inv = NULL) { if(is.null(sample.cov.inv)) { if(is.null(sample.cov)) { # sample statistics sample.mean <- colMeans(Y); N <- NROW(Y) sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean) } # invert sample.cov sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov, logdet = FALSE, Sinv.method = Sinv.method) } I11 <- sample.cov.inv I22 <- 0.5 * lav_matrix_duplication_pre_post(sample.cov.inv %x% sample.cov.inv) lav_matrix_bdiag(I11, I22) } # 5b: unit observed information h1 lav_mvnorm_h1_information_observed_data <- function(Y = NULL, Sinv.method = "eigen", sample.cov.inv = NULL) { lav_mvnorm_h1_information_expected(Y = Y, Sinv.method = Sinv.method, sample.cov.inv = sample.cov.inv) } # 5b-bis: observed information h1 from sample statistics lav_mvnorm_h1_information_observed_samplestats <- function(sample.mean = NULL, # unused! 
sample.cov = NULL, Sinv.method = "eigen", sample.cov.inv = NULL) { if(is.null(sample.cov.inv)) { # invert sample.cov sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov, logdet = FALSE, Sinv.method = Sinv.method) } I11 <- sample.cov.inv I22 <- 0.5 * lav_matrix_duplication_pre_post(sample.cov.inv %x% sample.cov.inv) lav_matrix_bdiag(I11, I22) } # 5c: unit first-order information h1 # note: first order information h1 == A1 %*% Gamma %*% A1 # (where A1 = obs/exp information h1) lav_mvnorm_h1_information_firstorder <- function(Y = NULL, sample.cov = NULL, Sinv.method = "eigen", sample.cov.inv = NULL, Gamma = NULL) { # Gamma if(is.null(Gamma)) { Gamma <- lav_samplestats_Gamma(Y, meanstructure = TRUE) } # sample.cov.in if(is.null(sample.cov.inv)) { # invert sample.cov sample.cov.inv <- lav_matrix_symmetric_inverse(S = sample.cov, logdet = FALSE, Sinv.method = Sinv.method) } # A1 A1 <- lav_mvnorm_h1_information_expected(Y = Y, Sinv.method = Sinv.method, sample.cov.inv = sample.cov.inv) A1 %*% Gamma %*% A1 } # 6) inverted information h1 mu + vech(Sigma) # 6a: (unit) inverted expected information (A1.inv = Gamma.NT) # 6b: (unit) inverted observed information (A1.inv = Gamma.NT) lav_mvnorm_h1_inverted_information_expected <- lav_mvnorm_h1_inverted_information_observed <- function(Y = NULL, sample.cov = NULL) { # sample.cov if(is.null(sample.cov)) { sample.mean <- colMeans(Y); N <- NROW(Y) sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean) } I11 <- sample.cov I22 <- 2 * lav_matrix_duplication_ginv_pre_post(sample.cov %x% sample.cov) Gamma.NT <- lav_matrix_bdiag(I11, I22) Gamma.NT } # 6c: (unit) inverted first-order information (B1.inv) lav_mvnorm_h1_inverted_information_firstorder <- function(Y = NULL, sample.cov = NULL, Sinv.method = "eigen", sample.cov.inv = NULL, Gamma = NULL) { # Gamma if(is.null(Gamma)) { Gamma <- lav_samplestats_Gamma(Y, meanstructure = TRUE) } # Gamma.NT Gamma.NT <- lav_mvnorm_h1_inverted_information_expected(Y = Y, sample.cov = sample.cov) Gamma.NT %*% solve(Gamma, Gamma.NT) } # 7) ACOV h1 mu + vech(Sigma) # 7a: 1/N * Gamma.NT # 7b: 1/N * Gamma.NT lav_mvnorm_h1_acov_expected <- lav_mvnorm_h1_acov_observed <- function(Y = NULL, sample.cov = NULL) { N <- NROW(Y) Gamma.NT <- lav_mvnorm_h1_inverted_information_expected(Y = Y, sample.cov = sample.cov) (1/N) * Gamma.NT } # 7c: 1/N * (Gamma.NT * Gamma^{-1} * Gamma.NT) lav_mvnorm_h1_acov_firstorder <- function(Y = NULL, sample.cov = NULL, Sinv.method = "eigen", sample.cov.inv = NULL, Gamma = NULL) { N <- NROW(Y) J1.inv <- lav_mvnorm_h1_inverted_information_firstorder(Y = Y, sample.cov = sample.cov, Sinv.method = Sinv.method, sample.cov.inv = sample.cov.inv, Gamma = Gamma) (1/N) * J1.inv } # 7d: 1/N * Gamma (sandwich) lav_mvnorm_h1_acov_sandwich <- function(Y = NULL, sample.cov = NULL, Gamma = NULL) { N <- NROW(Y) # Gamma if(is.null(Gamma)) { Gamma <- lav_samplestats_Gamma(Y, meanstructure = TRUE) } (1/N) * Gamma } lavaan/R/lav_model_compute.R0000644000176200001440000006004213052774424015550 0ustar liggesuserscomputeSigmaHat <- function(lavmodel = NULL, GLIST = NULL, extra = FALSE, delta = TRUE, debug = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nmat <- lavmodel@nmat nvar <- lavmodel@nvar nblocks <- lavmodel@nblocks representation <- lavmodel@representation # return a list Sigma.hat <- vector("list", length=nblocks) for(g in 1:nblocks) { # which mm belong to group g? 
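        # e.g. nmat = c(3, 3): cumsum(c(0, nmat)) = c(0, 3, 6), so block 1
        # uses GLIST[1:3] and block 2 uses GLIST[4:6]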
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[mm.in.group] if(representation == "LISREL") { Sigma.hat[[g]] <- computeSigmaHat.LISREL(MLIST = MLIST, delta = delta) } else { stop("only representation LISREL has been implemented for now") } if(debug) print(Sigma.hat[[g]]) if(extra) { # check if matrix is positive definite ev <- eigen(Sigma.hat[[g]], symmetric=TRUE, only.values=TRUE)$values if(any(ev < .Machine$double.eps) || sum(ev) == 0) { Sigma.hat.inv <- MASS::ginv(Sigma.hat[[g]]) Sigma.hat.log.det <- log(.Machine$double.eps) attr(Sigma.hat[[g]], "po") <- FALSE attr(Sigma.hat[[g]], "inv") <- Sigma.hat.inv attr(Sigma.hat[[g]], "log.det") <- Sigma.hat.log.det } else { ## FIXME ## since we already do an 'eigen' decomposition, we should ## 'reuse' that information, instead of doing a new cholesky Sigma.hat.inv <- inv.chol(Sigma.hat[[g]], logdet=TRUE) Sigma.hat.log.det <- attr(Sigma.hat.inv, "logdet") attr(Sigma.hat[[g]], "po") <- TRUE attr(Sigma.hat[[g]], "inv") <- Sigma.hat.inv attr(Sigma.hat[[g]], "log.det") <- Sigma.hat.log.det } } } # nblocks Sigma.hat } ## only if conditional.x = TRUE ## compute the (larger) unconditional 'joint' covariance matrix (y,x) ## ## Sigma (Joint ) = [ (S11, S12), ## (S21, S22) ] where ## S11 = Sigma.res + PI %*% cov.x %*% t(PI) ## S12 = PI %*% cov.x ## S21 = cov.x %*% t(PI) ## S22 = cov.x computeSigmaHatJoint <- function(lavmodel = NULL, GLIST = NULL, extra = FALSE, lavsamplestats = NULL, delta = TRUE, debug = FALSE) { stopifnot(lavmodel@conditional.x) # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nmat <- lavmodel@nmat nvar <- lavmodel@nvar nblocks <- lavmodel@nblocks representation <- lavmodel@representation # return a list Sigma.hat <- vector("list", length=nblocks) for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[mm.in.group] if(representation == "LISREL") { res.Sigma <- computeSigmaHat.LISREL(MLIST = MLIST, delta = delta) res.int <- computeMuHat.LISREL(MLIST = MLIST) res.slopes <- computePI.LISREL(MLIST = MLIST) S.xx <- lavsamplestats@cov.x[[g]] S.yy <- res.Sigma + res.slopes %*% S.xx %*% t(res.slopes) S.yx <- res.slopes %*% S.xx S.xy <- S.xx %*% t(res.slopes) Sigma.hat[[g]] <- rbind( cbind(S.yy, S.yx), cbind(S.xy, S.xx) ) } else { stop("only representation LISREL has been implemented for now") } if(debug) print(Sigma.hat[[g]]) if(extra) { # check if matrix is positive definite ev <- eigen(Sigma.hat[[g]], symmetric=TRUE, only.values=TRUE)$values if(any(ev < .Machine$double.eps) || sum(ev) == 0) { Sigma.hat.inv <- MASS::ginv(Sigma.hat[[g]]) Sigma.hat.log.det <- log(.Machine$double.eps) attr(Sigma.hat[[g]], "po") <- FALSE attr(Sigma.hat[[g]], "inv") <- Sigma.hat.inv attr(Sigma.hat[[g]], "log.det") <- Sigma.hat.log.det } else { ## FIXME ## since we already do an 'eigen' decomposition, we should ## 'reuse' that information, instead of doing a new cholesky Sigma.hat.inv <- inv.chol(Sigma.hat[[g]], logdet=TRUE) Sigma.hat.log.det <- attr(Sigma.hat.inv, "logdet") attr(Sigma.hat[[g]], "po") <- TRUE attr(Sigma.hat[[g]], "inv") <- Sigma.hat.inv attr(Sigma.hat[[g]], "log.det") <- Sigma.hat.log.det } } } # nblocks Sigma.hat } computeMuHat <- function(lavmodel = NULL, GLIST = NULL) { # state or final? 
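    # a NULL GLIST means: use the parameter matrices stored in the fitted
    # model; a non-NULL GLIST typically holds a candidate state during
    # optimization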
if(is.null(GLIST)) GLIST <- lavmodel@GLIST nmat <- lavmodel@nmat nblocks <- lavmodel@nblocks representation <- lavmodel@representation meanstructure <- lavmodel@meanstructure # return a list Mu.hat <- vector("list", length=nblocks) for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] if(!meanstructure) { Mu.hat[[g]] <- numeric( lavmodel@nvar[g] ) } else if(representation == "LISREL") { Mu.hat[[g]] <- computeMuHat.LISREL(MLIST = GLIST[ mm.in.group ]) } else { stop("only representation LISREL has been implemented for now") } } # nblocks Mu.hat } ## only if conditional.x = TRUE ## compute the (larger) unconditional 'joint' mean vector (y,x) ## ## Mu (Joint ) = [ Mu.y, Mu.x ] where ## Mu.y = res.int + PI %*% M.x ## Mu.x = M.x computeMuHatJoint <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nmat <- lavmodel@nmat nblocks <- lavmodel@nblocks representation <- lavmodel@representation meanstructure <- lavmodel@meanstructure # return a list Mu.hat <- vector("list", length=nblocks) for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] if(!meanstructure) { Mu.hat[[g]] <- numeric( lavmodel@nvar[g] ) } else if(representation == "LISREL") { MLIST <- GLIST[ mm.in.group ] res.int <- computeMuHat.LISREL(MLIST = MLIST) res.slopes <- computePI.LISREL(MLIST = MLIST) M.x <- lavsamplestats@mean.x[[g]] Mu.y <- res.int + res.slopes %*% M.x Mu.x <- M.x Mu.hat[[g]] <- c(Mu.y, Mu.x) } else { stop("only representation LISREL has been implemented for now") } } # nblocks Mu.hat } # TH.star = DELTA.star * (th.star - pi0.star) # see Muthen 1984 eq 11 computeTH <- function(lavmodel = NULL, GLIST = NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation th.idx <- lavmodel@th.idx # return a list TH <- vector("list", length=nblocks) # compute TH for each group for(g in 1:nblocks) { if(length(th.idx[[g]]) == 0) { TH[[g]] <- numeric(0L) next } # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] if(representation == "LISREL") { TH[[g]] <- computeTH.LISREL(MLIST = GLIST[ mm.in.group ], th.idx=th.idx[[g]]) } else { stop("only representation LISREL has been implemented for now") } } TH } # PI = slope structure # see Muthen 1984 eq 12 computePI <- function(lavmodel = NULL, GLIST = NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation conditional.x <- lavmodel@conditional.x # return a list PI <- vector("list", length=nblocks) # compute TH for each group for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] if(!conditional.x) { PI.g <- numeric( lavmodel@nvar[g] ) } else if(representation == "LISREL") { PI.g <- computePI.LISREL(MLIST = MLIST) } else { stop("only representation LISREL has been implemented for now") } PI[[g]] <- PI.g } PI } # GW = group weight computeGW <- function(lavmodel = NULL, GLIST=NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation group.w.free <- lavmodel@group.w.free # return a list GW <- vector("list", length=nblocks) # compute GW for each group for(g in 1:nblocks) { # which mm belong to group g? 
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ]
if(!group.w.free) { GW.g <- 0.0 # FIXME
} else if(representation == "LISREL") { GW.g <- as.numeric(MLIST$gw[1,1]) } else { stop("only representation LISREL has been implemented for now") }
GW[[g]] <- GW.g }
# transform to proportions
#gw <- unlist(GW)
#gw <- exp(gw) / sum(exp(gw))
#for(g in 1:nblocks) {
# GW[[g]] <- gw[g]
#}
GW }
# *unconditional* variance/covariance matrix of Y
# - same as Sigma.hat if all Y are continuous
# - if also Gamma, cov.x is used (only if categorical)
computeVY <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, diagonal.only = FALSE) { # state or final?
if(is.null(GLIST)) GLIST <- lavmodel@GLIST
nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation
# return a list
VY <- vector("list", length=nblocks)
# compute VY for each group
for(g in 1:nblocks) { # which mm belong to group g?
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ]
if(!is.null(lavsamplestats)) { cov.x <- lavsamplestats@cov.x[[g]] } else { if(lavmodel@fixed.x) { stop("lavaan ERROR: fixed.x = TRUE, but cov.x is NULL") } cov.x <- NULL }
if(representation == "LISREL") { VY.g <- computeVY.LISREL(MLIST = MLIST, cov.x = cov.x) } else { stop("only representation LISREL has been implemented for now") }
if(diagonal.only) { VY[[g]] <- diag(VY.g) } else { VY[[g]] <- VY.g } }
VY }
# V(ETA): latent variances/covariances
computeVETA <- function(lavmodel = NULL, GLIST = NULL, remove.dummy.lv = FALSE, lavsamplestats = NULL) { # state or final?
if(is.null(GLIST)) GLIST <- lavmodel@GLIST
nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation
# return a list
ETA <- vector("list", length=nblocks)
# compute ETA for each group
for(g in 1:nblocks) { # which mm belong to group g?
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ]
if(!is.null(lavsamplestats)) { cov.x <- lavsamplestats@cov.x[[g]] } else { if(lavmodel@fixed.x) { stop("lavaan ERROR: fixed.x = TRUE, but cov.x is NULL") } cov.x <- NULL }
if(representation == "LISREL") { ETA.g <- computeVETA.LISREL(MLIST = MLIST, cov.x = cov.x)
if(remove.dummy.lv) { # remove all dummy latent variables
lv.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]])
if(!is.null(lv.idx)) { ETA.g <- ETA.g[-lv.idx, -lv.idx, drop=FALSE] } } } else { stop("only representation LISREL has been implemented for now") }
ETA[[g]] <- ETA.g }
ETA }
# V(ETA|x_i): latent variances/covariances, conditional on x_i
# - this is always (I-B)^-1 PSI (I-B)^-T, after REMOVING lv dummies
computeVETAx <- function(lavmodel = NULL, GLIST = NULL) { # state or final?
if(is.null(GLIST)) GLIST <- lavmodel@GLIST
nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation
# return a list
ETA <- vector("list", length=nblocks)
# compute ETA for each group
for(g in 1:nblocks) { # which mm belong to group g?
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ]
if(representation == "LISREL") { lv.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]])
ETA.g <- computeVETAx.LISREL(MLIST = MLIST, lv.dummy.idx = lv.idx) } else { stop("only representation LISREL has been implemented for now") }
ETA[[g]] <- ETA.g }
ETA }
# COV: observed+latent variances/covariances
computeCOV <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, remove.dummy.lv = FALSE) { # state or final?
if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list COV <- vector("list", length=nblocks) # compute COV for each group for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] cov.x <- lavsamplestats@cov.x[[g]] if(representation == "LISREL") { COV.g <- computeCOV.LISREL(MLIST = MLIST, cov.x = cov.x) if(remove.dummy.lv) { # remove all dummy latent variables lv.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]]) if(!is.null(lv.idx)) { # offset for ov lambda.names <- lavmodel@dimNames[[which(names(GLIST) == "lambda")[g]]][[1L]] lv.idx <- lv.idx + length(lambda.names) COV.g <- COV.g[-lv.idx, -lv.idx, drop=FALSE] } } } else { stop("only representation LISREL has been implemented for now") } COV[[g]] <- COV.g } COV } # E(ETA): expectation (means) of latent variables (return vector) computeEETA <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, remove.dummy.lv = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list EETA <- vector("list", length=nblocks) # compute E(ETA) for each group for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] if(representation == "LISREL") { EETA.g <- computeEETA.LISREL(MLIST, mean.x=lavsamplestats@mean.x[[g]], sample.mean=lavsamplestats@mean[[g]], ov.y.dummy.lv.idx=lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx=lavmodel@ov.x.dummy.lv.idx[[g]], ov.y.dummy.ov.idx=lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx=lavmodel@ov.x.dummy.ov.idx[[g]]) if(remove.dummy.lv) { # remove dummy lv.dummy.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]]) if(length(lv.dummy.idx) > 0L) { EETA.g <- EETA.g[-lv.dummy.idx] } } } else { stop("only representation LISREL has been implemented for now") } EETA[[g]] <- EETA.g } EETA } # E(ETA|x_i): conditional expectation (means) of latent variables # for a given value of x_i (instead of E(x_i)) computeEETAx <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, eXo = NULL, remove.dummy.lv = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list EETAx <- vector("list", length=nblocks) # compute E(ETA) for each group for(g in 1:nblocks) { # which mm belong to group g? 
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] EXO <- eXo[[g]] if(is.null(EXO)) { # create empty matrix EXO <- matrix(0, lavsamplestats@nobs[[g]], 0L) } if(representation == "LISREL") { EETAx.g <- computeEETAx.LISREL(MLIST, eXo=EXO, N=lavsamplestats@nobs[[g]], sample.mean=lavsamplestats@mean[[g]], ov.y.dummy.lv.idx=lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx=lavmodel@ov.x.dummy.lv.idx[[g]], ov.y.dummy.ov.idx=lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx=lavmodel@ov.x.dummy.ov.idx[[g]]) if(remove.dummy.lv) { # remove dummy lv.dummy.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]]) if(length(lv.dummy.idx) > 0L) { EETAx.g <- EETAx.g[ ,-lv.dummy.idx, drop=FALSE] } } } else { stop("only representation LISREL has been implemented for now") } EETAx[[g]] <- EETAx.g } EETAx } # return 'regular' LAMBDA computeLAMBDA <- function(lavmodel = NULL, GLIST = NULL, remove.dummy.lv = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list LAMBDA <- vector("list", length=nblocks) # compute LAMBDA for each group for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] if(representation == "LISREL") { LAMBDA.g <- computeLAMBDA.LISREL(MLIST = MLIST, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[g]], remove.dummy.lv = remove.dummy.lv) } else { stop("only representation LISREL has been implemented for now") } LAMBDA[[g]] <- LAMBDA.g } LAMBDA } # THETA: observed (residual) variances computeTHETA <- function(lavmodel = NULL, GLIST = NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list THETA <- vector("list", length=nblocks) # compute THETA for each group for(g in 1:nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] if(representation == "LISREL") { THETA.g <- computeTHETA.LISREL(MLIST = MLIST, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[g]]) } else { stop("only representation LISREL has been implemented for now") } THETA[[g]] <- THETA.g } THETA } # E(Y): expectation (mean) of observed variables # returns vector 1 x nvar computeEY <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks nmat <- lavmodel@nmat representation <- lavmodel@representation # return a list EY <- vector("list", length=nblocks) # compute E(Y) for each group for(g in 1:nblocks) { # which mm belong to group g? 
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] MLIST <- GLIST[ mm.in.group ] if(representation == "LISREL") { EY.g <- computeEY.LISREL(MLIST = MLIST, mean.x=lavsamplestats@mean.x[[g]], sample.mean=lavsamplestats@mean[[g]], ov.y.dummy.ov.idx=lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx=lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx=lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx=lavmodel@ov.x.dummy.lv.idx[[g]]) } else { stop("only representation LISREL has been implemented for now") } EY[[g]] <- EY.g } EY } # E(Y | ETA, x_i): conditional expectation (means) of observed variables # for a given value of x_i AND eta_i computeYHAT <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, eXo = NULL, ETA = NULL, duplicate = FALSE) { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST # ngroups, not nblocks! ngroups <- lavsamplestats@ngroups # return a list YHAT <- vector("list", length=ngroups) # compute YHAT for each group for(g in seq_len(ngroups)) { # which mm belong to group g? # FIXME: what if more than g blocks??? mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0L,lavmodel@nmat))[g] MLIST <- GLIST[ mm.in.group ] if(is.null(eXo[[g]]) && duplicate) { Nobs <- lavsamplestats@nobs[[g]] } else { Nobs <- 1L } if(lavmodel@representation == "LISREL") { if(lavmodel@conditional.x) { YHAT[[g]] <- computeEYetax.LISREL(MLIST = MLIST, eXo = eXo[[g]], ETA = ETA[[g]], N = Nobs, sample.mean = lavsamplestats@mean[[g]], ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[g]]) } else { # unconditional case YHAT[[g]] <- computeEYetax3.LISREL(MLIST = MLIST, ETA = ETA[[g]], sample.mean = lavsamplestats@mean[[g]], mean.x = lavsamplestats@mean.x[[g]], ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[g]]) # impute back ov.y values that are NOT indicators } } else { stop("lavaan ERROR: representation ", lavmodel@representation, " not supported yet.") } } YHAT } lavaan/R/lav_lavaanList_inspect.R0000644000176200001440000002303713053066601016532 0ustar liggesusers# inspect a lavaanList object inspect.lavaanList <- function(object, what = "free", ...) { lavListInspect(object = object, what = what, add.labels = TRUE, add.class = TRUE, drop.list.single.group = TRUE) } # the `tech' version: no labels, full matrices, ... 
for further processing lavTech.lavaanList <- function(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) {
lavListInspect(object = object, what = what, add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) }
lavListTech <- function(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) {
lavListInspect(object = object, what = what, add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) }
# just in case someone uses lavInspect on a lavaanList object
lavInspect.lavaanList <- function(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) {
lavListInspect(object = object, what = what, add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group) }
lavListInspect <- function(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) {
# object must inherit from class lavaanList
stopifnot(inherits(object, "lavaanList"))
# only a single argument
if(length(what) > 1) { stop("`what' argument contains multiple arguments; only one is allowed") }
# be case insensitive
what <- tolower(what)
#### model matrices, with different contents ####
if(what == "free") { lav_lavaanList_inspect_modelmatrices(object, what = "free", type = "free", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group)
} else if(what == "partable" || what == "user") { lav_lavaanList_inspect_modelmatrices(object, what = "free", type="partable", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group)
} else if(what == "start" || what == "starting.values") { lav_lavaanList_inspect_modelmatrices(object, what = "start", add.labels = add.labels, add.class = add.class, list.by.group = list.by.group, drop.list.single.group = drop.list.single.group)
#### parameter table ####
} else if(what == "list") { parTable(object)
#### data + missingness ####
} else if(what == "ngroups") { object@Data@ngroups } else if(what == "group") { object@Data@group } else if(what == "cluster") { object@Data@cluster } else if(what == "ordered") { object@Data@ordered } else if(what == "group.label") { object@Data@group.label } else if(what == "nobs") { unlist( object@Data@nobs ) } else if(what == "norig") { unlist( object@Data@norig ) } else if(what == "ntotal") { sum(unlist( object@Data@nobs ))
#### meanstructure, categorical ####
} else if(what == "meanstructure") { object@Model@meanstructure } else if(what == "categorical") { object@Model@categorical } else if(what == "fixed.x") { object@Model@fixed.x } else if(what == "parameterization") { object@Model@parameterization
# options
} else if(what == "options" || what == "lavoptions") { object@Options
# call
} else if(what == "call") { as.list( object@call )
#### not found ####
} else { stop("unknown `what' argument in inspect function: `", what, "'") } }
lav_lavaanList_inspect_start <- function(object) {
# from 0.5-19, they are in the partable
if(!is.null(object@ParTable$start)) { OUT <- object@ParTable$start } else {
# in < 0.5-19, we should look in @Fit@start
OUT <- object@Fit@start }
OUT }
lav_lavaanList_inspect_modelmatrices <- function(object, what = "free", type = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) {
GLIST <- object@Model@GLIST
for(mm in 1:length(GLIST)) {
if(add.labels) { dimnames(GLIST[[mm]]) <- object@Model@dimNames[[mm]] }
if(what == "free") {
# fill in free parameter counts
if(type == "free") { m.el.idx <- object@Model@m.free.idx[[mm]] x.el.idx <- object@Model@x.free.idx[[mm]]
#} else if(type == "unco") {
# m.el.idx <- object@Model@m.unco.idx[[mm]]
# x.el.idx <- object@Model@x.unco.idx[[mm]]
} else if(type == "partable") { m.el.idx <- object@Model@m.user.idx[[mm]] x.el.idx <- object@Model@x.user.idx[[mm]] } else { stop("lavaan ERROR: unknown type argument:", type) }
# erase everything
GLIST[[mm]][,] <- 0.0 GLIST[[mm]][m.el.idx] <- x.el.idx } else if(what == "start") {
# fill in starting values
m.user.idx <- object@Model@m.user.idx[[mm]] x.user.idx <- object@Model@x.user.idx[[mm]] START <- lav_lavaanList_inspect_start(object) GLIST[[mm]][m.user.idx] <- START[x.user.idx] }
# class
if(add.class) { if(object@Model@isSymmetric[mm]) { class(GLIST[[mm]]) <- c("lavaan.matrix.symmetric", "matrix") } else { class(GLIST[[mm]]) <- c("lavaan.matrix", "matrix") } } }
# try to reflect `equality constraints'
con.flag <- FALSE
if(what == "free" && object@Model@eq.constraints) {
# extract constraints from parameter table
PT <- parTable(object)
CON <- PT[PT$op %in% c("==","<",">") ,c("lhs","op","rhs")]
rownames(CON) <- NULL
# replace 'labels' by parameter numbers
ID <- lav_partable_constraints_label_id(PT)
LABEL <- names(ID)
for(con in 1:nrow(CON)) {
# lhs
LHS.labels <- all.vars(as.formula(paste("~",CON[con,"lhs"])))
if(length(LHS.labels) > 0L) {
# par id
LHS.freeid <- ID[match(LHS.labels, LABEL)]
# substitute
tmp <- CON[con,"lhs"]
for(pat in 1:length(LHS.labels)) { tmp <- sub(LHS.labels[pat], LHS.freeid[pat], tmp) }
CON[con,"lhs"] <- tmp }
# rhs
RHS.labels <- all.vars(as.formula(paste("~",CON[con,"rhs"])))
if(length(RHS.labels) > 0L) {
# par id
RHS.freeid <- ID[match(RHS.labels, LABEL)]
# substitute
tmp <- CON[con,"rhs"]
for(pat in 1:length(RHS.labels)) { tmp <- sub(RHS.labels[pat], RHS.freeid[pat], tmp) }
CON[con,"rhs"] <- tmp } } # con
# add this info at the top
#GLIST <- c(constraints = list(CON), GLIST)
#no, not a good idea, it does not work with list.by.group
# add it as a 'header' attribute?
attr(CON, "header") <- "Note: model contains equality constraints:"
con.flag <- TRUE }
# should we group them per group?
if(list.by.group) {
lavmodel <- object@Model
nmat <- lavmodel@nmat
OUT <- vector("list", length = object@Data@ngroups)
for(g in 1:object@Data@ngroups) {
# which mm belong to group g?
mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g]
mm.names <- names( GLIST[mm.in.group] )
OUT[[g]] <- GLIST[mm.in.group] }
if(object@Data@ngroups == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(object@Data@group.label) > 0L) { names(OUT) <- unlist(object@Data@group.label) } } } else { OUT <- GLIST }
# header
if(con.flag) { attr(OUT, "header") <- CON }
# lavaan.list
if(add.class) { class(OUT) <- c("lavaan.list", "list") }
OUT }
lavaan/R/lav_export_bugs.R0000644000176200001440000003125413053015603015244 0ustar liggesusers# export to BUGS syntax
# we assume that N1, N2, ...
are in data lav2bugs <- function(partable, pta = NULL, as.function.=FALSE) { # get parameter table attributes pta <- lav_partable_attributes(partable = partable, pta = pta) vnames <- pta$vnames; nblocks <- pta$nblocks nvar <- pta$nvar; nfac <- pta$nfac # sanity check partable <- lav2check(partable) # tabs t1 <- paste(rep(" ", 2L), collapse="") t2 <- paste(rep(" ", 4L), collapse="") t3 <- paste(rep(" ", 6L), collapse="") t4 <- paste(rep(" ", 8L), collapse="") # TXT header if(as.function.) { TXT <- paste("{\n", sep="") } else { TXT <- paste("model {\n", sep="") } # model for every i for(g in 1:nblocks) { ov.names <- vnames$ov[[g]] lv.names <- vnames$lv[[g]] yname <- paste("y", g, sep="") if(nblocks > 1L) { TXT <- paste(TXT, t1, "# block ", g, "\n", sep="") } else { TXT <- paste(TXT, "\n") } TXT <- paste(TXT, t1, "for(i in 1:N", g, ") {\n", sep="") # ov.nox - all observed variables (except exogenous ones) ov.names.nox <- vnames$ov.nox[[g]]; nov <- length(ov.names.nox) TXT <- paste(TXT, "\n", t2, "# ov.nox", sep="") for(i in 1:nov) { ov.idx <- match(ov.names.nox[i], ov.names) theta.free.idx <- which(partable$block == g & partable$op == "~~" & partable$lhs == partable$rhs & partable$lhs == ov.names.nox[i]) if(length(theta.free.idx) != 1L) { stop("lavaan ERROR: parameter for residual variance ", ov.names.nox[i], " not found") } else { theta.idx <- partable$free[ theta.free.idx ] } TXT <- paste(TXT, "\n", t2, yname, "[i,", ov.idx, "] ~ dnorm(mu", g, "[i,", ov.idx, "], itheta[", theta.idx, "])", sep="") } TXT <- paste(TXT, "\n", t2, sep="") for(i in 1:nov) { ov.idx <- match(ov.names.nox[i], ov.names) TXT <- paste(TXT, "\n", t2, "mu", g, "[i,", ov.idx, "] <- ", sep="") # find rhs for this observed variable # 1. intercept? int.idx <- which(partable$block == g & partable$op == "~1" & partable$lhs == ov.names.nox[i]) if(length(int.idx) == 1L) { # fixed or free? if(partable$free[int.idx] == 0L) { TXT <- paste(TXT, partable$ustart[int.idx], sep="") } else { TXT <- paste(TXT, "theta[", partable$free[int.idx], "]", sep="") } } else { # no intercept, say '0', so we always have rhs TXT <- paste(TXT, "0", sep="") } # 2. factor loading? lam.idx <- which(partable$block == g & partable$op == "=~" & partable$rhs == ov.names.nox[i]) for(j in lam.idx) { # fixed or free? if(partable$free[j] == 0L) { TXT <- paste(TXT, " + ", partable$ustart[j], "*eta", g, "[i,", match(partable$lhs[j], lv.names) , "]", sep="") } else { TXT <- paste(TXT, " + ", "theta[", partable$free[j], "]*eta", g, "[i,", match(partable$lhs[j], lv.names) , "]", sep="") } } # 3. regression? r.idx <- which(partable$block == g & partable$op == "~" & partable$lhs == ov.names.nox[i]) for(j in r.idx) { # what is the rhs? rhs <- partable$rhs[j] if(rhs %in% lv.names) { RHS <- paste("eta", g, "[i,", match(rhs, lv.names), "]", sep="") } else if(rhs %in% vnames$ov[[g]]) { RHS <- paste("y", g, "[i,", match(rhs, ov.names), "]", sep="") } # fixed or free? 
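# (note, added for clarity: throughout this exporter, a fixed parameter is
#  written into the BUGS syntax as its numeric value partable$ustart[j],
#  while a free parameter is written as an element theta[k] of the BUGS
#  parameter vector, with k = partable$free[j])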
if(partable$free[j] == 0L) { TXT <- paste(TXT, " + ", partable$ustart[j], "*", RHS, sep="") } else { TXT <- paste(TXT, " + ", "theta[", partable$free[j], "]*", RHS, sep="") } } }
# lv.y
# var(lv.y) = PSI (lisrel style)
lv.y <- vnames$lv.y[[g]]
if(length(lv.y) > 0L) {
TXT <- paste(TXT, "\n\n", t2, "# lv.y", sep="")
lv.y.idx <- match(lv.y, lv.names); ny <- length(lv.y.idx)
for(j in 1:ny) {
theta.free.idx <- which(partable$block == g & partable$op == "~~" & partable$lhs == partable$rhs & partable$lhs == lv.y[j])
if(length(theta.free.idx) != 1L) { stop("lavaan ERROR: parameter for residual variance ", lv.y[j], " not found") } else { theta.idx <- partable$free[ theta.free.idx ] }
TXT <- paste(TXT, "\n", t2, # dnorm for now
"eta", g, "[i,", lv.y.idx[j], "] ~ dnorm(mu.eta", g, "[i,", lv.y.idx[j], "], itheta[", theta.idx, "])", sep="") }
for(j in 1:ny) {
TXT <- paste(TXT, "\n", t2, # dnorm for now
"mu.eta", g, "[i,", lv.y.idx[j], "] <- ", sep="")
# lhs elements regression
# 1. intercept?
int.idx <- which(partable$block == g & partable$op == "~1" & partable$lhs == lv.y[j])
if(length(int.idx) == 1L) {
# fixed or free?
if(partable$free[int.idx] == 0L) { TXT <- paste(TXT, partable$ustart[int.idx], sep="") } else { TXT <- paste(TXT, "theta[", partable$free[int.idx], "]", sep="") } } else {
# no intercept, say '0', so we always have rhs
TXT <- paste(TXT, "0", sep="") }
rhs.idx <- which(partable$block == g & partable$op == "~" & partable$lhs == lv.y[j])
np <- length(rhs.idx)
for(p in 1:np) { TXT <- paste(TXT, " + ", "theta[", partable$free[rhs.idx[p]], "]*eta", g, "[i,", match(partable$rhs[rhs.idx[p]], lv.names), "]", sep="") } } }
# exogenous lv -- FIXME: we assume the lv.x array is continuous
# (eg 3,4,5, but NOT 3,5,6)
# var(lv.x) = PHI (lisrel style)
lv.x <- vnames$lv.x[[g]]
if(length(lv.x) > 0L) {
TXT <- paste(TXT, "\n\n", t2, "# lv.x", sep="")
lv.x.idx <- match(lv.x, lv.names); nx <- length(lv.x.idx)
TXT <- paste(TXT, "\n", t2, # dmnorm for now
"eta", g, "[i,", min(lv.x.idx), ":", max(lv.x.idx), "] ~ dmnorm(mu.eta", g, "[i,", min(lv.x.idx), ":", max(lv.x.idx), "], iphi", g, "[1:", nx, ",1:", nx, "])", sep="")
for(j in 1:nx) { TXT <- paste(TXT, "\n", t2, "mu.eta", g, "[i,", lv.x.idx[j], "] <- 0", sep="") } }
# exogenous ov ??? (what to do here?)
# end of this block
TXT <- paste(TXT, "\n\n", t1, "} # end of block ", g, "\n", sep="") }
# priors (both fixed and free)
TXT <- paste(TXT, "\n", t1, "# Priors free parameters (univariate):", sep="")
npt <- length(partable$lhs)
for(i in seq_len(npt)) {
if(partable$free[i] == 0L) next # skip non-free parameters
lhs <- partable$lhs[i]; op <- partable$op[i]; rhs <- partable$rhs[i]
free.idx <- partable$free[i]; g <- partable$block[i]
if(op == "=~") {
# factor loading
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] ~ dnorm(0.8, 1)", sep="")
} else if(op == "~") {
# regression
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] ~ dnorm(0, 1)", sep="")
} else if(op == "~~" && lhs == rhs) {
# variance
# 1. lv.x + lv.x (skip -> multivariate)
# 2. lv.y + lv.y
# 3. observed + observed
# 4. else -> fix (upgrade to latent?)
if(lhs %in% vnames$lv.x[[g]] && rhs %in% vnames$lv.x[[g]]) {
# lv.x: move to multivariate... (dwish)
next } else if(lhs %in% vnames$lv.y[[g]] && rhs %in% vnames$lv.y[[g]]) {
# lv.y
TXT <- paste(TXT, "\n", t1, "itheta[", free.idx, "] ~ dgamma(9, 4)", sep="")
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] <- 1/itheta[", free.idx, "]", sep="")
} else if(lhs %in% vnames$ov[[g]] && rhs %in% vnames$ov[[g]]) {
TXT <- paste(TXT, "\n", t1, "itheta[", free.idx, "] ~ dgamma(9, 4)", sep="")
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] <- 1/itheta[", free.idx, "]", sep="")
} else { stop("lavaan ERROR: FIXME!! parameter ", i) }
} else if(op == "~~" && lhs != rhs) {
# covariance
# 1. lv.x + lv.x (skip -> multivariate)
# 2. lv.y + lv.y
# 3. observed + observed
# 4. else -> fix (upgrade to latent?)
if(lhs %in% vnames$lv.x[[g]] && rhs %in% vnames$lv.x[[g]]) {
# exo lv covariance
next } else if(lhs %in% vnames$lv.y[[g]] && rhs %in% vnames$lv.y[[g]]) {
# lv.y
stop("lavaan ERROR: FIXME!! parameter ", i)
} else if(lhs %in% vnames$ov[[g]] && rhs %in% vnames$ov[[g]]) {
TXT <- paste(TXT, "\n", t1, "itheta[", free.idx, "] ~ dgamma(9, 4)", sep="")
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] <- 1/itheta[", free.idx, "]", sep="")
} else { stop("lavaan ERROR: FIXME!! parameter ", i) }
} else if(op == "~1") {
# intercept
TXT <- paste(TXT, "\n", t1, "theta[", free.idx, "] ~ dnorm(0, 1)", sep="")
} else { stop("lavaan ERROR: op not supported yet for parameter ", i) } }
TXT <- paste(TXT, "\n\n", t1, "# Priors free parameters (multivariate):", sep="")
for(g in 1:nblocks) {
lv.phi.idx <- which(partable$block == g & partable$op == "~~" & partable$lhs %in% vnames$lv.x[[g]] & partable$rhs %in% vnames$lv.x[[g]])
nx <- length(vnames$lv.x[[g]])
if(nx > 0L) {
TXT <- paste(TXT, "\n", t1, "iphi", g, "[1:", nx, ",1:", nx, "] ~ dwish(R", g, "[1:", nx, ",1:", nx, "], 5)", sep="")
TXT <- paste(TXT, "\n", t1, "phi", g, "[1:", nx, ",1:", nx, "] <- inverse(iphi", g, "[1:", nx, ",1:", nx, "])", sep="")
for(idx in lv.phi.idx) { TXT <- paste(TXT, "\n", t1, "theta[", partable$free[idx], "] <- phi", g, "[", match(partable$lhs[idx], vnames$lv.x[[g]]), ",", match(partable$rhs[idx], vnames$lv.x[[g]]), "]", sep="") } } }
# end of model
TXT <- paste(TXT, "\n\n", "} # End of model\n", sep="")
# end of model
if(as.function.) { out <- function() NULL formals(out) <- alist() body(out) <- parse(file="", text=TXT) } else { out <- TXT class(out) <- c("lavaan.character", "character") }
out }
lavaan/R/lav_model_information.R0000644000176200001440000004753513046034742016426 0ustar liggesusers# here, we compute various versions of the `information' matrix
# NOTE:
# 1) we ALWAYS compute the UNIT information (not the total information)
#
# 2) by default, we ignore the constraints (we deal with this when we
# take the inverse later on)
lav_model_information <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, Delta = NULL, lavcache = NULL, lavoptions = NULL, information = "observed", extra = FALSE, augmented = FALSE, inverted = FALSE, use.ginv = FALSE) {
estimator <- lavmodel@estimator
# compute information matrix
if(information == "observed") {
if(lavsamplestats@missing.flag) { group.weight <- FALSE } else { group.weight <- TRUE }
E <- lav_model_information_observed(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, group.weight = group.weight, lavoptions = lavoptions, augmented = augmented, inverted = inverted, use.ginv = use.ginv)
} else {
# structured or unstructured?
(since 0.5-23) if(!is.null(lavoptions) && !is.null(lavoptions$h1.information) && lavoptions$h1.information == "unstructured") { structured <- FALSE } else { structured <- TRUE } E <- lav_model_information_expected(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, extra = extra, structured = structured, augmented = augmented, inverted = inverted, use.ginv = use.ginv) } # information, augmented information, or inverted information E } # fisher/expected information # # information = Delta' H Delta, where H is the unit information of # the saturated model (evaluated either at the structured or unstructured # estimates) lav_model_information_expected <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, structured = TRUE, Delta = NULL, lavcache = NULL, extra = FALSE, augmented = FALSE, inverted = FALSE, use.ginv = FALSE) { estimator <- lavmodel@estimator if(inverted) { augmented <- TRUE } # compute DELTA if(is.null(Delta)) { Delta <- computeDelta(lavmodel = lavmodel) } # compute/get WLS.V # if DWLS or ULS, this is the diagonal only! (since 0.5-17) WLS.V <- lav_model_wls_v(lavmodel = lavmodel, lavsamplestats = lavsamplestats, structured = structured, lavdata = lavdata) # compute Information per group Info.group <- vector("list", length=lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { # note LISREL documentation suggest (Ng - 1) instead of Ng... fg <- lavsamplestats@nobs[[g]]/lavsamplestats@ntotal # compute information for this group if(estimator %in% c("DWLS", "ULS")) { # diagonal weight matrix Delta2 <- sqrt(WLS.V[[g]]) * Delta[[g]] Info.group[[g]] <- fg * crossprod(Delta2) } else { # full weight matrix # Info.group[[g]] <- # fg * (t(Delta[[g]]) %*% WLS.V[[g]] %*% Delta[[g]]) Info.group[[g]] <- fg * ( crossprod(Delta[[g]], WLS.V[[g]]) %*% Delta[[g]] ) } } # assemble over groups Information <- Info.group[[1]] if(lavsamplestats@ngroups > 1) { for(g in 2:lavsamplestats@ngroups) { Information <- Information + Info.group[[g]] } } # augmented information? if(augmented) { Information <- lav_model_information_augment_invert(lavmodel = lavmodel, information = Information, inverted = inverted, use.ginv = use.ginv) } if(extra) { attr(Information, "Delta") <- Delta attr(Information, "WLS.V") <- WLS.V # unweighted } # possibly augmented/inverted Information } # only for Mplus MLM lav_model_information_expected_MLM <- function(lavmodel = NULL, lavsamplestats = NULL, Delta = NULL, extra = FALSE, augmented = FALSE, inverted = FALSE, use.ginv = FALSE) { if(inverted) { augmented <- TRUE } if(is.null(Delta)) { Delta = computeDelta(lavmodel = lavmodel) } # compute WLS.V WLS.V <- vector("list", length=lavsamplestats@ngroups) if(lavmodel@group.w.free) { GW <- unlist(computeGW(lavmodel = lavmodel)) } for(g in 1:lavsamplestats@ngroups) { WLS.V[[g]] <- lav_mvnorm_h1_information_expected( sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) # the same as GLS... (except for the N/N-1 scaling) if(lavmodel@group.w.free) { # unweight!! 
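# (note, added for clarity -- an illustrative reading, not from the source:
#  the free group weight appears to be stored on a log scale, so exp(GW[g])
#  recovers the weight itself; dividing by nobs[[g]] puts it on the
#  per-observation metric of the unit information -- compare the commented
#  alternative scaling immediately below)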
a <- exp(GW[g]) / lavsamplestats@nobs[[g]] # a <- exp(GW[g]) * lavsamplestats@ntotal / lavsamplestats@nobs[[g]] WLS.V[[g]] <- lav_matrix_bdiag( matrix(a,1,1), WLS.V[[g]]) } } # compute Information per group Info.group <- vector("list", length=lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { fg <- lavsamplestats@nobs[[g]]/lavsamplestats@ntotal # compute information for this group Info.group[[g]] <- fg * (t(Delta[[g]]) %*% WLS.V[[g]] %*% Delta[[g]]) } # assemble over groups Information <- Info.group[[1]] if(lavsamplestats@ngroups > 1) { for(g in 2:lavsamplestats@ngroups) { Information <- Information + Info.group[[g]] } } # augmented information? if(augmented) { Information <- lav_model_information_augment_invert(lavmodel = lavmodel, information = Information, inverted = inverted, use.ginv = use.ginv) } if(extra) { attr(Information, "Delta") <- Delta attr(Information, "WLS.V") <- WLS.V # unweighted } Information } lav_model_information_observed <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, lavoptions = NULL, group.weight = TRUE, augmented = FALSE, inverted = FALSE, use.ginv = FALSE) { estimator <- lavmodel@estimator if(inverted) { augmented <- TRUE } # observed.information: # - "hessian": second derivative of objective function # - "h1": observed information matrix of saturated (h1) model, # pre- and post-multiplied by the jacobian of the model # parameters (Delta), usually evaluated at the structured # sample statistics if(!is.null(lavoptions) && !is.null(lavoptions$observed.information) && lavoptions$observed.information == "h1") { observed.information <- "h1" } else { observed.information <- "hessian" } if(observed.information == "hessian") { Hessian <- lav_model_hessian(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavoptions = lavoptions, lavcache = lavcache, group.weight = group.weight) # NOTE! What is the relationship between the Hessian of the objective # function, and the `information' matrix (unit or total) # 1. in lavaan, we ALWAYS minimize, so the Hessian is already pos def # 2. currently, all estimators give unit information, except MML and PML Information <- Hessian # divide by 'N' for MML and PML if(estimator == "PML" || estimator == "MML") { Information <- Information / lavsamplestats@ntotal } # using 'observed h1 information' } else { # compute DELTA Delta <- computeDelta(lavmodel = lavmodel) # compute observed information h1 if(lavmodel@estimator == "GLS" || lavmodel@estimator == "WLS") { WLS.V <- lavsamplestats@WLS.V } else if(lavmodel@estimator == "DWLS" || lavmodel@estimator == "ULS") { # diagonal only!! 
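# (note, added for clarity: for DWLS/ULS the full weight matrix is never
#  formed; WLS.VD holds only its diagonal, and the quadratic form is then
#  computed as crossprod(sqrt(WLS.VD) * Delta) rather than
#  t(Delta) %*% V %*% Delta -- see the Info.group loop below)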
WLS.V <- lavsamplestats@WLS.VD
} else if(lavmodel@estimator == "ML") {
WLS.V <- vector("list", length=lavsamplestats@ngroups)
# four options:
# - complete data, structured (default)
# - complete data, unstructured
# - incomplete data, structured (default)
# - incomplete data, unstructured
if(lavoptions$h1.information == "structured") { SIGMA <- computeSigmaHat(lavmodel = lavmodel) MU <- computeMuHat(lavmodel = lavmodel) } else { SIGMA <- lavsamplestats@cov MU <- lavsamplestats@mean }
# - if missing = two.stage, MU/SIGMA can be EM estimates
# if unstructured, or model-implied moments if structured
for(g in 1:lavsamplestats@ngroups) { WLS.V[[g]] <- lav_mvnorm_information_observed_samplestats( sample.mean = lavsamplestats@mean[[g]], sample.cov = lavsamplestats@cov[[g]], Mu = MU[[g]], Sigma = SIGMA[[g]]) }
} else { stop("lavaan ERROR: observed.information = ", dQuote(observed.information), " not supported for estimator ", dQuote(lavmodel@estimator) ) }
# compute Information per group
Info.group <- vector("list", length=lavsamplestats@ngroups)
for(g in 1:lavsamplestats@ngroups) {
fg <- lavsamplestats@nobs[[g]]/lavsamplestats@ntotal
# compute information for this group
if(estimator %in% c("DWLS", "ULS")) {
# diagonal weight matrix
Delta2 <- sqrt(WLS.V[[g]]) * Delta[[g]]
Info.group[[g]] <- fg * crossprod(Delta2)
} else {
# full weight matrix
Info.group[[g]] <- fg * ( crossprod(Delta[[g]], WLS.V[[g]]) %*% Delta[[g]] ) } }
# assemble over groups
Information <- Info.group[[1]]
if(lavsamplestats@ngroups > 1) { for(g in 2:lavsamplestats@ngroups) { Information <- Information + Info.group[[g]] } } }
# augmented information?
if(augmented) { Information <- lav_model_information_augment_invert(lavmodel = lavmodel, information = Information, inverted = inverted, use.ginv = use.ginv) }
# for two.stage + observed.hessian = "h1"
if(observed.information != "hessian") { attr(Information, "Delta") <- Delta attr(Information, "WLS.V") <- WLS.V }
Information }
# outer product of the case-wise scores (gradients)
lav_model_information_firstorder <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, extra = FALSE, check.pd = FALSE, augmented = FALSE, inverted = FALSE, use.ginv = FALSE) {
estimator <- lavmodel@estimator
if(inverted) { augmented <- TRUE }
B0.group <- vector("list", lavsamplestats@ngroups)
if(estimator == "PML") {
Delta <- computeDelta(lavmodel = lavmodel)
Sigma.hat <- computeSigmaHat(lavmodel = lavmodel)
TH <- computeTH(lavmodel = lavmodel)
if(lavmodel@nexo > 0L) { PI <- computePI(lavmodel = lavmodel) } else { PI <- vector("list", length = lavsamplestats@ngroups) }
} else {
Sigma.hat <- computeSigmaHat(lavmodel = lavmodel)
Mu.hat <- computeMuHat(lavmodel = lavmodel)
Delta <- computeDelta(lavmodel = lavmodel) }
for(g in 1:lavsamplestats@ngroups) {
if(estimator == "PML") {
# slow approach: compute outer product of case-wise scores
SC <- pml_deriv1(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = lavmodel@th.idx[[g]], num.idx = lavmodel@num.idx[[g]], X = lavdata@X[[g]], eXo = lavdata@eXo[[g]], PI = PI[[g]], lavcache = lavcache[[g]], missing = lavdata@missing, scores = TRUE, negative = FALSE)
# chain rule
group.SC <- SC %*% Delta[[g]]
# outer product
B0.group[[g]] <- crossprod(group.SC)
} else {
if(lavsamplestats@missing.flag) {
B1 <- lav_mvnorm_missing_information_firstorder(Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]])
} else {
if(lavmodel@meanstructure) {
B1 <- lav_mvnorm_information_firstorder(Y = lavdata@X[[g]], Mu = Mu.hat[[g]],
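# (model-implied mean, used when a meanstructure is
# present; the else-branch below plugs in the saturated
# sample mean instead)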
Sigma = Sigma.hat[[g]], meanstructure = lavmodel@meanstructure) } else { B1 <- lav_mvnorm_information_firstorder(Y = lavdata@X[[g]], Mu = lavsamplestats@mean[[g]], # saturated Sigma = Sigma.hat[[g]], meanstructure = lavmodel@meanstructure) } } B0.group[[g]] <- t(Delta[[g]]) %*% B1 %*% Delta[[g]] } } # g if(lavsamplestats@ngroups > 1L) { # groups weights B0 <- (lavsamplestats@nobs[[1]]/lavsamplestats@ntotal) * B0.group[[1]] for(g in 2:lavsamplestats@ngroups) { B0 <- B0 + (lavsamplestats@nobs[[g]]/lavsamplestats@ntotal) * B0.group[[g]] } } else { B0 <- B0.group[[1]] } Information <- B0 # NOTE: for MML and PML, we get 'total' information (instead of unit) # divide by 'N' for MML and PML if(estimator == "PML" || estimator == "MML") { Information <- Information / lavsamplestats@ntotal for(g in 1:lavsamplestats@ngroups) { B0.group[[g]] <- B0.group[[g]] / lavsamplestats@ntotal } } # augmented information? if(augmented) { Information <- lav_model_information_augment_invert(lavmodel = lavmodel, information = Information, check.pd = check.pd, inverted = inverted, use.ginv = use.ginv) } if(extra) { attr(Information, "B0.group") <- B0.group } Information } # create augmented information matrix (if needed), and take the inverse # (if inverted = TRUE), returning only the [1:npar, 1:npar] elements lav_model_information_augment_invert <- function(lavmodel = NULL, information = NULL, inverted = FALSE, check.pd = FALSE, use.ginv = FALSE) { npar <- nrow(information) is.augmented <- FALSE # handle constraints if(nrow(lavmodel@con.jac) > 0L) { H <- lavmodel@con.jac inactive.idx <- attr(H, "inactive.idx") lambda <- lavmodel@con.lambda # lagrangean coefs if(length(inactive.idx) > 0L) { H <- H[-inactive.idx,,drop=FALSE] lambda <- lambda[-inactive.idx] } if(nrow(H) > 0L) { is.augmented <- TRUE H0 <- matrix(0,nrow(H),nrow(H)) H10 <- matrix(0, ncol(information), nrow(H)) DL <- 2*diag(lambda, nrow(H), nrow(H)) # FIXME: better include inactive + slacks?? 
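# (illustrative sketch, not from the source: with a single active equality
#  constraint with Jacobian H (1 x npar) and Lagrange multiplier l, the
#  bordered matrix built below is
#      E3 = [ information   0    t(H) ]
#           [     0        2*l    0   ]
#           [     H         0     0   ]
#  and when inverted = TRUE only the leading npar x npar block of its
#  (generalized) inverse is returned)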
E3 <- rbind( cbind( information, H10, t(H)), cbind( t(H10), DL, H0), cbind( H, H0, H0) ) information <- E3 } } if(check.pd) { eigvals <- eigen(information, symmetric = TRUE, only.values = TRUE)$values if(any(eigvals < -1 * .Machine$double.eps^(3/4))) { warning("lavaan WARNING: matrix based on first order outer product of the derivatives is not positive definite; the model may not be identified") } } if(inverted) { if(is.augmented) { # note: default tol in MASS::ginv is sqrt(.Machine$double.eps) # which seems a bit too conservative # from 0.5-20, we changed this to .Machine$double.eps^(3/4) information <- try( MASS::ginv(information, tol = .Machine$double.eps^(3/4))[1:npar, 1:npar, drop = FALSE], silent = TRUE ) } else { if(use.ginv) { information <- try( MASS::ginv(information, tol = .Machine$double.eps^(3/4)), silent = TRUE ) } else { information <- try( solve(information), silent = TRUE ) } } } # augmented/inverted information information } lavaan/R/lav_norm.R0000644000176200001440000000456412650437034013672 0ustar liggesusers # simple derivatives of the normal distribution # dnorm dnorm_dummy <- function(y, mu = 0, sigma2 = 1) { sigma <- sqrt(sigma2) 1/(sigma*sqrt(2*pi)) * exp( -0.5 * ((y - mu)/sigma * (y - mu)/sigma) ) } # dnorm_dmu_x <- function(x, y, sigma2 = 1) { # dnorm_dummy(y = y, mu = x, sigma2 = sigma2) # } # numDeriv:::grad(func=dnorm_dmu_x, x=0.3, y=2.3, sigma2=16) # partial derivative - mu dnorm_dmu <- function(y, mu = 0, sigma2 = 1) { dy <- dnorm(x = y, mean = mu, sd = sqrt(sigma2)) (y - mu) / sigma2 * dy } #dnorm_dsigma2_x <- function(x, y, mu = 0) { # dnorm_dummy(y = y, mu = mu, sigma2 = x) #} #numDeriv:::grad(func=dnorm_dsigma2_x, x=16, y=2.3, mu=0.3) # partial derivative - sigma2 dnorm_dsigma2 <- function(y, mu = 0, sigma2 = 1) { dy <- dnorm(x = y, mean = mu, sd = sqrt(sigma2)) (1/(2*sigma2*sigma2) * (y - mu)*(y - mu) - 1/(2*sigma2)) * dy } #dnorm_dy_x <- function(x, mu = 0, sigma2 = 1) { # dnorm_dummy(y = x, mu = mu, sigma2 = sigma2) #} #numDeriv:::grad(func=dnorm_dy_x, x=2.3, mu=0.3, sigma2=16) # partial derivative - y dnorm_dy <- function(y, mu = 0, sigma2 = 1) { dy <- dnorm(x = y, mean = mu, sd = sqrt(sigma2)) -(y - mu) / sigma2 * dy } #### d log dnorm #### # # d log dnorm() / d theta = 1/dy d dnorm() / d theta dlogdnorm <- function(y, mu = 0, sigma2 = 1) { sigma <- sqrt(sigma2) -log( sigma*sqrt(2*pi) ) + (-0.5 * ((y - mu)/sigma*(y - mu)/sigma)) } #dlogdnorm_dmu_x <- function(x, y, sigma2 = 1) { # dlogdnorm(y = y, mu = x, sigma2 = sigma2) #} #numDeriv:::grad(func=dlogdnorm_dmu_x, x=0.3, y=2.3, sigma2=16) # partial derivative - mu dlogdnorm_dmu <- function(y, mu = 0, sigma2 = 1) { (y - mu) / sigma2 } #dlogdnorm_dmu(y = 2.3, mu = 0.3, sigma2 = 16) #dlogdnorm_dsigma2_x <- function(x, y, mu = 0) { # dlogdnorm(y = y, mu = mu, sigma2 = x) #} #numDeriv:::grad(func=dlogdnorm_dsigma2_x, x=16, y=2.3, mu=0.3) # partial derivative - sigma2 dlogdnorm_dsigma2 <- function(y, mu = 0, sigma2 = 1) { 1/(2*sigma2*sigma2) * (y - mu)*(y - mu) - 1/(2*sigma2) } #dlogdnorm_dsigma2(y = 2.3, mu = 0.3, sigma2 = 16) #dlogdnorm_dy_x <- function(x, mu = 0, sigma2 = 1) { # dlogdnorm(y = x, mu = mu, sigma2 = sigma2) #} #numDeriv:::grad(func=dlogdnorm_dy_x, x=2.3, mu=0.3, sigma2=16) # partial derivative - y dlogdnorm_dy <- function(y, mu = 0, sigma2 = 1) { -(y - mu) / sigma2 } #dlogdnorm_dy(y = 2.3, mu = 0.3, sigma2 = 16) lavaan/R/ctr_pml_plrt2.R0000644000176200001440000002044713043377617014645 0ustar liggesusersctr_pml_plrt2 <- function(lavobject = NULL, lavmodel = NULL, lavdata = NULL, 
lavsamplestats = NULL, lavpartable = NULL, lavoptions = NULL, x = NULL, VCOV = NULL, lavcache = NULL) { if(!is.null(lavobject)) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavoptions <- lavobject@Options lavsamplestats <- lavobject@SampleStats lavcache <- lavobject@Cache lavpartable <- lavobject@ParTable } if(is.null(x)) { # compute 'fx' = objective function value # (NOTE: since 0.5-18, NOT divided by N!!) fx <- lav_model_objective(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache) H0.fx <- as.numeric(fx) H0.fx.group <- attr(fx, "fx.group") } else { H0.fx <- attr(attr(x, "fx"), "fx.pml") H0.fx.group <- attr(attr(x, "fx"), "fx.group") } # fit a saturated model 'fittedSat' ModelSat <- lav_partable_unrestricted(lavobject = NULL, lavdata = lavdata, lavoptions = lavoptions, lavsamplestats = lavsamplestats) # FIXME: se="none", test="none"?? Options <- lavoptions Options$verbose <- FALSE Options$se <- "none" Options$test <- "none" fittedSat <- lavaan(ModelSat, slotOptions = Options, slotSampleStats = lavsamplestats, slotData = lavdata, slotCache = lavcache) fx <- lav_model_objective(lavmodel = fittedSat@Model, lavsamplestats = fittedSat@SampleStats, lavdata = fittedSat@Data, lavcache = fittedSat@Cache) SAT.fx <- as.numeric(fx) SAT.fx.group <- attr(fx, "fx.group") # we also need a `saturated model', but where the moments are based # on the model-implied sample statistics under H0 ModelSat2 <- lav_partable_unrestricted(lavobject = NULL, lavdata = lavdata, lavoptions = lavoptions, lavsamplestats = NULL, sample.cov = computeSigmaHat(lavmodel), sample.mean = computeMuHat(lavmodel), sample.th = computeTH(lavmodel), sample.th.idx = lavsamplestats@th.idx) Options2 <- Options Options2$optim.method <- "none" Options2$optim.force.converged <- TRUE fittedSat2 <- lavaan(ModelSat2, slotOptions = Options2, slotSampleStats = lavsamplestats, slotData = lavdata, slotCache = lavcache) # the code below was contributed by Myrsini Katsikatsou (Jan 2015) # for now, only a single group is supported: # g = 1L ########################### The code for PLRT for overall goodness of fit ##### Section 1. Compute the asymptotic mean and variance of the first quadratic quantity #if(is.null(VCOV)) { # VCOV <- lav_model_vcov(lavmodel = lavmodel, # lavsamplestats = lavsamplestats, # lavoptions = lavoptions, # lavdata = lavdata, # lavpartable = lavpartable, # lavcache = lavcache) #} # G.inv #InvG_attheta0 <- lavsamplestats@ntotal * VCOV[,] # Hessian #H_attheta0 <- solve(attr(VCOV, "E.inv")) # inverted observed information ('H.inv') if(is.null(VCOV)) { H0.inv <- lav_model_information(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, information = "observed", augmented = TRUE, inverted = TRUE) } else { H0.inv <- attr(VCOV, "E.inv") } # first order information ('J') if(is.null(VCOV)) { J0 <- lav_model_information_firstorder(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache)[,] } else { # we do not get J, but J.group, FIXME? J0 <- lav_model_information_firstorder(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache)[,] } # inverted Godambe information G0.inv <- H0.inv %*% J0 %*% H0.inv H0tmp_prod1 <- H0.inv %*% J0 #H0tmp_prod1 <- InvG_attheta0 %*% H_attheta0 H0tmp_prod2 <- H0tmp_prod1 %*% H0tmp_prod1 E_tww <- sum(diag(H0tmp_prod1)) var_tww <- 2* sum(diag(H0tmp_prod2)) ##### Section 2: Compute the asymptotic mean and variance of the second quadratic quantity. 
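# (note, added for clarity: these are the same trace formulas as in Section
#  1, now for the saturated model at vartheta0; in both sections the mean is
#  tr(G^{-1} H) and the variance 2 * tr((G^{-1} H)^2), where G is the
#  Godambe and H the Hessian-based information, and G^{-1} H = H^{-1} J)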
tmp.options <- fittedSat2@Options tmp.options$se <- "robust.huber.white" VCOV.Sat2 <- lav_model_vcov(lavmodel = fittedSat2@Model, lavsamplestats = fittedSat2@SampleStats, lavoptions = tmp.options, lavdata = fittedSat2@Data, lavpartable = fittedSat2@ParTable, lavcache = fittedSat2@Cache) # G.inv at vartheta_0 InvG_at_vartheta0 <- lavsamplestats@ntotal * VCOV.Sat2[,] # Hessian at vartheta_0 H_at_vartheta0 <- solve(attr(VCOV.Sat2, "E.inv")) # should always work #H1.inv <- lavTech(fittedSat2, "inverted.information.observed") #J1 <- lavTech(fittedSat2, "information.first.order") # H1tmp_prod1 <- H1.inv %*% J1 H1tmp_prod1 <- InvG_at_vartheta0 %*% H_at_vartheta0 H1tmp_prod2 <- H1tmp_prod1 %*% H1tmp_prod1 E_tzz <- sum(diag(H1tmp_prod1)) var_tzz <- 2* sum(diag(H1tmp_prod2)) ##### Section 3: Compute the asymptotic covariance of the two quadratic quantities drhodpsi_MAT <- vector("list", length = lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { delta.g <- computeDelta(lavmodel)[[g]] # order of the rows: first the thresholds, then the correlations # we need to map the rows of delta.g to the rows/cols of H_at_vartheta0 # of H1 PT <- fittedSat2@ParTable PT$label <- lav_partable_labels(PT) free.idx <- which(PT$free > 0 & PT$group == g) PARLABEL <- PT$label[free.idx] # for now, we can assume that computeDelta will always return # the thresholds first, then the correlations # # later, we should add a (working) add.labels = TRUE option to # computeDelta th.names <- lavobject@pta$vnames$th[[g]] ov.names <- lavobject@pta$vnames$ov[[g]] tmp <- utils::combn(ov.names, 2) cor.names <- paste(tmp[1,], "~~", tmp[2,], sep = "") NAMES <- c(th.names, cor.names) if(g > 1L) { NAMES <- paste(NAMES, ".g", g, sep = "") } par.idx <- match(PARLABEL, NAMES) drhodpsi_MAT[[g]] <- delta.g[par.idx,,drop = FALSE] } drhodpsi_mat <- do.call(rbind, drhodpsi_MAT) # tmp_prod <- ( t(drhodpsi_mat) %*% H_at_vartheta0 %*% # drhodpsi_mat %*% InvG_attheta0 %*% # H_attheta0 %*% InvG_attheta0 ) tmp_prod <- ( t(drhodpsi_mat) %*% H_at_vartheta0 %*% drhodpsi_mat %*% H0.inv %*% J0 %*% G0.inv ) cov_tzztww <- 2*sum(diag(tmp_prod)) ##### Section 4: compute the adjusted PLRT and its p-value PLRTH0Sat <- 2*(H0.fx - SAT.fx) PLRTH0Sat.group <- 2*(H0.fx.group - SAT.fx.group) asym_mean_PLRTH0Sat <- E_tzz - E_tww asym_var_PLRTH0Sat <- var_tzz + var_tww -2*cov_tzztww scaling.factor <- (asym_mean_PLRTH0Sat / (asym_var_PLRTH0Sat/2) ) FSA_PLRT_SEM <- (asym_mean_PLRTH0Sat / (asym_var_PLRTH0Sat/2) )*PLRTH0Sat adjusted_df <- (asym_mean_PLRTH0Sat*asym_mean_PLRTH0Sat) / (asym_var_PLRTH0Sat/2) # In some very few cases (simulations show very few cases in small sample sizes) # the adjusted_df is a negative number, we should then # print a warning like: "The adjusted df is computed to be a negative number # and for this the first and second moment adjusted PLRT is not computed." . 
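# (side note, not from the source: pchisq(FSA_PLRT_SEM, df = adjusted_df,
#  lower.tail = FALSE) would compute the same upper-tail p-value with better
#  numerical precision for very large test statistics)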
pvalue <- 1-pchisq(FSA_PLRT_SEM, df=adjusted_df ) list(PLRTH0Sat = PLRTH0Sat, PLRTH0Sat.group = PLRTH0Sat.group, stat = FSA_PLRT_SEM, df = adjusted_df, p.value = pvalue, scaling.factor = scaling.factor) } ############################################################################ lavaan/R/lav_model_hessian.R0000644000176200001440000001012713045634425015523 0ustar liggesusers# numeric approximation of the Hessian # using an analytic gradient lav_model_hessian <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, lavoptions = NULL, lavcache = NULL, group.weight = TRUE) { estimator <- lavmodel@estimator # computing the Richardson extrapolation Hessian <- matrix(0, lavmodel@nx.free, lavmodel@nx.free) x <- lav_model_get_parameters(lavmodel = lavmodel) for(j in 1:lavmodel@nx.free) { h.j <- 10e-6 x.left <- x.left2 <- x.right <- x.right2 <- x x.left[j] <- x[j] - h.j; x.left2[j] <- x[j] - 2*h.j x.right[j] <- x[j] + h.j; x.right2[j] <- x[j] + 2*h.j g.left <- lav_model_gradient(lavmodel = lavmodel, GLIST = lav_model_x2GLIST(lavmodel = lavmodel, x.left), lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight) g.left2 <- lav_model_gradient(lavmodel = lavmodel, GLIST = lav_model_x2GLIST(lavmodel = lavmodel, x.left2), lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight) g.right <- lav_model_gradient(lavmodel = lavmodel, GLIST = lav_model_x2GLIST(lavmodel = lavmodel, x.right), lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight) g.right2 <- lav_model_gradient(lavmodel = lavmodel, GLIST = lav_model_x2GLIST(lavmodel = lavmodel, x.right2), lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight) Hessian[,j] <- (g.left2 - 8*g.left + 8*g.right - g.right2)/(12*h.j) } # make symmetric (NEEDED? probably not) Hessian <- ( Hessian + t(Hessian) )/2.0 Hessian } # if only chol would accept a complex matrix... lav_model_hessian_complex <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, group.weight = TRUE) { gradf <- function(x) { GLIST <- lav_model_x2GLIST(lavmodel = lavmodel, x = x) dx <- lav_model_gradient(lavmodel = lavmodel, GLIST = GLIST, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight) dx } x <- lav_model_get_parameters(lavmodel = lavmodel) Hessian <- lav_func_jacobian_complex(func = gradf, x = x) Hessian } lavaan/R/lav_cor.R0000644000176200001440000001102513044561366013474 0ustar liggesusers# user-visible routine to # compute polychoric/polyserial/... correlations # # YR 17 Sept 2013 # # - YR 26 Nov 2013: big change - make it a wrapper around lavaan() # estimator = "none" means two.step (starting values) lavCor <- function(object, # lav.data options ordered = NULL, group = NULL, missing = "listwise", ov.names.x = NULL, # lavaan options se = "none", estimator = "two.step", # other options (for lavaan) ..., output = "cor") { # check estimator estimator <- tolower(estimator) if(estimator %in% c("two.step", "two.stage")) { estimator <- "none" } # se? 
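# (note, added for clarity: standard errors are only needed when parameter
#  estimates are requested; for plain "cor"/"cov"/"sampstat"/"th" output
#  they are switched off below, with a warning if the user asked for them)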
se <- tolower(se); output <- tolower(output)
if(se != "none") {
if(output %in% c("cor","cov","sampstat","th","thresholds")) {
warning("lavaan WARNING: argument `se' is ignored since standard errors are not needed for the requested `output'")
se <- "none" } }
# check object class
if(inherits(object, "lavaan")) {
lav.data <- object@Data
if(missing(missing)) { missing <- object@Options$missing } else { missing <- "default" }
} else if(inherits(object, "lavData")) { lav.data <- object
} else if(inherits(object, "data.frame")) {
NAMES <- names(object)
if(!is.null(group)) { NAMES <- NAMES[- match(group, NAMES)] }
lav.data <- lavData(data = object, group = group, ov.names = NAMES, ordered = ordered, ov.names.x = ov.names.x, lavoptions = list(missing = missing))
} else { stop("lavaan ERROR: lavCor can not handle objects of class ", paste(class(object), collapse= " ")) }
# set default estimator if se != "none"
categorical <- any(lav.data@ov$type == "ordered")
if(se != "none" && estimator == "none") { if(categorical) { estimator <- "WLSMV" } else { estimator <- "ML" } }
# extract partable options from dots
dots <- list(...)
meanstructure <- FALSE; fixed.x <- FALSE; mimic <- "lavaan"
conditional.x <- FALSE
if(!is.null(dots$meanstructure)) { meanstructure <- dots$meanstructure }
if(categorical) { meanstructure <- TRUE }
if(!is.null(dots$fixed.x)) { fixed.x <- dots$fixed.x }
if(!is.null(dots$mimic)) { mimic <- dots$mimic }
if(!is.null(dots$conditional.x)) { conditional.x <- dots$conditional.x }
# generate partable for unrestricted model
PT.un <- lav_partable_unrestricted(lavobject = NULL, lavdata = lav.data, lavoptions = list(meanstructure = meanstructure, fixed.x = fixed.x, conditional.x = conditional.x, mimic = mimic), sample.cov = NULL, sample.mean = NULL, sample.th = NULL)
fit <- lavaan(slotParTable = PT.un, slotData = lav.data, model.type = "unrestricted", missing = missing, se = se, estimator = estimator, ...)
# check output
if(output %in% c("cor","cov")) {
out <- inspect(fit, "sampstat")
if(fit@Data@ngroups == 1L) { out <- out$cov if(output == "cor") { out <- cov2cor(out) } } else { out <- lapply(out, "[[", "cov") if(output == "cor") { out <- lapply(out, cov2cor) } }
} else if(output %in% c("th","thresholds")) {
out <- inspect(fit, "sampstat")
if(fit@Data@ngroups == 1L) { out <- out$th } else { out <- lapply(out, "[[", "th") }
} else if(output %in% c("sampstat")) { out <- inspect(fit, "sampstat")
} else if(output %in% c("parameterEstimates", "pe", "parameterestimates", "est")) {
#out <- parameterEstimates(fit)
out <- standardizedSolution(fit)
} else { out <- fit }
out }
lavaan/R/lav_samplestats_wls_obs.R0000644000176200001440000000611012643676124017003 0ustar liggesuserslav_samplestats_wls_obs <- function(mean.g, cov.g, var.g, th.g, th.idx.g, res.int.g, res.cov.g, res.var.g, res.th.g, res.slopes.g, group.w.g, categorical = FALSE, conditional.x = FALSE, meanstructure = FALSE, slopestructure = FALSE, group.w.free = FALSE) {
# WLS.obs
if(categorical) {
# order of elements is important here:
# 1. thresholds + (negative) means (interleaved)
# 2. slopes (if any)
# 3. variances (if any)
# 4. covariance matrix (no diagonal!)
# NOTE: prior to 0.5-17, we had this:
# TH[ov.types == "numeric"] <- -1*TH[ov.types == "numeric"]
# which is WRONG if we have more than one threshold per variable
# (thanks to Sacha Epskamp for spotting this!)
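# (illustrative example, not from the source: with two ordinal variables of
#  two thresholds each followed by one numeric variable, WLS.obs becomes
#  c(th1.1, th1.2, th2.1, th2.2, -mean3, var3,
#    lav_matrix_vech(cov, diagonal = FALSE)) -- thresholds and negated
#  means interleaved in th.idx order, then the free variances, then the
#  covariances)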
if(conditional.x) { TH <- res.th.g TH[ th.idx.g == 0 ] <- -1*TH[ th.idx.g == 0 ] nvar <- length(res.var.g) num.idx <- which(!seq_len(nvar) %in% th.idx.g) WLS.obs <- c(TH, lav_matrix_vec(res.slopes.g), res.var.g[ num.idx ], lav_matrix_vech(res.cov.g, diagonal = FALSE) ) } else { TH <- th.g TH[ th.idx.g == 0 ] <- -1*TH[ th.idx.g == 0 ] nvar <- length(var.g) num.idx <- which(!seq_len(nvar) %in% th.idx.g) WLS.obs <- c(TH, var.g[ num.idx ], lav_matrix_vech(cov.g, diagonal = FALSE) ) } } else { if(conditional.x) { if(meanstructure) { if(slopestructure) { WLS.obs <- c( res.int.g, lav_matrix_vec( res.slopes.g ), lav_matrix_vech( res.cov.g )) } else { WLS.obs <- c( res.int.g, lav_matrix_vech( res.cov.g )) } } else { if(slopestructure) { WLS.obs <- c(lav_matrix_vec( res.slopes.g ), lav_matrix_vech( res.cov.g )) } else { WLS.obs <- lav_matrix_vech( res.cov.g ) } } } else { if(meanstructure) { WLS.obs <- c( mean.g, lav_matrix_vech( cov.g )) } else { WLS.obs <- lav_matrix_vech( cov.g ) } } } # group.w.free? if(group.w.free) { #group.w.last <- nobs[[ngroups]] / sum(unlist(nobs)) #WLS.obs[[g]] <- c(log(group.w[[g]]/group.w.last), WLS.obs[[g]]) WLS.obs <- c(group.w.g, WLS.obs) } WLS.obs } lavaan/R/lav_objective.R0000644000176200001440000005713613030544040014661 0ustar liggesusers# fitting function for standard ML estimator.ML <- function(Sigma.hat=NULL, Mu.hat=NULL, data.cov=NULL, data.mean=NULL, data.cov.log.det=NULL, meanstructure=FALSE) { # FIXME: WHAT IS THE BEST THING TO DO HERE?? # CURRENTLY: return Inf (at least for nlminb, this works well) if(!attr(Sigma.hat, "po")) return(Inf) Sigma.hat.inv <- attr(Sigma.hat, "inv") Sigma.hat.log.det <- attr(Sigma.hat, "log.det") nvar <- ncol(Sigma.hat) if(!meanstructure) { fx <- (Sigma.hat.log.det + sum(data.cov * Sigma.hat.inv) - data.cov.log.det - nvar) } else { W.tilde <- data.cov + tcrossprod(data.mean - Mu.hat) fx <- (Sigma.hat.log.det + sum(W.tilde * Sigma.hat.inv) - data.cov.log.det - nvar) } # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # fitting function for standard ML estimator.ML_res <- function(Sigma.hat=NULL, Mu.hat=NULL, PI=NULL, res.cov=NULL, res.int=NULL, res.slopes=NULL, res.cov.log.det=NULL, cov.x = NULL, mean.x = NULL) { if(!attr(Sigma.hat, "po")) return(Inf) # augmented mean.x + cov.x matrix C3 <- rbind(c(1,mean.x), cbind(mean.x, cov.x + tcrossprod(mean.x))) Sigma.hat.inv <- attr(Sigma.hat, "inv") Sigma.hat.log.det <- attr(Sigma.hat, "log.det") nvar <- ncol(Sigma.hat) # sigma objective.sigma <- ( Sigma.hat.log.det + sum(res.cov * Sigma.hat.inv) - res.cov.log.det - nvar ) # beta OBS <- t(cbind(res.int, res.slopes)) EST <- t(cbind(Mu.hat, PI)) Diff <- OBS - EST objective.beta <- sum(Sigma.hat.inv * crossprod(Diff, C3) %*% Diff) fx <- objective.sigma + objective.beta # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # fitting function for restricted ML estimator.REML <- function(Sigma.hat=NULL, Mu.hat=NULL, data.cov=NULL, data.mean=NULL, data.cov.log.det=NULL, meanstructure=FALSE, group = 1L, lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL) { if(!attr(Sigma.hat, "po")) return(Inf) Sigma.hat.inv <- attr(Sigma.hat, "inv") Sigma.hat.log.det <- attr(Sigma.hat, "log.det") nvar <- ncol(Sigma.hat) if(!meanstructure) { fx <- (Sigma.hat.log.det + sum(data.cov * Sigma.hat.inv) - data.cov.log.det - nvar) } else { W.tilde <- data.cov + tcrossprod(data.mean - Mu.hat) fx <- (Sigma.hat.log.det + sum(W.tilde * Sigma.hat.inv) - data.cov.log.det - nvar) } lambda.idx <- which(names(lavmodel@GLIST) == "lambda") LAMBDA <- 
lavmodel@GLIST[[ lambda.idx[group] ]] data.cov.inv <- lavsamplestats@icov[[group]] reml.h0 <- log(det(t(LAMBDA) %*% Sigma.hat.inv %*% LAMBDA)) reml.h1 <- log(det(t(LAMBDA) %*% data.cov.inv %*% LAMBDA)) nobs <- lavsamplestats@nobs[[group]] #fx <- (Sigma.hat.log.det + tmp - data.cov.log.det - nvar) + 1/Ng * (reml.h0 - reml.h1) fx <- fx + ( 1/nobs * (reml.h0 - reml.h1) ) # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # 'classic' fitting function for GLS, not used for now estimator.GLS <- function(Sigma.hat=NULL, Mu.hat=NULL, data.cov=NULL, data.mean=NULL, data.nobs=NULL, meanstructure=FALSE) { W <- data.cov W.inv <- solve(data.cov) if(!meanstructure) { tmp <- ( W.inv %*% (W - Sigma.hat) ) fx <- 0.5 * (data.nobs-1)/data.nobs * sum( tmp * t(tmp)) } else { tmp <- W.inv %*% (W - Sigma.hat) tmp1 <- 0.5 * (data.nobs-1)/data.nobs * sum( tmp * t(tmp)) tmp2 <- sum(diag( W.inv %*% tcrossprod(data.mean - Mu.hat) )) fx <- tmp1 + tmp2 } # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # general WLS estimator (Muthen, Appendix 4, eq 99 single group) # full weight (WLS.V) matrix estimator.WLS <- function(WLS.est=NULL, WLS.obs=NULL, WLS.V=NULL) { #diff <- as.matrix(WLS.obs - WLS.est) #fx <- as.numeric( t(diff) %*% WLS.V %*% diff ) # since 0.5-17, we use crossprod twice diff <- WLS.obs - WLS.est fx <- as.numeric( crossprod(crossprod(WLS.V, diff), diff) ) # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # diagonally weighted LS (DWLS) estimator.DWLS <- function(WLS.est = NULL, WLS.obs = NULL, WLS.VD = NULL) { diff <- WLS.obs - WLS.est fx <- sum(diff * diff * WLS.VD) # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 fx } # Full Information ML estimator (FIML) handling the missing values estimator.FIML <- function(Sigma.hat = NULL, Mu.hat = NULL, Yp = NULL, h1 = NULL, N = NULL) { if(is.null(N)) { N <- sum(sapply(Yp, "[[", "freq")) } fx <- lav_mvnorm_missing_loglik_samplestats(Yp = Yp, Mu = Mu.hat, Sigma = Sigma.hat, log2pi = FALSE, minus.two = TRUE)/N # adjust for h1 if(!is.null(h1)) { fx <- fx - h1 # no negative values if(is.finite(fx) && fx < 0.0) fx <- 0.0 } fx } # pairwise maximum likelihood # this is adapted from code written by Myrsini Katsikatsou # # some changes: # - no distinction between x/y (ksi/eta) # - loglikelihoods are computed case-wise # - 29/03/2016: adapt for exogenous covariates # - 21/09/2016: added code for missing = doubly.robust (contributed by # Myrsini Katsikatsou) estimator.PML <- function(Sigma.hat = NULL, # model-based var/cov/cor TH = NULL, # model-based thresholds + means PI = NULL, # slopes th.idx = NULL, # threshold idx per variable num.idx = NULL, # which variables are numeric X = NULL, # raw data eXo = NULL, # eXo data lavcache = NULL, # housekeeping stuff missing = NULL) { # how to deal with missings? # YR 3 okt 2012 # the idea is to compute for each pair of variables, the model-based # probability (or likelihood in mixed case) (that we observe the data # for this pair under the model) for *each case* # after taking logs, the sum over the cases gives the # log probability/likelihood for this pair # the sum over all pairs gives the final PML based logl # first of all: check if all correlations are within [-1,1] # if not, return Inf; (at least with nlminb, this works well) cors <- Sigma.hat[lower.tri(Sigma.hat)] #cat("[DEBUG objective\n]"); print(range(cors)); print(range(TH)); cat("\n") if(any(abs(cors) > 1)) { # question: what is the best approach here??
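        # illustrative note, not part of the original source: schematically,
        # the pairwise log-likelihood assembled below is
        #   pl(theta) = sum_{i < j} sum_{n = 1}^{N} log Pr(y[n,i], y[n,j]; theta)
        # so a single out-of-range correlation invalidates every term of one
        # pair; returning +Inf here simply makes the minimizer backtrack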
OUT <- +Inf attr(OUT, "logl") <- as.numeric(NA) return(OUT) #idx <- which( abs(cors) > 0.99 ) #cors[idx] <- 0.99 # clip #cat("CLIPPING!\n") } nvar <- nrow(Sigma.hat) if(is.null(eXo)) { nexo <- 0L } else { nexo <- NCOL(eXo) } pstar <- nvar*(nvar-1)/2 ov.types <- rep("ordered", nvar) if(length(num.idx) > 0L) ov.types[num.idx] <- "numeric" #print(Sigma.hat); print(TH); print(th.idx); print(num.idx); print(str(X)) LIK <- matrix(0, nrow(X), pstar) # likelihood per case, per pair PSTAR <- matrix(0, nvar, nvar) # utility matrix, to get indices PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar PROW <- row(PSTAR) PCOL <- col(PSTAR) # shortcut for all ordered - tablewise if(all(ov.types == "ordered") && nexo == 0L) { # prepare for Myrsini's vectorization scheme LONG2 <- LongVecTH.Rho(no.x = nvar, all.thres = TH, index.var.of.thres = th.idx, rho.xixj = cors) # get expected probability per table, per pair pairwisePI <- pairwiseExpProbVec(ind.vec = lavcache$LONG, th.rho.vec = LONG2) pairwisePI_orig <- pairwisePI # for doubly.robust # get frequency per table, per pair logl <- sum(lavcache$bifreq * log(pairwisePI)) # more convenient fit function prop <- lavcache$bifreq / lavcache$nobs freq <- lavcache$bifreq # remove zero props # FIXME!!! or add 0.5??? #zero.idx <- which(prop == 0.0) zero.idx <- which( (prop == 0.0) | !is.finite(prop) ) if(length(zero.idx) > 0L) { freq <- freq[-zero.idx] prop <- prop[-zero.idx] pairwisePI <- pairwisePI[-zero.idx] } ##Fmin <- sum( prop*log(prop/pairwisePI) ) Fmin <- sum( freq * log(prop/pairwisePI) ) # to avoid 'N' if(missing == "available.cases" || missing == "doubly.robust") { uniPI <- univariateExpProbVec(TH = TH, th.idx = th.idx) # shortcuts unifreq <- lavcache$unifreq uninobs <- lavcache$uninobs uniweights <- lavcache$uniweights logl <- logl + sum(uniweights * log(uniPI)) uniprop <- unifreq / uninobs # remove zero props # uni.zero.idx <- which(uniprop == 0.0) uni.zero.idx <- which( (uniprop == 0.0) | !is.finite(uniprop) ) if(length(uni.zero.idx) > 0L) { uniprop <- uniprop[-uni.zero.idx] uniPI <- uniPI[-uni.zero.idx] uniweights <- uniweights[-uni.zero.idx] } Fmin <- Fmin + sum(uniweights * log(uniprop/uniPI)) } if (missing =="doubly.robust") { # COMPUTE THE SUM OF THE EXPECTED BIVARIATE CONDITIONAL LIKELIHOODS #SUM_{i,j} [ E_{Yi,Yj|y^o}}(lnf(Yi,Yj))) ] #First compute the terms of the summand. Since the cells of # pairwiseProbGivObs are zero for the pairs of variables for which at least #one of the variables is observed (hence not contributing to the summand) #there is no need to construct an index vector for summing appropriately #within each individual. log_pairwisePI_orig <- log(pairwisePI_orig) pairwiseProbGivObs <- lavcache$pairwiseProbGivObs tmp_prod <- t(t(pairwiseProbGivObs)*log_pairwisePI_orig) SumElnfijCasewise <- apply(tmp_prod, 1, sum) SumElnfij <- sum(SumElnfijCasewise) logl <- logl + SumElnfij Fmin <- Fmin - SumElnfij # COMPUTE THE SUM OF THE EXPECTED UNIVARIATE CONDITIONAL LIKELIHOODS # SUM_{i,j} [ E_{Yj|y^o}}(lnf(Yj|yi))) ] #First compute the model-implied conditional univariate probabilities # p(y_i=a|y_j=b). Let ModProbY1Gy2 be the vector of these # probabilities. The order in which the probabilities #are listed in the vector ModProbY1Gy2 is as follows: # y1|y2, y1|y3, ..., y1|yp, y2|y1, y2|y3, ..., y2|yp, # ..., yp|y1, yp|y2, ..., yp|y(p-1). Within each pair of variables the #index "a" which represents the response category of variable yi runs faster than #"b" which represents the response category of the given variable yj.
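            #(illustrative example, not in the original comments: with two
            # binary variables the block for the pair y1|y2 is ordered
            #   p(y1=1|y2=1), p(y1=2|y2=1), p(y1=1|y2=2), p(y1=2|y2=2)
            # i.e. the category index "a" of y1 runs fastest)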
#The computation of these probabilities is based on the model-implied #bivariate probabilities p(y_i=a,y_j=b). To do the appropriate summations #and divisions we need some index vectors to keep track of the index i, j, #a, and b, as well as the pair index. These index vectors should be #computed once and stored in lavcache. For where in the lavaan code #the computations are added and how they are done, please see the #file "new objects in lavcache for DR-PL.r" idx.pairs <- lavcache$idx.pairs idx.cat.y2.split <- lavcache$idx.cat.y2.split idx.cat.y1.split <- lavcache$idx.cat.y1.split idx.Y1 <- lavcache$idx.Y1 idx.Gy2 <- lavcache$idx.Gy2 idx.cat.Y1 <- lavcache$idx.cat.Y1 idx.cat.Gy2 <- lavcache$idx.cat.Gy2 id.uniPrGivObs <- lavcache$id.uniPrGivObs #the latter keeps track of which variable each column of the matrix #univariateProbGivObs refers to #For the function compute_uniCondProb_based_on_bivProb see the .r file #with the same name. ModProbY1Gy2 <- compute_uniCondProb_based_on_bivProb( bivProb = pairwisePI_orig, nvar = nvar, idx.pairs = idx.pairs, idx.Y1 = idx.Y1, idx.Gy2 = idx.Gy2, idx.cat.y1.split = idx.cat.y1.split, idx.cat.y2.split = idx.cat.y2.split) log_ModProbY1Gy2 <- log(ModProbY1Gy2) #Let univariateProbGivObs be the matrix of the conditional univariate # probabilities Pr(y_i=a|y^o) that have been computed in advance and are #fed to the DR-PL function. The rows represent different individuals, #i.e. nrow=nobs, and the columns different probabilities. The columns # are listed as follows: a runs faster than i. #Note that the number of columns of univariateProbGivObs is not the #same as the length(log_ModProbY1Gy2), actually #ncol(univariateProbGivObs) < length(log_ModProbY1Gy2). #For this we use the following commands in order to multiply correctly. #Compute for each case the product Pr(y_i=a|y^o) * log[ p(y_i=a|y_j=b) ] #i.e. univariateProbGivObs * log_ModProbY1Gy2 univariateProbGivObs <- lavcache$univariateProbGivObs nobs <- nrow(X) uniweights.casewise <- lavcache$uniweights.casewise id.cases.with.missing <- which(uniweights.casewise > 0) no.cases.with.missing <- length(id.cases.with.missing) no.obs.casewise <- nvar - uniweights.casewise idx.missing.var <- apply(X, 1, function(x) { which(is.na(x)) }) idx.observed.var <- lapply(idx.missing.var, function(x) { c(1:nvar)[-x] }) idx.cat.observed.var <- sapply(1:nobs, function(i) { X[i, idx.observed.var[[i]]] }) ElnyiGivyjbCasewise <- sapply(1:no.cases.with.missing,function(i) { tmp.id.case <- id.cases.with.missing[i] tmp.no.mis <- uniweights.casewise[tmp.id.case] tmp.idx.mis <- idx.missing.var[[tmp.id.case]] tmp.idx.obs <- idx.observed.var[[tmp.id.case]] tmp.no.obs <- no.obs.casewise[tmp.id.case] tmp.idx.cat.obs <- idx.cat.observed.var[[tmp.id.case]] tmp.uniProbGivObs.i <- univariateProbGivObs[tmp.id.case, ] sapply(1:tmp.no.mis, function(k) { tmp.idx.mis.var <- tmp.idx.mis[k] tmp.uniProbGivObs.ik <- tmp.uniProbGivObs.i[id.uniPrGivObs == tmp.idx.mis.var] tmp.log_ModProbY1Gy2 <- sapply(1:tmp.no.obs, function(z) { log_ModProbY1Gy2[idx.Y1 == tmp.idx.mis.var & idx.Gy2 == tmp.idx.obs[z] & idx.cat.Gy2 == tmp.idx.cat.obs[z]]}) sum(tmp.log_ModProbY1Gy2 * tmp.uniProbGivObs.ik) }) }) ElnyiGivyjb <- sum(unlist(ElnyiGivyjbCasewise)) logl <- logl + ElnyiGivyjb # for the Fmin function Fmin <- Fmin - ElnyiGivyjb } #end of if (missing =="doubly.robust") } else { # # order! first i, then j, lav_matrix_vec(table)!
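        # illustrative sketch, not part of the original source: for nvar = 3
        # the pair-index matrix filled in above (column-major lower triangle) is
        #   PSTAR = | .  .  . |
        #           | 1  .  . |
        #           | 2  3  . |
        # so inside the double loop below, PSTAR[i, j] (with i > j) picks the
        # column of LIK that stores the casewise likelihoods of pair (i, j)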
for(j in seq_len(nvar-1L)) { for(i in (j+1L):nvar) { pstar.idx <- PSTAR[i,j] # cat("pstar.idx =", pstar.idx, "i = ", i, " j = ", j, "\n") if(ov.types[i] == "numeric" && ov.types[j] == "numeric") { # ordinary pearson correlation stop("not done yet") } else if(ov.types[i] == "numeric" && ov.types[j] == "ordered") { # polyserial correlation stop("not done yet") } else if(ov.types[j] == "numeric" && ov.types[i] == "ordered") { # polyserial correlation stop("not done yet") } else if(ov.types[i] == "ordered" && ov.types[j] == "ordered") { # polychoric correlation if(nexo == 0L) { pairwisePI <- pc_PI(rho = Sigma.hat[i,j], th.y1 = TH[ th.idx == i ], th.y2 = TH[ th.idx == j ]) LIK[,pstar.idx] <- pairwisePI[ cbind(X[,i], X[,j]) ] } else { LIK[,pstar.idx] <- pc_lik_PL_with_cov(Y1 = X[,i], Y2 = X[,j], Rho = Sigma.hat[i,j], th.y1 = TH[th.idx==i], th.y2 = TH[th.idx==j], eXo = eXo, PI.y1 = PI[i,], PI.y2 = PI[j,], missing.ind = missing) #pc_lik2(Y1 = X[, i], Y2 = X[,j], # rho = Sigma.hat[i, j], # th.y1 = TH[th.idx == i], # th.y2 = TH[th.idx == j], # eXo = eXo, # sl.y1 = PI[i, ], # sl.y2 = PI[j, ]) } } #cat("Done\n") } } # check for zero likelihoods/probabilities # FIXME: or should we replace them with a tiny number? if(any(LIK == 0.0, na.rm = TRUE)) { OUT <- +Inf attr(OUT, "logl") <- as.numeric(NA) return(OUT) } # loglikelihood LogLIK.cases <- log(LIK) # sum over cases LogLIK.pairs <- colSums(LogLIK.cases, na.rm = TRUE) # sum over pairs LogLik <- logl <- sum(LogLIK.pairs) # Fmin Fmin <- (-1)*LogLik } if(missing == "available.cases" && all(ov.types == "ordered") && nexo != 0L) { uni_LIK <- matrix(0, nrow(X), ncol(X)) for(i in seq_len(nvar)) { uni_LIK[,i] <- uni_lik(Y1 = X[,i], th.y1 = TH[th.idx==i], eXo = eXo, PI.y1 = PI[i,]) } if(any(uni_LIK == 0.0, na.rm = TRUE)) { OUT <- +Inf attr(OUT, "logl") <- as.numeric(NA) return(OUT) } uni_logLIK_cases <- log(uni_LIK) * lavcache$uniweights.casewise #sum over cases uni_logLIK_varwise <- colSums(uni_logLIK_cases) #sum over variables uni_logLIK <- sum(uni_logLIK_varwise) #add with the pairwise part of LogLik LogLik <- logl <- LogLik + uni_logLIK #we minimise Fmin <- (-1)*LogLik } # function value as returned to the minimizer #fx <- -1 * LogLik fx <- Fmin # attach 'loglikelihood' attr(fx, "logl") <- logl fx } # full information maximum likelihood # underlying multivariate normal approach (see Joreskog & Moustaki, 2001) # estimator.FML <- function(Sigma.hat = NULL, # model-based var/cov/cor TH = NULL, # model-based thresholds + means th.idx = NULL, # threshold idx per variable num.idx = NULL, # which variables are numeric X = NULL, # raw data lavcache = NULL) { # patterns # YR 27 aug 2013 # just for fun, and to compare with PML for small models # first of all: check if all correlations are within [-1,1] # if not, return Inf; (at least with nlminb, this works well) cors <- Sigma.hat[lower.tri(Sigma.hat)] if(any(abs(cors) > 1)) { return(+Inf) } nvar <- nrow(Sigma.hat) pstar <- nvar*(nvar-1)/2 ov.types <- rep("ordered", nvar) if(length(num.idx) > 0L) ov.types[num.idx] <- "numeric" MEAN <- rep(0, nvar) # shortcut for all ordered - per pattern if(all(ov.types == "ordered")) { PAT <- lavcache$pat; npatterns <- nrow(PAT) freq <- as.numeric( rownames(PAT) ) PI <- numeric(npatterns) TH.VAR <- lapply(1:nvar, function(x) c(-Inf, TH[th.idx==x], +Inf)) # FIXME!!! ok to set diagonal to 1.0? 
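        # illustrative note, not part of the original source: with a unit
        # diagonal, Sigma.hat acts as the correlation matrix of the latent
        # responses, and each observed response pattern r contributes the
        # rectangle probability
        #   Pr(pattern r) = Pr(lower_k < Z_k < upper_k, k = 1..nvar),
        # with Z ~ N(0, Sigma.hat), which mnormt::sadmvn() approximates
        # numerically in the loop below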
diag(Sigma.hat) <- 1.0 for(r in 1:npatterns) { # compute probability for each pattern lower <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x] ]) upper <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x] + 1L ]) # how accurate must we be here??? PI[r] <- sadmvn(lower, upper, mean=MEAN, varcov=Sigma.hat, maxpts=10000*nvar, abseps = 1e-07) } # sum (log)likelihood over all patterns #LogLik <- sum(log(PI) * freq) # more convenient fit function prop <- freq/sum(freq) # remove zero props # FIXME!!! or add 0.5??? zero.idx <- which(prop == 0.0) if(length(zero.idx) > 0L) { prop <- prop[-zero.idx] PI <- PI[-zero.idx] } Fmin <- sum( prop*log(prop/PI) ) } else { # case-wise PI <- numeric(nobs) for(i in 1:nobs) { # compute probability for each case PI[i] <- stop("not implemented") } # sum (log)likelihood over all observations LogLik <- sum(log(PI)) stop("not implemented") } # function value as returned to the minimizer #fx <- -1 * LogLik fx <- Fmin fx } estimator.MML <- function(lavmodel = NULL, THETA = NULL, TH = NULL, GLIST = NULL, group = 1L, lavdata = NULL, sample.mean = NULL, sample.mean.x = NULL, lavcache = NULL) { # compute case-wise likelihoods lik <- lav_model_lik_mml(lavmodel = lavmodel, THETA = THETA, TH = TH, GLIST = GLIST, group = group, lavdata = lavdata, sample.mean = sample.mean, sample.mean.x = sample.mean.x, lavcache = lavcache) # log + sum over observations logl <- sum( log(lik) ) # function value as returned to the minimizer fx <- -logl fx } lavaan/R/lav_start.R0000644000176200001440000005371613053015462014052 0ustar liggesusers# lav_start.R: provide starting values for model parameters # # YR 30/11/2010: initial version # YR 08/06/2011: add fabin3 start values for factor loadings # YR 14 Jan 2014: moved to lav_start.R # fill in the 'ustart' column in a User data.frame with reasonable # starting values, using the sample data lav_start <- function(start.method = "default", lavpartable = NULL, lavsamplestats = NULL, model.type = "sem", mimic = "lavaan", debug = FALSE) { # check arguments stopifnot(is.list(lavpartable)) # categorical? categorical <- any(lavpartable$op == "|") # conditional.x? conditional.x <- any(lavpartable$exo == 1L & lavpartable$op == "~") #ord.names <- unique(lavpartable$lhs[ lavpartable$op == "|" ]) # shortcut for 'simple' if(identical(start.method, "simple")) { start <- numeric( length(lavpartable$ustart) ) start[ which(lavpartable$op == "=~") ] <- 1.0 start[ which(lavpartable$op == "~*~") ] <- 1.0 ov.names.ord <- vnames(lavpartable, "ov.ord") var.idx <- which(lavpartable$op == "~~" & lavpartable$lhs == lavpartable$rhs & !(lavpartable$lhs %in% ov.names.ord)) start[var.idx] <- 1.0 user.idx <- which(!is.na(lavpartable$ustart)) start[user.idx] <- lavpartable$ustart[user.idx] return(start) } # check start.method if(mimic == "lavaan") { start.initial <- "lavaan" } else if(mimic == "Mplus") { start.initial <- "mplus" } else { # FIXME: use LISREL/EQS/AMOS/.... schems start.initial <- "lavaan" } start.user <- NULL if(is.character(start.method)) { start.method. <- tolower(start.method) if(start.method. == "default") { # nothing to do } else if(start.method. %in% c("simple", "lavaan", "mplus")) { start.initial <- start.method. 
} else { stop("lavaan ERROR: unknown value for start argument") } } else if(is.list(start.method)) { start.user <- start.method } else if(inherits(start.method, "lavaan")) { start.user <- parTable(start.method) } # check model list elements, if provided if(!is.null(start.user)) { if(is.null(start.user$lhs) || is.null(start.user$op) || is.null(start.user$rhs)) { stop("lavaan ERROR: problem with start argument: model list does not contain all elements: lhs/op/rhs") } if(!is.null(start.user$est)) { # excellent, we got an est column; nothing to do } else if(!is.null(start.user$start)) { # no est column, but we use the start column start.user$est <- start.user$start } else if(!is.null(start.user$ustart)) { # no ideal, but better than nothing start.user$est <- start.user$ustart } else { stop("lavaan ERROR: problem with start argument: could not find est/start column in model list") } } # global settings # 0. everyting is zero start <- numeric( length(lavpartable$ustart) ) # 1. =~ factor loadings: if(categorical) { # if std.lv=TRUE, more likely initial Sigma.hat is positive definite # 0.8 is too large start[ which(lavpartable$op == "=~") ] <- 0.7 } else { start[ which(lavpartable$op == "=~") ] <- 1.0 } # 2. residual lv variances for latent variables lv.names <- vnames(lavpartable, "lv") # all blocks lv.var.idx <- which(lavpartable$op == "~~" & lavpartable$lhs %in% lv.names & lavpartable$lhs == lavpartable$rhs) start[lv.var.idx] <- 0.05 # 3. latent response scales (if any) delta.idx <- which(lavpartable$op == "~*~") start[delta.idx] <- 1.0 # block-specific settings nblocks <- lav_partable_nblocks(lavpartable) for(g in 1:nblocks) { # info from user model for this block if(conditional.x) { ov.names <- vnames(lavpartable, "ov.nox", block = g) } else { ov.names <- vnames(lavpartable, "ov", block = g) } if(categorical) { ov.names.num <- vnames(lavpartable, "ov.num", block = g) ov.names.ord <- vnames(lavpartable, "ov.ord", block = g) } else { ov.names.num <- ov.names } lv.names <- vnames(lavpartable, "lv", block = g) ov.names.x <- vnames(lavpartable, "ov.x", block = g) # g1) factor loadings if(start.initial %in% c("lavaan", "mplus") && model.type %in% c("sem", "cfa") && #!categorical && sum( lavpartable$ustart[ lavpartable$op == "=~" & lavpartable$block == g], na.rm=TRUE) == length(lv.names) ) { # only if all latent variables have a reference item, # we use the fabin3 estimator (2sls) of Hagglund (1982) # per factor # 9 Okt 2013: if only 2 indicators, we use the regression # coefficient (y=marker, x=2nd indicator) for(f in lv.names) { free.idx <- which( lavpartable$lhs == f & lavpartable$op == "=~" & lavpartable$block == g & lavpartable$free > 0L) user.idx <- which( lavpartable$lhs == f & lavpartable$op == "=~" & lavpartable$block == g ) # no second order if(any(lavpartable$rhs[user.idx] %in% lv.names)) next # get observed indicators for this latent variable ov.idx <- match(lavpartable$rhs[user.idx], ov.names) if(length(ov.idx) > 2L && !any(is.na(ov.idx))) { if(lavsamplestats@missing.flag) { COV <- lavsamplestats@missing.h1[[g]]$sigma[ov.idx, ov.idx] } else { if(conditional.x) { COV <- lavsamplestats@res.cov[[g]][ov.idx,ov.idx] } else { COV <- lavsamplestats@cov[[g]][ov.idx,ov.idx] } } start[user.idx] <- fabin3.uni(COV) } else if(length(free.idx) == 1L && length(ov.idx) == 2L) { if(conditional.x) { REG2 <- ( lavsamplestats@res.cov[[g]][ov.idx[1], ov.idx[2]] / lavsamplestats@res.cov[[g]][ov.idx[1], ov.idx[1]] ) } else { REG2 <- ( lavsamplestats@cov[[g]][ov.idx[1], ov.idx[2]] / 
lavsamplestats@cov[[g]][ov.idx[1], ov.idx[1]] ) } start[free.idx] <- REG2 } # standardized? var.f.idx <- which(lavpartable$lhs == f & lavpartable$op == "~~" & lavpartable$rhs == f) if(length(var.f.idx) > 0L && lavpartable$free[var.f.idx] == 0 && lavpartable$ustart[var.f.idx] == 1) { # make sure factor loadings are between -0.7 and 0.7 x <- start[user.idx] start[user.idx] <- (x / max(abs(x))) * 0.7 } } } if(model.type == "unrestricted") { # fill in 'covariances' from lavsamplestats cov.idx <- which(lavpartable$block == g & lavpartable$op == "~~" & lavpartable$lhs != lavpartable$rhs) lhs.idx <- match(lavpartable$lhs[cov.idx], ov.names) rhs.idx <- match(lavpartable$rhs[cov.idx], ov.names) start[cov.idx] <- lavsamplestats@cov[[g]][ cbind(lhs.idx, rhs.idx) ] } # 2g) residual ov variances (including exo, to be overridden) ov.var.idx <- which(lavpartable$block == g & lavpartable$op == "~~" & lavpartable$lhs %in% ov.names.num & lavpartable$lhs == lavpartable$rhs) sample.var.idx <- match(lavpartable$lhs[ov.var.idx], ov.names) if(model.type == "unrestricted") { start[ov.var.idx] <- diag(lavsamplestats@cov[[g]])[sample.var.idx] } else { if(start.initial == "mplus") { if(conditional.x) { start[ov.var.idx] <- (1.0 - 0.50)*lavsamplestats@res.var[[1L]][sample.var.idx] } else { start[ov.var.idx] <- (1.0 - 0.50)*lavsamplestats@var[[1L]][sample.var.idx] } } else { if(conditional.x) { start[ov.var.idx] <- (1.0 - 0.50)*diag(lavsamplestats@res.cov[[g]])[sample.var.idx] } else { start[ov.var.idx] <- (1.0 - 0.50)*diag(lavsamplestats@cov[[g]])[sample.var.idx] } } } # variances of ordinal variables - set to 1.0 if(categorical) { ov.var.ord.idx <- which(lavpartable$block == g & lavpartable$op == "~~" & lavpartable$lhs %in% ov.names.ord & lavpartable$lhs == lavpartable$rhs) start[ov.var.ord.idx] <- 1.0 } # 3g) intercepts/means ov.int.idx <- which(lavpartable$block == g & lavpartable$op == "~1" & lavpartable$lhs %in% ov.names) sample.int.idx <- match(lavpartable$lhs[ov.int.idx], ov.names) if(lavsamplestats@missing.flag) { start[ov.int.idx] <- lavsamplestats@missing.h1[[g]]$mu[sample.int.idx] } else { if(conditional.x) { start[ov.int.idx] <- lavsamplestats@res.int[[g]][sample.int.idx] } else { start[ov.int.idx] <- lavsamplestats@mean[[g]][sample.int.idx] } } # 4g) thresholds th.idx <- which(lavpartable$block == g & lavpartable$op == "|") if(length(th.idx) > 0L) { th.names.lavpartable <- paste(lavpartable$lhs[th.idx], "|", lavpartable$rhs[th.idx], sep="") th.names.sample <- lavsamplestats@th.names[[g]][ lavsamplestats@th.idx[[g]] > 0L ] # th.names.sample should be identical to # vnames(lavpartable, "th", block = g) if(conditional.x) { th.values <- lavsamplestats@res.th[[g]][lavsamplestats@th.idx[[g]] > 0L] } else { th.values <- lavsamplestats@th[[g]][lavsamplestats@th.idx[[g]] > 0L] } start[th.idx] <- th.values[match(th.names.lavpartable, th.names.sample)] } # 5g) exogenous `fixed.x' covariates if(!conditional.x && length(ov.names.x) > 0) { exo.idx <- which(lavpartable$block == g & lavpartable$op == "~~" & lavpartable$lhs %in% ov.names.x & lavpartable$rhs %in% ov.names.x) row.idx <- match(lavpartable$lhs[exo.idx], ov.names) col.idx <- match(lavpartable$rhs[exo.idx], ov.names) if(lavsamplestats@missing.flag) { start[exo.idx] <- lavsamplestats@missing.h1[[g]]$sigma[cbind(row.idx,col.idx)] # using slightly smaller starting values for free # variance/covariances (fixed.x = FALSE); # this somehow avoids false convergence in saturated models nobs <- lavsamplestats@nobs[[g]] this.idx <- which( seq_len(length(lavpartable$free))
%in% exo.idx & lavpartable$free > 0L ) start[this.idx] <- start[this.idx] * (nobs-1)/nobs } else { start[exo.idx] <- lavsamplestats@cov[[g]][cbind(row.idx,col.idx)] } } # 6g) exogenous lv variances if single indicator -- new in 0.5-21 lv.x <- vnames(lavpartable, "lv.x", block = g) if(length(lv.x) > 0L) { for(ll in lv.x) { ind.idx <- which(lavpartable$op == "=~" & lavpartable$lhs == ll & lavpartable$block == g) if(length(ind.idx) == 1L) { single.ind <- lavpartable$rhs[ind.idx] single.fvar.idx <- which(lavpartable$op == "~~" & lavpartable$lhs == ll & lavpartable$rhs == ll & lavpartable$block == g) single.var.idx <- which(lavpartable$op == "~~" & lavpartable$lhs == single.ind & lavpartable$rhs == single.ind & lavpartable$block == g) # user-defined residual variance single.var <- lavpartable$ustart[single.var.idx] if(is.na(single.var)) { single.var <- 1 } ov.idx <- match(single.ind, ov.names) if(conditional.x) { ov.var <- diag(lavsamplestats@res.cov[[g]])[ov.idx] } else { ov.var <- diag(lavsamplestats@cov[[g]])[ov.idx] } # take (1 - (rvar/ov.var)) * ov.var tmp <- (1 - (single.var/ov.var)) * ov.var # just in case if(is.na(tmp) || tmp < 0.05) { tmp <- 0.05 } start[single.fvar.idx] <- tmp } } } # 7g) regressions "~" } # group weights group.idx <- which(lavpartable$lhs == "group" & lavpartable$op == "%") if(length(group.idx) > 0L) { ngroups <- length(group.idx) #prop <- rep(1/ngroups, ngroups) # use last group as reference #start[group.idx] <- log(prop/prop[ngroups]) # poisson version start[group.idx] <- log( rep(lavsamplestats@ntotal/ngroups, ngroups) ) } # growth models: # - compute starting values for mean latent variables # - compute starting values for variance latent variables if(start.initial %in% c("lavaan", "mplus") && model.type == "growth") { ### DEBUG ONLY #lv.var.idx <- which(lavpartable$op == "~~" & # lavpartable$lhs %in% lv.names & # lavpartable$lhs == lavpartable$rhs) ### DEBUG ONLY #lv.int.idx <- which(lavpartable$op == "~1" & # lavpartable$lhs %in% lv.names) } # override if a user list with starting values is provided # we only look at the 'est' column for now if(!is.null(start.user)) { if(is.null(lavpartable$block)) { lavpartable$block <- rep(1L, length(lavpartable$lhs)) } if(is.null(start.user$block)) { start.user$block <- rep(1L, length(start.user$lhs)) } # FIXME: avoid for loop!!!
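        # one possible vectorized alternative (untested sketch, not part of
        # the source; assumes at most one match per row, as the loop below
        # effectively does):
        #   key.pt <- paste(lavpartable$lhs, lavpartable$op,
        #                   lavpartable$rhs, lavpartable$block)
        #   key.us <- paste(start.user$lhs, start.user$op,
        #                   start.user$rhs, start.user$block)
        #   idx <- match(key.pt, key.us)
        #   ok  <- !is.na(idx) & is.finite(start.user$est[idx])
        #   start[ok] <- start.user$est[idx[ok]]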
for(i in 1:length(lavpartable$lhs)) { # find corresponding parameters lhs <- lavpartable$lhs[i] op <- lavpartable$op[i] rhs <- lavpartable$rhs[i] grp <- lavpartable$block[i] start.user.idx <- which(start.user$lhs == lhs & start.user$op == op & start.user$rhs == rhs & start.user$block == grp) if(length(start.user.idx) == 1L && is.finite(start.user$est[start.user.idx])) { start[i] <- start.user$est[start.user.idx] } } } # override if the model syntax contains explicit starting values user.idx <- which(!is.na(lavpartable$ustart)) start[user.idx] <- lavpartable$ustart[user.idx] if(debug) { cat("lavaan DEBUG: lavaanStart\n") print( start ) } start } # backwards compatibility # StartingValues <- lav_start # sanity check: (user-specified) variances smaller than covariances lav_start_check_cov <- function(lavpartable = NULL, start = lavpartable$start) { nblocks <- lav_partable_nblocks(lavpartable) for(g in 1:nblocks) { # collect all non-zero covariances cov.idx <- which(lavpartable$op == "~~" & lavpartable$block == g & lavpartable$lhs != lavpartable$rhs & !lavpartable$exo & start != 0) # for each covariance, use corresponding variances to standardize; # the end result should not exceed abs(1) for(cc in seq_along(cov.idx)) { this.cov.idx <- cov.idx[cc] # find corresponding variances var.lhs <- lavpartable$lhs[this.cov.idx] var.rhs <- lavpartable$rhs[this.cov.idx] var.lhs.idx <- which(lavpartable$op == "~~" & lavpartable$block == g & lavpartable$lhs == var.lhs & lavpartable$lhs == lavpartable$rhs) var.rhs.idx <- which(lavpartable$op == "~~" & lavpartable$block == g & lavpartable$lhs == var.rhs & lavpartable$lhs == lavpartable$rhs) var.lhs.value <- start[var.lhs.idx] var.rhs.value <- start[var.rhs.idx] block.txt <- "" if(nblocks > 1L) { block.txt <- paste(" [in block ", g, "]", sep = "") } # check for zero variances if(var.lhs.value == 0 || var.rhs.value == 0) { # this can only happen if it is user-specified # cov.idx free? set it to zero if(start[this.cov.idx] == 0) { # nothing to do } else if(lavpartable$free[this.cov.idx] > 0L) { warning( "lavaan WARNING: non-zero covariance element set to zero, due to fixed-to-zero variances\n", " variables involved are: ", var.lhs, " ", var.rhs, block.txt) start[this.cov.idx] <- 0 } else { stop("lavaan ERROR: please provide better fixed values for (co)variances;\n", " variables involved are: ", var.lhs, " ", var.rhs, block.txt) } next } # which one is the smallest? 
abs() in case of negative variances if(abs(var.lhs.value) < abs(var.rhs.value)) { var.min.idx <- var.lhs.idx var.max.idx <- var.rhs.idx } else { var.min.idx <- var.rhs.idx var.max.idx <- var.lhs.idx } # check COR <- start[this.cov.idx] / sqrt(var.lhs.value * var.rhs.value) if(!is.finite(COR)) { # force simple values warning( "lavaan WARNING: starting values imply NaN for a correlation value;\n", " variables involved are: ", var.lhs, " ", var.rhs, block.txt) start[var.lhs.idx] <- 1 start[var.rhs.idx] <- 1 start[this.cov.idx] <- 0 } else if(abs(COR) > 1) { warning( "lavaan WARNING: starting values imply a correlation larger than 1;\n", " variables involved are: ", var.lhs, " ", var.rhs, block.txt) # three ways to fix it: rescale cov12, var1 or var2 # we prefer a free parameter, and not user-specified if( lavpartable$free[this.cov.idx] > 0L && is.na(lavpartable$ustart[this.cov.idx])) { start[this.cov.idx] <- start[this.cov.idx] / (COR * 1.1) } else if( lavpartable$free[var.min.idx] > 0L && is.na(lavpartable$ustart[var.min.idx])) { start[var.min.idx] <- start[var.min.idx] * (COR * 1.1)^2 } else if( lavpartable$free[var.max.idx] > 0L && is.na(lavpartable$ustart[var.max.idx])) { start[var.max.idx] <- start[var.max.idx] * (COR * 1.1)^2 # not found? try just a free parameter } else if (lavpartable$free[this.cov.idx] > 0L) { start[this.cov.idx] <- start[this.cov.idx] / (COR * 1.1) } else if( lavpartable$free[var.min.idx] > 0L) { start[var.min.idx] <- start[var.min.idx] * (COR * 1.1)^2 } else if( lavpartable$free[var.max.idx] > 0L) { start[var.max.idx] <- start[var.max.idx] * (COR * 1.1)^2 # nothing? abort } else { stop("lavaan ERROR: please provide better fixed values for (co)variances;\n", " variables involved are: ", var.lhs, " ", var.rhs, block.txt) } } # COR > 1 } # cov.idx } start } lavaan/R/lav_partable_check.R0000644000176200001440000000722012465075714015645 0ustar liggesusers# check if the partable is complete/consistent # we may have added intercepts/variances (user = 0), fixed to zero lav_partable_check <- function(partable, categorical = FALSE, warn = TRUE) { check <- TRUE # check for empty table - or should we WARN? if(length(partable$lhs) == 0) return(check) # get observed/latent variables ov.names <- vnames(partable, "ov.nox") # no need to specify exo?? lv.names <- vnames(partable, "lv") all.names <- c(ov.names, lv.names) ov.names.ord <- vnames(partable, "ov.ord") # if categorical, we should have some ov.names.ord if(categorical && length(ov.names.ord) == 0L) { check <- FALSE if(warn) { warning("lavaan WARNING: parameter table does not contain thresholds ") } } # we should have a (residual) variance for *each* ov/lv # note: if lavaanify() has been used, this is always TRUE var.idx <- which(partable$op == "~~" & partable$lhs == partable$rhs) missing.idx <- which(is.na(match(all.names, partable$lhs[var.idx]))) if(length(missing.idx) > 0L) { check <- FALSE if(warn) { warning("lavaan WARNING: parameter table does not contain (residual) variances for one or more variables: [", paste(all.names[missing.idx], collapse = " "), "]") } } # meanstructure?
meanstructure <- any(partable$op == "~1") # if meanstructure, check for missing intercepts # note if lavaanify() has been used, this is always TRUE if(meanstructure) { # we should have a intercept for *each* ov/lv int.idx <- which(partable$op == "~1") missing.idx <- which(is.na(match(all.names, partable$lhs[int.idx]))) if(length(missing.idx) > 0L) { check <- FALSE if(warn) { warning("lavaan WARNING: parameter table does not contain intercepts for one or more variables: [", paste(all.names[missing.idx], collapse = " "), "]") } } } # ok, now the 'real' checks # do we have added (residual) variances (user = 0) that are fixed to zero? # this is not necessarily problematic! # eg. in latent change score models # therefore, we do NOT give a warning # var.fixed <- which(partable$op == "~~" & # partable$lhs == partable$rhs & # partable$user == 0 & # partable$free == 0) # if(length(var.fixed) > 0L) { # check <- FALSE # if(warn) { # warning("lavaan WARNING: missing (residual) variances are set to zero: [", paste(partable$lhs[var.fixed], collapse = " "), "]") # } # } # do we have added intercepts (user = 0) that are fixed to zero? # this is not necessarily problematic; perhaps only for # exogenous variables? ov.ind <- unique(partable$rhs[partable$op == "=~"]) lv.names <- unique(partable$lhs[partable$op == "=~"]) int.fixed <- which(partable$op == "~1" & partable$user == 0 & partable$free == 0 & partable$ustart == 0 & # do not include factors !partable$lhs %in% lv.names & # do not include ordered variables !partable$lhs %in% ov.names.ord & # do not include indicators !partable$lhs %in% ov.ind) if(length(int.fixed) > 0L) { check <- FALSE if(warn) { warning("lavaan WARNING: missing intercepts are set to zero: [", paste(partable$lhs[int.fixed], collapse = " "), "]") } } # return check code check } lavaan/R/lav_options.R0000644000176200001440000010230513053261254014377 0ustar liggesusers# initial version YR 02/08/2010 # YR 28 Jan 2017: add lavOptions(), lav_options_default() # public function lavOptions <- function(x = NULL, default = NULL, mimic = "lavaan") { lavoptions <- lav_options_default(mimic = mimic) # selection only if(!is.null(x)) { if(is.character(x)) { # lower case only x <- tolower(x) # check if x is in names(lavoptions) not.ok <- which(!x %in% names(lavoptions)) if(length(not.ok) > 0L) { # only warn if multiple options were requested if(length(x) > 1L) { warning("lavaan WARNING: option `", x[not.ok], "' not available") } x <- x[ -not.ok ] } # return requested option(s) if(length(x) == 0L) { return(default) } else { lavoptions[x] } } else { stop("lavaan ERROR: `x' must be a character string") } } else { lavoptions } } # set the default options (including unspecified values "default") lav_options_default <- function(mimic = "lavaan") { opt <- list(model.type = "sem", # global mimic = "lavaan", # model modifiers meanstructure = "default", int.ov.free = FALSE, int.lv.free = FALSE, conditional.x = "default", # or FALSE? fixed.x = "default", # or FALSE? 
orthogonal = FALSE, std.lv = FALSE, parameterization = "default", auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = FALSE, auto.cov.lv.x = FALSE, auto.cov.y = FALSE, auto.th = FALSE, auto.delta = FALSE, # full data std.ov = FALSE, missing = "default", # summary data sample.cov.rescale = "default", ridge = 1e-5, # multiple groups group = NULL, group.label = NULL, group.equal = '', group.partial = '', group.w.free = FALSE, # clusters cluster = NULL, level.label = NULL, # estimation estimator = "default", likelihood = "default", link = "default", representation = "default", do.fit = TRUE, # inference information = "default", h1.information = "structured", #h1.information.se = "structured", #h1.information.test = "structured", se = "default", test = "default", bootstrap = 1000L, observed.information = "hessian", # optimization control = list(), optim.method = "nlminb", optim.method.cor = "nlminb", optim.force.converged = FALSE, optim.gradient = "analytic", optim.init_nelder_mead = FALSE, # numerical integration integration.ngh = 21L, # parallel parallel = "no", ncpus = 1L, cl = NULL, iseed = NULL, # zero values zero.add = "default", zero.keep.margins = "default", zero.cell.warn = TRUE, # starting values start = "default", # sanity checks check = c("start", "post"), # verbosity verbose = FALSE, warn = TRUE, debug = FALSE) opt } # this function collects and checks the user-provided options/arguments, # and fills in the "default" values, or changes them in an attempt to # produce a consistent set of values... # # returns a list with the named options lav_options_set <- function(opt = NULL) { if(opt$debug) { cat("lavaan DEBUG: lavaanOptions IN\n"); str(opt) } if(opt$debug) { opt$partrace <- TRUE } else { opt$partrace <- FALSE } # everything lowercase opt.old <- opt opt <- lapply(opt, function(x) { if(is.character(x)) tolower(x) else x}) # except group, group.partial, which may contain capital letters opt$group <- opt.old$group opt$group.partial <- opt.old$group.partial # do.fit implies se="none" and test="none" (unless they are explicitly set) if(!opt$do.fit) { if(opt$se == "default") { opt$se <- "none" } if(opt$test == "default") { opt$test <- "none" } } # mimic if(opt$mimic == "default" || opt$mimic == "lavaan") { opt$mimic <- "lavaan" } else if(opt$mimic == "mplus") { opt$mimic <- "Mplus" } else if(opt$mimic == "eqs") { opt$mimic <- "EQS" } else if(opt$mimic == "lisrel") { cat("Warning: mimic=\"LISREL\" is not ready yet.
Using EQS instead.\n") opt$mimic <- "EQS" } else { stop("mimic must be \"lavaan\", \"Mplus\" or \"EQS\" \n") } # group.equal and group.partial if(opt$group.equal[1] == "none") { opt$group.equal <- character(0) } else if(is.null(opt$group.equal) || nchar(opt$group.equal) == 0L) { if(opt$mimic == "Mplus" && !is.null(opt$group)) { if(opt$categorical) { opt$group.equal <- c("loadings", "thresholds") } else { opt$group.equal <- c("loadings", "intercepts") } } else { opt$group.equal <- character(0) } } else if(length(opt$group.equal) == 0) { # nothing to do } else if(all(opt$group.equal %in% c("loadings", "intercepts", "means", "regressions", "residuals", "residual.covariances", "thresholds", "lv.variances", "lv.covariances"))) { # nothing to do } else { stop("unknown value for `group.equal' argument: ", opt$group.equal, "\n") } if(is.null(opt$group.partial) || nchar(opt$group.partial) == 0L) { opt$group.partial <- character(0) } else if(length(opt$group.partial) == 0) { # nothing to do } else { # strip white space opt$group.partial <- gsub("[[:space:]]+", "", opt$group.partial) } # if categorical, and group.equal contains "intercepts", also add # thresholds (and vice versa) if(opt$categorical && "intercepts" %in% opt$group.equal) { opt$group.equal <- unique(c(opt$group.equal, "thresholds")) } if(opt$categorical && "thresholds" %in% opt$group.equal) { opt$group.equal <- unique(c(opt$group.equal, "intercepts")) } # representation if(opt$representation == "default") { opt$representation <- "LISREL" } else if(opt$representation == "lisrel") { opt$representation <- "LISREL" } else if(opt$representation == "eqs" || opt$representation == "bentler-weeks") { opt$representation <- "EQS" } else { stop("representation must be \"LISREL\" or \"EQS\" \n") } # missing if(opt$missing == "default") { if(opt$mimic == "Mplus" && !opt$categorical && opt$estimator %in% c("default", "ml", "mlr")) { # since version 5? 
opt$missing <- "ml" # check later if this is ok } else { opt$missing <- "listwise" } } else if(opt$missing %in% c("ml", "direct", "fiml")) { opt$missing <- "ml" if(opt$estimator %in% c("mlm", "mlmv", "gls", "wls", "wlsm", "wlsmv", "uls", "ulsm", "ulsmv", "pml")) { stop("lavaan ERROR: missing=\"ml\" is not allowed for estimator MLM, MLMV, GLS, ULS, ULSM, ULSMV, DWLS, WLS, WLSM, WLSMV, PML") } } else if(opt$missing %in% c("two.stage", "twostage", "two-stage", "two.step", "twostep", "two-step")) { opt$missing <- "two.stage" if(opt$categorical) { stop("lavaan ERROR: missing=\"two.stage\" not available in the categorical setting") } if(opt$estimator %in% c("mlm", "mlmv", "gls", "wls", "wlsm", "wlsmv", "uls", "ulsm", "ulsmv", "pml", "mml")) { stop("lavaan ERROR: missing=\"two.stage\" is not allowed for estimator MLM, MLMV, GLS, ULS, ULSM, ULSMV, DWLS, WLS, WLSM, WLSMV, PML, MML") } } else if(opt$missing %in% c("robust.two.stage", "robust.twostage", "robust.two-stage", "robust-two-stage", "robust.two.step", "robust.twostep", "robust-two-step")) { opt$missing <- "robust.two.stage" if(opt$categorical) { stop("lavaan ERROR: missing=\"robust.two.stage\" not available in the categorical setting") } if(opt$estimator %in% c("mlm", "mlmv", "gls", "wls", "wlsm", "wlsmv", "uls", "ulsm", "ulsmv", "pml", "mml")) { stop("lavaan ERROR: missing=\"robust.two.stage\" is not allowed for estimator MLM, MLMV, GLS, ULS, ULSM, ULSMV, DWLS, WLS, WLSM, WLSMV, PML, MML") } } else if(opt$missing == "listwise") { # nothing to do } else if(opt$missing == "pairwise") { # nothing to do } else if(opt$missing == "available.cases") { # nothing to do, or warn if not categorical? } else if(opt$missing == "doubly.robust") { if(opt$estimator != "pml") { stop("lavaan ERROR: doubly.robust option only available for estimator PML") } } else if(opt$missing == "doubly_robust") { opt$missing <- "doubly.robust" if(opt$estimator != "pml") { stop("lavaan ERROR: doubly.robust option only available for estimator PML") } } else if(opt$missing == "available_cases") { opt$missing <- "available.cases" } else { stop("unknown value for `missing' argument: ", opt$missing, "\n") } # default test statistic if(opt$test == "default") { if(opt$missing == "two.stage" || opt$missing == "robust.two.stage") { opt$test <- "satorra.bentler" } else { opt$test <- "standard" } } else if(opt$test %in% c("none", "standard")) { # nothing to do } else if(opt$test == "satorra" || opt$test == "sb" || opt$test == "SB" || opt$test == "satorra.bentler" || opt$test == "satorra-bentler") { opt$test <- "satorra.bentler" } else if(opt$test == "yuan" || opt$test == "yb" || opt$test == "YB" || opt$test == "yuan.bentler" || opt$test == "yuan-bentler") { opt$test <- "yuan.bentler" } else if(opt$test == "m.adjusted" || opt$test == "m" || opt$test == "mean.adjusted" || opt$test == "mean-adjusted") { opt$test <- "satorra.bentler" } else if(opt$test == "mean.var.adjusted" || opt$test == "mean-var-adjusted" || opt$test == "mv" || opt$test == "second.order" || opt$test == "satterthwaite" || opt$test == "Satterthwaite" || opt$test == "mv.adjusted") { opt$test <- "mean.var.adjusted" } else if(opt$test == "mplus6" || opt$test == "scale.shift" || opt$test == "scaled.shifted") { opt$test <- "scaled.shifted" } else if(opt$test == "bootstrap" || opt$test == "boot" || opt$test == "bollen.stine" || opt$test == "bollen-stine") { opt$test <- "bollen.stine" } else { stop("`test' argument must one of \"none\", \"standard\", \"satorra.bentler\", \"yuan.bentler\", \"mean.var.adjusted\", 
\"scaled.shifted\", \"bollen.stine\", or \"bootstrap\"") } # check missing if(opt$missing == "ml" && opt$se == "robust.sem") { warning("lavaan WARNING: missing will be set to ", dQuote("listwise"), " for se = ", dQuote(opt$se) ) opt$missing <- "listwise" } if(opt$missing == "ml" && opt$test %in% c("satorra.bentler", "mean.var.adjusted", "scaled.shifted")) { warning("lavaan WARNING: missing will be set to ", dQuote("listwise"), " for test = ", dQuote(opt$test) ) opt$missing <- "listwise" } # missing = "two.stage" if(opt$missing == "two.stage" || opt$missing == "robust.two.stage") { opt$meanstructure <- TRUE # se if(opt$se == "default") { if(opt$missing == "two.stage") { opt$se <- "two.stage" } else { opt$se <- "robust.two.stage" } } else if(opt$missing == "two.stage" && opt$se == "two.stage") { # nothing to do } else if(opt$missing == "robust.two.stage" && opt$se == "robust.two.stage") { # nothing to do } else { warning("lavaan WARNING: se will be set to ", dQuote(opt$missing), " if missing = ", dQuote(opt$missing) ) opt$se <- opt$missing } # information if(opt$information == "default") { # for both two.stage and robust.two.stage opt$information <- "observed" } else if(opt$information == "first.order") { warning("lavaan WARNING: information will be set to ", dQuote("observed"), " if missing = ", dQuote(opt$missing) ) opt$information <- "observed" } # observed.information (ALWAYS "h1" for now) opt$observed.information <- "h1" # test if(opt$test == "default" || opt$test == "satorra.bentler") { opt$test <- "satorra.bentler" } else { warning("lavaan WARNING: test will be set to ", dQuote("satorra.bentler"), " if missing = ", dQuote(opt$missing) ) opt$test <- "satorra.bentler" } } # meanstructure if(is.logical(opt$meanstructure)) { if(opt$meanstructure == FALSE) { # user explicitly wants meanstructure == FALSE # check for conflicting arguments if(opt$estimator %in% c("mlm", "mlmv", "mlr", "mlf", "ulsm", "ulsmv", "wlsm", "wlsmv", "pml")) { warning("lavaan WARNING: estimator forces meanstructure = TRUE") } if(opt$missing %in% c("ml", "two.stage")) { warning("lavaan WARNING: missing argument forces meanstructure = TRUE") } } } else if(opt$meanstructure == "default") { # by default: no meanstructure! opt$meanstructure <- FALSE # unless there is a group argument? 
(added since 0.4-10) # if(!is.null(opt$group)) opt$meanstructure <- TRUE } else { stop("meanstructure must be TRUE, FALSE or \"default\"\n") } # estimator and se if(opt$se == "boot" || opt$se == "bootstrap") { opt$se <- "bootstrap" opt$information <- "observed" opt$bootstrap <- as.integer(opt$bootstrap) stopifnot(opt$bootstrap > 0L) } # default estimator if(opt$estimator == "default") { if(opt$categorical) { opt$estimator <- "wlsmv" } else { opt$estimator <- "ml" } } # backwards compatibility (0.4 -> 0.5) if(opt$se == "robust.mlm") opt$se <- "robust.sem" if(opt$se == "robust.mlr") opt$se <- "robust.huber.white" if(opt$estimator == "ml") { opt$estimator <- "ML" if(opt$se == "default") { opt$se <- "standard" } else if(opt$se %in% c("first.order","bootstrap", "none", "external", "standard", "robust.huber.white", "two.stage", "robust.two.stage", "robust.sem")) { # nothing to do } else if(opt$se == "robust") { if(opt$missing == "ml") { opt$se <- "robust.huber.white" } else if(opt$missing == "two.stage") { opt$se <- "robust.two.stage" } else { opt$se <- "robust.sem" } } else { stop("unknown value for `se' argument when estimator is ML: ", opt$se, "\n") } } else if(opt$estimator == "mlm" || opt$estimator == "mlmv" || opt$estimator == "mlmvs") { est.orig <- opt$estimator if(opt$test != "none") { if(opt$estimator == "mlm") { opt$test <- "satorra.bentler" } else if(opt$estimator == "mlmv") { opt$test <- "scaled.shifted" } else if(opt$estimator == "mlmvs") { opt$test <- "mean.var.adjusted" } } opt$estimator <- "ML" opt$meanstructure <- TRUE if(opt$se == "bootstrap") stop("use ML estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(!(opt$information %in% c("expected", "default"))) { warning("lavaan WARNING: information will be set to ", dQuote("expected"), " for estimator = ", dQuote(toupper(est.orig)) ) } opt$information <- "expected" opt$missing <- "listwise" } else if(opt$estimator == "mlf") { opt$estimator <- "ML" opt$meanstructure <- TRUE if(opt$se == "bootstrap") stop("use ML estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "first.order" } else if(opt$estimator == "mlr") { opt$estimator <- "ML" if(opt$se == "bootstrap") stop("use ML estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.huber.white" if(opt$test != "none" && opt$se != "external") opt$test <- "yuan.bentler" opt$meanstructure <- TRUE } else if(opt$estimator == "gls") { opt$estimator <- "GLS" if(opt$se == "default" || opt$se == "standard") { opt$se <- "standard" } else if(opt$se == "none" || opt$se == "bootstrap" || opt$se == "external") { # nothing to do } else { stop("invalid value for `se' argument when estimator is GLS: ", opt$se, "\n") } if(!opt$test %in% c("standard","none")) { stop("invalid value for `test' argument when estimator is GLS: ", opt$test, "\n") } opt$missing <- "listwise" } else if(opt$estimator == "ntrls") { opt$estimator <- "NTRLS" if(opt$se == "default" || opt$se == "standard") { opt$se <- "standard" } else if(opt$se == "none" || opt$se == "bootstrap" || opt$se == "external") { # nothing to do } else { stop("invalid value for `se' argument when estimator is NTRLS: ", opt$se, "\n") } if(!opt$test %in% c("standard","none")) { stop("invalid value for `test' argument when estimator is NTRLS: ", opt$test, "\n") } opt$missing <- "listwise" } else if(opt$estimator == "wls") { opt$estimator <- "WLS" if(opt$se == "default" || opt$se == "standard") { opt$se <- "standard" } else if(opt$se == "none" || opt$se == 
"bootstrap" || opt$se == "external") { # nothing to do } else if(opt$se == "robust.sem") { # nothing to do } else if(opt$se == "robust") { opt$se <- "robust.sem" } else { stop("invalid value for `se' argument when estimator is WLS: ", opt$se, "\n") } if(!opt$test %in% c("standard","none")) { stop("invalid value for `test' argument when estimator is WLS: ", opt$test, "\n") } #opt$missing <- "listwise" } else if(opt$estimator == "dwls") { opt$estimator <- "DWLS" if(opt$se == "default" || opt$se == "standard") { opt$se <- "standard" } else if(opt$se == "none" || opt$se == "bootstrap" || opt$se == "external") { # nothing to do } else if(opt$se == "robust.sem") { # nothing to do } else if(opt$se == "robust") { opt$se <- "robust.sem" } else { stop("invalid value for `se' argument when estimator is DWLS: ", opt$se, "\n") } if(!opt$test %in% c("standard","none","satorra.bentler", "mean.adjusted", "mean.var.adjusted","scaled.shifted")) { stop("invalid value for `test' argument when estimator is DWLS: ", opt$test, "\n") } #opt$missing <- "listwise" } else if(opt$estimator == "wlsm") { opt$estimator <- "DWLS" if(opt$se == "bootstrap") stop("use (D)WLS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "satorra.bentler" #opt$missing <- "listwise" } else if(opt$estimator == "wlsmv") { opt$estimator <- "DWLS" if(opt$se == "bootstrap") stop("use (D)WLS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "scaled.shifted" #opt$missing <- "listwise" } else if(opt$estimator == "wlsmvs") { opt$estimator <- "DWLS" if(opt$se == "bootstrap") stop("use (D)WLS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "mean.var.adjusted" #opt$missing <- "listwise" } else if(opt$estimator == "uls") { opt$estimator <- "ULS" if(opt$se == "default" || opt$se == "standard") { opt$se <- "standard" } else if(opt$se == "none" || opt$se == "bootstrap" || opt$se == "external") { # nothing to do } else if(opt$se == "robust.sem") { # nothing to do } else if(opt$se == "robust") { opt$se <- "robust.sem" } else { stop("invalid value for `se' argument when estimator is ULS: ", opt$se, "\n") } if(!opt$test %in% c("standard","none", "satorra.bentler", "mean.adjusted", "mean.var.adjusted","scaled.shifted")) { stop("invalid value for `test' argument when estimator is ULS: ", opt$test, "\n") } #opt$missing <- "listwise" } else if(opt$estimator == "ulsm") { opt$estimator <- "ULS" if(opt$se == "bootstrap") stop("use ULS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "satorra.bentler" #opt$missing <- "listwise" } else if(opt$estimator == "ulsmv") { opt$estimator <- "ULS" if(opt$se == "bootstrap") stop("use ULS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "scaled.shifted" #opt$missing <- "listwise" } else if(opt$estimator == "ulsmvs") { opt$estimator <- "ULS" if(opt$se == "bootstrap") stop("use ULS estimator for bootstrap") if(opt$se != "none" && opt$se != "external") opt$se <- "robust.sem" if(opt$test != "none") opt$test <- "mean.var.adjusted" #opt$missing <- "listwise" } else if(opt$estimator == "pml") { opt$estimator <- "PML" opt$information <- "observed" if(opt$se == "default") opt$se <- "robust.huber.white" if(opt$test != "none") opt$test <- 
"mean.var.adjusted" #opt$missing <- "listwise" } else if(opt$estimator %in% c("fml","umn")) { opt$estimator <- "FML" opt$information <- "observed" if(opt$se == "default") opt$se <- "standard" if(opt$test != "none") opt$test <- "standard" #opt$missing <- "listwise" } else if(opt$estimator == "reml") { opt$estimator <- "REML" opt$information <- "observed" if(opt$se == "default") opt$se <- "standard" if(opt$test != "none") opt$test <- "standard" opt$missing <- "listwise" } else if(opt$estimator %in% c("mml")) { opt$estimator <- "MML" opt$information <- "observed" if(opt$se == "default") opt$se <- "standard" if(opt$test == "default") opt$test <- "none" #opt$missing <- "listwise" if(opt$link == "default") { #opt$link <- "logit" opt$link <- "probit" } else if(opt$link %in% c("logit","probit")) { # nothing to do } else { stop("lavaan ERROR: link must be `logit' or `probit'") } # check for parameterization if(opt$parameterization == "default") { opt$parameterization <- "mml" } else { stop("lavaan WARNING: parameterization argument is ignored if estimator = MML") } } else if(opt$estimator == "none") { if(opt$se == "default") { opt$se <- "none" } if(opt$test == "default") { opt$test <- "none" } } else { stop("unknown value for `estimator' argument: ", opt$estimator, "\n") } # special stuff for categorical if(opt$categorical) { opt$meanstructure <- TRUE # Mplus style if(opt$estimator == "ML") { stop("lavaan ERROR: estimator ML for ordered data is not supported yet. Use WLSMV instead.") } } # link if(opt$link == "logit") { if(opt$estimator != "mml") { warning("lavaan WARNING: link will be set to ", dQuote("probit"), " for estimator = ", dQuote(opt$estimator) ) } } # likelihood approach (wishart or normal) + sample.cov.rescale if(!opt$estimator %in% c("ML", "REML", "PML", "FML","NTRLS")) { if(opt$likelihood != "default") { stop("likelihood argument is only relevant if estimator = ML") } if(opt$sample.cov.rescale == "default") { opt$sample.cov.rescale <- FALSE } else { warning("sample.cov.rescale argument is only relevant if estimator = ML") } } else { # ml and friends if(opt$estimator %in% c("PML", "FML")) { opt$likelihood <- "normal" } else if(opt$likelihood == "default") { opt$likelihood <- "normal" if(opt$mimic == "EQS" || opt$mimic == "LISREL" || opt$mimic == "AMOS") { opt$likelihood <- "wishart" } } else if(opt$likelihood == "wishart" || opt$likelihood == "normal") { # nothing to do } else { stop("invalid value for `likelihood' argument: ", opt$likelihood, "\n") } if(opt$sample.cov.rescale == "default") { opt$sample.cov.rescale <- FALSE if(opt$likelihood == "normal") { opt$sample.cov.rescale <- TRUE } } else if(!is.logical(opt$sample.cov.rescale)) { stop("sample.cov.rescale must be either \"default\", TRUE, or FALSE") } else { # nothing to do } } # information if(opt$information == "default") { if(opt$missing == "ml" || opt$se == "robust.huber.white" || opt$se == "first.order") { #nchar(opt$constraints) > 0L) { opt$information <- "observed" } else { opt$information <- "expected" } } else if(opt$information %in% c("observed", "expected")) { # nothing to do } else { stop("information must be either \"expected\" or \"observed\"\n") } if(opt$h1.information == "structured" || opt$h1.information == "unstructured") { # nothing to do } else { stop("lavaan ERROR: h1.information must be either \"structured\" or \"unstructured\"\n") } #if(opt$h1.information.test == "structured" || # opt$h1.information.test == "unstructured") { # # nothing to do #} else { # stop("lavaan ERROR: h1.information.se must be either 
\"structured\" or \"unstructured\"\n") #} # check information if se == "robust.sem" if(opt$se == "robust.sem" && opt$information == "observed") { warning("lavaan WARNING: information will be set to ", dQuote("expected"), " for se = ", dQuote(opt$se)) opt$information <- "expected" } # conditional.x if(is.logical(opt$conditional.x)) { } else if(opt$conditional.x == "default") { if(opt$estimator == "ML" && (opt$mimic == "Mplus" || opt$mimic == "lavaan")) { opt$conditional.x <- FALSE } else if(opt$categorical) { opt$conditional.x <- TRUE } else { opt$conditional.x <- FALSE } } else { stop("lavaan ERROR: conditional.x must be TRUE, FALSE or \"default\"\n") } # if conditional.x, always use a meanstructure if(opt$conditional.x) { opt$meanstructure <- TRUE } # fixed.x if(is.logical(opt$fixed.x)) { if(opt$conditional.x && opt$fixed.x == FALSE) { stop("lavaan ERROR: fixed.x = FALSE is not supported when conditional.x = TRUE.") } } else if(opt$fixed.x == "default") { if(opt$estimator %in% c("MML", "ML") && (opt$mimic == "Mplus" || opt$mimic == "lavaan")) { opt$fixed.x <- TRUE } else if(opt$conditional.x) { opt$fixed.x <- TRUE } else { opt$fixed.x <- FALSE } } else { stop("lavaan ERROR: fixed.x must be TRUE, FALSE or \"default\"\n") } # meanstructure again if(opt$missing == "ml" || opt$model.type == "growth") { opt$meanstructure <- TRUE } if("intercepts" %in% opt$group.equal || "means" %in% opt$group.equal) { opt$meanstructure <- TRUE } if(opt$se == "robust.huber.white" || opt$se == "robust.sem" || opt$test == "satorra.bentler" || opt$test == "mean.var.adjusted" || opt$test == "scaled.shifted" || opt$test == "yuan.bentler") { opt$meanstructure <- TRUE } stopifnot(is.logical(opt$meanstructure)) stopifnot(is.logical(opt$verbose)) stopifnot(is.logical(opt$warn)) if(opt$debug) { opt$verbose <- opt$warn <- TRUE } # zero cell frequencies if(is.character(opt$zero.add) && opt$zero.add == "default") { # default: c(0.5, 0.0) opt$zero.add <- c(0.5, 0.0) # FIXME: TODO: mimic EQS , LISREL (0.0, 0.0) } else if(is.numeric(opt$zero.add)) { if(length(opt$zero.add) == 1L) { opt$zero.add <- c(opt$zero.add, opt$zero.add) } else if(length(opt$zero.add) > 2L) { warning("lavaan WARNING: argument `zero.add' only uses the first two numbers") opt$zero.add <- opt$zero.add[1:2] } } else { stop("lavaan ERROR: argument `zero.add' must be numeric or \"default\"") } if(is.character(opt$zero.keep.margins) && opt$zero.keep.margins == "default") { if(opt$mimic %in% c("lavaan", "Mplus")) { opt$zero.keep.margins <- TRUE } else { opt$zero.keep.margins <- FALSE } } else if(is.logical(opt$zero.keep.margins)) { # nothing to do } else { stop("lavaan ERROR: argument `zero.keep.margins' must be logical or \"default\"") } # parameterization if(opt$parameterization == "default") { # for now, default is always delta opt$parameterization <- "delta" } else if(opt$parameterization %in% c("delta", "theta", "mml")) { # nothing to do } else { stop("lavaan ERROR: argument `parameterization' should be `delta' or `theta'") } if(opt$debug) { cat("lavaan DEBUG: lavaanOptions OUT\n"); str(opt) } opt } lavaan/R/lav_probit.R0000644000176200001440000001574612465075714014231 0ustar liggesusers# ordered probit regression # # YR 21 June 2012 # # why not using MASS::polr? # - it does not deal with binary responses (must use glm.fit instead) # - we need scores # - Newton-Raphson is much faster # - allow for empty X, just to get thresholds (and scores) # - however, we do NOT force thresholds to be strictly positive! 
# (which is mainly a problem if you allow for 'zero' frequencies, I think) # NOTE: X should NOT contain a column of 1's for the intercept!! # wrapper function lavProbit <- function(y, X=NULL, y.levels=length(tabulate(y)), weights = rep(1, length(y)), offset = rep(0, length(y)), fast=FALSE, method = "nlminb.hessian", control = list(), verbose = FALSE) { # sanity check y.freq <- tabulate(y, nbins=y.levels) if(!missing(y.levels) && y.levels < length(y.freq)) stop("y.levels smaller than number of categories in y") if(y.freq[1L] == 0L) warning("first category of y has zero observations") if(y.freq[y.levels] == 0L) warning("last category of y has zero observations") y.middle <- y.freq[-c(1L, y.levels)] if(length(y.middle) > 0L && any(y.middle == 0L)) stop("zero counts in middle categories; please recode") # initialize ref class lavR <- lavRefProbit$new(y = y, X = X, y.levels=y.levels, weights = weights, offset = offset) # optimize (only if X) if(!is.null(X)) { lavR$optimize(method = method, control = control, verbose = verbose) } else { lavR$theta <- lavR$start() lavR$lik() # initialize all elements (z1,z2,Y1,Y2) } lavR } # lavRefProbit # # classic probit regression lavRefProbit <- setRefClass("lavProbit", # inherits contains = "lavML", # fields fields = list(y = "integer", X = "matrix", nobs = "integer", nexo = "integer", nth = "integer", weights = "numeric", offset = "numeric", missing.values = "logical", missing.idx = "integer", Y1 = "matrix", Y2 = "matrix", th.idx = "integer", slope.idx = "integer", # cache: # this doesn't result in better speed; but makes the code cleaner z1 = "numeric", z2 = "numeric", probits = "numeric", p1 = "numeric", p2 = "numeric"), # methods methods = list( initialize = function(y, X=NULL, y.levels=length(tabulate(y)), weights = rep(1, length(y)), offset = rep(0, length(y))) { # y y <<- as.integer(y); nth <<- as.integer(y.levels - 1L) nobs <<- length(y) # X if(is.null(X)) { nexo <<- 0L } else { X <<- unname(X); nexo <<- ncol(X) } if(any(is.na(y)) || (!is.null(X) && any(is.na(X)) )) { missing.values <<- TRUE missing.idx <<- which(apply(cbind(y, X), 1, function(x) any(is.na(x)))) } else { missing.values <<- FALSE } # weights and offset weights <<- weights; offset <<- offset # TH matrices (TRUE/FALSE) Y1 <<- matrix(1:nth, nobs, nth, byrow=TRUE) == .self$y Y2 <<- matrix(1:nth, nobs, nth, byrow=TRUE) == (.self$y - 1L) # indices of free parameters th.idx <<- 1:nth slope.idx <<- seq_len(nexo) + nth # set up for Optim npar <<- nth + nexo start(); theta <<- theta.start th.lab <- paste("th", seq_len(nth), sep="") sl.lab <- character(0) if(nexo > 0L) sl.lab <- paste("beta",seq_len(nexo),sep="") theta.labels <<- c(th.lab, sl.lab) }, start = function() { if(nth == 1L && nexo > 0L) { th.start <- 0 } else { th.start <- pc_th(freq=tabulate(y, nbins=nth+1L)) # unconditional th's } beta.start <- rep(0, nexo) #Y <- as.numeric(y); range <- 16; Y <- Y*range/nexo #fit.ols <- lavOLS(y=Y, X=X, weights=weights, offset=offset) #beta.start <- fit.ols$theta[fit.ols$slope.idx[-1L]] #print(beta.start) theta.start <<- c( th.start, beta.start ) }, # formulas: see Maddala 1983 pages 48 + 49 lik = function(x) { if(!missing(x)) theta <<- x th <- theta[1:nth]; TH <- c(-Inf, th, +Inf); beta <- theta[-c(1:nth)] if(nexo > 0L) { eta <- drop(X %*% beta) + offset } else { eta <- numeric(nobs) } z1 <<- pmin( 100, TH[y+1L ] - eta) z2 <<- pmax(-100, TH[y+1L-1L] - eta) probits <<- pnorm(z1) - pnorm(z2) probits }, scores = function(x) { if(!missing(x)) lik(x) if(length(probits) == 0L) lik() p1 <<- dnorm(z1); p2 
<<- dnorm(z2) # th scores.th <- -1 * (Y2*p2 - Y1*p1) * (weights/probits) # beta scores.beta <- matrix(0, length(p1), 0L) if(nexo > 0L) scores.beta <- -1 * weights*(p1 - p2)/probits * X cbind(scores.th, scores.beta) }, #gradient = function(x) { # if(!missing(x)) objective(x) # p1 <<- dnorm(z1); p2 <<- dnorm(z2) # # # beta # dx.beta <- numeric(0L) # if(nexo > 0L) # dx.beta <- crossprod(X, weights*(p1 - p2)/probits) # # th # dx.th <- crossprod(Y2*p2 - Y1*p1, weights/probits) # # c(dx.th, dx.beta) #}, hessian = function(x) { if(!missing(x)) { lik(x); gradient() } #cat("hessian num = \n"); print(round(numDeriv::hessian(func=.self$objective, x=x),3)) if(length(probits) == 0L) lik(); scores() # not initialized gnorm <- function(x) { -x * dnorm(x) } wtpr <- weights/probits dxa <- Y1*p1 - Y2*p2 # handle missing values -- FIXME!!! better approach? # we could also adapt crossprod, to work pairwise... if(missing.values) { .probits <- probits[-missing.idx] .wtpr <- wtpr[-missing.idx] .dxa <- dxa[-missing.idx,,drop=FALSE] .Y1 <- Y1[-missing.idx,,drop=FALSE] .Y2 <- Y2[-missing.idx,,drop=FALSE] .z1 <- z1[-missing.idx] .z2 <- z2[-missing.idx] .X <- X[-missing.idx,,drop=FALSE] .p1 <- p1[-missing.idx] .p2 <- p2[-missing.idx] } else { .probits <- probits .wtpr <- wtpr .dxa <- dxa .Y1 <- Y1 .Y2 <- Y2 .z1 <- z1 .z2 <- z2 .X <- X .p1 <- p1 .p2 <- p2 } dx2.alpha <- -1 * (crossprod(.dxa, (.dxa * .wtpr / .probits)) - ( crossprod(.Y1 * gnorm(.z1) * .wtpr, .Y1) - crossprod(.Y2 * gnorm(.z2) * .wtpr, .Y2) ) ) # only for empty X if(nexo == 0L) return(dx2.alpha) dxb <- .X*.p1 - .X*.p2 dx2.beta <- -1 * (crossprod(dxb, (dxb * .wtpr / .probits)) - ( crossprod(.X * gnorm(.z1) * .wtpr, .X) - crossprod(.X * gnorm(.z2) * .wtpr, .X) ) ) dx.ab <- crossprod(.dxa, (dxb * .wtpr / .probits)) - ( crossprod(.Y1 * gnorm(.z1) * .wtpr, .X) - crossprod(.Y2 * gnorm(.z2) * .wtpr, .X) ) rbind( cbind(dx2.alpha, dx.ab, deparse.level=0), cbind(t(dx.ab), dx2.beta, deparse.level=0) ) }, minObjective = function(x) { -logl(x) }, minGradient = function(x) { -gradient(x) }, minHessian = function(x) { -hessian(x) } )) lavaan/R/lav_residuals.R0000644000176200001440000003577613053006554014720 0ustar liggesusers#lav_residuals_samplestats <- function(lavobject, h1 = FALSE) { # # # unstandardized residuals # obsList <- lav_object_inspect_sampstat(lavobject, h1 = h1, # add.labels = TRUE) # estList <- # #} setMethod("residuals", "lavaan", function(object, type="raw", labels=TRUE) { # lowercase type type <- tolower(type) # catch type="casewise" if(type %in% c("casewise","case","obs","observations","ov")) { return( lav_residuals_casewise(object, labels = labels) ) } # checks if(type %in% c("normalized", "standardized")) { if(object@Options$estimator != "ML") { stop("standardized and normalized residuals only availabe if estimator = ML (or MLF, MLR, MLM\n") } if(object@optim$npar > 0L && !object@optim$converged) { stop("lavaan ERROR: model dit not converge") } if(object@Model@conditional.x && type == "standardized") { stop("lavaan ERROR: resid + standardized + conditional.x not supported yet") } if(object@Model@conditional.x && type == "normalized") { stop("lavaan ERROR: resid + normalized + conditional.x not supported yet") } } # NOTE: for some reason, Mplus does not compute the normalized/standardized # residuals if estimator = MLM !!! 
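    # overview of the residual types handled below (all derivable from the
    # branches that follow):
    #  - "raw":          observed minus model-implied sample moments
    #  - "cor.bollen":   difference between observed and model-implied
    #                    *correlation* matrices (both passed through cov2cor)
    #  - "cor.bentler":  raw residuals rescaled by the observed standard
    #                    deviations (EQS style)
    #  - "normalized":   raw residuals divided by the (sqrt of the) estimated
    #                    sampling variance of the observed moments
    #  - "standardized": raw residuals divided by the (sqrt of the) difference
    #                    between the sampling variances of the observed and
    #                    the model-implied moments
    # e.g., residuals(fit, type = "cor.bollen") or resid(fit, "standardized"),
    # where `fit' is a placeholder for a fitted lavaan object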
# check type if(!type %in% c("raw", "cor", "cor.bollen", "cor.bentler", "cor.eqs", "normalized", "standardized", "casewise")) { stop("type must be one of \"raw\", \"cor\", \"cor.bollen\", \"cor.bentler\", \"normalized\" or \"standardized\" or \"casewise\"") } # if cor, choose 'default' if(type == "cor") { if(object@Options$mimic == "EQS") { type <- "cor.bentler" } else { type <- "cor.bollen" } } # check for 0 parameters if type == standardized if(type == "standardized" && object@optim$npar == 0) { stop("lavaan ERROR: can not compute standardized residuals if there are no free parameters in the model") } G <- object@Model@nblocks meanstructure <- object@Model@meanstructure ov.names <- object@Data@ov.names # if type == standardized, we need VarCov and Delta if(type == "standardized") { # fixed.x idx? x.idx <- integer(0) if(object@Options$fixed.x) { x.idx <- match(vnames(object@ParTable, "ov.x", block=1L), object@Data@ov.names[[1L]]) ### FIXME!!!! will not ### work for different } ### models in groups if(length(x.idx) > 0L) { # we need to: # 1) to `augment' VarCov and Delta with the fixed.x elements # 2) set cov between free and fixed.x elements in VarCov to zero # create 'augmented' User object (as if fixed.x=FALSE was used) augUser <- object@ParTable idx <- which(augUser$exo > 0L) augUser$exo[ idx ] <- 0L augUser$free[ idx ] <- max(augUser$free) + 1:length(idx) #augUser$unco[idx ] <- max(augUser$unco) + 1:length(idx) augModel <- lav_model(lavpartable = augUser, lavoptions = object@Options) VarCov <- lav_model_vcov(lavmodel = augModel, lavsamplestats = object@SampleStats, lavdata = object@Data, lavpartable = object@ParTable, lavoptions = object@Options) # set cov between free and fixed.x elements to zero ### ### FIXME: should we not do this on the information level, ### *before* we compute VarCov? 
### fixed.x.idx <- max(object@ParTable$free) + 1:length(idx) free.idx <- 1:max(object@ParTable$free) VarCov[free.idx, fixed.x.idx] <- 0.0 VarCov[fixed.x.idx, free.idx] <- 0.0 Delta <- computeDelta(lavmodel = augModel) } else { VarCov <- lav_model_vcov(lavmodel = object@Model, lavdata = object@Data, lavpartable = object@ParTable, lavsamplestats = object@SampleStats, lavoptions = object@Options) Delta <- computeDelta(lavmodel = object@Model) } } R <- vector("list", length=G) for(g in 1:G) { # add type R[[g]]$type <- type # sample moments if(!object@SampleStats@missing.flag) { if(object@Model@conditional.x) { S <- object@SampleStats@res.cov[[g]] M <- object@SampleStats@res.int[[g]] } else { S <- object@SampleStats@cov[[g]] M <- object@SampleStats@mean[[g]] } } else { S <- object@SampleStats@missing.h1[[g]]$sigma M <- object@SampleStats@missing.h1[[g]]$mu } if(!meanstructure) { M <- numeric( length(M) ) } nvar <- ncol(S) # residuals (for this block) if(type == "cor.bollen") { if(object@Model@conditional.x) { R[[g]]$cov <- cov2cor(S) - cov2cor(object@implied$res.cov[[g]]) R[[g]]$mean <- ( M/sqrt(diag(S)) - ( object@implied$res.int[[g]] / sqrt(diag(object@implied$res.cov[[g]])) ) ) } else { R[[g]]$cov <- cov2cor(S) - cov2cor(object@implied$cov[[g]]) R[[g]]$mean <- ( M/sqrt(diag(S)) - object@implied$mean[[g]]/sqrt(diag(object@implied$cov[[g]])) ) } } else if(type == "cor.bentler" || type == "cor.eqs") { # Bentler EQS manual: divide by (sqrt of) OBSERVED variances delta <- 1/sqrt(diag(S)) DELTA <- diag(delta, nrow=nvar, ncol=nvar) if(object@Model@conditional.x) { R[[g]]$cov <- DELTA %*% (S - object@implied$res.cov[[g]]) %*% DELTA R[[g]]$mean <- (M - object@implied$res.int[[g]])/sqrt(diag(S)) } else { R[[g]]$cov <- DELTA %*% (S - object@implied$cov[[g]]) %*% DELTA R[[g]]$mean <- (M - object@implied$mean[[g]])/sqrt(diag(S)) } } else { # covariance/raw residuals if(object@Model@conditional.x) { R[[g]]$cov <- S - object@implied$res.cov[[g]] R[[g]]$mean <- M - object@implied$res.int[[g]] } else { R[[g]]$cov <- S - object@implied$cov[[g]] R[[g]]$mean <- M - object@implied$mean[[g]] } } if(labels) { rownames(R[[g]]$cov) <- colnames(R[[g]]$cov) <- ov.names[[g]] } if(object@Model@conditional.x) { R[[g]]$slopes <- ( object@SampleStats@res.slopes[[g]] - object@implied$res.slopes[[g]] ) if(labels) { rownames(R[[g]]$slopes) <- ov.names[[g]] colnames(R[[g]]$slopes) <- object@Data@ov.names.x[[g]] } } if(object@Model@categorical) { if(object@Model@conditional.x) { R[[g]]$th <- object@SampleStats@res.th[[g]] - object@implied$res.th[[g]] } else { R[[g]]$th <- object@SampleStats@th[[g]] - object@implied$th[[g]] } if(length(object@Model@num.idx[[g]]) > 0L) { NUM.idx <- which(object@Model@th.idx[[g]] == 0) R[[g]]$th <- R[[g]]$th[ -NUM.idx ] } if(labels) { names(R[[g]]$th) <- vnames(object@ParTable, type="th", block=g) } } if(type == "normalized" || type == "standardized") { # compute normalized residuals N <- object@SampleStats@nobs[[g]]; nvar <- length(R[[g]]$mean) idx.mean <- 1:nvar if(object@Options$se == "standard" || object@Options$se == "none") { dS <- diag(S) Var.mean <- Var.sample.mean <- dS / N Var.cov <- Var.sample.cov <- (tcrossprod(dS) + S*S) / N # this is identical to solve(A1)/N for complete data!! 
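                # under normal theory, the asymptotic sampling variance of a
                # sample covariance s[i,j] is (s[i,i]*s[j,j] + s[i,j]^2)/N;
                # in the expression above, tcrossprod(dS) supplies the
                # s[i,i]*s[j,j] terms and S*S the elementwise s[i,j]^2 terms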
} else if(object@Options$se == "robust.huber.white" || object@Options$se == "robust.sem") { lavdata <- object@Data lavsamplestats <- object@SampleStats if(lavsamplestats@missing.flag) { if(object@Options$information == "expected") { A1 <- lav_mvnorm_missing_information_expected( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } else { A1 <- lav_mvnorm_missing_information_observed_samplestats( Yp = lavsamplestats@missing[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } } else { # data complete, under h1, expected == observed A1 <- lav_mvnorm_h1_information_observed_samplestats( sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) } if(lavsamplestats@missing.flag) { B1 <- lav_mvnorm_missing_information_firstorder( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } else { B1 <- lav_mvnorm_h1_information_firstorder( Y = lavdata@X[[g]], Gamma = lavsamplestats@NACOV[[g]], sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) } Info <- (solve(A1) %*% B1 %*% solve(A1)) / N Var.mean <- Var.sample.mean <- diag(Info)[idx.mean] Var.cov <- Var.sample.cov <- lav_matrix_vech_reverse(diag(Info)[-idx.mean]) } else if(object@Options$se == "first.order") { if(lavsamplestats@missing.flag) { B1 <- lav_mvnorm_missing_information_firstorder( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } else { B1 <- lav_mvnorm_h1_information_firstorder( Y = lavdata@X[[g]], Gamma = lavsamplestats@NACOV[[g]], sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) } Info <- solve(B1) / N Var.mean <- Var.sample.mean <- diag(Info)[idx.mean] Var.cov <- Var.sample.cov <- lav_matrix_vech_reverse(diag(Info)[-idx.mean]) } } if(type == "standardized") { Var.model <- diag(Delta[[g]] %*% VarCov %*% t(Delta[[g]])) if(meanstructure) { Var.model.mean <- Var.model[idx.mean] Var.model.cov <- lav_matrix_vech_reverse(Var.model[-idx.mean]) } else { Var.model.mean <- rep(0, nvar) Var.model.cov <- lav_matrix_vech_reverse(Var.model) } Var.mean <- (Var.sample.mean - Var.model.mean) Var.cov <- (Var.sample.cov - Var.model.cov ) # not for fixed x covariates if(length(x.idx) > 0L) { Var.mean[x.idx] <- 1.0 Var.cov[x.idx,x.idx] <- 1.0 } # avoid negative variances Var.mean[which(Var.mean < 0)] <- NA Var.cov[ which(Var.cov < 0)] <- NA } # normalize/standardize if(type == "normalized" || type == "standardized") { # avoid small number (< 1.0e-15) to be divided # by another small number and get bigger... # FIXME!!! 
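            # e.g., a residual of 1e-8 with an estimated variance of 1e-20
            # would otherwise produce a spuriously large value of
            # 1e-8 / sqrt(1e-20) = 100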
tol <- 1.0e-5 R[[g]]$mean[ which(abs(R[[g]]$mean) < tol)] <- 0.0 R[[g]]$cov[ which(abs(R[[g]]$cov) < tol)] <- 0.0 R[[g]]$mean <- R[[g]]$mean / sqrt( Var.mean ) R[[g]]$cov <- R[[g]]$cov / sqrt( Var.cov ) } # prepare for pretty printing R[[g]]$mean <- as.numeric(R[[g]]$mean) if(labels) names(R[[g]]$mean) <- ov.names[[g]] class(R[[g]]$mean) <- c("lavaan.vector", "numeric") class(R[[g]]$cov) <- c("lavaan.matrix.symmetric", "matrix") if(object@Model@conditional.x) { class(R[[g]]$slopes) <- c("lavaan.matrix", "matrix") } } # replace 'cov' by 'cor' if type == "cor" if(type %in% c("cor","cor.bollen","cor.eqs","cor.bentler")) { if("th" %in% names(R[[1]])) { R <- lapply(R, "names<-", c("type", "cor", "mean", "th") ) } else { R <- lapply(R, "names<-", c("type", "cor", "mean") ) } } if(G == 1) { R <- R[[1]] } else { names(R) <- unlist(object@Data@block.label) } R }) setMethod("resid", "lavaan", function(object, type="raw") { residuals(object, type=type) }) lav_residuals_casewise <- function(object, labels = labels) { # check if we have full data if(object@Data@data.type != "full") { stop("lavaan ERROR: casewise residuals not available if sample statistics were used for fitting the model") } G <- object@Data@ngroups ov.names <- object@Data@ov.names X <- object@Data@X if(object@Model@categorical) { # add 'eXo' columns to X X <- lapply(seq_len(object@Data@ngroups), function(g) { ret <- cbind(X[[g]], object@Data@eXo[[g]]) ret }) } M <- lav_predict_yhat(object) # Note: if M has already class lavaan.matrix, print goes crazy # with Error: C stack usage is too close to the limit OUT <- lapply(seq_len(G), function(x) { out <- X[[x]] - M[[x]] class(out) <- c("lavaan.matrix", "matrix") out }) if(labels) { for(g in 1:G) { colnames(OUT[[g]]) <- object@pta$vnames$ov[[g]] } } if(G == 1) { OUT <- OUT[[1]] } else { names(OUT) <- unlist(object@Data@group.label) } OUT } lavaan/R/lav_samplestats_step2.R0000644000176200001440000000671313046367231016373 0ustar liggesuserslav_samplestats_step2 <- function(UNI = NULL, ov.names = NULL, # error message only # polychoric and empty cells zero.add = c(0.5, 0.0), zero.keep.margins = TRUE, zero.cell.warn = FALSE, # keep track of tables with zero cells? 
zero.cell.tables = TRUE, optim.method = "nlminb") { nvar <- length(UNI) COR <- diag(nvar) if(zero.cell.tables) { zero.var1 <- character(0L) zero.var2 <- character(0L) } # one-by-one (for now) for(j in seq_len(nvar-1L)) { for(i in (j+1L):nvar) { #if(verbose) { cat(" i = ", i, " j = ", j, # "[",ov.names[i], "-", ov.names[j], "] ", # "(",ov.types[i], "-", ov.types[j], ")\n") } #pstar.idx <- PSTAR[i,j] #COR.NAMES[pstar.idx] <- paste(ov.names[i],"~~",ov.names[j],sep="") if(class(UNI[[i]]) == "lavOLS" && class(UNI[[j]]) == "lavOLS") { if(UNI[[i]]$nexo > 0L) { Y1 <- UNI[[i]]$y - UNI[[i]]$yhat Y2 <- UNI[[j]]$y - UNI[[j]]$yhat } else { Y1 <- UNI[[i]]$y; Y2 <- UNI[[j]]$y } COR[i,j] <- COR[j,i] <- cor(Y1, Y2, use="pairwise.complete.obs") } else if(class(UNI[[i]]) == "lavOLS" && class(UNI[[j]]) == "lavProbit") { # polyserial out <- ps_cor_TS(fit.y1=UNI[[i]], fit.y2=UNI[[j]]) COR[i,j] <- COR[j,i] <- out } else if(class(UNI[[j]]) == "lavOLS" && class(UNI[[i]]) == "lavProbit") { # polyserial out <- ps_cor_TS(fit.y1=UNI[[j]], fit.y2=UNI[[i]]) COR[i,j] <- COR[j,i] <- out } else if(class(UNI[[i]]) == "lavProbit" && class(UNI[[j]]) == "lavProbit") { # polychoric correlation out <- pc_cor_TS(fit.y1=UNI[[i]], fit.y2=UNI[[j]], method = optim.method, zero.add = zero.add, zero.keep.margins = zero.keep.margins, zero.cell.warn = zero.cell.warn, zero.cell.flag = zero.cell.tables, Y1.name = ov.names[i], Y2.name = ov.names[j]) if(zero.cell.tables) { if(attr(out, "zero.cell.flag")) { zero.var1 <- c(zero.var1, ov.names[j]) zero.var2 <- c(zero.var2, ov.names[i]) } attr(out, "zero.cell.flag") <- NULL } COR[i,j] <- COR[j,i] <- out } # check for near 1.0 correlations if(abs(COR[i,j]) > 0.99) { warning("lavaan WARNING: correlation between variables ", ov.names[i], " and ", ov.names[j], " is (nearly) 1.0") } } } # keep track of tables with zero cells if(zero.cell.tables) { zero.cell.tables <- cbind(zero.var1, zero.var2) attr(COR, "zero.cell.tables") <- zero.cell.tables } COR } lavaan/R/lav_model_implied.R0000644000176200001440000000252213052775615015521 0ustar liggesusers# compute model implied statistics lav_model_implied <- function(lavmodel = NULL) { stopifnot(inherits(lavmodel, "lavModel")) # model-implied variance/covariance matrix ('sigma hat') Sigma.hat <- computeSigmaHat(lavmodel = lavmodel) # model-implied mean structure ('mu hat') Mu.hat <- computeMuHat(lavmodel = lavmodel) # if conditional.x, slopes if(lavmodel@conditional.x) { SLOPES <- computePI(lavmodel = lavmodel) } else { SLOPES <- vector("list", length = lavmodel@nblocks) } # if categorical, model-implied thresholds if(lavmodel@categorical) { TH <- computeTH(lavmodel = lavmodel) } else { TH <- vector("list", length = lavmodel@nblocks) } if(lavmodel@group.w.free) { w.idx <- which(names(lavmodel@GLIST) == "gw") GW <- unname(lavmodel@GLIST[ w.idx ]) GW <- lapply(GW, as.numeric) } else { GW <- vector("list", length = lavmodel@nblocks) } # FIXME: should we use 'res.cov', 'res.int', 'res.th' if conditionl.x?? # Yes, since 0.5-22 if(lavmodel@conditional.x) { implied <- list(res.cov = Sigma.hat, res.int = Mu.hat, res.slopes = SLOPES, res.th = TH, group.w = GW) } else { implied <- list(cov = Sigma.hat, mean = Mu.hat, slopes = SLOPES, th = TH, group.w = GW) } implied } lavaan/R/ctr_pml_plrt_nested.R0000644000176200001440000005244612743730447016131 0ustar liggesusers# All code below is written by Myrsini Katsikatsou (Feb 2015) #The following function refers to PLRT for nested models and equality constraints. 
# Namely, it is developed to test either of the following hypotheses: # a) H0 states that some parameters are equal to 0 # b) H0 states that some parameters are equal to some others. #Note that for the latter I haven't checked if it is ok when equality constraints #are imposed on parameters that refer to different groups in a multi-group #analysis. All the code below has been developed for a single-group analysis. # Let fit_objH0 and fit_objH1 be the outputs of lavaan() function when we fit # a model under the null hypothesis and under the alternative, respectively. # The argument equalConstr is logical (T/F) and it is TRUE if equality constraints # are imposed on subsets of the parameters. # The main idea of the code below is that we consider the parameter vector # under the alternative H1 evaluated at the values derived under H0 and for these # values we should evaluate the Hessian, the variability matrix (denoted by J) # and Godambe matrix. ctr_pml_plrt_nested <- function(fit_objH0, fit_objH1) { # sanity check, perhaps we misordered H0 and H1 in the function call?? if(fit_objH1@test[[1]]$df > fit_objH0@test[[1]]$df) { tmp <- fit_objH0 fit_objH0 <- fit_objH1 fit_objH1 <- tmp } # check if we have equality constraints if(fit_objH0@Model@eq.constraints) { equalConstr = TRUE } else { equalConstr = FALSE } nsize <- fit_objH0@SampleStats@ntotal PLRT <- 2 * (fit_objH1@optim$logl - fit_objH0@optim$logl) # create a new object 'objH1_h0': the object 'H1', but where # the parameter values are from H0 objH1_h0 <- lav_test_diff_m10(m1 = fit_objH1, m0 = fit_objH0, test = FALSE) # EqMat EqMat <- lav_test_diff_A(m1 = fit_objH1, m0 = fit_objH0) # Observed information (= for PML, this is Hessian / N) Hes.theta0 <- lavTech(objH1_h0, "information.observed") # handle possible constraints in H1 (and therefore also in objH1_h0) Inv.Hes.theta0 <- lav_model_information_augment_invert(lavmodel = objH1_h0@Model, information = Hes.theta0, inverted = TRUE) # the estimated variability matrix is given (=unit information first order) J.theta0 <- lavTech(objH1_h0, "first.order") # the Inverse of the G matrix Inv.G <- Inv.Hes.theta0 %*% J.theta0 %*% Inv.Hes.theta0 MInvGtM <- EqMat %*% Inv.G %*% t(EqMat) MinvHtM <- EqMat %*% Inv.Hes.theta0 %*% t(EqMat) Inv_MinvHtM <- solve(MinvHtM) tmp.prod <- MInvGtM %*% Inv_MinvHtM tmp.prod2 <- tmp.prod %*% tmp.prod sum.eig <- sum(diag(tmp.prod)) sum.eigsq <- sum(diag(tmp.prod2)) FSMA.PLRT <- (sum.eig/sum.eigsq) * PLRT adj.df <- (sum.eig*sum.eig)/sum.eigsq pvalue <- 1 - pchisq(FSMA.PLRT, df = adj.df) list(FSMA.PLRT = FSMA.PLRT, adj.df = adj.df, pvalue = pvalue) } # for testing: this is the 'original' (using m.el.idx and x.el.idx) ctr_pml_plrt_nested2 <- function (fit_objH0, fit_objH1) { if (fit_objH1@test[[1]]$df > fit_objH0@test[[1]]$df) { tmp <- fit_objH0 fit_objH0 <- fit_objH1 fit_objH1 <- tmp } if (fit_objH0@Model@eq.constraints) { equalConstr = TRUE } else { equalConstr = FALSE } nsize <- fit_objH0@Data@nobs[[1]] PLRT <- 2 * nsize * (fit_objH0@optim$fx - fit_objH1@optim$fx) Npar <- fit_objH1@optim$npar MY.m.el.idx2 <- fit_objH1@Model@m.free.idx MY.x.el.idx2 <- fit_objH1@Model@x.free.idx MY.m.el.idx <- MY.m.el.idx2 MY.x.el.idx <- MY.x.el.idx2 #MY.m.el.idx2 <- fit_objH1@Model@m.free.idx # MY.m.el.idx2 gives the POSITION index of the free parameters within each # parameter matrix under H1 model. # The index numbering restarts from 1 when we move to a new parameter matrix. # Within each matrix the index numbering "moves" columnwise. 
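  # e.g., for a 3x2 lambda matrix with all six loadings free, the position
  # indices run 1:6 column by column: entries (1,1),(2,1),(3,1) get 1,2,3
  # and entries (1,2),(2,2),(3,2) get 4,5,6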
#MY.x.el.idx2 <- fit_objH1@Model@x.free.idx # MY.x.el.idx2 ENUMERATES the free parameters within each parameter matrix. # The numbering continues as we move from one parameter matrix to the next one. # In the case of the symmetric matrices, Theta and Psi,in some functions below # we need to give as input MY.m.el.idx2 and MY.x.el.idx2 after # we have eliminated the information about the redundant parameters # (those placed above the main diagonal). # That's why I do the following: #MY.m.el.idx <- MY.m.el.idx2 #MY.x.el.idx <- MY.x.el.idx2 # Psi, the variance - covariance matrix of factors #if( length(MY.x.el.idx2[[3]])!=0 & any(table(MY.x.el.idx2[[3]])>1)) { # nfac <- ncol(fit_objH1@Model@GLIST$lambda) #number of factors # tmp <- matrix(c(1:(nfac^2)), nrow= nfac, ncol= nfac ) # tmp_keep <- tmp[lower.tri(tmp, diag=TRUE)] # MY.m.el.idx[[3]] <- MY.m.el.idx[[3]][MY.m.el.idx[[3]] %in% tmp_keep] # MY.x.el.idx[[3]] <- unique( MY.x.el.idx2[[3]] ) #} #for Theta, the variance-covariance matrix of measurement errors # if( length(MY.x.el.idx2[[2]])!=0 & any(table(MY.x.el.idx2[[2]])>1)) { # nvar <- fit_objH1@Model@nvar #number of indicators # tmp <- matrix(c(1:(nvar^2)), nrow= nvar, ncol= nvar ) # tmp_keep <- tmp[lower.tri(tmp, diag=TRUE)] # MY.m.el.idx[[2]] <- MY.m.el.idx[[2]][MY.m.el.idx[[2]] %in% tmp_keep] # MY.x.el.idx[[2]] <- unique( MY.x.el.idx2[[2]] ) # } #below the commands to find the row-column indices of the Hessian that correspond to #the parameters to be tested equal to 0 #tmp.ind contains these indices # MY.m.el.idx2.H0 <- fit_objH0@Model@m.free.idx # tmp.ind <- c() # for(i in 1:6) { # tmp.ind <- c(tmp.ind , # MY.x.el.idx2[[i]] [!(MY.m.el.idx2[[i]] %in% # MY.m.el.idx2.H0[[i]] ) ] ) # } # next line added by YR # tmp.ind <- unique(tmp.ind) # YR: use partable to find which parameters are restricted in H0 # (this should work in multiple groups too) #h0.par.idx <- which( PT.H1.extended$free[PT.H1.extended$user < 2] > 0 & # !(PT.H0.extended$free[PT.H0.extended$user < 2] > 0) ) #tmp.ind <- PT.H1.extended$free[ h0.par.idx ] #print(tmp.ind) if (length(MY.x.el.idx2[[3]]) != 0 & any(table(MY.x.el.idx2[[3]]) > 1)) { nfac <- ncol(fit_objH1@Model@GLIST$lambda) tmp <- matrix(c(1:(nfac*nfac)), nrow = nfac, ncol = nfac) tmp_keep <- tmp[lower.tri(tmp, diag = TRUE)] MY.m.el.idx[[3]] <- MY.m.el.idx[[3]][MY.m.el.idx[[3]] %in% tmp_keep] MY.x.el.idx[[3]] <- unique(MY.x.el.idx2[[3]]) } if (length(MY.x.el.idx2[[2]]) != 0 & any(table(MY.x.el.idx2[[2]]) > 1)) { nvar <- fit_objH1@Model@nvar tmp <- matrix(c(1:(nvar*nvar)), nrow = nvar, ncol = nvar) tmp_keep <- tmp[lower.tri(tmp, diag = TRUE)] MY.m.el.idx[[2]] <- MY.m.el.idx[[2]][MY.m.el.idx[[2]] %in% tmp_keep] MY.x.el.idx[[2]] <- unique(MY.x.el.idx2[[2]]) } MY.m.el.idx2.H0 <- fit_objH0@Model@m.free.idx tmp.ind <- c() for (i in 1:6) { tmp.ind <- c(tmp.ind, MY.x.el.idx2[[i]][!(MY.m.el.idx2[[i]] %in% MY.m.el.idx2.H0[[i]])]) } tmp.ind <- unique(tmp.ind) # if the models are nested because of equality constraints among the parameters, we need # to construct the matrix of derivatives of function g(theta) with respect to theta # where g(theta) is the function that represents the equality constraints. g(theta) is # an rx1 vector where r are the equality constraints. In the null hypothesis # we test H0: g(theta)=0. The matrix of derivatives is of dimension: # nrows= number of free non-redundant parameters under H0, namely # NparH0 <- fit_objH0[[1]]@optim$npar , and ncols= number of free non-redundant # parameters under H1, namely NparH1 <- fit_objH0[[1]]@optim$npar. 
# The matrix of derivatives of g(theta) is composed of 0's, 1's, -1's, and # in the rows that refer to odd number of parameters that are equal there is one -2. # The 1's, -1's (and possibly -2) are the contrast coefficients of the parameters. # The sum of the rows should be equal to 0. #if(equalConstr==TRUE) { # EqMat <- fit_objH0@Model@ceq.JAC #} else { # no.par0 <- length(tmp.ind) # tmp.ind2 <- cbind(1:no.par0, tmp.ind) # EqMat <- matrix(0, nrow = no.par0, ncol = Npar) # EqMat[tmp.ind2] <- 1 # } if (equalConstr == TRUE) { EqMat <- fit_objH0@Model@ceq.JAC } else { no.par0 <- length(tmp.ind) tmp.ind2 <- cbind(1:no.par0, tmp.ind ) EqMat <- matrix(0, nrow=no.par0, ncol=Npar) EqMat[tmp.ind2] <- 1 } obj <- fit_objH0 # Compute the sum of the eigenvalues and the sum of the squared eigenvalues # so that the adjustment to PLRT can be applied. # Here a couple of functions (e.g. MYgetHessian) which are modifications of # lavaan functions (e.g. getHessian) are needed. These are defined in the end of the file. #the quantity below follows the same logic as getHessian of lavaan 0.5-18 #and it actually gives N*Hessian. That's why the command following the command below. # NHes.theta0 <- MYgetHessian (object = obj@Model, # samplestats = obj@SampleStats , # X = obj@Data@X , # estimator = "PML", # lavcache = obj@Cache, # MY.m.el.idx = MY.m.el.idx, # MY.x.el.idx = MY.x.el.idx, # MY.m.el.idx2 = MY.m.el.idx2, # input for MYx2GLIST # MY.x.el.idx2 = MY.x.el.idx2, # input for MYx2GLIST # Npar = Npar, # equalConstr=equalConstr) NHes.theta0 <- MYgetHessian(object = obj@Model, samplestats = obj@SampleStats, X = obj@Data@X, estimator = "PML", lavcache = obj@Cache, MY.m.el.idx = MY.m.el.idx, MY.x.el.idx = MY.x.el.idx, MY.m.el.idx2 = MY.m.el.idx2, MY.x.el.idx2 = MY.x.el.idx2, Npar = Npar, equalConstr = equalConstr) Hes.theta0 <- NHes.theta0/nsize Inv.Hes.theta0 <- solve(Hes.theta0) NJ.theta0 <- MYgetVariability(object = obj, MY.m.el.idx = MY.m.el.idx, MY.x.el.idx = MY.x.el.idx, equalConstr = equalConstr) J.theta0 <- NJ.theta0/(nsize*nsize) Inv.G <- Inv.Hes.theta0 %*% J.theta0 %*% Inv.Hes.theta0 MInvGtM <- EqMat %*% Inv.G %*% t(EqMat) MinvHtM <- EqMat %*% Inv.Hes.theta0 %*% t(EqMat) Inv_MinvHtM <- solve(MinvHtM) #!!! change names tmp.prod <- MInvGtM %*% Inv_MinvHtM #!!! change names tmp.prod2 <- tmp.prod %*% tmp.prod sum.eig <- sum(diag(tmp.prod)) sum.eigsq <- sum(diag(tmp.prod2)) FSMA.PLRT <- (sum.eig/sum.eigsq) * PLRT adj.df <- (sum.eig*sum.eig)/sum.eigsq pvalue <- 1 - pchisq(FSMA.PLRT, df = adj.df) list(FSMA.PLRT = FSMA.PLRT, adj.df = adj.df, pvalue = pvalue) } ################################################################################### # auxiliary functions used above, they are all copy from the corresponding functions # of lavaan where parts no needed were deleted and some parts were modified. # I mark the modifications with comments. # library(lavaan) # To run an example for the functions below the following input is needed. 
# obj <- fit.objH0[[i]] # object <- obj@Model # samplestats = obj@SampleStats # X = obj@Data@X # estimator = "PML" # lavcache = obj@Cache # MY.m.el.idx = MY.m.el.idx # MY.x.el.idx = MY.x.el.idx # MY.m.el.idx2 = MY.m.el.idx2 # input for MYx2GLIST # MY.x.el.idx2 = MY.x.el.idx2 # input for MYx2GLIST # Npar = Npar # equalConstr =TRUE MYgetHessian <- function (object, samplestats , X , estimator = "PML", lavcache, MY.m.el.idx, MY.x.el.idx, MY.m.el.idx2, MY.x.el.idx2, # input for MYx2GLIST Npar, #Npar is the number of parameters under H1 equalConstr ) { # takes TRUE/ FALSE if(equalConstr){ #!!! added line } Hessian <- matrix(0, Npar, Npar) # #!!!! MYfunction below x <- MYgetModelParameters(object=object, GLIST = NULL, N=Npar, #N the number of parameters to consider MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx) for (j in 1:Npar) { h.j <- 1e-05 x.left <- x.left2 <- x.right <- x.right2 <- x x.left[j] <- x[j] - h.j x.left2[j] <- x[j] - 2 * h.j x.right[j] <- x[j] + h.j x.right2[j] <- x[j] + 2 * h.j #!!!! MYfunction below : MYcomputeGradient and MYx2GLIST g.left <- MYcomputeGradient(object=object, GLIST = MYx2GLIST(object=object, x = x.left, MY.m.el.idx=MY.m.el.idx2, MY.x.el.idx= MY.x.el.idx2), samplestats = samplestats, X = X, lavcache = lavcache, estimator = "PML", MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx, equalConstr = equalConstr ) g.left2 <- MYcomputeGradient(object=object, GLIST = MYx2GLIST(object=object, x = x.left2, MY.m.el.idx=MY.m.el.idx2, MY.x.el.idx= MY.x.el.idx2), samplestats = samplestats, X = X, lavcache = lavcache, estimator = "PML", MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx, equalConstr = equalConstr ) g.right <- MYcomputeGradient(object=object, GLIST = MYx2GLIST(object=object, x = x.right, MY.m.el.idx=MY.m.el.idx2, MY.x.el.idx= MY.x.el.idx2), samplestats = samplestats, X = X, lavcache = lavcache, estimator = "PML", MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx, equalConstr = equalConstr ) g.right2 <- MYcomputeGradient(object=object, GLIST = MYx2GLIST(object=object, x = x.right2, MY.m.el.idx=MY.m.el.idx2, MY.x.el.idx= MY.x.el.idx2), samplestats = samplestats, X = X, lavcache = lavcache, estimator = "PML", MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx, equalConstr = equalConstr ) Hessian[, j] <- (g.left2 - 8 * g.left + 8 * g.right - g.right2)/(12 * h.j) } Hessian <- (Hessian + t(Hessian))/2 #(-1) * Hessian Hessian } ############################################################################# ################################## MYgetModelParameters #different input arguments: MY.m.el.idx, MY.x.el.idx MYgetModelParameters <- function (object, GLIST = NULL, N, #N the number of parameters to consider MY.m.el.idx, MY.x.el.idx) { if (is.null(GLIST)) { GLIST <- object@GLIST } x <- numeric(N) for (mm in 1:length(object@GLIST)) { # mm<-1 m.idx <- MY.m.el.idx[[mm]] #!!!!! 
different here and below x.idx <- MY.x.el.idx[[mm]] x[x.idx] <- GLIST[[mm]][m.idx] } x } ############################################################################# ############################# MYcomputeGradient #the difference are the input arguments MY.m.el.idx, MY.x.el.idx #used in lavaan:::computeDelta MYcomputeGradient <- function (object, GLIST, samplestats = NULL, X = NULL, lavcache = NULL, estimator = "PML", MY.m.el.idx, MY.x.el.idx, equalConstr ) { if(equalConstr){ #added line } num.idx <- object@num.idx th.idx <- object@th.idx if (is.null(GLIST)) { GLIST <- object@GLIST } Sigma.hat <- computeSigmaHat(object, GLIST = GLIST, extra = (estimator == "ML")) TH <- computeTH(object, GLIST = GLIST) g<-1 d1 <- pml_deriv1(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = th.idx[[g]], num.idx = num.idx[[g]], X = X[[g]], lavcache = lavcache[[g]]) #!? if(equalConstr) { #delete the following three commented lines, wrong # Delta <- lavaan:::computeDelta (lavmodel= object, GLIST. = GLIST) # } else { Delta <- computeDelta (lavmodel= object, GLIST. = GLIST, m.el.idx. = MY.m.el.idx , x.el.idx. = MY.x.el.idx) # } #!!!!! that was before: as.numeric(t(d1) %*% Delta[[g]])/samplestats@nobs[[g]] as.numeric(t(d1) %*% Delta[[g]]) #!!! modified to follow current computeGradient() function of lavaan #!!! which gives minus the gradient of PL-loglik } ############################################################################### ################################## MYx2GLIST #difference in input arguments MY.m.el.idx, MY.x.el.idx MYx2GLIST <- function (object, x = NULL, MY.m.el.idx, MY.x.el.idx) { GLIST <- object@GLIST for (mm in 1:length(GLIST)) { m.el.idx <- MY.m.el.idx[[mm]] x.el.idx <- MY.x.el.idx[[mm]] GLIST[[mm]][m.el.idx] <- x[x.el.idx] } GLIST } ############################################################################ #####MYgetVariability function #difference from corresponding of lavaan: I use MYNvcov.first.order MYgetVariability <- function (object, MY.m.el.idx, MY.x.el.idx, equalConstr ) { NACOV <- MYNvcov.first.order(lavmodel=object@Model, lavsamplestats = object@SampleStats, lavdata = object@Data, estimator = "PML", MY.m.el.idx=MY.m.el.idx, MY.x.el.idx= MY.x.el.idx, equalConstr = equalConstr) if(equalConstr){ #added lines } B0 <- attr(NACOV, "B0") #!!!! Note below that I don't multiply with nsize #!!! so what I get is J matrix divided by n #if (object@Options$estimator == "PML") { # B0 <- B0 * object@SampleStats@ntotal #} #!!!!!!!!!!!!!!!!!!! added the following lines so that the output of #!!!!! MYgetVariability is in line with that of lavaan 0.5-18 getVariability #!! what's the purpose of the following lines? if (object@Options$estimator == "PML") { B0 <- B0 * object@SampleStats@ntotal } B0 } ############################################################################## # example # obj <- fit.objH0[[i]] # object <- obj@Model # samplestats = obj@SampleStats # X = obj@Data@X # estimator = "PML" # lavcache = obj@Cache # MY.m.el.idx = MY.m.el.idx # MY.x.el.idx = MY.x.el.idx # MY.m.el.idx2 = MY.m.el.idx2 # input for MYx2GLIST # MY.x.el.idx2 = MY.x.el.idx2 # input for MYx2GLIST # Npar = Npar # equalConstr =TRUE MYNvcov.first.order <- function (lavmodel, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, estimator = "PML", MY.m.el.idx, MY.x.el.idx, equalConstr ) { #equalConstr takes TRUE/FALSE if(equalConstr){ #added lines } B0.group <- vector("list", lavsamplestats@ngroups) #in my case list of length 1 #!? 
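    # outline of what follows: computeDelta() gives the jacobian of the
    # model-implied moments with respect to the free parameters; pml_deriv1()
    # with scores = TRUE returns the casewise pairwise-likelihood scores with
    # respect to the moments; their product gives casewise scores with
    # respect to the parameters, and crossprod() of those yields B0, the
    # (un-scaled) first-order information matrix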
if (equalConstr) { ###the following three lines are commented because they are wrong # Delta <- lavaan:::computeDelta(lavmodel, GLIST. = NULL) # } else { Delta <- computeDelta(lavmodel, GLIST. = NULL, m.el.idx. = MY.m.el.idx,#!!!!! different here and below x.el.idx. = MY.x.el.idx) # } Sigma.hat <- computeSigmaHat(lavmodel) TH <- computeTH(lavmodel) g <-1 SC <- pml_deriv1(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = lavmodel@th.idx[[g]], num.idx = lavmodel@num.idx[[g]], X = lavdata@X[[g]], lavcache = lavcache, scores = TRUE, negative = FALSE) group.SC <- SC %*% Delta[[g]] B0.group[[g]] <- crossprod(group.SC) #!!!! B0.group[[g]] <- B0.group[[g]]/lavsamplestats@ntotal !!! skip so that the result # is in line with the 0.5-18 version of lavaan B0 <- B0.group[[1]] E <- B0 eigvals <- eigen(E, symmetric = TRUE, only.values = TRUE)$values if (any(eigvals < -1 * .Machine$double.eps^(3/4))) { warning("lavaan WARNING: matrix based on first order outer product of the derivatives is not positive definite; the standard errors may not be thrustworthy") } NVarCov <- MASS::ginv(E) attr(NVarCov, "B0") <- B0 attr(NVarCov, "B0.group") <- B0.group NVarCov } lavaan/R/lav_ols.R0000644000176200001440000001071412631360621013502 0ustar liggesusers# simple wrapper around lm.fit to get scores() # # YR 25 June 2012 # # NOTES: - X should NOT already contain a column of 1's for the intercept! # - weights not used yet # wrapper function lavOLS <- function(y, X = NULL, method = "none", start.values = NULL, control = list(), verbose = FALSE) { # initialize lavR <- lavRefOLS$new(y = y, X = X) # optimize if(method != "none") { lavR$optimize(method = method, control = control, verbose = verbose, start.values = start.values) } else { lavR$lik() # initialize } lavR } # lavRefOLS # # classic univariate OLS regression lavRefOLS <- setRefClass("lavOLS", # inherits contains = "lavML", # fields fields = list(X = "matrix", nexo = "integer", # housekeeping int.idx = "integer", slope.idx = "integer", var.idx = "integer", missing.values = "logical", missing.idx = "integer", # internal yhat = "numeric"), # methods methods = list( initialize = function(y, X = NULL, ...) 
{ # y y <<- y; nobs <<- length(y) # X if(!is.null(X)) { nexo <<- ncol(X) X <<- matrix(c(rep.int(1,nobs),X),nobs,nexo+1L) } else { nexo <<- 0L X <<- matrix(1, nobs, 1L) } if(any(is.na(y)) || (!is.null(X) && any(is.na(X)) )) { missing.values <<- TRUE missing.idx <<- which(apply(cbind(y, X), 1, function(x) any(is.na(x)))) } else { missing.values <<- FALSE } # indices of free parameters int.idx <<- 1L slope.idx <<- seq_len(nexo) + 1L var.idx <<- 1L + nexo + 1L # set up for Optim npar <<- 1L + nexo + 1L # intercept + slopes + var start(); theta <<- theta.start if(nexo > 0L) sl.lab <- paste("beta",seq_len(nexo),sep="") else sl.lab <- character(0) theta.labels <<- c("int", sl.lab, "var.e") }, start = function() { if(nexo > 0L) { if(length(missing.idx) > 0L) { fit.lm <- lm.fit(y=y[-missing.idx], x=X[-missing.idx,,drop=FALSE]) } else { fit.lm <- lm.fit(y=y, x=X) } #fit.lm <- lm.wfit(y=y, x=X, w=weights) beta.start <- fit.lm$coef var.start <- crossprod(fit.lm$residual)/nobs } else { beta.start <- mean(y, na.rm=TRUE) var.start <- var(y, na.rm=TRUE)*(nobs-1)/nobs # ML } theta.start <<- c( beta.start, var.start ) }, lik = function(x) { if(!missing(x)) theta <<- x beta <- theta[-npar] # not the variance e.var <- theta[npar] # the variance of the error if(nexo > 0L) yhat <<- drop(X %*% beta) else yhat <<- rep(beta[1L], nobs) #weights * dnorm(y, mean=yhat, sd=sqrt(e.var)) dnorm(y, mean=yhat, sd=sqrt(e.var)) }, #gradient = function(x) { # if(!missing(x)) logl(x) # e.var <- theta[npar] # # # beta # if(nexo > 0L) { # dx.beta <- 1/e.var * crossprod(X, y - yhat) # } else { # dx.beta <- 1/e.var * sum(y - yhat) # } # # var # dx.var <- -nobs/(2*e.var) + 1/(2*e.var^2) * crossprod(y - yhat) # # c(dx.beta, dx.var) #}, scores = function(x) { if(!missing(x)) lik(x) e.var <- theta[npar] if(length(yhat) == 0L) lik() # not initialized yet # beta scores.beta <- 1/e.var * X * (y - yhat) # var scores.var <- -1/(2*e.var) + 1/(2*e.var*e.var) * (y - yhat)*(y - yhat) cbind(scores.beta, scores.var, deparse.level=0) }, hessian = function(x) { if(!missing(x)) { lik(x); gradient() } #cat("hessian num = \n"); print(round(numDeriv::hessian(func=.self$logl, x=x),3)) e.var <- theta[npar] # beta - beta if(nexo > 0L) { dx2.beta <- -1/e.var * crossprod(X) } else { dx2.beta <- -1/e.var * nobs } # beta - var if(nexo > 0L) { dx.beta.var <- -1/(e.var*e.var) * crossprod(X, y-yhat) } else { dx.beta.var <- -1/(e.var*e.var) * sum(y-yhat) } # var - var sq.e.var <- sqrt(e.var) sq.e.var6 <- sq.e.var*sq.e.var*sq.e.var*sq.e.var*sq.e.var*sq.e.var #dx2.var <- nobs/(2*e.var*e.var) - 1/sq.e.var6 * crossprod(y-yhat) dx2.var <- nobs/(2*e.var*e.var) - 1/sq.e.var6 * (e.var * nobs) rbind( cbind( dx2.beta, dx.beta.var, deparse.level=0), cbind(t(dx.beta.var), dx2.var, deparse.level=0), deparse.level=0 ) }, minObjective = function(x) { -logl(x) }, minGradient = function(x) { -gradient(x) }, minHessian = function(x) { -hessian(x) } )) lavaan/R/01RefClass_00lavRefModel.R0000644000176200001440000000121013042163534016316 0ustar liggesusers# generic statistical model -- YR 10 july 2012 # super class -- virtual statistical model lavRefModel <- setRefClass("lavRefModel", # fields fields = list( npar = "integer", # number of free model parameters theta = "numeric", # the model parameters (free only) theta.labels = "character" # parameter names (if any) ), # methods methods = list( show = function(header=TRUE) { if(header) cat(class(.self), "model parameters (theta):\n") out <- theta # avoid changing theta by giving names if(length(theta.labels) > 0L) names(out) <- 
theta.labels print(out) } )) lavaan/R/lav_polyserial.R0000644000176200001440000001372112505526140015071 0ustar liggesusers# polyserial Y1 is numeric, Y2 is ordinal # (summed) loglikelihood ps_logl <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { lik <- ps_lik(Y1=Y1, Y2=Y2, eXo=eXo, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) if(all(lik > 0, na.rm = TRUE)) logl <- sum(log(lik), na.rm = TRUE) else logl <- -Inf logl } # individual likelihoods ps_lik <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y R <- sqrt(1 - rho*rho) y1.SD <- sqrt(fit.y1$theta[fit.y1$var.idx]) y1.ETA <- fit.y1$yhat Z <- (Y1 - y1.ETA) / y1.SD # p(Y2|Y1) tauj.star <- (fit.y2$z1 - rho*Z)/R tauj1.star <- (fit.y2$z2 - rho*Z)/R py2y1 <- pnorm(tauj.star) - pnorm(tauj1.star) py2y1[py2y1 < .Machine$double.eps] <- .Machine$double.eps # p(Y1) py1 <- dnorm(Y1, mean=y1.ETA, sd=y1.SD) # lik py1 * py2y1 } # loglikelihood (x-version) ps_logl_x <- function(x, Y1, Y2, eXo=NULL, nth.y2) { nexo <- ifelse(is.null(eXo), 0L, ncol(eXo)); S <- seq_len stopifnot(length(x) == (3L + nth.y2 + 2*nexo)) rho = x[1L] mu.y1 = x[2L] var.y1 = x[3L] th.y2 = x[3L + S(nth.y2)] sl.y1 = x[3L + nth.y2 + S(nexo)] sl.y2 = x[3L + nth.y2 + nexo + S(nexo)] fit.y1 <- lavOLS(y=Y1, X=eXo) fit.y1$theta[fit.y1$int.idx] <- mu.y1 fit.y1$theta[fit.y1$var.idx] <- var.y1 fit.y1$theta[fit.y1$slope.idx] <- sl.y1 fit.y1$lik() fit.y2 <- lavProbit(y=Y2, X=eXo) fit.y2$theta[fit.y2$th.idx] <- th.y2 fit.y2$theta[fit.y2$slope.idx] <- sl.y2 fit.y2$lik() ps_logl(Y1=Y1, Y2=Y2, eXo=eXo, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) } # polyserial correlation ps_cor_TS <- function(Y1, Y2, eXo=NULL, fit.y1=NULL, fit.y2=NULL, method="nlminb", verbose=FALSE) { stopifnot(method %in% c("nlminb", "BFGS")) if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y else as.integer(Y2) if(missing(eXo) && length(fit.y2$slope.idx) > 0L) eXo <- fit.y2$X y1.VAR <- fit.y1$theta[fit.y1$var.idx]; y1.SD <- sqrt(y1.VAR) y1.ETA <- fit.y1$yhat Z <- (Y1 - y1.ETA) / y1.SD objectiveFunction <- function(x) { rho = tanh(x[1L]) logl <- ps_logl(rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) -logl } gradientFunction <- function(x) { rho = tanh(x[1L]) R <- sqrt(1 - rho*rho) tauj.star <- (fit.y2$z1 - rho*Z)/R tauj1.star <- (fit.y2$z2 - rho*Z)/R y.Z1 <- dnorm(tauj.star); y.Z2 <- dnorm(tauj1.star) # p(y|x) py2y1 <- pnorm(tauj.star) - pnorm(tauj1.star) py2y1[py2y1 < .Machine$double.eps] <- .Machine$double.eps pyx.inv <- 1/py2y1 # rho TAUj <- y.Z1 * (fit.y2$z1*rho - Z) TAUj1 <- y.Z2 * (fit.y2$z2*rho - Z) dx.rho <- sum( pyx.inv * 1/(R*R*R) * (TAUj - TAUj1), na.rm = TRUE ) # tanh + minimize -dx.rho * 1/(cosh(x)*cosh(x)) } # FIXME:: TODO!!! 
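    # note: hessianFunction is (still) an empty stub; both optimizers below
    # (nlminb and optim/BFGS) are called with the objective and gradient
    # only, so no analytic hessian is needed to obtain the estimate of rho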
hessianFunction <- function(x) { } # starting value -- Olsson 1982 eq 38 if(length(fit.y2$slope.idx) > 0L) { # exo rho.init <- ( cor(Z,Y2,use="pairwise.complete.obs")*sd(Y2,na.rm=TRUE) / sum(dnorm(fit.y2$theta[fit.y2$th.idx])) ) } else { # no exo rho.init <- ( cor(Y1,Y2,use="pairwise.complete.obs")*sd(Y2,na.rm=TRUE) / sum(dnorm(fit.y2$theta[fit.y2$th.idx])) ) } # check range of rho.init is within [-1,+1] if(abs(rho.init) >= 1.0) { rho.init <- 0.0 } # minimize if(method == "nlminb") { out <- nlminb(start=atanh(rho.init), objective=objectiveFunction, gradient=gradientFunction, scale=10, control=list(trace=ifelse(verbose,1L,0L), rel.tol=1e-10)) } else if(method == "BFGS") { out <- optim(par = atanh(rho.init), fn = objectiveFunction, gr = gradientFunction, control = list(parscale = 1, reltol = 1e-10, abstol=(.Machine$double.eps * 10)), method = "BFGS") } if(out$convergence != 0L) warning("no convergence") rho <- tanh(out$par) rho } ps_cor_scores <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavOLS(Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y if(missing(eXo) && length(fit.y2$slope.idx) > 0L) eXo <- fit.y2$X R <- sqrt(1 - rho*rho) y1.VAR <- fit.y1$theta[fit.y1$var.idx]; y1.SD <- sqrt(y1.VAR) y1.ETA <- fit.y1$yhat Z <- (Y1 - y1.ETA) / y1.SD tauj.star <- (fit.y2$z1 - rho*Z)/R tauj1.star <- (fit.y2$z2 - rho*Z)/R y.Z1 <- dnorm(tauj.star); y.Z2 <- dnorm(tauj1.star) # p(Y2|Y1) py2y1 <- pnorm(tauj.star) - pnorm(tauj1.star) py2y1[py2y1 < .Machine$double.eps] <- .Machine$double.eps pyx.inv <- 1/py2y1 # mu.y1 y.Z1.y.Z2 <- y.Z1-y.Z2 dx.mu.y1 <- 1/y1.SD * (Z + (pyx.inv * (rho/R) * y.Z1.y.Z2)) # var.y1 dx.var.y1 <- 1/(2*y1.VAR) * ( ((Z*Z)-1) + (pyx.inv*rho*Z/R)*(y.Z1.y.Z2) ) # th.y2 dx.th.y2 <- (fit.y2$Y1*y.Z1 - fit.y2$Y2*y.Z2) * 1/R * pyx.inv # sl.y1 dx.sl.y1 <- NULL if(length(fit.y1$slope.idx) > 0L) dx.sl.y1 <- dx.mu.y1 * eXo # sl.y2 dx.sl.y2 <- NULL if(length(fit.y2$slope.idx) > 0L) dx.sl.y2 <- (y.Z2 - y.Z1) * eXo * 1/R * pyx.inv # rho TAUj <- y.Z1 * (fit.y2$z1*rho - Z) TAUj1 <- y.Z2 * (fit.y2$z2*rho - Z) dx.rho <- pyx.inv * 1/(R*R*R) * (TAUj - TAUj1) list(dx.mu.y1=dx.mu.y1, dx.var.y1=dx.var.y1, dx.th.y2=dx.th.y2, dx.sl.y1=dx.sl.y1, dx.sl.y2=dx.sl.y2, dx.rho=dx.rho) } lavaan/R/lav_partable_full.R0000644000176200001440000001267113053001776015527 0ustar liggesusers# create `full' parameter table, containing (almost) all parameters # that could be free # # main motivation: univariate scores tests (modification indices) # lav_partable_full <- function(partable = NULL, strict.exo = FALSE, free = FALSE, start = FALSE) { # check minimum requirements: lhs, op, rhs stopifnot( !is.null(partable$lhs), !is.null(partable$op), !is.null(partable$rhs) ) # meanstructure meanstructure <- any(partable$op == "~1") # number of blocks nblocks <- lav_partable_nblocks(partable) # extract `names' of various types of variables: lv.names <- lav_partable_vnames(partable, type="lv") ov.names <- lav_partable_vnames(partable, type="ov") ov.names.x <- lav_partable_vnames(partable, type="ov.x") ov.names.nox <- lav_partable_vnames(partable, type="ov.nox") lv.names.x <- lav_partable_vnames(partable, type="lv.x") ov.names.y <- lav_partable_vnames(partable, type="ov.y") lv.names.y <- lav_partable_vnames(partable, type="lv.y") lvov.names.y <- c(ov.names.y, lv.names.y) ov.names.ord <- lav_partable_vnames(partable, type="ov.ord") ov.names.ind <- lav_partable_vnames(partable, type="ov.ind") # 1 "=~" l.lhs <- r.rhs <- op <- 
character(0) l.lhs <- rep(lv.names, each=length(ov.names.nox)) l.rhs <- rep(ov.names.nox, times=length(lv.names)) # remove factor ~ eqs.y combinations, if any # because they also appear as a regression #bad.idx <- which( l.lhs %in% lv.names & # l.rhs %in% eqs.y) #if(length(bad.idx) > 0L) { # l.lhs <- l.lhs[-bad.idx] # l.rhs <- l.rhs[-bad.idx] #} l.op <- rep("=~", length(l.lhs)) # 2a. "~~" ov ## FIXME: ov.names.nox or ov.names?? ov.lhs <- ov.rhs <- ov.op <- character(0) #if(strict.exo) { OV <- ov.names.nox #} else { # OV <- ov.names #} nx <- length(OV) idx <- lower.tri(matrix(0, nx, nx), diag=TRUE) ov.lhs <- rep(OV, each=nx)[idx] # fill upper.tri ov.rhs <- rep(OV, times=nx)[idx] ov.op <- rep("~~", length(ov.lhs)) # exo ~~ if(!strict.exo && length(ov.names.x) > 0L) { OV <- ov.names.x nx <- length(OV) idx <- lower.tri(matrix(0, nx, nx), diag=TRUE) more.lhs <- rep(OV, each=nx)[idx] # fill upper.tri more.rhs <- rep(OV, times=nx)[idx] ov.lhs <- c(ov.lhs, more.lhs) ov.rhs <- c(ov.rhs, more.rhs) ov.op <- c(ov.op, rep("~~", length(more.lhs))) } # 2b. "~~" lv lv.lhs <- lv.rhs <- lv.op <- character(0) nx <- length(lv.names) idx <- lower.tri(matrix(0, nx, nx), diag=TRUE) lv.lhs <- rep(lv.names, each=nx)[idx] # fill upper.tri lv.rhs <- rep(lv.names, times=nx)[idx] lv.op <- rep("~~", length(lv.lhs)) # 3 regressions? r.lhs <- r.rhs <- r.op <- character(0) if(any(partable$op == "~")) { eqs.names <- unique( c(partable$lhs[partable$op == "~"], partable$rhs[partable$op == "~"]) ) eqs.y <- eqs.names if(strict.exo) { x.idx <- which(eqs.names %in% ov.names.x) if(length(x.idx) > 0L) { eqs.y <- eqs.names[-x.idx] } } eqs.x <- eqs.names r.lhs <- rep(eqs.y, each=length(eqs.x)) r.rhs <- rep(eqs.x, times=length(eqs.y)) # remove self-arrows idx <- which(r.lhs == r.rhs) r.lhs <- r.lhs[-idx] r.rhs <- r.rhs[-idx] # remove indicator ~ factor if they exist bad.idx <- which(r.lhs %in% ov.names.ind & r.rhs %in% lv.names) if(length(bad.idx) > 0L) { r.lhs <- r.lhs[-bad.idx] r.rhs <- r.rhs[-bad.idx] } r.op <- rep("~", length(r.rhs)) } # 4. intercepts int.lhs <- int.rhs <- int.op <- character(0) if(meanstructure) { if(strict.exo) { int.lhs <- c(ov.names.nox, lv.names) } else { int.lhs <- c(ov.names, lv.names) } int.rhs <- rep("", length(int.lhs)) int.op <- rep("~1", length(int.lhs)) } # 5. thresholds th.lhs <- th.rhs <- th.op <- character(0) if(length(ov.names.ord) > 0L) { tmp <- strsplit(lav_partable_vnames(partable, "th"), "\\|") th.lhs <- sapply(tmp, function(x) x[1]) th.rhs <- sapply(tmp, function(x) x[2]) th.op <- rep("|", length(th.lhs)) } # 6. scaling parameters delta.lhs <- delta.rhs <- delta.op <- character(0) if(nblocks > 1L && length(ov.names.ord) > 0L) { delta.lhs <- ov.names.ord delta.rhs <- ov.names.ord delta.op <- rep("~*~", length(delta.lhs)) } # combine lhs <- c(l.lhs, ov.lhs, lv.lhs, r.lhs, int.lhs, th.lhs, delta.lhs) rhs <- c(l.rhs, ov.rhs, lv.rhs, r.rhs, int.rhs, th.rhs, delta.rhs) op <- c(l.op, ov.op, lv.op, r.op, int.op, th.op, delta.op) # multiple blocks! 
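    # e.g., with nblocks = 2 and 10 rows in the single-block table, the
    # result has 20 rows: block = c(rep(1, 10), rep(2, 10)), with
    # lhs/op/rhs simply repeated for the second block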
block <- 1L if(nblocks > 1) { block <- rep(1:nblocks, each = length(lhs)) lhs <- rep(lhs, times = nblocks) op <- rep(op, times = nblocks) rhs <- rep(rhs, times = nblocks) } LIST <- data.frame(lhs = lhs, op = op, rhs = rhs, block = block, stringsAsFactors = FALSE) if(free) { LIST$free <- rep(0L, nrow(LIST)) } if(start) { LIST$start <- rep(0, nrow(LIST)) } LIST } lavaan/R/00class.R0000644000176200001440000002063213054005406013305 0ustar liggesusers# class definitions # # initial version: YR 25/03/2009 # added ModelSyntax: YR 02/08/2010 # deleted ModelSyntax: YR 01/11/2010 (using flattened model syntax now) setClass("lavData", representation( data.type="character", # "full", "moment" or "none" ngroups="integer", # number of groups group="character", # group variable nlevels="integer", # number of levels cluster="character", # cluster variable(s) group.label="character", # group labels level.label="character", # level labels std.ov="logical", # standardize observed variables? nobs="list", # effective number of observations norig="list", # original number of observations ov.names="list", # variable names (per group) ov.names.x="list", # exo variable names (per group) #ov.types="list", # variable types (per group) #ov.idx="list", # column indices (all observed variables) ordered="character", # ordered variables ov="list", # variable table case.idx="list", # case indices per group missing="character", # "listwise" or not? Mp="list", # if not complete, missing patterns # we need this here, to get nobs right! Rp="list", # response patterns (categorical only) Lp="list", # level patterns eXo="list", # local copy exo only X="list" # local copy ) ) setClass("lavSampleStats", # sample moments representation( var="list", # observed variances (per group) cov="list", # observed var/cov matrix (per group) mean="list", # observed mean vector (per group) th="list", # thresholds for non-numeric var (per group) th.idx="list", # th index (0 for numeric) th.names="list", # threshold names res.cov="list", # residual var/cov matrix (if conditional.x) res.var="list", # residual variances res.th="list", # residual thresholds res.th.nox="list", # residual thresholds ignoring x res.slopes="list", # slopes exo (if conditional.x) res.int="list", # intercepts (if conditional.x) mean.x="list", # mean exo cov.x="list", # variance/covariance exo bifreq="list", # bivariate frequency tables group.w="list", # group weight nobs="list", # effective number of obs (per group) ntotal="integer", # total number of obs (all groups) ngroups="integer", # number of groups x.idx="list", # x.idx if fixed.x = TRUE icov="list", # inverse of observed cov (per group) cov.log.det="list", # log det of observed cov (per group) res.icov="list", res.cov.log.det="list", ridge="numeric", # ridge constant WLS.obs="list", # all relevant observed stats in a vector WLS.V="list", # weight matrix for GLS/WLS WLS.VD="list", # diagonal of weight matrix only NACOV="list", # N times the asymptotic covariance matrix NACOV.user="logical", # user-specified NACOV? missing.flag="logical", # missing patterns? missing="list", # missingness information missing.h1="list", # h1 model YLp = "list", # cluster/level information zero.cell.tables="list" # bivariate tables with empty cells ) ) setClass("lavModel", # MATRIX representation of the sem model representation( GLIST="list", # list of all model matrices (for all groups) dimNames="list", # dim names for the model matrices isSymmetric="logical", # model matrix symmetric? 
mmSize="integer", # model matrix size (unique only) representation="character", # stub, until we define more classes meanstructure="logical", categorical="logical", group.w.free="logical", link="character", nblocks="integer", ngroups="integer", # only for rsem!! (which uses rsem:::computeDelta) nmat="integer", nvar="integer", num.idx="list", th.idx="list", nx.free="integer", #nx.unco="integer", nx.user="integer", m.free.idx="list", x.free.idx="list", #m.unco.idx="list", #x.unco.idx="list", m.user.idx="list", x.user.idx="list", x.def.idx="integer", x.ceq.idx="integer", x.cin.idx="integer", #x.free.var.idx="integer", eq.constraints="logical", eq.constraints.K="matrix", eq.constraints.k0="numeric", def.function="function", ceq.function="function", ceq.jacobian="function", ceq.JAC="matrix", ceq.rhs="numeric", ceq.linear.idx="integer", ceq.nonlinear.idx="integer", cin.function="function", cin.jacobian="function", cin.JAC="matrix", cin.rhs="numeric", cin.linear.idx="integer", cin.nonlinear.idx="integer", con.jac="matrix", con.lambda="numeric", nexo="integer", conditional.x="logical", fixed.x="logical", parameterization="character", ov.x.dummy.ov.idx="list", ov.x.dummy.lv.idx="list", ov.y.dummy.ov.idx="list", ov.y.dummy.lv.idx="list", estimator="character" ) ) setClass("Fit", representation( npar="integer", # number of free parameters #ndat="integer", #df="integer", x="numeric", # x partrace="matrix", # parameter trace start="numeric", # starting values (only for other packages) est="numeric", # estimated values (only for other packages) se="numeric", # standard errors fx="numeric", fx.group="numeric", logl="numeric", logl.group="numeric", iterations="integer", # number of iterations converged="logical", control="list", Sigma.hat="list", Mu.hat="list", TH="list", test="list" ) ) setClass("lavaan", representation( call = "call", # matched call timing = "list", # timing information Options = "list", # lavOptions ParTable = "list", # parameter table user-specified model pta = "list", # parameter table attributes Data = "lavData", # full data SampleStats = "lavSampleStats", # sample statistics Model = "lavModel", # internal matrix representation Cache = "list", # housekeeping stuff Fit = "Fit", # fitted results boot = "list", # bootstrap results optim = "list", # optimizer results implied = "list", # model implied moments vcov = "list", # vcov test = "list", # test external = "list" # optional slot, for add-on packages ) ) setClass("lavaanList", representation( call = "call", # matched call Options = "list", # lavOptions ParTable = "list", pta = "list", Data = "lavData", # from first dataset (ngroups!) 
Model = "lavModel", # based on first dataset meta = "list", timingList = "list", ParTableList = "list", DataList = "list", SampleStatsList = "list", CacheList = "list", vcovList = "list", testList = "list", optimList = "list", impliedList = "list", funList = "list", external = "list" # optional slot, for add-on packages ) ) lavaan/R/lav_utils.R0000644000176200001440000002507612774514377014075 0ustar liggesusers# utility functions # # initial version: YR 25/03/2009 # compute log(sum(exp(x))) avoiding under/overflow # using the identity: log(sum(exp(x)) = a + log(sum(exp(x - a))) lav_utils_logsumexp <- function(x) { a <- max(x) a + log(sum(exp(x - a))) } # invert positive definite symmetric matrix (eg cov matrix) # using choleski decomposition # return log determinant as an attribute inv.chol <- function(S, logdet=FALSE) { cS <- chol(S) #if( inherits(cS, "try-error") ) { # print(S) # warning("lavaan WARNING: symmetric matrix is not positive symmetric!") #} S.inv <- chol2inv( cS ) if(logdet) { diag.cS <- diag(cS) attr(S.inv, "logdet") <- sum(log(diag.cS*diag.cS)) } S.inv } # convert correlation matrix + standard deviations to covariance matrix # based on cov2cor in package:stats cor2cov <- function(R, sds, names=NULL) { p <- (d <- dim(R))[1L] if(!is.numeric(R) || length(d) != 2L || p != d[2L]) stop("'V' is not a square numeric matrix") if(any(!is.finite(sds))) warning("sds had 0 or NA entries; non-finite result is doubtful") #if(sum(diag(R)) != p) # stop("The diagonal of a correlation matrix should be all ones.") if(p != length(sds)) stop("The standard deviation vector and correlation matrix have a different number of variables") S <- R S[] <- sds * R * rep(sds, each=p) # optionally, add names if(!is.null(names)) { stopifnot(length(names) == p) rownames(S) <- colnames(S) <- names } S } # convert characters within single quotes to numeric vector # eg. s <- '3 4.3 8e-3 2.0' # x <- char2num(s) char2num <- function(s = '') { # first, strip all ',' or ';' s. <- gsub(","," ", s); s. <- gsub(";"," ", s.) tc <- textConnection(s.) x <- scan(tc, quiet=TRUE) close(tc) x } # create full matrix based on lower.tri or upper.tri elements; add names # always ROW-WISE!! 
getCov <- function(x, lower = TRUE, diagonal = TRUE, sds = NULL, names = paste("V", 1:nvar, sep="")) { # check x and sds if(is.character(x)) x <- char2num(x) if(is.character(sds)) sds <- char2num(sds) nels <- length(x) if(lower) { COV <- lav_matrix_lower2full(x, diagonal = diagonal) } else { COV <- lav_matrix_upper2full(x, diagonal = diagonal) } nvar <- ncol(COV) # if diagonal is false, assume unit diagonal if(!diagonal) diag(COV) <- 1 # check if we have a sds argument if(!is.null(sds)) { stopifnot(length(sds) == nvar) COV <- cor2cov(COV, sds) } # names stopifnot(length(names) == nvar) rownames(COV) <- colnames(COV) <- names COV } # translate row+col matrix indices to vec idx rowcol2vec <- function(row.idx, col.idx, nrow, symmetric=FALSE) { idx <- row.idx + (col.idx-1)*nrow if(symmetric) { idx2 <- col.idx + (row.idx-1)*nrow idx <- unique(sort(c(idx, idx2))) } idx } # dummy function to 'pretty' print a vector with fixed width pprint.vector <- function(x, digits.after.period=3, ncols=NULL, max.col.width=11, newline=TRUE) { n <- length(x) var.names <- names(x) total.width = getOption("width") max.width <- max(nchar(var.names)) if( max.width < max.col.width) { # shrink max.col.width <- max( max.width, digits.after.period+2) } # automatic number of columns if(is.null(ncols)) { ncols <- floor( (total.width-2) / (max.col.width+2) ) } nrows <- ceiling(n / ncols) if(digits.after.period >= (max.col.width-3)) { max.col.width <- digits.after.period + 3 } string.format <- paste(" %", max.col.width, "s", sep="") number.format <- paste(" %", max.col.width, ".", digits.after.period, "f", sep="") for(nr in 1:nrows) { rest <- min(ncols, n) if(newline) cat("\n") # labels for(nc in 1:rest) { vname <- substr(var.names[(nr-1)*ncols + nc], 1, max.col.width) cat(sprintf(string.format, vname)) } cat("\n") for(nc in 1:rest) { cat(sprintf(number.format, x[(nr-1)*ncols + nc])) } cat("\n") n <- n - ncols } if(newline) cat("\n") } # print only lower half of symmetric matrix pprint.matrix.symm <- function(x, digits.after.period=3, ncols=NULL, max.col.width=11, newline=TRUE) { n <- ncol <- ncol(x); nrow <- nrow(x) stopifnot(ncol == nrow) var.names <- rownames(x) total.width = getOption("width") max.width <- max(nchar(var.names)) if( max.width < max.col.width) { # shrink max.col.width <- max( max.width, digits.after.period+2) } # automatic number of columns if(is.null(ncols)) { ncols <- floor( (total.width-2) / (max.col.width+2) ) } nblocks <- ceiling(n / ncols) if(digits.after.period >= (max.col.width-3)) { max.col.width <- digits.after.period + 3 } fc.format <- paste(" %", min(max.width, max.col.width), "s", sep="") string.format <- paste(" %", max.col.width, "s", sep="") number.format <- paste(" %", max.col.width, ".", digits.after.period, "f", sep="") for(nb in 1:nblocks) { rest <- min(ncols, n) if(newline) cat("\n") # empty column cat(sprintf(fc.format, "")) # labels for(nc in 1:rest) { vname <- substr(var.names[(nb-1)*ncols + nc], 1, max.col.width) cat(sprintf(string.format, vname)) } cat("\n") row.start <- (nb-1)*ncols + 1 for(nr in row.start:nrow) { # label vname <- substr(var.names[nr], 1, max.col.width) cat(sprintf(fc.format, vname)) col.rest <- min(rest, (nr - row.start + 1)) for(nc in 1:col.rest) { value <- x[nr, (nb-1)*ncols + nc] cat(sprintf(number.format, value)) } cat("\n") } n <- n - ncols } if(newline) cat("\n") } # elimination of rows/cols symmetric matrix eliminate.rowcols <- function(x, el.idx=integer(0)) { if(length(el.idx) == 0) { return( x ) } stopifnot(ncol(x) == nrow(x)) stopifnot(min(el.idx) > 0 
&& max(el.idx) <= ncol(x)) x[-el.idx, -el.idx] } # elimination of rows/cols pstar symmetric matrix eliminate.pstar.idx <- function(nvar=1, el.idx=integer(0), meanstructure=FALSE, type="all") { if(length(el.idx) > 0) { stopifnot(min(el.idx) > 0 && max(el.idx) <= nvar) } XX <- utils::combn(1:(nvar+1),2) XX[2,] <- XX[2,] - 1 if(type == "all") { idx <- !(apply(apply(XX, 2, function(x) {x %in% el.idx}), 2, all)) } else { idx <- !(apply(apply(XX, 2, function(x) {x %in% el.idx}), 2, any)) } if(meanstructure) { idx <- c(!(1:nvar %in% el.idx), idx) #idx <- c(rep(TRUE, nvar), idx) } idx } # construct 'augmented' covariance matrix # based on the covariance matrix and the mean vector augmented.covariance <- function(S., mean) { S <- as.matrix(S.) m <- as.matrix(mean) p <- ncol(S) if(nrow(m) != p) { stop("incompatible dimension of mean vector") } out <- matrix(0, ncol=(p+1), nrow=(p+1)) out[1:p,1:p] <- S + m %*% t(m) out[p+1,1:p] <- t(m) out[1:p,p+1] <- m out[p+1,p+1] <- 1 out } # linesearch using 'armijo' backtracking # to find a suitable `stepsize' (alpha) linesearch.backtracking.armijo <- function(f.alpha, s.alpha, alpha=10) { tau <- 0.5 ftol <- 0.001 f.old <- f.alpha(0) s.old <- s.alpha(0) armijo.condition <- function(alpha) { f.new <- f.alpha(alpha) # condition f.new > f.old + ftol * alpha * s.old } i <- 1 while(armijo.condition(alpha)) { alpha <- alpha * tau f.new <- f.alpha(alpha) cat("... backtracking: ", i, "alpha = ", alpha, "f.new = ", f.new, "\n") i <- i + 1 } alpha } steepest.descent <- function(start, objective, gradient, iter.max, verbose) { x <- start if(verbose) { cat("Steepest descent iterations\n") cat("iter function abs.change rel.change step.size norm.gx\n") gx <- gradient(x) norm.gx <- sqrt( gx %*% gx ) fx <- objective(x) cat(sprintf("%4d %11.7E %11.5E %11.5E", 0, fx, 0, norm.gx), "\n") } for(iter in 1:iter.max) { fx.old <- objective(x) # normalized gradient gx <- gradient(x) old.gx <- gx norm.gx <- sqrt( gx %*% gx ) gradient.old <- gx / norm.gx direction.vector <- (-1) * gradient.old f.alpha <- function(alpha) { new.x <- x + alpha * direction.vector fx <- objective(new.x) #cat(" [stepsize] iter ", iter, " step size = ", alpha, # " fx = ", fx, "\n", sep="") # for optimize only if(is.infinite(fx)) { fx <- .Machine$double.xmax } fx } #s.alpha <- function(alpha) { # new.x <- x + alpha * direction.vector # gradient.new <- gradient(new.x) # norm.gx <- sqrt( gradient.new %*% gradient.new) # gradient.new <- gradient.new/norm.gx # as.numeric(gradient.new %*% direction.vector) #} # find step size #alpha <- linesearch.backtracking.armijo(f.alpha, s.alpha, alpha=1) if(iter == 1) { alpha <- 0.1 } else { alpha <- optimize(f.alpha, lower=0.0, upper=1)$minimum if( f.alpha(alpha) > fx.old ) { alpha <- optimize(f.alpha, lower=-1, upper=0.0)$minimum } } # steepest descent step old.x <- x x <- x + alpha * direction.vector gx.old <- gx gx <- gradient(x) dx.max <- max(abs( gx )) # verbose if(verbose) { fx <- fx.old fx.new <- objective(x) abs.change <- fx.new - fx.old rel.change <- abs.change / fx.old norm.gx <- sqrt(gx %*% gx) if(verbose) { cat(sprintf("%4d %11.7E %10.7f %10.7f %11.5E %11.5E", iter, fx.new, abs.change, rel.change, alpha, norm.gx), "\n") } } # convergence check if( dx.max < 1e-05 ) break } x } lavaan/R/lav_lavaanList_simulate.R0000644000176200001440000000457112741167732016724 0ustar liggesusers# lavSimulate: fit the *same* model, on simulated datasets # YR - 4 July 2016 lavSimulate <- function(pop.model = NULL, # population model model = NULL, # user model dataFunction = simulateData, 
dataFunction.args = list(model = pop.model, sample.nobs = 1000L), ndat = 1000L, cmd = "sem", cmd.pop = "sem", ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE, parallel = c("no", "multicore", "snow"), ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL) { # dotdotdot dotdotdot <- list(...) # dotdotdot for fit.orig dotdotdot.orig <- dotdotdot dotdotdot.orig$verbose <- FALSE dotdotdot.orig$debug <- FALSE dotdotdot.orig$data <- NULL dotdotdot.orig$sample.cov <- NULL # 'fit' population model, to get 'true' parameters fit.orig <- do.call(cmd.pop, args = c(list(model = pop.model), dotdotdot.orig)) # create (empty) 'model' object if(is.null(model)) { model <- fit.orig } else { # this is more tricky!! we must 'embed' the model in the original # model, otherwise, the order of the parameters will be different!! # TODO stop("model argument not supported yet") } # dotdotdot dotdotdot <- list() # per default, use 'true' values as starting values if(is.null(dotdotdot$start)) { dotdotdot$start = fit.orig } # generate simulations fit <- do.call("lavaanList", args = c(list(model = model, dataFunction = dataFunction, dataFunction.args = dataFunction.args, ndat = ndat, cmd = cmd, store.slots = store.slots, FUN = FUN, show.progress = show.progress, parallel = parallel, ncpus = ncpus, cl = cl), dotdotdot)) # store 'true' parameters in meta$est.true fit@meta$lavSimulate <- TRUE fit@meta$est.true <- fit.orig@ParTable$est fit } lavaan/R/lav_fit_measures.R0000644000176200001440000020357713052636633015415 0ustar liggesuserssetMethod("fitMeasures", signature(object = "lavaan"), function(object, fit.measures = "all", baseline.model = NULL) { lav_fit_measures(object = object, fit.measures = fit.measures, baseline.model = baseline.model) }) setMethod("fitmeasures", signature(object = "lavaan"), function(object, fit.measures = "all", baseline.model = NULL) { lav_fit_measures(object = object, fit.measures = fit.measures, baseline.model = baseline.model) }) lav_fit_measures <- function(object, fit.measures="all", baseline.model = NULL) { # has the model converged? if(object@optim$npar > 0L && !object@optim$converged) { stop("lavaan ERROR: fit measures not available if model did not converge") } TEST <- lavInspect(object, "test") # do we have a test statistic? if(TEST[[1]]$test == "none") { # to deal with semTools 0.4-9, we need to check the @Fit@test slot #if(object@Fit@test[[1]]$test != "none") { # TEST <- object@Fit@test #} else { stop("lavaan ERROR: please refit the model with test=\"standard\"") #} } if("all" %in% fit.measures) { class.flag <- TRUE } else { class.flag <- FALSE } # collect info from the lavaan slots GLIST <- object@Model@GLIST # N versus N-1 # this affects BIC, RMSEA, cn_01/05, MFI and ECVI # Changed 0.5-15: suggestion by Mark Seeto if(object@Options$estimator %in% c("ML","PML","FML") && object@Options$likelihood == "normal") { N <- object@SampleStats@ntotal } else { N <- object@SampleStats@ntotal - object@SampleStats@ngroups } # Change 0.5-13: take into account explicit equality constraints!! # reported by Mark L. 
Taper (affects AIC and BIC) npar <- object@optim$npar if(nrow(object@Model@con.jac) > 0L) { ceq.idx <- attr(object@Model@con.jac, "ceq.idx") if(length(ceq.idx) > 0L) { neq <- qr(object@Model@con.jac[ceq.idx,,drop=FALSE])$rank npar <- npar - neq } } fx <- object@optim$fx fx.group <- object@optim$fx.group meanstructure <- object@Model@meanstructure categorical <- object@Model@categorical multigroup <- object@Data@ngroups > 1L estimator <- object@Options$estimator test <- object@Options$test G <- object@Data@ngroups # number of groups X2 <- TEST[[1]]$stat df <- TEST[[1]]$df # fit stat and df are NA (perhaps test="none"?), try again: if(is.na(df)) { df <- lav_partable_df(object@ParTable) if(nrow(object@Model@con.jac) > 0L) { df <- ( df + length(attr(object@Model@con.jac, "ceq.idx")) ) } } #if(is.na(X2) && is.finite(fx)) { # #} if(test %in% c("satorra.bentler", "yuan.bentler", "mean.var.adjusted", "scaled.shifted")) { scaled <- TRUE } else { scaled <- FALSE } # scaled X2 if(scaled) { X2.scaled <- TEST[[2]]$stat df.scaled <- TEST[[2]]$df } # define 'sets' of fit measures: fit.always <- c("npar") # basic chi-square test fit.chisq <- c("fmin", "chisq", "df", "pvalue") if(scaled) { fit.chisq <- c(fit.chisq, "chisq.scaled", "df.scaled", "pvalue.scaled", "chisq.scaling.factor") } # baseline model fit.baseline <- c("baseline.chisq", "baseline.df", "baseline.pvalue") if(scaled) { fit.baseline <- c(fit.baseline, "baseline.chisq.scaled", "baseline.df.scaled", "baseline.pvalue.scaled", "baseline.chisq.scaling.factor") } # cfi/tli fit.cfi.tli <- c("cfi", "tli") if(scaled) { fit.cfi.tli <- c(fit.cfi.tli, "cfi.scaled", "tli.scaled", "cfi.robust", "tli.robust") } # more incremental fit indices fit.incremental <- c("cfi", "tli", "nnfi", "rfi", "nfi", "pnfi", "ifi", "rni") if(scaled) { fit.incremental <- c(fit.incremental, "cfi.scaled", "tli.scaled", "cfi.robust", "tli.robust", "nnfi.scaled", "nnfi.robust", "rfi.scaled", "nfi.scaled", "ifi.scaled", "rni.scaled", "rni.robust") } # likelihood based measures if(estimator == "MML") { fit.logl <- c("logl", "aic", "bic", "ntotal", "bic2") } else { fit.logl <- c("logl", "unrestricted.logl", "aic", "bic", "ntotal", "bic2") } if(scaled && object@Options$test == "yuan.bentler") { fit.logl <- c(fit.logl, "scaling.factor.h1", "scaling.factor.h0") } # rmsea fit.rmsea <- c("rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "rmsea.pvalue") if(scaled) { fit.rmsea <- c(fit.rmsea, "rmsea.scaled", "rmsea.ci.lower.scaled", "rmsea.ci.upper.scaled", "rmsea.pvalue.scaled", "rmsea.robust", "rmsea.ci.lower.robust", "rmsea.ci.upper.robust", "rmsea.pvalue.robust") } # srmr if(categorical) { fit.srmr <- c("srmr", "wrmr") fit.srmr2 <- c("rmr", "rmr_nomean", "srmr", # per default equal to srmr_bentler_nomean "srmr_bentler", "srmr_bentler_nomean", "srmr_bollen", "srmr_bollen_nomean", "srmr_mplus", "srmr_mplus_nomean") } else { fit.srmr <- c("srmr") fit.srmr2 <- c("rmr", "rmr_nomean", "srmr", # the default "srmr_bentler", "srmr_bentler_nomean", "srmr_bollen", "srmr_bollen_nomean", "srmr_mplus", "srmr_mplus_nomean") } # table #if(categorical) { # # FIXME: Cp: no exo, all ordinal!
# fit.table <- c("c_p", "c_p.df", "c_p.p.value", # "c_f", "c_f.df", "c_f.p.value", # "rpat.observed", "rpat.total", "rpat.empty", # "c_m", "c_m.df", "c_m.p.value") #} else { fit.table <- character(0L) #} # various fit.other <- c("cn_05","cn_01","gfi","agfi","pgfi","mfi") if(!categorical && G == 1) { fit.other <- c(fit.other, "ecvi") } # lower case fit.measures <- tolower(fit.measures) # select 'default' fit measures if(length(fit.measures) == 1L) { if(fit.measures == "default") { if(estimator == "ML" || estimator == "PML") { fit.measures <- c(fit.always, fit.chisq, fit.baseline, fit.cfi.tli, fit.logl, fit.rmsea, fit.srmr) } else if(estimator == "MML") { fit.measures <- c(fit.always, fit.logl) } else { fit.measures <- c(fit.always, fit.chisq, fit.baseline, fit.cfi.tli, fit.rmsea, fit.srmr, fit.table) if(object@Options$mimic == "Mplus") { fit.measures <- c(fit.measures, "wrmr") } } } else if(fit.measures == "all") { if(estimator == "ML") { fit.measures <- c(fit.always, fit.chisq, fit.baseline, fit.incremental, fit.logl, fit.rmsea, fit.srmr2, fit.other) } else { fit.measures <- c(fit.always, fit.chisq, fit.baseline, fit.incremental, fit.rmsea, fit.srmr2, fit.other, fit.table) } } } # main container indices <- list() if("npar" %in% fit.measures) { indices["npar"] <- npar } # Chi-square value estimated model (H0) if(any(c("fmin", "chisq", "chisq.scaled", "chisq.scaling.factor") %in% fit.measures)) { indices["fmin"] <- fx indices["chisq"] <- X2 if(scaled) { indices["chisq.scaled"] <- X2.scaled indices["chisq.scaling.factor"] <- TEST[[2]]$scaling.factor } } if(any(c("df", "df.scaled") %in% fit.measures)) { indices["df"] <- df if(scaled) { indices["df.scaled"] <- df.scaled } } if(any(c("pvalue", "pvalue.scaled") %in% fit.measures)) { indices["pvalue"] <- TEST[[1]]$pvalue if(scaled) { indices["pvalue.scaled"] <- TEST[[2]]$pvalue } } if(any(c("cfi", "cfi.scaled", "cfi.robust", "tli", "tli.scaled", "tli.robust", "nnfi", "nnfi.scaled", "nnfi.robust", "pnfi", "pnfi.scaled", "rfi", "rfi.scaled", "nfi", "nfi.scaled", "ifi", "ifi.scaled", "rni", "rni.scaled", "rni.robust", "baseline.chisq", "baseline.chisq.scaled", "baseline.pvalue", "baseline.pvalue.scaled") %in% fit.measures)) { # call explicitly independence model # this is not strictly needed for ML, but it is for # GLS and WLS # and MLM and MLR to get the scaling factor(s)! 
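# (the baseline chi-square feeds the incremental fit indices below,
#  eg. CFI = 1 - max(X2 - df, 0)/max(X2 - df, X2.null - df.null, 0);
#  a baseline model stored in object@external is refitted below when
#  its test/se/estimator do not match the fitted model)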
if (!is.null(baseline.model) && is(baseline.model, "lavaan")) { fit.indep <- baseline.model } else if (!is.null(object@external$baseline.model) && is(object@external$baseline.model, "lavaan")) { fit.indep <- object@external$baseline.model ## check baseline converged if (!fit.indep@optim$converged) { fit.indep <- NULL } else { ## check test matches and baseline converged sameTest <- ( object@Options$test == fit.indep@Options$test ) sameSE <- ( object@Options$se == fit.indep@Options$se ) sameEstimator <- ( object@Options$estimator == fit.indep@Options$estimator ) if (!all(sameTest, sameSE, sameEstimator)) { fit.indep <- try(update(fit.indep, test = object@Options$test, se = object@Options$se, estimator = object@Options$estimator), silent = TRUE) } } } else { fit.indep <- try(lav_object_independence(object), silent = TRUE) } if(inherits(fit.indep, "try-error")) { X2.null <- df.null <- as.numeric(NA) if(scaled) { X2.null.scaled <- df.null.scaled <- as.numeric(NA) } } else { X2.null <- fit.indep@test[[1]]$stat df.null <- fit.indep@test[[1]]$df if(scaled) { X2.null.scaled <- fit.indep@test[[2]]$stat df.null.scaled <- fit.indep@test[[2]]$df } } # check for NAs if(is.na(X2) || is.na(df) || is.na(X2.null) || is.na(df.null)) { indices[fit.incremental] <- as.numeric(NA) } else { if("baseline.chisq" %in% fit.measures) { indices["baseline.chisq"] <- X2.null if(scaled) { indices["baseline.chisq.scaled"] <- X2.null.scaled } } if("baseline.df" %in% fit.measures) { indices["baseline.df"] <- df.null if(scaled) { indices["baseline.df.scaled"] <- df.null.scaled } } if("baseline.pvalue" %in% fit.measures) { indices["baseline.pvalue"] <- fit.indep@test[[1]]$pvalue if(scaled) { indices["baseline.pvalue.scaled"] <- fit.indep@test[[2]]$pvalue } } if("baseline.chisq.scaling.factor" %in% fit.measures) { indices["baseline.chisq.scaling.factor"] <- fit.indep@test[[2]]$scaling.factor } # CFI - comparative fit index (Bentler, 1990) if("cfi" %in% fit.measures) { t1 <- max( c(X2 - df, 0) ) t2 <- max( c(X2 - df, X2.null - df.null, 0) ) if(t1 == 0 && t2 == 0) { indices["cfi"] <- 1 } else { indices["cfi"] <- 1 - t1/t2 } } if("cfi.scaled" %in% fit.measures) { t1 <- max( c(X2.scaled - df.scaled, 0) ) t2 <- max( c(X2.scaled - df.scaled, X2.null.scaled - df.null.scaled, 0) ) if(is.na(t1) || is.na(t2)) { indices["cfi.scaled"] <- NA } else if(t1 == 0 && t2 == 0) { indices["cfi.scaled"] <- 1 } else { indices["cfi.scaled"] <- 1 - t1/t2 } } if("cfi.robust" %in% fit.measures) { if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # see Brosseau-Liard & Savalei MBR 2014, equation 15 # what to do if X2 = 0 and df = 0? 
in this case, # the scaling factor (ch) will be NA, and we get NA # (instead of 1) if(X2 < .Machine$double.eps && df == 0) { ch <- 0 } else { ch <- TEST[[2]]$scaling.factor } cb <- fit.indep@test[[2]]$scaling.factor t1 <- max( c(X2 - (ch*df), 0) ) t2 <- max( c(X2 - (ch*df), X2.null - (cb*df.null), 0) ) if(is.na(t1) || is.na(t2)) { indices["cfi.robust"] <- NA } else if(t1 == 0 && t2 == 0) { indices["cfi.robust"] <- 1 } else { indices["cfi.robust"] <- 1 - t1/t2 } } else { indices["cfi.robust"] <- NA } } # RNI - relative noncentrality index (McDonald & Marsh, 1990) # same as CFI, but without the max(0,) if("rni" %in% fit.measures) { t1 <- X2 - df t2 <- X2.null - df.null if(t2 == 0) { RNI <- NA } else { RNI <- 1 - t1/t2 } indices["rni"] <- RNI } if("rni.scaled" %in% fit.measures) { t1 <- X2.scaled - df.scaled t2 <- X2.null.scaled - df.null.scaled if(is.na(t1) || is.na(t2)) { RNI <- NA } else if(t2 == 0) { RNI <- NA } else { RNI <- 1 - t1/t2 } indices["rni.scaled"] <- RNI } if("rni.robust" %in% fit.measures) { if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # see Brosseau-Liard & Savalei MBR 2014, equation 15 # what to do if X2 = 0 and df = 0? in this case, # the scaling factor (ch) will be NA, and we get NA # (instead of 1) if(X2 < .Machine$double.eps && df == 0) { ch <- 0 } else { ch <- TEST[[2]]$scaling.factor } cb <- fit.indep@test[[2]]$scaling.factor t1 <- X2 - ch*df t2 <- X2.null - cb*df.null if(is.na(t1) || is.na(t2)) { RNI <- NA } else if(t2 == 0) { RNI <- NA } else { RNI <- 1 - t1/t2 } indices["rni.robust"] <- RNI } else { indices["rni.robust"] <- NA } } # TLI - Tucker-Lewis index (Tucker & Lewis, 1973) # same as # NNFI - nonnormed fit index (NNFI, Bentler & Bonett, 1980) if("tli" %in% fit.measures || "nnfi" %in% fit.measures) { # note: formula in lavaan <= 0.5-20: # t1 <- X2.null/df.null - X2/df # t2 <- X2.null/df.null - 1 # if(t1 < 0 && t2 < 0) { # TLI <- 1 #} else { # TLI <- t1/t2 #} # note: TLI original formula was in terms of fx/df, not X2/df # then, t1 <- fx_0/df.null - fx/df # t2 <- fx_0/df.null - 1/N (or N-1 for wishart) # note: in lavaan 0.5-21, we use the alternative formula: # TLI <- 1 - ((X2 - df)/(X2.null - df.null) * df.null/df) # - this one has the advantage that a 'robust' version # can be derived; this seems non-trivial for the original one # - unlike cfi, we do not use 'max(0, )' for t1 and t2 # therefore, t1 can go negative, and TLI can be > 1 t1 <- (X2 - df)*df.null t2 <- (X2.null - df.null)*df if(df > 0) { indices["tli"] <- indices["nnfi"] <- 1 - t1/t2 } else { indices["tli"] <- indices["nnfi"] <- 1 } } if("tli.scaled" %in% fit.measures || "nnfi.scaled" %in% fit.measures) { t1 <- (X2.scaled - df.scaled)*df.null.scaled t2 <- (X2.null.scaled - df.null.scaled)*df.scaled if(is.na(t1) || is.na(t2)) { indices["tli.scaled"] <- indices["nnfi.scaled"] <- NA } else if(df > 0 && t2 != 0) { indices["tli.scaled"] <- indices["nnfi.scaled"] <- 1 - t1/t2 } else { indices["tli.scaled"] <- indices["nnfi.scaled"] <- 1 } } if("tli.robust" %in% fit.measures || "nnfi.robust" %in% fit.measures) { if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # see Brosseau-Liard & Savalei MBR 2014, equation 16 # what to do if X2 = 0 and df = 0?
in this case, # the scaling factor (ch) will be NA, and we get NA # (instead of 1) if(X2 < .Machine$double.eps && df == 0) { ch <- 0 } else { ch <- TEST[[2]]$scaling.factor } cb <- fit.indep@test[[2]]$scaling.factor t1 <- (X2 - ch*df)*df.null t2 <- (X2.null - cb*df.null)*df if(is.na(t1) || is.na(t2)) { indices["tli.robust"] <- indices["nnfi.robust"] <- NA } else if(df > 0 && t2 != 0) { indices["tli.robust"] <- indices["nnfi.robust"] <- 1 - t1/t2 } else { indices["tli.robust"] <- indices["nnfi.robust"] <- 1 } } else { indices["tli.robust"] <- indices["nnfi.robust"] <- NA } } # RFI - relative fit index (Bollen, 1986; Joreskog & Sorbom 1993) if("rfi" %in% fit.measures) { if(df > 0) { t1 <- X2.null/df.null - X2/df t2 <- X2.null/df.null if(t1 < 0 || t2 < 0) { RLI <- 1 } else { RLI <- t1/t2 } } else { RLI <- 1 } indices["rfi"] <- RLI } if("rfi.scaled" %in% fit.measures) { if(df > 0) { t1 <- X2.null.scaled/df.null.scaled - X2.scaled/df.scaled t2 <- X2.null.scaled/df.null.scaled if(is.na(t1) || is.na(t2)) { RLI <- NA } else if(t1 < 0 || t2 < 0) { RLI <- 1 } else { RLI <- t1/t2 } } else { RLI <- 1 } indices["rfi.scaled"] <- RLI } # NFI - normed fit index (Bentler & Bonett, 1980) if("nfi" %in% fit.measures) { if(df > 0) { t1 <- X2.null - X2 t2 <- X2.null NFI <- t1/t2 } else { NFI <- 1 } indices["nfi"] <- NFI } if("nfi.scaled" %in% fit.measures) { t1 <- X2.null.scaled - X2.scaled t2 <- X2.null.scaled NFI <- t1/t2 indices["nfi.scaled"] <- NFI } # PNFI - Parsimony normed fit index (James, Mulaik & Brett, 1982) if("pnfi" %in% fit.measures) { t1 <- X2.null - X2 t2 <- X2.null PNFI <- (df/df.null) * t1/t2 indices["pnfi"] <- PNFI } if("pnfi.scaled" %in% fit.measures) { t1 <- X2.null.scaled - X2.scaled t2 <- X2.null.scaled PNFI <- (df/df.null) * t1/t2 indices["pnfi.scaled"] <- PNFI } # IFI - incremental fit index (Bollen, 1989; Joreskog & Sorbom, 1993) if("ifi" %in% fit.measures) { t1 <- X2.null - X2 t2 <- X2.null - df if(t2 < 0) { IFI <- 1 } else { IFI <- t1/t2 } indices["ifi"] <- IFI } if("ifi.scaled" %in% fit.measures) { t1 <- X2.null.scaled - X2.scaled t2 <- X2.null.scaled - df if(is.na(t2)) { IFI <- NA } else if(t2 < 0) { IFI <- 1 } else { IFI <- t1/t2 } indices["ifi.scaled"] <- IFI } } } if("logl" %in% fit.measures || "unrestricted.logl" %in% fit.measures || "aic" %in% fit.measures || "bic" %in% fit.measures || "bic2" %in% fit.measures) { if(estimator == "ML" || estimator == "MML") { # logl H1 -- unrestricted (aka saturated) model logl.H1.group <- numeric(G) # check if everything is numeric, OR if we have exogenous # factor with 2 levels only logl.ok <- FALSE if(all(object@Data@ov$type == "numeric")) { logl.ok <- TRUE } else { not.idx <- which(object@Data@ov$type != "numeric") for(i in not.idx) { if(object@Data@ov$type[i] == "factor" && object@Data@ov$exo[i] == 1L && object@Data@ov$nlev[i] == 2L) { logl.ok <- TRUE } else { logl.ok <- FALSE break } } } if(logl.ok) { for(g in 1:G) { if(!object@SampleStats@missing.flag) { if(object@Model@conditional.x) { nvar <- ncol(object@SampleStats@res.cov[[g]]) Ng <- object@SampleStats@nobs[[g]] c <- Ng*nvar/2 * log(2 * pi) logl.H1.group[g] <- ( -c -(Ng/2) * object@SampleStats@res.cov.log.det[[g]] - (Ng/2)*nvar ) } else { nvar <- ncol(object@SampleStats@cov[[g]]) Ng <- object@SampleStats@nobs[[g]] c <- Ng*nvar/2 * log(2 * pi) logl.H1.group[g] <- ( -c -(Ng/2) * object@SampleStats@cov.log.det[[g]] - (Ng/2)*nvar ) } } else { # missing patterns case pat <- object@Data@Mp[[g]]$pat Ng <- object@Data@nobs[[g]] ni <- as.numeric(apply(pat, 1, sum) %*% object@Data@Mp[[g]]$freq)
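# ni = total number of observed (non-missing) values in this group:
# each missing pattern contributes its number of observed variables
# times its frequency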
fx.full <- object@SampleStats@missing.h1[[g]]$h1 logl.H1.group[g] <- - (ni/2 * log(2 * pi)) - (Ng/2 * fx.full) } } if(G > 1) { logl.H1 <- sum(logl.H1.group) } else { logl.H1 <- logl.H1.group[1] } } else { logl.H1.group <- as.numeric(NA) logl.H1 <- as.numeric(NA) } if("unrestricted.logl" %in% fit.measures) { indices["unrestricted.logl"] <- logl.H1 } # logl H0 logl.H0.group <- numeric(G) if(logl.ok) { for(g in 1:G) { Ng <- object@SampleStats@nobs[[g]] logl.H0.group[g] <- -Ng * (fx.group[g] - logl.H1.group[g]/Ng) } if(G > 1) { logl.H0 <- sum(logl.H0.group) } else { logl.H0 <- logl.H0.group[1] } } else if(object@Options$estimator == "MML") { logl.H0 <- -1 * fx } else { logl.H0 <- as.numeric(NA) } if("logl" %in% fit.measures) { indices["logl"] <- logl.H0 } # AIC AIC <- -2*logl.H0 + 2*npar if("aic" %in% fit.measures) { indices["aic"] <- AIC } # BIC if("bic" %in% fit.measures || "bic2" %in% fit.measures) { BIC <- -2*logl.H0 + npar*log(N) indices["bic"] <- BIC # add sample-size adjusted bic N.star <- (N + 2) / 24 BIC2 <- -2*logl.H0 + npar*log(N.star) indices["bic2"] <- BIC2 } # scaling factor for MLR if(object@Options$test == "yuan.bentler") { indices["scaling.factor.h1"] <- TEST[[2]]$scaling.factor.h1 indices["scaling.factor.h0"] <- TEST[[2]]$scaling.factor.h0 } } # ML else { # no ML! if("logl" %in% fit.measures) { indices["logl"] <- as.numeric(NA) } if("unrestricted.logl" %in% fit.measures) { indices["unrestricted.logl"] <- as.numeric(NA) } if("aic" %in% fit.measures) { indices["aic"] <- as.numeric(NA) } if("bic" %in% fit.measures) { indices["bic"] <- as.numeric(NA) } if("bic2" %in% fit.measures) { indices["bic2"] <- as.numeric(NA) } } } N.RMSEA <- max(N, X2*4) # FIXME: good strategy?? if(any(c("rmsea","rmsea.scaled","rmsea.robust") %in% fit.measures)) { # RMSEA # - RMSEA.scaled replaces X2 by X2.scaled (which is not ok) # - RMSEA.robust uses the formula from Broseau-Liard, Savalei & Li # (2012) paper (see eq 8) if(is.na(X2) || is.na(df)) { RMSEA <- RMSEA.scaled <- RMSEA.robust <- as.numeric(NA) } else if(df > 0) { if(scaled) { d <- sum(TEST[[2]]$trace.UGamma) if(is.na(d) || d==0) d <- NA # scaling factor c.hat <- TEST[[2]]$scaling.factor } if(object@Options$mimic %in% c("Mplus", "lavaan")) { RMSEA <- sqrt( max( c((X2/N)/df - 1/N, 0) ) ) * sqrt(G) if(scaled && test != "scaled.shifted") { RMSEA.scaled <- sqrt( max( c((X2/N)/d - 1/N, 0) ) ) * sqrt(G) RMSEA.robust <- sqrt( max( c((X2/N)/df - c.hat/N, 0) ) ) * sqrt(G) } else if(test == "scaled.shifted") { RMSEA.scaled <- sqrt( max( c((X2.scaled/N)/df - 1/N, 0))) * sqrt(G) RMSEA.robust <- sqrt( max( c((X2/N)/df - c.hat/N, 0) ) ) * sqrt(G) } } else { # no multiple group correction RMSEA <- sqrt( max( c((X2/N)/df - 1/N, 0) ) ) if(scaled) { RMSEA.scaled <- sqrt( max( c((X2/N)/d - 1/N, 0) ) ) RMSEA.robust <- sqrt( max( c((X2/N)/df - c.hat/N, 0) ) ) } } } else { RMSEA <- RMSEA.scaled <- RMSEA.robust <- 0 } indices["rmsea"] <- RMSEA if(scaled) { indices["rmsea.scaled"] <- RMSEA.scaled if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { indices["rmsea.robust"] <- RMSEA.robust } else { indices["rmsea.robust"] <- NA } } } if("rmsea.ci.lower" %in% fit.measures) { lower.lambda <- function(lambda) { (pchisq(X2, df=df, ncp=lambda) - 0.95) } if(is.na(X2) || is.na(df)) { indices["rmsea.ci.lower"] <- NA } else if(df < 1 || lower.lambda(0) < 0.0) { indices["rmsea.ci.lower"] <- 0 } else { lambda.l <- try(uniroot(f=lower.lambda, lower=0, upper=X2)$root, silent=TRUE) if(inherits(lambda.l, "try-error")) { lambda.l <- NA } if(object@Options$mimic %in% 
c("lavaan", "Mplus")) { GG <- 0 indices["rmsea.ci.lower"] <- sqrt( lambda.l/((N-GG)*df) ) * sqrt(G) } else { indices["rmsea.ci.lower"] <- sqrt( lambda.l/(N*df) ) } } } if("rmsea.ci.lower.scaled" %in% fit.measures || "rmsea.ci.lower.robust" %in% fit.measures) { if(test == "scaled.shifted") { XX2 <- X2.scaled df2 <- df } else { XX2 <- X2 df2 <- sum(TEST[[2]]$trace.UGamma) } lower.lambda <- function(lambda) { (pchisq(XX2, df=df2, ncp=lambda) - 0.95) } if(is.na(XX2) || is.na(df2)) { indices["rmsea.ci.lower.scaled"] <- indices["rmsea.ci.lower.robust"] <- NA } else if(df < 1 || df2 < 1 || lower.lambda(0) < 0.0) { indices["rmsea.ci.lower.scaled"] <- indices["rmsea.ci.lower.robust"] <- 0 } else { # 'scaled' lambda.l <- try(uniroot(f=lower.lambda, lower=0, upper=XX2)$root, silent=TRUE) if(inherits(lambda.l, "try-error")) { lambda.l <- NA } if(object@Options$mimic %in% c("lavaan", "Mplus")) { indices["rmsea.ci.lower.scaled"] <- sqrt( lambda.l/(N*df2) ) * sqrt(G) } else { # no multiple group correction indices["rmsea.ci.lower.scaled"] <- sqrt( lambda.l/(N*df2) ) } if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # robust XX2 <- X2.scaled df2 <- df # scaling factor c.hat <- TEST[[2]]$scaling.factor lambda.l <- try(uniroot(f=lower.lambda, lower=0, upper=XX2)$root, silent=TRUE) if(inherits(lambda.l, "try-error")) { lambda.l <- NA } if(object@Options$mimic %in% c("lavaan", "Mplus")) { indices["rmsea.ci.lower.robust"] <- sqrt( (c.hat*lambda.l)/(N*df2) ) * sqrt(G) } else { # no multiple group correction indices["rmsea.ci.lower.robust"] <- sqrt( (c.hat*lambda.l)/(N*df2) ) } } else { indices["rmsea.ci.lower.robust"] <- NA } } } if("rmsea.ci.upper" %in% fit.measures) { upper.lambda <- function(lambda) { (pchisq(X2, df=df, ncp=lambda) - 0.05) } if(is.na(X2) || is.na(df)) { indices["rmsea.ci.upper"] <- NA } else if(df < 1 || upper.lambda(N.RMSEA) > 0 || upper.lambda(0) < 0) { indices["rmsea.ci.upper"] <- 0 } else { lambda.u <- try(uniroot(f=upper.lambda, lower=0,upper=N.RMSEA)$root, silent=TRUE) if(inherits(lambda.u, "try-error")) { lambda.u <- NA } if(object@Options$mimic %in% c("lavaan", "Mplus")) { GG <- 0 indices["rmsea.ci.upper"] <- sqrt( lambda.u/((N-GG)*df) ) * sqrt(G) } else { indices["rmsea.ci.upper"] <- sqrt( lambda.u/(N*df) ) } } } if("rmsea.ci.upper.scaled" %in% fit.measures || "rmsea.ci.upper.robust" %in% fit.measures) { if(test == "scaled.shifted") { XX2 <- X2.scaled df2 <- df } else { XX2 <- X2 df2 <- sum(TEST[[2]]$trace.UGamma) } upper.lambda <- function(lambda) { (pchisq(XX2, df=df2, ncp=lambda) - 0.05) } if(is.na(XX2) || is.na(df2)) { indices["rmsea.ci.upper.scaled"] <- indices["rmsea.ci.upper.robust"] <- NA } else if(df < 1 || df2 < 1 || upper.lambda(N.RMSEA) > 0 || upper.lambda(0) < 0) { indices["rmsea.ci.upper.scaled"] <- indices["rmsea.ci.upper.robust"] <- 0 } else { # 'scaled' lambda.u <- try(uniroot(f=upper.lambda, lower=0,upper=N.RMSEA)$root, silent=TRUE) if(inherits(lambda.u, "try-error")) { lambda.u <- NA } if(object@Options$mimic %in% c("lavaan", "Mplus")) { indices["rmsea.ci.upper.scaled"] <- sqrt( lambda.u/(N*df2) ) * sqrt(G) } else { # no multiple group correction indices["rmsea.ci.upper.scaled"] <- sqrt( lambda.u/(N*df2) ) } if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # robust XX2 <- X2.scaled df2 <- df # scaling factor c.hat <- TEST[[2]]$scaling.factor lambda.u <- try(uniroot(f=upper.lambda, lower=0,upper=N.RMSEA)$root, silent=TRUE) if(inherits(lambda.u, "try-error")) { lambda.u <- NA } if(object@Options$mimic %in% c("lavaan", "Mplus")) { 
indices["rmsea.ci.upper.robust"] <- sqrt( (c.hat*lambda.u)/(N*df2) ) * sqrt(G) } else { # no multiple group correction indices["rmsea.ci.upper.robust"] <- sqrt( (c.hat*lambda.u)/(N*df2) ) } } else { indices["rmsea.ci.upper.robust"] <- NA } } } if("rmsea.pvalue" %in% fit.measures) { if(is.na(X2) || is.na(df)) { indices["rmsea.pvalue"] <- as.numeric(NA) } else if(df > 0) { if(object@Options$mimic %in% c("lavaan","Mplus")) { ncp <- N*df*0.05^2/G indices["rmsea.pvalue"] <- 1 - pchisq(X2, df=df, ncp=ncp) } else { indices["rmsea.pvalue"] <- 1 - pchisq(X2, df=df, ncp=(N*df*0.05^2)) } } else { indices["rmsea.pvalue"] <- NA # used to be 1 in < 0.5-21 } } if("rmsea.pvalue.scaled" %in% fit.measures || "rmsea.pvalue.robust" %in% fit.measures) { if(test == "scaled.shifted") { XX2 <- X2.scaled df2 <- df } else { XX2 <- X2 df2 <- sum(TEST[[2]]$trace.UGamma) } if(is.na(XX2) || is.na(df2)) { indices["rmsea.pvalue.scaled"] <- indices["rmsea.pvalue.robust"] <- as.numeric(NA) } else if(df > 0) { # scaled if(object@Options$mimic %in% c("lavaan", "Mplus")) { ncp <- N*df2*0.05^2/G indices["rmsea.pvalue.scaled"] <- 1 - pchisq(XX2, df=df2, ncp=ncp) } else { indices["rmsea.pvalue.scaled"] <- 1 - pchisq(XX2, df=df2, ncp=(N*df2*0.05^2)) } if(TEST[[2]]$test %in% c("satorra.bentler", "yuan.bentler")) { # robust XX2 <- X2.scaled df2 <- df # scaling factor c.hat <- TEST[[2]]$scaling.factor #if(object@Options$mimic %in% c("lavaan", "Mplus")) { # ncp <- N*(df2/c.hat)*0.05^2/G # indices["rmsea.pvalue.robust"] <- # 1 - pchisq(XX2, df=df2, ncp=ncp) #} else { # indices["rmsea.pvalue.robust"] <- # 1 - pchisq(XX2, df=df2, ncp=(N*(df2/c.hat)*0.05^2)) #} indices["rmsea.pvalue.robust"] <- NA } else { indices["rmsea.pvalue.robust"] <- NA } } else { indices["rmsea.pvalue.scaled"] <- indices["rmsea.pvalue.robust"] <- NA # used to be 1 in < 0.5-21 } } if(any(c("rmr","srmr") %in% fit.measures)) { # RMR and SRMR rmr.group <- numeric(G) rmr_nomean.group <- numeric(G) srmr_bentler.group <- numeric(G) srmr_bentler_nomean.group <- numeric(G) srmr_bollen.group <- numeric(G) srmr_bollen_nomean.group <- numeric(G) srmr_mplus.group <- numeric(G) srmr_mplus_nomean.group <- numeric(G) for(g in 1:G) { # observed if(!object@SampleStats@missing.flag) { if(object@Model@conditional.x) { S <- object@SampleStats@res.cov[[g]] M <- object@SampleStats@res.int[[g]] } else { S <- object@SampleStats@cov[[g]] M <- object@SampleStats@mean[[g]] } } else { # EM estimates S <- object@SampleStats@missing.h1[[g]]$sigma M <- object@SampleStats@missing.h1[[g]]$mu } nvar <- ncol(S) # estimated implied <- object@implied lavmodel <- object@Model Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov[[g]] else implied$cov[[g]] Mu.hat <- if(lavmodel@conditional.x) implied$res.int[[g]] else implied$mean[[g]] # unstandardized residuals RR <- (S - Sigma.hat) # standardized residual covariance matrix # this is the Hu and Bentler definition, not the Bollen one! # this one is used by EQS # and Mplus, but only if information=expected (god knows why) sqrt.d <- 1/sqrt(diag(S)) D <- diag(sqrt.d, ncol=length(sqrt.d)) R <- D %*% (S - Sigma.hat) %*% D # Bollen approach: simply using cov2cor ('residual correlations') S.cor <- cov2cor(S) Sigma.cor <- cov2cor(Sigma.hat) R.cor <- (S.cor - Sigma.cor) if(meanstructure) { # standardized residual mean vector R.mean <- D %*% (M - Mu.hat) # EQS approach! 
RR.mean <- (M - Mu.hat) # not standardized R.cor.mean <- M/sqrt(diag(S)) - Mu.hat/sqrt(diag(Sigma.hat)) e <- nvar*(nvar+1)/2 + nvar srmr_bentler.group[g] <- sqrt( (sum(R[lower.tri(R, diag=TRUE)]^2) + sum(R.mean^2))/ e ) rmr.group[g] <- sqrt( (sum(RR[lower.tri(RR, diag=TRUE)]^2) + sum(RR.mean^2))/ e ) srmr_bollen.group[g] <- sqrt( (sum(R.cor[lower.tri(R.cor, diag=TRUE)]^2) + sum(R.cor.mean^2)) / e ) # see http://www.statmodel.com/download/SRMR.pdf srmr_mplus.group[g] <- sqrt( (sum(R.cor[lower.tri(R.cor, diag=FALSE)]^2) + sum(R.cor.mean^2) + sum(((diag(S) - diag(Sigma.hat))/diag(S))^2)) / e ) e <- nvar*(nvar+1)/2 srmr_bentler_nomean.group[g] <- sqrt( sum( R[lower.tri( R, diag=TRUE)]^2) / e ) rmr_nomean.group[g] <- sqrt( sum(RR[lower.tri(RR, diag=TRUE)]^2) / e ) srmr_bollen_nomean.group[g] <- sqrt( sum(R.cor[lower.tri(R.cor, diag=TRUE)]^2) / e ) srmr_mplus_nomean.group[g] <- sqrt( (sum(R.cor[lower.tri(R.cor, diag=FALSE)]^2) + sum(((diag(S) - diag(Sigma.hat))/diag(S))^2)) / e ) } else { e <- nvar*(nvar+1)/2 srmr_bentler_nomean.group[g] <- srmr_bentler.group[g] <- sqrt( sum(R[lower.tri(R, diag=TRUE)]^2) / e ) rmr_nomean.group[g] <- rmr.group[g] <- sqrt( sum(RR[lower.tri(RR, diag=TRUE)]^2) / e ) srmr_bollen_nomean.group[g] <- srmr_bollen.group[g] <- sqrt( sum(R.cor[lower.tri(R.cor, diag=TRUE)]^2) / e ) srmr_mplus_nomean.group[g] <- srmr_mplus.group[g] <- sqrt( (sum(R.cor[lower.tri(R.cor, diag=FALSE)]^2) + sum(((diag(S) - diag(Sigma.hat))/diag(S))^2)) / e ) } } if(G > 1) { ## FIXME: get the scaling right SRMR_BENTLER <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_bentler.group) / object@SampleStats@ntotal ) SRMR_BENTLER_NOMEAN <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_bentler_nomean.group) / object@SampleStats@ntotal ) SRMR_BOLLEN <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_bollen.group) / object@SampleStats@ntotal ) SRMR_BOLLEN_NOMEAN <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_bollen_nomean.group) / object@SampleStats@ntotal ) SRMR_MPLUS <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_mplus.group) / object@SampleStats@ntotal ) SRMR_MPLUS_NOMEAN <- as.numeric( (unlist(object@SampleStats@nobs) %*% srmr_mplus_nomean.group) / object@SampleStats@ntotal ) RMR <- as.numeric( (unlist(object@SampleStats@nobs) %*% rmr.group) / object@SampleStats@ntotal ) RMR_NOMEAN <- as.numeric( (unlist(object@SampleStats@nobs) %*% rmr_nomean.group) / object@SampleStats@ntotal ) } else { SRMR_BENTLER <- srmr_bentler.group[1] SRMR_BENTLER_NOMEAN <- srmr_bentler_nomean.group[1] SRMR_BOLLEN <- srmr_bollen.group[1] SRMR_BOLLEN_NOMEAN <- srmr_bollen_nomean.group[1] SRMR_MPLUS <- srmr_mplus.group[1] SRMR_MPLUS_NOMEAN <- srmr_mplus_nomean.group[1] RMR <- rmr.group[1] RMR_NOMEAN <- rmr_nomean.group[1] } # the default! 
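# eg. mimic = "lavaan" reports the Bentler version (dropping the
# mean part for categorical data); mimic = "Mplus" reports the
# Mplus variant, unless information = "expected"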
if(object@Options$mimic == "lavaan") { if(categorical) { indices["srmr"] <- SRMR_BENTLER_NOMEAN } else { indices["srmr"] <- SRMR_BENTLER } } else if(object@Options$mimic == "EQS") { indices["srmr"] <- SRMR_BENTLER } else if(object@Options$mimic == "Mplus") { if(object@Options$information == "expected") { indices["srmr"] <- SRMR_BENTLER } else { indices["srmr"] <- SRMR_MPLUS } } # the others indices["srmr_bentler"] <- SRMR_BENTLER indices["srmr_bentler_nomean"] <- SRMR_BENTLER_NOMEAN indices["srmr_bollen"] <- SRMR_BOLLEN indices["srmr_bollen_nomean"] <- SRMR_BOLLEN_NOMEAN indices["srmr_mplus"] <- SRMR_MPLUS indices["srmr_mplus_nomean"] <- SRMR_MPLUS_NOMEAN if(categorical) { indices["rmr"] <- RMR } else { indices["rmr"] <- RMR_NOMEAN } indices["rmr_nomean"] <- RMR_NOMEAN } if(any(c("cn_05", "cn_01") %in% fit.measures)) { # catch df=0, X2=0 if(df == 0 && X2 == 0) { CN_05 <- as.numeric(NA) CN_01 <- as.numeric(NA) } else { CN_05 <- qchisq(p=0.95, df=df)/(X2/N) + 1 CN_01 <- qchisq(p=0.99, df=df)/(X2/N) + 1 } indices["cn_05"] <- CN_05 indices["cn_01"] <- CN_01 } if("wrmr" %in% fit.measures) { # we use the definition: wrmr = sqrt ( 2*N*F / e ) e <- length(object@SampleStats@WLS.obs[[1]]) ### only first group??? WRMR <- sqrt( X2 / e ) indices["wrmr"] <- WRMR } if(any(c("gfi","agfi","pgfi") %in% fit.measures)) { gfi.group <- numeric(G) WLS.obs <- object@SampleStats@WLS.obs WLS.V <- lav_model_wls_v(lavmodel = object@Model, lavsamplestats = object@SampleStats, structured = TRUE, lavdata = object@Data) WLS.est <- lav_object_inspect_wls_est(object) for(g in 1:G) { wls.obs <- WLS.obs[[g]] wls.est <- WLS.est[[g]] wls.v <- WLS.V[[g]] if(is.null(wls.v)) { gfi.group[g] <- as.numeric(NA) } else { wls.diff <- wls.obs - wls.est if(is.matrix(wls.v)) { # full weight matrix t1 <- crossprod(wls.diff, wls.v) %*% wls.diff t2 <- crossprod(wls.obs, wls.v) %*% wls.obs } else { # diagonal weight matrix t1 <- as.numeric(crossprod(wls.diff^2, wls.v)) t2 <- as.numeric(crossprod(wls.obs^2, wls.v)) } gfi.group[g] <- 1 - t1/t2 } } if(G > 1) { ## FIXME: get the scaling right GFI <- as.numeric( (unlist(object@SampleStats@nobs) %*% gfi.group) / object@SampleStats@ntotal ) } else { GFI <- gfi.group[1L] } indices["gfi"] <- GFI nel <- length(unlist(WLS.obs)) # total number of modeled sample stats if(is.na(df)) { indices["agfi"] <- as.numeric(NA) } else if(df > 0) { indices["agfi"] <- 1 - (nel/df) * (1 - GFI) } else { indices["agfi"] <- 1 } # LISREL formula (Simplis book 2002, p. 
126) indices["pgfi"] <- (df/nel)*GFI } # MFI - McDonald Fit Index (McDonald, 1989) if("mfi" %in% fit.measures) { #MFI <- exp(-0.5 * (X2 - df)/(N-1)) # Hu & Bentler 1998 Table 1 MFI <- exp(-0.5 * (X2 - df)/N) indices["mfi"] <- MFI } # ECVI - cross-validation index (Brown & Cudeck, 1989) # not defined for multiple groups and/or models with meanstructures if("ecvi" %in% fit.measures) { if(G > 1 || meanstructure) { ECVI <- as.numeric(NA) } else { ECVI <- X2/N + (2*npar)/N } indices["ecvi"] <- ECVI } # C_p if("c_p" %in% fit.measures) { out <- lavTablesFitCp(object) CpMax <- attr(out, "CpMax") indices["c_p"] <- CpMax$LR indices["c_p.df"] <- CpMax$df indices["c_p.p.value"] <- CpMax$p.value.Bonferroni } # C_F if("c_f" %in% fit.measures) { CF <- lavTablesFitCf(object) DF <- attr(CF, "DF") attributes(CF) <- NULL indices["c_f"] <- CF indices["c_f.df"] <- DF indices["c_f.p.value"] <- pchisq(CF, DF, lower.tail=FALSE) indices["rpat.observed"] <- object@Data@Rp[[1L]]$npatterns indices["rpat.total"] <- object@Data@Rp[[1L]]$total.patterns indices["rpat.empty"] <- object@Data@Rp[[1L]]$empty.patterns } # C_M if("c_m" %in% fit.measures) { CM <- lavTablesFitCm(object) DF <- attr(CM, "DF") attributes(CM) <- NULL indices["c_m"] <- CM indices["c_m.df"] <- DF indices["c_m.p.value"] <- pchisq(CM, DF, lower.tail=FALSE) } if("ntotal" %in% fit.measures) { indices["ntotal"] <- object@SampleStats@ntotal } # do we have everything that we requested? #idx.missing <- which(is.na(match(fit.measures, names(indices)))) #if(length(idx.missing) > 0L) { # cat("lavaan WARNING: some requested fit measure(s) are not available for this model:\n") # print( fit.measures[ idx.missing ] ) # cat("\n") #} out <- unlist(indices[fit.measures]) if(length(out) > 0L) { class(out) <- c("lavaan.vector", "numeric") } else { return( invisible(numeric(0)) ) } out } # print a nice summary of the fit measures print.fit.measures <- function(x) { names.x <- names(x) # scaled? 
scaled <- "chisq.scaled" %in% names.x # table fit measures if("C_F" %in% names.x) { cat("\nFull response patterns fit statistics:\n\n") t0.txt <- sprintf(" %-40s", "Observed response patterns (1st group):") t1.txt <- sprintf(" %10i", x["rpat.observed"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Total response patterns (1st group):") t1.txt <- sprintf(" %10i", x["rpat.total"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Empty response patterns (1st group):") t1.txt <- sprintf(" %10i", x["rpat.empty"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") cat("\n") t0.txt <- sprintf(" %-40s", "C_F Test Statistic") t1.txt <- sprintf(" %10.3f", x["C_F"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Degrees of freedom") t1.txt <- sprintf(" %10i", x["C_F.df"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "P-value") t1.txt <- sprintf(" %10.3f", x["C_F.p.value"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") cat("\n") t0.txt <- sprintf(" %-40s", "C_M Test Statistic") t1.txt <- sprintf(" %10.3f", x["C_M"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Degrees of freedom") t1.txt <- sprintf(" %10i", x["C_M.df"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "P-value") t1.txt <- sprintf(" %10.3f", x["C_M.p.value"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("C_p" %in% names.x) { cat("\nPairwise tables summary statistic:\n\n") t0.txt <- sprintf(" %-40s", "C_P Test Statistic") t1.txt <- sprintf(" %10.3f", x["C_p"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Degrees of freedom") t1.txt <- sprintf(" %10i", x["C_p.df"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Bonferroni corrected P-value") t1.txt <- sprintf(" %10.3f", x["C_p.p.value"]) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } # independence model if("baseline.chisq" %in% names.x) { cat("\nModel test baseline model:\n\n") t0.txt <- sprintf(" %-40s", "Minimum Function Test Statistic") t1.txt <- sprintf(" %10.3f", x["baseline.chisq"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["baseline.chisq.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Degrees of freedom") t1.txt <- sprintf(" %10i", x["baseline.df"]) t2.txt <- ifelse(scaled, ifelse(round(x["baseline.df.scaled"]) == x["baseline.df.scaled"], sprintf(" %10i", x["baseline.df.scaled"]), sprintf(" %10.3f", x["baseline.df.scaled"])), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "P-value") t1.txt <- sprintf(" %10.3f", x["baseline.pvalue"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["baseline.pvalue.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } # cfi/tli if(any(c("cfi","tli","nnfi","rfi","nfi","ifi","rni","pnfi") %in% names.x)) { cat("\nUser model versus baseline model:\n\n") if("cfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Comparative Fit Index (CFI)") t1.txt <- sprintf(" %10.3f", x["cfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["cfi.scaled"]), "") #t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["cfi.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("tli" %in% names.x) { t0.txt <- sprintf(" %-40s", "Tucker-Lewis Index (TLI)") t1.txt <- sprintf(" %10.3f", x["tli"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["tli.scaled"]), "") #t2.txt <- 
ifelse(scaled, sprintf(" %10.3f", x["tli.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("cfi.robust" %in% names.x) { t0.txt <- sprintf("\n %-40s", "Robust Comparative Fit Index (CFI)") t1.txt <- sprintf(" %10s", "") t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["cfi.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("tli.robust" %in% names.x) { t0.txt <- sprintf(" %-40s", "Robust Tucker-Lewis Index (TLI)") t1.txt <- sprintf(" %10s", "") t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["tli.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("nnfi" %in% names.x) { t0.txt <- sprintf(" %-42s", "Bentler-Bonett Non-normed Fit Index (NNFI)") t1.txt <- sprintf(" %8.3f", x["nnfi"]) #t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["nnfi.scaled"]), "") t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["nnfi.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("nfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Bentler-Bonett Normed Fit Index (NFI)") t1.txt <- sprintf(" %10.3f", x["nfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["nfi.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("pnfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Parsimony Normed Fit Index (PNFI)") t1.txt <- sprintf(" %10.3f", x["pnfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["pnfi.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("rfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Bollen's Relative Fit Index (RFI)") t1.txt <- sprintf(" %10.3f", x["rfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rfi.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("ifi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Bollen's Incremental Fit Index (IFI)") t1.txt <- sprintf(" %10.3f", x["ifi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["ifi.scaled"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("rni" %in% names.x) { t0.txt <- sprintf(" %-40s", "Relative Noncentrality Index (RNI)") t1.txt <- sprintf(" %10.3f", x["rni"]) #t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rni.scaled"]), "") t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rni.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } # likelihood if("logl" %in% names.x) { cat("\nLoglikelihood and Information Criteria:\n\n") t0.txt <- sprintf(" %-40s", "Loglikelihood user model (H0)") t1.txt <- sprintf(" %10.3f", x["logl"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["logl"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #cat(t0.txt, t1.txt, "\n", sep="") if(!is.na(x["scaling.factor.h0"])) { t0.txt <- sprintf(" %-40s", "Scaling correction factor") t1.txt <- sprintf(" %10s", "") t2.txt <- sprintf(" %10.3f", x["scaling.factor.h0"]) cat(t0.txt, t1.txt, t2.txt, "\n", sep="") cat(" for the MLR correction\n") } if("unrestricted.logl" %in% names.x) { t0.txt <- sprintf(" %-40s", "Loglikelihood unrestricted model (H1)") t1.txt <- sprintf(" %10.3f", x["unrestricted.logl"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["unrestricted.logl"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #cat(t0.txt, t1.txt, "\n", sep="") if(!is.na(x["scaling.factor.h1"])) { t0.txt <- sprintf(" %-40s", "Scaling correction factor") t1.txt <- sprintf(" %10s", "") t2.txt <- sprintf(" %10.3f", x["scaling.factor.h1"]) cat(t0.txt, t1.txt, t2.txt, "\n", sep="") cat(" for the MLR correction\n") } } cat("\n") t0.txt <- sprintf(" %-40s", "Number of free parameters") t1.txt <- sprintf(" %10i", x["npar"]) t2.txt <- ifelse(scaled, sprintf(" %10i", x["npar"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") t0.txt <- sprintf(" %-40s",
"Akaike (AIC)") t1.txt <- sprintf(" %10.3f", x["aic"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["aic"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #cat(t0.txt, t1.txt, "\n", sep="") t0.txt <- sprintf(" %-40s", "Bayesian (BIC)") t1.txt <- sprintf(" %10.3f", x["bic"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["bic"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #cat(t0.txt, t1.txt, "\n", sep="") if(!is.na(x["bic2"])) { t0.txt <- sprintf(" %-40s", "Sample-size adjusted Bayesian (BIC)") t1.txt <- sprintf(" %10.3f", x["bic2"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["bic2"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } # RMSEA if("rmsea" %in% names.x) { cat("\nRoot Mean Square Error of Approximation:\n\n") t0.txt <- sprintf(" %-40s", "RMSEA") t1.txt <- sprintf(" %10.3f", x["rmsea"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rmsea.scaled"]), "") #sprintf(" %10.3f", x["rmsea.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") if("rmsea.ci.lower" %in% names.x) { t0.txt <- sprintf(" %-38s", "90 Percent Confidence Interval") t1.txt <- sprintf(" %5.3f", x["rmsea.ci.lower"]) t2.txt <- sprintf(" %5.3f", x["rmsea.ci.upper"]) t3.txt <- ifelse(scaled, sprintf(" %5.3f %5.3f", x["rmsea.ci.lower.scaled"], x["rmsea.ci.upper.scaled"]), "") #sprintf(" %5.3f %5.3f", x["rmsea.ci.lower.robust"], # x["rmsea.ci.upper.robust"]), "") cat(t0.txt, t1.txt, t2.txt, t3.txt, "\n", sep="") } if("rmsea.pvalue" %in% names.x) { t0.txt <- sprintf(" %-40s", "P-value RMSEA <= 0.05") t1.txt <- sprintf(" %10.3f", x["rmsea.pvalue"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rmsea.pvalue.scaled"]), "") #sprintf(" %10.3f", x["rmsea.pvalue.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } # robust if("rmsea.robust" %in% names.x) { t0.txt <- sprintf("\n %-40s", "Robust RMSEA") t1.txt <- sprintf(" %10s", "") t2.txt <- ifelse(scaled, #sprintf(" %10.3f", x["rmsea.scaled"]), "") sprintf(" %10.3f", x["rmsea.robust"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("rmsea.ci.lower.robust" %in% names.x) { t0.txt <- sprintf(" %-38s", "90 Percent Confidence Interval") t1.txt <- sprintf(" %5.3s", "") t2.txt <- sprintf(" %5.3s", "") t3.txt <- ifelse(scaled, #sprintf(" %5.3f %5.3f", x["rmsea.ci.lower.scaled"], # x["rmsea.ci.upper.scaled"]), "") sprintf(" %5.3f %5.3f", x["rmsea.ci.lower.robust"], x["rmsea.ci.upper.robust"]), "") cat(t0.txt, t1.txt, t2.txt, t3.txt, "\n", sep="") } #if("rmsea.pvalue.robust" %in% names.x) { # t0.txt <- sprintf(" %-40s", "P-value RMSEA <= 0.05") # t1.txt <- sprintf(" %10.3f", x["rmsea.pvalue"]) # t2.txt <- ifelse(scaled, # #sprintf(" %10.3f", x["rmsea.pvalue.scaled"]), "") # sprintf(" %10.3f", x["rmsea.pvalue.robust"]), "") # cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #} } # SRMR if(any(c("rmr","srmr") %in% names.x)) { cat("\nStandardized Root Mean Square Residual:\n\n") if("rmr" %in% names.x) { t0.txt <- sprintf(" %-40s", "RMR") t1.txt <- sprintf(" %10.3f", x["rmr"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rmr"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("rmr_nomean" %in% names.x) { t0.txt <- sprintf(" %-40s", "RMR (No Mean)") t1.txt <- sprintf(" %10.3f", x["rmr_nomean"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["rmr_nomean"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("srmr" %in% names.x) { t0.txt <- sprintf(" %-40s", "SRMR") t1.txt <- sprintf(" %10.3f", x["srmr"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["srmr"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("srmr_nomean" %in% names.x) { t0.txt <- sprintf(" %-40s", "SRMR 
(No Mean)") t1.txt <- sprintf(" %10.3f", x["srmr_nomean"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["srmr_nomean"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } # WRMR if("wrmr" %in% names.x) { cat("\nWeighted Root Mean Square Residual:\n\n") if("wrmr" %in% names.x) { t0.txt <- sprintf(" %-40s", "WRMR") t1.txt <- sprintf(" %10.3f", x["wrmr"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["wrmr"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } # Other if(any(c("cn_05","cn_01","gfi","agfi","pgfi","mfi") %in% names.x)) { cat("\nOther Fit Indices:\n\n") if("cn_05" %in% names.x) { t0.txt <- sprintf(" %-40s", "Hoelter Critical N (CN) alpha=0.05") t1.txt <- sprintf(" %10.3f", x["cn_05"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["cn_05"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("cn_01" %in% names.x) { t0.txt <- sprintf(" %-40s", "Hoelter Critical N (CN) alpha=0.01") t1.txt <- sprintf(" %10.3f", x["cn_01"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["cn_01"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if(any(c("cn_05", "cn_01") %in% names.x)) { cat("\n") } if("gfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Goodness of Fit Index (GFI)") t1.txt <- sprintf(" %10.3f", x["gfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["gfi"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("agfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Adjusted Goodness of Fit Index (AGFI)") t1.txt <- sprintf(" %10.3f", x["agfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["agfi"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("pgfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Parsimony Goodness of Fit Index (PGFI)") t1.txt <- sprintf(" %10.3f", x["pgfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["pgfi"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if(any(c("gfi","agfi","pgfi") %in% names.x)) { cat("\n") } if("mfi" %in% names.x) { t0.txt <- sprintf(" %-40s", "McDonald Fit Index (MFI)") t1.txt <- sprintf(" %10.3f", x["mfi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["mfi"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } if("mfi" %in% names.x) { cat("\n") } if("ecvi" %in% names.x) { t0.txt <- sprintf(" %-40s", "Expected Cross-Validation Index (ECVI)") t1.txt <- sprintf(" %10.3f", x["ecvi"]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", x["ecvi"]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } #cat("\n") } lavaan/R/lav_partable_constraints.R0000644000176200001440000003732613031513031017124 0ustar liggesusers# build def function from partable lav_partable_constraints_def <- function(partable, con = NULL, debug = FALSE, txtOnly = FALSE) { # empty function def.function <- function() NULL # if 'con', merge partable + con if(!is.null(con)) { partable$lhs <- c(partable$lhs, con$lhs) partable$op <- c(partable$op, con$op ) partable$rhs <- c(partable$rhs, con$rhs) } # get := definitions def.idx <- which(partable$op == ":=") # catch empty def if(length(def.idx) == 0L) { if(txtOnly) { return(character(0L)) } else { return(def.function) } } # create function formals(def.function) <- alist(.x.=, ...=) if(txtOnly) { BODY.txt <- "" } else { BODY.txt <- paste("{\n# parameter definitions\n\n") } lhs.names <- partable$lhs[def.idx] def.labels <- all.vars( parse(file="", text=partable$rhs[def.idx]) ) # remove the ones in lhs.names idx <- which(def.labels %in% lhs.names) if(length(idx) > 0L) def.labels <- def.labels[-idx] # get corresponding 'x' indices def.x.idx <- partable$free[match(def.labels, partable$label)] if(any(is.na(def.x.idx))) { stop("lavaan ERROR: unknown 
label(s) in variable definition(s): ", paste(def.labels[which(is.na(def.x.idx))], collapse=" ")) } if(any(def.x.idx == 0)) { stop("lavaan ERROR: non-free parameter(s) in variable definition(s): ", paste(def.labels[which(def.x.idx == 0)], collapse=" ")) } def.x.lab <- paste(".x.[", def.x.idx, "]",sep="") # put the labels in the function BODY if(length(def.x.idx) > 0L) { BODY.txt <- paste(BODY.txt, "# parameter labels\n", paste(def.labels, " <- ",def.x.lab, collapse="\n"), "\n", sep="") } # write the definitions literally BODY.txt <- paste(BODY.txt, "\n# parameter definitions\n", sep="") for(i in 1:length(def.idx)) { BODY.txt <- paste(BODY.txt, lhs.names[i], " <- ", partable$rhs[def.idx[i]], "\n", sep="") } if(txtOnly) return(BODY.txt) # put the results in 'out' BODY.txt <- paste(BODY.txt, "\nout <- ", paste("c(", paste(lhs.names, collapse=","),")\n", sep=""), sep="") # what to do with NA values? -> return +Inf??? BODY.txt <- paste(BODY.txt, "out[is.na(out)] <- Inf\n", sep="") BODY.txt <- paste(BODY.txt, "names(out) <- ", paste("c(\"", paste(lhs.names, collapse="\",\""), "\")\n", sep=""), sep="") BODY.txt <- paste(BODY.txt, "return(out)\n}\n", sep="") body(def.function) <- parse(file="", text=BODY.txt) if(debug) { cat("def.function = \n"); print(def.function); cat("\n") } def.function } # build ceq function from partable # non-trivial equality constraints (linear or nonlinear) # convert to 'ceq(x)' function where 'x' is the (free) parameter vector # and ceq(x) returns the evaluated equality constraints # # eg. if b1 + b2 == 2 (and b1 and b2 correspond to, say, x[10] and x[17]) # ceq <- function(x) { # out <- rep(NA, 1) # b1 = x[10]; b2 = x[17] # out[1] <- b1 + b2 - 2 # } lav_partable_constraints_ceq <- function(partable, con = NULL, debug = FALSE, txtOnly = FALSE) { # empty function ceq.function <- function() NULL # if 'con', merge partable + con if(!is.null(con)) { partable$lhs <- c(partable$lhs, con$lhs) partable$op <- c(partable$op, con$op ) partable$rhs <- c(partable$rhs, con$rhs) } # get equality constraints eq.idx <- which(partable$op == "==") # catch empty ceq if(length(eq.idx) == 0L) { if(txtOnly) { return(character(0L)) } else { return(ceq.function) } } # create function formals(ceq.function) <- alist(.x.=, ...=) if(txtOnly) { BODY.txt <- "" } else { BODY.txt <- paste("{\nout <- rep(NA, ", length(eq.idx), ")\n", sep="") } # first come the variable definitions DEF.txt <- lav_partable_constraints_def(partable, txtOnly=TRUE) def.idx <- which(partable$op == ":=") BODY.txt <- paste(BODY.txt, DEF.txt, "\n", sep="") # extract labels lhs.labels <- all.vars( parse(file="", text=partable$lhs[eq.idx]) ) rhs.labels <- all.vars( parse(file="", text=partable$rhs[eq.idx]) ) eq.labels <- unique(c(lhs.labels, rhs.labels)) # remove def.names from eq.labels if(length(def.idx) > 0L) { def.names <- as.character(partable$lhs[def.idx]) d.idx <- which(eq.labels %in% def.names) if(length(d.idx) > 0) eq.labels <- eq.labels[-d.idx] } eq.x.idx <- rep(as.integer(NA), length(eq.labels)) # get user-labels ids ulab.idx <- which(eq.labels %in% partable$label) if(length(ulab.idx) > 0L) { eq.x.idx[ ulab.idx] <- partable$free[match(eq.labels[ulab.idx], partable$label)] } # get plabels ids plab.idx <- which(eq.labels %in% partable$plabel) if(length(plab.idx) > 0L) { eq.x.idx[ plab.idx] <- partable$free[match(eq.labels[plab.idx], partable$plabel)] } # check if we have found the label if(any(is.na(eq.x.idx))) { stop("lavaan ERROR: unknown label(s) in equality constraint(s): ", paste(eq.labels[which(is.na(eq.x.idx))],
collapse=" ")) } # check if they are all 'free' if(any(eq.x.idx == 0)) { fixed.eq.idx <- which(eq.x.idx == 0) # FIXME: what should we do here? we used to stop with an error # from 0.5.18, we give a warning, and replace the non-free label # with its fixed value in ustart warning("lavaan WARNING: non-free parameter(s) in equality constraint(s): ", paste(eq.labels[fixed.eq.idx], collapse=" ")) fixed.lab.lhs <- eq.labels[fixed.eq.idx] fixed.lab.rhs <- partable$ustart[match(fixed.lab.lhs, partable$label)] BODY.txt <- paste(BODY.txt, "# non-free parameter labels\n", paste(fixed.lab.lhs, "<-", fixed.lab.rhs, collapse="\n"), "\n", sep="") eq.x.idx <- eq.x.idx[-fixed.eq.idx] eq.labels <- eq.labels[-fixed.eq.idx] } # put the labels the function BODY eq.x.lab <- paste(".x.[", eq.x.idx, "]",sep="") if(length(eq.x.idx) > 0L) { BODY.txt <- paste(BODY.txt, "# parameter labels\n", paste(eq.labels, "<-", eq.x.lab, collapse="\n"), "\n", sep="") } # write the equality constraints literally BODY.txt <- paste(BODY.txt, "\n# equality constraints\n", sep="") for(i in 1:length(eq.idx)) { lhs <- partable$lhs[ eq.idx[i] ] rhs <- partable$rhs[ eq.idx[i] ] if(rhs == "0") { eq.string <- lhs } else { eq.string <- paste(lhs, " - (", rhs, ")", sep="") } BODY.txt <- paste(BODY.txt, "out[", i, "] <- ", eq.string, "\n", sep="") } if(txtOnly) return(BODY.txt) # put the results in 'out' #BODY.txt <- paste(BODY.txt, "\nout <- ", # paste("c(", paste(lhs.names, collapse=","),")\n", sep=""), sep="") # what to do with NA values? -> return +Inf??? BODY.txt <- paste(BODY.txt, "\n", "out[is.na(out)] <- Inf\n", sep="") BODY.txt <- paste(BODY.txt, "return(out)\n}\n", sep="") body(ceq.function) <- parse(file="", text=BODY.txt) if(debug) { cat("ceq.function = \n"); print(ceq.function); cat("\n") } ceq.function } # build ciq function from partable # non-trivial inequality constraints (linear or nonlinear) # convert to 'cin(x)' function where 'x' is the (free) parameter vector # and cin(x) returns the evaluated inequality constraints # # eg. 
if b1 + b2 > 2 (and b1 and b2 correspond to, say, x[10] and x[17]) # cin <- function(x) { # out <- rep(NA, 1) # b1 = x[10]; b2 = x[17] # out[1] <- b1 + b2 - 2 # } # # NOTE: very similar, but not identical to ceq, because we need to take # care of the difference between '<' and '>' lav_partable_constraints_ciq <- function(partable, con = NULL, debug = FALSE, txtOnly = FALSE) { # empty function cin.function <- function() NULL # if 'con', merge partable + con if(!is.null(con)) { partable$lhs <- c(partable$lhs, con$lhs) partable$op <- c(partable$op, con$op ) partable$rhs <- c(partable$rhs, con$rhs) } # get inequality constraints ineq.idx <- which(partable$op == ">" | partable$op == "<") # catch empty ciq if(length(ineq.idx) == 0L) { if(txtOnly) { return(character(0L)) } else { return(cin.function) } } # create function formals(cin.function) <- alist(.x.=, ...=) if(txtOnly) { BODY.txt <- "" } else { BODY.txt <- paste("{\nout <- rep(NA, ", length(ineq.idx), ")\n", sep="") } # first come the variable definitions DEF.txt <- lav_partable_constraints_def(partable, txtOnly=TRUE) def.idx <- which(partable$op == ":=") BODY.txt <- paste(BODY.txt, DEF.txt, "\n", sep="") # extract labels lhs.labels <- all.vars( parse(file="", text=partable$lhs[ineq.idx]) ) rhs.labels <- all.vars( parse(file="", text=partable$rhs[ineq.idx]) ) ineq.labels <- unique(c(lhs.labels, rhs.labels)) # remove def.names from ineq.labels if(length(def.idx) > 0L) { def.names <- as.character(partable$lhs[def.idx]) d.idx <- which(ineq.labels %in% def.names) if(length(d.idx) > 0) ineq.labels <- ineq.labels[-d.idx] } ineq.x.idx <- rep(as.integer(NA), length(ineq.labels)) # get user-labels ids ulab.idx <- which(ineq.labels %in% partable$label) if(length(ulab.idx) > 0L) { ineq.x.idx[ ulab.idx] <- partable$free[match(ineq.labels[ulab.idx], partable$label)] } # get plabels ids plab.idx <- which(ineq.labels %in% partable$plabel) if(length(plab.idx) > 0L) { ineq.x.idx[ plab.idx] <- partable$free[match(ineq.labels[plab.idx], partable$plabel)] } # check if we have found the label if(any(is.na(ineq.x.idx))) { stop("lavaan ERROR: unknown label(s) in inequality constraint(s): ", paste(ineq.labels[which(is.na(ineq.x.idx))], collapse=" ")) } # check if they are all 'free' if(any(ineq.x.idx == 0)) { fixed.ineq.idx <- which(ineq.x.idx == 0) # FIXME: what should we do here?
we used to stop with an error # from 0.5.18, we give a warning, and replace the non-free label # with its fixed value in ustart warning("lavaan WARNING: non-free parameter(s) in inequality constraint(s): ", paste(ineq.labels[fixed.ineq.idx], collapse=" ")) fixed.lab.lhs <- ineq.labels[fixed.ineq.idx] fixed.lab.rhs <- partable$ustart[match(fixed.lab.lhs, partable$label)] BODY.txt <- paste(BODY.txt, "# non-free parameter labels\n", paste(fixed.lab.lhs, "<-", fixed.lab.rhs, collapse="\n"), "\n", sep="") ineq.x.idx <- ineq.x.idx[-fixed.ineq.idx] ineq.labels <- ineq.labels[-fixed.ineq.idx] } # put the labels in the function BODY ineq.x.lab <- paste(".x.[", ineq.x.idx, "]",sep="") if(length(ineq.x.idx) > 0L) { BODY.txt <- paste(BODY.txt, "# parameter labels\n", paste(ineq.labels, "<-", ineq.x.lab, collapse="\n"), "\n", sep="") } # write the constraints literally BODY.txt <- paste(BODY.txt, "\n# inequality constraints\n", sep="") for(i in 1:length(ineq.idx)) { lhs <- partable$lhs[ ineq.idx[i] ] op <- partable$op[ ineq.idx[i] ] rhs <- partable$rhs[ ineq.idx[i] ] # note, this is different from ==, because we have < AND > if(rhs == "0" && op == ">") { ineq.string <- lhs } else if(rhs == "0" && op == "<") { ineq.string <- paste(rhs, " - (", lhs, ")", sep="") } else if(rhs != "0" && op == ">") { ineq.string <- paste(lhs, " - (", rhs, ")", sep="") } else if(rhs != "0" && op == "<") { ineq.string <- paste(rhs, " - (", lhs, ")", sep="") } BODY.txt <- paste(BODY.txt, "out[", i, "] <- ", ineq.string, "\n", sep="") } if(txtOnly) return(BODY.txt) # put the results in 'out' #BODY.txt <- paste(BODY.txt, "\nout <- ", # paste("c(", paste(lhs.names, collapse=","),")\n", sep=""), sep="") # what to do with NA values? -> return +Inf??? BODY.txt <- paste(BODY.txt, "\n", "out[is.na(out)] <- Inf\n", sep="") BODY.txt <- paste(BODY.txt, "return(out)\n}\n", sep="") body(cin.function) <- parse(file="", text=BODY.txt) if(debug) { cat("cin.function = \n"); print(cin.function); cat("\n") } cin.function } lav_partable_constraints_label_id <- function(partable, con = NULL, warn = TRUE) { # if 'con', merge partable + con if(!is.null(con)) { partable$lhs <- c(partable$lhs, con$lhs) partable$op <- c(partable$op, con$op ) partable$rhs <- c(partable$rhs, con$rhs) } # get constraints con.idx <- which(partable$op %in% c("==", "<", ">")) # catch empty con if(length(con.idx) == 0L) { return(integer(0L)) } def.idx <- which(partable$op == ":=") # extract labels lhs.labels <- all.vars( parse(file="", text=partable$lhs[con.idx]) ) rhs.labels <- all.vars( parse(file="", text=partable$rhs[con.idx]) ) con.labels <- unique(c(lhs.labels, rhs.labels)) # remove def.names from con.labels if(length(def.idx) > 0L) { def.names <- as.character(partable$lhs[def.idx]) d.idx <- which(con.labels %in% def.names) if(length(d.idx) > 0) { con.labels <- con.labels[-d.idx] } } con.x.idx <- rep(as.integer(NA), length(con.labels)) # get user-labels ids ulab.idx <- which(con.labels %in% partable$label) if(length(ulab.idx) > 0L) { con.x.idx[ ulab.idx] <- partable$free[match(con.labels[ulab.idx], partable$label)] } # get plabels ids plab.idx <- which(con.labels %in% partable$plabel) if(length(plab.idx) > 0L) { con.x.idx[ plab.idx] <- partable$free[match(con.labels[plab.idx], partable$plabel)] } # check if we have found the label if(any(is.na(con.x.idx)) && warn) { warning("lavaan WARNING: unknown label(s) in constraint(s): ", paste(con.labels[which(is.na(con.x.idx))], collapse=" ")) } # return named integer vector names(con.x.idx) <- con.labels con.x.idx } # for
all parameters in p1, find the 'id' of the corresponding parameter # in p2 lav_partable_map_id_p1_in_p2 <- function(p1, p2) { # get all parameters that have a '.p*' plabel # (they exclude "==", "<", ">", ":=") p1.idx <- which(grepl("\\.p", p1$plabel)); np1 <- length(p1.idx) # return p2.id p2.id <- integer(np1) # check every parameter in p1 for(i in seq_len(np1)) { # identify parameter in p1 lhs <- p1$lhs[i]; op <- p1$op[i]; rhs <- p1$rhs[i]; group <- p1$group[i] # search for corresponding parameter in p2 p2.idx <- which(p2$lhs == lhs & p2$op == op & p2$rhs == rhs & p2$group == group) # found? if(length(p2.idx) == 0L) { stop("lavaan ERROR: parameter in p1 not found in p2: ", paste(lhs, op, rhs, "(group = ", group, ")", sep=" ")) } else { p2.id[i] <- p2.idx } } p2.id } lavaan/R/lav_binorm.R0000644000176200001440000001035512476315260014200 0ustar liggesusers# functions to deal with bivariate normal distributions # YR # TODO: better handling of rho=1.0 # density of a bivariate standard normal dbinorm <- function(u, v, rho) { # dirty hack to handle extremely large values for rho # note that u, v, and rho are vectorized! RHO.limit <- 0.9999 abs.rho <- abs(rho); idx <- which(abs.rho > RHO.limit) if(length(idx) > 0L) rho[idx] <- sign(rho[idx]) * RHO.limit R <- 1 - rho*rho 1/(2*pi*sqrt(R)) * exp( - 0.5*(u*u - 2*rho*u*v + v*v)/R ) } # partial derivative - rho dbinorm_drho <- function(u, v, rho) { R <- 1 - rho*rho dbinorm(u,v,rho) * (u*v*R -rho*(u*u - 2*rho*u*v + v*v) + rho*R )/(R*R) } # partial derivative - u dbinorm_du <- function(u, v, rho) { R <- 1 - rho*rho -dbinorm(u,v,rho) * (u - rho*v)/R } # partial derivative - v dbinorm_dv <- function(u, v, rho) { R <- 1 - rho*rho -dbinorm(u,v,rho) * (v - rho*u)/R } # CDF of bivariate standard normal # function pbinorm(upper.x, upper.y, rho) # partial derivative pbinorm - upper.x pbinorm_dupper.x <- function(upper.x, upper.y, rho=0.0) { R <- 1 - rho*rho dnorm(upper.x) * pnorm( (upper.y - rho*upper.x)/sqrt(R) ) } pbinorm_dupper.y <- function(upper.x, upper.y, rho=0.0) { R <- 1 - rho*rho dnorm(upper.y) * pnorm( (upper.x - rho*upper.y)/sqrt(R) ) } pbinorm_drho <- function(upper.x, upper.y, rho=0.0) { dbinorm(upper.x, upper.y, rho) } # switch between pbivnorm, mnormt, ... pbinorm <- function(upper.x=NULL, upper.y=NULL, rho=0.0, lower.x=-Inf, lower.y=-Inf, check=FALSE) { pbinorm2(upper.x=upper.x, upper.y=upper.y, rho=rho, lower.x=lower.x, lower.y=lower.y, check=check) } # using vectorized version (a la pbivnorm) pbinorm2 <- function(upper.x=NULL, upper.y=NULL, rho=0.0, lower.x=-Inf, lower.y=-Inf, check=FALSE) { N <- length(upper.x) stopifnot(length(upper.y) == N) if(N > 1L) { if(length(rho) == 1L) rho <- rep(rho, N) if(length(lower.x) == 1L) lower.x <- rep(lower.x, N) if(length(lower.y) == 1L) lower.y <- rep(lower.y, N) } upper.only <- all(lower.x == -Inf & lower.y == -Inf) if(upper.only) { upper.x[upper.x == +Inf] <- exp(10) # better pnorm? upper.y[upper.y == +Inf] <- exp(10) upper.x[upper.x == -Inf] <- -exp(10) upper.y[upper.y == -Inf] <- -exp(10) res <- pbivnorm(upper.x, upper.y, rho=rho) } else { # pbivnorm does not handle -Inf well...
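# the general rectangle probability below follows from inclusion-exclusion
# on the joint CDF F (this is exactly the four pbivnorm() terms):
#   P(lx < X <= ux, ly < Y <= uy) =
#     F(ux,uy) - F(lx,uy) - F(ux,ly) + F(lx,ly)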
lower.x[lower.x == -Inf] <- -exp(10) lower.y[lower.y == -Inf] <- -exp(10) res <- pbivnorm(upper.x, upper.y, rho=rho) - pbivnorm(lower.x, upper.y, rho=rho) - pbivnorm(upper.x, lower.y, rho=rho) + pbivnorm(lower.x, lower.y, rho=rho) } res } # using non-vectorized version #pbinorm1 <- function(upper.x=NULL, upper.y=NULL, rho=0.0, # lower.x=-Inf, lower.y=-Inf, check=FALSE) { # # p2_i <- function(lower.x, lower.y, upper.x, upper.y, rho) { # # MVTNORM # #pmvnorm(lower=c(lower.x, lower.y), # # upper=c(upper.x, upper.y), # # corr=matrix(c(1,rho,rho,1),2L,2L)) # # # MNORMT # biv.nt.prob(df=0, # lower=c(lower.x, lower.y), # upper=c(upper.x, upper.y), # mean=c(0,0), # S=matrix(c(1,rho,rho,1),2L,2L)) # # # PBIVNORM # # } # # N <- length(upper.x) # stopifnot(length(upper.y) == N) # if(N > 1L) { # if(length(rho) == 1L) # rho <- rep(rho, N) # if(length(lower.x) == 1L) # lower.x <- rep(lower.x, N) # if(length(lower.y) == 1L) # lower.y <- rep(lower.y, N) # } # # biv.nt.prob does not handle +Inf well for upper # upper.x[upper.x == +Inf] <- exp(10) # better pnorm? # upper.y[upper.y == +Inf] <- exp(10) # better pnorm? # # biv.nt.prob does allow abs(rho) > 1 # stopifnot(all(abs(rho) <= 1)) # # # vectorize (this would be faster if the loop is in the fortran code!) # res <- sapply(seq_len(N), function(i) # p2_i(lower.x[i], lower.y[i], # upper.x[i], upper.y[i], # rho[i])) # res #} lavaan/R/lav_model_gradient_mml.R0000644000176200001440000002661313043400717016533 0ustar liggesuserslav_model_gradient_mml <- function(lavmodel = NULL, THETA = NULL, TH = NULL, GLIST = NULL, group = 1L, lavdata = NULL, sample.mean = NULL, sample.mean.x = NULL, lavcache = NULL) { if(lavmodel@link == "logit") stop("logit link not implemented yet; use probit") # shortcut ov.y.dummy.ov.idx <- lavmodel@ov.y.dummy.ov.idx[[group]] ov.x.dummy.ov.idx <- lavmodel@ov.x.dummy.ov.idx[[group]] ov.y.dummy.lv.idx <- lavmodel@ov.y.dummy.lv.idx[[group]] ov.x.dummy.lv.idx <- lavmodel@ov.x.dummy.lv.idx[[group]] ov.dummy.idx <- c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) lv.dummy.idx <- c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) th.idx <- lavmodel@th.idx[[group]] num.idx <- lavmodel@num.idx[[group]] ord.idx <- unique( th.idx[th.idx > 0L] ) # data for this group X <- lavdata@X[[group]]; nobs <- nrow(X); nvar <- ncol(X) eXo <- lavdata@eXo[[group]] # MLIST (for veta and yhat) mm.in.group <- 1:lavmodel@nmat[group] + cumsum(c(0,lavmodel@nmat))[group] MLIST <- GLIST[ mm.in.group ] # quadrature points GH <- lavcache[[group]]$GH; nGH <- nrow(GH$x) nfac <- ncol(GH$x) # compute VETAx (latent lv only) #VETAx <- computeVETAx.LISREL(MLIST = MLIST, lv.dummy.idx = lv.dummy.idx) VETAx <- computeVETAx.LISREL(MLIST = MLIST) # check for negative values? if(any(diag(VETAx) < 0)) { warning("lavaan WARNING: --- VETAx contains negative values") print(VETAx) return(0) } # cholesky? 
#if(is.null(lavmodel@control$cholesky)) { CHOLESKY <- TRUE #} else { # CHOLESKY <- as.logical(lavmodel@control$cholesky) #if(nfac > 1L && !CHOLESKY) { # warning("lavaan WARNING: CHOLESKY is OFF but nfac > 1L") #} #} if(!CHOLESKY) { # we should still 'scale' the factors, if std.lv=FALSE ETA.sd <- sqrt( diag(VETAx) ) } else { # cholesky takes care of scaling ETA.sd <- rep(1, nfac) tchol.VETA <- try(chol(VETAx), silent = TRUE) if(inherits(tchol.VETA, "try-error")) { warning("lavaan WARNING: --- VETAx not positive definite") print(VETAx) return(0) } if(!is.null(MLIST$alpha) || !is.null(MLIST$gamma)) { EETAx <- computeEETAx.LISREL(MLIST = MLIST, eXo = eXo, N = nobs, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) #if(length(lv.dummy.idx) > 0L) { # EETAx <- EETAx[,-lv.dummy.idx,drop=FALSE] #} } } # prepare common stuff # fix Lambda? LAMBDA <- computeLAMBDA.LISREL(MLIST = MLIST, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # fix ALPHA ALPHA <- MLIST$alpha if(is.null(ALPHA)) { ALPHA <- numeric( nfac ) } else if(length(lv.dummy.idx)) { ALPHA <- ALPHA[-lv.dummy.idx,,drop=FALSE] } # Beta? BETA <- MLIST$beta if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { tmp <- -BETA; nr <- nrow(BETA); i <- seq_len(nr); tmp[cbind(i, i)] <- 1 IB.inv <- solve(tmp) LAMBDA..IB.inv <- MLIST$lambda %*% IB.inv ## no need to FIX??? if(length(lv.dummy.idx) > 0L) { LAMBDA..IB.inv <- LAMBDA..IB.inv[,-lv.dummy.idx,drop=FALSE] } # fix BETA if(length(lv.dummy.idx)) { BETA <- MLIST$beta[-lv.dummy.idx, -lv.dummy.idx, drop=FALSE] } tmp <- -BETA; nr <- nrow(BETA); i <- seq_len(nr); tmp[cbind(i, i)] <- 1 IB.inv <- solve(tmp) } # fix GAMMA GAMMA <- MLIST$gamma if(is.null(GAMMA)) { ALPHA.GAMMA.eXo <- matrix(as.numeric(ALPHA), nobs, nfac, byrow=TRUE) } else if(length(lv.dummy.idx)) { GAMMA <- GAMMA[-lv.dummy.idx,,drop=FALSE] ALPHA.GAMMA.eXo <- sweep(eXo %*% t(GAMMA), MARGIN=2 ,STATS=as.numeric(ALPHA), FUN="+") } # Delta ##DD <- lavcache[[group]]$DD DD <- lav_model_gradient_DD(lavmodel, GLIST = GLIST, group = group) ## FIXME!!! do this analytically... x <- lav_model_get_parameters(lavmodel = lavmodel, GLIST = MLIST) dVetadx <- function(x, lavmodel = lavmodel, g = 1L) { GLIST <- lav_model_x2GLIST(lavmodel, x=x, type="free") VETAx <- computeVETAx(lavmodel, GLIST = GLIST)[[g]] if(CHOLESKY) { S <- chol(VETAx) ### FIXME or t(chol())???? } else { S <- diag( sqrt(diag(VETAx)) ) } S } Delta.S <- lav_func_jacobian_simple(func=dVetadx, x=x, lavmodel = lavmodel, g = group) DD$S <- Delta.S # compute dL/dx for each node #dLdx <- matrix(0, nGH, lavmodel@nx.free) dFYp <- matrix(0, nobs, lavmodel@nx.free) SUM.LOG.FY <- matrix(0, nrow=nGH, ncol=nobs) for(q in 1:nGH) { # contribution to dFYp for this q dFYp.q <- matrix(0, nobs, lavmodel@nx.free) # current value(s) for ETA eta <- ksi <- GH$x[q,,drop=FALSE] # rescale/unwhiten if(CHOLESKY) { eta <- eta %*% tchol.VETA } else { # no unit scale? 
(un-standardize) eta <- sweep(eta, MARGIN=2, STATS=ETA.sd, FUN="*") } # eta_i = alpha + BETA eta_i + GAMMA eta_i + error # # - direct effect of BETA is already in VETAx, and hence tchol.VETA # - need to add alpha, and GAMMA eta_i if(!is.null(MLIST$alpha) || !is.null(MLIST$gamma)) { eta <- sweep(EETAx, MARGIN=2, STATS=eta, FUN="+") } # again, compute yhat for this node (eta) if(lavmodel@conditional.x) { yhat <- computeEYetax.LISREL(MLIST = MLIST, eXo = eXo, ETA = eta, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) } else { yhat <- computeEYetax3.LISREL(MLIST = MLIST, ETA = eta, sample.mean = sample.mean, mean.x = sample.mean.x, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[group]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[group]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[group]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[group]]) } # compute fy.var, for this node (eta): P(Y_i = y_i | eta_i, x_i) log.fy.var <- lav_predict_fy_internal(X = X, yhat = yhat, TH = TH, THETA = THETA, num.idx = num.idx, th.idx = th.idx, link = lavmodel@link, log. = TRUE) # if log, fy is just the sum of log.fy.var log.fy <- apply(log.fy.var, 1L, sum) # store log likelihoods for this node SUM.LOG.FY[q,] <- log.fy # FY FY <- exp(log.fy.var) ### FIXME log/exp/log/... LIK.eta <- apply(FY, 1, prod) #fyp <- LIK.eta * GH$w[q] ######### dFY_p ########################################### # note, dFYp is actually 1/FY[,p] * dFYp PRE <- matrix(0, nobs, nvar) if(length(num.idx) > 0L) { tmp <- X[,num.idx,drop=FALSE] - yhat[,num.idx,drop=FALSE] theta.var <- diag(THETA)[num.idx] PRE[,num.idx] <- sweep(tmp, MARGIN=2, STATS=1/theta.var, FUN="*") } if(length(ord.idx) > 0L) { for(p in ord.idx) { # just in case we need theta[v,v] after all... sd.v.inv <- 1/sqrt(THETA[p,p]) # lav_probit y <- X[,p] th.y <- TH[ th.idx == p]; TH.Y <- c(-Inf, th.y, Inf) ncat <- length(th.y) + 1L; nth <- ncat - 1L Y1 <- matrix(1:nth, nobs, nth, byrow=TRUE) == y Y2 <- matrix(1:nth, nobs, nth, byrow=TRUE) == (y - 1L) z1 <- pmin( 100, TH.Y[y+1L ] - yhat[,p]) z2 <- pmax(-100, TH.Y[y+1L-1L] - yhat[,p]) p1 <- dnorm(z1) p2 <- dnorm(z2) # probits = p1 - p2 PRE[,p] <- -1 * (p1 - p2) * sd.v.inv * (1/FY[,p]) # [nobx * n.th] # dth <- -1 * (Y2*p2 - Y1*p1) * sd.v.inv dth <- -1 * (Y2*p2 - Y1*p1) * sd.v.inv * (1/FY[,p]) dFYp.q <- dFYp.q + (dth %*% DD$tau[which(th.idx==p),,drop=FALSE]) } } if(length(num.idx) > 0L) { # THETA (num only) dsigma2 <- sweep(0.5*PRE[,num.idx]*PRE[,num.idx], MARGIN=2, STATS=1/(2*theta.var), FUN="-") dFYp.q <- dFYp.q + (dsigma2 %*% DD$theta) # NU (num only) dnu <- PRE[,num.idx] dFYp.q <- dFYp.q + (dnu %*% DD$nu) } # LAMBDA if(nrow(eta) == 1L) { dlambda <- PRE %*% eta ### FIXME!!!!! 
} else { dlambda <- matrix(apply(PRE, 2, function(x) x * eta), nobs, ) #dlambda <- sweep(PRE, MARGIN=1, STATS=eta, FUN="*") } dFYp.q <- dFYp.q + (dlambda %*% DD$lambda) # PSI #if(nrow(ksi) == 1L) { dpsi <- PRE %*% kronecker(LAMBDA[,,drop=FALSE], ksi) #} else { # dpsi <- PRE * kronecker(LAMBDA[,,drop=FALSE], ksi) #} dFYp.q <- dFYp.q + (dpsi %*% DD$S) # KAPPA if(length(ov.y.dummy.ov.idx) > 0L) { dkappa <- matrix(apply(PRE[,ov.y.dummy.ov.idx,drop=FALSE], 2, function(x) x * eXo), nobs, ) dFYp.q <- dFYp.q + (dkappa %*% DD$kappa) } # GAMMA if(!is.null(eXo)) { dgamma <- matrix(apply(PRE %*% LAMBDA..IB.inv, 2, function(x) x * eXo), nobs, ) dFYp.q <- dFYp.q + (dgamma %*% DD$gamma) } # BETA if(!is.null(BETA)) { #tmp <- kronecker(LAMBDA, ALPHA.GAMMA.eXo) %*% # t( kronecker(t(IB.inv), IB.inv) ) #dbeta <- apply(matrix(as.numeric(PRE) * tmp, nobs, ), 1, sum) dbeta <- matrix(apply(PRE %*% LAMBDA..IB.inv, 2, function(x) x * ALPHA.GAMMA.eXo), nobs, ) dFYp.q <- dFYp.q + (dbeta %*% DD$beta) } dFYp <- dFYp + ( (LIK.eta * GH$w[q]) * dFYp.q ) } lik <- as.numeric( t(GH$w) %*% exp(SUM.LOG.FY) ) # avoid underflow idx <- which(lik < exp(-600)) if(length(idx) > 0L) { lik[idx] <- exp(-600) } dFYp <- 1/lik * dFYp dx <- apply(dFYp, 2, sum) # integration #dx <- apply(as.numeric(GH$w) * dLdx, 2, sum) # minimize dx <- -1*dx dx } lavaan/R/lav_mplus.R0000644000176200001440000000243012465075714014054 0ustar liggesusers# read in information from Mplus difftest output, return as list # # line 1: test statistic (unscaled) # line 2: number of groups # line 3: number of sample statistics (ndat) # line 4: number of free parameters (npar) # delta (ndat x npar) # P1 (E.inv) lav_matrix_vechr(npar x npar) # V1 (NVarCov) lav_matrix_vechr(npar x npar) lavutils_mplus_readdifftest <- function(file="deriv.dat") { ###FIXME: does not work for multiple groups yet!!! raw <- scan(file, quiet=TRUE) T1 <- raw[1] # function value (usually T1 * 2 * nobs to get X2) ngroups <- as.integer(raw[2]) ndat <- as.integer(raw[3]) npar <- as.integer(raw[4]) pstar <- npar*(npar+1)/2 # delta offset <- 4L delta_raw <- raw[offset + seq_len(npar*ndat)] Delta <- matrix(delta_raw, nrow=ndat, ncol=npar, byrow=TRUE) # P1 offset <- 4L + npar*ndat p1_raw <- raw[offset + seq_len(pstar)] P1 <- lav_matrix_lower2full(p1_raw) # (robust) NACOV npar offset <- 4L + npar*ndat + pstar nacov_raw <- raw[offset + seq_len(pstar)] V1 <- lav_matrix_lower2full(nacov_raw) # just for fun, M1 # M1 <- (P1 - P1 %*% H %*% solve(t(H) %*% P1 %*% H) %*% t(H) %*% P1) %*% V1 list(T1=T1, ngroups=ngroups, ndat=ndat, npar=npar, pstar=pstar, Delta=Delta, P1=P1, V1=V1) } lavaan/R/xxx_fsr.R0000644000176200001440000003372413052017157013553 0ustar liggesusers# factor score regression # three methods: # - naive (regression or Bartlett) # - Skrondal & Laake (2001) (regression models only) # - Croon (2002) (general + robust SE) fsr <- function(model = NULL, data = NULL, cmd = "sem", fsr.method = "Croon", fs.method = "Bartlett", fs.scores = FALSE, Gamma.NT = TRUE, lvinfo = FALSE, ...) 
{ # we need full data if(is.null(data)) { stop("lavaan ERROR: full data is required for factor score regression") } # check fsr.method argument fsr.method <- tolower(fsr.method) if(fsr.method == "naive") { # nothing to do } else if(fsr.method %in% c("skrondal", "laake", "skrondallaake", "skrondal.laake", "skrondal-laake")) { fsr.method <- "skrondal.laake" } else if(fsr.method == "croon") { # nothing to do } else { stop("lavaan ERROR: invalid option for argument fsr.method: ", fsr.method) } # check fs.method argument fs.method <- tolower(fs.method) if(fs.method %in% c("bartlett", "barttlett", "bartlet")) { fs.method <- "Bartlett" } else if(fs.method == "regression") { # nothing to do } else { stop("lavaan ERROR: invalid option for argument fs.method: ", fs.method) } # dot dot dot dotdotdot <- list(...) # change 'default' values for fsr if(is.null(dotdotdot$se)) { dotdotdot$se <- "none" } if(is.null(dotdotdot$test)) { dotdotdot$test <- "satorra.bentler" } if(is.null(dotdotdot$missing)) { dotdotdot$missing <- "ml" } if(is.null(dotdotdot$meanstructure)) { dotdotdot$meanstructure <- TRUE } # STEP 0: process full model, without fitting dotdotdot0 <- dotdotdot dotdotdot0$do.fit <- NULL dotdotdot0$se <- "none" # to avoid warning about missing="listwise" dotdotdot0$test <- "none" # to avoid warning about missing="listwise" # check for arguments that we do not want (eg sample.cov)? # TODO # initial processing of the model, no fitting FIT <- do.call(cmd, args = c(list(model = model, data = data, #meanstructure = TRUE, do.fit = FALSE), dotdotdot0) ) lavoptions <- lavInspect(FIT, "options") # restore lavoptions$se <- dotdotdot$se lavoptions$test <- dotdotdot$test ngroups <- lavInspect(FIT, "ngroups") lavpta <- FIT@pta # FIXME: not ready for multiple groups yet if(ngroups > 1L) { stop("lavaan ERROR: fsr code not ready for multiple groups (yet)") } # if missing = "listwise", make data complete if(lavoptions$missing == "listwise") { # FIXME: make this work for multiple groups!! OV <- unique(unlist(lavpta$vnames$ov)) data <- na.omit(data[,OV]) } # any `regular' latent variables? lv.names <- unique(unlist(FIT@pta$vnames$lv.regular)) if(length(lv.names) == 0L) { stop("lavaan ERROR: model does not contain any latent variables") } nfac <- length(lv.names) # check parameter table PT <- parTable(FIT) PT$est <- PT$se <- NULL # find the structural regressions in the parameter table eqs.idx <- which(PT$op == "~" & (PT$lhs %in% lv.names | PT$rhs %in% lv.names)) if(length(eqs.idx) == 0L) { stop("lavaan ERROR: regressions do not involve any latent variables") } # determine eqs.y and eqs.x names eqs.x.names <- unlist(FIT@pta$vnames$eqs.x) eqs.y.names <- unlist(FIT@pta$vnames$eqs.y) eqs.names <- unique( c(eqs.x.names, eqs.y.names) ) # check if we can use skrondal & laake (no mediational terms?) 
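# (background: Skrondal & Laake (2001) show that regression scores for
# the exogenous factors combined with Bartlett scores for the endogenous
# factors yield unbiased structural regression coefficients, but their
# argument assumes no factor is both predictor and outcome -- hence the
# check below)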
if(fsr.method == "skrondal.laake") { if(any(eqs.x.names %in% eqs.y.names)) { stop("lavaan ERROR: mediational relationships are not allowed for the Skrondal.Laake method; use ", sQuote("Croon"), " instead.") } } # STEP 1a: compute factor scores for each latent variable # compute factor scores, per latent variable FS.SCORES <- vector("list", length = ngroups) LVINFO <- vector("list", length = ngroups) if(ngroups > 1L) { names(FS.SCORES) <- names(LVINFO) <- lavInspect(FIT, "group.label") } for(g in 1:ngroups) { FS.SCORES[[g]] <- vector("list", length = nfac) names(FS.SCORES[[g]]) <- lv.names LVINFO[[g]] <- vector("list", length = nfac) names(LVINFO[[g]]) <- lv.names } # adjust options dotdotdot2 <- dotdotdot dotdotdot2$se <- "none" dotdotdot2$test <- "none" dotdotdot2$debug <- FALSE dotdotdot2$verbose <- FALSE dotdotdot2$auto.cov.lv.x <- TRUE # allow correlated exogenous factors # we assume the same number/names of lv's per group!!! for(f in 1:nfac) { # create parameter table for this factor only PT.1fac <- lav_partable_subset_measurement_model(PT = PT, lavpta = lavpta, lv.names = lv.names[f]) # fit 1-factor model fit.1fac <- do.call("lavaan", args = c(list(model = PT.1fac, data = data), dotdotdot2) ) # fs.method? if(fsr.method == "skrondal.laake") { # dependent -> Bartlett if(lv.names[f] %in% eqs.y.names) { fs.method <- "Bartlett" } else { fs.method <- "regression" } } # compute factor scores if(fsr.method %in% c("croon") || lavoptions$se == "robust.sem") { SC <- lav_predict_eta(fit.1fac, method = fs.method, fsm = TRUE) FSM <- attr(SC, "fsm"); attr(SC, "fsm") <- NULL LAMBDA <- computeLAMBDA(fit.1fac@Model) THETA <- computeTHETA(fit.1fac@Model) } else { SC <- lav_predict_eta(fit.1fac, method = fs.method, fsm = FALSE) } # store results for(g in 1:ngroups) { FS.SCORES[[g]][[f]] <- SC[[g]] if(fsr.method %in% c("croon") || lavoptions$se == "robust.sem") { LVINFO[[g]][[f]] <- list(fsm = FSM[[g]], lambda = LAMBDA[[g]], theta = THETA[[g]]) } } # g } # nfac # cbind factor scores FS.SCORES <- lapply(1:ngroups, function(g) { SC <- as.data.frame(FS.SCORES[[g]]) SC }) # compute empirical covariance matrix factor scores FS.COV <- lapply(1:ngroups, function(g) { COV <- cov(FS.SCORES[[g]]) ## divided by N-1!!! if(lavoptions$likelihood == "normal") { Ng <- lavInspect(FIT, "nobs")[g] COV <- COV * (Ng - 1) / Ng } COV }) if(lavoptions$meanstructure) { FS.MEAN <- lapply(1:ngroups, function(g) { colMeans(FS.SCORES[[g]]) }) } else { FS.MEAN <- NULL } # STEP 1b: if using `Croon' method: correct COV matrix: if(fsr.method %in% c("croon")) { FSR.COV <- lav_fsr_croon_correction(FS.COV = FS.COV, LVINFO = LVINFO, fs.method = fs.method) } else { FSR.COV <- FS.COV } # STEP 1c: do we need full set of factor scores? if(fs.scores) { # transform? if(fsr.method == "croon") { for(g in 1:ngroups) { OLD.inv <- solve(FS.COV[[g]]) OLD.inv.sqrt <- lav_matrix_symmetric_sqrt(OLD.inv) FSR.COV.sqrt <- lav_matrix_symmetric_sqrt(FSR.COV[[g]]) SC <- as.matrix(FS.SCORES[[g]]) SC <- SC %*% OLD.inv.sqrt %*% FSR.COV.sqrt SC <- as.data.frame(SC) names(SC) <- lv.names FS.SCORES[[g]] <- SC } } # unlist if multiple groups, add group column if(ngroups == 1L) { FS.SCORES <- as.data.frame(FS.SCORES[[1]]) } else { stop("fix this!") } } # STEP 2: fit structural model using (corrected?) 
factor scores PT.PA <- lav_partable_subset_structural_model(PT, lavpta = lavpta) # free all means/intercepts int.idx <- which(PT.PA$op == "~1") PT.PA$free[int.idx] <- 1L PT.PA$ustart[int.idx] <- NA # adjust lavoptions if(is.null(dotdotdot$do.fit)) { lavoptions$do.fit <- TRUE } else { lavoptions$do.fit <- dotdotdot$do.fit } if(is.null(dotdotdot$se)) { lavoptions$se <- "robust.sem" } else { lavoptions$se <- dotdotdot$se } if(is.null(dotdotdot$test)) { lavoptions$test <- "satorra.bentler" } else { lavoptions$test <- dotdotdot$test } if(is.null(dotdotdot$sample.cov.rescale)) { lavoptions$sample.cov.rescale <- FALSE } else { lavoptions$sample.cov.rescale <- dotdotdot$sample.cov.rescale } # take care of NACOV, in case we want correct standard errors if(lavoptions$se == "robust.sem") { Omega.f <- vector("list", length = ngroups) for(g in 1:ngroups) { DATA <- FIT@Data@X[[g]] if(Gamma.NT) { if(lavoptions$missing == "listwise") { Omega.y <- lav_samplestats_Gamma_NT(Y = DATA, meanstructure = lavoptions$meanstructure, rescale = TRUE, fixed.x = FALSE) } else if(lavoptions$missing == "ml") { # we assume UNSTRUCTURED Mu and Sigma!! MU <- FIT@SampleStats@missing.h1[[g]]$mu SIGMA <- FIT@SampleStats@missing.h1[[g]]$sigma if(lavoptions$information == "expected") { Info <- lav_mvnorm_missing_information_expected( Y = DATA, Mp = FIT@Data@Mp[[g]], Mu = MU, Sigma = SIGMA) } else { Info <- lav_mvnorm_missing_information_observed_samplestats( Yp = FIT@SampleStats@missing[[g]], Mu = MU, Sigma = SIGMA) } Omega.y <- lav_matrix_symmetric_inverse(Info) } else { stop("lavaan ERROR: can not handle missing = ", lavoptions$missing) } } else { if(lavoptions$missing == "listwise") { Omega.y <- lav_samplestats_Gamma(Y = DATA, meanstructure = lavoptions$meanstructure, fixed.x = FALSE) } else if(lavoptions$missing == "ml") { # we assume UNSTRUCTURED Mu and Sigma!! MU <- FIT@SampleStats@missing.h1[[g]]$mu SIGMA <- FIT@SampleStats@missing.h1[[g]]$sigma Omega.y <- lav_mvnorm_missing_h1_omega_sw(Y = DATA, Mp = FIT@Data@Mp[[g]], Yp = FIT@SampleStats@missing[[g]], Mu = MU, Sigma = SIGMA, information = lavoptions$information) } else { stop("lavaan ERROR: can not handle missing = ", lavoptions$missing) } } # factor score matrices A <- lav_matrix_bdiag(lapply(LVINFO[[g]], "[[", "fsm")) # compensate for Croon correction if(fs.method == "regression") { if(!exists("OLD.inv.sqrt")) { OLD.inv <- solve(FS.COV[[g]]) OLD.inv.sqrt <- lav_matrix_symmetric_sqrt(OLD.inv) } if(!exists("FSR.COV.sqrt")) { FSR.COV.sqrt <- lav_matrix_symmetric_sqrt(FSR.COV[[g]]) } A <- OLD.inv.sqrt %*% FSR.COV.sqrt %*% A } # mean + vech(sigma) A22 <- lav_matrix_duplication_post( lav_matrix_duplication_ginv_pre(A %x% A)) if(lavoptions$meanstructure) { A11 <- A A.tilde <- lav_matrix_bdiag(A11, A22) } else { A.tilde <- A22 } Omega.f[[g]] <- A.tilde %*% Omega.y %*% t(A.tilde) } # g } else { Omega.f <- NULL } # fit structural model lavoptions2 <- lavoptions #lavoptions2$se <- "none" #lavoptions2$test <- "none" lavoptions2$missing <- "listwise" # always complete data anyway... 
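# note (illustrative sketch, comments only): the call below fits the
# structural model to the Croon-corrected covariance FSR.COV, not to the
# naive covariance of the factor scores; for Bartlett scores f = A y
# (so that A %*% LAMBDA = I) the naive covariance equals
#   A (LAMBDA PSI t(LAMBDA) + THETA) t(A) = PSI + A THETA t(A)
# and the Croon correction removes the A THETA t(A) bias term, so the
# step-2 estimates refer to the latent covariance PSI itself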
fit <- lavaan(PT.PA, sample.cov = FSR.COV, sample.mean = FS.MEAN, sample.nobs = FIT@SampleStats@nobs, NACOV = Omega.f, slotOptions = lavoptions2) # extra info extra <- list( FS.COV = FS.COV, FS.SCORES = FS.SCORES, FSR.COV = FSR.COV, LVINFO = LVINFO) PE <- parameterEstimates(fit, add.attributes = TRUE) # standard errors #lavsamplestats <- fit@SampleStats #lavsamplestats@NACOV <- Omega.f #VCOV <- lav_model_vcov(fit@Model, lavsamplestats = lavsamplestats, # lavoptions = lavoptions) #SE <- lav_model_vcov_se(fit@Model, fit@ParTable, VCOV = VCOV) #PE$se <- SE #tmp.se <- ifelse(PE$se == 0.0, NA, PE$se) #zstat <- pvalue <- TRUE #if(zstat) { # PE$z <- PE$est / tmp.se # if(pvalue) { # PE$pvalue <- 2 * (1 - pnorm( abs(PE$z) )) # } #} out <- list(header = "This is fsr (0.1) -- factor score regression.", PE = PE) if(lvinfo) { out$lvinfo <- extra } class(out) <- c("lavaan.fsr", "list") out } lavaan/R/ctr_modelcov.R0000644000176200001440000000137212465075714014536 0ustar liggesusers# takes a model in lavaan syntax and the user's data and returns the covariance # matrix of observed variables. Useful so that the user can do things like # diagnose errors in the cov matrix, use cov2cor to look at the correlation # matrix, try and invert the sample covariance matrix, etc. # update 5/27/2011 JEB # changelog: using sem and inspect to get output. # This way, all arguments such as groups, etc, can be used # update 3 june 2011 YR: removed se="none" (since now implied by do.fit=FALSE) # update 13 dec 2011 YR: changed name (to avoid confusion with the # model-implied cov) inspectSampleCov <- function(model, data, ...) { fit <- sem(model, data=data, ..., do.fit=FALSE) inspect(fit, "sampstat") } lavaan/R/lav_matrix.R0000644000176200001440000007512313040423433014212 0ustar liggesusers# Magnus & Neudecker (1999) style matrix operations # YR - 11 may 2011: initial version # YR - 19 okt 2014: rename functions using lav_matrix_ prefix # vec operator # # the vec operator (for 'vectorization') transforms a matrix into # a vector by stacking the *columns* of the matrix one underneath the other # # M&N book: page 30 # # note: we do not coerce to 'double/numeric' storage-mode (like as.numeric) lav_matrix_vec <- function(A) { as.vector(A) } # vecr operator # # the vecr operator transforms a matrix into # a vector by stacking the *rows* of the matrix one underneath the other lav_matrix_vecr <- function(A) { # faster way??
# nRow <- NROW(A); nCol <- NCOL(A) # idx <- (seq_len(nCol) - 1L) * nRow + rep(seq_len(nRow), each = nCol) lav_matrix_vec(t(A)) } # vech # # the vech operator (for 'half vectorization') transforms a *symmetric* matrix # into a vector by stacking the *columns* of the matrix one underneath the # other, but eliminating all supradiagonal elements # # see Henderson & Searle, 1979 # # M&N book: page 48-49 # lav_matrix_vech <- function(S, diagonal = TRUE) { ROW <- row(S); COL <- col(S) if(diagonal) S[ROW >= COL] else S[ROW > COL] } # the vechr operator transforms a *symmetric* matrix # into a vector by stacking the *rows* of the matrix one after the # other, but eliminating all supradiagonal elements lav_matrix_vechr <- function(S, diagonal = TRUE) { S[lav_matrix_vechr_idx(n = NCOL(S), diagonal = diagonal)] } # the vechu operator transforms a *symmetric* matrix # into a vector by stacking the *columns* of the matrix one after the # other, but eliminating all infradiagonal elements lav_matrix_vechu <- function(S, diagonal = TRUE) { S[lav_matrix_vechu_idx(n = NCOL(S), diagonal = diagonal)] } # the vechru operator transforms a *symmetric* matrix # into a vector by stacking the *rows* of the matrix one after the # other, but eliminating all infradiagonal elements # # same as vech (but using upper-diagonal elements) lav_matrix_vechru <- function(S, diagonal = TRUE) { S[lav_matrix_vechru_idx(n = NCOL(S), diagonal = diagonal)] } # return the *vector* indices of the lower triangular elements of a # symmetric matrix of size 'n' lav_matrix_vech_idx <- function(n = 1L, diagonal = TRUE) { # FIXME: is there a way to avoid creating ROW/COL matrices? n <- as.integer(n) ROW <- matrix(seq_len(n), n, n) COL <- matrix(seq_len(n), n, n, byrow = TRUE) if(diagonal) which(ROW >= COL) else which(ROW > COL) } # return the *row* indices of the lower triangular elements of a # symmetric matrix of size 'n' lav_matrix_vech_row_idx <- function(n = 1L, diagonal = TRUE) { n <- as.integer(n) if(diagonal ) { unlist(lapply(seq_len(n), seq.int, n)) } else { 1 + unlist(lapply(seq_len(n-1), seq.int, n-1)) } } # return the *col* indices of the lower triangular elements of a # symmetric matrix of size 'n' lav_matrix_vech_col_idx <- function(n = 1L, diagonal = TRUE) { n <- as.integer(n) if(!diagonal) { n <- n - 1L } rep.int(seq_len(n), times = rev(seq_len(n))) } # return the *vector* indices of the lower triangular elements of a # symmetric matrix of size 'n' -- ROW-WISE lav_matrix_vechr_idx <- function(n = 1L, diagonal = TRUE) { n <- as.integer(n) ROW <- matrix(seq_len(n), n, n) COL <- matrix(seq_len(n), n, n, byrow = TRUE) tmp <- matrix(seq_len(n*n), n, n, byrow = TRUE) if(diagonal) tmp[ROW <= COL] else tmp[ROW < COL] } # return the *vector* indices of the upper triangular elements of a # symmetric matrix of size 'n' -- COLUMN-WISE lav_matrix_vechu_idx <- function(n = 1L, diagonal = TRUE) { n <- as.integer(n) ROW <- matrix(seq_len(n), n, n) COL <- matrix(seq_len(n), n, n, byrow = TRUE) if(diagonal) which(ROW <= COL) else which(ROW < COL) } # return the *vector* indices of the upper triangular elements of a # symmetric matrix of size 'n' -- ROW-WISE # # FIXME!! make this more efficient (without creating 3 n*n matrices!) 
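# small example (illustrative): for n = 3,
#   lav_matrix_vechru_idx(3L)   # 1 4 7 5 8 9
# so S[lav_matrix_vechru_idx(3L)] visits the upper triangle row by row;
# for a symmetric S this equals lav_matrix_vech(S)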
# lav_matrix_vechru_idx <- function(n = 1L, diagonal = TRUE) { n <- as.integer(n) ROW <- matrix(seq_len(n), n, n) COL <- matrix(seq_len(n), n, n, byrow = TRUE) tmp <- matrix(seq_len(n*n), n, n, byrow = TRUE) if(diagonal) tmp[ROW >= COL] else tmp[ROW > COL] } # vech.reverse and vechru.reverse (aka `upper2full') # # given the output of vech(S) --or vechru(S) which is identical-- # reconstruct S lav_matrix_vech_reverse <- lav_matrix_vechru_reverse <- lav_matrix_upper2full <- function(x, diagonal = TRUE) { # guess dimensions if(diagonal) { p <- (sqrt(1 + 8*length(x))-1)/2 } else { p <- (sqrt(1 + 8*length(x))+1)/2 } S <- numeric(p * p) S[lav_matrix_vech_idx( p, diagonal = diagonal)] <- x S[lav_matrix_vechru_idx(p, diagonal = diagonal)] <- x attr(S, "dim") <- c(p, p) S } # vechr.reverse and vechu.reverse (aka `lower2full') # # given the output of vechr(S) --or vechu(S) which is identical-- # reconstruct S lav_matrix_vechr_reverse <- lav_matrix_vechu_reverse <- lav_matrix_lower2full <- function(x, diagonal = TRUE) { # guess dimensions if(diagonal) { p <- (sqrt(1 + 8*length(x))-1)/2 } else { p <- (sqrt(1 + 8*length(x))+1)/2 } stopifnot(p == round(p,0)) S <- numeric(p * p) S[lav_matrix_vechr_idx(p, diagonal = diagonal)] <- x S[lav_matrix_vechu_idx(p, diagonal = diagonal)] <- x attr(S, "dim") <- c(p, p) S } # return the *vector* indices of the diagonal elements of a symmetric # matrix of size 'n' lav_matrix_diag_idx <- function(n = 1L) { if(n < 1L) return(integer(0L)) 1L + (seq_len(n) - 1L)*(n + 1L) } # return the *vector* indices of the diagonal elements of the LOWER part # of a symmetric matrix of size 'n' lav_matrix_diagh_idx <- function(n = 1L) { if(n < 1L) return(integer(0L)) if(n == 1L) return(1L) c(1L, cumsum(n:2L) + 1L) } # return the *vector* indices of the ANTI diagonal elements of a symmetric # matrix of size 'n' lav_matrix_antidiag_idx <- function(n = 1L) { if(n < 1L) return(integer(0L)) 1L + seq_len(n)*(n-1L) } # create the duplication matrix (D_n): it 'duplicates' the elements # in vech(S) to create vec(S) (where S is symmetric) # # D %*% vech(S) == vec(S) # # M&N book: pages 48-50 # # note: several flavors: dup1, dup2, dup3, ... # currently used: dup3 # first attempt # dup1: working on the vector indices only .dup1 <- function(n = 1L) { if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } if (n > 255L) { stop("n is too large") } # dimensions n2 <- n * n; nstar <- n * (n + 1)/2 # THIS is the real bottleneck: allocating an ocean of zeroes... x <- numeric(n2 * nstar) # delta patterns r1 <- seq.int(from = n*n+1, by = -(n-1), length.out = n-1) r2 <- seq.int(from = n-1, by = n-1, length.out = n-1) r3 <- seq.int(from = 2*n+1, by = n, length.out = n-1) # is there a more elegant way to do this? rr <- unlist(lapply((n-1):1, function(x) { c(rbind(r1[1:x], r2[1:x]), r3[n-x]) })) idx <- c(1L, cumsum(rr) + 1L) # create matrix x[idx] <- 1.0 attr(x, "dim") <- c(n2, nstar) x } # second attempt # dup2: working on the row/col matrix indices # (but only create the matrix at the very end) .dup2 <- function (n = 1L) { if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } if(n > 255L) { stop("n is too large") } nstar <- n * (n+1)/2 n2 <- n * n # THIS is the real bottleneck: allocating an ocean of zeroes...
x <- numeric(n2 * nstar) idx1 <- lav_matrix_vech_idx(n) + ((1L:nstar)-1L) * n2 # vector indices idx2 <- lav_matrix_vechru_idx(n) + ((1L:nstar)-1L) * n2 # vector indices x[idx1] <- 1.0 x[idx2] <- 1.0 attr(x, "dim") <- c(n2, nstar) x } # dup3: using col idx only # D7 <- dup(7L); x<- apply(D7, 1, function(x) which(x > 0)); matrix(x,7,7) .dup3 <- function(n = 1L) { if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } if(n > 255L) { stop("n is too large") } nstar <- n * (n+1)/2 n2 <- n * n # THIS is the real bottleneck: allocating an ocean of zeroes... x <- numeric(n2 * nstar) tmp <- matrix(0L, n, n) tmp[lav_matrix_vech_idx(n)] <- 1:nstar tmp[lav_matrix_vechru_idx(n)] <- 1:nstar idx <- (1:n2) + (lav_matrix_vec(tmp)-1L) * n2 x[idx] <- 1.0 attr(x, "dim") <- c(n2, nstar) x } # dup4: using Matrix package, returning a sparse matrix #.dup4 <- function(n = 1L) { # if ((n < 1L) | (round(n) != n)) { # stop("n must be a positive integer") # } # # if(n > 255L) { # stop("n is too large") # } # # nstar <- n * (n+1)/2 # #n2 <- n * n # # tmp <- matrix(0L, n, n) # tmp[lav_matrix_vech_idx(n)] <- 1:nstar # tmp[lav_matrix_vechru_idx(n)] <- 1:nstar # # x <- Matrix::sparseMatrix(i = 1:(n*n), j = vec(tmp), x = 1.0) # # x #} # default dup: lav_matrix_duplication <- .dup3 # compute t(D) %*% A (without explicitly computing D) # sqrt(nrow(A)) is an integer # A is not symmetric, and not even square, only n^2 ROWS lav_matrix_duplication_pre <- function(A = matrix(0,0,0)) { # number of rows n2 <- NROW(A) # square nrow(A) only, n2 = n^2 stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) # dup idx idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- A[idx1,,drop=FALSE] + A[idx2,,drop=FALSE] u <- which(idx1 %in% idx2); OUT[u,] <- OUT[u,] / 2.0 OUT } # dupr_pre is faster... 
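# quick check (illustrative, not run): lav_matrix_duplication_pre (above)
# and the variant below should both agree with an explicit product with
# the duplication matrix, e.g.
#   D <- lav_matrix_duplication(4L)
#   A <- matrix(rnorm(16 * 3), 16, 3)
#   all.equal(crossprod(D, A), lav_matrix_duplication_pre(A))   # TRUE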
lav_matrix_duplication_dup_pre2 <- function(A = matrix(0,0,0)) { # number of rows n2 <- NROW(A) # square nrow(A) only, n2 = n^2 stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) # dup idx idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- A[idx1,,drop=FALSE] u <- which(!idx1 %in% idx2); OUT[u,] <- OUT[u,] + A[idx2[u],] OUT } # compute A %*% D (without explicitly computing D) # sqrt(ncol(A)) must be an integer # A is not symmetric, and not even square, only n^2 COLUMNS lav_matrix_duplication_post <- function(A = matrix(0,0,0)) { # number of columns n2 <- NCOL(A) # square A only, n2 = n^2 stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) # dup idx idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- A[,idx1] + A[,idx2] u <- which(idx1 %in% idx2); OUT[,u] <- OUT[,u] / 2.0 OUT } # compute t(D) %*% A %*% D (without explicitly computing D) # A must be a square matrix and sqrt(ncol) an integer lav_matrix_duplication_pre_post <- function(A = matrix(0,0,0)) { # number of columns n2 <- NCOL(A) # square A only, n2 = n^2 stopifnot(NROW(A) == n2, sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) # dup idx idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- A[idx1,,drop=FALSE] + A[idx2,,drop=FALSE] u <- which(idx1 %in% idx2); OUT[u,] <- OUT[u,] / 2.0 OUT <- OUT[,idx1,drop=FALSE] + OUT[,idx2,drop=FALSE] OUT[,u] <- OUT[,u] / 2.0 OUT } # create the generalized inverse of the duplication matrix (D^+_n): # it removes the duplicated elements in vec(S) to create vech(S) # # D^+ %*% vec(S) == vech(S) # # M&N book: page 49 # # D^+ == solve(t(D_n %*% D_n) %*% t(D_n) # create first t(DUP.ginv) .dup_ginv1 <- function(n = 1L) { if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } if(n > 255L) { stop("n is too large") } nstar <- n * (n+1)/2 n2 <- n * n # THIS is the real bottleneck: allocating an ocean of zeroes... x <- numeric(nstar * n2) tmp <- matrix(1:(n*n), n, n) idx1 <- lav_matrix_vech(tmp) + (0:(nstar-1L))*n2 x[idx1] <- 0.5 idx2 <- lav_matrix_vechru(tmp) + (0:(nstar-1L))*n2 x[idx2] <- 0.5 idx3 <- lav_matrix_diag_idx(n) + (lav_matrix_diagh_idx(n)-1L)*n2 x[idx3] <- 1.0 attr(x, "dim") <- c(n2, nstar) x <- t(x) x } # create DUP.ginv without transpose .dup_ginv2 <- function(n = 1L) { if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } if(n > 255L) { stop("n is too large") } nstar <- n * (n+1)/2 n2 <- n * n # THIS is the real bottleneck: allocating an ocean of zeroes... 
x <- numeric(nstar * n2) x[(lav_matrix_vech_idx(n) - 1L)*nstar + 1:nstar] <- 0.5 x[(lav_matrix_vechru_idx(n) - 1L)*nstar + 1:nstar] <- 0.5 x[(lav_matrix_diag_idx(n) - 1L)*nstar + lav_matrix_diagh_idx(n)] <- 1.0 attr(x, "dim") <- c(nstar, n2) x } lav_matrix_duplication_ginv <- .dup_ginv2 # pre-multiply with D^+ # number of rows in A must be 'square' (n*n) lav_matrix_duplication_ginv_pre <- function(A = matrix(0,0,0)) { A <- as.matrix(A) # number of rows n2 <- NROW(A) # square nrow(A) only, n2 = n^2 stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) nstar <- n * (n+1)/2 idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- (A[idx1,,drop=FALSE] + A[idx2,,drop=FALSE]) / 2 OUT } # post-multiply with t(D^+) # number of columns in A must be 'square' (n*n) lav_matrix_duplication_ginv_post <- function(A = matrix(0,0,0)) { A <- as.matrix(A) # number of columns n2 <- NCOL(A) # square A only, n2 = n^2 stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- (A[,idx1,drop=FALSE] + A[,idx2,drop=FALSE]) / 2 OUT } # pre AND post-multiply with D^+: D^+ %*% A %*% t(D^+) # for square matrices only, with ncol = nrow = n^2 lav_matrix_duplication_ginv_pre_post <- function(A = matrix(0,0,0)) { A <- as.matrix(A) # number of columns n2 <- NCOL(A) # square A only, n2 = n^2 stopifnot(NROW(A) == n2, sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) idx1 <- lav_matrix_vech_idx(n); idx2 <- lav_matrix_vechru_idx(n) OUT <- (A[idx1,,drop=FALSE] + A[idx2,,drop=FALSE]) / 2 OUT <- (OUT[,idx1,drop=FALSE] + OUT[,idx2,drop=FALSE]) / 2 OUT } # create the commutation matrix (K_mn) # the mn x mn commutation matrix is a permutation matrix which # transforms vec(A) into vec(A') # # K_mn %*% vec(A) == vec(A') # # (in Henderson & Searle 1979, it is called the vec-permutation matrix) # M&N book: pages 46-48 # # note: K_mn is a permutation matrix, so it is orthogonal: t(K_mn) = K_mn^-1 # K_nm %*% K_mn == I_mn # # it is called the 'commutation' matrix because it enables us to interchange # ('commute') the two matrices of a Kronecker product, eg # K_pm (A %x% B) K_nq == (B %x% A) # # important property: it allows us to transform a vec of a Kronecker product # into the Kronecker product of the vecs (if A is m x n and B is p x q): # vec(A %x% B) == (I_n %x% K_qm %x% I_p)(vec A %x% vec B) # first attempt .com1 <- function(m = 1L, n = 1L) { if ((m < 1L) | (round(m) != m)) { stop("m must be a positive integer") } if ((n < 1L) | (round(n) != n)) { stop("n must be a positive integer") } p <- m*n x <- numeric( p*p ) pattern <- rep(c(rep((m+1L)*n, (m-1L)), n+1L), n) idx <- c(1L, 1L + cumsum(pattern)[-p]) x[idx] <- 1.0 attr(x, "dim") <- c(p,p) x } lav_matrix_commutation <- .com1 # compute K_n %*% A without explicitly computing K # K_n = K_nn, so sqrt(nrow(A)) must be an integer!
# = permuting the rows of A lav_matrix_commutation_pre <- function(A = matrix(0,0,0)) { # number of rows of A n2 <- NROW(A) # K_nn only (n2 = m * n) stopifnot(sqrt(n2) == round(sqrt(n2))) # dimension n <- sqrt(n2) # compute row indices #row.idx <- as.integer(t(matrix(1:n2, n, n))) row.idx <- rep(1:n, each=n) + (0:(n-1L))*n OUT <- A[row.idx,,drop=FALSE] OUT } # compute K_mn %*% A without explicitly computing K # = permuting the rows of A lav_matrix_commutation_mn_pre <- function(A, m = 1L, n = 1L) { # number of rows of A mn <- NROW(A) stopifnot(mn == m * n) # compute row indices # row.idx <- as.integer(t(matrix(1:mn, m, n))) row.idx <- rep(1:m, each=n) + (0:(n-1L))*m OUT <- A[row.idx,,drop=FALSE] OUT } # N_n == 1/2 (I_n^2 + K_nn) # see MN page 48 # # N_n == D_n %*% D^+_n # lav_matrix_commutation_Nn <- function(n = 1L) { stop("not implemented yet") } # (simplified) kronecker product for square matrices lav_matrix_kronecker_square <- function(A, check = TRUE) { dimA <- dim(A); n <- dimA[1L]; n2 <- n*n if(check) { stopifnot(dimA[2L] == n) } # all possible combinations out <- tcrossprod(as.vector(A)) # break up in n*n pieces, and rearrange dim(out) <- c(n,n,n,n) out <- aperm(out, perm = c(3,1,4,2)) # reshape again, to form n2 x n2 matrix dim(out) <- c(n2, n2) out } # (simplified) faster kronecker product for symmetric matrices # note: not faster, but the logic extends to vech versions lav_matrix_kronecker_symmetric <- function(S, check = TRUE) { dimS <- dim(S); n <- dimS[1L]; n2 <- n*n if(check) { stopifnot(dimS[2L] == n) } # all possible combinations out <- tcrossprod(as.vector(S)) # break up in n*(n*n) pieces, and rearrange dim(out) <- c(n,n*n,n) out <- aperm(out, perm = c(3L,2L,1L)) # reshape again, to form n2 x n2 matrix dim(out) <- c(n2, n2) out } # shortcut for the idiom 't(S2) %*% (S %x% S) %*% S2' # where S is symmetric, and the rows of S2 correspond to # the elements of S # eg - S2 = DELTA (the jacobian dS/dtheta) lav_matrix_tS2_SxS_S2 <- function(S2, S, check = TRUE) { # size of S n <- NROW(S) if(check) { stopifnot(NROW(S2) == n*n) } A <- matrix(S %*% matrix(S2, n, ), n*n,) A2 <- A[rep(1:n, each=n) + (0:(n-1L))*n,,drop = FALSE] crossprod(A, A2) } # shortcut for the idiom 't(D) %*% (S %x% S) %*% S' # where S is symmetric, and D is the duplication matrix lav_matrix_tD_SxS_D <- function(S) { } # square root of a positive definite symmetric matrix lav_matrix_symmetric_sqrt <- function(S = matrix(0,0,0)) { n <- NROW(S) # eigen decomposition, assume symmetric matrix S.eigen <- eigen(S, symmetric = TRUE) V <- S.eigen$vectors; d <- S.eigen$values # 'fix' slightly negative tiny numbers d[d < 0] <- 0.0 # sqrt the eigenvalues and reconstruct S.sqrt <- V %*% diag(sqrt(d), n, n) %*% t(V) S.sqrt } # orthogonal complement of a matrix A # see Satorra (1992). Sociological Methodology, 22, 249-278, footnote 3: # # To compute such an orthogonal matrix, consider the p* x p* matrix P = I - # A(A'A)^-1A', which is idempotent of rank p* - q. Consider the singular value # decomposition P = HVH', where H is a p* x (p* - q) matrix of full column rank, # and V is a (p* - q) x (p* - q) diagonal matrix. It is obvious that H'A = 0; # hence, H is the desired orthogonal complement. This method of constructing an # orthogonal complement was proposed by Heinz Neudecker (1990, pers. comm.). 
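# quick check (illustrative, not run): if A has full column rank q, then
#   H <- lav_matrix_orthogonal_complement(A)
# satisfies crossprod(H, A) == 0 (up to rounding), and cbind(A, H) is
# square and of full rank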
# # update YR 21 okt 2014: # - note that A %*% solve(t(A) %*% A) %*% t(A) == tcrossprod(qr.Q(qr(A))) # - if we are using qr, we can as well use qr.Q to get the complement # lav_matrix_orthogonal_complement <- function(A = matrix(0,0,0)) { QR <- qr(A) ranK <- QR$rank # following Heinz Neudecker: #n <- nrow(A) #P <- diag(n) - tcrossprod(qr.Q(QR)) #OUT <- svd(P)$u[, seq_len(n - ranK), drop = FALSE] Q <- qr.Q(QR, complete = TRUE) # get rid of the first ranK columns OUT <- Q[, -seq_len(ranK), drop = FALSE] OUT } # construct block diagonal matrix from a list of matrices # ... can contain multiple arguments, which will be coerced to a list # or it can be a single list (of matrices) lav_matrix_bdiag <- function(...) { if(nargs() == 0L) return(matrix(0,0,0)) dots <- list(...) # create list of matrices if(is.list(dots[[1]])) { mlist <- dots[[1]] } else { mlist <- dots } if(length(mlist) == 1L) return(mlist[[1]]) # more than 1 matrix nmat <- length(mlist) nrows <- sapply(mlist, NROW); crows <- cumsum(nrows) ncols <- sapply(mlist, NCOL); ccols <- cumsum(ncols) trows <- sum(nrows) tcols <- sum(ncols) x <- numeric(trows * tcols) for(m in seq_len(nmat)) { if(m > 1L) { rcoffset <- trows*ccols[m-1] + crows[m-1] } else { rcoffset <- 0L } m.idx <- ( rep((0:(ncols[m] - 1L))*trows, each=nrows[m]) + rep(1:nrows[m], ncols[m]) + rcoffset ) x[m.idx] <- mlist[[m]] } attr(x, "dim") <- c(trows, tcols) x } # trace of a single square matrix, or the trace of a product of (compatible) # matrices resulting in a single square matrix lav_matrix_trace <- function(..., check = TRUE) { if(nargs() == 0L) return(as.numeric(NA)) dots <- list(...) # create list of matrices if(is.list(dots[[1]])) { mlist <- dots[[1]] } else { mlist <- dots } # number of matrices nMat <- length(mlist) # single matrix if(nMat == 1L) { S <- mlist[[1]] if(check) { # check if square stopifnot(NROW(S) == NCOL(S)) } out <- sum(S[lav_matrix_diag_idx(n = NROW(S))]) } else if(nMat == 2L) { # dimension check is done by '*' out <- sum(mlist[[1]] * t(mlist[[2]])) } else if(nMat == 3L) { A <- mlist[[1]] B <- mlist[[2]] C <- mlist[[3]] # A, B, C # below is the logic; to be coded inline # DIAG <- numeric( NROW(A) ) # for(i in seq_len(NROW(A))) { # DIAG[i] <- sum( rep(A[i,], times = NCOL(B)) * # as.vector(B) * # rep(C[,i], each=NROW(B)) ) # } # out <- sum(DIAG) # FIXME: # dimension check is automatic B2 <- B %*% C out <- sum(A * t(B2)) } else { #nRows <- sapply(mlist, NROW) #nCols <- sapply(mlist, NCOL) # check if product is ok #stopifnot(all(nCols[seq_len(nMat-1L)] == nRows[2:nMat])) # check if product is square #stopifnot(nRows[1] == nCols[nMat]) M1 <- mlist[[1]] M2 <- mlist[[2]] for(m in 3L:nMat) { M2 <- M2 %*% mlist[[m]] } out <- sum(M1 * t(M2)) } out } # crossproduct, but handling NAs pairwise lav_matrix_crossprod <- function(A, B) { if(missing(B)) { B <- A } apply(A, 2L, function(x) colSums(B * x, na.rm=TRUE)) } # reduced row echelon form of A lav_matrix_rref <- function(A, tol = sqrt( .Machine$double.eps)) { # MATLAB documentation says rref uses: tol = (max(size(A))*eps *norm(A,inf) if(missing(tol)) { A.norm <- max(abs(apply(A,1,sum))) tol <- max(dim(A)) * A.norm * .Machine$double.eps } # check if A is a matrix stopifnot(is.matrix(A)) # dimensions nRow <- NROW(A); nCol <- NCOL(A) pivot = integer(0L) # catch empty matrix if(nRow == 0 && nCol == 0) return(matrix(0,0,0)) rowIndex <- colIndex <- 1 while( rowIndex <= nRow && colIndex <= nCol ) { # look for largest (in absolute value) element in this column: i.below <- which.max(abs(A[rowIndex:nRow, colIndex])) i 
<- i.below + rowIndex - 1L p <- A[i, colIndex] # check if column is empty if(abs(p) <= tol) { A[rowIndex:nRow, colIndex] <- 0L # clean up colIndex <- colIndex + 1 } else { # store pivot column pivot <- c(pivot, colIndex) # do we need to swap column? if(rowIndex != i) { A[ c(rowIndex,i), colIndex:nCol ] <- A[ c(i,rowIndex), colIndex:nCol ] } # scale pivot to be 1.0 A[ rowIndex, colIndex:nCol ] <- A[ rowIndex, colIndex:nCol] / p # create zeroes below and above pivot other <- seq_len(nRow)[-rowIndex] A[other, colIndex:nCol] <- A[other, colIndex:nCol] - tcrossprod(A[other,colIndex], A[rowIndex,colIndex:nCol]) # next row/col rowIndex <- rowIndex + 1 colIndex <- colIndex + 1 } } # rounding? list(R = A, pivot = pivot) } # non-orthonormal (left) null space basis, using rref lav_matrix_orthogonal_complement2 <- function(A, tol = sqrt( .Machine$double.eps)) { # left A <- t(A) # compute rref out <- lav_matrix_rref(A = A, tol = tol) # number of free columns in R (if any) nfree <- NCOL(A) - length(out$pivot) if(nfree) { R <- out$R # remove all-zero rows zero.idx <- which(apply(R, 1, function(x) { all(abs(x) < tol) })) if(length(zero.idx) > 0) { R <- R[-zero.idx,, drop = FALSE] } FREE <- R[, -out$pivot, drop = FALSE] I <- diag( nfree ) N <- rbind(-FREE, I) } else { N <- matrix(0, nrow = NCOL(A), ncol = 0L) } N } # inverse of a positive-definite symmetric matrix # FIXME: error handling? lav_matrix_symmetric_inverse <- function(S, logdet = FALSE, Sinv.method = "eigen") { if(Sinv.method == "eigen") { EV <- eigen(S, symmetric = TRUE) # V %*% diag(1/d) %*% V^{-1}, where V^{-1} = V^T S.inv <- tcrossprod(sweep(EV$vector, 2L, STATS = (1/EV$values), FUN="*"), EV$vector) if(logdet) { if(all(EV$values >= 0)) { attr(S.inv, "logdet") <- sum(log(EV$values)) } else { attr(S.inv, "logdet") <- as.numeric(NA) } } } else if(Sinv.method == "solve") { S.inv <- solve(S) if(logdet) { ev <- eigen(S, symmetric = TRUE, only.values = TRUE) if(all(ev$values >= 0)) { attr(S.inv, "logdet") <- sum(log(ev$values)) } else { attr(S.inv, "logdet") <- as.numeric(NA) } } } else if(Sinv.method == "chol") { # this will break if S is not positive definite cS <- chol(S) S.inv <- chol2inv(cS) if(logdet) { diag.cS <- diag(cS) attr(S.inv, "logdet") <- sum(log(diag.cS * diag.cS)) } } else { stop("method must be either `eigen', `solve' or `chol'") } S.inv } # update inverse of A, after removing 1 or more rows (and corresponding # columns) from A # # - this is one of the many applications of the Sherman-Morrison formula # - only removal for now # lav_matrix_inverse_update <- function(A.inv, rm.idx = integer(0L)) { ndel <- length(rm.idx) # rank-1 update if(ndel == 1L) { a <- A.inv[-rm.idx, rm.idx, drop = FALSE] b <- A.inv[rm.idx, -rm.idx, drop = FALSE] h <- A.inv[rm.idx, rm.idx] out <- A.inv[-rm.idx, -rm.idx, drop = FALSE] - (a %*% b) / h } # rank-n update else if(ndel < NCOL(A.inv)) { A <- A.inv[-rm.idx, rm.idx, drop = FALSE] B <- A.inv[ rm.idx,-rm.idx, drop = FALSE] H <- A.inv[ rm.idx, rm.idx, drop = FALSE] out <- A.inv[-rm.idx, -rm.idx, drop = FALSE] - A %*% solve(H, B) # erase all col/rows... } else if(ndel == NCOL(A.inv)) { out <- matrix(0,0,0) } out } # update inverse of S, after removing 1 or more rows (and corresponding # columns) from S, a symmetric matrix # # - only removal for now!
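# example sketch (illustrative only): removing row/column 2 from S
#   S <- cov(matrix(rnorm(50), 10L, 5L)); S.inv <- solve(S)
#   all.equal(lav_matrix_symmetric_inverse_update(S.inv, rm.idx = 2L),
#             solve(S[-2, -2]))   # should be TRUE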
#
lav_matrix_symmetric_inverse_update <- function(S.inv, rm.idx = integer(0L), logdet = FALSE, S.logdet = NULL) { ndel <- length(rm.idx) if(ndel == 0L) { out <- S.inv if(logdet) { attr(out, "logdet") <- S.logdet } } # rank-1 update else if(ndel == 1L) { h <- S.inv[rm.idx, rm.idx] a <- S.inv[-rm.idx, rm.idx, drop = FALSE] / sqrt(h) out <- S.inv[-rm.idx, -rm.idx, drop = FALSE] - tcrossprod(a) if(logdet) { attr(out, "logdet") <- S.logdet + log(h) } } # rank-n update else if(ndel < NCOL(S.inv)) { A <- S.inv[ rm.idx, -rm.idx, drop = FALSE] H <- S.inv[ rm.idx, rm.idx, drop = FALSE] out <- S.inv[-rm.idx, -rm.idx, drop = FALSE] - crossprod(A, solve(H, A)) if(logdet) { cH <- chol(H); diag.cH <- diag(cH) H.logdet <- sum(log(diag.cH * diag.cH)) attr(out, "logdet") <- S.logdet + H.logdet } # erase all col/rows... } else if(ndel == NCOL(S.inv)) { out <- matrix(0,0,0) } else { stop("lavaan ERROR: column indices exceed number of columns in S.inv") } out } # update determinant of A, after removing 1 or more rows (and corresponding # columns) from A # lav_matrix_det_update <- function(det.A, A.inv, rm.idx = integer(0L)) { ndel <- length(rm.idx) # rank-1 update if(ndel == 1L) { h <- A.inv[rm.idx, rm.idx] out <- det.A * h } # rank-n update else if(ndel < NCOL(A.inv)) { H <- A.inv[ rm.idx, rm.idx, drop = FALSE] det.H <- det(H) out <- det.A * det.H # erase all col/rows... } else if(ndel == NCOL(A.inv)) { out <- matrix(0,0,0) } out } # update determinant of S, after removing 1 or more rows (and corresponding # columns) from S, a symmetric matrix # lav_matrix_symmetric_det_update <- function(det.S, S.inv, rm.idx = integer(0L)){ ndel <- length(rm.idx) # rank-1 update if(ndel == 1L) { h <- S.inv[rm.idx, rm.idx] out <- det.S * h } # rank-n update else if(ndel < NCOL(S.inv)) { H <- S.inv[ rm.idx, rm.idx, drop = FALSE] cH <- chol(H); diag.cH <- diag(cH) det.H <- prod(diag.cH * diag.cH) out <- det.S * det.H # erase all col/rows... } else if(ndel == NCOL(S.inv)) { out <- numeric(0L) } out } # update log determinant of S, after removing 1 or more rows (and corresponding # columns) from S, a symmetric matrix # lav_matrix_symmetric_logdet_update <- function(S.logdet, S.inv, rm.idx = integer(0L)) { ndel <- length(rm.idx) # rank-1 update if(ndel == 1L) { h <- S.inv[rm.idx, rm.idx] out <- S.logdet + log(h) } # rank-n update else if(ndel < NCOL(S.inv)) { H <- S.inv[ rm.idx, rm.idx, drop = FALSE] cH <- chol(H); diag.cH <- diag(cH) H.logdet <- sum(log(diag.cH * diag.cH)) out <- S.logdet + H.logdet # erase all col/rows... } else if(ndel == NCOL(S.inv)) { out <- numeric(0L) } out } lavaan/R/lav_samplestats_icov.R0000644000176200001440000000314212743163301016261 0ustar liggesuserslav_samplestats_icov <- function(COV = NULL, ridge = 0.0, x.idx = integer(0L), ngroups = 1L, g = 1L, warn = TRUE) { tmp <- try(inv.chol(COV, logdet = TRUE), silent = TRUE) # what if this fails...
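# if the Cholesky-based inversion failed, COV is (numerically) not
# positive-definite; when exogenous covariates are present (x.idx), we
# first try to recover by slightly ridging their variances before giving up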
if(inherits(tmp, "try-error")) { if(length(x.idx) > 0L) { # maybe, we can fix it by gently ridging the exo variances ridge.eps <- ridge diag(COV)[x.idx] <- diag(COV)[x.idx] + ridge.eps # try again tmp <- try(inv.chol(COV, logdet = TRUE), silent = TRUE) if(inherits(tmp, "try-error")) { # fatal stop after all stop("lavaan ERROR: sample covariance matrix is not positive-definite") } else { cov.log.det <- attr(tmp, "logdet") attr(tmp, "logdet") <- NULL icov <- tmp # give a warning if(warn) { if(ngroups > 1) { warning("lavaan WARNING: sample covariance matrix in group ", g, " is not positive-definite") } else { warning("lavaan WARNING: sample covariance matrix is not positive-definite") } } } } else { # fatal stop stop("lavaan ERROR: sample covariance matrix is not positive-definite") } } else { cov.log.det <- attr(tmp, "logdet") attr(tmp, "logdet") <- NULL icov <- tmp } list(icov = icov, cov.log.det = cov.log.det) } lavaan/R/lav_lavaanList_multipleGroups.R0000644000176200001440000000211312740723306020114 0ustar liggesusers# lavMultipleGroups: fit the *same* model, on (typically a small number of) # groups/sets # YR - 11 July 2016 lavMultipleGroups <- function(model = NULL, dataList = NULL, ndat = length(dataList), cmd = "sem", ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE, parallel = c("no", "multicore", "snow"), ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL) { # dotdotdot dotdotdot <- list() # fit multiple times fit <- do.call("lavaanList", args = c(list(model = model, dataList = dataList, ndat = ndat, cmd = cmd, store.slots = store.slots, FUN = FUN, show.progress = show.progress, parallel = parallel, ncpus = ncpus, cl = cl), dotdotdot)) # store group labels (if any) fit@meta$lavMultipleGroups <- TRUE fit@meta$group.label <- names(dataList) fit } lavaan/R/01RefClass_01lavOptim.R0000644000176200001440000001545513042163575015727 0ustar liggesusers# eventually, this file will contain all routines related to # optimization -- YR 21 june 2012 # super class -- virtual statistical model that needs to be optimized lavRefOptim <- setRefClass("lavOptim", # inherits contains = "lavRefModel", # fields fields = list( theta.start = "numeric", # starting values optim.method = "character", # optimization method optim.control = "list", # control parameter for optimization method optim.out = "list" # optimization results ), # methods methods = list( minObjective = function(x) { cat("this is a dummy function [minObjective]\n") return(Inf) }, minGradient = function(x) { cat("this is a dummy function [minGradient]\n") return(rep(as.numeric(NA), npar)) }, minHessian = function(x) { cat("this is a dummy function [minHessian]\n") return(matrix(as.numeric(NA), npar, npar)) }, optimize = function(method = "nlminb", control = list(), verbose = FALSE, start.values = NULL) { method <- tolower(method) hessian <- FALSE if( method == "none" ) { optim.method <<- "none" } else if( method %in% c("nlminb", "quasi-newton", "quasi.newton", "nlminb.hessian") ) { optim.method <<- "nlminb" if(verbose) control$trace <- 1L if(method == "nlminb.hessian") hessian <- TRUE control.nlminb <- list(eval.max=20000L, iter.max=10000L, trace=0L, #abs.tol=1e-20, ### important!! fx never negative
abs.tol=(.Machine$double.eps * 10), rel.tol=1e-10, x.tol=1.5e-8, step.min=2.2e-14) control.nlminb <- modifyList(control.nlminb, control) optim.control <<- control.nlminb[c("eval.max", "iter.max", "trace", "abs.tol", "rel.tol", "x.tol", "step.min")] } else if( method %in% c("newton", "newton-raphson", "newton.raphson") ) { optim.method <<- "newton" if(verbose) control$verbose <- TRUE control.nr <- list(grad.tol = 1e-6, iter.max = 200L, inner.max = 20L, verbose = FALSE) control.nr <- modifyList(control.nr, control) optim.control <<- control.nr[c("grad.tol", "iter.max", "inner.max", "verbose")] } else { stop("unknown optim method: ", method) } # user provided starting values? if(!is.null(start.values)) { stopifnot(length(start.values) == npar) theta.start <<- start.values } # run objective function to initialize (and see if starting values # are valid) tmp <- minObjective(theta.start) if(optim.method == "newton") { out <- lavOptimNewtonRaphson(object=.self, control = optim.control) optim.out <<- out } else if(optim.method == "nlminb") { if(!hessian) { out <- nlminb(start = theta, objective = .self$minObjective, gradient = .self$minGradient, control = optim.control) } else { out <- nlminb(start = theta, objective = .self$minObjective, gradient = .self$minGradient, hessian = .self$minHessian, control = optim.control) } # FIXME: use generic fields optim.out <<- out } # just in case, a last call to objective() tmp <- minObjective() } )) # this is a simple/naive Newton Raphson implementation # - minimization only # - it assumes that the hessian is always positive definite (no check!) # - it may do some backstepping, but there is no guarantee that it will # converge # this function is NOT for general-purpose optimization, but should only be # used for simple (convex!) problems (eg. estimating polychoric/polyserial # correlations, probit regressions, ...) # lavOptimNewtonRaphson <- function(object, control = list(iter.max = 100L, grad.tol = 1e-6, inner.max = 20L, verbose = FALSE)) { # housekeeping converged <- FALSE; message <- character(0); inner <- 0L # we start with classic newton: step length alpha_k = 1 alpha_k <- 1 # current estimates fx.old <- object$minObjective() gradient <- object$minGradient() norm.grad <- sqrt( crossprod(gradient) ) max.grad <- max(abs(gradient)) # start NR steps for(i in seq_len(control$iter.max)) { if(control$verbose) { cat("NR step ", sprintf("%2d", (i-1L)), ": max.grad = ", sprintf("%12.9f", max.grad), " norm.grad = ", sprintf("%12.9f", norm.grad), "\n", sep="") } # simple convergence test if(max.grad < control$grad.tol) { converged <- TRUE message <- paste("converged due to max.grad < tol (", control$grad.tol, ")", sep="") if(control$verbose) cat("NR ", message, "\n", sep="") break } # compute search direction 'p_k' hessian <- object$minHessian() p_k <- as.numeric( solve(hessian, gradient) ) # update theta and fx theta <- object$theta - alpha_k * p_k fx.new <- object$minObjective(theta) # check if we minimize if(fx.new > fx.old) { # simple backstepping for(j in seq_len(control$inner.max)) { inner <- inner + 1L alpha_k <- alpha_k/2 theta <- object$theta - alpha_k * p_k fx.new <- object$minObjective(theta) if(fx.new < fx.old) break } if(fx.new > fx.old) { # it didn't work... bail out
message <- paste("backstepping failed after ", control$inner.max, "iterations") return(list(fx=fx.new, converged=FALSE, message=message, max.grad=max.grad,iterations=i, backsteps=inner)) } # reset alpha alpha_k <- 1 } # update gradient <- object$minGradient() max.grad <- max(abs(gradient)) norm.grad <- sqrt( crossprod(gradient) ) fx.old <- fx.new } list(fx = fx.old, converged = converged, message = message, max.grad = max.grad, norm.grad = norm.grad, iterations = i-1L, backsteps = inner) } lavaan/R/lav_partable_unrestricted.R0000644000176200001440000002276413053000662017275 0ustar liggesusers# YR - 26 Nov 2013: generate partable for the unrestricted model lav_partable_unrestricted <- function(lavobject = NULL, lavdata = NULL, lavoptions = NULL, lavsamplestats = NULL, # optional user-provided sample stats sample.cov = NULL, sample.mean = NULL, sample.slopes = NULL, sample.th = NULL, sample.th.idx = NULL) { # grab everything from lavaan lavobject if(!is.null(lavobject)) { stopifnot(inherits(lavobject, "lavaan")) lavdata <- lavobject@Data lavoptions <- lavobject@Options lavsamplestats <- lavobject@SampleStats } # conditional.x ? check res.cov[[1]] slot conditional.x <- FALSE if(!is.null(lavsamplestats) && !is.null(lavsamplestats@res.cov[[1]])) { conditional.x <- TRUE } else if(!is.null(lavoptions) && lavoptions$conditional.x) { conditional.x <- TRUE } # if user-based moments are given, use these if(is.null(sample.cov) && !is.null(lavsamplestats)) { if(conditional.x) { sample.cov <- lavsamplestats@res.cov } else { sample.cov <- lavsamplestats@cov } } if(is.null(sample.mean) && !is.null(lavsamplestats)) { if(conditional.x) { sample.mean <- lavsamplestats@res.int } else { sample.mean <- lavsamplestats@mean } } if(conditional.x && is.null(sample.slopes) && !is.null(lavsamplestats)) { sample.slopes <- lavsamplestats@res.slopes } if(is.null(sample.th) && !is.null(lavsamplestats)) { if(conditional.x) { sample.th <- lavsamplestats@res.th } else { sample.th <- lavsamplestats@th } } if(is.null(sample.th.idx) && !is.null(lavsamplestats)) { sample.th.idx <- lavsamplestats@th.idx } ov.names <- lavdata@ov.names ov <- lavdata@ov ov.names.x <- lavdata@ov.names.x meanstructure <- lavoptions$meanstructure categorical <- any(ov$type == "ordered") ngroups <- length(ov.names) # what with fixed.x? # - does not really matter; fit will be saturated anyway # - fixed.x = TRUE may avoid convergence issues with non-numeric # x-covariates if(lavoptions$mimic %in% c("lavaan", "Mplus")) { fixed.x = lavoptions$fixed.x } else if(lavoptions$mimic == "EQS") { # always ignore fixed.x ov.names.x = NULL fixed.x = FALSE } else if(lavoptions$mimic == "LISREL") { # always ignore fixed.x??? CHECKME!! ov.names.x = NULL fixed.x = FALSE } if(conditional.x) { ov.names.nox <- lapply(seq_len(ngroups), function(g) ov.names[[g]][ !ov.names[[g]] %in% ov.names.x[[g]] ]) } lhs <- rhs <- op <- character(0) group <- free <- exo <- integer(0) ustart <- numeric(0) for(g in 1:ngroups) { # a) VARIANCES (all ov's, if !conditional.x, also exo's) nvar <- length(ov.names[[g]]) lhs <- c(lhs, ov.names[[g]]) op <- c(op, rep("~~", nvar)) rhs <- c(rhs, ov.names[[g]]) group <- c(group, rep(g, nvar)) free <- c(free, rep(1L, nvar)) exo <- c(exo, rep(0L, nvar)) # starting values -- variances if(!is.null(sample.cov)) { ustart <- c(ustart, diag(sample.cov[[g]])) } else { ustart <- c(ustart, rep(as.numeric(NA), nvar)) } # COVARIANCES!
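# (all unique pairs of observed variables: the pstar = nvar*(nvar-1)/2
# below-diagonal elements)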
pstar <- nvar*(nvar-1)/2 if(pstar > 0L) { # only if more than 1 variable tmp <- utils::combn(ov.names[[g]], 2) lhs <- c(lhs, tmp[1,]) # to fill upper.tri op <- c(op, rep("~~", pstar)) rhs <- c(rhs, tmp[2,]) group <- c(group, rep(g, pstar)) free <- c(free, rep(1L, pstar)) exo <- c(exo, rep(0L, pstar)) } # starting values -- covariances if(!is.null(sample.cov)) { ustart <- c(ustart, lav_matrix_vech(sample.cov[[g]], diagonal = FALSE)) } else { ustart <- c(ustart, rep(as.numeric(NA), pstar)) } # ordered? fix variances, add thresholds ord.names <- character(0L) if(categorical) { ord.names <- ov$name[ ov$type == "ordered" ] # only for this group ord.names <- ov.names[[g]][ which(ov.names[[g]] %in% ord.names) ] if(length(ord.names) > 0L) { # fix variances to 1.0 idx <- which(lhs %in% ord.names & op == "~~" & lhs == rhs) ustart[idx] <- 1.0 free[idx] <- 0L # add thresholds lhs.th <- character(0); rhs.th <- character(0) for(o in ord.names) { nth <- ov$nlev[ ov$name == o ] - 1L if(nth < 1L) next lhs.th <- c(lhs.th, rep(o, nth)) rhs.th <- c(rhs.th, paste("t", seq_len(nth), sep="")) } nel <- length(lhs.th) lhs <- c(lhs, lhs.th) rhs <- c(rhs, rhs.th) op <- c(op, rep("|", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(1L, nel)) exo <- c(exo, rep(0L, nel)) # starting values if(!is.null(sample.th) && !is.null(sample.th.idx)) { th.start <- sample.th[[g]][ sample.th.idx[[g]] > 0L ] ustart <- c(ustart, th.start) } else { ustart <- c(ustart, rep(as.numeric(NA), nel)) } } } # meanstructure? if(meanstructure) { # auto-remove ordinal variables ov.int <- ov.names[[g]] idx <- which(ov.int %in% ord.names) if(length(idx)) ov.int <- ov.int[-idx] nel <- length(ov.int) lhs <- c(lhs, ov.int) op <- c(op, rep("~1", nel)) rhs <- c(rhs, rep("", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(1L, nel)) exo <- c(exo, rep(0L, nel)) # starting values if(!is.null(sample.mean)) { sample.int.idx <- match(ov.int, ov.names[[g]]) ustart <- c(ustart, sample.mean[[g]][sample.int.idx]) } else { ustart <- c(ustart, rep(as.numeric(NA), length(ov.int))) } } # categorical? insert means as fixed-to-zero parameters # since 0.5-17 if(categorical) { ov.int <- ov.names[[g]] idx <- which(ov.int %in% ord.names) ov.int <- ov.int[idx] nel <- length(ov.int) lhs <- c(lhs, ov.int) op <- c(op, rep("~1", nel)) rhs <- c(rhs, rep("", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(0L, nel)) exo <- c(exo, rep(0L, nel)) ustart <- c(ustart, rep(0L, nel)) } # fixed.x exogenous variables? if(!conditional.x && fixed.x && (nx <- length(ov.names.x[[g]])) > 0L) { # fix variances/covariances exo.idx <- which(rhs %in% ov.names.x[[g]] & lhs %in% ov.names.x[[g]] & op == "~~" & group == g) exo[exo.idx] <- 1L free[exo.idx] <- 0L # fix means exo.idx <- which(rhs %in% ov.names.x[[g]] & op == "~1" & group == g) exo[exo.idx] <- 1L free[exo.idx] <- 0L } # conditional.x? 
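# when conditional.x = TRUE, the saturated model also contains the
# regression slopes of the non-x variables on the exogenous x-covariates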
if(conditional.x && (nx <- length(ov.names.x[[g]])) > 0L) { nnox <- length(ov.names.nox[[g]]) nel <- nnox * nx lhs <- c(lhs, rep(ov.names.nox[[g]], times = nx)) op <- c(op, rep("~", nel)) rhs <- c(rhs, rep(ov.names.x[[g]], each = nnox)) group <- c(group, rep(g, nel)) free <- c(free, rep(1L, nel)) exo <- c(exo, rep(1L, nel)) # starting values -- slopes if(!is.null(sample.slopes)) { ustart <- c(ustart, lav_matrix_vec(sample.slopes[[g]])) } else { ustart <- c(ustart, rep(as.numeric(NA), nel)) } } } # ngroups # free counter idx.free <- which(free > 0) free[idx.free] <- 1:length(idx.free) LIST <- list( id = 1:length(lhs), lhs = lhs, op = op, rhs = rhs, user = rep(1L, length(lhs)), block = group, # for now group = group, #mod.idx = rep(0L, length(lhs)), free = free, ustart = ustart, exo = exo #, #label = rep("", length(lhs)) #eq.id = rep(0L, length(lhs)), #unco = free ) LIST } lavaan/R/lav_predict.R0000644000176200001440000007471513053110033014337 0ustar liggesusers# lavPredict() contains a collection of `predict' methods # the unifying theme is that they all rely on the (unknown, to be estimated) # or (known, a priori specified) values for the latent variables # # lv: latent variables (aka `factor scores') # ov: predict linear part of y_i # # - YR 11 June 2013: first version, in order to get factor scores for the # categorical case # - YR 12 Jan 2014: refactoring + lav_predict_fy (to be used by estimator MML) # # overload standard R function `predict' setMethod("predict", "lavaan", function(object, newdata = NULL) { lavPredict(object = object, newdata = newdata, type="lv", method="EBM", fsm = FALSE, optim.method = "nlminb") }) # main function lavPredict <- function(object, type = "lv", newdata = NULL, method = "EBM", se.fit = FALSE, label = TRUE, fsm = FALSE, optim.method = "nlminb") { stopifnot(inherits(object, "lavaan")) lavmodel <- object@Model lavdata <- object@Data lavsamplestats <- object@SampleStats lavimplied <- object@implied lavpta <- object@pta # type type <- tolower(type) if(type %in% c("latent", "lv", "factor", "factor.score", "factorscore")) type <- "lv" if(type %in% c("ov","yhat")) type <- "yhat" # need full data set supplied if(is.null(newdata)) { # use internal copy: if(lavdata@data.type != "full") { stop("lavaan ERROR: sample statistics were used for fitting and newdata is empty") } else if(is.null(lavdata@X[[1]])) { stop("lavaan ERROR: no local copy of data; FIXME!") } else { data.obs <- lavdata@X } eXo <- lavdata@eXo } else { OV <- lavdata@ov newData <- lavData(data = newdata, group = lavdata@group, ov.names = lavdata@ov.names, ov.names.x = lavdata@ov.names.x, ordered = OV$name[ OV$type == "ordered" ], lavoptions = list(std.ov = lavdata@std.ov, group.label = lavdata@group.label, missing = lavdata@missing, warn = FALSE), allow.single.case = TRUE) data.obs <- newData@X eXo <- newData@eXo } if(type == "lv") { # post fit check (lv pd?) ok <- lav_object_post_check(object) #if(!ok) { # stop("lavaan ERROR: lavInspect(,\"post.check\") is not TRUE; factor scores can not be computed. See the WARNING message.") #} out <- lav_predict_eta(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, lavimplied = lavimplied, data.obs = data.obs, eXo = eXo, method = method, fsm = fsm, optim.method = optim.method) # remove dummy lv?
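# drop the 'dummy' latent variables (stand-ins for observed y/x variables)
# from the factor scores, so that only genuine latent variables are returned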
if(fsm) { FSM <- attr(out, "fsm") } out <- lapply(seq_len(lavdata@ngroups), function(g) { lv.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], lavmodel@ov.x.dummy.lv.idx[[g]]) ret <- out[[g]] if(length(lv.idx) > 0L) { ret <- out[[g]][, -lv.idx, drop=FALSE] } ret }) # label? if(label) { for(g in seq_len(lavdata@ngroups)) { colnames(out[[g]]) <- lavpta$vnames$lv[[g]] } } # estimated value for the observed indicators, given (estimated) # factor scores } else if(type == "yhat") { out <- lav_predict_yhat(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, lavimplied = lavimplied, data.obs = data.obs, eXo = eXo, ETA = NULL, method = method, optim.method = optim.method) # label? if(label) { for(g in seq_len(lavdata@ngroups)) { colnames(out[[g]]) <- lavpta$vnames$ov[[g]] } } # density for each observed item, given (estimated) factor scores } else if(type == "fy") { out <- lav_predict_fy(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, data.obs = data.obs, eXo = eXo, ETA = NULL, method = method, optim.method = optim.method) # label? if(label) { for(g in seq_len(lavdata@ngroups)) { colnames(out[[g]]) <- lavpta$vnames$ov[[g]] } } } else { stop("lavaan ERROR: type must be one of: lv yhat fy") } # lavaan.matrix out <- lapply(out, "class<-", c("lavaan.matrix", "matrix")) if(lavdata@ngroups == 1L) { out <- out[[1L]] } else { out } if(fsm) { attr(out, "fsm") <- FSM } out } # internal function lav_predict_eta <- function(lavobject = NULL, # for convenience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, lavimplied = NULL, # new data data.obs = NULL, eXo = NULL, # options method = "EBM", fsm = FALSE, optim.method = "nlminb") { # full object? if(inherits(lavobject, "lavaan")) { lavdata <- lavobject@Data } else { stopifnot(!is.null(lavdata)) } # method method <- tolower(method) # alias if(method == "regression") { method <- "ebm" } # normal case? if(all(lavdata@ov$type == "numeric")) { if(method == "ebm") { out <- lav_predict_eta_normal(lavobject = lavobject, lavmodel = lavmodel, lavdata = lavdata, lavimplied = lavimplied, lavsamplestats = lavsamplestats, data.obs = data.obs, eXo = eXo, fsm = fsm) } else if(method == "bartlett" || method == "bartlet") { out <- lav_predict_eta_bartlett(lavobject = lavobject, lavmodel = lavmodel, lavdata = lavdata, lavimplied = lavimplied, lavsamplestats = lavsamplestats, data.obs = data.obs, eXo = eXo, fsm = fsm) } else { stop("lavaan ERROR: unknown method: ", method) } } else { out <- lav_predict_eta_ebm(lavobject = lavobject, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, data.obs = data.obs, eXo = eXo, optim.method = optim.method) } out } # factor scores - normal case # NOTE: this is the classic 'regression' method; for the linear/continuous # case, this is equivalent to both EB and EBM lav_predict_eta_normal <- function(lavobject = NULL, # for convenience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, lavimplied = NULL, # optional new data data.obs = NULL, eXo = NULL, fsm = FALSE) { # full object? if(inherits(lavobject, "lavaan")) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavsamplestats <- lavobject@SampleStats lavimplied <- lavobject@implied } else { stopifnot(!is.null(lavmodel), !is.null(lavdata), !is.null(lavsamplestats), !is.null(lavimplied)) } if(is.null(data.obs)) { data.obs <- lavdata@X } # eXo not needed # missings? and missing = "ml"?
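# (with missing = "ml", the missing values are first imputed, per missing
# pattern, from the model-implied mean vector and covariance matrix)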
# impute values under the normal if(lavdata@missing == "ml") { for(g in seq_len(lavdata@ngroups)) { data.obs[[g]] <- lav_mvnorm_missing_impute_pattern(Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavimplied$mean[[g]], Sigma = lavimplied$cov[[g]]) } } LAMBDA <- computeLAMBDA(lavmodel = lavmodel, remove.dummy.lv = FALSE) Sigma.hat <- lavimplied$cov Sigma.hat.inv <- lapply(Sigma.hat, solve) VETA <- computeVETA(lavmodel = lavmodel, lavsamplestats = lavsamplestats) EETA <- computeEETA(lavmodel = lavmodel, lavsamplestats = lavsamplestats) EY <- computeEY( lavmodel = lavmodel, lavsamplestats = lavsamplestats) FS <- vector("list", length = lavdata@ngroups) if(fsm) { FSM <- vector("list", length = lavdata@ngroups) } for(g in 1:lavdata@ngroups) { nfac <- ncol(VETA[[g]]) if(nfac == 0L) { FS[[g]] <- matrix(0, lavdata@nobs[[g]], nfac) next } # factor score coefficient matrix 'C' FSC <- VETA[[g]] %*% t(LAMBDA[[g]]) %*% Sigma.hat.inv[[g]] if(fsm) { FSM[[g]] <- FSC } RES <- sweep(data.obs[[g]], MARGIN = 2L, STATS = EY[[g]], FUN = "-") FS.g <- sweep(RES %*% t(FSC), MARGIN = 2L, STATS = EETA[[g]], FUN = "+") # remove dummy lv's #lv.dummy.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], # lavmodel@ov.x.dummy.lv.idx[[g]]) #if(length(lv.dummy.idx) > 0L) { # FS.g <- FS.g[,-lv.dummy.idx,drop=FALSE] #} # replace values in dummy lv's by their observed counterpart if(length(lavmodel@ov.y.dummy.lv.idx[[g]]) > 0L) { FS.g[, lavmodel@ov.y.dummy.lv.idx[[g]] ] <- data.obs[[g]][, lavmodel@ov.y.dummy.ov.idx[[g]], drop = FALSE] } if(length(lavmodel@ov.x.dummy.lv.idx[[g]]) > 0L) { FS.g[, lavmodel@ov.x.dummy.lv.idx[[g]] ] <- data.obs[[g]][, lavmodel@ov.x.dummy.ov.idx[[g]], drop = FALSE] } FS[[g]] <- FS.g } if(fsm) { attr(FS, "fsm") <- FSM } FS } # factor scores - normal case - Bartlett method # NOTES: 1) this is the classic 'Bartlett' method; for the linear/continuous # case, this is equivalent to 'ML' # 2) the usual formula is: # FSC = solve(lambda' theta.inv lambda) (lambda' theta.inv) # BUT to deal with zero or negative variances, we use the # 'GLS' version instead: # FSC = solve(lambda' sigma.inv lambda) (lambda' sigma.inv) # Reference: Bentler & Yuan (1997) 'Optimal Conditionally Unbiased # Equivariant Factor Score Estimators' # in Berkane (Ed) 'Latent variable modeling with # applications to causality' (Springer-Verlag) # 3) instead of solve(), we use MASS::ginv, for special settings where # -by construction- (lambda' sigma.inv lambda) is singular lav_predict_eta_bartlett <- function(lavobject = NULL, # for convenience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, lavimplied = NULL, # optional new data data.obs = NULL, eXo = NULL, fsm = FALSE) { # full object? if(inherits(lavobject, "lavaan")) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavsamplestats <- lavobject@SampleStats lavimplied <- lavobject@implied } else { stopifnot(!is.null(lavmodel), !is.null(lavdata), !is.null(lavsamplestats), !is.null(lavimplied)) } if(is.null(data.obs)) { data.obs <- lavdata@X } # eXo not needed # missings? and missing = "ml"? 
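# (same pre-processing as in the regression method above)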
# impute values under the normal if(lavdata@missing == "ml") { for(g in seq_len(lavdata@ngroups)) { data.obs[[g]] <- lav_mvnorm_missing_impute_pattern(Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavimplied$mean[[g]], Sigma = lavimplied$cov[[g]]) } } LAMBDA <- computeLAMBDA(lavmodel = lavmodel, remove.dummy.lv = FALSE) Sigma.hat.inv <- lapply(lavimplied$cov, solve) EETA <- computeEETA(lavmodel = lavmodel, lavsamplestats = lavsamplestats) EY <- computeEY( lavmodel = lavmodel, lavsamplestats = lavsamplestats) FS <- vector("list", length = lavdata@ngroups) if(fsm) { FSM <- vector("list", length = lavdata@ngroups) } for(g in 1:lavdata@ngroups) { nfac <- length(EETA[[g]]) if(nfac == 0L) { FS[[g]] <- matrix(0, lavdata@nobs[[g]], nfac) next } # factor score coefficient matrix 'C' FSC = (MASS::ginv(t(LAMBDA[[g]]) %*% Sigma.hat.inv[[g]] %*% LAMBDA[[g]]) %*% t(LAMBDA[[g]]) %*% Sigma.hat.inv[[g]] ) if(fsm) { FSM[[g]] <- FSC } RES <- sweep(data.obs[[g]], MARGIN = 2L, STATS = EY[[g]], FUN = "-") FS.g <- sweep(RES %*% t(FSC), MARGIN = 2L, STATS = EETA[[g]], FUN = "+") # remove dummy lv's #lv.dummy.idx <- c(lavmodel@ov.y.dummy.lv.idx[[g]], # lavmodel@ov.x.dummy.lv.idx[[g]]) #if(length(lv.dummy.idx) > 0L) { # FS.g <- FS.g[,-lv.dummy.idx,drop=FALSE] #} # replace values in dummy lv's by their observed counterpart if(length(lavmodel@ov.y.dummy.lv.idx[[g]]) > 0L) { FS.g[, lavmodel@ov.y.dummy.lv.idx[[g]] ] <- data.obs[[g]][, lavmodel@ov.y.dummy.ov.idx[[g]], drop = FALSE] } if(length(lavmodel@ov.x.dummy.lv.idx[[g]]) > 0L) { FS.g[, lavmodel@ov.x.dummy.lv.idx[[g]] ] <- data.obs[[g]][, lavmodel@ov.x.dummy.ov.idx[[g]], drop = FALSE] } FS[[g]] <- FS.g } if(fsm) { attr(FS, "fsm") <- FSM } FS } # factor scores - EBM lav_predict_eta_ebm <- function(lavobject = NULL, # for convenience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, # optional new data data.obs = NULL, eXo = NULL, optim.method = "nlminb") { stopifnot(optim.method %in% c("nlminb", "BFGS")) ### FIXME: if all indicators of a factor are normal, can we not ### just use the `classic' regression method?? ### (perhaps after whitening, to get uncorrelated factors...) # full object? if(inherits(lavobject, "lavaan")) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavsamplestats <- lavobject@SampleStats } else { stopifnot(!is.null(lavmodel), !is.null(lavdata), !is.null(lavsamplestats)) } # new data? if(is.null(data.obs)) { data.obs <- lavdata@X } if(is.null(eXo)) { eXo <- lavdata@eXo } VETAx <- computeVETAx(lavmodel = lavmodel) VETAx.inv <- VETAx for(g in seq_len(lavdata@ngroups)) { if(nrow(VETAx[[g]]) > 0L) { VETAx.inv[[g]] <- solve(VETAx[[g]]) } } EETAx <- computeEETAx(lavmodel = lavmodel, lavsamplestats = lavsamplestats, eXo = eXo, remove.dummy.lv = TRUE) ## FIXME? TH <- computeTH( lavmodel = lavmodel) THETA <- computeTHETA(lavmodel = lavmodel) # local objective function: x = lv values f.eta.i <- function(x, y.i, x.i, mu.i) { # add 'dummy' values (if any) for ov.y if(length(lavmodel@ov.y.dummy.lv.idx[[g]]) > 0L) { x2 <- c(x, data.obs[[g]][i, lavmodel@ov.y.dummy.ov.idx[[g]], drop = FALSE]) } else { x2 <- x } # conditional density of y, given eta.i(=x) log.fy <- lav_predict_fy_eta.i(lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, y.i = y.i, x.i = x.i, eta.i = matrix(x2, nrow=1L), # <---- eta! 
theta.sd = theta.sd, th = th, th.idx = th.idx, log = TRUE) diff <- t(x) - mu.i V <- VETAx.inv[[g]] # handle missing values: we skip them tmp <- as.numeric(0.5 * diff %*% V %*% t(diff)) # handle missing values: we skip them: FIXME!!! out <- tmp - sum(log.fy, na.rm=TRUE) out } FS <- vector("list", length=lavdata@ngroups) for(g in seq_len(lavdata@ngroups)) { nfac <- ncol(VETAx[[g]]) nfac2 <- nfac if(length(lavmodel@ov.y.dummy.lv.idx[[g]]) > 0L) { nfac2 <- nfac2 + length(lavmodel@ov.y.dummy.lv.idx[[g]]) } FS[[g]] <- matrix(as.numeric(NA), nrow(data.obs[[g]]), nfac2) # special case: no regular lv's if(nfac == 0) { # impute dummy ov.y (if any) FS[[g]][, lavmodel@ov.y.dummy.ov.idx[[g]] ] <- data.obs[[g]][, lavmodel@ov.y.dummy.ov.idx[[g]], drop = FALSE] next } ## FIXME: factor scores not identical (but close) to Mplus # if delta elements not equal to 1?? mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g] MLIST <- lavmodel@GLIST[ mm.in.group ] # check for negative values neg.var.idx <- which(diag(THETA[[g]]) < 0) if(length(neg.var.idx) > 0) { warning("lavaan WARNING: factor scores could not be computed due to at least one negative (residual) variance") next } # common values theta.sd <- sqrt(diag(THETA[[g]])) th <- TH[[g]] th.idx <- lavmodel@th.idx[[g]] # casewise for now N <- nrow(data.obs[[g]]) for(i in 1:N) { # eXo? if(!is.null(eXo[[g]])) { x.i <- eXo[[g]][i,,drop=FALSE] } else { x.i <- NULL } mu.i <- EETAx[[g]][i,,drop=FALSE] y.i <- data.obs[[g]][i,,drop=FALSE] START <- numeric(nfac) # initial values for eta # find best values for eta.i if(optim.method == "nlminb") { out <- nlminb(start=START, objective=f.eta.i, gradient=NULL, # for now control=list(rel.tol=1e-8), y.i=y.i, x.i=x.i, mu.i=mu.i) } else if(optim.method == "BFGS") { out <- optim(par = START, fn = f.eta.i, gr = NULL, control = list(reltol = 1e-8), method = "BFGS", y.i = y.i, x.i = x.i, mu.i = mu.i) } if(out$convergence == 0L) { eta.i <- out$par } else { eta.i <- rep(as.numeric(NA), nfac) } # add dummy ov.y lv values if(length(lavmodel@ov.y.dummy.lv.idx[[g]]) > 0L) { eta.i <- c(eta.i, data.obs[[g]][i, lavmodel@ov.y.dummy.ov.idx[[g]], drop = FALSE]) } FS[[g]][i,] <- eta.i } } FS } # predicted value for response y*_i, conditional on the predicted latent # variable scores # `measurement part': # y*_i = nu + lambda eta_i + K x_i + epsilon_i # # where eta_i = latent variable value for i (either given or from predict) # # Two types: 1) nrow(ETA) = nrow(X) (factor scores) # 2) nrow(ETA) = 1L (given values) # # in both cases, we return [nobs x nvar] matrix per group lav_predict_yhat <- function(lavobject = NULL, # for convience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, lavimplied = NULL, # new data data.obs = NULL, eXo = NULL, # ETA values ETA = NULL, # options method = "EBM", duplicate = FALSE, optim.method = "nlminb") { # full object? if(inherits(lavobject, "lavaan")) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavsamplestats <- lavobject@SampleStats lavimplied <- lavobject@implied } else { stopifnot(!is.null(lavmodel), !is.null(lavdata), !is.null(lavsamplestats), !is.null(lavimplied)) } # new data? if(is.null(data.obs)) { data.obs <- lavdata@X } if(is.null(eXo)) { eXo <- lavdata@eXo } # do we get values for ETA? 
If not, use `predict' to get plausible values if(is.null(ETA)) { ETA <- lav_predict_eta(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, lavimplied = lavimplied, data.obs = data.obs, eXo = eXo, method = method, optim.method = optim.method) } else { # list if(is.matrix(ETA)) { # user-specified? tmp <- ETA; ETA <- vector("list", length=lavdata@ngroups) ETA[seq_len(lavdata@ngroups)] <- list(tmp) } else if(is.list(ETA)) { stopifnot(lavdata@ngroups == length(ETA)) } } YHAT <- computeYHAT(lavmodel = lavmodel, GLIST = NULL, lavsamplestats = lavsamplestats, eXo = eXo, ETA = ETA, duplicate = duplicate) # if conditional.x, paste eXo if(lavmodel@categorical && !is.null(eXo)) { YHAT <- lapply(seq_len(lavdata@ngroups), function(g) { ret <- cbind(YHAT[[g]], eXo[[g]]) ret }) } YHAT } # conditional density y -- assuming independence!! # f(y_i | eta_i, x_i) for EACH item # lav_predict_fy <- function(lavobject = NULL, # for convience # sub objects lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, # new data data.obs = NULL, eXo = NULL, # ETA values ETA = NULL, # options method = "EBM", log. = FALSE, optim.method = "nlminb") { # full object? if(inherits(lavobject, "lavaan")) { lavmodel <- lavobject@Model lavdata <- lavobject@Data lavsamplestats <- lavobject@SampleStats } else { stopifnot(!is.null(lavmodel), !is.null(lavdata), !is.null(lavsamplestats)) } # new data? if(is.null(data.obs)) { data.obs <- lavdata@X } if(is.null(eXo)) { eXo <- lavdata@eXo } # we need the YHATs (per group) YHAT <- lav_predict_yhat(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavsamplestats = lavsamplestats, data.obs = data.obs, eXo = eXo, ETA = ETA, method = method, duplicate = FALSE, optim.method = optim.method) THETA <- computeTHETA(lavmodel = lavmodel) TH <- computeTH( lavmodel = lavmodel) # all normal? NORMAL <- all(lavdata@ov$type == "numeric") FY <- vector("list", length=lavdata@ngroups) for(g in seq_len(lavdata@ngroups)) { FY[[g]] <- lav_predict_fy_internal(X = data.obs[[g]], yhat = YHAT[[g]], TH = TH[[g]], THETA = THETA[[g]], num.idx = lavmodel@num.idx[[g]], th.idx = lavmodel@th.idx[[g]], link = lavmodel@link, log. = log.) } FY } # single group, internal function lav_predict_fy_internal <- function(X = NULL, yhat = NULL, TH = NULL, THETA = NULL, num.idx = NULL, th.idx = NULL, link = NULL, log. = FALSE) { # shortcuts theta.var <- diag(THETA) # check size YHAT (either 1L or Nobs rows) if(! (nrow(yhat) == 1L || nrow(yhat) == nrow(X)) ) { stop("lavaan ERROR: nrow(YHAT[[g]]) not 1L and not nrow(X))") } FY.group <- matrix(0, nrow(X), ncol(X)) #if(NORMAL) { # if(nrow(yhat) == nrow(X)) { # tmp <- (X - yhat)^2 # } else { # tmp <- sweep(X, MARGIN=2, STATS=yhat, FUN="-")^2 # } # tmp1 <- sweep(tmp, MARGIN=2, theta.var, "/") # tmp2 <- exp( -0.5 * tmp1 ) # tmp3 <- sweep(tmp2, MARGIN=2, sqrt(2*pi*theta.var), "/") # if(log.) { # FY.group <- log(tmp3) # } else { # FY.group <- tmp3 # } #} else { # mixed items ord.idx <- unique( th.idx[th.idx > 0L] ) # first, NUMERIC variables if(length(num.idx) > 0L) { # multivariate # FY.group[,num.idx] <- # dmnorm(X[,num.idx], # mean = yhat[n,num.idx], # varcov = THETA[[g]][num.idx, num.idx], log = log.) for(v in num.idx) { FY.group[,v] <- dnorm(X[,v], # YHAT may change or not per case mean = yhat[,v], sd = sqrt(theta.var[v]), log = log.) 
} } # second, ORDERED variables for(v in ord.idx) { th.y <- TH[ th.idx == v ]; TH.Y <- c(-Inf, th.y, Inf) ncat <- length(th.y) + 1L fy <- numeric(ncat) theta.v <- sqrt(theta.var[v]) yhat.v <- yhat[,v] # two cases: yhat.v is a scalar, or has length = nobs fy <- matrix(0, nrow=length(yhat.v), ncol=ncat) # for each category for(k in seq_len(ncat)) { if(link == "probit") { fy[,k] = pnorm( (TH.Y[k+1] - yhat.v) / theta.v) - pnorm( (TH.Y[k ] - yhat.v) / theta.v) } else if(link == "logit") { fy[,k] = plogis( (TH.Y[k+1] - yhat.v) / theta.v) - plogis( (TH.Y[k ] - yhat.v) / theta.v) } else { stop("lavaan ERROR: link must be probit or logit") } } # underflow idx <- which(fy < .Machine$double.eps) if(length(idx) > 0L) { fy[idx] <- .Machine$double.eps } # log? if(log.) { fy <- log(fy) } # case-wise expansion/selection if(length(yhat.v) == 1L) { # expand category probabilities for all observations FY.group[,v] <- fy[1L, X[,v]] } else { # select correct category probability per observation FY.group[,v] <- fy[ cbind(seq_len(nrow(fy)), X[,v]) ] } } # ord FY.group } # conditional density y -- assuming independence!! # f(y_i | eta_i, x_i) # # but for a SINGLE observation y_i (and x_i), for given values of eta_i # lav_predict_fy_eta.i <- function(lavmodel = NULL, lavdata = NULL, lavsamplestats = NULL, y.i = NULL, x.i = NULL, eta.i = NULL, theta.sd = NULL, g = 1L, th = NULL, th.idx = NULL, log = TRUE) { mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g] MLIST <- lavmodel@GLIST[ mm.in.group ] # linear predictor for all items YHAT <- computeEYetax.LISREL(MLIST = MLIST, eXo = x.i, ETA = eta.i, sample.mean = lavsamplestats@mean[[g]], ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[g]]) # P(y_i | eta_i, x_i) for all items if(all(lavdata@ov$type == "numeric")) { # NORMAL case FY <- dnorm(y.i, mean = YHAT, sd = theta.sd, log = log) } else { FY <- numeric(lavmodel@nvar[g]) for(v in seq_len(lavmodel@nvar[g])) { if(lavdata@ov$type[v] == "numeric") { ### FIXME!!! we can do all numeric vars at once!! 
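# normal density contribution of a single continuous indicator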
FY[v] <- dnorm(y.i[v], mean = YHAT[v], sd = theta.sd[v], log = log) } else if(lavdata@ov$type[v] == "ordered") { # handle missing value if(is.na(y.i[v])) { FY[v] <- as.numeric(NA) } else { th.y <- th[ th.idx == v ]; TH.Y <- c(-Inf, th.y, Inf) k <- y.i[v] p1 <- pnorm( (TH.Y[ k + 1 ] - YHAT[v])/theta.sd[v] ) p2 <- pnorm( (TH.Y[ k ] - YHAT[v])/theta.sd[v] ) prob <- (p1 - p2) if(prob < .Machine$double.eps) { prob <- .Machine$double.eps } if(log) { FY[v] <- log(prob) } else { FY[v] <- prob } } } else { stop("lavaan ERROR: unknown type: ", lavdata@ov$type[v], " for variable ", lavdata@ov$name[v]) } } } FY } lavaan/R/lav_integrate.R0000644000176200001440000002314013031507302014660 0ustar liggesusers# routines for numerical integration # integrate (-infty to +infty) a product of univariate Gaussian densities # with given means (mus) and standard deviations (sds) (or variances, vars) lav_integration_gaussian_product <- function(mus = NULL, sds = NULL, vars = NULL) { n <- length(mus) if(is.null(vars)) { vars <- sds^2 } # variance product var.prod <- 1/sum(1/vars) # mean product mu.prod <- sum(mus/vars)*var.prod # normalization constant const <- 1/sqrt((2*pi)^(n-1)) * sqrt(var.prod) * sqrt(1/prod(vars)) * exp(-0.5 * (sum(mus^2/vars) - mu.prod^2/var.prod)) const } # return Gauss-Hermite quadrature rule for given order (n) # return list: x = nodes, w = quadrature weights # # As noted by Wilf (1962, chapter 2, ex 9), the nodes are given by # the eigenvalues of the Jacobi matrix; weights are given by the squares of the # first components of the (normalized) eigenvectors, multiplied by sqrt(pi) # # (This is NOT identical to Golub & Welsch, 1968: as they used a specific # method tailored for tridiagonal symmetric matrices) # # TODO: look at https://github.com/ajt60gaibb/FastGaussQuadrature.jl/blob/master/src/gausshermite.jl # featuring the work of Ignace Bogaert (UGent) # # approximation of the integral of 'f(x) * exp(-x*x)' from -inf to +inf # by sum( f(x_i) * w_i ) # # CHECK: sum(w_i) should always be sqrt(pi) = 1.772454 lav_integration_gauss_hermite_xw <- function(n = 21L, revert = FALSE) { # force n to be an integer n <- as.integer(n); stopifnot(n > 0L) if(n == 1L) { x <- 0 w <- sqrt(pi) } else { # construct symmetric, tridiagonal Jacobi matrix # diagonal = 0, -1/+1 diagonal is sqrt(1:(n-1)/2) u <- sqrt(seq.int(n-1L)/2) # upper diagonal of J Jn <- matrix(0, n, n); didx <- lav_matrix_diag_idx(n) Jn[(didx+1)[-n]] <- u #Jn[(didx-1)[-1]] <- u # only lower matrix is used anyway # eigen decomposition # FIXME: use specialized function for tridiagonal symmetric matrix ev <- eigen(Jn, symmetric = TRUE) x <- ev$values tmp <- ev$vector[1L,] w <- sqrt(pi)*tmp*tmp } # revert? (minus to plus) if(revert) { x <- -x } list(x = x, w = w) } # generate GH points + weights lav_integration_gauss_hermite <- function(n = 21L, dnorm = FALSE, mean = 0, sd = 1, ndim = 1L, revert = TRUE, prune = FALSE) { XW <- lav_integration_gauss_hermite_xw(n = n, revert = revert) # dnorm kernel? if(dnorm) { # scale/shift x x <- XW$x * sqrt(2) * sd + mean # scale w w <- XW$w / sqrt(pi) } else { x <- XW$x w <- XW$w } if(ndim > 1L) { # cartesian product x <- as.matrix(expand.grid(rep(list(x), ndim), KEEP.OUT.ATTRS = FALSE)) w <- as.matrix(expand.grid(rep(list(w), ndim), KEEP.OUT.ATTRS = FALSE)) w <- apply(w, 1, prod) } else { x <- as.matrix(x) w <- as.matrix(w) } # prune?
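# optionally remove quadrature points with very small weights: prune = TRUE
# uses a fixed lower limit, while a numeric value in (0, 1) is treated as a
# quantile of the weights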
if(is.logical(prune) && prune) { # always divide by N=21 lower.limit <- XW$w[1] * XW$w[floor((n+1)/2)] / 21 keep.idx <- which(w > lower.limit) w <- w[keep.idx] x <- x[keep.idx,, drop = FALSE] } else if(is.numeric(prune) && prune > 0) { lower.limit <- quantile(w, probs = prune) keep.idx <- which(w > lower.limit) w <- w[keep.idx] x <- x[keep.idx,, drop = FALSE] } list(x=x, w=w) } # backwards compatibility lav_integration_gauss_hermite_dnorm <- function(n = 21L, mean = 0, sd = 1, ndim = 1L, revert = TRUE, prune = FALSE) { lav_integration_gauss_hermite(n = n, dnorm = TRUE, mean = mean, sd = sd, ndim = ndim, revert = revert, prune = prune) } # plot 2-dim # out <- lavaan:::lav_integration_gauss_hermite_dnorm(n = 20, ndim = 2) # plot(out$x, cex = -10/log(out$w), col = "darkgrey", pch=19) # integrand g(x) has the form g(x) = f(x) dnorm(x, m, s^2) lav_integration_f_dnorm <- function(func = NULL, # often ly.prod dnorm.mean = 0, # dnorm mean dnorm.sd = 1, # dnorm sd XW = NULL, # GH points n = 21L, # number of nodes adaptive = FALSE, # adaptive? iterative = FALSE, # iterative? max.iter = 20L, # max iterations verbose = FALSE, # verbose? ...) { # optional args for 'f' # create GH rule if(is.null(XW)) { XW <- lav_integration_gauss_hermite_xw(n = n, revert = TRUE) } if(!adaptive) { w.star <- XW$w / sqrt(pi) x.star <- dnorm.sd*(sqrt(2)*XW$x) + dnorm.mean out <- sum( func(x.star, ...) * w.star ) } else { # Naylor & Smith (1982, 1988) if(iterative) { mu.est <- 0; sd.est <- 1 for(i in 1:max.iter) { w.star <- sqrt(2) * sd.est * dnorm(sqrt(2)*sd.est*XW$x + mu.est,dnorm.mean, dnorm.sd) * exp(XW$x^2) * XW$w x.star <- sqrt(2)*sd.est*XW$x + mu.est LIK <- sum( func(x.star, ...) * w.star ) # update mu mu.est <- sum(x.star * (func(x.star, ...) * w.star)/LIK) # update sd var.est <- sum(x.star^2 * (func(x.star, ...) * w.star)/LIK) - mu.est^2 sd.est <- sqrt(var.est) if(verbose) { cat("i = ", i, "LIK = ", LIK, "mu.est = ", mu.est, "sd.est = ", sd.est, "\n") } } out <- LIK # Liu and Pierce (1994) } else { # integrand g(x) = func(x) * dnorm(x; m, s^2) log.g <- function(x, ...) { ## FIXME: should we take the log right away? log(func(x, ...) * dnorm(x, mean = dnorm.mean, sd = dnorm.sd)) } # find mu hat and sd hat mu.est <- optimize(f = log.g, interval = c(-10,10), maximum = TRUE, tol=.Machine$double.eps, ...)$maximum H <- as.numeric(numDeriv::hessian(func = log.g, x = mu.est, ...)) sd.est <- sqrt(1/-H) w.star <- sqrt(2) * sd.est * dnorm(sd.est*(sqrt(2)*XW$x) + mu.est,dnorm.mean,dnorm.sd) * exp(XW$x^2) * XW$w x.star <- sd.est*(sqrt(2)*XW$x) + mu.est out <- sum( func(x.star, ...) * w.star ) } } out } # integrand g(z) has the form g(z) = f(sz+m) dnorm(z, 0, 1) lav_integration_f_dnorm_z <- function(func = NULL, # often ly.prod f.mean = 0, # f mean f.sd = 1, # f sd XW = NULL, # GH points n = 21L, # number of nodes adaptive = FALSE, # adaptive? iterative = FALSE, # iterative? max.iter = 20L, # max iterations verbose = FALSE, # verbose? ...) { # optional args for 'f' # create GH rule if(is.null(XW)) { XW <- lav_integration_gauss_hermite_xw(n = n, revert = TRUE) } if(!adaptive) { w.star <- XW$w / sqrt(pi) x.star <- sqrt(2)*XW$x out <- sum( func(f.sd*x.star + f.mean, ...) * w.star ) } else { # Naylor & Smith (1982, 1988) if(iterative) { mu.est <- 0; sd.est <- 1 for(i in 1:max.iter) { w.star <- sqrt(2) * sd.est * dnorm(sd.est*sqrt(2)*XW$x + mu.est,0, 1) * exp(XW$x^2) * XW$w x.star <- sd.est*(sqrt(2)*XW$x) + mu.est LIK <- sum( func(f.sd*x.star + f.mean, ...) * w.star ) # update mu mu.est <- sum(x.star * (func(f.sd*x.star + f.mean, ...) 
* w.star)/LIK) # update sd var.est <- sum(x.star^2 * (func(f.sd*x.star + f.mean, ...) * w.star)/LIK) - mu.est^2 sd.est <- sqrt(var.est) if(verbose) { cat("i = ", i, "LIK = ", LIK, "mu.est = ", mu.est, "sd.est = ", sd.est, "\n") } } out <- LIK # Liu and Pierce (1994) } else { # integrand g(x) = func(x) * dnorm(x; m, s^2) log.gz <- function(x, ...) { ## FIXME: should we take the log right away? log(func(f.sd*x + f.mean, ...) * dnorm(x, mean = 0, sd = 1)) } # find mu hat and sd hat mu.est <- optimize(f = log.gz, interval = c(-10,10), maximum = TRUE, tol=.Machine$double.eps, ...)$maximum H <- as.numeric(numDeriv::hessian(func = log.gz, x = mu.est, ...)) sd.est <- sqrt(1/-H) w.star <- sqrt(2) * sd.est * dnorm(sd.est*(sqrt(2)*XW$x) + mu.est,0,1) * exp(XW$x^2) * XW$w x.star <- sd.est*(sqrt(2)*XW$x) + mu.est out <- sum( func(f.sd*x.star + f.mean, ...) * w.star ) } } out } lavaan/R/lav_model.R0000644000176200001440000002700313053023010013767 0ustar liggesusers# constructor of the matrix lavoptions$representation # # initial version: YR 22/11/2010 # - YR 14 Jan 2014: moved to lav_model.R # - YR 18 Nov 2014: more efficient handling of linear equality constraints # - YR 02 Dec 2014: allow for bare-minimum parameter tables # - YR 25 Jan 2017: collect options in lavoptions # construct MATRIX lavoptions$representation of the model lav_model <- function(lavpartable = NULL, lavoptions = NULL, th.idx = list()) { # handle bare-minimum partables lavpartable <- lav_partable_complete(lavpartable) # global info from user model nblocks <- lav_partable_nblocks(lavpartable) meanstructure <- any(lavpartable$op == "~1") categorical <- any(lavpartable$op == "|") if(categorical) meanstructure <- TRUE group.w.free <- any(lavpartable$lhs == "group" & lavpartable$op == "%") # handle variable definitions and (in)equality constraints CON <- lav_constraints_parse(partable = lavpartable, constraints = NULL, debug = lavoptions$debug) # handle *linear* equality constraints special if(CON$ceq.linear.only.flag) { con.jac <- CON$ceq.JAC con.lambda <- numeric(nrow(CON$ceq.JAC)) attr(con.jac, "inactive.idx") <- integer(0L) attr(con.jac, "ceq.idx") <- seq_len( nrow(CON$ceq.JAC) ) } else { con.jac <- matrix(0,0,0) con.lambda <- numeric(0) } # select model matrices if(lavoptions$representation == "LISREL") { REP <- representation.LISREL(lavpartable, target=NULL, extra=TRUE) } else { stop("lavaan ERROR: only representation \"LISREL\" has been implemented.") } if(lavoptions$debug) print(REP) # FIXME: check for non-existing parameters bad.idx <- which(REP$mat == "" & !lavpartable$op %in% c("==","<",">",":=")) if(length(bad.idx) > 0L) { label <- paste(lavpartable$lhs[bad.idx[1]], lavpartable$op[bad.idx[1]], lavpartable$rhs[bad.idx[1]], sep = " ") stop("lavaan ERROR: parameter is not defined: ", label) } # prepare nG-sized slots nG <- sum(unlist(attr(REP, "mmNumber"))) GLIST <- vector(mode="list", nG) names(GLIST) <- unlist(attr(REP, "mmNames")) dimNames <- vector(mode="list", length=nG) isSymmetric <- logical(nG) mmSize <- integer(nG) m.free.idx <- m.user.idx <- vector(mode="list", length=nG) x.free.idx <- x.user.idx <- vector(mode="list", length=nG) # prepare nblocks-sized slots nvar <- integer(nblocks) nmat <- unlist(attr(REP, "mmNumber")) num.idx <- vector("list", length=nblocks) nexo <- integer(nblocks) ov.x.dummy.ov.idx <- vector(mode="list", length=nblocks) ov.x.dummy.lv.idx <- vector(mode="list", length=nblocks) ov.y.dummy.ov.idx <- vector(mode="list", length=nblocks) ov.y.dummy.lv.idx <- vector(mode="list", length=nblocks) offset 
<- 0L for(g in 1:nblocks) { # observed and latent variables for this block ov.names <- lav_partable_vnames(lavpartable, "ov", block = g) ov.names.nox <- lav_partable_vnames(lavpartable, "ov.nox", block = g) ov.names.x <- lav_partable_vnames(lavpartable, "ov.x", block = g) nexo[g] <- length(ov.names.x) ov.num <- lav_partable_vnames(lavpartable, "ov.num", block = g) if(lavoptions$conditional.x) { nvar[g] <- length(ov.names.nox) } else { nvar[g] <- length(ov.names) } num.idx[[g]] <- match(ov.num, ov.names.nox) # model matrices for this block mmNumber <- attr(REP, "mmNumber")[[g]] mmNames <- attr(REP, "mmNames")[[g]] mmSymmetric <- attr(REP, "mmSymmetric")[[g]] mmDimNames <- attr(REP, "mmDimNames")[[g]] mmRows <- attr(REP, "mmRows")[[g]] mmCols <- attr(REP, "mmCols")[[g]] for(mm in 1:mmNumber) { # offset in GLIST offset <- offset + 1L # matrix size, symmetric, dimNames if(mmSymmetric[mm]) { N <- mmRows[mm] mm.size <- as.integer(N*(N+1)/2) } else { mm.size <- as.integer(mmRows[mm] * mmCols[mm]) } mmSize[offset] <- mm.size isSymmetric[offset] <- mmSymmetric[mm] dimNames[[offset]] <- mmDimNames[[mm]] # select elements for this matrix idx <- which(lavpartable$block == g & REP$mat == mmNames[mm]) # create empty `pattern' matrix # FIXME: one day, we may want to use sparse matrices... # but they should not slow things down! tmp <- matrix(0L, nrow=mmRows[mm], ncol=mmCols[mm]) # 1. first assign free values only, to get vector index # -> to be used in lav_model_objective tmp[ cbind(REP$row[idx], REP$col[idx]) ] <- lavpartable$free[idx] if(mmSymmetric[mm]) { # NOTE: we assume everything is in the UPPER tri! T <- t(tmp); tmp[lower.tri(tmp)] <- T[lower.tri(T)] } m.free.idx[[offset]] <- which(tmp > 0) x.free.idx[[offset]] <- tmp[which(tmp > 0)] # 2. if equality constraints, unconstrained free parameters # -> to be used in lav_model_gradient #if(CON$ceq.linear.only.flag) { # tmp[ cbind(REP$row[idx], # REP$col[idx]) ] <- lavpartable$unco[idx] # if(mmSymmetric[mm]) { # # NOTE: we assume everything is in the UPPER tri! # T <- t(tmp); tmp[lower.tri(tmp)] <- T[lower.tri(T)] # } # m.unco.idx[[offset]] <- which(tmp > 0) # x.unco.idx[[offset]] <- tmp[which(tmp > 0)] #} else { # m.unco.idx[[offset]] <- m.free.idx[[offset]] # x.unco.idx[[offset]] <- x.free.idx[[offset]] #} # 3. general mapping between user and GLIST tmp[ cbind(REP$row[idx], REP$col[idx]) ] <- lavpartable$id[idx] if(mmSymmetric[mm]) { T <- t(tmp); tmp[lower.tri(tmp)] <- T[lower.tri(T)] } m.user.idx[[offset]] <- which(tmp > 0) x.user.idx[[offset]] <- tmp[which(tmp > 0)] # 4. now assign starting/fixed values # create empty matrix # FIXME: again, we may want to use sparse matrices here... 
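# dense numeric matrix holding the starting/fixed values for this
# model matrix, as given by the 'start' column of the parameter table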
tmp <- matrix(0.0, nrow=mmRows[mm], ncol=mmCols[mm]) tmp[ cbind(REP$row[idx], REP$col[idx]) ] <- lavpartable$start[idx] if(mmSymmetric[mm]) { T <- t(tmp); tmp[lower.tri(tmp)] <- T[lower.tri(T)] } # representation specific stuff if(lavoptions$representation == "LISREL" && mmNames[mm] == "lambda") { ov.dummy.names.nox <- attr(REP, "ov.dummy.names.nox")[[g]] ov.dummy.names.x <- attr(REP, "ov.dummy.names.x")[[g]] ov.dummy.names <- c(ov.dummy.names.nox, ov.dummy.names.x) # define dummy latent variables if(length(ov.dummy.names)) { # in this case, lv.names will be extended with the dummys LV.names <- mmDimNames$psi[[1]] row..idx <- match(ov.dummy.names, ov.names) col..idx <- match(ov.dummy.names, LV.names) # Fix lambda values to 1.0 tmp[ cbind(row..idx, col..idx)] <- 1.0 ov.x.dummy.ov.idx[[g]] <- match(ov.dummy.names.x,ov.names) ov.x.dummy.lv.idx[[g]] <- match(ov.dummy.names.x,LV.names) ov.y.dummy.ov.idx[[g]] <- match(ov.dummy.names.nox,ov.names) ov.y.dummy.lv.idx[[g]] <- match(ov.dummy.names.nox,LV.names) } } # representation specific if(lavoptions$representation == "LISREL" && mmNames[mm] == "delta") { # only categorical values are listed in the lavpartable # but all remaining values should be 1.0 idx <- which(tmp[,1L] == 0.0) tmp[idx,1L] <- 1.0 } # assign matrix to GLIST GLIST[[offset]] <- tmp } # mm } # g # fixed.x parameters? #fixed.x <- any(lavpartable$exo > 0L & lavpartable$free == 0L) #if(categorical) { # fixed.x <- TRUE #} Model <- new("lavModel", GLIST=GLIST, dimNames=dimNames, isSymmetric=isSymmetric, mmSize=mmSize, representation=lavoptions$representation, meanstructure=meanstructure, categorical=categorical, link=lavoptions$link, nblocks=nblocks, ngroups=nblocks, # for rsem!!! group.w.free=group.w.free, nmat=nmat, nvar=nvar, num.idx=num.idx, th.idx=th.idx, nx.free=max(lavpartable$free), #nx.unco=max(lavpartable$unco), nx.user=max(lavpartable$id), m.free.idx=m.free.idx, x.free.idx=x.free.idx, #m.unco.idx=m.unco.idx, #x.unco.idx=x.unco.idx, m.user.idx=m.user.idx, x.user.idx=x.user.idx, x.def.idx=which(lavpartable$op == ":="), x.ceq.idx=which(lavpartable$op == "=="), x.cin.idx=which(lavpartable$op == ">" | lavpartable$op == "<"), eq.constraints = CON$ceq.linear.only.flag, eq.constraints.K = CON$ceq.JAC.NULL, eq.constraints.k0 = CON$ceq.rhs.NULL, def.function = CON$def.function, ceq.function = CON$ceq.function, ceq.JAC = CON$ceq.JAC, ceq.rhs = CON$ceq.rhs, ceq.jacobian = CON$ceq.jacobian, ceq.linear.idx = CON$ceq.linear.idx, ceq.nonlinear.idx = CON$ceq.nonlinear.idx, cin.function = CON$cin.function, cin.JAC = CON$cin.JAC, cin.rhs = CON$cin.rhs, cin.jacobian = CON$cin.jacobian, cin.linear.idx = CON$cin.linear.idx, cin.nonlinear.idx = CON$cin.nonlinear.idx, con.jac = con.jac, con.lambda = con.lambda, nexo = nexo, fixed.x = lavoptions$fixed.x, conditional.x = lavoptions$conditional.x, #x.idx = x.idx, parameterization = lavoptions$parameterization, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, estimator = lavoptions$estimator) if(lavoptions$debug) { cat("lavaan lavoptions$debug: lavaanModel\n") print( str(Model) ) print( Model@GLIST ) } Model } # for backwards compatibility # Model <- lav_model lavaan/R/lav_syntax_independence.R0000644000176200001440000000504712505536212016740 0ustar liggesusers# generate syntax for an independence model lav_syntax_independence <- function(ov.names=character(0), ov.names.x=character(0), sample.cov=NULL) { ov.names.nox <- ov.names[!ov.names %in% 
ov.names.x] nvar <- length(ov.names.nox) lv.names <- paste("f", 1:nvar, sep="") # check sample.cov if(!is.null(sample.cov)) { if(is.list(sample.cov)) { ngroups <- length(sample.cov) } else { ngroups <- 1L sample.cov <- list(sample.cov) } stopifnot(is.matrix(sample.cov[[1]])) #stopifnot(length(ov.names) == nrow(sample.cov[[1]])) # FIXME: check rownames and reorder... } # construct lavaan syntax for an independence model txt <- "# independence model\n" # =~ lines (each observed variables has its own latent variable) # excepct for ov's that are in ov.names.x txt <- paste(txt, paste(lv.names, " =~ 1*", ov.names.nox, "\n", sep="", collapse=""), sep="") # residual ov variances fixed to zero txt <- paste(txt, paste(ov.names.nox, " ~~ 0*", ov.names.nox, "\n", sep="", collapse=""), sep="") # latent variances if(is.null(sample.cov)) { txt <- paste(txt, paste(lv.names, " ~~ ", lv.names, "\n", sep="", collapse=""), sep="") } else { # fill in sample values ov.idx <- match(ov.names.nox, ov.names) start.txt <- paste("start(c(", apply(matrix(unlist(lapply(sample.cov, function(x) { diag(x)[ov.idx] })), ncol=ngroups), 1,paste,collapse=","), "))", sep="") txt <- paste(txt, paste(lv.names, " ~~ ", start.txt, " * ", lv.names, "\n", sep="", collapse=""), sep="") } # latent *covariances* fixed to zero (= independence!) if(length(lv.names) > 1L) { tmp <- utils::combn(lv.names, 2) txt <- paste(txt, paste(tmp[1,], " ~~ 0*", tmp[2,], "\n", sep="", collapse=""), sep="") } # if 'independent x' variables, add an 'empty' regression if((nx <- length(ov.names.x)) > 0) { # dummy regression line txt <- paste(txt, paste("f1 ~ 0*", ov.names.x, "\n", sep="", collapse=""), sep="") } # Note: no need to pass starting values here, lavaanStart will # use the sample statistics anyway.... txt } lavaan/R/lav_partable_merge.R0000644000176200001440000001162213053102333015646 0ustar liggesusers# merge two parameter tables # - but allow different number of columns lav_partable_merge <- function(pt1 = NULL, pt2 = NULL, remove.duplicated = FALSE, fromLast=FALSE, warn = TRUE) { pt1 <- as.data.frame(pt1, stringsAsFactors = FALSE) pt2 <- as.data.frame(pt2, stringsAsFactors = FALSE) # check minimum requirements: lhs, op, rhs stopifnot( !is.null(pt1$lhs), !is.null(pt1$op), !is.null(pt1$rhs), !is.null(pt2$lhs), !is.null(pt2$op), !is.null(pt2$rhs) ) # both should have block (or not) if(is.null(pt1$block) && is.null(pt2$block)) { pt1$block <- rep(1L, length(pt1$lhs)) pt2$block <- rep(1L, length(pt2$lhs)) TMP <- rbind(pt1[, c("lhs","op","rhs","block")], pt2[, c("lhs","op","rhs","block")]) } else { if(is.null(pt1$block) && !is.null(pt2$block)) { pt1$block <- rep(1L, length(pt1$lhs)) } else if(is.null(pt2$block) && !is.null(pt1$block)) { pt2$block <- rep(1L, length(pt2$lhs)) } TMP <- rbind(pt1[, c("lhs","op","rhs","block")], pt2[, c("lhs","op","rhs","block")]) } # if missing columns, provide default values of the right type # (numeric/integer/character) # group if(is.null(pt1$group) && !is.null(pt2$group)) { pt1$group <- rep(0L, length(pt1$lhs)) } else if(is.null(pt2$group) && !is.null(pt1$group)) { pt2$group <- rep(0L, length(pt2$lhs)) } # level if(is.null(pt1$level) && !is.null(pt2$level)) { pt1$level <- rep(0L, length(pt1$lhs)) } else if(is.null(pt2$level) && !is.null(pt1$level)) { pt2$level <- rep(0L, length(pt2$lhs)) } # user if(is.null(pt1$user) && !is.null(pt2$user)) { pt1$user <- rep(0L, length(pt1$lhs)) } else if(is.null(pt2$user) && !is.null(pt1$user)) { pt2$user <- rep(0L, length(pt2$lhs)) } # free if(is.null(pt1$free) && 
!is.null(pt2$free)) { pt1$free <- rep(0L, length(pt1$lhs)) } else if(is.null(pt2$free) && !is.null(pt1$free)) { pt2$free <- rep(0L, length(pt2$lhs)) } # ustart -- set to zero!! if(is.null(pt1$ustart) && !is.null(pt2$ustart)) { pt1$ustart <- rep(0, length(pt1$lhs)) } else if(is.null(pt2$ustart) && !is.null(pt1$ustart)) { pt2$ustart <- rep(0, length(pt2$lhs)) } # exo if(is.null(pt1$exo) && !is.null(pt2$exo)) { pt1$exo <- rep(0L, length(pt1$lhs)) } else if(is.null(pt2$exo) && !is.null(pt1$exo)) { pt2$exo <- rep(0L, length(pt2$lhs)) } # label if(is.null(pt1$label) && !is.null(pt2$label)) { pt1$label <- rep("", length(pt1$lhs)) } else if(is.null(pt2$label) && !is.null(pt1$label)) { pt2$label <- rep("", length(pt2$lhs)) } # plabel if(is.null(pt1$plabel) && !is.null(pt2$plabel)) { pt1$plabel <- rep("", length(pt1$lhs)) } else if(is.null(pt2$plabel) && !is.null(pt1$plabel)) { pt2$plabel <- rep("", length(pt2$lhs)) } # start if(is.null(pt1$start) && !is.null(pt2$start)) { pt1$start <- rep(as.numeric(NA), length(pt1$lhs)) } else if(is.null(pt2$start) && !is.null(pt1$start)) { pt2$start <- rep(as.numeric(NA), length(pt2$lhs)) } # est if(is.null(pt1$est) && !is.null(pt2$est)) { pt1$est <- rep(0, length(pt1$lhs)) } else if(is.null(pt2$est) && !is.null(pt1$est)) { pt2$est <- rep(0, length(pt2$lhs)) } # check for duplicated elements if(remove.duplicated) { # if fromLast = TRUE, idx is in pt1 # if fromLast = FALSE, idx is in pt2 idx <- which(duplicated(TMP, fromLast=fromLast)) if(length(idx)) { if(warn) { warning("lavaan WARNING: duplicated parameters are ignored:\n", paste(apply(pt1[idx, c("lhs","op","rhs")], 1, paste, collapse=" "), collapse="\n")) } if(fromLast) { pt1 <- pt1[-idx,] } else { idx <- idx - nrow(pt1) pt2 <- pt2[-idx,] } } } else if(!is.null(pt1$start) && !is.null(pt2$start)) { # copy start values from pt1 to pt2 for(i in 1:length(pt1$lhs)) { idx <- which(pt2$lhs == pt1$lhs[i] & pt2$op == pt1$op[i] & pt2$rhs == pt1$rhs[i] & pt2$block == pt1$block[i]) pt2$start[idx] <- pt1$start[i] } } # nicely merge, using 'id' column (if it comes first) if(is.null(pt1$id) && !is.null(pt2$id)) { nid <- max(pt2$id) pt1$id <- (nid+1L):(nid+nrow(pt1)) } else if(is.null(pt2$id) && !is.null(pt1$id)) { nid <- max(pt1$id) pt2$id <- (nid+1L):(nid+nrow(pt2)) } NEW <- base::merge(pt1, pt2, all = TRUE, sort = FALSE) NEW } lavaan/R/lav_partable_labels.R0000644000176200001440000001370113053005437016020 0ustar liggesusers# generate labels for each parameter lav_partable_labels <- function(partable, blocks = "group", group.equal = "", group.partial = "", type = "user") { # catch empty partable if(length(partable$lhs) == 0L) return(character(0L)) # default labels label <- paste(partable$lhs, partable$op, partable$rhs, sep="") # handle multiple groups if("group" %in% blocks) { if(is.character(partable$group)) { group.label <- unique(partable$group) group.label <- group.label[ nchar(group.label) > 0L ] ngroups <- length(group.label) } else { ngroups <- lav_partable_ngroups(partable) group.label <- 1:ngroups } if(ngroups > 1L) { for(g in 2:ngroups) { label[partable$group == group.label[g]] <- paste(label[partable$group == group.label[g]], ".g", g, sep="") } } } else { ngroups <- 1L } #cat("DEBUG: label start:\n"); print(label); cat("\n") #cat("group.equal = ", group.equal, "\n") #cat("group.partial = ", group.partial, "\n") # use group.equal so that equal sets of parameters get the same label if(ngroups > 1L && length(group.equal) > 0L) { if("intercepts" %in% group.equal || "residuals" %in% group.equal || 
"residual.covariances" %in% group.equal) { ov.names.nox <- vector("list", length=ngroups) for(g in 1:ngroups) ov.names.nox[[g]] <- lav_partable_vnames(partable, "ov.nox", group=g) } if("thresholds" %in% group.equal) { ov.names.ord <- vector("list", length=ngroups) for(g in 1:ngroups) ov.names.ord[[g]] <- lav_partable_vnames(partable, "ov.ord", group=g) } if("means" %in% group.equal || "lv.variances" %in% group.equal || "lv.covariances" %in% group.equal) { lv.names <- vector("list", length=ngroups) for(g in 1:ngroups) lv.names[[g]] <- lav_partable_vnames(partable, "lv", group=g) } # g1.flag: TRUE if included, FALSE if not g1.flag <- logical(length(which(partable$group == 1L))) # LOADINGS if("loadings" %in% group.equal) g1.flag[ partable$op == "=~" & partable$group == 1L ] <- TRUE # INTERCEPTS (OV) if("intercepts" %in% group.equal) g1.flag[ partable$op == "~1" & partable$group == 1L & partable$lhs %in% ov.names.nox[[1L]] ] <- TRUE # THRESHOLDS (OV-ORD) if("thresholds" %in% group.equal) g1.flag[ partable$op == "|" & partable$group == 1L & partable$lhs %in% ov.names.ord[[1L]] ] <- TRUE # MEANS (LV) if("means" %in% group.equal) g1.flag[ partable$op == "~1" & partable$group == 1L & partable$lhs %in% lv.names[[1L]] ] <- TRUE # REGRESSIONS if("regressions" %in% group.equal) g1.flag[ partable$op == "~" & partable$group == 1L ] <- TRUE # RESIDUAL variances (FIXME: OV ONLY!) if("residuals" %in% group.equal) g1.flag[ partable$op == "~~" & partable$group == 1L & partable$lhs %in% ov.names.nox[[1L]] & partable$lhs == partable$rhs ] <- TRUE # RESIDUAL covariances (FIXME: OV ONLY!) if("residual.covariances" %in% group.equal) g1.flag[ partable$op == "~~" & partable$group == 1L & partable$lhs %in% ov.names.nox[[1L]] & partable$lhs != partable$rhs ] <- TRUE # LV VARIANCES if("lv.variances" %in% group.equal) g1.flag[ partable$op == "~~" & partable$group == 1L & partable$lhs %in% lv.names[[1L]] & partable$lhs == partable$rhs ] <- TRUE # LV COVARIANCES if("lv.covariances" %in% group.equal) g1.flag[ partable$op == "~~" & partable$group == 1L & partable$lhs %in% lv.names[[1L]] & partable$lhs != partable$rhs ] <- TRUE # if group.partial, set corresponding flag to FALSE if(length(group.partial) > 0L) { g1.flag[ label %in% group.partial & partable$group == 1L ] <- FALSE } # for each (constrained) parameter in 'group 1', find a similar one # in the other groups (we assume here that the models need # NOT be the same across groups! g1.idx <- which(g1.flag) for(i in 1:length(g1.idx)) { ref.idx <- g1.idx[i] idx <- which(partable$lhs == partable$lhs[ref.idx] & partable$op == partable$op[ ref.idx] & partable$rhs == partable$rhs[ref.idx] & partable$group > 1L) label[idx] <- label[ref.idx] } } #cat("DEBUG: g1.idx = ", g1.idx, "\n") #cat("DEBUG: label after group.equal:\n"); print(label); cat("\n") # handle other block identifier (not 'group') for(block in blocks) { if(block == "group") { next } label <- paste(label, ".", partable[[block]], sep = "") } # user-specified labels -- override everything!! user.idx <- which(nchar(partable$label) > 0L) label[user.idx] <- partable$label[user.idx] #cat("DEBUG: user.idx = ", user.idx, "\n") #cat("DEBUG: label after user.idx:\n"); print(label); cat("\n") # which labels do we need? 
if(type == "user") { idx <- 1:length(label) } else if(type == "free") { idx <- which(partable$free > 0L & !duplicated(partable$free)) #} else if(type == "unco") { # idx <- which(partable$unco > 0L & !duplicated(partable$unco)) } else { stop("argument `type' must be one of free or user") } label[idx] } lavaan/R/lav_simulate.R0000644000176200001440000003720613053017374014540 0ustar liggesusers# simulate data starting from a user-specified model # # initial version: YR 24 jan 2011 # revision for 0.4-11: YR 21 okt 2011 simulateData <- function( # user-specified model model = NULL, model.type = "sem", # model modifiers meanstructure = FALSE, int.ov.free = TRUE, int.lv.free = FALSE, conditional.x = FALSE, fixed.x = FALSE, orthogonal = FALSE, std.lv = TRUE, auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = TRUE, auto.cov.lv.x = TRUE, auto.cov.y = TRUE, ..., # data properties sample.nobs = 500L, ov.var = NULL, group.label = paste("G", 1:nblocks, sep=""), skewness = NULL, kurtosis = NULL, # control seed = NULL, empirical = FALSE, return.type = "data.frame", return.fit = FALSE, debug = FALSE, standardized = FALSE ) { if(!is.null(seed)) set.seed(seed) #if(!exists(".Random.seed", envir = .GlobalEnv)) # runif(1) # initialize the RNG if necessary #RNGstate <- .Random.seed # lavaanify if(is.list(model)) { # two possibilities: either model is already lavaanified # or it is something else... if(!is.null(model$lhs) && !is.null(model$op) && !is.null(model$rhs) && !is.null(model$free)) { lav <- model } else if(is.character(model[[1]])) { stop("lavaan ERROR: model is a list, but not a parameterTable?") } } else { lav <- lavaanify(model = model, meanstructure=meanstructure, int.ov.free=int.ov.free, int.lv.free=int.lv.free, conditional.x=conditional.x, fixed.x=fixed.x, orthogonal=orthogonal, std.lv=std.lv, auto.fix.first=auto.fix.first, auto.fix.single=auto.fix.single, auto.var=auto.var, auto.cov.lv.x=auto.cov.lv.x, auto.cov.y=auto.cov.y, ngroups=length(sample.nobs)) } if(debug) { cat("initial lav\n") print(as.data.frame(lav)) } # fill in any remaining NA values (needed for unstandardize) # 1 for variances and (unstandardized) factor loadings, 0 otherwise idx <- which(lav$op == "=~" & is.na(lav$ustart)) if(length(idx) > 0L) { if(standardized) { lav$ustart[idx] <- 0.7 } else { lav$ustart[idx] <- 1.0 } } idx <- which(lav$op == "~~" & is.na(lav$ustart) & lav$lhs == lav$rhs) if(length(idx) > 0L) lav$ustart[idx] <- 1.0 idx <- which(lav$op == "~" & is.na(lav$ustart)) if(length(idx) > 0L) { warning("lavaan WARNING: some regression coefficients are unspecified and will be set to zero") } idx <- which(is.na(lav$ustart)) if(length(idx) > 0L) lav$ustart[idx] <- 0.0 if(debug) { cat("lav + default values\n") print(as.data.frame(lav)) } # set residual variances to enforce a standardized solution # but only if no *residual* variances have been specified in the syntax if(standardized) { # check if factor loadings are smaller than 1.0 lambda.idx <- which(lav$op == "=~") if(any(lav$ustart[lambda.idx] >= 1.0)) { warning("lavaan WARNING: standardized=TRUE but factor loadings are >= 1.0") } # check if regression coefficients are smaller than 1.0 reg.idx <- which(lav$op == "~") if(any(lav$ustart[reg.idx] >= 1.0)) { warning("lavaan WARNING: standardized=TRUE but regression coefficients are >= 1.0") } # for ordered observed variables, we will get '0.0', but that is ok # so there is no need to make a distinction between numeric/ordered # here?? 
lav2 <- lav nblocks <- lav_partable_nblocks(lav) ov.names <- vnames(lav, "ov") ov.nox <- vnames(lav, "ov.nox") lv.names <- vnames(lav, "lv") lv.y <- vnames(lav, "lv.y") ov.var.idx <- which(lav$op == "~~" & lav$lhs %in% ov.nox & lav$rhs == lav$lhs) lv.var.idx <- which(lav$op == "~~" & lav$lhs %in% lv.y & lav$rhs == lav$lhs) if(any(lav2$user[c(ov.var.idx, lv.var.idx)] > 0L)) { warning("lavaan WARNING: if residual variances are specified, please use standardized=FALSE") } lav2$ustart[c(ov.var.idx,lv.var.idx)] <- 0.0 fit <- lavaan(model=lav2, sample.nobs=sample.nobs, ...) Sigma.hat <- computeSigmaHat(lavmodel = fit@Model) ETA <- computeVETA(lavmodel = fit@Model, lavsamplestats = fit@SampleStats) if(debug) { cat("Sigma.hat:\n"); print(Sigma.hat) cat("Eta:\n"); print(ETA) } # standardized OV for(g in 1:nblocks) { var.block <- which(lav$op == "~~" & lav$lhs %in% ov.nox & lav$rhs == lav$lhs & lav$block == g) ov.idx <- match(ov.nox, ov.names) lav$ustart[var.block] <- 1 - diag(Sigma.hat[[g]])[ov.idx] } # standardize LV if(length(lv.y) > 0L) { for(g in 1:nblocks) { var.block <- which(lav$op == "~~" & lav$lhs %in% lv.y & lav$rhs == lav$lhs & lav$block == g) eta.idx <- match(lv.y, lv.names) lav$ustart[var.block] <- 1 - diag(ETA[[g]])[eta.idx] } } if(debug) { cat("after standardisation lav\n") print(as.data.frame(lav)) } } # unstandardize if(!is.null(ov.var)) { # FIXME: if ov.var is named, check the order of the elements # 1. unstandardize observed variables lav$ustart <- unstandardize.est.ov(partable=lav, ov.var=ov.var) # 2. unstandardized latent variables if(debug) { cat("after unstandardisation lav\n") print(as.data.frame(lav)) } } # fit the model without data fit <- lavaan(model=lav, sample.nobs=sample.nobs, ...) # the model-implied moments for the population Sigma.hat <- computeSigmaHat(lavmodel = fit@Model) Mu.hat <- computeMuHat(lavmodel = fit@Model) if(fit@Model@categorical) { TH <- computeTH(lavmodel = fit@Model) } if(debug) { cat("\nModel-implied moments (before Vale-Maurelli):\n") print(Sigma.hat) print(Mu.hat) if(exists("TH")) print(TH) } # nblocks nblocks <- length(sample.nobs) # prepare X <- vector("list", length=nblocks) out <- vector("list", length=nblocks) for(g in 1:nblocks) { COV <- Sigma.hat[[g]] # if empirical = TRUE, rescale by N/(N-1), so that estimator=ML # returns exact results if(empirical) { COV <- COV * sample.nobs[g] / (sample.nobs[g] - 1) } # FIXME: change to rmvnorm once we include the library? if(is.null(skewness) && is.null(kurtosis)) { X[[g]] <- MASS::mvrnorm(n = sample.nobs[g], mu = Mu.hat[[g]], Sigma = COV, empirical = empirical) } else { # first generate Z Z <- ValeMaurelli1983(n = sample.nobs[g], COR = cov2cor(COV), skewness = skewness, # FIXME: per block? kurtosis = kurtosis, debug = debug) # rescale # Note: 'scale()' will first center, and then scale # but we need to first scale, and then center... # this was reported by Jordan Brace (9 may 2014) #X[[g]] <- scale(Z, center = -Mu.hat[[g]], # scale = 1/sqrt(diag(COV))) # first, we scale TMP <- scale(Z, center = FALSE, scale = 1/sqrt(diag(COV)))[,,drop=FALSE] # then, we center X[[g]] <- sweep(TMP, MARGIN=2, STATS=Mu.hat[[g]], FUN="+") } # any categorical variables? 
ov.ord <- vnames(lav, type="ov.ord", block = g) if(length(ov.ord) > 0L) { ov.names <- vnames(lav, type="ov", block = g) # use thresholds to cut for(o in ov.ord) { o.idx <- which(o == ov.names) th.idx <- which(lav$op == "|" & lav$lhs == o & lav$block == g) th.val <- c(-Inf,sort(lav$ustart[th.idx]),+Inf) X[[g]][,o.idx] <- as.integer(cut(X[[g]][,o.idx], th.val)) } } if(return.type == "data.frame") X[[g]] <- as.data.frame(X[[g]]) } if(return.type == "matrix") { if(nblocks == 1L) { return(X[[1L]]) } else { return(X) } } else if (return.type == "data.frame") { Data <- X[[1L]] # if multiple groups, add group column if(nblocks > 1L) { for(g in 2:nblocks) { Data <- rbind(Data, X[[g]]) } Data$group <- rep(1:nblocks, times=sample.nobs) } var.names <- vnames(fit@ParTable, type="ov", block=1L) if(nblocks > 1L) var.names <- c(var.names, "group") names(Data) <- var.names if(return.fit) { attr(Data, "fit") <- fit } return(Data) } else if (return.type == "cov") { if(nblocks == 1L) { return(cov(X[[1L]])) } else { cov.list <- lapply(X, cov) return(cov.list) } } } Skewness <- function(x., N1=TRUE) { x <- x.; x <- x[!is.na(x)]; N <- length(x) mean.x <- mean(x); xc <- x - mean.x; var.x <- var(x) if(!N1) var.x <- var.x * (N-1)/N sd.x <- sqrt(var.x) sk <- sum(xc*xc*xc)/(sd.x*sd.x*sd.x) skewness <- N*sk/((N-1)*(N-2)) skewness } Kurtosis <- function(x., N1=TRUE) { x <- x.; x <- x[!is.na(x)]; N <- length(x) mean.x <- mean(x); xc <- x - mean.x; var.x <- var(x) if(!N1) var.x <- var.x * (N-1)/N k <- sum(xc*xc*xc*xc)/(var.x*var.x) kurtosis <- N*(N+1)*k/((N-1)*(N-2)*(N-3))-3*(N-1)*(N-1)/((N-2)*(N-3)) kurtosis } # NOTE: as pointed out in Fleishman (1978), a real solution does not # always exist (for a/b/c/d) for all values of skew/kurtosis # # for example: skew = 3, only valid if kurtosis > 14 (approximately) # # fleishman eq 21 suggests: skew^2 < 0.0629576*kurtosis + 0.0717247 # see figure 1 page 527 # # note also that the a/b/c/d solution is not unique, although this seems # not to matter for generating the data # Fleishman (1978) cubic transformation method lav_fleishman1978 <- function(n=100, skewness=0, kurtosis=0, verbose=FALSE) { system.function <- function(x, skewness, kurtosis) { b=x[1L]; c=x[2L]; d=x[3L] eq1 <- b*b + 6*b*d + 2*c*c + 15*d*d - 1 eq2 <- 2*c*(b*b + 24*b*d + 105*d*d + 2) - skewness eq3 <- 24*(b*d + c*c*(1 + b*b + 28*b*d) + d*d*(12 + 48*b*d + 141*c*c + 225*d*d)) - kurtosis eq <- c(eq1,eq2,eq3) sum(eq*eq) ## SS } out <- nlminb(start=c(1,0,0), objective=system.function, scale=10, control=list(trace=ifelse(verbose,1,0), rel.tol=1e-10), skewness=skewness, kurtosis=kurtosis) if(out$convergence != 0) warning("no convergence") b <- out$par[1L]; c <- out$par[2L]; d <- out$par[3L]; a <- -c Z <- rnorm(n=n) Y <- a + b*Z + c*Z*Z + d*Z*Z*Z Y } ValeMaurelli1983 <- function(n=100L, COR, skewness, kurtosis, debug = FALSE) { fleishman1978_abcd <- function(skewness, kurtosis) { system.function <- function(x, skewness, kurtosis) { b.=x[1L]; c.=x[2L]; d.=x[3L] eq1 <- b.*b. + 6*b.*d. + 2*c.*c. + 15*d.*d. - 1 eq2 <- 2*c.*(b.*b. + 24*b.*d. + 105*d.*d. + 2) - skewness eq3 <- 24*(b.*d. + c.*c.*(1 + b.*b. + 28*b.*d.) + d.*d.*(12 + 48*b.*d. + 141*c.*c. + 225*d.*d.)) - kurtosis eq <- c(eq1,eq2,eq3) sum(eq*eq) ## SS } out <- nlminb(start=c(1,0,0), objective=system.function, scale=10, control=list(trace=0), skewness=skewness, kurtosis=kurtosis) if(out$convergence != 0) warning("no convergence") b. <- out$par[1L]; c. <- out$par[2L]; d. <- out$par[3L]; a. <- -c. c(a.,b.,c.,d.) 
} getICOV <- function(b1, c1, d1, b2, c2, d2, R) { objectiveFunction <- function(x, b1, c1, d1, b2, c2, d2, R) { rho=x[1L] eq <- rho*(b1*b2 + 3*b1*d2 + 3*d1*b2 + 9*d1*d2) + rho*rho*(2*c1*c2) + rho*rho*rho*(6*d1*d2) - R eq*eq } #gradientFunction <- function(x, bcd1, bcd2, R) { # #} out <- nlminb(start=R, objective=objectiveFunction, scale=10, control=list(trace=0), b1=b1, c1=c1, d1=d1, b2=b2, c2=c2, d2=d2, R=R) if(out$convergence != 0) warning("no convergence") rho <- out$par[1L] rho } # number of variables nvar <- ncol(COR) # check skewness if(is.null(skewness)) { SK <- rep(0, nvar) } else if(length(skewness) == nvar) { SK <- skewness } else if(length(skewness) == 1L) { SK <- rep(skewness, nvar) } else { stop("skewness has wrong length") } if(is.null(kurtosis)) { KU <- rep(0, nvar) } else if(length(kurtosis) == nvar) { KU <- kurtosis } else if(length(kurtosis) == 1L) { KU <- rep(kurtosis, nvar) } else { stop("kurtosis has wrong length") } # create Fleishman table FTable <- matrix(0, nvar, 4L) for(i in 1:nvar) { FTable[i,] <- fleishman1978_abcd(skewness=SK[i], kurtosis=KU[i]) } # compute intermediate correlations between all pairs ICOR <- diag(nvar) for(j in 1:(nvar-1L)) { for(i in (j+1):nvar) { if(COR[i,j] == 0) next ICOR[i,j] <- ICOR[j,i] <- getICOV(FTable[i,2], FTable[i,3], FTable[i,4], FTable[j,2], FTable[j,3], FTable[j,4], R=COR[i,j]) } } if(debug) { cat("\nOriginal correlations (for Vale-Maurelli):\n") print(COR) cat("\nIntermediate correlations (for Vale-Maurelli):\n") print(ICOR) cat("\nEigen values ICOR:\n") print( eigen(ICOR)$values ) } # generate Z ## FIXME: replace by rmvnorm once we use that package X <- Z <- MASS::mvrnorm(n=n, mu=rep(0,nvar), Sigma=ICOR) # transform Z using Fleishman constants for(i in 1:nvar) { X[,i] <- FTable[i,1L] + FTable[i,2L]*Z[,i] + FTable[i,3L]*Z[,i]*Z[,i] + FTable[i,4L]*Z[,i]*Z[,i]*Z[,i] } X } lavaan/R/lav_partable_attributes.R0000644000176200001440000000350713053003737016750 0ustar liggesusers# return 'attributes' of a lavaan partable -- generate a new set if necessary lav_partable_attributes <- function(partable, pta = NULL) { if(is.null(pta)) { # attached to partable? pta <- attributes(partable) if(!is.null(pta$vnames) && !is.null(pta$nvar)) { # looks like a pta return(pta) } else { pta <- list() } } # vnames pta$vnames <- lav_partable_vnames(partable, type="all") # vidx OV <- pta$vnames$ov LV <- pta$vnames$lv nblocks <- length(pta$vnames$ov) pta$vidx <- lapply(names(pta$vnames), function(v) { lapply(seq_len(nblocks), function(g) { if(grepl("lv", v)) { match(pta$vnames[[v]][[g]], LV[[g]]) } else if(grepl("th", v)) { # thresholds have '|t' pattern TH <- sapply(strsplit(pta$vnames[[v]][[g]], "|t", fixed = TRUE), "[[", 1L) match(TH, OV[[g]]) } else if(grepl("eqs", v)){ # mixture of OV/LV integer(0L) } else { match(pta$vnames[[v]][[g]], OV[[g]]) } }) }) names(pta$vidx) <- names(pta$vnames) # nblocks pta$nblocks <- nblocks # ngroups pta$ngroups <- lav_partable_ngroups(partable) # nlevels pta$nlevels <- lav_partable_nlevels(partable) # nvar pta$nvar <- lapply(pta$vnames$ov, length) # nfac pta$nfac <- lapply(pta$vnames$lv, length) # nfac.nonnormal - for numerical integration pta$nfac.nonnormal <- lapply(pta$vnames$lv.nonnormal, length) pta } lavaan/R/lav_graphics.R0000644000176200001440000000036112465075714014515 0ustar liggesusers# small functions to do something useful with the common # plot commands # suggested by JEB pairs.lavaan <- function(x, group=1L, ...) { X <- x@Data@X[[group]] colnames(X) <- x@Data@ov.names[[group]] pairs(X, ...) 
} lavaan/R/lav_object_generate.R0000644000176200001440000001224313053017033016017 0ustar liggesusers# here, we generate new models based on the original model in lavobject # 1. the independence model # 2. the unrestricted model # 3. model + extra parameters (for modindices/lavTestScore) # 1. fit an 'independence' model # note that for ML (and ULS and DWLS), the 'estimates' of the # independence model are simply the observed variances # but for GLS and WLS, this is not the case!! lav_object_independence <- function(object, se = FALSE, verbose = FALSE, warn = FALSE) { # construct parameter table for independence model lavpartable <- lav_partable_independence(object) # adapt options lavoptions <- object@Options # se if(se) { if(lavoptions$se == "none") { lavoptions$se <- "standard" } } else { ## FIXME: if test = scaled, we need it anyway? lavoptions$se <- "none" } # ALWAYS do.fit lavoptions$do.fit <- TRUE # verbose? lavoptions$verbose <- verbose # warn? lavoptions$warn <- warn # needed? if(any(lavpartable$op == "~1")) lavoptions$meanstructure <- TRUE FIT <- lavaan(lavpartable, slotOptions = lavoptions, slotSampleStats = object@SampleStats, slotData = object@Data, slotCache = object@Cache) FIT } # 2. unrestricted model lav_object_unrestricted <- function(object, se = FALSE, verbose = FALSE, warn = FALSE) { # construct parameter table for unrestricted model lavpartable <- lav_partable_unrestricted(object) # adapt options lavoptions <- object@Options # se if(se) { if(lavoptions$se == "none") { lavoptions$se <- "standard" } } else { ## FIXME: if test = scaled, we need it anyway? lavoptions$se <- "none" } # ALWAYS do.fit lavoptions$do.fit <- TRUE # verbose? if(verbose) { lavoptions$verbose <- TRUE } else { lavoptions$verbose <- FALSE } # warn? if(warn) { lavoptions$warn <- TRUE } else { lavoptions$warn <- FALSE } # needed? if(any(lavpartable$op == "~1")) lavoptions$meanstructure <- TRUE FIT <- lavaan(lavpartable, slotOptions = lavoptions, slotSampleStats = object@SampleStats, slotData = object@Data, slotCache = object@Cache) FIT } # 3. extended model lav_object_extended <- function(object, add = NULL, remove.duplicated = TRUE, all.free = FALSE, verbose = FALSE, warn = FALSE, do.fit = FALSE) { # partable original model partable <- object@ParTable[c("lhs","op","rhs","block","free", "exo","label","plabel")] # do we need 'exo'? if(all.free) { partable$user <- rep(1L, length(partable$lhs)) non.free.idx <- which(partable$free == 0L & partable$op != "==" & partable$op != ":=" & partable$op != "<" & partable$op != ">") partable$free[ non.free.idx ] <- 1L partable$user[ non.free.idx ] <- 10L } # replace 'start' column, since lav_model will fill these in in GLIST partable$start <- parameterEstimates(object, remove.system.eq = FALSE, remove.def = FALSE, remove.eq = FALSE, remove.ineq = FALSE)$est # add new parameters, extend model if(is.list(add)) { stopifnot(!is.null(add$lhs), !is.null(add$op), !is.null(add$rhs)) ADD <- add } else if(is.character(add)) { ngroups <- lav_partable_ngroups(partable) ADD <- lavaanify(add, ngroups = ngroups) ADD <- ADD[,c("lhs","op","rhs","block","user","label")] remove.idx <- which(ADD$user == 0) if(length(remove.idx) > 0L) { ADD <- ADD[-remove.idx,] } ADD$start <- rep( 0, nrow(ADD)) ADD$free <- rep( 1, nrow(ADD)) ADD$user <- rep(10, nrow(ADD)) } # merge LIST <- lav_partable_merge(partable, ADD, remove.duplicated = remove.duplicated, warn = FALSE) # remove nonpar? 
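# usage sketch (illustrative; 'fit' is a fitted lavaan object):
#   ext <- lav_object_extended(fit, add = "x1 ~~ x2")
# this sets up the original model plus a free x1~~x2 parameter; by
# default (do.fit = FALSE) the extended model is not re-fitted,
# as used by modindices/lavTestScore
# (the disabled block below would drop the ==/:=/</> rows)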
#if(remove.nonpar) { # nonpar.idx <- which(LIST$op %in% c("==", ":=", "<", ">")) # if(length(nonpar.idx) > 0L) { # LIST <- LIST[-nonpar.idx,] # } #} # redo 'free' free.idx <- which(LIST$free > 0) LIST$free[free.idx] <- 1:length(free.idx) # adapt options lavoptions <- object@Options # verbose? lavoptions$verbose <- verbose # warn? lavoptions$warn <- warn # do.fit? lavoptions$do.fit <- do.fit # needed? if(any(LIST$op == "~1")) lavoptions$meanstructure <- TRUE FIT <- lavaan(LIST, slotOptions = lavoptions, slotSampleStats = object@SampleStats, slotData = object@Data, slotCache = object@Cache) FIT } lavaan/R/01RefClass_02lavML.R0000644000176200001440000000165412465075714015161 0ustar liggesusers# Maximum Likelihood optimization -- YR 10 july 2012 # super class -- virtual statistical model that needs to be optimized lavRefML <- setRefClass("lavML", # inherits contains = "lavOptim", # fields fields = list( y = "numeric", # the (unidimensional) data nobs = "integer", # number of observations weights = "numeric" # weights ), # methods methods = list( logl = function(x) { if(!missing(x)) theta <<- x likelihoods <- lik() # FIXME: handle zero/negative/small likelihood values sum(log(likelihoods), na.rm=TRUE) }, lik = function(x) { if(!missing(x)) theta <<- x cat("this is dummy function\n") return(rep(as.numeric(NA), nobs)) }, scores = function(x) { if(!missing(x)) theta <<- x cat("this is dummy function\n") return(matrix(as.numeric(NA), nobs, npar)) }, gradient = function(x) { SCORES <- scores(x) apply(SCORES, 2L, base::sum, na.rm=TRUE) } )) lavaan/R/lav_representation_lisrel.R0000644000176200001440000025327113053001304017315 0ustar liggesusers# and matrix-representation specific functions: # - computeSigmaHat # - computeMuHat # - derivative.F # initital version: YR 2011-01-21: LISREL stuff # updates: YR 2011-12-01: group specific extraction # YR 2012-05-17: thresholds representation.LISREL <- function(partable=NULL, target=NULL, extra=FALSE, remove.nonexisting=TRUE) { # prepare target list if(is.null(target)) target <- partable stopifnot(!is.null(target$block)) # prepare output N <- length(target$lhs) tmp.mat <- character(N); tmp.row <- integer(N); tmp.col <- integer(N) # global settings meanstructure <- any(partable$op == "~1") categorical <- any(partable$op == "|") group.w.free <- any(partable$lhs == "group" & partable$op == "%") # gamma? 
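# (a separate GAMMA matrix for the regressions on exogenous x is only
# used when the model is categorical, or when a regression line is
# flagged as exogenous; otherwise the x variables become phantom/dummy
# latent variables and all regressions are collected in BETA)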
if(categorical) { gamma <- TRUE } else if(any(partable$op == "~" & partable$exo == 1L)) { gamma <- TRUE } else { gamma <- FALSE } # number of blocks nblocks <- lav_partable_nblocks(partable) ov.dummy.names.nox <- vector("list", nblocks) ov.dummy.names.x <- vector("list", nblocks) if(extra) { REP.mmNames <- vector("list", nblocks) REP.mmNumber <- vector("list", nblocks) REP.mmRows <- vector("list", nblocks) REP.mmCols <- vector("list", nblocks) REP.mmDimNames <- vector("list", nblocks) REP.mmSymmetric <- vector("list", nblocks) } for(g in 1:nblocks) { # info from user model per block if(gamma) { ov.names <- vnames(partable, "ov.nox", block=g) } else { ov.names <- vnames(partable, "ov", block=g) } nvar <- length(ov.names) lv.names <- vnames(partable, "lv", block=g); nfac <- length(lv.names) ov.th <- vnames(partable, "th", block=g); nth <- length(ov.th) ov.names.x <- vnames(partable, "ov.x",block=g); nexo <- length(ov.names.x) ov.names.nox <- vnames(partable, "ov.nox",block=g) # in this representation, we need to create 'phantom/dummy' latent # variables for all `x' and `y' variables not in lv.names # (only y if conditional.x = TRUE) # regression dummys if(gamma) { tmp.names <- unique( partable$lhs[(partable$op == "~" | partable$op == "<~") & partable$block == g] ) } else { tmp.names <- unique( c(partable$lhs[(partable$op == "~" | partable$op == "<~") & partable$block == g], partable$rhs[(partable$op == "~" | partable$op == "<~") & partable$block == g]) ) } dummy.names1 <- tmp.names[ !tmp.names %in% lv.names ] # covariances involving dummys dummy.cov.idx <- which(partable$op == "~~" & partable$block == g & (partable$lhs %in% dummy.names1 | partable$rhs %in% dummy.names1)) # new in 0.5-21: also include covariances involving these covariances... dummy.cov.idx1 <- which(partable$op == "~~" & partable$block == g & (partable$lhs %in% partable$lhs[dummy.cov.idx] | partable$rhs %in% partable$rhs[dummy.cov.idx])) dummy.cov.idx <- unique(c(dummy.cov.idx, dummy.cov.idx1)) dummy.names2 <- unique( c(partable$lhs[dummy.cov.idx], partable$rhs[dummy.cov.idx]) ) # collect all dummy variables dummy.names <- unique(c(dummy.names1, dummy.names2)) if(length(dummy.names)) { # make sure order is the same as ov.names ov.dummy.names.nox[[g]] <- ov.names.nox[ ov.names.nox %in% dummy.names ] ov.dummy.names.x[[g]] <- ov.names.x[ ov.names.x %in% dummy.names ] # combine them, make sure order is identical to ov.names tmp <- ov.names[ ov.names %in% dummy.names ] # extend lv.names lv.names <- c(lv.names, tmp) nfac <- length(lv.names) # add 'dummy' =~ entries dummy.mat <- rep("lambda", length(dummy.names)) } else { ov.dummy.names.nox[[g]] <- character(0) ov.dummy.names.x[[g]] <- character(0) } # 1a. "=~" regular indicators idx <- which(target$block == g & target$op == "=~" & !(target$rhs %in% lv.names)) tmp.mat[idx] <- "lambda" tmp.row[idx] <- match(target$rhs[idx], ov.names) tmp.col[idx] <- match(target$lhs[idx], lv.names) # 1b. "=~" regular higher-order lv indicators idx <- which(target$block == g & target$op == "=~" & !(target$rhs %in% ov.names)) tmp.mat[idx] <- "beta" tmp.row[idx] <- match(target$rhs[idx], lv.names) tmp.col[idx] <- match(target$lhs[idx], lv.names) # 1c. "=~" indicators that are both in ov and lv idx <- which(target$block == g & target$op == "=~" & target$rhs %in% ov.names & target$rhs %in% lv.names) tmp.mat[idx] <- "beta" tmp.row[idx] <- match(target$rhs[idx], lv.names) tmp.col[idx] <- match(target$lhs[idx], lv.names) # 2. 
"~" regressions if(gamma) { # gamma idx <- which(target$rhs %in% ov.names.x & target$block == g & (target$op == "~" | target$op == "<~") ) tmp.mat[idx] <- "gamma" tmp.row[idx] <- match(target$lhs[idx], lv.names) tmp.col[idx] <- match(target$rhs[idx], ov.names.x) # beta idx <- which(!target$rhs %in% ov.names.x & target$block == g & (target$op == "~" | target$op == "<~") ) tmp.mat[idx] <- "beta" tmp.row[idx] <- match(target$lhs[idx], lv.names) tmp.col[idx] <- match(target$rhs[idx], lv.names) } else { idx <- which(target$block == g & (target$op == "~" | target$op == "<~") ) tmp.mat[idx] <- "beta" tmp.row[idx] <- match(target$lhs[idx], lv.names) tmp.col[idx] <- match(target$rhs[idx], lv.names) } # 3a. "~~" ov idx <- which(target$block == g & target$op == "~~" & !(target$lhs %in% lv.names)) tmp.mat[idx] <- "theta" tmp.row[idx] <- match(target$lhs[idx], ov.names) tmp.col[idx] <- match(target$rhs[idx], ov.names) # 3b. "~~" lv idx <- which(target$block == g & target$op == "~~" & target$rhs %in% lv.names) tmp.mat[idx] <- "psi" tmp.row[idx] <- match(target$lhs[idx], lv.names) tmp.col[idx] <- match(target$rhs[idx], lv.names) # 4a. "~1" ov idx <- which(target$block == g & target$op == "~1" & !(target$lhs %in% lv.names)) tmp.mat[idx] <- "nu" tmp.row[idx] <- match(target$lhs[idx], ov.names) tmp.col[idx] <- 1L # 4b. "~1" lv idx <- which(target$block == g & target$op == "~1" & target$lhs %in% lv.names) tmp.mat[idx] <- "alpha" tmp.row[idx] <- match(target$lhs[idx], lv.names) tmp.col[idx] <- 1L # 5. "|" th LABEL <- paste(target$lhs, target$op, target$rhs, sep="") idx <- which(target$block == g & target$op == "|" & LABEL %in% ov.th) TH <- paste(target$lhs[idx], "|", target$rhs[idx], sep="") tmp.mat[idx] <- "tau" tmp.row[idx] <- match(TH, ov.th) tmp.col[idx] <- 1L # 6. "~*~" scales idx <- which(target$block == g & target$op == "~*~") tmp.mat[idx] <- "delta" tmp.row[idx] <- match(target$lhs[idx], ov.names) tmp.col[idx] <- 1L # new 0.5-12: catch lower-elements in theta/psi idx.lower <- which(tmp.mat %in% c("theta","psi") & tmp.row > tmp.col) if(length(idx.lower) > 0L) { tmp <- tmp.row[idx.lower] tmp.row[idx.lower] <- tmp.col[idx.lower] tmp.col[idx.lower] <- tmp } # new 0.5-16: group weights idx <- which(target$block == g & target$lhs == "group" & target$op == "%") tmp.mat[idx] <- "gw" tmp.row[idx] <- 1L tmp.col[idx] <- 1L if(extra) { # mRows mmRows <- list(tau = nth, delta = nvar, nu = nvar, lambda = nvar, theta = nvar, alpha = nfac, beta = nfac, gamma = nfac, gw = 1L, psi = nfac) # mCols mmCols <- list(tau = 1L, delta = 1L, nu = 1L, lambda = nfac, theta = nvar, alpha = 1L, beta = nfac, gamma = nexo, gw = 1L, psi = nfac) # dimNames for LISREL model matrices mmDimNames <- list(tau = list( ov.th, "threshold"), delta = list( ov.names, "scales"), nu = list( ov.names, "intercept"), lambda = list( ov.names, lv.names), theta = list( ov.names, ov.names), alpha = list( lv.names, "intercept"), beta = list( lv.names, lv.names), gamma = list( lv.names, ov.names.x), gw = list( "group", "weight"), psi = list( lv.names, lv.names)) # isSymmetric mmSymmetric <- list(tau = FALSE, delta = FALSE, nu = FALSE, lambda = FALSE, theta = TRUE, alpha = FALSE, beta = FALSE, gamma = FALSE, gw = FALSE, psi = TRUE) # which mm's do we need? 
(always include lambda, theta and psi) mmNames <- c("lambda", "theta", "psi") if("beta" %in% tmp.mat) mmNames <- c(mmNames, "beta") if(meanstructure) mmNames <- c(mmNames, "nu", "alpha") if("tau" %in% tmp.mat) mmNames <- c(mmNames, "tau") if("delta" %in% tmp.mat) mmNames <- c(mmNames, "delta") if("gamma" %in% tmp.mat) mmNames <- c(mmNames, "gamma") if("gw" %in% tmp.mat) mmNames <- c(mmNames, "gw") REP.mmNames[[g]] <- mmNames REP.mmNumber[[g]] <- length(mmNames) REP.mmRows[[g]] <- unlist(mmRows[ mmNames ]) REP.mmCols[[g]] <- unlist(mmCols[ mmNames ]) REP.mmDimNames[[g]] <- mmDimNames[ mmNames ] REP.mmSymmetric[[g]] <- unlist(mmSymmetric[ mmNames ]) } # extra } # nblocks REP <- list(mat = tmp.mat, row = tmp.row, col = tmp.col) # remove non-existing (NAs)? # here we remove `non-existing' parameters; this depends on the matrix # representation (eg in LISREL rep, there is no ~~ between lv and ov) #if(remove.nonexisting) { # idx <- which( nchar(REP$mat) > 0L & # !is.na(REP$row) & REP$row > 0L & # !is.na(REP$col) & REP$col > 0L ) # # but keep ==, :=, etc. # idx <- c(idx, which(partable$op %in% c("==", ":=", "<", ">"))) # REP$mat <- REP$mat[idx] # REP$row <- REP$row[idx] # REP$col <- REP$col[idx] # # always add 'ov.dummy.*.names' attributes attr(REP, "ov.dummy.names.nox") <- ov.dummy.names.nox attr(REP, "ov.dummy.names.x") <- ov.dummy.names.x if(extra) { attr(REP, "mmNames") <- REP.mmNames attr(REP, "mmNumber") <- REP.mmNumber attr(REP, "mmRows") <- REP.mmRows attr(REP, "mmCols") <- REP.mmCols attr(REP, "mmDimNames") <- REP.mmDimNames attr(REP, "mmSymmetric") <- REP.mmSymmetric } REP } # ETA: # 1) EETA # 2) EETAx # 3) VETA # 4) VETAx # 1) EETA # compute E(ETA): expected value of latent variables (marginal over x) # - if no eXo (and GAMMA): # E(ETA) = (I-B)^-1 ALPHA # - if eXo and GAMMA: # E(ETA) = (I-B)^-1 ALPHA + (I-B)^-1 GAMMA mean.x computeEETA.LISREL <- function(MLIST=NULL, mean.x=NULL, sample.mean=NULL, ov.y.dummy.ov.idx=NULL, ov.x.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL, ov.x.dummy.lv.idx=NULL) { LAMBDA <- MLIST$lambda; BETA <- MLIST$beta; GAMMA <- MLIST$gamma # ALPHA? (reconstruct, but no 'fix') ALPHA <- .internal_get_ALPHA(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # BETA? if(!is.null(BETA)) { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) # GAMMA? if(!is.null(GAMMA)) { eeta <- as.vector(IB.inv %*% ALPHA + IB.inv %*% GAMMA %*% mean.x) } else { eeta <- as.vector(IB.inv %*% ALPHA) } } else { # GAMMA? if(!is.null(GAMMA)) { eeta <- as.vector(ALPHA + GAMMA %*% mean.x) } else { eeta <- as.vector(ALPHA) } } eeta } # 2) EETAx # compute E(ETA|x_i): conditional expected value of latent variable, # given specific value of x_i # - if no eXo (and GAMMA): # E(ETA) = (I-B)^-1 ALPHA # we return a matrix of size [nobs x nfac] replicating E(ETA) # - if eXo and GAMMA: # E(ETA|x_i) = (I-B)^-1 ALPHA + (I-B)^-1 GAMMA x_i # we return a matrix of size [nobs x nfac] # computeEETAx.LISREL <- function(MLIST=NULL, eXo=NULL, N=nrow(eXo), sample.mean=NULL, ov.y.dummy.ov.idx=NULL, ov.x.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL, ov.x.dummy.lv.idx=NULL) { LAMBDA <- MLIST$lambda; BETA <- MLIST$beta; GAMMA <- MLIST$gamma nfac <- ncol(LAMBDA) # if eXo, N must be nrow(eXo) if(!is.null(eXo)) { N <- nrow(eXo) } # ALPHA? 
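# (reconstructed including the dummy entries, no 'fixing' applied)
# note: the dummy lv columns for exogenous x are overwritten with the
# observed x values below, so that after post-multiplying by
# t(IB.inv) each row i of EETA contains E(ETA | x_i)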
ALPHA <- .internal_get_ALPHA(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # construct [nobs x nfac] matrix (repeating ALPHA) EETA <- matrix(ALPHA, N, nfac, byrow=TRUE) # put back eXo values if dummy if(length(ov.x.dummy.lv.idx) > 0L) { EETA[,ov.x.dummy.lv.idx] <- eXo } # BETA? if(!is.null(BETA)) { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) EETA <- EETA %*% t(IB.inv) } # GAMMA? if(!is.null(GAMMA)) { if(!is.null(BETA)) { EETA <- EETA + eXo %*% t(IB.inv %*% GAMMA) } else { EETA <- EETA + eXo %*% t(GAMMA) } } EETA } # 3) VETA # compute V(ETA): variances/covariances of latent variables # - if no eXo (and GAMMA) # V(ETA) = (I-B)^-1 PSI (I-B)^-T # - if eXo and GAMMA: (cfr lisrel submodel 3a with ksi=x) # V(ETA) = (I-B)^-1 [ GAMMA cov.x t(GAMMA) + PSI] (I-B)^-T computeVETA.LISREL <- function(MLIST=NULL, cov.x=NULL) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA) PSI <- MLIST$psi THETA <- MLIST$theta BETA <- MLIST$beta GAMMA <- MLIST$gamma if(!is.null(GAMMA)) { stopifnot(!is.null(cov.x)) # we treat 'x' as 'ksi' in the LISREL model; cov.x is PHI PSI <- tcrossprod(GAMMA %*% cov.x, GAMMA) + PSI } # beta? if(is.null(BETA)) { VETA <- PSI } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) VETA <- tcrossprod(IB.inv %*% PSI, IB.inv) } VETA } # 4) VETAx # compute V(ETA|x_i): variances/covariances of latent variables # V(ETA) = (I-B)^-1 PSI (I-B)^-T + remove dummies computeVETAx.LISREL <- function(MLIST=NULL, lv.dummy.idx=NULL) { PSI <- MLIST$psi BETA <- MLIST$beta # beta? if(is.null(BETA)) { VETA <- PSI } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) VETA <- tcrossprod(IB.inv %*% PSI, IB.inv) } # remove dummy lv? 
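# (the dummy rows/columns correspond to observed y/x variables that
# were promoted to latent variables and are not part of V(ETA|x);
# since the dummies are appended after the 'real' factors, removing
# them leaves the leading block of the regular latent variables)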
if(!is.null(lv.dummy.idx)) { VETA <- VETA[-lv.dummy.idx, -lv.dummy.idx, drop=FALSE] } VETA } # Y # 1) EY # 2) EYx # 3) EYetax # 4) VY # 5) VYx # 6) VYetax # 1) EY # compute E(Y): expected value of observed # E(Y) = NU + LAMBDA %*% E(eta) # = NU + LAMBDA %*% (IB.inv %*% ALPHA) # no exo, no GAMMA # = NU + LAMBDA %*% (IB.inv %*% ALPHA + IB.inv %*% GAMMA %*% mean.x) # eXo # if DELTA -> E(Y) = delta * E(Y) # # this is similar to computeMuHat but: # - we ALWAYS compute NU+ALPHA, even if meanstructure=FALSE # - never used if GAMMA, since we then have categorical variables, and the # 'part 1' structure contains the (thresholds +) intercepts, not # the means computeEY.LISREL <- function(MLIST=NULL, mean.x = NULL, sample.mean = NULL, ov.y.dummy.ov.idx=NULL, ov.x.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL, ov.x.dummy.lv.idx=NULL) { LAMBDA <- MLIST$lambda # get NU, but do not 'fix' NU <- .internal_get_NU(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # compute E(ETA) EETA <- computeEETA.LISREL(MLIST = MLIST, sample.mean = sample.mean, mean.x = mean.x, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # EY EY <- as.vector(NU) + as.vector(LAMBDA %*% EETA) # if delta, scale if(!is.null(MLIST$delta)) { EY <- EY * as.vector(MLIST$delta) } EY } # 2) EYx # compute E(Y|x_i): expected value of observed, conditional on x_i # E(Y|x_i) = NU + LAMBDA %*% E(eta|x_i) # - if no eXo (and GAMMA): # E(ETA|x_i) = (I-B)^-1 ALPHA # we return a matrix of size [nobs x nfac] replicating E(ETA) # - if eXo and GAMMA: # E(ETA|x_i) = (I-B)^-1 ALPHA + (I-B)^-1 GAMMA x_i # we return a matrix of size [nobs x nfac] # # - we ALWAYS compute NU+ALPHA, even if meanstructure=FALSE # - never used if GAMMA, since we then have categorical variables, and the # 'part 1' structure contains the (thresholds +) intercepts, not # the means computeEYx.LISREL <- function(MLIST = NULL, eXo = NULL, N = nrow(eXo), sample.mean = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { LAMBDA <- MLIST$lambda # get NU, but do not 'fix' NU <- .internal_get_NU(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # compute E(ETA|x_i) EETAx <- computeEETAx.LISREL(MLIST = MLIST, eXo = eXo, N = N, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # EYx EYx <- sweep(tcrossprod(EETAx, LAMBDA), 2L, STATS = NU, FUN = "+") # if delta, scale if(!is.null(MLIST$delta)) { EYx <- sweep(EYx, 2L, STATS = MLIST$delta, FUN = "*") } EYx } # 3) EYetax # compute E(Y|eta_i,x_i): conditional expected value of observed variable # given specific value of eta_i AND x_i # # E(y*_i|eta_i, x_i) = NU + LAMBDA eta_i + KAPPA x_i # # where eta_i = predict(fit) = factor scores OR specific values for eta_i # (as in GH integration) # # if nexo = 0, and eta_i is single row, YHAT is the same for each observation # in this case, we return a single row, unless Nobs > 1L, in which case # we return Nobs identical rows # # NOTE: we assume that any effect of x_i on eta_i has already been taken # care off # categorical 
version computeEYetax.LISREL <- function(MLIST = NULL, eXo = NULL, ETA = NULL, N = nrow(eXo), sample.mean = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { LAMBDA <- MLIST$lambda BETA <- MLIST$beta if(!is.null(eXo)) { N <- nrow(eXo) } else if(!is.null(N)) { # nothing to do } else { N <- 1L } # create ETA matrix if(nrow(ETA) == 1L) { ETA <- matrix(ETA, N, ncol(ETA), byrow=TRUE) } # always augment ETA with 'dummy values' (0 for ov.y, eXo for ov.x) #ndummy <- length(c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx)) #if(ndummy > 0L) { # ETA2 <- cbind(ETA, matrix(0, N, ndummy)) #} else { ETA2 <- ETA #} # only if we have dummy ov.y, we need to compute the 'yhat' values # beforehand if(length(ov.y.dummy.lv.idx) > 0L) { # insert eXo values if(length(ov.x.dummy.lv.idx) > 0L) { ETA2[,ov.x.dummy.lv.idx] <- eXo } # zero ov.y values if(length(ov.y.dummy.lv.idx) > 0L) { ETA2[,ov.y.dummy.lv.idx] <- 0 } # ALPHA? (reconstruct, but no 'fix') ALPHA <- .internal_get_ALPHA(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # BETA? if(!is.null(BETA)) { ETA2 <- sweep(tcrossprod(ETA2, BETA), 2L, STATS = ALPHA, FUN = "+") } else { ETA2 <- sweep(ETA2, 2L, STATS = ALPHA, FUN = "+") } # put back eXo values if(length(ov.x.dummy.lv.idx) > 0L) { ETA2[,ov.x.dummy.lv.idx] <- eXo } # put back ETA values for the 'real' latent variables dummy.idx <- c(ov.x.dummy.lv.idx, ov.y.dummy.lv.idx) if(length(dummy.idx) > 0L) { lv.regular.idx <- seq_len( min(dummy.idx) - 1L ) ETA2[, lv.regular.idx] <- ETA[,lv.regular.idx, drop = FALSE] } } # get NU, but do not 'fix' NU <- .internal_get_NU(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # EYetax EYetax <- sweep(tcrossprod(ETA2, LAMBDA), 2L, STATS = NU, FUN = "+") # if delta, scale if(!is.null(MLIST$delta)) { EYetax <- sweep(EYetax, 2L, STATS = MLIST$delta, FUN = "*") } EYetax } # unconditional version computeEYetax2.LISREL <- function(MLIST = NULL, ETA = NULL, sample.mean = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { LAMBDA <- MLIST$lambda BETA <- MLIST$beta # only if we have dummy ov.y, we need to compute the 'yhat' values # beforehand, and impute them in ETA[,ov.y] if(length(ov.y.dummy.lv.idx) > 0L) { # ALPHA? 
(reconstruct, but no 'fix') ALPHA <- .internal_get_ALPHA(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # keep all, but ov.y values OV.NOY <- ETA[,-ov.y.dummy.lv.idx, drop = FALSE] # ov.y rows, non-ov.y cols BETAY <- BETA[ov.y.dummy.lv.idx,-ov.y.dummy.lv.idx, drop = FALSE] # ov.y intercepts ALPHAY <- ALPHA[ov.y.dummy.lv.idx,, drop=FALSE] # impute ov.y values in ETA ETA[,ov.y.dummy.lv.idx] <- sweep(tcrossprod(OV.NOY, BETAY), 2L, STATS = ALPHAY, FUN = "+") } # get NU, but do not 'fix' NU <- .internal_get_NU(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # EYetax EYetax <- sweep(tcrossprod(ETA, LAMBDA), 2L, STATS = NU, FUN = "+") # if delta, scale if(!is.null(MLIST$delta)) { EYetax <- sweep(EYetax, 2L, STATS = MLIST$delta, FUN = "*") } EYetax } # unconditional version computeEYetax3.LISREL <- function(MLIST = NULL, ETA = NULL, sample.mean = NULL, mean.x = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { LAMBDA <- MLIST$lambda # special case: empty lambda if(ncol(LAMBDA) == 0L) { return( matrix(sample.mean, nrow(ETA), length(sample.mean), byrow=TRUE) ) } # lv idx dummy.idx <- c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) if(length(dummy.idx) > 0L) { nondummy.idx <- seq_len( min(dummy.idx) - 1L ) } else { nondummy.idx <- seq_len( ncol(MLIST$lambda) ) } # beta? if(is.null(MLIST$beta) || length(ov.y.dummy.lv.idx) == 0L || length(nondummy.idx) == 0L) { LAMBDA..IB.inv <- LAMBDA } else { # only keep those columns of BETA that correspond to the # the `regular' latent variables # (ie. 
ignore the structural part altogether) MLIST2 <- MLIST MLIST2$beta[,dummy.idx] <- 0 IB.inv <- .internal_get_IB.inv(MLIST = MLIST2) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # compute model-implied means EY <- computeEY.LISREL(MLIST = MLIST, mean.x = mean.x, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) EETA <- computeEETA.LISREL(MLIST = MLIST, sample.mean = sample.mean, mean.x = mean.x, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # center regular lv only ETA[,nondummy.idx] <- sweep(ETA[,nondummy.idx,drop = FALSE], 2L, STATS = EETA[nondummy.idx], FUN = "-") # project from lv to ov, if we have any lv if(length(nondummy.idx) > 0) { EYetax <- sweep(tcrossprod(ETA[,nondummy.idx,drop=FALSE], LAMBDA..IB.inv[,nondummy.idx,drop=FALSE]), 2L, STATS = EY, FUN = "+") } else { EYetax <- ETA } # put back eXo variables if(length(ov.x.dummy.lv.idx) > 0L) { EYetax[,ov.x.dummy.ov.idx] <- ETA[,ov.x.dummy.lv.idx, drop = FALSE] } # if delta, scale if(!is.null(MLIST$delta)) { EYetax <- sweep(EYetax, 2L, STATS = MLIST$delta, FUN = "*") } EYetax } # 4) VY # compute the *un*conditional variance/covariance of y: V(Y) or V(Y*) # 'unconditional' model-implied (co)variances # - same as Sigma.hat if all Y are continuous # - diagonal is 1.0 (or delta^2) if categorical # - if also Gamma, cov.x is used (only if conditional.x) # only in THIS case, VY is different from diag(VYx) # # V(Y) = LAMBDA V(ETA) t(LAMBDA) + THETA computeVY.LISREL <- function(MLIST=NULL, cov.x=NULL) { LAMBDA <- MLIST$lambda THETA <- MLIST$theta VETA <- computeVETA.LISREL(MLIST = MLIST, cov.x = cov.x) VY <- tcrossprod(LAMBDA %*% VETA, LAMBDA) + THETA VY } # 5) VYx # compute V(Y*|x_i) == model-implied covariance matrix # this equals V(Y*) if no (explicit) eXo no GAMMA computeVYx.LISREL <- computeSigmaHat.LISREL <- function(MLIST = NULL, delta = TRUE) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA) PSI <- MLIST$psi THETA <- MLIST$theta BETA <- MLIST$beta # beta? if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # compute V(Y*|x_i) VYx <- tcrossprod(LAMBDA..IB.inv %*% PSI, LAMBDA..IB.inv) + THETA # if delta, scale if(delta && !is.null(MLIST$delta)) { DELTA <- diag(MLIST$delta[,1L], nrow=nvar, ncol=nvar) VYx <- DELTA %*% VYx %*% DELTA } VYx } # 6) VYetax # V(Y | eta_i, x_i) = THETA computeVYetax.LISREL <- function(MLIST = NULL, delta = TRUE) { VYetax <- MLIST$theta; nvar <- nrow(MLIST$theta) # if delta, scale if(delta && !is.null(MLIST$delta)) { DELTA <- diag(MLIST$delta[,1L], nrow=nvar, ncol=nvar) VYetax <- DELTA %*% VYetax %*% DELTA } VYetax } ### compute model-implied sample statistics # # 1) MuHat (similar to EY, but continuous only) # 2) TH # 3) PI # 4) SigmaHat == VYx # compute MuHat for a single block/group; only for the continuous case (no eXo) # # this is a special case of E(Y) where # - we have no (explicit) eXogenous variables # - only continuous computeMuHat.LISREL <- function(MLIST=NULL) { NU <- MLIST$nu ALPHA <- MLIST$alpha LAMBDA <- MLIST$lambda BETA <- MLIST$beta # shortcut if(is.null(ALPHA) || is.null(NU)) return(matrix(0, nrow(LAMBDA), 1L)) # beta? 
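# i.e., Mu.hat = NU + LAMBDA (I-B)^-1 ALPHA when BETA is present,
# and Mu.hat = NU + LAMBDA ALPHA otherwise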
if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # compute Mu Hat Mu.hat <- NU + LAMBDA..IB.inv %*% ALPHA Mu.hat } # compute TH for a single block/group computeTH.LISREL <- function(MLIST=NULL, th.idx=NULL) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) BETA <- MLIST$beta TAU <- MLIST$tau; nth <- nrow(TAU) # missing alpha if(is.null(MLIST$alpha)) { ALPHA <- matrix(0, nfac, 1L) } else { ALPHA <- MLIST$alpha } # missing nu if(is.null(MLIST$nu)) { NU <- matrix(0, nvar, 1L) } else { NU <- MLIST$nu } if(is.null(th.idx)) { th.idx <- seq_len(nth) nlev <- rep(1L, nvar) K_nu <- diag(nvar) } else { nlev <- tabulate(th.idx, nbins=nvar); nlev[nlev == 0L] <- 1L K_nu <- matrix(0, sum(nlev), nvar) K_nu[ cbind(seq_len(sum(nlev)), rep(seq_len(nvar), times=nlev)) ] <- 1.0 } # shortcut if(is.null(TAU)) return(matrix(0, length(th.idx), 1L)) # beta? if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # compute pi0 pi0 <- NU + LAMBDA..IB.inv %*% ALPHA # interleave th's with zeros where we have numeric variables th <- numeric( length(th.idx) ) th[ th.idx > 0L ] <- TAU[,1L] # compute TH TH <- th - (K_nu %*% pi0) # if delta, scale if(!is.null(MLIST$delta)) { DELTA.diag <- MLIST$delta[,1L] DELTA.star.diag <- rep(DELTA.diag, times=nlev) TH <- TH * DELTA.star.diag } as.vector(TH) } # compute PI for a single block/group computePI.LISREL <- function(MLIST=NULL) { LAMBDA <- MLIST$lambda BETA <- MLIST$beta GAMMA <- MLIST$gamma # shortcut if(is.null(GAMMA)) return(matrix(0, nrow(LAMBDA), 0L)) # beta? if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # compute PI PI <- LAMBDA..IB.inv %*% GAMMA # if delta, scale if(!is.null(MLIST$delta)) { DELTA.diag <- MLIST$delta[,1L] PI <- PI * DELTA.diag } PI } computeLAMBDA.LISREL <- function(MLIST = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL, remove.dummy.lv = FALSE) { ov.dummy.idx = c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) lv.dummy.idx = c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) # fix LAMBDA LAMBDA <- MLIST$lambda if(length(ov.y.dummy.ov.idx) > 0L) { LAMBDA[ov.y.dummy.ov.idx,] <- MLIST$beta[ov.y.dummy.lv.idx,] } # remove dummy lv? 
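# (after the fix above, the rows of LAMBDA for the dummy ov.y
# variables contain their regression coefficients from BETA; dropping
# the dummy lv columns then gives a LAMBDA with one column per
# regular latent variable)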
if(remove.dummy.lv && length(lv.dummy.idx) > 0L) { LAMBDA <- LAMBDA[,-lv.dummy.idx,drop=FALSE] } LAMBDA } computeTHETA.LISREL <- function(MLIST=NULL, ov.y.dummy.ov.idx=NULL, ov.x.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL, ov.x.dummy.lv.idx=NULL) { ov.dummy.idx = c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) lv.dummy.idx = c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) # fix THETA THETA <- MLIST$theta if(length(ov.dummy.idx) > 0L) { THETA[ov.dummy.idx, ov.dummy.idx] <- MLIST$psi[lv.dummy.idx, lv.dummy.idx] } THETA } # compute IB.inv .internal_get_IB.inv <- function(MLIST = NULL) { BETA <- MLIST$beta; nr <- nrow(MLIST$psi) if(!is.null(BETA)) { tmp <- -BETA tmp[lav_matrix_diag_idx(nr)] <- 1 IB.inv <- solve(tmp) } else { IB.inv <- diag(nr) } IB.inv } # only if ALPHA=NULL but we need it anyway # we 'reconstruct' ALPHA here (including dummy entries), no fixing # # without any dummy variables, this is just the zero vector # but if we have dummy variables, we need to fill in their values # # .internal_get_ALPHA <- function(MLIST = NULL, sample.mean = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { if(!is.null(MLIST$alpha)) return(MLIST$alpha) LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) BETA <- MLIST$beta ov.dummy.idx = c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) lv.dummy.idx = c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) if(length(ov.dummy.idx) > 0L) { ALPHA <- matrix(0, nfac, 1L) # Note: instead of sample.mean, we need 'intercepts' # sample.mean = NU + LAMBDA..IB.inv %*% ALPHA # so, # solve(LAMBDA..IB.inv) %*% (sample.mean - NU) = ALPHA # where # - LAMBDA..IB.inv only contains 'dummy' variables, and is square # - NU elements are not needed (since not in ov.dummy.idx) IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv LAMBDA..IB.inv.dummy <- LAMBDA..IB.inv[ov.dummy.idx, lv.dummy.idx] ALPHA[lv.dummy.idx] <- solve(LAMBDA..IB.inv.dummy) %*% sample.mean[ov.dummy.idx] } else { ALPHA <- matrix(0, nfac, 1L) } ALPHA } # only if NU=NULL but we need it anyway # # since we have no meanstructure, we can assume NU is unrestricted # and contains either: # 1) the sample means (if not eXo) # 2) the intercepts, if we have exogenous covariates # since sample.mean = NU + LAMBDA %*% E(eta) # we have NU = sample.mean - LAMBDA %*% E(eta) .internal_get_NU <- function(MLIST = NULL, sample.mean = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL) { if(!is.null(MLIST$nu)) return(MLIST$nu) # if nexo > 0, substract lambda %*% EETA if( length(ov.x.dummy.ov.idx) > 0L ) { EETA <- computeEETA.LISREL(MLIST, mean.x=NULL, sample.mean=sample.mean, ov.y.dummy.ov.idx=ov.y.dummy.ov.idx, ov.x.dummy.ov.idx=ov.x.dummy.ov.idx, ov.y.dummy.lv.idx=ov.y.dummy.lv.idx, ov.x.dummy.lv.idx=ov.x.dummy.lv.idx) # 'regress' NU on X NU <- sample.mean - MLIST$lambda %*% EETA # just to make sure we have exact zeroes for all dummies NU[c(ov.y.dummy.ov.idx,ov.x.dummy.ov.idx)] <- 0 } else { # unrestricted mean NU <- sample.mean } NU } .internal_get_KAPPA <- function(MLIST = NULL, ov.y.dummy.ov.idx = NULL, ov.x.dummy.ov.idx = NULL, ov.y.dummy.lv.idx = NULL, ov.x.dummy.lv.idx = NULL, nexo = NULL) { nvar <- nrow(MLIST$lambda) if(!is.null(MLIST$gamma)) { nexo <- ncol(MLIST$gamma) } else if(!is.null(nexo)) { nexo <- nexo } else { stop("nexo not known") } # create KAPPA KAPPA <- matrix(0, nvar, nexo) if(!is.null(MLIST$gamma)) { KAPPA[ov.y.dummy.ov.idx,] <- MLIST$gamma[ov.y.dummy.lv.idx,,drop=FALSE] } else 
if(length(ov.x.dummy.ov.idx) > 0L) { KAPPA[ov.y.dummy.ov.idx,] <- MLIST$beta[ov.y.dummy.lv.idx, ov.x.dummy.lv.idx, drop=FALSE] } KAPPA } # old version of computeEYetax (using 'fixing') computeYHATetax.LISREL <- function(MLIST=NULL, eXo=NULL, ETA=NULL, sample.mean=NULL, ov.y.dummy.ov.idx=NULL, ov.x.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL, ov.x.dummy.lv.idx=NULL, Nobs = 1L) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) lv.dummy.idx <- c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) ov.dummy.idx <- c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) # exogenous variables? if(is.null(eXo)) { nexo <- 0L } else { nexo <- ncol(eXo) # check ETA rows if(!(nrow(ETA) == 1L || nrow(ETA) == nrow(eXo))) { stop("lavaan ERROR: !(nrow(ETA) == 1L || nrow(ETA) == nrow(eXo))") } } # get NU NU <- .internal_get_NU(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # ALPHA? (reconstruct, but no 'fix') ALPHA <- .internal_get_ALPHA(MLIST = MLIST, sample.mean = sample.mean, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx) # fix NU if(length(lv.dummy.idx) > 0L) { NU[ov.dummy.idx, 1L] <- ALPHA[lv.dummy.idx, 1L] } # fix LAMBDA (remove dummies) ## FIXME -- needed? LAMBDA <- MLIST$lambda if(length(lv.dummy.idx) > 0L) { LAMBDA <- LAMBDA[, -lv.dummy.idx, drop=FALSE] nfac <- ncol(LAMBDA) LAMBDA[ov.y.dummy.ov.idx,] <- MLIST$beta[ov.y.dummy.lv.idx, seq_len(nfac), drop=FALSE] } # compute YHAT YHAT <- sweep(ETA %*% t(LAMBDA), MARGIN=2, NU, "+") # Kappa + eXo? # note: Kappa elements are either in Gamma or in Beta if(nexo > 0L) { # create KAPPA KAPPA <- .internal_get_KAPPA(MLIST = MLIST, ov.y.dummy.ov.idx = ov.y.dummy.ov.idx, ov.x.dummy.ov.idx = ov.x.dummy.ov.idx, ov.y.dummy.lv.idx = ov.y.dummy.lv.idx, ov.x.dummy.lv.idx = ov.x.dummy.lv.idx, nexo = nexo) # expand YHAT if ETA only has 1 row if(nrow(YHAT) == 1L) { YHAT <- sweep(eXo %*% t(KAPPA), MARGIN=2, STATS=YHAT, FUN="+") } else { # add fixed part YHAT <- YHAT + (eXo %*% t(KAPPA)) } # put back eXo if(length(ov.x.dummy.ov.idx) > 0L) { YHAT[, ov.x.dummy.ov.idx] <- eXo } } else { # duplicate? if(is.numeric(Nobs) && Nobs > 1L && nrow(YHAT) == 1L) { YHAT <- matrix(YHAT, Nobs, nvar, byrow=TRUE) # YHAT <- YHAT[ rep(1L, Nobs), ] } } # delta? # FIXME: not used here? 
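# note: the fitted values computed above follow the decomposition
#     YHAT = NU + ETA %*% t(LAMBDA) + eXo %*% t(KAPPA)
# a minimal usage sketch (assuming a complete MLIST and centered factor
# scores in ETA; illustration only):
# YHAT <- computeYHATetax.LISREL(MLIST = MLIST, eXo = NULL,
#            ETA = matrix(0, 1L, ncol(MLIST$lambda)),
#            sample.mean = sample.mean, Nobs = 1L)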
#if(!is.null(DELTA)) { # YHAT <- sweep(YHAT, MARGIN=2, DELTA, "*") #} YHAT } # deal with 'dummy' OV.X latent variables # create additional matrices (eg GAMMA), and resize # remove all ov.x related entries MLIST2MLISTX <- function(MLIST=NULL, ov.x.dummy.ov.idx = NULL, ov.x.dummy.lv.idx = NULL) { lv.idx <- ov.x.dummy.lv.idx ov.idx <- ov.x.dummy.ov.idx if(length(lv.idx) == 0L) return(MLIST) if(!is.null(MLIST$gamma)) { nexo <- ncol(MLIST$gamma) } else { nexo <- length(ov.x.dummy.ov.idx) } nvar <- nrow(MLIST$lambda) nfac <- ncol(MLIST$lambda) - length(lv.idx) # copy MLISTX <- MLIST # fix LAMBDA: # - remove all ov.x related columns/rows MLISTX$lambda <- MLIST$lambda[-ov.idx, -lv.idx,drop=FALSE] # fix THETA: # - remove ov.x related columns/rows MLISTX$theta <- MLIST$theta[-ov.idx, -ov.idx, drop=FALSE] # fix PSI: # - remove ov.x related columns/rows MLISTX$psi <- MLIST$psi[-lv.idx, -lv.idx, drop=FALSE] # create GAMMA if(length(ov.x.dummy.lv.idx) > 0L) { MLISTX$gamma <- MLIST$beta[-lv.idx, lv.idx, drop=FALSE] } # fix BETA (remove if empty) if(!is.null(MLIST$beta)) { MLISTX$beta <- MLIST$beta[-lv.idx, -lv.idx, drop=FALSE] if(ncol(MLISTX$beta) == 0L) MLISTX$beta <- NULL } # fix NU if(!is.null(MLIST$nu)) { MLISTX$nu <- MLIST$nu[-ov.idx, 1L, drop=FALSE] } # fix ALPHA if(!is.null(MLIST$alpha)) { MLISTX$alpha <- MLIST$alpha[-lv.idx, 1L, drop=FALSE] } MLISTX } # create MLIST from MLISTX MLISTX2MLIST <- function(MLISTX=NULL, ov.x.dummy.ov.idx = NULL, ov.x.dummy.lv.idx = NULL, mean.x=NULL, cov.x=NULL) { lv.idx <- ov.x.dummy.lv.idx; ndum <- length(lv.idx) ov.idx <- ov.x.dummy.ov.idx if(length(lv.idx) == 0L) return(MLISTX) stopifnot(!is.null(cov.x), !is.null(mean.x)) nvar <- nrow(MLISTX$lambda); nfac <- ncol(MLISTX$lambda) # copy MLIST <- MLISTX # resize matrices MLIST$lambda <- rbind(cbind(MLISTX$lambda, matrix(0, nvar, ndum)), matrix(0, ndum, nfac+ndum)) MLIST$psi <- rbind(cbind(MLISTX$psi, matrix(0, nfac, ndum)), matrix(0, ndum, nfac+ndum)) MLIST$theta <- rbind(cbind(MLISTX$theta, matrix(0, nvar, ndum)), matrix(0, ndum, nvar+ndum)) if(!is.null(MLISTX$beta)) { MLIST$beta <- rbind(cbind(MLISTX$beta, matrix(0, nfac, ndum)), matrix(0, ndum, nfac+ndum)) } if(!is.null(MLISTX$alpha)) { MLIST$alpha <- rbind(MLISTX$alpha, matrix(0, ndum, 1)) } if(!is.null(MLISTX$nu)) { MLIST$nu <- rbind(MLISTX$nu, matrix(0, ndum, 1)) } # fix LAMBDA: # - add columns for all dummy latent variables MLIST$lambda[ cbind(ov.idx, lv.idx) ] <- 1 # fix PSI # - move cov.x elements to PSI MLIST$psi[lv.idx, lv.idx] <- cov.x # move (ov.x.dummy elements of) GAMMA to BETA MLIST$beta[seq_len(nfac), ov.x.dummy.lv.idx] <- MLISTX$gamma MLIST$gamma <- NULL # fix ALPHA if(!is.null(MLIST$alpha)) { MLIST$alpha[lv.idx] <- mean.x } MLIST } # if DELTA parameterization, compute residual elements (in theta, or psi) # of observed categorical variables, as a function of other model parameters setResidualElements.LISREL <- function(MLIST=NULL, num.idx=NULL, ov.y.dummy.ov.idx=NULL, ov.y.dummy.lv.idx=NULL) { # remove num.idx from ov.y.dummy.* if(length(num.idx) > 0L && length(ov.y.dummy.ov.idx) > 0L) { n.idx <- which(ov.y.dummy.ov.idx %in% num.idx) if(length(n.idx) > 0L) { ov.y.dummy.ov.idx <- ov.y.dummy.ov.idx[-n.idx] ov.y.dummy.lv.idx <- ov.y.dummy.lv.idx[-n.idx] } } # force non-numeric theta elements to be zero if(length(num.idx) > 0L) { diag(MLIST$theta)[-num.idx] <- 0.0 } else { diag(MLIST$theta) <- 0.0 } if(length(ov.y.dummy.ov.idx) > 0L) { MLIST$psi[ cbind(ov.y.dummy.lv.idx, ov.y.dummy.lv.idx) ] <- 0.0 } # special case: PSI=0, and lambda=I (eg ex3.12) 
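# note: under the DELTA parameterization the total variance of each y*
# equals 1/delta^2, so the residual variance is what remains after
# subtracting the model-explained part (diag.Sigma); e.g. if delta = 1
# and the model explains 0.64, the residual element becomes
# 1 - 0.64 = 0.36 (see RESIDUAL below). The special case flagged above
# (all PSI diagonal elements zero, LAMBDA diagonal equal to 1) is
# handled first.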
if(ncol(MLIST$psi) > 0L && sum(diag(MLIST$psi)) == 0.0 && all(diag(MLIST$lambda) == 1)) { ### FIXME: more elegant/general solution?? diag(MLIST$psi) <- 1 Sigma.hat <- computeSigmaHat.LISREL(MLIST = MLIST, delta=FALSE) diag.Sigma <- diag(Sigma.hat) - 1.0 } else if(ncol(MLIST$psi) == 0L) { diag.Sigma <- rep(0, ncol(MLIST$theta)) } else { Sigma.hat <- computeSigmaHat.LISREL(MLIST = MLIST, delta=FALSE) diag.Sigma <- diag(Sigma.hat) } if(is.null(MLIST$delta)) { delta <- rep(1, length(diag.Sigma)) } else { delta <- MLIST$delta } # theta = 1/delta^2 - diag( LAMBDA (I-B)^-1 PSI (I-B)^-T t(LAMBDA) ) RESIDUAL <- as.vector(1/(delta*delta) - diag.Sigma) if(length(num.idx) > 0L) { diag(MLIST$theta)[-num.idx] <- RESIDUAL[-num.idx] } else { diag(MLIST$theta) <- RESIDUAL } # move ov.y.dummy 'RESIDUAL' elements from THETA to PSI if(length(ov.y.dummy.ov.idx) > 0L) { MLIST$psi[cbind(ov.y.dummy.lv.idx, ov.y.dummy.lv.idx)] <- MLIST$theta[cbind(ov.y.dummy.ov.idx, ov.y.dummy.ov.idx)] MLIST$theta[cbind(ov.y.dummy.ov.idx, ov.y.dummy.ov.idx)] <- 0.0 } MLIST } # if THETA parameterization, compute delta elements # of observed categorical variables, as a function of other model parameters setDeltaElements.LISREL <- function(MLIST=NULL, num.idx=NULL) { Sigma.hat <- computeSigmaHat.LISREL(MLIST = MLIST, delta=FALSE) diag.Sigma <- diag(Sigma.hat) # (1/delta^2) = diag( LAMBDA (I-B)^-1 PSI (I-B)^-T t(LAMBDA) ) + THETA #tmp <- diag.Sigma + THETA tmp <- diag.Sigma tmp[tmp < 0] <- as.numeric(NA) MLIST$delta[, 1L] <- sqrt(1/tmp) # numeric deltas stay 1.0 if(length(num.idx) > 0L) { MLIST$delta[num.idx] <- 1.0 } MLIST } # compute Sigma/ETA: variances/covariances of BOTH observed and latent variables computeCOV.LISREL <- function(MLIST=NULL, cov.x=NULL, delta=TRUE) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA) PSI <- MLIST$psi; nlat <- nrow(PSI) THETA <- MLIST$theta BETA <- MLIST$beta # 'extend' matrices LAMBDA2 <- rbind(LAMBDA, diag(nlat)) THETA2 <- lav_matrix_bdiag(THETA, matrix(0,nlat,nlat)) # beta? if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA2 } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA2 %*% IB.inv } # compute augmented COV matrix COV <- tcrossprod(LAMBDA..IB.inv %*% PSI, LAMBDA..IB.inv) + THETA2 # if delta, scale if(delta && !is.null(MLIST$delta)) { DELTA <- diag(MLIST$delta[,1L], nrow=nvar, ncol=nvar) COV[seq_len(nvar),seq_len(nvar)] <- DELTA %*% COV[seq_len(nvar),seq_len(nvar)] %*% DELTA } # if GAMMA, also x part GAMMA <- MLIST$gamma if(!is.null(GAMMA)) { stopifnot(!is.null(cov.x)) if(is.null(BETA)) { SX <- tcrossprod(GAMMA %*% cov.x, GAMMA) } else { IB.inv..GAMMA <- IB.inv %*% GAMMA SX <- tcrossprod(IB.inv..GAMMA %*% cov.x, IB.inv..GAMMA) } COV[(nvar+1):(nvar+nlat),(nvar+1):(nvar+nlat)] <- COV[(nvar+1):(nvar+nlat),(nvar+1):(nvar+nlat)] + SX } COV } # derivative of the objective function derivative.F.LISREL <- function(MLIST=NULL, Omega=NULL, Omega.mu=NULL) { LAMBDA <- MLIST$lambda PSI <- MLIST$psi BETA <- MLIST$beta ALPHA <- MLIST$alpha # beta? if(is.null(BETA)) { LAMBDA..IB.inv <- LAMBDA } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # meanstructure? meanstructure <- FALSE; if(!is.null(Omega.mu)) meanstructure <- TRUE # group weight? group.w.free <- FALSE; if(!is.null(MLIST$gw)) group.w.free <- TRUE # pre-compute some values tLAMBDA..IB.inv <- t(LAMBDA..IB.inv) if(!is.null(BETA)) { Omega..LAMBDA..IB.inv..PSI..tIB.inv <- ( Omega %*% LAMBDA..IB.inv %*% PSI %*% t(IB.inv) ) } else { Omega..LAMBDA <- Omega %*% LAMBDA } # 1. 
LAMBDA if(!is.null(BETA)) { if(meanstructure) { LAMBDA.deriv <- -1.0 * ( Omega.mu %*% t(ALPHA) %*% t(IB.inv) + Omega..LAMBDA..IB.inv..PSI..tIB.inv ) } else { LAMBDA.deriv <- -1.0 * Omega..LAMBDA..IB.inv..PSI..tIB.inv } } else { # no BETA if(meanstructure) { LAMBDA.deriv <- -1.0 * ( Omega.mu %*% t(ALPHA) + Omega..LAMBDA %*% PSI ) } else { LAMBDA.deriv <- -1.0 * (Omega..LAMBDA %*% PSI) } } # 2. BETA if(!is.null(BETA)) { if(meanstructure) { BETA.deriv <- -1.0*(( t(IB.inv) %*% (t(LAMBDA) %*% Omega.mu %*% t(ALPHA)) %*% t(IB.inv)) + (tLAMBDA..IB.inv %*% Omega..LAMBDA..IB.inv..PSI..tIB.inv)) } else { BETA.deriv <- -1.0 * ( tLAMBDA..IB.inv %*% Omega..LAMBDA..IB.inv..PSI..tIB.inv ) } } else { BETA.deriv <- NULL } # 3. PSI PSI.deriv <- -1.0 * ( tLAMBDA..IB.inv %*% Omega %*% LAMBDA..IB.inv ) diag(PSI.deriv) <- 0.5 * diag(PSI.deriv) # 4. THETA THETA.deriv <- -1.0 * Omega diag(THETA.deriv) <- 0.5 * diag(THETA.deriv) if(meanstructure) { # 5. NU NU.deriv <- -1.0 * Omega.mu # 6. ALPHA ALPHA.deriv <- -1.0 * t( t(Omega.mu) %*% LAMBDA..IB.inv ) } else { NU.deriv <- NULL ALPHA.deriv <- NULL } if(group.w.free) { GROUP.W.deriv <- 0.0 } else { GROUP.W.deriv <- NULL } list(lambda = LAMBDA.deriv, beta = BETA.deriv, theta = THETA.deriv, psi = PSI.deriv, nu = NU.deriv, alpha = ALPHA.deriv, gw = GROUP.W.deriv) } # dSigma/dx -- per model matrix # note: # we avoid using the duplication and elimination matrices # for now (perhaps until we'll use the Matrix package) derivative.sigma.LISREL_OLD <- function(m="lambda", # all model matrix elements, or only a few? # NOTE: for symmetric matrices, # we assume that the have full size # (nvar*nvar) (but already correct for # symmetry) idx=seq_len(length(MLIST[[m]])), MLIST=NULL, delta = TRUE) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) PSI <- MLIST$psi # only lower.tri part of sigma (not same order as elimination matrix?) v.idx <- lav_matrix_vech_idx( nvar ); pstar <- nvar*(nvar+1)/2 # shortcut for gamma, nu, alpha and tau: empty matrix if(m == "nu" || m == "alpha" || m == "tau" || m == "gamma" || m == "gw") { return( matrix(0.0, nrow=pstar, ncol=length(idx)) ) } # Delta? delta.flag <- FALSE if(delta && !is.null(MLIST$delta)) { DELTA <- MLIST$delta delta.flag <- TRUE } else if(m == "delta") { # modindices? return( matrix(0.0, nrow=pstar, ncol=length(idx)) ) } # beta? if(!is.null(MLIST$ibeta.inv)) { IB.inv <- MLIST$ibeta.inv } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) } # pre if(m == "lambda" || m == "beta") IK <- diag(nvar*nvar) + lav_matrix_commutation(nvar, nvar) if(m == "lambda" || m == "beta") { IB.inv..PSI..tIB.inv..tLAMBDA <- IB.inv %*% PSI %*% t(IB.inv) %*% t(LAMBDA) } if(m == "beta" || m == "psi") { LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # here we go: if(m == "lambda") { DX <- IK %*% t(IB.inv..PSI..tIB.inv..tLAMBDA %x% diag(nvar)) if(delta.flag) DX <- DX * as.vector(DELTA %x% DELTA) } else if(m == "beta") { DX <- IK %*% ( t(IB.inv..PSI..tIB.inv..tLAMBDA) %x% LAMBDA..IB.inv ) # this is not really needed (because we select idx=m.el.idx) # but just in case we need all elements of beta... DX[,lav_matrix_diag_idx(nfac)] <- 0.0 if(delta.flag) DX <- DX * as.vector(DELTA %x% DELTA) } else if(m == "psi") { DX <- (LAMBDA..IB.inv %x% LAMBDA..IB.inv) # symmetry correction, but keeping all duplicated elements # since we depend on idx=m.el.idx # otherwise, we could simply postmultiply with the duplicationMatrix # we sum up lower.tri + upper.tri (but not the diagonal elements!) 
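# note: because PSI is symmetric, psi[i,j] and psi[j,i] refer to the
# same free parameter; the correction below therefore adds up the
# partial derivatives of both positions, while the diagonal elements
# are left untouched.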
#imatrix <- matrix(1:nfac^2,nfac,nfac) #lower.idx <- imatrix[lower.tri(imatrix, diag=FALSE)] #upper.idx <- imatrix[upper.tri(imatrix, diag=FALSE)] lower.idx <- lav_matrix_vech_idx(nfac, diagonal = FALSE) upper.idx <- lav_matrix_vechru_idx(nfac, diagonal = FALSE) # NOTE YR: upper.idx (see 3 lines up) is wrong in MH patch! # fixed again 13/06/2012 after bug report of Mijke Rhemtulla. offdiagSum <- DX[,lower.idx] + DX[,upper.idx] DX[,c(lower.idx, upper.idx)] <- cbind(offdiagSum, offdiagSum) if(delta.flag) DX <- DX * as.vector(DELTA %x% DELTA) } else if(m == "theta") { DX <- diag(nvar*nvar) # very sparse... # symmetry correction not needed, since all off-diagonal elements # are zero? if(delta.flag) DX <- DX * as.vector(DELTA %x% DELTA) } else if(m == "delta") { Omega <- computeSigmaHat.LISREL(MLIST, delta=FALSE) DD <- diag(DELTA[,1], nvar, nvar) DD.Omega <- (DD %*% Omega) A <- DD.Omega %x% diag(nvar); B <- diag(nvar) %x% DD.Omega DX <- A[,lav_matrix_diag_idx(nvar),drop=FALSE] + B[,lav_matrix_diag_idx(nvar),drop=FALSE] } else { stop("wrong model matrix names: ", m, "\n") } DX <- DX[v.idx, idx, drop=FALSE] DX } # dSigma/dx -- per model matrix derivative.sigma.LISREL <- function(m = "lambda", # all model matrix elements, or only a few? # NOTE: for symmetric matrices, # we assume that the have full size # (nvar*nvar) (but already correct for # symmetry) idx = seq_len(length(MLIST[[m]])), MLIST = NULL, vech = TRUE, delta = TRUE) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) PSI <- MLIST$psi # only lower.tri part of sigma (not same order as elimination matrix?) v.idx <- lav_matrix_vech_idx( nvar ); pstar <- nvar*(nvar+1)/2 # shortcut for gamma, nu, alpha and tau: empty matrix if(m == "nu" || m == "alpha" || m == "tau" || m == "gamma" || m == "gw") { return( matrix(0.0, nrow=pstar, ncol=length(idx)) ) } # Delta? delta.flag <- FALSE if(delta && !is.null(MLIST$delta)) { DELTA <- MLIST$delta delta.flag <- TRUE } else if(m == "delta") { # modindices? return( matrix(0.0, nrow=pstar, ncol=length(idx)) ) } # beta? if(!is.null(MLIST$ibeta.inv)) { IB.inv <- MLIST$ibeta.inv } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) } # pre #if(m == "lambda" || m == "beta") # IK <- diag(nvar*nvar) + lav_matrix_commutation(nvar, nvar) if(m == "lambda" || m == "beta") { L1 <- LAMBDA %*% IB.inv %*% PSI %*% t(IB.inv) } if(m == "beta" || m == "psi") { LAMBDA..IB.inv <- LAMBDA %*% IB.inv } # here we go: if(m == "lambda") { KOL.idx <- matrix(1:(nvar*nfac), nvar, nfac, byrow = TRUE)[idx] DX <- (L1 %x% diag(nvar))[,idx, drop = FALSE] + (diag(nvar) %x% L1)[,KOL.idx, drop = FALSE] } else if(m == "beta") { KOL.idx <- matrix(1:(nfac*nfac), nfac, nfac, byrow = TRUE)[idx] DX <- (L1 %x% LAMBDA..IB.inv)[,idx, drop = FALSE] + (LAMBDA..IB.inv %x% L1)[, KOL.idx, drop = FALSE] # this is not really needed (because we select idx=m.el.idx) # but just in case we need all elements of beta... DX[, which(idx %in% lav_matrix_diag_idx(nfac))] <- 0.0 } else if(m == "psi") { DX <- (LAMBDA..IB.inv %x% LAMBDA..IB.inv) # symmetry correction, but keeping all duplicated elements # since we depend on idx=m.el.idx lower.idx <- lav_matrix_vech_idx(nfac, diagonal = FALSE) upper.idx <- lav_matrix_vechru_idx(nfac, diagonal = FALSE) offdiagSum <- DX[,lower.idx] + DX[,upper.idx] DX[,c(lower.idx, upper.idx)] <- cbind(offdiagSum, offdiagSum) DX <- DX[,idx, drop = FALSE] } else if(m == "theta") { #DX <- diag(nvar*nvar) # very sparse... 
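# instead of forming the full (nvar^2 x nvar^2) identity matrix and
# then selecting columns, only the requested columns are built: column
# k contains a single 1 in row idx[k]; the matrix-index assignment
# below does this in one step.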
DX <- matrix(0, nvar*nvar, length(idx)) DX[cbind(idx,seq_along(idx))] <- 1 # symmetry correction not needed, since all off-diagonal elements # are zero? } else if(m == "delta") { Omega <- computeSigmaHat.LISREL(MLIST, delta=FALSE) DD <- diag(DELTA[,1], nvar, nvar) DD.Omega <- (DD %*% Omega) A <- DD.Omega %x% diag(nvar); B <- diag(nvar) %x% DD.Omega DX <- A[,lav_matrix_diag_idx(nvar),drop=FALSE] + B[,lav_matrix_diag_idx(nvar),drop=FALSE] DX <- DX[,idx, drop = FALSE] } else { stop("wrong model matrix names: ", m, "\n") } if(delta.flag && !m == "delta") { DX <- DX * as.vector(DELTA %x% DELTA) } # vech? if(vech) { DX <- DX[v.idx,, drop=FALSE] } DX } # dMu/dx -- per model matrix derivative.mu.LISREL <- function(m="alpha", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) # shortcut for empty matrices if(m == "gamma" || m == "psi" || m == "theta" || m == "tau" || m == "delta"|| m == "gw") { return( matrix(0.0, nrow=nvar, ncol=length(idx) ) ) } # missing alpha if(is.null(MLIST$alpha)) ALPHA <- matrix(0, nfac, 1L) else ALPHA <- MLIST$alpha # beta? if(!is.null(MLIST$ibeta.inv)) { IB.inv <- MLIST$ibeta.inv } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) } if(m == "nu") { DX <- diag(nvar) } else if(m == "lambda") { DX <- t(IB.inv %*% ALPHA) %x% diag(nvar) } else if(m == "beta") { DX <- t(IB.inv %*% ALPHA) %x% (LAMBDA %*% IB.inv) # this is not really needed (because we select idx=m.el.idx) DX[,lav_matrix_diag_idx(nfac)] <- 0.0 } else if(m == "alpha") { DX <- LAMBDA %*% IB.inv } else { stop("wrong model matrix names: ", m, "\n") } DX <- DX[, idx, drop=FALSE] DX } # dTh/dx -- per model matrix derivative.th.LISREL <- function(m="tau", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), th.idx=NULL, MLIST=NULL, delta = TRUE) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) TAU <- MLIST$tau; nth <- nrow(TAU) # missing alpha if(is.null(MLIST$alpha)) { ALPHA <- matrix(0, nfac, 1L) } else { ALPHA <- MLIST$alpha } # missing nu if(is.null(MLIST$nu)) { NU <- matrix(0, nvar, 1L) } else { NU <- MLIST$nu } # Delta? delta.flag <- FALSE if(delta && !is.null(MLIST$delta)) { DELTA <- MLIST$delta delta.flag <- TRUE } if(is.null(th.idx)) { th.idx <- seq_len(nth) nlev <- rep(1L, nvar) K_nu <- diag(nvar) } else { nlev <- tabulate(th.idx, nbins=nvar); nlev[nlev == 0L] <- 1L K_nu <- matrix(0, sum(nlev), nvar) K_nu[ cbind(seq_len(sum(nlev)), rep(seq_len(nvar), times=nlev)) ] <- 1.0 } # shortcut for empty matrices if(m == "gamma" || m == "psi" || m == "theta" || m == "gw") { return( matrix(0.0, nrow=length(th.idx), ncol=length(idx) ) ) } # beta? 
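# note (applies to the Kronecker expressions below and in the other
# derivative.*.LISREL functions): they rely on the identity
#     vec(A B C) = (t(C) %x% A) vec(B)
# quick numeric check (illustration only):
# A <- matrix(rnorm(4), 2L); B <- matrix(rnorm(4), 2L); C <- matrix(rnorm(4), 2L)
# all.equal(as.vector(A %*% B %*% C),
#           as.vector((t(C) %x% A) %*% as.vector(B))) # TRUE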
if(!is.null(MLIST$ibeta.inv)) { IB.inv <- MLIST$ibeta.inv } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) } if(m == "tau") { DX <- matrix(0, nrow=length(th.idx), ncol=nth) DX[ th.idx > 0L, ] <- diag(nth) if(delta.flag) DX <- DX * as.vector(K_nu %*% DELTA) } else if(m == "nu") { DX <- (-1) * K_nu if(delta.flag) DX <- DX * as.vector(K_nu %*% DELTA) } else if(m == "lambda") { DX <- (-1) * t(IB.inv %*% ALPHA) %x% diag(nvar) DX <- K_nu %*% DX if(delta.flag) DX <- DX * as.vector(K_nu %*% DELTA) } else if(m == "beta") { DX <- (-1) * t(IB.inv %*% ALPHA) %x% (LAMBDA %*% IB.inv) # this is not really needed (because we select idx=m.el.idx) DX[,lav_matrix_diag_idx(nfac)] <- 0.0 DX <- K_nu %*% DX if(delta.flag) DX <- DX * as.vector(K_nu %*% DELTA) } else if(m == "alpha") { DX <- (-1) * LAMBDA %*% IB.inv DX <- K_nu %*% DX if(delta.flag) DX <- DX * as.vector(K_nu %*% DELTA) } else if(m == "delta") { DX1 <- matrix(0, nrow=length(th.idx), ncol=1) DX1[ th.idx > 0L, ] <- TAU DX2 <- NU + LAMBDA %*% IB.inv %*% ALPHA DX2 <- K_nu %*% DX2 DX <- K_nu * as.vector(DX1 - DX2) } else { stop("wrong model matrix names: ", m, "\n") } DX <- DX[, idx, drop=FALSE] DX } # dPi/dx -- per model matrix derivative.pi.LISREL <- function(m="lambda", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { LAMBDA <- MLIST$lambda; nvar <- nrow(LAMBDA); nfac <- ncol(LAMBDA) GAMMA <- MLIST$gamma; nexo <- ncol(GAMMA) # Delta? delta.flag <- FALSE if(!is.null(MLIST$delta)) { DELTA.diag <- MLIST$delta[,1L] delta.flag <- TRUE } # shortcut for empty matrices if(m == "tau" || m == "nu" || m == "alpha" || m == "psi" || m == "theta" || m == "gw") { return( matrix(0.0, nrow=nvar*nexo, ncol=length(idx) ) ) } # beta? if(!is.null(MLIST$ibeta.inv)) { IB.inv <- MLIST$ibeta.inv } else { IB.inv <- .internal_get_IB.inv(MLIST = MLIST) } if(m == "lambda") { DX <- t(IB.inv %*% GAMMA) %x% diag(nvar) if(delta.flag) DX <- DX * DELTA.diag } else if(m == "beta") { DX <- t(IB.inv %*% GAMMA) %x% (LAMBDA %*% IB.inv) # this is not really needed (because we select idx=m.el.idx) DX[,lav_matrix_diag_idx(nfac)] <- 0.0 if(delta.flag) DX <- DX * DELTA.diag } else if(m == "gamma") { DX <- diag(nexo) %x% (LAMBDA %*% IB.inv) if(delta.flag) DX <- DX * DELTA.diag } else if(m == "delta") { PRE <- rep(1, nexo) %x% diag(nvar) DX <- PRE * as.vector(LAMBDA %*% IB.inv %*% GAMMA) } else { stop("wrong model matrix names: ", m, "\n") } DX <- DX[, idx, drop=FALSE] DX } # dGW/dx -- per model matrix derivative.gw.LISREL <- function(m="gw", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { # shortcut for empty matrices if(m != "gw") { return( matrix(0.0, nrow=1L, ncol=length(idx) ) ) } else { # m == "gw" DX <- matrix(1.0, 1, 1) } DX <- DX[, idx, drop=FALSE] DX } # dlambda/dx -- per model matrix derivative.lambda.LISREL <- function(m="lambda", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { LAMBDA <- MLIST$lambda # shortcut for empty matrices if(m != "lambda") { return( matrix(0.0, nrow=length(LAMBDA), ncol=length(idx) ) ) } else { # m == "lambda" DX <- diag(1, nrow=length(LAMBDA), ncol=length(LAMBDA)) } DX <- DX[, idx, drop=FALSE] DX } # dpsi/dx -- per model matrix - FIXME!!!!! derivative.psi.LISREL <- function(m="psi", # all model matrix elements, or only a few? 
idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { PSI <- MLIST$psi; nfac <- nrow(PSI) v.idx <- lav_matrix_vech_idx( nfac ) # shortcut for empty matrices if(m != "psi") { DX <- matrix(0.0, nrow=length(PSI), ncol=length(idx)) return(DX[v.idx,,drop=FALSE]) } else { # m == "psi" DX <- diag(1, nrow=length(PSI), ncol=length(PSI)) } DX <- DX[v.idx, idx, drop=FALSE] DX } # dtheta/dx -- per model matrix derivative.theta.LISREL <- function(m="theta", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { THETA <- MLIST$theta; nvar <- nrow(THETA) v.idx <- lav_matrix_vech_idx(nvar) # shortcut for empty matrices if(m != "theta") { DX <- matrix(0.0, nrow=length(THETA), ncol=length(idx)) return(DX[v.idx,,drop=FALSE]) } else { # m == "theta" DX <- diag(1, nrow=length(THETA), ncol=length(THETA)) } DX <- DX[v.idx, idx, drop=FALSE] DX } # dbeta/dx -- per model matrix derivative.beta.LISREL <- function(m="beta", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { BETA <- MLIST$beta # shortcut for empty matrices if(m != "beta") { return( matrix(0.0, nrow=length(BETA), ncol=length(idx)) ) } else { # m == "beta" DX <- diag(1, nrow=length(BETA), ncol=length(BETA)) } DX <- DX[, idx, drop=FALSE] DX } # dgamma/dx -- per model matrix derivative.gamma.LISREL <- function(m="gamma", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { GAMMA <- MLIST$gamma # shortcut for empty matrices if(m != "gamma") { return( matrix(0.0, nrow=length(GAMMA), ncol=length(idx)) ) } else { # m == "gamma" DX <- diag(1, nrow=length(GAMMA), ncol=length(GAMMA)) } DX <- DX[, idx, drop=FALSE] DX } # dnu/dx -- per model matrix derivative.nu.LISREL <- function(m="nu", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { NU <- MLIST$nu # shortcut for empty matrices if(m != "nu") { return( matrix(0.0, nrow=length(NU), ncol=length(idx)) ) } else { # m == "nu" DX <- diag(1, nrow=length(NU), ncol=length(NU)) } DX <- DX[, idx, drop=FALSE] DX } # dtau/dx -- per model matrix derivative.tau.LISREL <- function(m="tau", # all model matrix elements, or only a few? idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { TAU <- MLIST$tau # shortcut for empty matrices if(m != "tau") { return( matrix(0.0, nrow=length(TAU), ncol=length(idx)) ) } else { # m == "tau" DX <- diag(1, nrow=length(TAU), ncol=length(TAU)) } DX <- DX[, idx, drop=FALSE] DX } # dalpha/dx -- per model matrix derivative.alpha.LISREL <- function(m="alpha", # all model matrix elements, or only a few? 
idx=seq_len(length(MLIST[[m]])), MLIST=NULL) { ALPHA <- MLIST$alpha # shortcut for empty matrices if(m != "alpha") { return( matrix(0.0, nrow=length(ALPHA), ncol=length(idx)) ) } else { # m == "alpha" DX <- diag(1, nrow=length(ALPHA), ncol=length(ALPHA)) } DX <- DX[, idx, drop=FALSE] DX } # MLIST = NULL; meanstructure=TRUE; th=TRUE; delta=TRUE; pi=TRUE; gw=FALSE # lav_matrix_vech_idx <- lavaan:::lav_matrix_vech_idx; lav_matrix_vechru_idx <- lavaan:::lav_matrix_vechru_idx # vec <- lavaan:::vec; lav_func_jacobian_complex <- lavaan:::lav_func_jacobian_complex # computeSigmaHat.LISREL <- lavaan:::computeSigmaHat.LISREL # setDeltaElements.LISREL <- lavaan:::setDeltaElements.LISREL TESTING_derivatives.LISREL <- function(MLIST = NULL, nvar = NULL, nfac = NULL, nexo = NULL, th.idx = NULL, num.idx = NULL, meanstructure = TRUE, th = TRUE, delta = TRUE, pi = TRUE, gw = FALSE, theta = FALSE, debug = FALSE) { if(is.null(MLIST)) { # create artificial matrices, compare 'numerical' vs 'analytical' # derivatives #nvar <- 12; nfac <- 3; nexo <- 4 # this combination is special? if(is.null(nvar)) { nvar <- 20 } if(is.null(nfac)) { nfac <- 6 } if(is.null(nexo)) { nexo <- 5 } if(is.null(num.idx)) { num.idx <- sort(sample(seq_len(nvar), ceiling(nvar/2))) } if(is.null(th.idx)) { th.idx <- integer(0L) for(i in seq_len(nvar)) { if(i %in% num.idx) { th.idx <- c(th.idx, 0) } else { th.idx <- c(th.idx, rep(i, sample(c(1,1,2,6), 1L))) } } } nth <- sum(th.idx > 0L) MLIST <- list() MLIST$lambda <- matrix(0,nvar,nfac) MLIST$beta <- matrix(0,nfac,nfac) MLIST$theta <- matrix(0,nvar,nvar) MLIST$psi <- matrix(0,nfac,nfac) if(meanstructure) { MLIST$alpha <- matrix(0,nfac,1L) MLIST$nu <- matrix(0,nvar,1L) } if(th) MLIST$tau <- matrix(0,nth,1L) if(delta) MLIST$delta <- matrix(0,nvar,1L) MLIST$gamma <- matrix(0,nfac,nexo) if(gw) MLIST$gw <- matrix(0, 1L, 1L) # feed random numbers MLIST <- lapply(MLIST, function(x) {x[,] <- rnorm(length(x)); x}) # fix diag(MLIST$beta) <- 0.0 diag(MLIST$theta) <- diag(MLIST$theta)*diag(MLIST$theta) * 10 diag(MLIST$psi) <- diag(MLIST$psi)*diag(MLIST$psi) * 10 MLIST$psi[ lav_matrix_vechru_idx(nfac) ] <- MLIST$psi[ lav_matrix_vech_idx(nfac) ] MLIST$theta[ lav_matrix_vechru_idx(nvar) ] <- MLIST$theta[ lav_matrix_vech_idx(nvar) ] if(delta) MLIST$delta[,] <- abs(MLIST$delta)*10 } else { nvar <- nrow(MLIST$lambda) } compute.sigma <- function(x, mm="lambda", MLIST=NULL) { mlist <- MLIST if(mm %in% c("psi", "theta")) { mlist[[mm]] <- lav_matrix_vech_reverse(x) } else { mlist[[mm]][,] <- x } if(theta) { mlist <- setDeltaElements.LISREL(MLIST = mlist, num.idx = num.idx) } lav_matrix_vech(computeSigmaHat.LISREL(mlist)) } compute.mu <- function(x, mm="lambda", MLIST=NULL) { mlist <- MLIST if(mm %in% c("psi", "theta")) { mlist[[mm]] <- lav_matrix_vech_reverse(x) } else { mlist[[mm]][,] <- x } if(theta) { mlist <- setDeltaElements.LISREL(MLIST = mlist, num.idx = num.idx) } computeMuHat.LISREL(mlist) } compute.th2 <- function(x, mm="tau", MLIST=NULL, th.idx) { mlist <- MLIST if(mm %in% c("psi", "theta")) { mlist[[mm]] <- lav_matrix_vech_reverse(x) } else { mlist[[mm]][,] <- x } if(theta) { mlist <- setDeltaElements.LISREL(MLIST = mlist, num.idx = num.idx) } computeTH.LISREL(mlist, th.idx=th.idx) } compute.pi <- function(x, mm="lambda", MLIST=NULL) { mlist <- MLIST if(mm %in% c("psi", "theta")) { mlist[[mm]] <- lav_matrix_vech_reverse(x) } else { mlist[[mm]][,] <- x } if(theta) { mlist <- setDeltaElements.LISREL(MLIST = mlist, num.idx = num.idx) } computePI.LISREL(mlist) } compute.gw <- function(x, mm="gw", 
MLIST=NULL) { mlist <- MLIST if(mm %in% c("psi", "theta")) { mlist[[mm]] <- lav_matrix_vech_reverse(x) } else { mlist[[mm]][,] <- x } if(theta) { mlist <- setDeltaElements.LISREL(MLIST = mlist, num.idx = num.idx) } mlist$gw[1,1] } # if theta, set MLIST$delta if(theta) { MLIST <- setDeltaElements.LISREL(MLIST = MLIST, num.idx = num.idx) } for(mm in names(MLIST)) { if(mm %in% c("psi", "theta")) { x <- lav_matrix_vech(MLIST[[mm]]) } else { x <- lav_matrix_vec(MLIST[[mm]]) } if(mm == "delta" && theta) next if(debug) { cat("### mm = ", mm, "\n") } # 1. sigma DX1 <- lav_func_jacobian_complex(func=compute.sigma, x=x, mm=mm, MLIST=MLIST) DX2 <- derivative.sigma.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST, delta = !theta) if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(DX2)), diagonal=FALSE) if(length(idx) > 0L) DX2 <- DX2[,-idx] } if(theta) { sigma.hat <- computeSigmaHat.LISREL(MLIST=MLIST, delta=FALSE) R <- lav_deriv_cov2cor(sigma.hat, num.idx = num.idx) DX3 <- DX2 DX2 <- R %*% DX2 } if(debug) { cat("[SIGMA] mm = ", sprintf("%-8s:", mm), "DX1 (numerical):\n"); print(zapsmall(DX1)); cat("\n") cat("[SIGMA] mm = ", sprintf("%-8s:", mm), "DX2 (analytical):\n"); print(DX2); cat("\n") if(theta) { cat("[SIGMA] mm = ", sprintf("%-8s:", mm), "DX3 (analytical):\n"); print(DX3); cat("\n") } } cat("[SIGMA] mm = ", sprintf("%-8s:", mm), "sum delta = ", sprintf("%12.9f", sum(DX1-DX2)), " max delta = ", sprintf("%12.9f", max(DX1-DX2)), "\n") # 2. mu DX1 <- lav_func_jacobian_complex(func=compute.mu, x=x, mm=mm, MLIST=MLIST) DX2 <- derivative.mu.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST) if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(DX2)), diagonal = FALSE) if(length(idx) > 0L) DX2 <- DX2[,-idx] } cat("[MU ] mm = ", sprintf("%-8s:", mm), "sum delta = ", sprintf("%12.9f", sum(DX1-DX2)), " max delta = ", sprintf("%12.9f", max(DX1-DX2)), "\n") if(debug) { cat("[MU ] mm = ", sprintf("%-8s:", mm), "DX1 (numerical):\n"); print(zapsmall(DX1)); cat("\n") cat("[MU ] mm = ", sprintf("%-8s:", mm), "DX2 (analytical):\n"); print(DX2); cat("\n") } # 3. th if(th) { DX1 <- lav_func_jacobian_complex(func=compute.th2, x=x, mm=mm, MLIST=MLIST, th.idx=th.idx) DX2 <- derivative.th.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST, th.idx=th.idx, delta=TRUE) if(theta) { # 1. compute dDelta.dx dxSigma <- derivative.sigma.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST, delta = !theta) var.idx <- which(!lav_matrix_vech_idx(nvar) %in% lav_matrix_vech_idx(nvar, diagonal = FALSE)) sigma.hat <- computeSigmaHat.LISREL(MLIST=MLIST, delta=FALSE) dsigma <- diag(sigma.hat) # dy/ddsigma = -0.5/(ddsigma*sqrt(ddsigma)) dDelta.dx <- dxSigma[var.idx,] * -0.5 / (dsigma*sqrt(dsigma)) # 2. compute dth.dDelta dth.dDelta <- derivative.th.LISREL(m="delta", idx=seq_len(length(MLIST[["delta"]])), MLIST=MLIST, th.idx=th.idx) # 3. 
add dth.dDelta %*% dDelta.dx no.num.idx <- which(th.idx > 0) DX2[no.num.idx,] <- DX2[no.num.idx,,drop=FALSE] + (dth.dDelta %*% dDelta.dx)[no.num.idx,,drop=FALSE] #DX2 <- DX2 + dth.dDelta %*% dDelta.dx } if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(DX2)), diagonal = FALSE) if(length(idx) > 0L) DX2 <- DX2[,-idx] } cat("[TH ] mm = ", sprintf("%-8s:", mm), "sum delta = ", sprintf("%12.9f", sum(DX1-DX2)), " max delta = ", sprintf("%12.9f", max(DX1-DX2)), "\n") if(debug) { cat("[TH ] mm = ",sprintf("%-8s:", mm),"DX1 (numerical):\n") print(zapsmall(DX1)); cat("\n") cat("[TH ] mm = ",sprintf("%-8s:", mm),"DX2 (analytical):\n") print(DX2); cat("\n") } } # 4. pi if(pi) { DX1 <- lav_func_jacobian_complex(func=compute.pi, x=x, mm=mm, MLIST=MLIST) DX2 <- derivative.pi.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST) if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(DX2)), diagonal = FALSE) if(length(idx) > 0L) DX2 <- DX2[,-idx] } if(theta) { # 1. compute dDelta.dx dxSigma <- derivative.sigma.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST, delta = !theta) if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(dxSigma)), diagonal = FALSE) if(length(idx) > 0L) dxSigma <- dxSigma[,-idx] } var.idx <- which(!lav_matrix_vech_idx(nvar) %in% lav_matrix_vech_idx(nvar, diagonal = FALSE)) sigma.hat <- computeSigmaHat.LISREL(MLIST=MLIST, delta=FALSE) dsigma <- diag(sigma.hat) # dy/ddsigma = -0.5/(ddsigma*sqrt(ddsigma)) dDelta.dx <- dxSigma[var.idx,] * -0.5 / (dsigma*sqrt(dsigma)) # 2. compute dpi.dDelta dpi.dDelta <- derivative.pi.LISREL(m="delta", idx=seq_len(length(MLIST[["delta"]])), MLIST=MLIST) # 3. add dpi.dDelta %*% dDelta.dx no.num.idx <- which(! seq.int(1L, nvar) %in% num.idx ) no.num.idx <- rep(seq.int(0,nexo-1) * nvar, each=length(no.num.idx)) + no.num.idx DX2[no.num.idx,] <- DX2[no.num.idx,,drop=FALSE] + (dpi.dDelta %*% dDelta.dx)[no.num.idx,,drop=FALSE] } cat("[PI ] mm = ", sprintf("%-8s:", mm), "sum delta = ", sprintf("%12.9f", sum(DX1-DX2)), " max delta = ", sprintf("%12.9f", max(DX1-DX2)), "\n") if(debug) { cat("[PI ] mm = ",sprintf("%-8s:", mm),"DX1 (numerical):\n") print(zapsmall(DX1)); cat("\n") cat("[PI ] mm = ",sprintf("%-8s:", mm),"DX2 (analytical):\n") print(DX2); cat("\n") } } # 5. gw if(gw) { DX1 <- lav_func_jacobian_complex(func=compute.gw, x=x, mm=mm, MLIST=MLIST) DX2 <- derivative.gw.LISREL(m=mm, idx=seq_len(length(MLIST[[mm]])), MLIST=MLIST) if(mm %in% c("psi","theta")) { # remove duplicated columns of symmetric matrices idx <- lav_matrix_vechru_idx(sqrt(ncol(DX2)), diagonal = FALSE) if(length(idx) > 0L) DX2 <- DX2[,-idx] } cat("[GW ] mm = ", sprintf("%-8s:", mm), "sum delta = ", sprintf("%12.9f", sum(DX1-DX2)), " max delta = ", sprintf("%12.9f", max(DX1-DX2)), "\n") if(debug) { cat("[GW ] mm = ",sprintf("%-8s:", mm),"DX1 (numerical):\n") print(DX1); cat("\n\n") cat("[GW ] mm = ",sprintf("%-8s:", mm),"DX2 (analytical):\n") print(DX2); cat("\n\n") } } } MLIST$th.idx <- th.idx MLIST$num.idx <- num.idx MLIST } lavaan/R/lav_model_wls.R0000644000176200001440000002015113052776150014674 0ustar liggesusers# compute WLS.est (as a list per group) lav_model_wls_est <- function(lavmodel = NULL, GLIST = NULL) { #, #cov.x = NULL) { # state or final? 
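# if GLIST is NULL, the current parameter matrices stored in the model
# object are used; a usage sketch (assuming 'fit' is a fitted lavaan
# object; illustration only):
# WLS.est <- lav_model_wls_est(lavmodel = fit@Model) # list, one element per block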
if(is.null(GLIST)) GLIST <- lavmodel@GLIST nblocks <- lavmodel@nblocks meanstructure <- lavmodel@meanstructure categorical <- lavmodel@categorical group.w.free <- lavmodel@group.w.free fixed.x <- lavmodel@fixed.x num.idx <- lavmodel@num.idx # compute moments for all groups Sigma.hat <- computeSigmaHat(lavmodel = lavmodel, GLIST = GLIST, extra = FALSE) if(meanstructure && !categorical) { Mu.hat <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST) } else if(categorical) { TH <- computeTH(lavmodel = lavmodel, GLIST = GLIST) } if(lavmodel@conditional.x && lavmodel@nexo > 0L) { PI <- computePI(lavmodel = lavmodel, GLIST = GLIST) } if(group.w.free) { GW <- computeGW(lavmodel = lavmodel, GLIST = GLIST) } WLS.est <- vector("list", length=nblocks) for(g in 1:nblocks) { # PI? if(lavmodel@conditional.x && lavmodel@nexo > 0L) { PI.g <- PI[[g]] # Sigma_yy = Sigma_yy|x + PI %*% cov.x %*% t(PI) #if(!categorical) { # cov.xg <- cov.x[[g]] # Sigma.hat[[g]] <- Sigma.hat[[g]] + PI.g %*% cov.xg %*% t(PI.g) #} } else { PI.g <- numeric(0L) } if(categorical) { # order of elements is important here: # 1. thresholds + means (interleaved) # 2. slopes (if any, columnwise per exo) # 3. variances (if any) # 4. correlations (no diagonal!) wls.est <- c(TH[[g]], lav_matrix_vec(PI.g), diag(Sigma.hat[[g]])[num.idx[[g]]], lav_matrix_vech(Sigma.hat[[g]], diagonal=FALSE)) } else if(!categorical && meanstructure) { wls.est <- c(Mu.hat[[g]], lav_matrix_vec(PI.g), lav_matrix_vech(Sigma.hat[[g]])) } else { wls.est <- c(lav_matrix_vec(PI.g), lav_matrix_vech(Sigma.hat[[g]])) } if(group.w.free) { #wls.est <- c(log(GW[[g]]/GW[[samplestats@ngroups]]), wls.est) wls.est <- c(GW[[g]], wls.est) } WLS.est[[g]] <- wls.est } WLS.est } # compute WLS.V (as a list per group) # # three options: # 1) *LS: WLS.V is already in lavsamplestats # 2) NTRLS: WLS.V needs to be recomputed after every iteration, using # the structured estimates of Sigma/Mu # 3) ML: 3a: complete, structured (default) # 3b: complete, unstructured # 3c: incomplete, FIML, structured # 3d: incomplete, FIML, unstructured # 3e: incomplete, two.stage, structured # 3f: incomplete, two.stage, unstructured (EM estimates) lav_model_wls_v <- function(lavmodel = NULL, lavsamplestats = NULL, structured = TRUE, lavimplied = NULL, lavdata = NULL) { WLS.V <- vector("list", length=lavsamplestats@ngroups) # 1) *LS: WLS.V is already in lavsamplestats if(lavmodel@estimator == "GLS" || lavmodel@estimator == "WLS") { # for GLS, the WLS.V22 part is: 0.5 * t(D) %*% [S.inv %x% S.inv] %*% D # for WLS, the WLS.V22 part is: Gamma WLS.V <- lavsamplestats@WLS.V } else if(lavmodel@estimator == "DWLS" || lavmodel@estimator == "ULS") { # diagonal only!! 
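# for DWLS/ULS only the diagonal of the weight matrix is used; the
# precomputed diagonal elements are stored in lavsamplestats@WLS.VD,
# avoiding the full (pstar x pstar) weight matrix.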
WLS.V <- lavsamplestats@WLS.VD # 2) NTRLS: based on structured estimates of Sigma/Mu } else if(lavmodel@estimator == "NTRLS") { stopifnot(!lavmodel@conditional.x) # by definition, we always use the 'structured' moments Sigma.hat <- computeSigmaHat(lavmodel = lavmodel) Mu.hat <- computeMuHat(lavmodel = lavmodel) for(g in 1:lavsamplestats@ngroups) { WLS.V[[g]] <- lav_samplestats_Gamma_inverse_NT( COV = Sigma.hat[[g]][,,drop=FALSE], MEAN = Mu.hat[[g]], x.idx = lavsamplestats@x.idx[[g]], fixed.x = lavmodel@fixed.x, conditional.x = lavmodel@conditional.x, meanstructure = lavmodel@meanstructure, slopestructure = lavmodel@conditional.x) } # 3) ML: 3a: complete, structured (default) # 3b: complete, unstructured # 3c: incomplete, FIML, structured # 3d: incomplete, FIML, unstructured # 3e: incomplete, two.stage, structured # 3f: incomplete, two.stage, unstructured (EM estimates) } else if(lavmodel@estimator == "ML") { WLS.V <- vector("list", length=lavsamplestats@ngroups) if(structured) { if(lavmodel@conditional.x) { Sigma.hat <- computeSigmaHatJoint(lavmodel = lavmodel, lavsamplestats = lavsamplestats) } else { Sigma.hat <- computeSigmaHat(lavmodel = lavmodel) } if(lavmodel@meanstructure) { if(lavmodel@conditional.x) { Mu.hat <- computeMuHatJoint(lavmodel = lavmodel, lavsamplestats = lavsamplestats) } else { Mu.hat <- computeMuHat(lavmodel = lavmodel) } } else { Mu.hat <- NULL } } else { if(lavmodel@conditional.x) { # FIXME: what to do here? stop("lavaan ERROR: conditional.x = TRUE, but structured = FALSE?") } else { # complete data: observed var/cov matrix 'S' # two.stage + incomplete data: EM estimate of 'S' Sigma.hat <- lavsamplestats@cov } if(lavmodel@meanstructure) { if(lavmodel@conditional.x) { # FIXME! } else { # complete data: observed mean vector 'ybar' # two.stage + incomplete data: EM estimate of 'ybar' Mu.hat <- lavsamplestats@mean } } else { Mu.hat <- NULL } } # GW? if(lavmodel@group.w.free) { GW <- unlist(computeGW(lavmodel = lavmodel)) } # three options # - complete data # - incomplete data + FIML # - incomplete data + two.stage # two variants: # - using unstructured moments # - using structured moments for(g in 1:lavsamplestats@ngroups) { if(lavsamplestats@missing.flag) { stopifnot(!lavmodel@conditional.x) WLS.V[[g]] <- lav_mvnorm_missing_information_expected( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]]) } else { WLS.V[[g]] <- lav_samplestats_Gamma_inverse_NT( COV = Sigma.hat[[g]][,,drop=FALSE], MEAN = Mu.hat[[g]], x.idx = lavsamplestats@x.idx[[g]], fixed.x = lavmodel@fixed.x, conditional.x = lavmodel@conditional.x, meanstructure = lavmodel@meanstructure, slopestructure = lavmodel@conditional.x) } if(lavmodel@group.w.free) { # unweight!! 
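# a free group weight adds one extra 'sample statistic' per group: a
# 1x1 block is prepended to the weight matrix below, where 'a' rescales
# the exponentiated (log) group weight by the group sample size.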
a <- exp(GW[g]) / lavsamplestats@nobs[[g]] # a <- exp(GW[g]) * lavsamplestats@ntotal / lavsamplestats@nobs[[g]] WLS.V[[g]] <- lav_matrix_bdiag( matrix(a,1,1), WLS.V[[g]]) } } } # ML WLS.V } lavaan/R/lav_nlminb_constr.R0000644000176200001440000001776712465075714015606 0ustar liggesusers# constrained optimization # - references: * Nocedal & Wright (2006) Chapter 17 # * Optimization with constraints by Madsen, Nielsen & Tingleff # * original papers: Powell, 1969 and Rockafeller, 1974 # - using 'nlminb' for the unconstrained subproblem # - convergence scheme is based on the auglag function in the alabama package nlminb.constr <- function(start, objective, gradient = NULL, hessian = NULL, ..., scale = 1, control = list(), lower = -Inf, upper = Inf, ceq = NULL, ceq.jac = NULL, cin = NULL, cin.jac = NULL, control.outer = list()) { # we need a gradient stopifnot(!is.null(gradient)) # if no 'ceq' or 'cin' function, we create a dummy one if(is.null(ceq)) { ceq <- function(x, ...) { return( numeric(0) ) } } if(is.null(cin)) { cin <- function(x, ...) { return( numeric(0) ) } } # if no user-supplied jacobian functions, create them if(is.null(ceq.jac)) { if(is.null(ceq)) { ceq.jac <- function(x, ...) { matrix(0, nrow = 0L, ncol = length(x)) } } else { ceq.jac <- function(x, ...) { lav_func_jacobian_complex(func = ceq, x = x, ...) } } } if(is.null(cin.jac)) { if(is.null(cin)) { cin.jac <- function(x, ...) { matrix(0, nrow = 0L, ncol = length(x)) } } else { #require(numDeriv) cin.jac <- function(x, ...) { lav_func_jacobian_complex(func = cin, x = x, ...) } } } # how many ceq and cin constraints? nceq <- length( ceq(start) ) ncin <- length( cin(start) ) ncon <- nceq + ncin ceq.idx <- cin.idx <- integer(0) if(nceq > 0L) ceq.idx <- 1:nceq if(ncin > 0L) cin.idx <- nceq + 1:ncin cin.flag <- rep(FALSE, length(ncon)) if(ncin > 0L) cin.flag[cin.idx] <- TRUE # control outer default values control.outer.default <- list(mu0 = 100, lambda0 = 10, tol = 1e-06, # changed this in 0.4-12 itmax = 100L, verbose = FALSE) control.outer <- modifyList(control.outer.default, control.outer) # construct augmented lagrangian function auglag <- function(x, ...) { # apply constraints ceq0 <- ceq(x, ...); cin0 <- cin(x, ...); con0 <- c(ceq0, cin0) # 'release' inactive constraints if(ncin > 0L) { slack <- lambda/mu inactive.idx <- which(cin.flag & con0 > slack) con0[inactive.idx] <- slack[inactive.idx] } objective(x, ...) - sum(lambda * con0) + (mu/2) * sum(con0 * con0) } fgrad <- function(x, ...) { # apply constraints ceq0 <- ceq(x, ...); cin0 <- cin(x, ...); con0 <- c(ceq0, cin0) # jacobian JAC <- rbind(ceq.jac(x, ...), cin.jac(x, ...)) lambda.JAC <- lambda * JAC # handle inactive constraints if(ncin > 0L) { slack <- lambda/mu inactive.idx <- which(cin.flag & con0 > slack) if(length(inactive.idx) > 0L) { JAC <- JAC[-inactive.idx,,drop=FALSE] lambda.JAC <- lambda.JAC[-inactive.idx,,drop=FALSE] con0 <- con0[-inactive.idx] } } if(nrow(JAC) > 0L) { ( gradient(x, ...) - colSums(lambda.JAC) + mu * as.numeric(t(JAC) %*% con0) ) } else { gradient(x, ...) 
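# no (active) constraints left: the augmented-lagrangian gradient
# reduces to the plain objective gradient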
} } # initialization ceq0 <- ceq(start, ...); cin0 <- cin(start, ...); con0 <- c(ceq0, cin0) lambda <- rep(control.outer$lambda0, length(con0)) mu <- control.outer$mu0 inactive.idx <- integer(0) if(ncin > 0L) { slack <- lambda/mu inactive.idx <- which(cin.flag & con0 > slack) con0[inactive.idx] <- slack[inactive.idx] } K <- max(abs(con0)) if(control.outer$verbose) { cat("init cin0 values: ", cin0, "\n") cat("init ceq0 values: ", ceq0, "\n") cat("init slack values: ", lambda/mu, "\n") cat("init inactive idx: ", inactive.idx, "\n") cat("init con0 values: ", con0, "\n") cat("K = max con0: ", K, "\n") } r <- obj <- objective(start, ...) feval <- 0L geval <- 0L niter <- 0L ilack <- 0L Kprev <- K mu0 <- control.outer$mu0/Kprev if(is.infinite(mu0)) mu0 <- 1.0 mu <- mu0 K <- Inf x.par <- start for (i in 1:control.outer$itmax) { x.old <- x.par r.old <- r ############################################################ if(control.outer$verbose) { cat("\nStarting inner optimization [",i,"]:\n") cat("lambda: ", lambda, "\n") cat("mu: ", mu, "\n") } optim.out <- nlminb(start = x.par, objective = auglag, gradient = fgrad, control = control, scale = scale, ...) ############################################################ x.par <- optim.out$par r <- optim.out$objective feval <- feval + optim.out$evaluations[1] geval <- geval + optim.out$evaluations[2] niter <- niter + optim.out$iterations # check constraints ceq0 <- ceq(x.par, ...); cin0 <- cin(x.par, ...); con0 <- c(ceq0, cin0) if(ncin > 0L) { slack <- lambda/mu inactive.idx <- which(cin.flag & con0 > slack) con0[inactive.idx] <- slack[inactive.idx] } K <- max(abs(con0)) if(control.outer$verbose) { cat("cin0 values: ", cin0, "\n") cat("ceq0 values: ", ceq0, "\n") cat("active threshold: ", lambda/mu, "\n") cat("inactive idx: ", inactive.idx, "\n") cat("con0 values: ", con0, "\n") cat("K = max con0: ", K, " Kprev = ", Kprev, "\n") } # update K or mu (see Powell, 1969) if (K <= Kprev/4) { lambda <- lambda - (mu * con0) Kprev <- K } else { mu <- 10 * mu } # check convergence pconv <- max(abs(x.par - x.old)) if(pconv < control.outer$tol) { ilack <- ilack + 1L } else { ilack <- 0L } if( (is.finite(r) && is.finite(r.old) && abs(r - r.old) < control.outer$tol && K < control.outer$tol) | ilack >= 3 ) break } # output a <- list() if(i == control.outer$itmax) { a$convergence <- 10L a$message <- "nlminb.constr ran out of iterations and did not converge" } else if(K > control.outer$tol) { a$convergence <- 11L a$message <- "Convergence due to lack of progress in parameter updates" } else { a$convergence <- 0L a$message <- "converged" } a$par <- optim.out$par a$outer.iterations <- i a$lambda <- lambda a$mu <- mu #a$value <- objective(a$start, ...) #a$cin <- cin(a$start, ...) #a$ceq <- ceq(a$start, ...) a$evaluations <- c(feval, geval) a$iterations <- niter #a$kkt1 <- max(abs(a$fgrad)) <= 0.01 * (1 + abs(a$value)) #a$kkt2 <- any(eigen(a$hessian)$value * control.optim$objectivescale> 0) # jacobian of ceq and 'active' cin ceq0 <- ceq(a$par, ...); cin0 <- cin(a$par, ...); con0 <- c(ceq0, cin0) JAC <- rbind(ceq.jac(a$par, ...), cin.jac(a$par, ...)) inactive.idx <- integer(0L) cin.idx <- which(cin.flag) #ceq.idx <- which(!cin.flag) if(ncin > 0L) { # FIXME: slack value not too strict?? 
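# inequality constraints with cin(x) > slack are considered inactive at
# the solution; their row numbers are attached below (attribute
# 'inactive.idx') so that callers can drop these rows from the
# constraint jacobian if needed.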
slack <- 1e-05 #cat("DEBUG:\n"); print(con0) inactive.idx <- which(cin.flag & con0 > slack) #if(length(inactive.idx) > 0L) { # JAC <- JAC[-inactive.idx,,drop=FALSE] #} } attr(JAC, "inactive.idx") <- inactive.idx attr(JAC, "cin.idx") <- cin.idx attr(JAC, "ceq.idx") <- ceq.idx a$con.jac <- JAC a } lavaan/R/xxx_lavaanList.R0000644000176200001440000003377113042722104015053 0ustar liggesusers# lavaanList: fit the *same* model, on different datasets # YR - 29 Jun 2016 # YR - 27 Jan 2017: change lavoptions; add dotdotdot to each call lavaanList <- function(model = NULL, # model dataList = NULL, # list of datasets dataFunction = NULL, # generating function dataFunction.args = list(), # optional arguments ndat = length(dataList), # how many datasets? cmd = "lavaan", ..., store.slots = c("partable"), # default is partable FUN = NULL, # arbitrary FUN show.progress = FALSE, parallel = c("no", "multicore", "snow"), ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL, iseed = NULL) { # store.slots call mc <- match.call() # check store.slots store.slots <- tolower(store.slots) if(length(store.slots) == 1L && store.slots == "all") { store.slots <- c("timing", "partable", "data", "samplestats", "vcov", "test", "optim", "implied") } # dataList or function? if(is.function(dataFunction)) { if(ndat == 0L) { stop("lavaan ERROR: please specify number of requested datasets (ndat)") } firstData <- do.call(dataFunction, args = dataFunction.args) #dataList <- vector("list", length = ndat) } else { firstData <- dataList[[1]] } # check data if(is.matrix(firstData)) { # check if we have column names? NAMES <- colnames(firstData) if(is.null(NAMES)) { stop("lavaan ERROR: data is a matrix without column names") } } else if(inherits(firstData, "data.frame")) { # check? } else { stop("lavaan ERROR: (generated) data is not a data.frame (or a matrix)") } # parallel (see boot package) if (missing(parallel)) { #parallel <- getOption("boot.parallel", "no") parallel <- "no" } parallel <- match.arg(parallel) have_mc <- have_snow <- FALSE if (parallel != "no" && ncpus > 1L) { if (parallel == "multicore") { have_mc <- .Platform$OS.type != "windows" } else if (parallel == "snow") { have_snow <- TRUE } if (!have_mc && !have_snow) { ncpus <- 1L } loadNamespace("parallel") } # dot dot dot dotdotdot <- list(...) 
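# a minimal usage sketch (illustration only; the model syntax and the
# resampling scheme are hypothetical):
# dg <- function() HolzingerSwineford1939[sample(301L, replace = TRUE), ]
# out <- lavaanList("visual =~ x1 + x2 + x3", dataFunction = dg,
#                   ndat = 10L, cmd = "cfa", store.slots = "partable")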
# if 'model' is a lavaan object (perhaps from lavSimulate), no need to # call `cmd' if(inherits(model, "lavaan")) { FIT <- model } else { # adapt for FIT #dotdotdotFIT <- dotdotdot #dotdotdotFIT$do.fit <- TRUE # to get starting values #dotdotdotFIT$se <- "none" #dotdotdotFIT$test <- "none" #dotdotdotFIT$verbose <- FALSE #dotdotdotFIT$debug <- FALSE # initial model fit, using first dataset FIT <- do.call(cmd, args = c(list(model = model, data = firstData), dotdotdot) ) } lavoptions <- FIT@Options lavmodel <- FIT@Model lavpartable <- FIT@ParTable lavpta <- FIT@pta # move the 'est' column to 'start'; remove est/se columns from lavpartable lavpartable$start <- lavpartable$est lavpartable$est <- lavpartable$se <- NULL # empty slots timingList <- ParTableList <- DataList <- SampleStatsList <- CacheList <- vcovList <- testList <- optimList <- impliedList <- funList <- list() # prepare store.slots slots if("timing" %in% store.slots) { timingList <- vector("list", length = ndat) } if("partable" %in% store.slots) { ParTableList <- vector("list", length = ndat) } if("data" %in% store.slots) { DataList <- vector("list", length = ndat) } if("samplestats" %in% store.slots) { SampleStatsList <- vector("list", length = ndat) } if("cache" %in% store.slots) { CacheList <- vector("list", length = ndat) } if("vcov" %in% store.slots) { vcovList <- vector("list", length = ndat) } if("test" %in% store.slots) { testList <- vector("list", length = ndat) } if("optim" %in% store.slots) { optimList <- vector("list", length = ndat) } if("implied" %in% store.slots) { impliedList <- vector("list", length = ndat) } if(!is.null(FUN)) { funList <- vector("list", length = ndat) } # single run fn <- function(i) { if(show.progress) { cat(" ... data set number:", sprintf("%4d", i)) } # get new dataset if(i == 1L) { DATA <- firstData } else if(is.function(dataFunction)) { DATA <- do.call(dataFunction, args = dataFunction.args) } else if(is.list(dataList)) { DATA <- dataList[[i]] } # adapt lavmodel for this new dataset # - starting values will be different # - ov.x variances/covariances # FIXME: can we not make the changes internally? 
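# when fixed.x = TRUE, the exogenous sample moments are baked into the
# model matrices and differ for every generated dataset; the lavmodel
# slot is therefore set to NULL below, forcing lavaan() to rebuild the
# model matrices (and starting values) from the parameter table.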
#if(lavmodel@fixed.x && length(vnames(lavpartable, "ov.x")) > 0L) { #for(g in 1:FIT@Data@ngroups) { # #} lavmodel <- NULL #} # fit model with this (new) dataset if(cmd %in% c("lavaan", "sem", "cfa", "growth")) { lavobject <- try(do.call("lavaan", args = c(list(slotOptions = lavoptions, slotParTable = lavpartable, slotModel = lavmodel, start = FIT, data = DATA), dotdotdot)), silent = TRUE) } else if(cmd == "fsr") { # extract fs.method and fsr.method from dotdotdot if(!is.null(dotdotdot$fs.method)) { fs.method <- dotdotdot$fs.method } else { fs.method <- formals(fsr)$fs.method # default } if(!is.null(dotdotdot$fsr.method)) { fsr.method <- dotdotdot$fsr.method } else { fsr.method <- formals(fsr)$fsr.method # default } lavobject <- try(do.call("fsr", args = c(list(slotOptions = lavoptions, slotParTable = lavpartable, slotModel = lavmodel, start = FIT, data = DATA, cmd = "lavaan", fs.method = fs.method, fsr.method = fsr.method), dotdotdot)), silent = TRUE) } else { stop("lavaan ERROR: unknown cmd: ", cmd) } RES <- list(ok = FALSE, timing = NULL, ParTable = NULL, Data = NULL, SampleStats = NULL, vcov = NULL, test = NULL, optim = NULL, implied = NULL, fun = NULL) if(inherits(lavobject, "lavaan") && lavInspect(lavobject, "converged")) { RES$ok <- TRUE if(show.progress) { cat(" OK -- niter = ", sprintf("%3d", lavInspect(lavobject, "iterations")), "\n") } # extract slots from fit if("timing" %in% store.slots) { RES$timing <- lavobject@timing } if("partable" %in% store.slots) { RES$ParTable <- lavobject@ParTable } if("data" %in% store.slots) { RES$Data <- lavobject@Data } if("samplestats" %in% store.slots) { RES$SampleStats <- lavobject@SampleStats } if("cache" %in% store.slots) { RES$Cache <- lavobject@Cache } if("vcov" %in% store.slots) { RES$vcov <- lavobject@vcov } if("test" %in% store.slots) { RES$test <- lavobject@test } if("optim" %in% store.slots) { RES$optim <- lavobject@optim } if("implied" %in% store.slots) { RES$implied <- lavobject@implied } # custom FUN if(!is.null(FUN)) { RES$fun <- FUN(lavobject) } } else { if(show.progress) { cat(" FAILED: no convergence\n") } } RES } # the next 20 lines are based on the boot package RES <- if (ncpus > 1L && (have_mc || have_snow)) { if (have_mc) { parallel::mclapply(seq_len(ndat), fn, mc.cores = ncpus) } else if (have_snow) { list(...) 
# evaluate any promises if (is.null(cl)) { cl <- parallel::makePSOCKcluster(rep("localhost", ncpus)) if(RNGkind()[1L] == "L'Ecuyer-CMRG") { parallel::clusterSetRNGStream(cl, iseed = iseed) } RES <- parallel::parLapply(cl, seq_len(ndat), fn) parallel::stopCluster(cl) RES } else { parallel::parLapply(cl, seq_len(ndat), fn) } } } else { lapply(seq_len(ndat), fn) } # restructure meta <- list(ndat = ndat, ok = sapply(RES, "[[", "ok"), store.slots = store.slots) # extract store.slots slots if("timing" %in% store.slots) { timingList <- lapply(RES, "[[", "timing") } if("partable" %in% store.slots) { ParTableList <- lapply(RES, "[[", "ParTable") } if("data" %in% store.slots) { DataList <- lapply(RES, "[[", "Data") } if("samplestats" %in% store.slots) { SampleStatsList <- lapply(RES, "[[", "SampleStats") } if("cache" %in% store.slots) { CacheList <- lapply(RES, "[[", "Cache") } if("vcov" %in% store.slots) { vcovList <- lapply(RES, "[[", "vcov") } if("test" %in% store.slots) { testList <- lapply(RES, "[[", "test") } if("optim" %in% store.slots) { optimList <- lapply(RES, "[[", "optim") } if("implied" %in% store.slots) { impliedList <- lapply(RES, "[[", "implied") } if(!is.null(FUN)) { funList <- lapply(RES, "[[", "fun") } # create lavaanList object lavaanList <- new("lavaanList", call = mc, Options = lavoptions, ParTable = lavpartable, pta = lavpta, Model = lavmodel, Data = FIT@Data, # meta meta = meta, # per dataset timingList = timingList, ParTableList = ParTableList, DataList = DataList, SampleStatsList = SampleStatsList, CacheList = CacheList, vcovList = vcovList, testList = testList, optimList = optimList, impliedList = impliedList, funList = funList, external = list() ) lavaanList } semList <- function(model = NULL, dataList = NULL, dataFunction = NULL, dataFunction.args = list(), ndat = length(dataList), ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE, parallel = c("no", "multicore", "snow"), ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL, iseed = NULL) { lavaanList(model = model, dataList = dataList, dataFunction = dataFunction, dataFunction.args = dataFunction.args, ndat = ndat, cmd = "sem", ..., store.slots = store.slots, FUN = FUN, show.progress = show.progress, parallel = parallel, ncpus = ncpus, cl = cl, iseed = iseed) } cfaList <- function(model = NULL, dataList = NULL, dataFunction = NULL, dataFunction.args = list(), ndat = length(dataList), ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE, parallel = c("no", "multicore", "snow"), ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL, iseed = NULL) { lavaanList(model = model, dataList = dataList, dataFunction = dataFunction, dataFunction.args = dataFunction.args, ndat = ndat, cmd = "cfa", ..., store.slots = store.slots, FUN = FUN, show.progress = show.progress, parallel = parallel, ncpus = ncpus, cl = cl, iseed = iseed) } lavaan/R/lav_mvnorm_missing_h1.R0000644000176200001440000001415213045576460016356 0ustar liggesusers# the Multivariate normal distribution, unrestricted (h1), missing values # 1) loglikelihood --> same as h0 but where Mu and Sigma are unrestricted # 2) 3) 4) 5) --> (idem) # YR 26 Mar 2016: first version # YR 20 Jan 2017: added _h1_omega_sw() # here, we estimate Mu and Sigma from Y with missing values, assuming normality # this is a rewrite of the 'estimate.moments.EM' function in <= 0.5-22 lav_mvnorm_missing_h1_estimate_moments <- function(Y = NULL, Mp = NULL, Yp = NULL, Sinv.method = "eigen", verbose = FALSE, max.iter = 500L, tol = 1e-05) { # check input Y <- 
as.matrix(Y); P <- NCOL(Y); N <- NROW(Y) # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } if(is.null(Yp)) { Yp <- lav_samplestats_missing_patterns(Y = Y, Mp = Mp) } # remove empty cases if(length(Mp$empty.idx) > 0L) { N <- N - length(Mp$empty.idx) } # verbose? if(verbose) { cat("\n") cat("lav_mvnorm_missing_h1_estimate_moments: start EM steps\n") } # starting values; zero covariances to guarantee a pd matrix Mu0 <- base::.colMeans(Y, m = N, n = P, na.rm = TRUE) var0 <- base::.colMeans(Y*Y, m = N, n = P, na.rm = TRUE) - Mu0*Mu0 Sigma0 <- diag(x = var0, nrow = P) Mu <- Mu0; Sigma <- Sigma0 # report if(verbose) { #fx0 <- estimator.FIML(Sigma.hat=Sigma, Mu.hat=Mu, M=Yp) fx0 <- lav_mvnorm_missing_loglik_samplestats(Yp = Yp, Mu = Mu, Sigma = Sigma, log2pi = FALSE, minus.two = TRUE)/N cat(" EM iteration:", sprintf("%4d", 0), " fx = ", sprintf("%15.10f", fx0), "\n") } # EM steps for(i in 1:max.iter) { # E-step Estep <- lav_mvnorm_missing_estep(Y = Y, Mp = Mp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method) T1 <- Estep$T1 T2 <- Estep$T2 # M-step Mu <- T1/N Sigma <- T2/N - tcrossprod(Mu) # check if Sigma is not (numerically) pd (+ poor man's fix) ev <- eigen(Sigma, symmetric = TRUE, only.values = TRUE) ev.tol <- 1e-6 # FIXME: hard-coded cut-off; keep separate from the convergence `tol' argument if(any(ev$values < ev.tol)) { #too.small <- which( ev$values < ev.tol ) #ev$values[too.small] <- ev.tol #ev$values <- ev$values + ev.tol #Sigma <- ev$vectors %*% diag(ev$values) %*% t(ev$vectors) # ridge diag(Sigma) <- diag(Sigma) + max(diag(Sigma))*1e-08 } # max absolute difference in parameter values DELTA <- max(abs(c(Mu, lav_matrix_vech(Sigma)) - c(Mu0, lav_matrix_vech(Sigma0)))) # report fx if(verbose) { #fx <- estimator.FIML(Sigma.hat=Sigma, Mu.hat=Mu, M=Yp) fx <- lav_mvnorm_missing_loglik_samplestats(Yp = Yp, Mu = Mu, Sigma = Sigma, log2pi = FALSE, minus.two = TRUE)/N cat(" EM iteration:", sprintf("%4d", i), " fx = ", sprintf("%15.10f", fx), " delta par = ", sprintf("%9.8f", DELTA), "\n") } # convergence check: using parameter values: if(DELTA < tol) break # again Mu0 <- Mu; Sigma0 <- Sigma } # EM iterations if(verbose) { cat("\nSigma:\n"); print(Sigma) cat("\nMu:\n"); print(Mu) cat("\n") } # compute fx if we haven't already if(!verbose) { #fx <- estimator.FIML(Sigma.hat = Sigma, Mu.hat = Mu, M = Yp) fx <- lav_mvnorm_missing_loglik_samplestats(Yp = Yp, Mu = Mu, Sigma = Sigma, log2pi = FALSE, minus.two = TRUE)/N } list(Sigma = Sigma, Mu = Mu, fx = fx) } # compute N times ACOV(Mu, vech(Sigma)) # in the literature: - `Omega_{SW}' # - `Gamma for incomplete data' # - (N times the) sandwich estimator for acov(mu,vech(Sigma)) lav_mvnorm_missing_h1_omega_sw <- function(Y = NULL, Mp = NULL, Yp = NULL, Sinv.method = "eigen", Mu = NULL, Sigma = NULL, Sigma.inv = NULL, information = "observed") { # missing patterns if(is.null(Mp)) { Mp <- lav_data_missing_patterns(Y) } # sample stats per pattern if(is.null(Yp) && (information == "observed" || is.null(Sigma))) { Yp <- lav_samplestats_missing_patterns(Y = Y, Mp = Mp) } # Sigma and Mu if(is.null(Sigma) || is.null(Mu)) { out <- lav_mvnorm_missing_h1_estimate_moments(Y = Y, Mp = Mp, Yp = Yp) Mu <- out$Mu Sigma <- out$Sigma } # information matrices info <- lav_mvnorm_missing_information_both(Y = Y, Mp = Mp, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv, information = information) A <- info$Abeta A.inv <- lav_matrix_symmetric_inverse(S = A, logdet = FALSE, Sinv.method = Sinv.method) B <- info$Bbeta # sandwich SW <- A.inv %*% B %*% A.inv SW } lavaan/R/lav_muthen1984.R0000644000176200001440000003656013046351747014544 
0ustar liggesusers# This function was written in January 2012 -- Yves Rosseel # First success: Friday 20 Jan 2012: the standard errors for # thresholds and polychoric correlations (in an # unrestricted/saturated model) are spot on! # Second success: Saturday 9 June 2012: support for mixed (ordinal + metric) # variables; thanks to the delta method to get the ACOV # right (see H matrix) # Third success: Monday 2 July 2012: support for fixed.x covariates # # Friday 13 July 2012: merge exo + non-exo code # Monday 16 July 2012: fixed sign numeric in WLS.W; I think we got it right now # YR 26 Nov 2015: move step1 + step2 to external functions # muthen1984 <- function(Data = NULL, ov.names = NULL, ov.types = NULL, ov.levels = NULL, ov.names.x = character(0L), eXo = NULL, verbose = FALSE, missing = "listwise", WLS.W = TRUE, optim.method = "nlminb", zero.add = c(0.5, 0.0), zero.keep.margins = TRUE, zero.cell.warn = FALSE, zero.cell.tables = TRUE, group = 1L) { # group only for error messages # just in case Data is a vector Data <- as.matrix(Data) # internal function lav_crossprod2 if(missing == "listwise") { lav_crossprod2 <- base::crossprod } else { # pairwise, we can have missing values lav_crossprod2 <- function(x, y) sum(x * y, na.rm = TRUE) } # pairwise version # FIXME: surely a much better/faster solution is possible?? lav_crossprod_matrix <- function(A) { ndim <- NCOL(A) # off-diagonal upper <- apply(combn(NCOL(A),2),2, function(x) sum(A[,x[1]] * A[,x[2]], na.rm=TRUE)) tmp <- diag(apply(A, 2, function(x) sum(x*x, na.rm=TRUE))) tmp[ lav_matrix_vechru_idx(ndim, diagonal = FALSE) ] <- upper tmp[ lav_matrix_vech_idx( ndim, diagonal = FALSE) ] <- upper tmp } nvar <- NCOL(Data); N <- NROW(Data) num.idx <- which(ov.types == "numeric") ord.idx <- which(ov.types == "ordered") nexo <- length(ov.names.x) if(nexo > 0L) stopifnot(NCOL(eXo) == nexo) pstar <- nvar*(nvar-1)/2 if(verbose) { cat("\nPreparing for WLS estimation -- STEP 1 + 2\n") cat("Number of endogenous variables: ", nvar, "\n") cat("Endogenous variable names:\n"); print(ov.names); cat("\n") cat("Endogenous ov types:\n"); print(ov.types); cat("\n") cat("Endogenous ov levels:\n "); print(ov.levels); cat("\n") cat("Number of exogenous variables: ", nexo, "\n") cat("Exogenous variable names:\n"); print(ov.names.x); cat("\n") } step1 <- lav_samplestats_step1(Y = Data, ov.names = ov.names, ov.types = ov.types, ov.levels = ov.levels, ov.names.x = ov.names.x, eXo = eXo, scores.flag = WLS.W, group = group) FIT <- step1$FIT TH <- step1$TH; TH.NOX <- step1$TH.NOX TH.IDX <- step1$TH.IDX; TH.NAMES <- step1$TH.NAMES VAR <- step1$VAR; SLOPES <- step1$SLOPES SC.TH <- step1$SC.TH; SC.SL <- step1$SC.SL; SC.VAR <- step1$SC.VAR th.start.idx <- step1$th.start.idx; th.end.idx <- step1$th.end.idx # rm SC.VAR columns from ordinal variables if(WLS.W && length(ord.idx) > 0L) { SC.VAR <- SC.VAR[,-ord.idx, drop=FALSE] } if(verbose) { cat("STEP 1: univariate statistics\n") cat("Threshold + means:\n") TTHH <- unlist(TH) names(TTHH) <- unlist(TH.NAMES) print(TTHH) cat("Slopes (if any):\n") colnames(SLOPES) <- ov.names.x rownames(SLOPES) <- ov.names print(SLOPES) cat("Variances:\n") names(VAR) <- ov.names print(unlist(VAR)) } # stage two -- correlations if(verbose) cat("\n\nSTEP 2: covariances/correlations:\n") COR <- lav_samplestats_step2(UNI = FIT, ov.names = ov.names, zero.add = zero.add, zero.keep.margins = zero.keep.margins, zero.cell.warn = zero.cell.warn, zero.cell.tables = zero.cell.tables, optim.method = optim.method) empty.cell.tables <- attr(COR, 
"zero.cell.tables") attr(COR, "zero.cell.tables") <- NULL if(verbose) { colnames(COR) <- rownames(COR) <- ov.names print(COR) } if(!WLS.W) { # we do not need the asymptotic variance matrix if(any("numeric" %in% ov.types)) { COV <- cor2cov(R=COR, sds=sqrt(unlist(VAR))) } else { COV <- COR } out <- list(TH=TH, SLOPES=SLOPES, VAR=VAR, COR=COR, COV=COV, SC=NULL, TH.NOX=TH.NOX,TH.NAMES=TH.NAMES, TH.IDX=TH.IDX, INNER=NULL, A11=NULL, A12=NULL, A21=NULL, A22=NULL, WLS.W=NULL, H=NULL, zero.cell.tables=matrix("",0,2)) return(out) } # stage three -- WLS.W SC.COR <- matrix(0, N, pstar) PSTAR <- matrix(0, nvar, nvar) PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar A11.size <- NCOL(SC.TH) + NCOL(SC.SL) + NCOL(SC.VAR) # A21 A21 <- matrix(0, pstar, A11.size) H22 <- diag(pstar) # for the delta rule H21 <- matrix(0, pstar, A11.size) # for this one, we need new scores: for each F_ij (cor), the # scores with respect to the TH, VAR, ... for(j in seq_len(nvar-1L)) { for(i in (j+1L):nvar) { pstar.idx <- PSTAR[i,j] th.idx_i <- th.start.idx[i]:th.end.idx[i] th.idx_j <- th.start.idx[j]:th.end.idx[j] if(nexo > 0L) { sl.idx_i <- NCOL(SC.TH) + seq(i, by=nvar, length.out=nexo) sl.idx_j <- NCOL(SC.TH) + seq(j, by=nvar, length.out=nexo) var.idx_i <- NCOL(SC.TH) + NCOL(SC.SL) + match(i, num.idx) var.idx_j <- NCOL(SC.TH) + NCOL(SC.SL) + match(j, num.idx) } else { var.idx_i <- NCOL(SC.TH) + match(i, num.idx) var.idx_j <- NCOL(SC.TH) + match(j, num.idx) } if(ov.types[i] == "numeric" && ov.types[j] == "numeric") { SC.COR.UNI <- pp_cor_scores(rho=COR[i,j], fit.y1=FIT[[i]], fit.y2=FIT[[j]]) # RHO SC.COR[,pstar.idx] <- SC.COR.UNI$dx.rho # TH A21[pstar.idx, th.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.mu.y1) A21[pstar.idx, th.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.mu.y2) # SL if(nexo > 0L) { A21[pstar.idx, sl.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y1) A21[pstar.idx, sl.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y2) } # VAR A21[pstar.idx, var.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.var.y1) A21[pstar.idx, var.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.var.y2) # H21 only needed for VAR H21[pstar.idx, var.idx_i] <- (sqrt(VAR[j]) * COR[i,j]) / (2*sqrt(VAR[i])) H21[pstar.idx, var.idx_j] <- (sqrt(VAR[i]) * COR[i,j]) / (2*sqrt(VAR[j])) H22[pstar.idx, pstar.idx] <- sqrt(VAR[i]) * sqrt(VAR[j]) } else if(ov.types[i] == "numeric" && ov.types[j] == "ordered") { SC.COR.UNI <- ps_cor_scores(rho=COR[i,j], fit.y1=FIT[[i]], fit.y2=FIT[[j]]) # RHO SC.COR[,pstar.idx] <- SC.COR.UNI$dx.rho # TH A21[pstar.idx, th.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.mu.y1) A21[pstar.idx, th.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.th.y2) # SL if(nexo > 0L) { A21[pstar.idx, sl.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y1) A21[pstar.idx, sl.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y2) } # VAR A21[pstar.idx, var.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.var.y1) # H21 only need for VAR H21[pstar.idx, var.idx_i] <- COR[i,j] / (2*sqrt(VAR[i])) H22[pstar.idx, pstar.idx] <- sqrt(VAR[i]) } else if(ov.types[j] == "numeric" && ov.types[i] == "ordered") { SC.COR.UNI <- ps_cor_scores(rho=COR[i,j], fit.y1=FIT[[j]], fit.y2=FIT[[i]]) # RHO SC.COR[,pstar.idx] <- SC.COR.UNI$dx.rho # TH A21[pstar.idx, th.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.mu.y1) A21[pstar.idx, th.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.th.y2) # SL if(nexo > 
0L) { A21[pstar.idx, sl.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y1) A21[pstar.idx, sl.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y2) } # VAR A21[pstar.idx, var.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.var.y1) # H21 only for VAR H21[pstar.idx, var.idx_j] <- COR[i,j] / (2*sqrt(VAR[j])) H22[pstar.idx, pstar.idx] <- sqrt(VAR[j]) } else if(ov.types[i] == "ordered" && ov.types[j] == "ordered") { # polychoric correlation SC.COR.UNI <- pc_cor_scores(rho=COR[i,j], fit.y1=FIT[[i]], fit.y2=FIT[[j]]) # RHO SC.COR[,pstar.idx] <- SC.COR.UNI$dx.rho # TH A21[pstar.idx, th.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.th.y1) A21[pstar.idx, th.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.th.y2) # SL if(nexo > 0L) { A21[pstar.idx, sl.idx_i] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y1) A21[pstar.idx, sl.idx_j] <- lav_crossprod2(SC.COR[,pstar.idx], SC.COR.UNI$dx.sl.y2) } # NO VAR } } } # stage three SC <- cbind(SC.TH, SC.SL, SC.VAR, SC.COR) if(missing == "listwise") { INNER <- crossprod(SC) } else { INNER <- lav_crossprod_matrix(SC) } # A11 # new approach (2 June 2012): A11 is just a 'sparse' version of # (the left upper block of) INNER A11 <- matrix(0, A11.size, A11.size) for(i in 1:nvar) { th.idx <- th.start.idx[i]:th.end.idx[i] sl.idx <- integer(0L) var.idx <- integer(0L) if(nexo > 0L) { sl.idx <- NCOL(SC.TH) + seq(i, by=nvar, length.out=nexo) #sl.end.idx <- (i*nexo); sl.start.idx <- (i-1L)*nexo + 1L #sl.idx <- NCOL(SC.TH) + (sl.start.idx:sl.end.idx) } if(ov.types[i] == "numeric") { var.idx <- NCOL(SC.TH) + NCOL(SC.SL) + match(i, num.idx) } a11.idx <- c(th.idx, sl.idx, var.idx) A11[a11.idx, a11.idx] <- INNER[a11.idx, a11.idx] } ##### DEBUG ###### #### for numeric VAR only, use hessian to get better residual var value #### # for(i in 1:nvar) { # if(ov.types[i] == "numeric") { # tmp.npar <- FIT[[i]]$npar # e.var <- FIT[[i]]$theta[ tmp.npar ] # sq.e.var <- sqrt(e.var) # sq.e.var6 <- sq.e.var*sq.e.var*sq.e.var*sq.e.var*sq.e.var*sq.e.var # dx2.var <- N/(2*e.var*e.var) - 1/sq.e.var6 * (e.var * N) # # var.idx <- NCOL(SC.TH) + NCOL(SC.SL) + match(i, num.idx) # A11[var.idx, var.idx] <- -1 * dx2.var # } # } ################ ################ # A22 (diagonal) A22 <- matrix(0, pstar, pstar) for(i in seq_len(pstar)) { A22[i,i] <- sum( SC.COR[,i]*SC.COR[,i], na.rm=TRUE ) } # A12 (zero) A12 <- matrix(0, NROW(A11), NCOL(A22)) #B <- rbind( cbind(A11,A12), # cbind(A21,A22) ) # we invert B as a block-triangular matrix (0.5-23) # # B.inv = A11^{-1} 0 # -A22^{-1} A21 A11^{-1} A22^{-1} # # invert A A11.inv <- try(solve(A11), silent = TRUE) if(inherits(A11.inv, "try-error")) { # brute force A11.inv <- MASS::ginv(A11) warning("lavaan WARNING: trouble constructing W matrix; used generalized inverse for A11 submatrix") } # invert da22 <- diag(A22) if(any(da22 == 0)) { warning("lavaan WARNING: trouble constructing W matrix; used generalized inverse for A22 submatrix") A22.inv <- MASS::ginv(A22) } else { A22.inv <- A22 diag(A22.inv) <- 1/da22 } # lower-left block A21.inv <- -A22.inv %*% A21 %*% A11.inv # upper-left block remains zero A12.inv <- A12 # construct B.inv B.inv <- rbind( cbind(A11.inv, A12.inv), cbind(A21.inv, A22.inv) ) # weight matrix (correlation metric) WLS.W <- B.inv %*% INNER %*% t(B.inv) # COV matrix? 
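# (editorial note) when at least one endogenous variable is numeric, the
# correlation matrix below is rescaled to a covariance matrix,
# COV = D %*% COR %*% D with D = diag(sds), and the weight matrix receives
# the matching delta-rule transformation via H (H21/H22 were filled in
# above). A stand-alone check of the rescaling (illustration only, not run):
if(FALSE) {
    R <- matrix(c(1, 0.3, 0.3, 1), 2L, 2L)
    sds <- c(2, 3)
    all.equal(diag(sds) %*% R %*% diag(sds), cor2cov(R = R, sds = sds))
}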
if(any("numeric" %in% ov.types)) { COV <- cor2cov(R=COR, sds=sqrt(unlist(VAR))) # construct H matrix to apply delta rule (for the tranformation # of rho_ij to cov_ij) H11 <- diag(NROW(A11)) H12 <- matrix(0, NROW(A11), NCOL(A22)) # H22 and H21 already filled in H <- rbind( cbind(H11,H12), cbind(H21,H22) ) WLS.W <- H %*% WLS.W %*% t(H) } else { COV <- COR H <- diag(NCOL(WLS.W)) } # reverse sign numeric TH (because we provide -mu in WLS.obs) # (WOW, it took me a LOOONGGG time to realize this!) # YR 16 July 2012 # NOTE: prior to 0.5-17, we used num.idx (instead of NUM.idx) # which is WRONG if we have more than one threshold per variable # (thanks to Sacha Epskamp for spotting this!) if(length(num.idx) > 0L) { NUM.idx <- which(unlist(TH.IDX) == 0L) WLS.W[NUM.idx,] <- -WLS.W[NUM.idx,] WLS.W[,NUM.idx] <- -WLS.W[,NUM.idx] } out <- list(TH=TH, SLOPES=SLOPES, VAR=VAR, COR=COR, COV=COV, SC=SC, TH.NOX=TH.NOX,TH.NAMES=TH.NAMES, TH.IDX=TH.IDX, INNER=INNER, A11=A11, A12=A12, A21=A21, A22=A22, WLS.W=WLS.W, H=H, zero.cell.tables = empty.cell.tables) out } lavaan/R/lav_syntax.R0000644000176200001440000004572612736441262014255 0ustar liggesusers# parse lavaan syntax # YR 14 Jan 2014: move to lav_syntax.R lavParseModelString <- function(model.syntax = '', as.data.frame. = FALSE, warn = TRUE, debug = FALSE) { # check for empty syntax if(length(model.syntax) == 0) { stop("lavaan ERROR: empty model syntax") } # remove comments prior to split. # Match from comment character to newline, but don't eliminate newline model.syntax <- gsub("[#!].*(?=\n)","", model.syntax, perl=TRUE) # replace semicolons with newlines prior to split model.syntax <- gsub(";", "\n", model.syntax, fixed=TRUE) #remove whitespace prior to split model.syntax <- gsub("[ \t]+", "", model.syntax, perl=TRUE) # remove any occurrence of >= 2 consecutive newlines to eliminate \ # blank statements; this retains a blank newline at the beginning, # if such exists, but parser will not choke because of start.idx model.syntax <- gsub("\n{2,}", "\n", model.syntax, perl=TRUE) # break up in lines model <- unlist( strsplit(model.syntax, "\n") ) # check for multi-line formulas: they contain no "~" or "=" character # but before we do that, we remove all modifiers # to avoid confusion with for example equal("f1=~x1") statements model.simple <- gsub("\\(.*\\)\\*", "MODIFIER*", model) start.idx <- grep("[~=<>:|%]", model.simple) end.idx <- c( start.idx[-1]-1, length(model) ) model.orig <- model model <- character( length(start.idx) ) for(i in 1:length(start.idx)) { model[i] <- paste(model.orig[start.idx[i]:end.idx[i]], collapse="") } # ok, in all remaining lines, we should have a '~' operator # OR one of '=', '<', '>', '|' outside the "" model.simple <- gsub("\\\".[^\\\"]*\\\"", "LABEL", model) idx.wrong <- which(!grepl("[~=<>:|%]", model.simple)) if(length(idx.wrong) > 0) { cat("lavaan: missing operator in formula(s):\n") print(model[idx.wrong]) stop("lavaan ERROR: syntax error in lavaan model syntax") } # but perhaps we have a '+' as the first character? 
idx.wrong <- which(grepl("^\\+", model)) if(length(idx.wrong) > 0) { cat("lavaan: some formula(s) start with a plus (+) sign:\n") print(model[idx.wrong]) stop("lavaan ERROR: syntax error in lavaan model syntax") } # main operation: flatten formulas into single bivariate pieces # with a left-hand-side (lhs), an operator (eg "=~"), and a # right-hand-side (rhs) # both lhs and rhs can have a modifier # (but we ignore the lhs modifier for now) FLAT.lhs <- character(0) #FLAT.lhs.mod <- character(0) FLAT.op <- character(0) FLAT.rhs <- character(0) FLAT.rhs.mod.idx <- integer(0) FLAT.block <- integer(0) # keep track of groups using ":" operator FLAT.fixed <- character(0) # only for display purposes! FLAT.start <- character(0) # only for display purposes! FLAT.label <- character(0) # only for display purposes! FLAT.prior <- character(0) FLAT.idx <- 0L MOD.idx <- 0L CON.idx <- 0L MOD <- vector("list", length=0L) CON <- vector("list", length=0L) BLOCK <- 1L BLOCK_OP <- FALSE for(i in 1:length(model)) { x <- model[i] if(debug) { cat("formula to parse:\n"); print(x); cat("\n") } # 1. which operator is used? line.simple <- gsub("\\\".[^\\\"]*\\\"", "LABEL", x) # "=~" operator? if(grepl("=~", line.simple, fixed=TRUE)) { op <- "=~" # "<~" operator? } else if(grepl("<~", line.simple, fixed=TRUE)) { op <- "<~" } else if(grepl("~*~", line.simple, fixed=TRUE)) { op <- "~*~" # "~~" operator? } else if(grepl("~~", line.simple, fixed=TRUE)) { op <- "~~" # "~" operator? } else if(grepl("~", line.simple, fixed=TRUE)) { op <- "~" # "==" operator? } else if(grepl("==", line.simple, fixed=TRUE)) { op <- "==" # "<" operator? } else if(grepl("<", line.simple, fixed=TRUE)) { op <- "<" # ">" operator? } else if(grepl(">", line.simple, fixed=TRUE)) { op <- ">" # ":=" operator? } else if(grepl(":=", line.simple, fixed=TRUE)) { op <- ":=" # ":" operator? } else if(grepl(":", line.simple, fixed=TRUE)) { op <- ":" # "|" operator? } else if(grepl("|", line.simple, fixed=TRUE)) { op <- "|" # "%" operator? } else if(grepl("%", line.simple, fixed=TRUE)) { op <- "%" } else { stop("unknown operator in ", model[i]) } # 2. split by operator (only the *first* occurence!) # check first if equal/label modifier has been used on the LEFT! if(substr(x,1,6) == "label(") stop("label modifier can not be used on the left-hand side of the operator") if(op == "|") { op.idx <- regexpr("\\|", x) } else if(op == "~*~") { op.idx <- regexpr("~\\*~", x) } else { op.idx <- regexpr(op, x) } lhs <- substr(x, 1L, op.idx-1L) # fix for 'NA' names in lhs; not likely to happen to ov.names # since 'NA' is not a valid name for list elements/data.frame columns if(lhs == "NA") lhs <- "NA." rhs <- substr(x, op.idx+attr(op.idx, "match.length"), nchar(x)) # check if first character is '+'; if so, remove silently if(substr(rhs, 1, 1) == "+") { rhs <- substr(rhs, 2, nchar(rhs)) } # 2b. if operator is "==" or "<" or ">" or ":=", put it in CON if(op == "==" || op == "<" || op == ">" || op == ":=") { # remove quotes, if any lhs <- gsub("\\\"", "", lhs) rhs <- gsub("\\\"", "", rhs) CON.idx <- CON.idx + 1L CON[[CON.idx]] <- list(op=op, lhs=lhs, rhs=rhs, user=1L) next } # 2c if operator is ":", put it in BLOCK if(op == ":") { FLAT.idx <- FLAT.idx + 1L FLAT.lhs[FLAT.idx] <- lhs FLAT.op[ FLAT.idx] <- op FLAT.rhs[FLAT.idx] <- rhs FLAT.fixed[FLAT.idx] <- "" FLAT.start[FLAT.idx] <- "" FLAT.label[FLAT.idx] <- "" FLAT.prior[FLAT.idx] <- "" FLAT.rhs.mod.idx[FLAT.idx] <- 0L if(BLOCK_OP) { BLOCK <- BLOCK + 1L } FLAT.block[FLAT.idx] <- BLOCK BLOCK_OP <- TRUE next } # 3. 
parse left hand # lhs modifiers will be ignored for now lhs.formula <- as.formula(paste("~",lhs)) out <- lav_syntax_parse_rhs(rhs=lhs.formula[[2L]]) lhs.names <- names(out) # check if we have modifiers if(sum(sapply(out, length)) > 0L) { warning("lavaan WARNING: left-hand side of formula below contains modifier:\n", x,"\n") } # 4. lav_syntax_parse_rhs (as rhs of a single-sided formula) # new 0.5-12: before we do this, replace '0.2?' by 'start(0.2)*' # requested by the simsem folks rhs <- gsub('\\(?([-]?[0-9]*\\.?[0-9]*)\\)?\\?',"start(\\1)\\*", rhs) rhs.formula <- as.formula(paste("~",rhs)) out <- lav_syntax_parse_rhs(rhs=rhs.formula[[2L]],op=op) if(debug) print(out) # for each lhs element for(l in 1:length(lhs.names)) { # for each rhs element for(j in 1:length(out)) { # catch intercepts if(names(out)[j] == "intercept") { if(op == "~") { rhs.name <- "" } else { stop("lavaan ERROR: right-hand side of formula contains an intercept, but operator is \"", op, "\" in: ", x) } } else if(names(out)[j] == "..zero.." && op == "~") { rhs.name <- "" } else if(names(out)[j] == "..constant.." && op == "~") { rhs.name <- "" } else { rhs.name <- names(out)[j] } # move this 'check' to post-parse #if(op == "|") { # th.name <- paste("t", j, sep="") # if(names(out)[j] != th.name) { # stop("lavaan ERROR: threshold ", j, " of variable ", # sQuote(lhs.names[1]), " should be named ", # sQuote(th.name), "; found ", # sQuote(names(out)[j]), "\n") # } #} # catch lhs = rhs and op = "=~" if(op == "=~" && lhs.names[l] == names(out)[j]) { stop("lavaan ERROR: latent variable `", lhs.names[l], "' can not be measured by itself") } # check if we not already have this combination (in this group) # 1. asymmetric (=~, ~, ~1) if(op != "~~") { idx <- which(FLAT.lhs == lhs.names[l] & FLAT.op == op & FLAT.block == BLOCK & FLAT.rhs == rhs.name) if(length(idx) > 0L) { stop("lavaan ERROR: duplicate model element in: ", model[i]) } } else { # 2. 
symmetric (~~) idx <- which(FLAT.lhs == rhs.name & FLAT.op == "~~" & FLAT.block == BLOCK & FLAT.rhs == lhs.names[l]) if(length(idx) > 0L) { stop("lavaan ERROR: duplicate model element in: ", model[i]) } } FLAT.idx <- FLAT.idx + 1L FLAT.lhs[FLAT.idx] <- lhs.names[l] FLAT.op[ FLAT.idx] <- op FLAT.rhs[FLAT.idx] <- rhs.name FLAT.block[FLAT.idx] <- BLOCK FLAT.fixed[FLAT.idx] <- "" FLAT.start[FLAT.idx] <- "" FLAT.label[FLAT.idx] <- "" FLAT.prior[FLAT.idx] <- "" mod <- list() rhs.mod <- 0L if(length(out[[j]]$fixed) > 0L) { mod$fixed <- out[[j]]$fixed FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";") rhs.mod <- 1L } if(length(out[[j]]$start) > 0L) { mod$start <- out[[j]]$start FLAT.start[FLAT.idx] <- paste(mod$start, collapse=";") rhs.mod <- 1L } if(length(out[[j]]$label) > 0L) { mod$label <- out[[j]]$label FLAT.label[FLAT.idx] <- paste(mod$label, collapse=";") rhs.mod <- 1L } if(length(out[[j]]$prior) > 0L) { mod$prior <- out[[j]]$prior FLAT.prior[FLAT.idx] <- paste(mod$prior, collapse=";") rhs.mod <- 1L } #if(op == "~1" && rhs == "0") { # mod$fixed <- 0 # FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";") # rhs.mod <- 1L #} if(op == "=~" && rhs == "0") { mod$fixed <- 0 FLAT.rhs[FLAT.idx] <- FLAT.lhs[FLAT.idx] FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";") rhs.mod <- 1L } FLAT.rhs.mod.idx[FLAT.idx] <- rhs.mod if(rhs.mod > 0L) { MOD.idx <- MOD.idx + 1L MOD[[MOD.idx]] <- mod } } # rhs elements } # lhs elements } # model elements # enumerate modifier indices mod.idx <- which(FLAT.rhs.mod.idx > 0L) FLAT.rhs.mod.idx[ mod.idx ] <- 1:length(mod.idx) FLAT <- list(lhs=FLAT.lhs, op=FLAT.op, rhs=FLAT.rhs, mod.idx=FLAT.rhs.mod.idx, block=FLAT.block, fixed=FLAT.fixed, start=FLAT.start, label=FLAT.label, prior=FLAT.prior) # change op for intercepts (for convenience only) int.idx <- which(FLAT$op == "~" & FLAT$rhs == "") if(length(int.idx) > 0L) FLAT$op[int.idx] <- "~1" if(as.data.frame.) FLAT <- as.data.frame(FLAT, stringsAsFactors=FALSE) attr(FLAT, "modifiers") <- MOD attr(FLAT, "constraints") <- CON FLAT } lav_syntax_parse_rhs <- function(rhs, op="") { # new version YR 15 dec 2011! # - no 'equal' field anymore (only labels!) # - every modifier is evaluated # - unquoted labels are allowed (eg. x1 + x2 + c(v1,v2,v3)*x3) # fill in rhs list out <- list() repeat { if(length(rhs) == 1L) { # last one and only a single element out <- c(vector("list", 1L), out) NAME <- all.vars(rhs) if(length(NAME) > 0L) { names(out)[1L] <- NAME } else { # intercept or zero? if(as.character(rhs) == "1") { names(out)[1L] <- "intercept" } else if(as.character(rhs) == "0") { names(out)[1L] <- "..zero.." out[[1L]]$fixed <- 0 } else { names(out)[1L] <- "..constant.." out[[1L]]$fixed <- 0 } } break } else if(rhs[[1L]] == "*") { # last one, but with modifier out <- c(vector("list", 1L), out) NAME <- all.vars(rhs[[3L]]) if(length(NAME) > 0L) { # not an intercept # catch interaction term rhs3.names <- all.names(rhs[[3L]]) if(rhs3.names[1L] == ":") { NAME <- paste(NAME[1L], ":", NAME[2L], sep = "") } names(out)[1L] <- NAME } else { # intercept names(out)[1L] <- "intercept" } i.var <- all.vars(rhs[[2L]], unique=FALSE) if(length(i.var) > 0L) { # modifier are unquoted labels out[[1L]]$label <- i.var } else { # modifer is something else out[[1L]] <- lav_syntax_get_modifier(rhs[[2L]]) } break } else if(rhs[[1L]] == ":") { # last one, but interaction term out <- c(vector("list", 1L), out) NAME <- all.vars(rhs) NAME <- paste(NAME[1L], ":", NAME[2L], sep = "") names(out)[1L] <- NAME break } else if(rhs[[1L]] == "+") { # not last one! 
i.var <- all.vars(rhs[[3L]], unique=FALSE) n.var <- length(i.var) # catch interaction term rhs3.names <- all.names(rhs[[3L]]) if(length(i.var) > 1L && ":" %in% rhs3.names) { colon.idx <- which(rhs3.names == ":") i.var <- i.var[seq_len(n.var - 1L)] n.var <- n.var - 1L i.var[n.var] <- paste(rhs3.names[colon.idx + 1L], ":", rhs3.names[colon.idx + 2L], sep = "") } out <- c(vector("list", 1L), out) if(length(i.var) > 0L) { names(out)[1L] <- i.var[n.var] } else { names(out)[1L] <- "intercept" } if(n.var > 1L) { # modifier are unquoted labels out[[1L]]$label <- i.var[-n.var] } else if(length(rhs[[3L]]) == 3L && rhs3.names[1L] == "*") { # modifiers!! out[[1L]] <- lav_syntax_get_modifier(rhs[[3L]][[2L]]) } # next element rhs <- rhs[[2L]] } else { stop("lavaan ERROR: I'm confused parsing this line: ", rhs, "\n") } } # if multiple elements, check for duplicated elements and merge if found if(length(out) > 1L) { rhs.names <- names(out) while( !is.na(idx <- which(duplicated(rhs.names))[1L]) ) { dup.name <- rhs.names[ idx ] orig.idx <- match(dup.name, rhs.names) merged <- c( out[[orig.idx]], out[[idx]] ) if(!is.null(merged)) # be careful, NULL will delete element out[[orig.idx]] <- merged out <- out[-idx] rhs.names <- names(out) } } # if thresholds, check order and reorder if necessary #if(op == "|") { # t.names <- names(out) # idx <- match(sort(t.names), t.names) # out <- out[idx] #} out } lav_syntax_get_modifier <- function(mod) { if(length(mod) == 1L) { # three possibilites: 1) numeric, 2) NA, or 3) quoted character if( is.numeric(mod) ) return( list(fixed=mod) ) if( is.na(mod) ) return( list(fixed=as.numeric(NA)) ) if( is.character(mod) ) return( list(label=mod) ) } else if(mod[[1L]] == "start") { cof <- unlist(lapply(as.list(mod)[-1], eval, envir=NULL, enclos=NULL)) return( list(start=cof) ) } else if(mod[[1L]] == "equal") { label <- unlist(lapply(as.list(mod)[-1], eval, envir=NULL, enclos=NULL)) return( list(label=label) ) } else if(mod[[1L]] == "label") { label <- unlist(lapply(as.list(mod)[-1], eval, envir=NULL, enclos=NULL)) label[is.na(label)] <- "" # catch 'NA' elements in a label return( list(label=label) ) } else if(mod[[1L]] == "prior") { prior <- unlist(lapply(as.list(mod)[-1], eval, envir=NULL, enclos=NULL)) return( list(prior=prior) ) } else if(mod[[1L]] == "c") { # vector: we allow numeric and character only! 
cof <- unlist(lapply(as.list(mod)[-1], eval, envir=NULL, enclos=NULL)) if(all(is.na(cof))) { return( list(fixed=rep(as.numeric(NA), length(cof))) ) } else if(is.numeric(cof)) return( list(fixed=cof) ) else if(is.character(cof)) { cof[is.na(cof)] <- "" # catch 'NA' elements in a label return( list(label=cof) ) } else { stop("lavaan ERROR: can not parse modifier:", mod, "\n") } } else { # unknown expression # as a final attempt, we will evaluate it and coerce it # to either a numeric or character (vector) cof <- try( eval(mod, envir=NULL, enclos=NULL), silent=TRUE) if(is.numeric(cof)) return( list(fixed=cof) ) else if(is.character(cof)) return( list(label=cof) ) else { stop("lavaan ERROR: can not parse modifier:", mod, "\n") } } } lavaan/R/lav_fiml.R0000644000176200001440000001230712660632266013645 0ustar liggesusers # derivatives FIML unrestricted model (h1) # pattern-based method derivative.FIML <- function(Sigma.hat, Mu.hat, M) { ntotal <- sum(sapply(M, "[[", "freq")) nvar <- length(Mu.hat) npatterns <- length(M) dx.Sigma <- matrix(0, nvar, nvar) dx.Mu <- matrix(0, nvar, 1) for(p in 1:npatterns) { SX <- M[[p]][["SY"]] MX <- M[[p]][["MY"]] nobs <- M[[p]][["freq"]] var.idx <- M[[p]][["var.idx"]] Sigma.inv <- inv.chol(Sigma.hat[var.idx, var.idx], logdet=FALSE) Mu <- Mu.hat[var.idx] TT <- SX + tcrossprod(MX - Mu) dx.Mu[var.idx, 1] <- ( dx.Mu[var.idx, 1] + nobs/ntotal * -2 * t(t(MX - Mu) %*% Sigma.inv) ) dx.Sigma[var.idx, var.idx] <- ( dx.Sigma[var.idx, var.idx] - nobs/ntotal * 2 * # in the 'textbook' formula's, the Sigma.inv below is often # replaced by [0.5 * D'(Sigma.inv %x% Sigma.inv) D] # but we do not use the 'vecs' notation here, and # we 'compensate' for the symmetry later on (Sigma.inv %*% (TT - Sigma.hat[var.idx,var.idx]) %*% Sigma.inv ) ) } # compensate for symmetry diag(dx.Sigma) <- diag(dx.Sigma)/2 out <- list(dx.mu=dx.Mu, dx.Sigma=dx.Sigma) out } # X <- matrix(rnorm(200*3), ncol=3) # X2 <- matrix(rnorm(200*3), ncol=3) # Sigma <- cov(X2); Mu <- colMeans(X2) # logl of the MVM, no constants, factor 0.5, factor -1 # summary statistics version logl.MVN.complete <- function(Sigma, Mu, X=NULL, data.cov=NULL, data.mean=NULL) { if(is.null(data.cov)) { stopifnot(!is.null(X)) nobs <- nrow(X) data.cov <- cov(X) * (nobs-1)/nobs data.mean <- colMeans(X) } Sigma.inv <- inv.chol(Sigma, logdet=TRUE) Sigma.log.det <- attr(Sigma.inv, "logdet") diff <- as.matrix(data.mean - Mu) TT <- data.cov + tcrossprod(diff) logl <- Sigma.log.det + sum(TT * Sigma.inv) # - S.log.det - nvar logl <- 0.5 * logl logl } # logl of the MVM, no constant, factor 0.5, factor -1, factor 1/nobs # case-wise version logl.MVN.casewise <- function(Sigma, Mu, X) { Sigma.inv <- inv.chol(Sigma, logdet=TRUE) Sigma.log.det <- attr(Sigma.inv, "logdet") tmp1 <- tmp2 <- logl <- 0.0 for(i in 1:nrow(X)) { diff <- as.matrix(X[i,] - Mu) Tmp1 <- Sigma.log.det Tmp2 <- (t(diff) %*% Sigma.inv %*% diff) tmp1 <- tmp1 + Tmp1 tmp2 <- tmp2 + Tmp2 } logl <- -1 * as.numeric( -0.5 * tmp1 -0.5* tmp2 ) * 1/nrow(X) logl } # numerical/analytical hessian saturated model under MVN hessian.MVN.saturated <- function(Sigma=NULL, Mu=NULL, X=NULL, data.cov=NULL, data.mean=NULL, meanstructure=TRUE, analytical=TRUE) { if(is.null(data.cov)) { stopifnot(!is.null(X)) nobs <- nrow(X) data.cov <- cov(X) * (nobs-1)/nobs data.mean <- colMeans(X) } nvar <- ncol(data.cov) #if(!analytical) { # lower.idx <- which(lower.tri(Sigma, diag = TRUE)) # upper.idx <- which(upper.tri(t(Sigma), diag = TRUE)) # # x2param <- function(x) { # if(meanstructure) { # mu <- x[1:nvar] # sigma.el 
<- x[-(1:nvar)] # } else { # mu <- data.mean # sigma.el <- x # } # sigma <- matrix(0, nvar, nvar) # sigma[lower.idx] <- sigma.el # sigma[upper.idx] <- t(sigma)[upper.idx] # list(mu = mu, sigma = sigma) # } # # param2x <- function(Sigma, Mu) { # if(meanstructure) { # x1 <- as.numeric(Mu) # x2 <- lav_matrix_vech(Sigma) # x <- c(x1, x2) # } else { # x <- lav_matrix_vech(Sigma) # } # x # } # # objective.function <- function(x) { # out <- x2param(x) # mu.local <- out$mu # sigma.local <- out$sigma # fx <- logl.MVN.complete(Sigma=sigma.local, Mu=mu.local, # data.cov=data.cov, data.mean=data.mean) # fx # } # # # compute numerical approximation of the Hessian # H <- numDeriv::hessian(func=objective.function, x=param2x(Sigma,Mu)) # #} else { Sigma.inv <- inv.chol(Sigma, logdet=FALSE) if(meanstructure) { diff <- as.matrix(data.mean - Mu) TT <- data.cov + tcrossprod(diff) H11 <- inv.chol(Sigma, logdet=FALSE) tmp <- t(diff) %*% Sigma.inv H12 <- lav_matrix_duplication_post(Sigma.inv %x% tmp) H21 <- t(H12) tmp <- (Sigma.inv %*% TT %*% Sigma.inv) - 0.5*Sigma.inv H22 <- lav_matrix_duplication_pre_post(Sigma.inv %x% tmp) H <- rbind( cbind(H11, H12), cbind(H21, H22) ) } else { TT <- data.cov tmp <- (Sigma.inv %*% TT %*% Sigma.inv) - 0.5*Sigma.inv H <- lav_matrix_duplication_pre_post(Sigma.inv %x% tmp) } # } H } lavaan/R/lav_modification.R0000644000176200001440000002306413053017773015362 0ustar liggesusers# univariate modification indices # modindices <- function(object, standardized = TRUE, # power statistics? power = FALSE, delta = 0.1, alpha = 0.05, high.power = 0.75, # customize output sort. = FALSE, minimum.value = 0.0, maximum.number = nrow(LIST), free.remove = TRUE, na.remove = TRUE, op = NULL) { # check if model has converged if(object@optim$npar > 0L && !object@optim$converged) { warning("lavaan WARNING: model did not converge") } # not ready for estimator = "PML" if(object@Options$estimator == "PML") { stop("lavaan WARNING: modification indices for estimator PML are not implemented yet.") } # sanity check if(power) { standardized <- TRUE } # extended list (fixed-to-zero parameters) strict.exo <- FALSE if(object@Model@fixed.x && object@Model@categorical) { strict.exo <- TRUE ## truly conditional.x } FULL <- lav_partable_full(object@ParTable, free = TRUE, start = TRUE, strict.exo = strict.exo) FULL$free <- rep(1L, nrow(FULL)) FULL$user <- rep(10L, nrow(FULL)) FIT <- lav_object_extended(object, add = FULL, all.free = TRUE) LIST <- FIT@ParTable # compute information matrix 'extended model' # ALWAYS use *expected* information (for now) information <- lavTech(FIT, "information.expected") # compute gradient 'extended model' score <- lavTech(FIT, "gradient") # Saris, Satorra & Sorbom 1987 # partition Q into Q_11, Q_22 and Q_12/Q_21 # which elements of Q correspond with 'free' and 'nonfree' parameters? model.idx <- LIST$free[ LIST$free > 0L & LIST$user != 10L ] extra.idx <- LIST$free[ LIST$free > 0L & LIST$user == 10L ] # catch empty extra.idx (no modification indices!) 
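# (editorial note) extra.idx is empty when the extended table adds no new
# fixed-to-zero parameters: either the model is saturated, or the remaining
# degrees of freedom stem from equality constraints (which are released via
# lavTestScore() instead); both cases are handled below by warning and
# returning an empty data.frame.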
if(length(extra.idx) == 0L) { # 2 possibilities: either model is saturated, or we have constraints if(object@test[[1]]$df == 0) { warning("lavaan WARNING: list with extra parameters is empty; model is saturated") } else { warning("lavaan WARNING: list with extra parameters is empty; to release equality\n constraints, use lavTestScore()") } LIST <- data.frame(lhs = character(0), op = character(0), rhs = character(0), group = integer(0), mi = numeric(0), epc = numeric(0), sepc.lv = numeric(0), sepc.all = numeric(0), sepc.nox = numeric(0)) return(LIST) } # partition I11 <- information[extra.idx, extra.idx, drop = FALSE] I12 <- information[extra.idx, model.idx, drop = FALSE] I21 <- information[model.idx, extra.idx, drop = FALSE] I22 <- information[model.idx, model.idx, drop = FALSE] # ALWAYS use *expected* information (for now) I22.inv <- lavTech(object, "inverted.information.expected") # just in case... if(inherits(I22.inv, "try-error")) { stop("lavaan ERROR: could not compute modification indices; information matrix is singular") } V <- I11 - I12 %*% I22.inv %*% I21 V.diag <- diag(V) # dirty hack: catch very small or negative values in diag(V) # this is needed eg when parameters are not identified if freed-up; idx <- which(V.diag < sqrt(.Machine$double.eps)) if(length(idx) > 0L) { V.diag[idx] <- as.numeric(NA) } # create and fill in mi N <- object@SampleStats@ntotal mi <- numeric( length(score) ) mi[extra.idx] <- N * (score[extra.idx]*score[extra.idx]) / V.diag if(length(model.idx) > 0L) { mi[model.idx] <- N * (score[model.idx]*score[model.idx]) / diag(I22) } LIST$mi <- rep(as.numeric(NA), length(LIST$lhs)) LIST$mi[ LIST$free > 0 ] <- mi # handle equality constraints (if any) #eq.idx <- which(LIST$op == "==") #if(length(eq.idx) > 0L) { # OUT <- lavTestScore(object, warn = FALSE) # LIST$mi[ eq.idx ] <- OUT$uni$X2 #} # scaled? if(length(object@test) > 1L) { LIST$mi.scaled <- LIST$mi / object@test[[2]]$scaling.factor } # EPC d <- (-1 * N) * score # needed? probably not; just in case d[which(abs(d) < 1e-15)] <- 1.0 LIST$epc[ LIST$free > 0 ] <- mi/d # standardize? if(standardized) { # two problems: # - EPC of variances can be negative, and that is # perfectly legal # - EPC (of variances) can be tiny (near-zero), and we should # not divide by tiny variables EPC <- LIST$epc small.idx <- which(LIST$op == "~~" & LIST$lhs == LIST$rhs & abs(EPC) < sqrt( .Machine$double.eps ) ) if(length(small.idx) > 0L) { EPC[ small.idx ] <- as.numeric(NA) } # get the sign EPC.sign <- sign(LIST$epc) LIST$sepc.lv <- EPC.sign * standardize.est.lv(object, partable = LIST, est = abs(EPC), cov.std = FALSE) if(length(small.idx) > 0L) { LIST$sepc.lv[small.idx] <- 0 } LIST$sepc.all <- EPC.sign * standardize.est.all(object, partable = LIST, est = abs(EPC), cov.std = FALSE) if(length(small.idx) > 0L) { LIST$sepc.all[small.idx] <- 0 } LIST$sepc.nox <- EPC.sign * standardize.est.all.nox(object, partable = LIST, est = abs(EPC), cov.std = FALSE) if(length(small.idx) > 0L) { LIST$sepc.nox[small.idx] <- 0 } } # power? if(power) { LIST$delta <- delta # FIXME: this is using epc in unstandardized metric # this would be much more useful in standardized metric # we need a standardize.est.all.reverse function... 
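# (editorial note) the power computation below follows Saris, Satorra &
# van der Veld (2009): since the MI behaves as epc^2 times the information,
# the noncentrality parameter for a misspecification of size delta is
# ncp = (MI / epc^2) * delta^2, and power = P( X2(df=1, ncp) > crit ),
# where crit is the (1 - alpha) quantile of a central chi-square with 1 df.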
LIST$ncp <- (LIST$mi / (LIST$epc*LIST$epc)) * (delta*delta) LIST$power <- 1 - pchisq(qchisq((1.0 - alpha), df=1), df=1, ncp=LIST$ncp) LIST$decision <- character( length(LIST$power) ) # five possibilities (Table 6 in Saris, Satorra, van der Veld, 2009) mi.significant <- ifelse( 1 - pchisq(LIST$mi, df=1) < alpha, TRUE, FALSE ) high.power <- LIST$power > high.power # FIXME: sepc.all or epc?? #epc.high <- LIST$sepc.all > LIST$delta epc.high <- LIST$epc > LIST$delta LIST$decision[ which(!mi.significant & !high.power)] <- "(i)" LIST$decision[ which( mi.significant & !high.power)] <- "**(m)**" LIST$decision[ which(!mi.significant & high.power)] <- "(nm)" LIST$decision[ which( mi.significant & high.power & !epc.high)] <- "epc:nm" LIST$decision[ which( mi.significant & high.power & epc.high)] <- "*epc:m*" #LIST$decision[ which(mi.significant & high.power) ] <- "epc" #LIST$decision[ which(mi.significant & !high.power) ] <- "***" #LIST$decision[ which(!mi.significant & !high.power) ] <- "(i)" } # remove some columns LIST$id <- LIST$ustart <- LIST$exo <- LIST$label <- LIST$plabel <- NULL LIST$start <- LIST$free <- LIST$est <- LIST$se <- LIST$prior <- NULL if(power) { LIST$sepc.lv <- LIST$sepc.nox <- NULL } # create data.frame LIST <- as.data.frame(LIST, stringsAsFactors = FALSE) class(LIST) <- c("lavaan.data.frame", "data.frame") # remove rows corresponding to 'old' free parameters if(free.remove) { old.idx <- which(LIST$user != 10L) if(length(old.idx) > 0L) { LIST <- LIST[-old.idx,] } } # remove rows corresponding to 'equality' constraints eq.idx <- which(LIST$op == "==") if(length(eq.idx) > 0L) { LIST <- LIST[-eq.idx,] } # remove even more columns LIST$user <- NULL # remove block/group/level is only single block if(lav_partable_nblocks(LIST) == 1L) { LIST$block <- NULL LIST$group <- NULL LIST$level <- NULL } # sort? if(sort.) { LIST <- LIST[order(LIST$mi, decreasing = TRUE),] } if(minimum.value > 0.0) { LIST <- LIST[!is.na(LIST$mi) & LIST$mi > minimum.value,] } if(maximum.number < nrow(LIST)) { LIST <- LIST[seq_len(maximum.number),] } if(na.remove) { idx <- which(is.na(LIST$mi)) if(length(idx) > 0) { LIST <- LIST[-idx,] } } if(!is.null(op)) { idx <- LIST$op %in% op if(length(idx) > 0) { LIST <- LIST[idx,] } } # add header # TODO: small explanation of the columns in the header? 
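# (editorial sketch) typical interactive use of modindices(), using the
# HolzingerSwineford1939 data that ships with lavaan (not run):
if(FALSE) {
    fit <- cfa('visual  =~ x1 + x2 + x3
                textual =~ x4 + x5 + x6', data = HolzingerSwineford1939)
    modindices(fit, sort. = TRUE, minimum.value = 3.84) # 3.84 = qchisq(.95, 1)
}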
# attr(LIST, "header") <- # c("modification indices for newly added parameters only; to\n", # "see the effects of releasing equality constraints, use the\n", # "lavTestScore() function") LIST } # aliases modificationIndices <- modificationindices <- modindices lavaan/R/lav_lavaanList_methods.R0000644000176200001440000001372512741204533016534 0ustar liggesusers# methods setMethod("show", "lavaanList", function(object) { # show only basic information lav_lavaanList_short_summary(object, print = TRUE) }) lav_lavaanList_short_summary <- function(object, print = TRUE) { txt <- sprintf("lavaanList (%s) -- based on %d datasets (%d converged)\n", packageDescription("lavaan", fields="Version"), object@meta$ndat, sum(object@meta$ok)) if(print) { cat(txt) } invisible(txt) } setMethod("summary", "lavaanList", function(object, header = TRUE, estimates = TRUE, print = TRUE, nd = 3L) { lav_lavaanList_summary(object, header = header, estimates = estimates, print = print, nd = nd) }) lav_lavaanList_summary <- function(object, header = TRUE, estimates = TRUE, est.bias = TRUE, se.bias = TRUE, zstat = TRUE, pvalue = TRUE, print = TRUE, nd = 3L) { output <- list() if(header) { output$header <- lav_lavaanList_short_summary(object, print = print) #if(print) { # # show only basic information # lav_lavaanList_short_summary(object) #} } if(estimates && "partable" %in% object@meta$store.slots) { pe <- parameterEstimates(object, se = FALSE, # zstat = FALSE, pvalue = FALSE, ci = FALSE, standardized = FALSE, add.attributes = print) # scenario 1: simulation if(!is.null(object@meta$lavSimulate)) { pe$est.true <- object@meta$est.true # EST EST <- lav_lavaanList_partable(object, what = "est", type = "all") pe$est.ave <- rowMeans(EST) if(est.bias) { pe$est.bias <- pe$est.true - pe$est.ave } # SE? if(se.bias) { pe$se.obs <- apply(EST, 1L, sd) SE <- lav_lavaanList_partable(object, what = "se", type = "all") pe$se.ave <- rowMeans(SE) pe$se.bias <- pe$se.obs - pe$se.ave } # scenario 2: bootstrap } else if(!is.null(object@meta$lavBootstrap)) { # print the average value for est EST <- lav_lavaanList_partable(object, what = "est", type = "all") pe$est.ave <- rowMeans(EST) # scenario 3: multiple imputation } else if(!is.null(object@meta$lavMultipleImputation)) { # pool est: take the mean EST <- lav_lavaanList_partable(object, what = "est", type = "all") m <- NCOL(EST) pe$est <- rowMeans(EST) # pool se # between-imputation variance #B.var <- apply(EST, 1L, var) est1 <- rowMeans(EST); est2 <- rowMeans(EST^2) B.var <- (est2 - est1*est1) * m/(m-1) # within-imputation variance SE <- lav_lavaanList_partable(object, what = "se", type = "all") W.var <- rowMeans(SE^2) # total variance: T.var = W.var + B.var + B.var/m pe$se <- sqrt(W.var + B.var + (B.var / m)) tmp.se <- ifelse(pe$se == 0.0, NA, pe$se) if(zstat) { pe$z <- pe$est / tmp.se if(pvalue) { pe$pvalue <- 2 * (1 - pnorm( abs(pe$z) )) } } # scenario 4: multiple groups/sets } else if(!is.null(object@meta$lavMultipleGroups)) { # show individual estimates, for each group # browser() EST <- lav_lavaanList_partable(object, what = "est", type = "all") EST <- as.list(as.data.frame(EST)) ngroups <- length(EST) names(EST) <- object@meta$group.label ATTR <- attributes(pe) NAMES <- c(names(pe), names(EST)) pe <- c(pe, EST) attributes(pe) <- ATTR names(pe) <- NAMES } # scenarior 5: just a bunch of fits, using different datasets else { # print the average value for est EST <- lav_lavaanList_partable(object, what = "est", type = "all") pe$est.ave <- rowMeans(EST) # more? 
} output$pe <- pe if(print) { # print pe? print(pe, nd = nd) } } else { cat("available slots (per dataset) are:\n") print(object@meta$store.slots) } invisible(output) } setMethod("coef", "lavaanList", function(object, type = "free", labels = TRUE) { lav_lavaanList_partable(object = object, what = "est", type = type, labels = labels) }) lav_lavaanList_partable <- function(object, what = "est", type = "free", labels = TRUE) { if("partable" %in% object@meta$store.slots) { if(what %in% names(object@ParTableList[[1]])) { OUT <- sapply(object@ParTableList, "[[", what) } else { stop("lavaan ERROR: column `", what, "' not found in the first element of the ParTableList slot.") } } else { stop("lavaan ERROR: no ParTable slot stored in lavaanList object") } if(type == "user" || type == "all") { type <- "user" idx <- 1:length( object@ParTable$lhs ) } else if(type == "free") { idx <- which(object@ParTable$free > 0L & !duplicated(object@ParTable$free)) } else { stop("lavaan ERROR: argument `type' must be one of free or user") } OUT <- OUT[idx, , drop = FALSE] if(labels) { rownames(OUT) <- lav_partable_labels(object@ParTable, type = type) } OUT } lavaan/R/lav_object_methods.R0000644000176200001440000011115413054026503015674 0ustar liggesusers# # initial version: YR 25/03/2009 short.summary <- function(object) { # catch FAKE run FAKE <- FALSE if(object@Options$optim.method == "none") { FAKE <- TRUE } # Convergence or not? if(FAKE) { cat(sprintf("lavaan (%s) -- DRY RUN with 0 iterations\n", packageDescription("lavaan", fields="Version"))) } else if(object@optim$iterations > 0) { if(object@optim$converged) { cat(sprintf("lavaan (%s) converged normally after %3i iterations\n", packageDescription("lavaan", fields="Version"), object@optim$iterations)) } else { cat(sprintf("** WARNING ** lavaan (%s) did NOT converge after %i iterations\n", packageDescription("lavaan", fields="Version"), object@optim$iterations)) cat("** WARNING ** Estimates below are most likely unreliable\n") } } else { cat(sprintf("** WARNING ** lavaan (%s) model has NOT been fitted\n", packageDescription("lavaan", fields="Version"))) cat("** WARNING ** Estimates below are simply the starting values\n") } cat("\n") # number of free parameters #t0.txt <- sprintf(" %-40s", "Number of free parameters") #t1.txt <- sprintf(" %10i", object@optim$npar) #t2.txt <- "" #cat(t0.txt, t1.txt, t2.txt, "\n", sep="") #cat("\n") # listwise deletion? listwise <- FALSE for(g in 1:object@Data@ngroups) { if(object@Data@nobs[[1L]] != object@Data@norig[[1L]]) { listwise <- TRUE break } } if(object@Data@ngroups == 1L) { if(listwise) { cat(sprintf(" %-40s", ""), sprintf(" %10s", "Used"), sprintf(" %10s", "Total"), "\n", sep="") } t0.txt <- sprintf(" %-40s", "Number of observations") t1.txt <- sprintf(" %10i", object@Data@nobs[[1L]]) t2.txt <- ifelse(listwise, sprintf(" %10i", object@Data@norig[[1L]]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } else { if(listwise) { cat(sprintf(" %-40s", ""), sprintf(" %10s", "Used"), sprintf(" %10s", "Total"), "\n", sep="") } t0.txt <- sprintf(" %-40s", "Number of observations per group") cat(t0.txt, "\n") for(g in 1:object@Data@ngroups) { t.txt <- sprintf(" %-40s %10i", object@Data@group.label[[g]], object@Data@nobs[[g]]) t2.txt <- ifelse(listwise, sprintf(" %10i", object@Data@norig[[g]]), "") cat(t.txt, t2.txt, "\n", sep="") } } cat("\n") # missing patterns? 
if(object@SampleStats@missing.flag) { if(object@Data@ngroups == 1L) { t0.txt <- sprintf(" %-40s", "Number of missing patterns") t1.txt <- sprintf(" %10i", object@Data@Mp[[1L]]$npatterns) cat(t0.txt, t1.txt, "\n\n", sep="") } else { t0.txt <- sprintf(" %-40s", "Number of missing patterns per group") cat(t0.txt, "\n") for(g in 1:object@Data@ngroups) { t.txt <- sprintf(" %-40s %10i", object@Data@group.label[[g]], object@Data@Mp[[g]]$npatterns) cat(t.txt, "\n", sep="") } cat("\n") } } # Print Chi-square value for the user-specified (full/h0) model # robust/scaled statistics? if(object@Options$test %in% c("satorra.bentler", "yuan.bentler", "mean.var.adjusted", "scaled.shifted") && length(object@test) > 1L) { scaled <- TRUE if(object@Options$test == "scaled.shifted") shifted <- TRUE else shifted <- FALSE } else { scaled <- FALSE shifted <- FALSE } # 0. heading #h.txt <- sprintf("\nChi-square test user model (h0)", # object@Options$estimator) t0.txt <- sprintf(" %-40s", "Estimator") t1.txt <- sprintf(" %10s", object@Options$estimator) t2.txt <- ifelse(scaled, sprintf(" %10s", "Robust"), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") # check if test == "none" if(object@Options$test != "none" && object@Options$estimator != "MML") { # 1. chi-square values t0.txt <- sprintf(" %-40s", "Minimum Function Test Statistic") t1.txt <- sprintf(" %10.3f", object@test[[1]]$stat) t2.txt <- ifelse(scaled, sprintf(" %10.3f", object@test[[2]]$stat), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") # 2. degrees of freedom t0.txt <- sprintf(" %-40s", "Degrees of freedom") t1.txt <- sprintf(" %10i", object@test[[1]]$df) t2.txt <- ifelse(scaled, ifelse(round(object@test[[2]]$df) == object@test[[2]]$df, sprintf(" %10i", object@test[[2]]$df), sprintf(" %10.3f", object@test[[2]]$df)), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") # 3. P-value if(is.na(object@test[[1]]$df)) { t0.txt <- sprintf(" %-40s", "P-value") t1.txt <- sprintf(" %10.3f", object@test[[1]]$pvalue) t2.txt <- ifelse(scaled, sprintf(" %10.3f", object@test[[2]]$pvalue), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } else if(object@test[[1]]$df > 0) { if(object@test[[1]]$refdistr == "chisq") { t0.txt <- sprintf(" %-40s", "P-value (Chi-square)") } else if(length(object@test) == 1L && object@test[[1]]$refdistr == "unknown") { t0.txt <- sprintf(" %-40s", "P-value (Unknown)") } else { t0.txt <- sprintf(" %-40s", "P-value") } t1.txt <- sprintf(" %10.3f", object@test[[1]]$pvalue) t2.txt <- ifelse(scaled, sprintf(" %10.3f", object@test[[2]]$pvalue), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } else { # FIXME: should we do this? To warn that exact 0.0 was not obtained? if(object@optim$fx > 0) { t0.txt <- sprintf(" %-35s", "Minimum Function Value") t1.txt <- sprintf(" %15.13f", object@optim$fx) t2.txt <- "" cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } # 3b. Do we have a Bollen-Stine p-value? if(object@Options$test == "bollen.stine") { t0.txt <- sprintf(" %-40s", "P-value (Bollen-Stine Bootstrap)") t1.txt <- sprintf(" %10.3f", object@test[[2]]$pvalue) cat(t0.txt, t1.txt, "\n", sep="") } # 4. 
Scaling correction factor if(scaled) { t0.txt <- sprintf(" %-40s", "Scaling correction factor") t1.txt <- sprintf(" %10s", "") t2.txt <- sprintf(" %10.3f", object@test[[2]]$scaling.factor) cat(t0.txt, t1.txt, t2.txt, "\n", sep="") if(object@Options$test == "yuan.bentler") { if(object@Options$mimic == "Mplus") { cat(" for the Yuan-Bentler correction (Mplus variant)\n") } else { cat(" for the Yuan-Bentler correction\n") } } else if(object@Options$test == "satorra.bentler") { if(object@Options$mimic == "Mplus" && object@Options$estimator == "ML") { cat(" for the Satorra-Bentler correction (Mplus variant)\n") } else if(object@Options$mimic == "Mplus" && object@Options$estimator == "DWLS") { cat(" for the Satorra-Bentler correction (WLSM)\n") } else if(object@Options$mimic == "Mplus" && object@Options$estimator == "ULS") { cat(" for the Satorra-Bentler correction (ULSM)\n") } else { cat(" for the Satorra-Bentler correction\n") } } else if(object@Options$test == "mean.var.adjusted") { if(object@Options$mimic == "Mplus" && object@Options$estimator == "ML") { cat(" for the mean and variance adjusted correction (MLMV)\n") } else if(object@Options$mimic == "Mplus" && object@Options$estimator == "DWLS") { cat(" for the mean and variance adjusted correction (WLSMV)\n") } else if(object@Options$mimic == "Mplus" && object@Options$estimator == "ULS") { cat(" for the mean and variance adjusted correction (ULSMV)\n") } else { cat(" for the mean and variance adjusted correction\n") } } } # 4b. Shift parameter? if(shifted) { if(object@Data@ngroups == 1L) { t0.txt <- sprintf(" %-40s", "Shift parameter") t1.txt <- sprintf(" %10s", "") t2.txt <- sprintf(" %10.3f", object@test[[2]]$shift.parameter) cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } else { # multiple groups, multiple shift values! 
cat(" Shift parameter for each group:\n") for(g in 1:object@Data@ngroups) { t0.txt <- sprintf(" %-38s", object@Data@group.label[[g]]) t1.txt <- sprintf(" %10s", "") t2.txt <- sprintf(" %10.3f", object@test[[2]]$shift.parameter[g]) cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } if(object@Options$mimic == "Mplus" && object@Options$estimator == "DWLS") { cat(" for simple second-order correction (WLSMV)\n") } else { cat(" for simple second-order correction (Mplus variant)\n") } } if(object@Data@ngroups > 1L) { cat("\n") cat("Chi-square for each group:\n\n") for(g in 1:object@Data@ngroups) { t0.txt <- sprintf(" %-40s", object@Data@group.label[[g]]) t1.txt <- sprintf(" %10.3f", object@test[[1]]$stat.group[g]) t2.txt <- ifelse(scaled, sprintf(" %10.3f", object@test[[2]]$stat.group[g]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } } } # test != none if(object@Options$estimator == "MML") { fm <- fitMeasures(object, c("logl", "npar", "aic", "bic", "bic2")) print.fit.measures(fm) } #cat("\n") } setMethod("show", "lavaan", function(object) { # show only basic information short.summary(object) }) setMethod("summary", "lavaan", function(object, header = TRUE, fit.measures = FALSE, estimates = TRUE, ci = FALSE, fmi = FALSE, standardized = FALSE, rsquare = FALSE, std.nox = FALSE, modindices = FALSE, nd = 3L) { if(std.nox) standardized <- TRUE # print the 'short' summary if(header) { short.summary(object) } # only if requested, the fit measures if(fit.measures) { if(object@Options$test == "none") { warning("lavaan WARNING: fit measures not available if test = \"none\"\n\n") } else if(object@optim$npar > 0L && !object@optim$converged) { warning("lavaan WARNING: fit measures not available if model did not converge\n\n") } else { print.fit.measures( fitMeasures(object, fit.measures="default") ) } } if(estimates) { PE <- parameterEstimates(object, ci = ci, standardized = standardized, rsquare = rsquare, fmi = fmi, remove.eq = FALSE, remove.system.eq = TRUE, remove.ineq = FALSE, remove.def = FALSE, add.attributes = TRUE) if(standardized && std.nox) { PE$std.all <- PE$std.nox } print(PE, nd = nd) } # modification indices? if(modindices) { cat("Modification Indices:\n\n") print( modificationIndices(object, standardized=TRUE) ) } }) setMethod("coef", "lavaan", function(object, type="free", labels=TRUE) { if(type == "user" || type == "all") { type <- "user" idx <- 1:length( object@ParTable$lhs ) } else if(type == "free") { idx <- which(object@ParTable$free > 0L & !duplicated(object@ParTable$free)) } else { stop("lavaan ERROR: argument `type' must be one of free or user") } EST <- lav_object_inspect_est(object) cof <- EST[idx] # labels? 
if(labels) names(cof) <- lav_partable_labels(object@ParTable, type=type)

    # class
    class(cof) <- c("lavaan.vector", "numeric")

    cof
})

standardizedSolution <- standardizedsolution <- function(object,
                                                         type = "std.all",
                                                         se = TRUE,
                                                         zstat = TRUE,
                                                         pvalue = TRUE,
                                                         remove.eq = TRUE,
                                                         remove.ineq = TRUE,
                                                         remove.def = FALSE,
                                                         GLIST = NULL,
                                                         est = NULL) {

    stopifnot(type %in% c("std.all", "std.lv", "std.nox"))

    # no zstat + pvalue if estimator is Bayes
    if(object@Options$estimator == "Bayes") {
        zstat <- pvalue <- FALSE
    }

    # no se if class is not lavaan
    if(!inherits(object, "lavaan")) {
        if(missing(se) || !se) {
            se <- FALSE
            zstat <- FALSE
            pvalue <- FALSE
        }
    }

    PARTABLE <- inspect(object, "list")
    free.idx <- which(PARTABLE$free > 0L)
    LIST <- PARTABLE[,c("lhs", "op", "rhs")]
    if(!is.null(PARTABLE$group)) {
        LIST$group <- PARTABLE$group
    }

    # add std and std.all columns
    if(type == "std.lv") {
        LIST$est.std <- standardize.est.lv(object, est = est, GLIST = GLIST)
    } else if(type == "std.all") {
        LIST$est.std <- standardize.est.all(object, est = est, GLIST = GLIST)
    } else if(type == "std.nox") {
        LIST$est.std <- standardize.est.all.nox(object, est = est,
                                                GLIST = GLIST)
    }

    if(object@Options$se != "none" && se) {
        # add 'se' for standardized parameters
        VCOV <- try(lav_object_inspect_vcov(object, standardized = TRUE,
                                            type = type, free.only = FALSE,
                                            add.labels = FALSE,
                                            add.class = FALSE))
        if(inherits(VCOV, "try-error")) {
            LIST$se <- rep(NA, length(LIST$lhs))
            if(zstat) {
                LIST$z <- rep(NA, length(LIST$lhs))
            }
            if(pvalue) {
                LIST$pvalue <- rep(NA, length(LIST$lhs))
            }
        } else {
            tmp <- diag(VCOV)
            # catch negative values
            min.idx <- which(tmp < 0)
            if(length(min.idx) > 0L) {
                tmp[min.idx] <- as.numeric(NA)
            }
            # now, we can safely take the square root
            tmp <- sqrt(tmp)
            # catch near-zero SEs
            zero.idx <- which(tmp < sqrt(.Machine$double.eps))
            if(length(zero.idx) > 0L) {
                tmp[zero.idx] <- 0.0
            }
            LIST$se <- tmp

            # add 'z' column
            if(zstat) {
                tmp.se <- ifelse( LIST$se == 0.0, NA, LIST$se)
                LIST$z <- LIST$est.std / tmp.se
            }
            if(zstat && pvalue) {
                LIST$pvalue <- 2 * (1 - pnorm( abs(LIST$z) ))
            }
        }
    }

    # if single group, remove group column
    if(object@Data@ngroups == 1L) LIST$group <- NULL

    # remove == rows?
    if(remove.eq) {
        eq.idx <- which(LIST$op == "==")
        if(length(eq.idx) > 0L) {
            LIST <- LIST[-eq.idx,]
        }
    }
    # remove <> rows? (note: use the vectorized `|', not the scalar `||',
    # so that *all* inequality rows are caught, not just the first element)
    if(remove.ineq) {
        ineq.idx <- which(LIST$op == "<" | LIST$op == ">")
        if(length(ineq.idx) > 0L) {
            LIST <- LIST[-ineq.idx,]
        }
    }
    # remove := rows?
if(remove.def) { def.idx <- which(LIST$op == ":=") if(length(def.idx) > 0L) { LIST <- LIST[-def.idx,] } } # always add attributes (for now) class(LIST) <- c("lavaan.data.frame", "data.frame") LIST } parameterEstimates <- parameterestimates <- function(object, se = TRUE, zstat = TRUE, pvalue = TRUE, ci = TRUE, level = 0.95, boot.ci.type = "perc", standardized = FALSE, fmi = FALSE, remove.system.eq = TRUE, remove.eq = TRUE, remove.ineq = TRUE, remove.def = FALSE, rsquare = FALSE, add.attributes = FALSE) { if("lavaan.fsr" %in% class(object)) { return(object$PE) } # no se if class is not lavaan if(class(object) != "lavaan") { if(missing(se) || !se) { se <- FALSE zstat <- FALSE pvalue <- FALSE } } # check fmi if(fmi) { if(inherits(object, "lavaanList")) { warning("lavaan WARNING: fmi not available for object of class \"lavaanList\"") fmi <- FALSE } if(object@Options$se != "standard") { warning("lavaan WARNING: fmi only available if se = \"standard\"") fmi <- FALSE } if(object@Options$estimator != "ML") { warning("lavaan WARNING: fmi only available if estimator = \"ML\"") fmi <- FALSE } if(!object@SampleStats@missing.flag) { warning("lavaan WARNING: fmi only available if missing = \"(fi)ml\"") fmi <- FALSE } if(!object@optim$converged) { warning("lavaan WARNING: fmi not available; model did not converge") fmi <- FALSE } } # no zstat + pvalue if estimator is Bayes if(object@Options$estimator == "Bayes") { zstat <- pvalue <- FALSE } PARTABLE <- as.data.frame(object@ParTable, stringsAsFactors = FALSE) LIST <- PARTABLE[,c("lhs", "op", "rhs")] if(!is.null(PARTABLE$user)) { LIST$user <- PARTABLE$user } if(!is.null(PARTABLE$block)) { LIST$block <- PARTABLE$block } else { LIST$block <- rep(1L, length(LIST$lhs)) } if(!is.null(PARTABLE$level)) { LIST$level <- PARTABLE$level } else { LIST$level <- rep(1L, length(LIST$lhs)) } if(!is.null(PARTABLE$group)) { LIST$group <- PARTABLE$group } else { LIST$group <- rep(1L, length(LIST$lhs)) } if(!is.null(PARTABLE$label)) { LIST$label <- PARTABLE$label } else { LIST$label <- rep("", length(LIST$lhs)) } if(!is.null(PARTABLE$exo)) { LIST$exo <- PARTABLE$exo } else { LIST$exo <- rep(0L, length(LIST$lhs)) } if(inherits(object, "lavaanList")) { # per default: nothing! #if("partable" %in% object@meta$store.slots) { # COF <- sapply(object@ParTableList, "[[", "est") # LIST$est <- rowMeans(COF) #} LIST$est <- NULL } else if(!is.null(PARTABLE$est)) { LIST$est <- PARTABLE$est } else { LIST$est <- lav_model_get_parameters(object@Model, type = "user", extra = TRUE) } # add se, zstat, pvalue if(se && object@Options$se != "none") { LIST$se <- lav_object_inspect_se(object) tmp.se <- ifelse(LIST$se == 0.0, NA, LIST$se) if(zstat) { LIST$z <- LIST$est / tmp.se if(pvalue) { LIST$pvalue <- 2 * (1 - pnorm( abs(LIST$z) )) } } } # extract bootstrap data (if any) BOOT <- lav_object_inspect_boot(object) bootstrap.successful <- NROW(BOOT) # should be zero if NULL # confidence interval if(se && object@Options$se != "none" && ci) { # next three lines based on confint.lm a <- (1 - level)/2; a <- c(a, 1 - a) if(object@Options$se != "bootstrap") { fac <- qnorm(a) ci <- LIST$est + LIST$se %o% fac } else if(object@Options$se == "bootstrap") { # local copy of 'norm.inter' from boot package (not exported!) 
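            # descriptive note (added): norm.inter(t, alpha) returns, for each
            # probability in 'alpha', an interpolated order statistic of the
            # bootstrap replicates 't'. When (R+1)*alpha is not an integer, it
            # interpolates between the two adjacent order statistics on the
            # normal-quantile (qnorm) scale, as boot:::norm.inter does; column
            # 1 of the result holds the (fractional) rank, column 2 the
            # interpolated quantile.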
norm.inter <- function(t, alpha) { t <- t[is.finite(t)]; R <- length(t); rk <- (R + 1) * alpha if (!all(rk > 1 & rk < R)) warning("extreme order statistics used as endpoints") k <- trunc(rk); inds <- seq_along(k) out <- inds; kvs <- k[k > 0 & k < R] tstar <- sort(t, partial = sort(union(c(1, R), c(kvs, kvs+1)))) ints <- (k == rk) if (any(ints)) out[inds[ints]] <- tstar[k[inds[ints]]] out[k == 0] <- tstar[1L] out[k == R] <- tstar[R] not <- function(v) xor(rep(TRUE,length(v)),v) temp <- inds[not(ints) & k != 0 & k != R] temp1 <- qnorm(alpha[temp]) temp2 <- qnorm(k[temp]/(R+1)) temp3 <- qnorm((k[temp]+1)/(R+1)) tk <- tstar[k[temp]] tk1 <- tstar[k[temp]+1L] out[temp] <- tk + (temp1-temp2)/(temp3-temp2)*(tk1 - tk) cbind(round(rk, 2), out) } stopifnot(!is.null(BOOT)) stopifnot(boot.ci.type %in% c("norm","basic","perc","bca.simple")) if(boot.ci.type == "norm") { fac <- qnorm(a) boot.x <- colMeans(BOOT) boot.est <- lav_model_get_parameters(object@Model, GLIST=lav_model_x2GLIST(object@Model, boot.x), type="user", extra=TRUE) bias.est <- (boot.est - LIST$est) ci <- (LIST$est - bias.est) + LIST$se %o% fac } else if(boot.ci.type == "basic") { ci <- cbind(LIST$est, LIST$est) alpha <- (1 + c(level, -level))/2 # free.idx only qq <- apply(BOOT, 2, norm.inter, alpha) free.idx <- which(object@ParTable$free & !duplicated(object@ParTable$free)) ci[free.idx,] <- 2*ci[free.idx,] - t(qq[c(3,4),]) # def.idx def.idx <- which(object@ParTable$op == ":=") if(length(def.idx) > 0L) { BOOT.def <- apply(BOOT, 1, object@Model@def.function) if(length(def.idx) == 1L) { BOOT.def <- as.matrix(BOOT.def) } else { BOOT.def <- t(BOOT.def) } qq <- apply(BOOT.def, 2, norm.inter, alpha) ci[def.idx,] <- 2*ci[def.idx,] - t(qq[c(3,4),]) } # TODO: add cin/ceq? } else if(boot.ci.type == "perc") { ci <- cbind(LIST$est, LIST$est) alpha <- (1 + c(-level, level))/2 # free.idx only qq <- apply(BOOT, 2, norm.inter, alpha) free.idx <- which(object@ParTable$free & !duplicated(object@ParTable$free)) ci[free.idx,] <- t(qq[c(3,4),]) # def.idx def.idx <- which(object@ParTable$op == ":=") if(length(def.idx) > 0L) { BOOT.def <- apply(BOOT, 1, object@Model@def.function) if(length(def.idx) == 1L) { BOOT.def <- as.matrix(BOOT.def) } else { BOOT.def <- t(BOOT.def) } qq <- apply(BOOT.def, 2, norm.inter, alpha) def.idx <- which(object@ParTable$op == ":=") ci[def.idx,] <- t(qq[c(3,4),]) } # TODO: add cin/ceq? } else if(boot.ci.type == "bca.simple") { # no adjustment for scale!! only bias!! alpha <- (1 + c(-level, level))/2 zalpha <- qnorm(alpha) ci <- cbind(LIST$est, LIST$est) # free.idx only free.idx <- which(object@ParTable$free & !duplicated(object@ParTable$free)) x <- LIST$est[free.idx] for(i in 1:length(free.idx)) { t <- BOOT[,i]; t <- t[is.finite(t)]; t0 <- x[i] w <- qnorm(sum(t < t0)/length(t)) a <- 0.0 #### !!! #### adj.alpha <- pnorm(w + (w + zalpha)/(1 - a*(w + zalpha))) qq <- norm.inter(t, adj.alpha) ci[free.idx[i],] <- qq[,2] } # def.idx def.idx <- which(object@ParTable$op == ":=") if(length(def.idx) > 0L) { x.def <- object@Model@def.function(x) BOOT.def <- apply(BOOT, 1, object@Model@def.function) if(length(def.idx) == 1L) { BOOT.def <- as.matrix(BOOT.def) } else { BOOT.def <- t(BOOT.def) } for(i in 1:length(def.idx)) { t <- BOOT.def[,i]; t <- t[is.finite(t)]; t0 <- x.def[i] w <- qnorm(sum(t < t0)/length(t)) a <- 0.0 #### !!! 
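                # descriptive note (added): the acceleration constant 'a' is
                # fixed at zero here, so this is a bias-corrected (BC)
                # interval rather than full BCa -- in line with the
                # "no adjustment for scale!! only bias!!" comment above;
                # hence the "bca.simple" label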
adj.alpha <- pnorm(w + (w + zalpha)/(1 - a*(w + zalpha)))
                    qq <- norm.inter(t, adj.alpha)
                    ci[def.idx[i],] <- qq[,2]
                }
            }
            # TODO:
            # - add cin/ceq
            }
        }
        LIST$ci.lower <- ci[,1]; LIST$ci.upper <- ci[,2]
    }

    # standardized estimates?
    if(standardized) {
        LIST$std.lv  <- standardize.est.lv(object)
        LIST$std.all <- standardize.est.all(object, est.std=LIST$est.std)
        LIST$std.nox <- standardize.est.all.nox(object, est.std=LIST$est.std)
    }

    # rsquare?
    if(rsquare) {
        r2 <- lavTech(object, "rsquare", add.labels = TRUE)
        NAMES <- unlist(lapply(r2, names)); nel <- length(NAMES)
        R2 <- data.frame( lhs = NAMES, op = rep("r2", nel), rhs = NAMES,
                          block = rep(1:length(r2), sapply(r2, length)),
                          est = unlist(r2), stringsAsFactors = FALSE )
        LIST <- lav_partable_merge(pt1 = LIST, pt2 = R2, warn = FALSE)
    }

    # fractional missing information (if estimator="fiml")
    if(fmi) {
        SE.orig <- LIST$se
        lavmodel <- object@Model; implied <- object@implied
        COV  <- if(lavmodel@conditional.x) implied$res.cov else implied$cov
        MEAN <- if(lavmodel@conditional.x) implied$res.int else implied$mean
        # provide rownames
        for(g in 1:object@Data@ngroups)
            rownames(COV[[g]]) <- object@Data@ov.names[[g]]
        # if estimator="ML" and likelihood="normal" --> rescale
        if(object@Options$estimator == "ML" &&
           object@Options$likelihood == "normal") {
            for(g in 1:object@Data@ngroups) {
                N <- object@Data@nobs[[g]]
                COV[[g]] <- (N+1)/N * COV[[g]]
            }
        }
        # fit another model, using the model-implied moments as input data
        step2 <- lavaan(slotOptions = object@Options,
                        slotParTable = object@ParTable,
                        sample.cov = COV,
                        sample.mean = MEAN,
                        sample.nobs = object@Data@nobs)
        SE2 <- lav_object_inspect_se(step2)
        SE.step2 <- ifelse(SE2 == 0.0, as.numeric(NA), SE2)
        if(rsquare) {
            # add additional elements, since LIST$se is now longer
            r2.idx <- which(LIST$op == "r2")
            if(length(r2.idx) > 0L) {
                SE.step2 <- c(SE.step2, rep(as.numeric(NA), length(r2.idx)))
            }
        }
        LIST$fmi <- 1-(SE.step2*SE.step2/(SE.orig*SE.orig))
    }

    # if single level, remove level column
    if(object@Data@nlevels == 1L) LIST$level <- NULL

    # if single group, remove group column
    if(object@Data@ngroups == 1L) LIST$group <- NULL

    # if single everything, remove block column
    if(object@Data@nlevels == 1L && object@Data@ngroups == 1L) {
        LIST$block <- NULL
    }

    # if no user-defined labels, remove label column
    if(sum(nchar(object@ParTable$label)) == 0L) LIST$label <- NULL

    # remove == rows?
    if(remove.eq) {
        eq.idx <- which(LIST$op == "==" & LIST$user == 1L)
        if(length(eq.idx) > 0L) {
            LIST <- LIST[-eq.idx,]
        }
    }
    if(remove.system.eq) {
        eq.idx <- which(LIST$op == "==" & LIST$user != 1L)
        if(length(eq.idx) > 0L) {
            LIST <- LIST[-eq.idx,]
        }
    }
    # remove <> rows? (vectorized `|', not scalar `||', so that all
    # inequality rows are removed)
    if(remove.ineq) {
        ineq.idx <- which(LIST$op == "<" | LIST$op == ">")
        if(length(ineq.idx) > 0L) {
            LIST <- LIST[-ineq.idx,]
        }
    }
    # remove := rows?
    if(remove.def) {
        def.idx <- which(LIST$op == ":=")
        if(length(def.idx) > 0L) {
            LIST <- LIST[-def.idx,]
        }
    }

    # remove LIST$user
    LIST$user <- NULL

    if(add.attributes) {
        class(LIST) <- c("lavaan.parameterEstimates", "lavaan.data.frame",
                         "data.frame")
        attr(LIST, "information") <- object@Options$information
        attr(LIST, "se") <- object@Options$se
        attr(LIST, "group.label") <- object@Data@group.label
        attr(LIST, "level.label") <- object@Data@level.label
        attr(LIST, "bootstrap") <- object@Options$bootstrap
        attr(LIST, "bootstrap.successful") <- bootstrap.successful
        attr(LIST, "missing") <- object@Options$missing
        attr(LIST, "observed.information") <-
            object@Options$observed.information
        attr(LIST, "h1.information") <- object@Options$h1.information
        # FIXME: add more!!
    } else {
        LIST$exo <- NULL
        class(LIST) <- c("lavaan.data.frame", "data.frame")
    }

    LIST
}

parameterTable <- parametertable <- parTable <- partable <-
    function(object) {

    # convert to data.frame
    out <- as.data.frame(object@ParTable, stringsAsFactors = FALSE)
    class(out) <- c("lavaan.data.frame", "data.frame")

    out
}

varTable <- vartable <- function(object, ov.names=names(object),
                                 ov.names.x=NULL, ordered = NULL,
                                 factor = NULL, as.data.frame.=TRUE) {

    if(inherits(object, "lavaan")) {
        VAR <- object@Data@ov
    } else if(inherits(object, "lavData")) {
        VAR <- object@ov
    } else if(inherits(object, "data.frame")) {
        VAR <- lav_dataframe_vartable(frame = object, ov.names = ov.names,
                                      ov.names.x = ov.names.x,
                                      ordered = ordered, factor = factor,
                                      as.data.frame. = FALSE)
    } else {
        stop("object must be of class lavaan or a data.frame")
    }

    if(as.data.frame.) {
        VAR <- as.data.frame(VAR, stringsAsFactors=FALSE,
                             row.names=1:length(VAR$name))
        class(VAR) <- c("lavaan.data.frame", "data.frame")
    }

    VAR
}

setMethod("fitted.values", "lavaan",
function(object, type = "moments", labels=TRUE) {
    # lowercase type
    type <- tolower(type)
    # catch type="casewise"
    if(type %in% c("casewise","case","obs","observations","ov")) {
        return( lavPredict(object, type = "ov", label = labels) )
    }
    lav_object_inspect_implied(object, add.labels = labels,
                               add.class = TRUE,
                               drop.list.single.group = TRUE)
})

setMethod("fitted", "lavaan",
function(object, type = "moments", labels=TRUE) {
    fitted.values(object, type = type, labels = labels)
})

setMethod("vcov", "lavaan",
function(object, labels = TRUE, remove.duplicated = FALSE) {
    # check for convergence first!
    if(object@optim$npar > 0L && !object@optim$converged)
        stop("lavaan ERROR: model did not converge")
    if(object@Options$se == "none") {
        stop("lavaan ERROR: vcov not available if se=\"none\"")
    }
    VarCov <- lav_object_inspect_vcov(object, add.labels = labels,
                                      add.class = TRUE,
                                      remove.duplicated = remove.duplicated)
    VarCov
})

# logLik (so that we can use the default AIC/BIC functions from stats4)
setMethod("logLik", "lavaan",
function(object, ...) {
    if(object@Options$estimator != "ML") {
        warning("lavaan WARNING: logLik only available if estimator is ML")
    }
    if(object@optim$npar > 0L && !object@optim$converged) {
        warning("lavaan WARNING: model did not converge")
    }
    logl.df <- fitMeasures(object, c("logl", "npar", "ntotal"))
    names(logl.df) <- NULL
    logl <- logl.df[1]
    attr(logl, "df") <- logl.df[2]    ### note: must be npar, not df!!
    attr(logl, "nobs") <- logl.df[3]
    class(logl) <- "logLik"
    logl
})

# nobs
if(!exists("nobs", envir=asNamespace("stats4"))) {
    setGeneric("nobs", function(object, ...) standardGeneric("nobs"))
}
setMethod("nobs", signature(object = "lavaan"),
function(object, ...) {
    object@SampleStats@ntotal
})

# see: src/library/stats/R/update.R
setMethod("update", signature(object = "lavaan"),
function(object, model, ..., evaluate = TRUE) {
    call <- object@call
    if(is.null(call))
        stop("need an object with call slot")
    extras <- match.call(expand.dots = FALSE)$...
    if(!missing(model))
        #call$formula <- update.formula(formula(object), formula.)
        call$model <- model
    if(length(extras) > 0) {
        existing <- !is.na(match(names(extras), names(call)))
        for(a in names(extras)[existing])
            call[[a]] <- extras[[a]]
        if(any(!existing)) {
            call <- c(as.list(call), extras[!existing])
            call <- as.call(call)
        }
    }
    if (evaluate) {
        eval(call, parent.frame())
    }
    else call
})

setMethod("anova", signature(object = "lavaan"),
function(object, ...)
{
    # NOTE: if we add additional arguments, it is not the same generic
    # anova() function anymore, and match.call will be screwed up

    # NOTE: we need to extract the names of the models from match.call here,
    # otherwise, we lose them in the call stack

    mcall <- match.call(expand.dots = TRUE)
    dots <- list(...)

    # catch SB.classic and SB.H0
    SB.classic <- TRUE; SB.H0 <- FALSE

    arg.names <- names(dots)
    arg.idx <- which(nchar(arg.names) > 0L)
    if(length(arg.idx) > 0L) {
        if(!is.null(dots$SB.classic))
            SB.classic <- dots$SB.classic
        if(!is.null(dots$SB.H0))
            SB.H0 <- dots$SB.H0
        dots <- dots[-arg.idx]
    }

    modp <- if(length(dots))
        sapply(dots, is, "lavaan") else logical(0)
    mods <- c(list(object), dots[modp])
    NAMES <- sapply(as.list(mcall)[c(FALSE, TRUE, modp)], deparse)

    # use do.call to handle changed dots
    ans <- do.call("lavTestLRT", c(list(object = object,
                                        SB.classic = SB.classic,
                                        SB.H0 = SB.H0,
                                        model.names = NAMES), dots))

    ans
})

# ---- file: lavaan/R/lav_model_gradient_pml.R ----

# utility functions for pairwise maximum likelihood

# stub for fml_deriv1
fml_deriv1 <- function(Sigma.hat = NULL,  # model-based var/cov/cor
                       TH        = NULL,  # model-based thresholds + means
                       th.idx    = NULL,  # threshold idx per variable
                       num.idx   = NULL,  # which variables are numeric
                       X         = NULL,  # data
                       eXo       = NULL,  # external covariates
                       lavcache  = NULL,  # housekeeping stuff
                       scores    = FALSE, # return case-wise scores
                       negative  = TRUE) {

    stop("not implemented")
}

# the first derivative of the pairwise logLik function with respect to the
# thresholds/slopes/var/correlations; together with DELTA, we can use the
# chain rule to get the gradient
# this is adapted from code written by Myrsini Katsikatsou

# first attempt - YR 5 okt 2012
pml_deriv1 <- function(Sigma.hat = NULL,  # model-based var/cov/cor
                       TH        = NULL,  # model-based thresholds + means
                       th.idx    = NULL,  # threshold idx per variable
                       num.idx   = NULL,  # which variables are numeric
                       X         = NULL,  # data
                       eXo       = NULL,  # external covariates
                       lavcache  = NULL,  # housekeeping stuff
                       PI        = NULL,  # slopes
                       missing   = "listwise", # how to deal with missings
                       scores    = FALSE, # return case-wise scores
                       negative  = TRUE) { # multiply by -1

    cors <- Sigma.hat[lower.tri(Sigma.hat)]

    if(any(abs(cors) > 1)) { # what should we do now... force cov2cor?
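        # descriptive note (added): model-implied correlations outside
        # [-1, 1] can occur during optimization; below, any correlation with
        # absolute value above 0.99 is crudely set to 0.99 so that the
        # bivariate normal probabilities computed further down remain
        # well-defined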
#cat("FFFFOOOORRRRRCEEE PD!\n") #Sigma.hat <- Matrix::nearPD(Sigma.hat) #Sigma.hat <- as.matrix(Sigma.hat$mat) #Sigma.hat <- cov2cor(Sigma.hat) #cors <- Sigma.hat[lower.tri(Sigma.hat)] idx <- which( abs(cors) > 0.99 ) cors[idx] <- 0.99 # clip #cat("CLIPPING!\n") } #cat("[DEBUG gradient]\n"); print(range(cors)); print(range(TH)); cat("\n") nvar <- nrow(Sigma.hat) pstar <- nvar*(nvar-1)/2 ov.types <- rep("ordered", nvar) if(length(num.idx) > 0L) ov.types[num.idx] <- "numeric" if(!is.null(eXo)) { nexo <- ncol(eXo) } else { nexo <- 0 } N.TH <- length(th.idx) N.SL <- nvar * nexo N.VAR <- length(num.idx) N.COR <- pstar #print(Sigma.hat); print(TH); print(th.idx); print(num.idx); print(str(X)) # shortcut for ordinal-only/no-exo case if(!scores && all(ov.types == "ordered") && nexo == 0L) { gradient <- grad_tau_rho(no.x = nvar, all.thres = TH, index.var.of.thres = th.idx, rho.xixj = cors, n.xixj.vec = lavcache$bifreq, out.LongVecInd = lavcache$LONG) if(missing == "available.cases") { uniPI <- univariateExpProbVec(TH = TH, th.idx = th.idx) tmp <- lavcache$uniweights / uniPI var.idx <- split(th.idx, th.idx) var.idx <- unlist( lapply(var.idx, function(x){c(x,x[1])}) ) tmp.varwise <- split(tmp, var.idx) tmp1 <- unlist( lapply(tmp.varwise, function(x){ c(x[-length(x)]) } ) ) tmp2 <- unlist( lapply(tmp.varwise, function(x){ c(x[-1]) } ) ) uni.der.tau <- dnorm(TH) * (tmp1 - tmp2) nTH <- length(TH) gradient[1:nTH] <- gradient[1:nTH] + uni.der.tau } if(negative) { gradient <- -1 * gradient } return(gradient) } # in this order: TH/MEANS + SLOPES + VAR + COR GRAD.size <- N.TH + N.SL + N.VAR + N.COR # scores or gradient? if(scores) { SCORES <- matrix(0, nrow(X), GRAD.size) # we will sum up over all pairs } else { GRAD <- matrix(0, pstar, GRAD.size) # each pair is a row } PSTAR <- matrix(0, nvar, nvar) # utility matrix, to get indices PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar for(j in seq_len(nvar-1L)) { for(i in (j+1L):nvar) { # cat(" i = ", i, " j = ", j, "\n") # debug only pstar.idx <- PSTAR[i,j] cor.idx <- N.TH + N.SL + N.VAR + PSTAR[i,j] th.idx_i <- which(th.idx == i) th.idx_j <- which(th.idx == j) if(nexo > 0L) { sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo) sl.idx_j <- N.TH + seq(j, by=nvar, length.out=nexo) if(length(num.idx) > 0L) { var.idx_i <- N.TH + N.SL + match(i, num.idx) var.idx_j <- N.TH + N.SL + match(j, num.idx) } } else { if(length(num.idx) > 0L) { var.idx_i <- N.TH + match(i, num.idx) var.idx_j <- N.TH + match(j, num.idx) } } if(ov.types[i] == "numeric" && ov.types[j] == "numeric") { # ordinary pearson correlation stop("not done yet") } else if(ov.types[i] == "numeric" && ov.types[j] == "ordered") { # polyserial correlation stop("not done yet") } else if(ov.types[j] == "numeric" && ov.types[i] == "ordered") { # polyserial correlation stop("not done yet") } else if(ov.types[i] == "ordered" && ov.types[j] == "ordered") { # polychoric correlation if(nexo == 0L) { SC.COR.UNI <- pc_cor_scores(Y1 = X[,i], Y2 = X[,j], eXo = NULL, rho = Sigma.hat[i,j], fit.y1 = NULL, # fixme fit.y2 = NULL, # fixme th.y1 = TH[ th.idx == i ], th.y2 = TH[ th.idx == j ], sl.y1 = NULL, sl.y2 = NULL, na.zero = TRUE) } else { SC.COR.UNI <- pc_cor_scores_PL_with_cov(Y1 = X[,i], Y2 = X[,j], eXo = eXo, Rho = Sigma.hat[i,j], th.y1 = TH[ th.idx == i ], th.y2 = TH[ th.idx == j ], sl.y1 = PI[i,], sl.y2 = PI[j,], missing.ind = missing) } if(scores) { # TH SCORES[,th.idx_i] <- SCORES[,th.idx_i] + SC.COR.UNI$dx.th.y1 SCORES[,th.idx_j] <- SCORES[,th.idx_j] + SC.COR.UNI$dx.th.y2 # SL if(nexo > 0L) { 
                    SCORES[,sl.idx_i] <- SCORES[,sl.idx_i] + SC.COR.UNI$dx.sl.y1
                    SCORES[,sl.idx_j] <- SCORES[,sl.idx_j] + SC.COR.UNI$dx.sl.y2
                }
                # NO VAR
                # RHO
                SCORES[,cor.idx] <- SCORES[,cor.idx] + SC.COR.UNI$dx.rho
            } else {
                # TH
                if(length(th.idx_i) > 1L) {
                    GRAD[pstar.idx, th.idx_i] <-
                        colSums(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
                } else {
                    GRAD[pstar.idx, th.idx_i] <-
                        sum(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
                }
                if(length(th.idx_j) > 1L) {
                    GRAD[pstar.idx, th.idx_j] <-
                        colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                } else {
                    GRAD[pstar.idx, th.idx_j] <-
                        sum(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                }
                # SL
                if(nexo > 0L) {
                    if(length(sl.idx_i) > 1L) {
                        GRAD[pstar.idx, sl.idx_i] <-
                            colSums(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
                    } else {
                        GRAD[pstar.idx, sl.idx_i] <-
                            sum(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
                    }
                    if(length(sl.idx_j) > 1L) {
                        GRAD[pstar.idx, sl.idx_j] <-
                            colSums(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
                    } else {
                        GRAD[pstar.idx, sl.idx_j] <-
                            sum(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
                    }
                }
                # NO VAR
                # RHO
                GRAD[pstar.idx, cor.idx] <- sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
            }
        }
    }

    if(missing == "available.cases" && all(ov.types == "ordered")) {
        if(nexo == 0L) {
            UNI_SCORES <- matrix(0, nrow(X), N.TH)
            for(i in seq_len(nvar)) {
                th.idx_i <- which(th.idx == i)
                derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
                                    eXo = NULL, sl.y1 = NULL,
                                    weights.casewise =
                                        lavcache$uniweights.casewise)
                UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
            }
        } else {
            UNI_SCORES <- matrix(0, nrow(X), ncol=(N.TH+N.SL) )
            for(i in seq_len(nvar)) {
                th.idx_i <- which(th.idx == i)
                sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo)
                derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
                                    eXo = eXo, sl.y1 = PI[i,],
                                    weights.casewise =
                                        lavcache$uniweights.casewise)
                UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
                UNI_SCORES[,sl.idx_i] <- derY1$dx.sl.y1
            }
        }
        if(scores) {
            SCORES <- SCORES[, 1:(N.TH+N.SL)] + UNI_SCORES
        } else {
            uni_gradient <- colSums(UNI_SCORES)
        }
    }

    # do we need scores?
    if(scores) return(SCORES)

    # gradient is sum over all pairs
    gradient <- colSums(GRAD, na.rm = TRUE)

    if(missing == "available.cases" && all(ov.types == "ordered")) {
        # add the univariate contribution to the corresponding elements only
        if(nexo == 0L) {
            gradient[1:N.TH] <- gradient[1:N.TH] + uni_gradient
        } else {
            gradient[1:(N.TH+N.SL)] <- gradient[1:(N.TH+N.SL)] + uni_gradient
        }
    }

    # we multiply by -1 because we minimize
    if(negative) {
        gradient <- -1 * gradient
    }

    gradient
}

### all code below written by Myrsini Katsikatsou

# The function grad_tau_rho
# input:
# no.x - is a scalar, the number of ordinal variables
# all.thres - is a vector containing the thresholds of all variables in the
#             following order: thres_var1, thres_var2,..., thres_var_p
#             within each variable the thresholds are in ascending order
#             Note that all.thres does NOT contain tau_0=-Inf and tau_last=Inf
#             for all variables.
# index.var.of.thres - a vector keeping track of which variable the thresholds
#                      in all.thres belong to; it is of the form
#                      (1,1,1..., 2,2,2,..., p,p,p,...)
# rho.xixj - is the vector of all correlations where j runs faster than i
#            i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ...,
#            rho_2p, etc.
# n.xixj.vec - a vector with the observed frequency for every combination
#              of categories and every pair. The frequencies are given in
#              the same order as the expected probabilities in the output of
#              pairwiseExpProbVec output
# out.LongVecInd - it is the output of function LongVecInd
# the output: it gives the elements of der.L.to.tau and der.L.to.rho in this
# order.
# First the elements of der.L.to.tau, ordered as follows:
# the thresholds of each variable with respect to ascending order of the
# variable index (i.e. thres_var1, thres_var2, etc.) and, within each
# variable, the thresholds in ascending order;
# then the elements of vector der.L.to.rho, which are der.Lxixj.to.rho.xixj
# where j runs faster than i.
# The function depends on four other functions: LongVecTH.Rho,
# pairwiseExpProbVec, derLtoRho, and derLtoTau, all given below.

# if n.xixj.ab is either an array or a list the following should be done
#n.xixj.vec <- if(is.array(n.xixj.ab)) {
#                  c(n.xixj.ab)
#              } else if(is.list(n.xixj.ab)){
#                  unlist(n.xixj.ab)
#              }

grad_tau_rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj,
                         n.xixj.vec, out.LongVecInd) {

    out.LongVecTH.Rho <- LongVecTH.Rho(no.x=no.x, all.thres=all.thres,
                                       index.var.of.thres=index.var.of.thres,
                                       rho.xixj=rho.xixj)

    pi.xixj <- pairwiseExpProbVec(ind.vec= out.LongVecInd,
                                  th.rho.vec= out.LongVecTH.Rho)

    out.derLtoRho <- derLtoRho(ind.vec= out.LongVecInd,
                               th.rho.vec= out.LongVecTH.Rho,
                               n.xixj=n.xixj.vec,
                               pi.xixj=pi.xixj, no.x=no.x)

    out.derLtoTau <- derLtoTau(ind.vec= out.LongVecInd,
                               th.rho.vec= out.LongVecTH.Rho,
                               n.xixj=n.xixj.vec,
                               pi.xixj=pi.xixj, no.x=no.x)

    grad <- c(out.derLtoTau, out.derLtoRho)
    attr(grad, "pi.xixj") <- pi.xixj

    grad
}

################################################################################
# The input of the function LongVecInd:
# no.x is a scalar, the number of ordinal variables
# all.thres is a vector containing the thresholds of all variables in the
#    following order: thres_var1, thres_var2,..., thres_var_p;
#    within each variable the thresholds are in ascending order.
#    Note that all.thres does NOT contain the first and the last threshold of
#    the variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track of which variable each
#    threshold in all.thres belongs to; it is of the form
#    (1,1,1..., 2,2,2,..., p,p,p,...)

# The output of the function:
# it is a list of vectors keeping track of the indices
# of thresholds, of variables, and of pairs, and two T/F vectors indicating
# if the threshold index corresponds to the last threshold of a variable; all
# these for all pairs of variables. All are needed for the
# computation of expected probabilities, der.L.to.rho, and der.L.to.tau

# all duplications of indices are done as follows: within each pair of
# variables, xi-xj, if for example we want to duplicate the indices of the
# thresholds, tau^xi_a and tau^xj_b, then index a runs faster than b, i.e. for
# each b we take all different tau^xi's, and then we proceed to the next b and
# do the same. In other words if it was tabulated we fill the table columnwise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.

LongVecInd <- function(no.x, all.thres, index.var.of.thres) {
    no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
    index.pairs <- utils::combn(no.x,2)
    no.pairs <- ncol(index.pairs)

    # index.thres.var1.of.pair and index.thres.var2.of.pair contain the indices
    # of all thresholds (from tau_0 which is -Inf to tau_last which is Inf)
    # for any pair of variables, appropriately duplicated so that the two
    # vectors together give all possible combinations of threshold indices.
    # Since here the threshold indices 0 and "last" are included, the vectors
    # are longer than the vectors thres.var1.of.pair and thres.var2.of.pair
    # above.
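    # a small worked example (illustrative only, not part of the original
    # code): for two ordinal variables with 3 categories each, there are
    # 2 (middle) thresholds per variable, and for the single pair 1-2 the
    # index vectors below enumerate all 4 x 4 combinations of the threshold
    # indices 0 (tau_0 = -Inf) up to 3 (tau_last = +Inf):
    #
    #   out <- LongVecInd(no.x = 2,
    #                     all.thres = c(-1, 1,   -0.5, 0.5),
    #                     index.var.of.thres = c(1, 1, 2, 2))
    #   out$index.thres.var1.of.pair   # rep(0:3, times = 4)
    #   out$index.thres.var2.of.pair   # rep(0:3, each  = 4)
    #   out$last.thres.var1.of.pair    # TRUE where the var1 index equals 3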
    index.thres.var1.of.pair <- vector("list", no.pairs)
    index.thres.var2.of.pair <- vector("list", no.pairs)

    # index.var1.of.pair and index.var2.of.pair keep track of the index of the
    # variable that the thresholds in index.thres.var1.of.pair and
    # index.thres.var2.of.pair belong to, respectively. So, these two variables
    # are of the same length as that of index.thres.var1.of.pair and
    # index.thres.var2.of.pair
    index.var1.of.pair <- vector("list", no.pairs)
    index.var2.of.pair <- vector("list", no.pairs)

    # index.pairs.extended gives the index of the pair for each pair of
    # variables, e.g. pair of variables 1-2 has index 1, variables 1-3 has
    # index 2, etc. The vector is of the same length as
    # index.thres.var1.of.pair, index.thres.var2.of.pair, index.var1.of.pair,
    # and index.var2.of.pair
    index.pairs.extended <- vector("list", no.pairs)

    for (i in 1:no.pairs) {
        no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
        no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]
        index.thres.var1.of.pair[[i]] <- rep(0:(no.thres.var1.of.pair+1),
                                             times= (no.thres.var2.of.pair+2) )
        index.thres.var2.of.pair[[i]] <- rep(0:(no.thres.var2.of.pair+1),
                                             each= (no.thres.var1.of.pair+2) )
        length.vec <- length(index.thres.var1.of.pair[[i]] )
        index.var1.of.pair[[i]] <- rep(index.pairs[1,i], length.vec)
        index.var2.of.pair[[i]] <- rep(index.pairs[2,i], length.vec)
        index.pairs.extended[[i]] <- rep(i, length.vec)
    }
    index.thres.var1.of.pair <- unlist(index.thres.var1.of.pair)
    index.thres.var2.of.pair <- unlist(index.thres.var2.of.pair)
    index.var1.of.pair <- unlist(index.var1.of.pair)
    index.var2.of.pair <- unlist(index.var2.of.pair)
    index.pairs.extended <- unlist(index.pairs.extended)

    # indicator vector (T/F) showing which elements of index.thres.var1.of.pair
    # correspond to the last thresholds of variables. The length is the same as
    # that of index.thres.var1.of.pair.
    last.thres.var1.of.pair <- index.var1.of.pair==1 &
        index.thres.var1.of.pair==(no.thres.of.each.var[1]+1)
    # we consider up to variable (no.x-1) because in pairs xi-xj where j runs
    # faster than i, the last variable is not included in the column of xi's
    for(i in 2:(no.x-1)) {
        new.condition <- index.var1.of.pair==i &
            index.thres.var1.of.pair==(no.thres.of.each.var[i]+1)
        last.thres.var1.of.pair <- last.thres.var1.of.pair | new.condition
    }

    # indicator vector (T/F) showing which elements of index.thres.var2.of.pair
    # correspond to the last thresholds of variables. Note that in pairs xi-xj
    # where j runs faster than i, the first variable is not included in the
    # column of xj's. That's why we start with variable 2. The length is the
    # same as that of index.thres.var1.of.pair.
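    # (illustrative note, added: in the two-variable, 3-category example given
    # above, last.thres.var2.of.pair is TRUE exactly where
    # index.thres.var2.of.pair == 3, i.e. where the threshold index points at
    # tau_last = +Inf of variable 2)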
    last.thres.var2.of.pair <- index.var2.of.pair==2 &
        index.thres.var2.of.pair==(no.thres.of.each.var[2]+1)
    for(i in 3:no.x) {
        new.condition <- index.var2.of.pair==i &
            index.thres.var2.of.pair==(no.thres.of.each.var[i]+1)
        last.thres.var2.of.pair <- last.thres.var2.of.pair | new.condition
    }

    list(index.thres.var1.of.pair = index.thres.var1.of.pair,
         index.thres.var2.of.pair = index.thres.var2.of.pair,
         index.var1.of.pair = index.var1.of.pair,
         index.var2.of.pair = index.var2.of.pair,
         index.pairs.extended = index.pairs.extended,
         last.thres.var1.of.pair = last.thres.var1.of.pair,
         last.thres.var2.of.pair = last.thres.var2.of.pair )
}

################################################################################
# The input of the function LongVecTH.Rho:
# no.x is a scalar, the number of ordinal variables
# all.thres is a vector containing the thresholds of all variables in the
#    following order: thres_var1, thres_var2,..., thres_var_p;
#    within each variable the thresholds are in ascending order.
#    Note that all.thres does NOT contain the first and the last threshold of
#    the variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track of which variable each
#    threshold in all.thres belongs to; it is of the form
#    (1,1,1..., 2,2,2,..., p,p,p,...)
# rho.xixj is the vector of all correlations where j runs faster than i
#    i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p, etc.

# The output of the function:
# it is a list of vectors with thresholds and rho's duplicated appropriately,
# all needed for the computation of expected probabilities,
# der.L.to.rho, and der.L.to.tau

# all duplications below are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate their thresholds, tau^xi_a and
# tau^xj_b, then index a runs faster than b, i.e. for each b we take all
# different tau^xi's, and then we proceed to the next b and do the same.
# In other words if it was tabulated we fill the table columnwise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.

LongVecTH.Rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj) {
    no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
    index.pairs <- utils::combn(no.x,2)
    no.pairs <- ncol(index.pairs)

    # create the long vectors needed for the computation of expected
    # probabilities for each cell and each pair of variables. The vectors
    # thres.var1.of.pair and thres.var2.of.pair together give all the possible
    # combinations of the thresholds of any two variables. Note the
    # combinations (-Inf, -Inf), (-Inf, Inf), (Inf, -Inf), (Inf, Inf) are NOT
    # included. Only the combinations of the middle thresholds
    # (tau_1 to tau_(last-1)).
    # thres.var1.of.pair and thres.var2.of.pair give the first and the second
    # argument, respectively, in functions pbivnorm and dbinorm
    thres.var1.of.pair <- vector("list", no.pairs)
    thres.var2.of.pair <- vector("list", no.pairs)

    # Extending the rho.vector accordingly so that it will be the third
    # argument in the pbivnorm and dbinorm functions. It is of the same length
    # as thres.var1.of.pair and thres.var2.of.pair.
    rho.vector <- vector("list", no.pairs)

    # thres.var1.for.dnorm.in.der.pi.to.tau.xi and
    # thres.var2.for.dnorm.in.der.pi.to.tau.xj give the thresholds of almost
    # all variables appropriately duplicated so that the vectors can be used
    # as input in dnorm() to compute der.pi.xixj.to.tau.xi and
    # der.pi.xixj.to.tau.xj.
    # thres.var1.for.dnorm.in.der.pi.to.tau.xi does not contain the thresholds
    # of the last variable, and thres.var2.for.dnorm.in.der.pi.to.tau.xj those
    # of the first variable
    thres.var1.for.dnorm.in.der.pi.to.tau.xi <- vector("list", no.pairs)
    thres.var2.for.dnorm.in.der.pi.to.tau.xj <- vector("list", no.pairs)

    for (i in 1:no.pairs) {
        single.thres.var1.of.pair <-
            all.thres[index.var.of.thres==index.pairs[1,i]]
        single.thres.var2.of.pair <-
            all.thres[index.var.of.thres==index.pairs[2,i]]
        # remember that the first (-Inf) and last (Inf) thresholds are not
        # included, so no.thres.var1.of.pair is equal to the number of
        # categories of var1 minus 1; similarly for no.thres.var2.of.pair
        no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
        no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]

        thres.var1.of.pair[[i]] <- rep(single.thres.var1.of.pair,
                                       times=no.thres.var2.of.pair )
        thres.var2.of.pair[[i]] <- rep(single.thres.var2.of.pair,
                                       each=no.thres.var1.of.pair)
        rho.vector[[i]] <- rep(rho.xixj[i], length(thres.var1.of.pair[[i]]))

        thres.var1.for.dnorm.in.der.pi.to.tau.xi[[i]] <-
            rep(single.thres.var1.of.pair, times=(no.thres.var2.of.pair+1))
        thres.var2.for.dnorm.in.der.pi.to.tau.xj[[i]] <-
            rep(single.thres.var2.of.pair, each=(no.thres.var1.of.pair+1))
    }
    thres.var1.of.pair <- unlist(thres.var1.of.pair)
    thres.var2.of.pair <- unlist(thres.var2.of.pair)
    rho.vector <- unlist(rho.vector)
    thres.var1.for.dnorm.in.der.pi.to.tau.xi <-
        unlist(thres.var1.for.dnorm.in.der.pi.to.tau.xi)
    thres.var2.for.dnorm.in.der.pi.to.tau.xj <-
        unlist(thres.var2.for.dnorm.in.der.pi.to.tau.xj)

    # thres.var2.for.last.cat.var1 and thres.var1.for.last.cat.var2 are needed
    # for the computation of expected probabilities. In the computation of
    # \Phi_2(tau1, tau2; rho), when either tau1 or tau2 is Inf, it is enough
    # to compute pnorm() with the non-infinite tau as an argument.
    # In particular, when the first variable of the pair has tau_last= Inf
    # and the second a non-infinite threshold, we compute
    # pnorm(thres.var2.for.last.cat.var1). Similarly, when the second variable
    # of the pair has tau_last=Inf and the first a non-infinite threshold, we
    # compute pnorm(thres.var1.for.last.cat.var2).
    thres.var2.for.last.cat.var1 <- vector("list", (no.x-1))
    thres.var1.for.last.cat.var2 <- vector("list", (no.x-1))
    for (i in 1:(no.x-1)) {
        thres.var2.for.last.cat.var1[[i]] <-
            c(all.thres[index.var.of.thres %in% (i+1):no.x])
        thres.var1.for.last.cat.var2[[i]] <-
            rep(all.thres[index.var.of.thres==i], times=(no.x-i))
    }
    thres.var2.for.last.cat.var1 <- unlist(thres.var2.for.last.cat.var1)
    thres.var1.for.last.cat.var2 <- unlist(thres.var1.for.last.cat.var2)

    list(thres.var1.of.pair = thres.var1.of.pair, #these 3 of same length
         thres.var2.of.pair = thres.var2.of.pair,
         rho.vector = rho.vector,
         # the following of length depending on the number of categories
         thres.var1.for.dnorm.in.der.pi.to.tau.xi =
             thres.var1.for.dnorm.in.der.pi.to.tau.xi ,
         thres.var2.for.dnorm.in.der.pi.to.tau.xj =
             thres.var2.for.dnorm.in.der.pi.to.tau.xj ,
         thres.var2.for.last.cat.var1=thres.var2.for.last.cat.var1,
         thres.var1.for.last.cat.var2=thres.var1.for.last.cat.var2 )
}

#########################################################
#########################################################
# The function pairwiseExpProbVec
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho
# output: it gives the elements of pairwiseTablesExpected()$pi.tables
#         table-wise and column-wise within each table.
#         In other words, if pi^xixj_ab is the expected probability for the
#         pair of variables xi-xj and categories a and b, then index a runs
#         the fastest of all, followed by b, then by j, and lastly by i.

pairwiseExpProbVec <- function(ind.vec, th.rho.vec) {
    prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
    prob.vec[ind.vec$index.thres.var1.of.pair==0 |
             ind.vec$index.thres.var2.of.pair==0] <- 0
    prob.vec[ind.vec$last.thres.var1.of.pair &
             ind.vec$last.thres.var2.of.pair] <- 1
    prob.vec[ind.vec$last.thres.var1.of.pair &
             ind.vec$index.thres.var2.of.pair!=0 &
             !ind.vec$last.thres.var2.of.pair] <-
        pnorm(th.rho.vec$thres.var2.for.last.cat.var1)
    prob.vec[ind.vec$last.thres.var2.of.pair &
             ind.vec$index.thres.var1.of.pair!=0 &
             !ind.vec$last.thres.var1.of.pair] <-
        pnorm(th.rho.vec$thres.var1.for.last.cat.var2)
    prob.vec[is.na(prob.vec)] <- pbivnorm(th.rho.vec$thres.var1.of.pair,
                                          th.rho.vec$thres.var2.of.pair,
                                          th.rho.vec$rho.vector)

    cum.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
                          ind.vec$index.thres.var2.of.pair!=0]
    cum.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
                          !ind.vec$last.thres.var2.of.pair]
    cum.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
                          !ind.vec$last.thres.var1.of.pair]
    cum.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
                          !ind.vec$last.thres.var2.of.pair]

    PI <- cum.term1 - cum.term2 - cum.term3 + cum.term4

    # added by YR 11 nov 2012 to avoid Nan/-Inf
    # log(.Machine$double.eps) = -36.04365
    # all elements should be strictly positive
    PI[PI < .Machine$double.eps] <- .Machine$double.eps

    PI
}

# derLtoRho
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho
#        n.xixj - a vector with the observed frequency for every combination
#                 of categories and every pair. The frequencies are given in
#                 the same order as the expected probabilities in the output
#                 of pairwiseExpProbVec output
#        pi.xixj - the output of pairwiseExpProbVec function
#        no.x - the number of ordinal variables
# output: the vector of der.L.to.rho, each element corresponds to
#         der.Lxixj.to.rho.xixj where j runs faster than i

derLtoRho <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x) {
    prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
    prob.vec[ind.vec$index.thres.var1.of.pair==0 |
             ind.vec$index.thres.var2.of.pair==0 |
             ind.vec$last.thres.var1.of.pair |
             ind.vec$last.thres.var2.of.pair] <- 0
    prob.vec[is.na(prob.vec)] <- dbinorm(th.rho.vec$thres.var1.of.pair,
                                         th.rho.vec$thres.var2.of.pair,
                                         rho=th.rho.vec$rho.vector)

    den.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
                          ind.vec$index.thres.var2.of.pair!=0]
    den.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
                          !ind.vec$last.thres.var2.of.pair]
    den.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
                          !ind.vec$last.thres.var1.of.pair]
    den.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
                          !ind.vec$last.thres.var2.of.pair]

    der.pi.xixj.to.rho.xixj <- den.term1 - den.term2 - den.term3 + den.term4

    prod.terms <- (n.xixj/pi.xixj)*der.pi.xixj.to.rho.xixj

    # to get der.Lxixj.to.rho.xixj we should sum all the elements of
    # der.pi.xixj.to.rho.xixj which correspond to the pair xi-xj; to do so:
    xnew <- lapply( ind.vec[c("index.pairs.extended")],
                    function(y){y[ind.vec$index.thres.var1.of.pair!=0 &
                                  ind.vec$index.thres.var2.of.pair!=0]})
    #der.L.to.rho is:
    tapply(prod.terms, xnew$index.pairs.extended, sum)
}

###########################################################################
# derLtoTau
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho
#        n.xixj - a vector with the observed frequency for every combination
#                 of categories and every pair. The frequencies are given in
#                 the same order as the expected probabilities in the output
#                 of pairwiseExpProbVec output
#        pi.xixj - the output of pairwiseExpProbVec function
# output: the vector of der.L.to.tau where the elements are ordered as
#         follows: the thresholds of each variable with respect to ascending
#         order of the variable index (i.e. thres_var1, thres_var2, etc.) and
#         within each variable the thresholds in ascending order.

derLtoTau <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x=0L) {
    # to compute der.pi.xixj.to.tau.xi
    xi <- lapply( ind.vec[c("index.thres.var2.of.pair",
                            "last.thres.var2.of.pair")],
                  function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
                                   ind.vec$last.thres.var1.of.pair)] } )
    cum.prob.vec <- rep(NA, length(xi$index.thres.var2.of.pair) )
    cum.prob.vec[xi$index.thres.var2.of.pair==0] <- 0
    cum.prob.vec[xi$last.thres.var2.of.pair] <- 1
    denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
    cum.prob.vec[is.na(cum.prob.vec)] <-
        pnorm( (th.rho.vec$thres.var2.of.pair -
                th.rho.vec$rho.vector* th.rho.vec$thres.var1.of.pair) / denom)
    den.prob.vec <- dnorm(th.rho.vec$thres.var1.for.dnorm.in.der.pi.to.tau.xi)
    der.pi.xixj.to.tau.xi <- den.prob.vec *
        (cum.prob.vec[ xi$index.thres.var2.of.pair!=0] -
         cum.prob.vec[!xi$last.thres.var2.of.pair] )

    # to compute der.pi.xixj.to.tau.xj
    xj <- lapply( ind.vec[c("index.thres.var1.of.pair",
                            "last.thres.var1.of.pair")],
                  function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
                                   ind.vec$last.thres.var2.of.pair)] } )
    cum.prob.vec <- rep(NA, length(xj$index.thres.var1.of.pair) )
    cum.prob.vec[xj$index.thres.var1.of.pair==0] <- 0
    cum.prob.vec[xj$last.thres.var1.of.pair] <- 1
    denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
    cum.prob.vec[is.na(cum.prob.vec)] <-
        pnorm( (th.rho.vec$thres.var1.of.pair -
                th.rho.vec$rho.vector* th.rho.vec$thres.var2.of.pair) / denom)
    den.prob.vec <- dnorm(th.rho.vec$thres.var2.for.dnorm.in.der.pi.to.tau.xj)
    der.pi.xixj.to.tau.xj <- den.prob.vec *
        (cum.prob.vec[ xj$index.thres.var1.of.pair!=0] -
         cum.prob.vec[!xj$last.thres.var1.of.pair] )

    # to compute der.Lxixj.to.tau.xi and der.Lxixj.to.tau.xj
    n.over.pi <- n.xixj/ pi.xixj
    # get the appropriate differences of n.over.pi for der.Lxixj.to.tau.xi and
    # der.Lxixj.to.tau.xj
    x3a <- lapply(ind.vec, function(y){
        y[!(ind.vec$index.thres.var1.of.pair==0 |
            ind.vec$index.thres.var2.of.pair==0) ] } )
    diff.n.over.pi.to.xi <- n.over.pi[!x3a$last.thres.var1.of.pair] -
                            n.over.pi[ x3a$index.thres.var1.of.pair!=1]
    diff.n.over.pi.to.xj <- n.over.pi[!x3a$last.thres.var2.of.pair] -
                            n.over.pi[ x3a$index.thres.var2.of.pair!=1]

    # terms.der.Lxixj.to.tau.xi and terms.der.Lxixj.to.tau.xj
    terms.der.Lxixj.to.tau.xi <- diff.n.over.pi.to.xi * der.pi.xixj.to.tau.xi
    terms.der.Lxixj.to.tau.xj <- diff.n.over.pi.to.xj * der.pi.xixj.to.tau.xj

    # to add appropriately elements of terms.der.Lxixj.to.tau.xi
    x3b <- lapply( ind.vec[c("index.pairs.extended",
                             "index.thres.var1.of.pair")],
                   function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
                                    ind.vec$last.thres.var1.of.pair |
                                    ind.vec$index.thres.var2.of.pair==0) ] } )
    # to add appropriately elements of terms.der.Lxixj.to.tau.xj
    x4b <- lapply( ind.vec[c("index.pairs.extended",
                             "index.thres.var2.of.pair")],
                   function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
                                    ind.vec$last.thres.var2.of.pair |
                                    ind.vec$index.thres.var1.of.pair==0) ] } )
    ind.pairs <- utils::combn(no.x,2)

    # der.Lxixj.to.tau.xi is a matrix, nrow=no.pairs, ncol=max(no.of.free.thres)
    # thus, there are NA's; similarly for der.Lxixj.to.tau.xj
    der.Lxixj.to.tau.xi <- tapply(terms.der.Lxixj.to.tau.xi,
                                  list(x3b$index.pairs.extended,
                                       x3b$index.thres.var1.of.pair), sum)
    der.Lxixj.to.tau.xj <- tapply(terms.der.Lxixj.to.tau.xj,
                                  list(x4b$index.pairs.extended,
                                       x4b$index.thres.var2.of.pair), sum)

    # to add appropriately the terms of der.Lxixj.to.tau.xi and
    # der.Lxixj.to.tau.xj
    split.der.Lxixj.to.tau.xi <- split(as.data.frame(der.Lxixj.to.tau.xi),
                                       ind.pairs[1,] )
    sums.der.Lxixj.to.tau.xi <- lapply(split.der.Lxixj.to.tau.xi,
                                       function(x){ y <- apply(x,2,sum)
                                                    y[!is.na(y)] } )
    # Note: NA's exist in the case where the ordinal variables have a
    # different number of response categories
    split.der.Lxixj.to.tau.xj <- split(as.data.frame(der.Lxixj.to.tau.xj),
                                       ind.pairs[2,] )
    sums.der.Lxixj.to.tau.xj <- lapply(split.der.Lxixj.to.tau.xj,
                                       function(x){ y <- apply(x,2,sum)
                                                    y[!is.na(y)] } )

    # to get der.L.to.tau
    c( sums.der.Lxixj.to.tau.xi[[1]],
       c( unlist(sums.der.Lxixj.to.tau.xi[2:(no.x-1)]) +
          unlist(sums.der.Lxixj.to.tau.xj[1:(no.x-2)]) ),
       sums.der.Lxixj.to.tau.xj[[no.x-1]] )
}

# ---- file: lavaan/R/lav_lavaanList_multipleImputation.R ----

# lavMultipleImputation: fit the *same* model, on a set of imputed datasets
# YR - 11 July 2016

lavMultipleImputation <- function(model = NULL, dataList = NULL,
                                  ndat = length(dataList), cmd = "sem",
                                  ..., store.slots = c("partable"),
                                  FUN = NULL, show.progress = FALSE,
                                  parallel = c("no", "multicore", "snow"),
                                  ncpus = max(1L, parallel::detectCores() - 1L),
                                  cl = NULL) {

    # dotdotdot
    dotdotdot <- list()

    # fit multiple times
    fit <- do.call("lavaanList", args = c(list(model = model,
                   dataList = dataList, ndat = ndat, cmd = cmd,
                   store.slots = store.slots, FUN = FUN,
                   show.progress = show.progress,
                   parallel = parallel, ncpus = ncpus, cl = cl), dotdotdot))

    # flag multiple imputation
    fit@meta$lavMultipleImputation <- TRUE

    fit
}

# ---- file: lavaan/R/lavaan-deprecated.R ----

# deprecated (or renamed) lavaan functions
# created 29 March 2015, pre 0.5-18 by YR

vech <- function(S, diagonal = TRUE) {
    .Deprecated("lav_matrix_vech", package = "lavaan")
    lav_matrix_vech(S = S, diagonal = diagonal)
}

vechr <- function(S, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechr", package = "lavaan")
    lav_matrix_vechr(S = S, diagonal = diagonal)
}

vechu <- function(S, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechu", package = "lavaan")
    lav_matrix_vechu(S = S, diagonal = diagonal)
}

vechru <- function(S, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechru", package = "lavaan")
    lav_matrix_vechru(S = S, diagonal = diagonal)
}

vech.reverse <- function(x, diagonal = TRUE) {
    .Deprecated("lav_matrix_vech_reverse", package = "lavaan")
    lav_matrix_vech_reverse(x = x, diagonal = diagonal)
}

vechru.reverse <- function(x, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechru_reverse", package = "lavaan")
    lav_matrix_vechru_reverse(x = x, diagonal = diagonal)
}

vechr.reverse <- function(x, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechr_reverse", package = "lavaan")
    lav_matrix_vechr_reverse(x = x, diagonal = diagonal)
}

vechu.reverse <- function(x, diagonal = TRUE) {
    .Deprecated("lav_matrix_vechu_reverse", package = "lavaan")
    lav_matrix_vechu_reverse(x = x, diagonal = diagonal)
}

lower2full <- function(x, diagonal = TRUE) {
    .Deprecated("lav_matrix_lower2full", package = "lavaan")
    lav_matrix_lower2full(x = x, diagonal = diagonal)
}

upper2full <- function(x, diagonal = TRUE) {
.Deprecated("lav_matrix_upper2full", package = "lavaan") lav_matrix_upper2full(x = x, diagonal = diagonal) } duplicationMatrix <- function(n = 1L) { .Deprecated("lav_matrix_duplication", package = "lavaan") lav_matrix_duplication(n = n) } commutationMatrix <- function(m = 1L, n = 1L) { .Deprecated("lav_matrix_commutation", package = "lavaan") lav_matrix_commutation(m = m, n = n) } sqrtSymmetricMatrix <- function(S) { .Deprecated("lav_matrix_symmetric_sqrt", package = "lavaan") lav_matrix_symmetric_sqrt(S = S) } lavaan/R/lav_fit.R0000644000176200001440000000462213042162755013475 0ustar liggesuserslav_model_fit <- function(lavpartable = NULL, lavmodel = NULL, x = NULL, VCOV = NULL, TEST = NULL) { stopifnot(is.list(lavpartable), class(lavmodel) == "lavModel") # extract information from 'x' iterations = attr(x, "iterations") converged = attr(x, "converged") fx = attr(x, "fx") fx.group = attr(fx, "fx.group") if(!is.null(attr(fx, "logl.group"))) { logl.group = attr(fx, "logl.group") logl = sum(logl.group) } else { logl.group = as.numeric(NA) logl = as.numeric(NA) } #print(fx.group) control = attr(x, "control") attributes(fx) <- NULL x.copy <- x # we are going to change it (remove attributes) attributes(x.copy) <- NULL est <- lav_model_get_parameters(lavmodel = lavmodel, type = "user") # did we compute standard errors? se <- lav_model_vcov_se(lavmodel = lavmodel, lavpartable = lavpartable, VCOV = VCOV, BOOT = attr(VCOV, "BOOT.COEF")) # did we compute test statistics if(is.null(TEST)) { test <- list() } else { test <- TEST } # for convenience: compute lavmodel-implied Sigma and Mu implied <- lav_model_implied(lavmodel) # if bootstrapped parameters, add attr to 'est' if(!is.null(attr(VCOV, "BOOT.COEF"))) { attr(est, "BOOT.COEF") <- attr(VCOV, "BOOT.COEF") } # partrace? if(!is.null(attr(x, "partrace"))) { PARTRACE <- attr(x, "partrace") } else { PARTRACE <- matrix(0, 0L, 0L) } new("Fit", npar = max(lavpartable$free), x = x.copy, partrace = PARTRACE, start = lavpartable$start, # needed? (package stremo!) est = est, # at least 5 packages!! se = se, fx = fx, fx.group = fx.group, logl = logl, logl.group = logl.group, iterations = iterations, converged = converged, control = control, Sigma.hat = if(lavmodel@conditional.x) implied$res.cov else implied$cov, Mu.hat = if(lavmodel@conditional.x) implied$res.int else implied$mean, TH = if(lavmodel@conditional.x) implied$res.th else implied$th, test = test ) } lavaan/R/lav_model_utils.R0000644000176200001440000001356113052775720015240 0ustar liggesusers# lav_model utility functions # initial version: YR 25/03/2009: `methods' for the Model class # - YR 14 Jan 2014: rename object -> lavmodel, all functions as lav_model_* lav_model_get_parameters <- function(lavmodel = NULL, GLIST = NULL, type = "free", extra = TRUE) { # type == "free": only non-redundant free parameters (x) # type == "user": all parameters listed in User model # state or final? 
if(is.null(GLIST)) GLIST <- lavmodel@GLIST if(type == "free") { N <- lavmodel@nx.free #} else if(type == "unco") { # N <- lavmodel@nx.unco } else if(type == "user") { N <- lavmodel@nx.user } x <- numeric(N) for(mm in 1:length(lavmodel@GLIST)) { if(type == "free") { m.idx <- lavmodel@m.free.idx[[mm]] x.idx <- lavmodel@x.free.idx[[mm]] #} else if(type == "unco") { # m.idx <- lavmodel@m.unco.idx[[mm]] # x.idx <- lavmodel@x.unco.idx[[mm]] } else if(type == "user") { m.idx <- lavmodel@m.user.idx[[mm]] x.idx <- lavmodel@x.user.idx[[mm]] } x[x.idx] <- GLIST[[mm]][m.idx] } if(type == "user" && extra && sum(lavmodel@x.def.idx, lavmodel@x.ceq.idx, lavmodel@x.cin.idx) > 0L) { # we need 'free' x x.free <- lav_model_get_parameters(lavmodel = lavmodel, GLIST = GLIST, type = "free") if(length(lavmodel@x.def.idx) > 0L) { x[lavmodel@x.def.idx] <- lavmodel@def.function(x.free) } if(length(lavmodel@x.ceq.idx) > 0L) { x[lavmodel@x.ceq.idx] <- lavmodel@ceq.function(x.free) } if(length(lavmodel@x.cin.idx) > 0L) { x[lavmodel@x.cin.idx] <- lavmodel@cin.function(x.free) } } x } # warning: this will make a copy of lavmodel lav_model_set_parameters <- function(lavmodel = NULL, x = NULL) { tmp <- lavmodel@GLIST for(mm in 1:length(lavmodel@GLIST)) { m.free.idx <- lavmodel@m.free.idx[[mm]] x.free.idx <- lavmodel@x.free.idx[[mm]] tmp[[mm]][m.free.idx] <- x[x.free.idx] } # categorical? set categorical theta elements (if any) if(lavmodel@categorical) { nmat <- lavmodel@nmat if(lavmodel@representation == "LISREL") { for(g in 1:lavmodel@nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0L,nmat))[g] if(lavmodel@estimator %in% c("WLS","DWLS","ULS","PML")) { if(lavmodel@parameterization == "delta") { tmp[mm.in.group] <- setResidualElements.LISREL(MLIST = tmp[mm.in.group], num.idx = lavmodel@num.idx[[g]], ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[g]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[g]]) } else if(lavmodel@parameterization == "theta") { tmp[mm.in.group] <- setDeltaElements.LISREL(MLIST = tmp[mm.in.group], num.idx = lavmodel@num.idx[[g]]) } } else if(lavmodel@estimator %in% c("MML", "FML")) { # ttt <- diag(tmp[mm.in.group]$theta) # diag(tmp[mm.in.group]$theta) <- as.numeric(NA) # if(length(lavmodel@num.idx[[g]]) > 0L) { # diag(tmp[mm.in.group]$theta)[ lavmodel@num.idx[[g]] ] <- # ttt[ lavmodel@num.idx[[g]] ] # } } } } else { cat("FIXME: deal with theta elements in the categorical case") } } lavmodel@GLIST <- tmp lavmodel } # create a standalone GLIST, filled with (new) x values # (avoiding a copy of lavmodel) lav_model_x2GLIST <- function(lavmodel = NULL, x = NULL, type = "free", setDelta = TRUE, m.el.idx = NULL, x.el.idx = NULL) { GLIST <- lavmodel@GLIST for(mm in 1:length(GLIST)) { # skip empty matrix if(nrow(GLIST[[mm]]) == 0L) next if(type == "free") { M.EL.IDX <- lavmodel@m.free.idx[[mm]] X.EL.IDX <- lavmodel@x.free.idx[[mm]] } else if(type == "full") { if(lavmodel@isSymmetric[mm]) { N <- ncol(GLIST[[mm]]) M.EL.IDX <- lav_matrix_vech_idx(N) } else { M.EL.IDX <- seq_len(length(GLIST[[mm]])) } X.EL.IDX <- seq_len(length(m.el.idx)) if(mm > 1) X.EL.IDX <- X.EL.IDX + sum(lavmodel@mmSize[1:(mm-1)]) } else if(type == "custom") { # nothing to do, m.el.idx and x.el.idx should be given M.EL.IDX <- m.el.idx[[mm]] X.EL.IDX <- x.el.idx[[mm]] } # assign GLIST[[mm]][M.EL.IDX] <- x[X.EL.IDX] # make symmetric (if full) if(type == "full" && lavmodel@isSymmetric[mm]) { T <- t(GLIST[[mm]]) GLIST[[mm]][upper.tri(GLIST[[mm]])] <- T[upper.tri(T)] } } # theta parameterization: delta must be reset! 
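    # descriptive note (added): under the theta parameterization the
    # delta/scaling parameters are a function of the other model parameters,
    # so after filling in the new x values they must be recomputed; this is
    # what setDeltaElements.LISREL does below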
    if(lavmodel@categorical && setDelta &&
       lavmodel@parameterization == "theta") {
        nmat <- lavmodel@nmat
        for(g in 1:lavmodel@nblocks) {
            # which mm belong to group g?
            mm.in.group <- 1:nmat[g] + cumsum(c(0L,nmat))[g]
            GLIST[mm.in.group] <-
                setDeltaElements.LISREL(MLIST = GLIST[mm.in.group],
                                        num.idx = lavmodel@num.idx[[g]])
        }
    }

    GLIST
}

# backwards compatibility
# getModelParameters <- lav_model_get_parameters
# setModelParameters <- lav_model_set_parameters
# x2GLIST <- lav_model_x2GLIST

# ---- file: lavaan/R/lav_model_vcov.R ----

# bootstrap based NVCOV
lav_model_nvcov_bootstrap <- function(lavmodel = NULL, lavsamplestats = NULL,
                                      lavoptions = NULL, lavdata = NULL,
                                      lavcache = NULL, lavpartable = NULL) {
    # number of bootstrap draws
    if(!is.null(lavoptions$bootstrap)) {
        R <- lavoptions$bootstrap
    } else {
        R <- 1000L
    }

    boot.type <- "ordinary"
    if(lavoptions$test == "bollen.stine") boot.type <- "bollen.stine"

    TEST <- NULL
    COEF <- bootstrap.internal(object = NULL,
                               lavmodel. = lavmodel,
                               lavsamplestats. = lavsamplestats,
                               lavpartable. = lavpartable,
                               lavoptions. = lavoptions,
                               lavdata. = lavdata,
                               R = R,
                               verbose = lavoptions$verbose,
                               type = boot.type,
                               FUN = ifelse(boot.type == "bollen.stine",
                                            "coeftest", "coef"),
                               warn = -1L)
    if(boot.type == "bollen.stine") {
        nc <- ncol(COEF)
        TEST <- COEF[,nc]
        COEF <- COEF[,-nc]
    }

    # FIXME: cov rescale? Yes for now
    nboot <- nrow(COEF)
    NVarCov <- lavsamplestats@ntotal * (cov(COEF) * (nboot-1)/nboot )

    # save COEF and TEST (if any)
    attr(NVarCov, "BOOT.COEF") <- COEF
    attr(NVarCov, "BOOT.TEST") <- TEST

    NVarCov
}

# robust `sem' NVCOV (see Browne, 1984, bentler & dijkstra 1985)
lav_model_nvcov_robust_sem <- function(lavmodel = NULL, lavsamplestats = NULL,
                                       lavdata = NULL, lavcache = NULL,
                                       mimic = "lavaan", use.ginv = FALSE) {

    # compute inverse of the expected(!) information matrix
    if(lavmodel@estimator == "ML" && mimic == "Mplus") {
        # YR - 11 aug 2010 - what Mplus seems to do is (see Muthen apx 4 eq102)
        # - WLS.V is not based on Sigma.hat and Mu.hat (as it
        #   should be?), but on lavsamplestats@cov and lavsamplestats@mean...
        # - Gamma is not identical to what is used for WLS; closer to EQS
        # - N/N-1 bug in G11 for NVarCov (but not test statistic)
        # - we divide by N-1!
(just like EQS) E.inv <- lav_model_information_expected_MLM(lavmodel = lavmodel, lavsamplestats = lavsamplestats, extra = TRUE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) } else { E.inv <- lav_model_information_expected(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, extra = TRUE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) } # check if E.inv is ok if(inherits(E.inv, "try-error")) { return(E.inv) } Delta <- attr(E.inv, "Delta") WLS.V <- attr(E.inv, "WLS.V") # Gamma Gamma <- lavsamplestats@NACOV if(lavmodel@estimator == "ML" && mimic == "Mplus" && !lavsamplestats@NACOV.user) { # 'fix' G11 part of Gamma (NOTE: this is NOT needed for SB test # statistic for(g in 1:lavsamplestats@ngroups) { gg1 <- (lavsamplestats@nobs[[g]]-1)/lavsamplestats@nobs[[g]] if(lavmodel@conditional.x) { nvar <- NCOL(lavsamplestats@res.cov[[g]]) } else { nvar <- NCOL(lavsamplestats@cov[[g]]) } G11 <- Gamma[[g]][1:nvar, 1:nvar, drop = FALSE] Gamma[[g]][1:nvar, 1:nvar] <- G11 * gg1 } # g } tDVGVD <- matrix(0, ncol=ncol(E.inv), nrow=nrow(E.inv)) for(g in 1:lavsamplestats@ngroups) { fg <- lavsamplestats@nobs[[g]] /lavsamplestats@ntotal fg1 <- (lavsamplestats@nobs[[g]]-1)/lavsamplestats@ntotal # fg twice for WLS.V, 1/fg1 once for GaMMA # if fg==fg1, there would be only one fg, as in Satorra 1999 p.8 # t(Delta) * WLS.V %*% Gamma %*% WLS.V %*% Delta if(lavmodel@estimator == "DWLS" || lavmodel@estimator == "ULS") { # diagonal weight matrix WD <- WLS.V[[g]] * Delta[[g]] } else { # full weight matrix WD <- WLS.V[[g]] %*% Delta[[g]] } tDVGVD <- tDVGVD + fg*fg/fg1 * crossprod(WD, Gamma[[g]] %*% WD) } # g NVarCov <- (E.inv %*% tDVGVD %*% E.inv) # to be reused by lavaanTest attr(NVarCov, "E.inv") <- E.inv attr(NVarCov, "Delta") <- Delta attr(NVarCov, "WLS.V") <- WLS.V NVarCov } lav_model_nvcov_robust_sandwich <- function(lavmodel = lavmodel, lavsamplestats = NULL, lavdata = NULL, information = "observed", lavcache = NULL, use.ginv = FALSE) { # sandwich estimator: A.inv %*% B %*% t(A.inv) # where A.inv == E.inv # B == outer product of case-wise scores # inverse observed/expected information matrix E.inv <- lav_model_information(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, information = information, extra = FALSE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) # check if E.inv is ok if(inherits(E.inv, "try-error")) { return(E.inv) } # outer product of case-wise scores B0 <- lav_model_information_firstorder(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, extra = TRUE, check.pd = FALSE, augmented = FALSE, inverted = FALSE, use.ginv = use.ginv) # compute sandwich estimator NVarCov <- E.inv %*% B0 %*% E.inv attr(NVarCov, "B0.group") <- attr(B0, "B0.group") attr(NVarCov, "E.inv") <- E.inv NVarCov } # two stage # - two.stage: Gamma = I_1^{-1} # - robust.two.stage: Gamma = incomplete Gamma (I_1^{-1} J_1 I_1^{-1}) # where I_1 and J_1 are based on the (saturated) model h1 # (either unstructured, or structured) # # references: # # - Savalei \& Bentler (2009) eq (6) for se = "two.stage" # - Savalei \& Falk (2014) eq (3) for se = "robust.two.stage" # - Yuan \& Bentler (2000) lav_model_nvcov_two_stage <- function(lavmodel = NULL, lavsamplestats = NULL, lavoptions = NULL, lavdata = NULL, lavimplied = NULL, use.ginv = FALSE) { # expected OR observed, depending on lavoptions$information if(is.null(lavoptions) && is.null(lavoptions$information)) { lavoptions <- list(information = "observed") } # 
information matrix if(lavoptions$information == "expected") { # structured or unstructured? if(!is.null(lavoptions) && !is.null(lavoptions$h1.information) && lavoptions$h1.information == "unstructured") { structured <- FALSE } else { structured <- TRUE } E.inv <- lav_model_information_expected(lavmodel = lavmodel, lavsamplestats = lavsamplestats, structured = structured, lavdata = NULL, extra = TRUE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) Delta <- attr(E.inv, "Delta") WLS.V <- attr(E.inv, "WLS.V") # this is 'H' in the literature attr(E.inv, "Delta") <- NULL attr(E.inv, "WLS.V") <- NULL } else { E.inv <- lav_model_information_observed(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavoptions = lavoptions, lavdata = NULL, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) if(lavoptions$observed.information == "h1") { Delta <- attr(E.inv, "Delta") WLS.V <- attr(E.inv, "WLS.V") # this is 'H' in the literature attr(E.inv, "Delta") <- NULL attr(E.inv, "WLS.V") <- NULL } else { stop("lavaan ERROR: two.stage + observed information currently only works with observed.information = ", dQuote("h1")) } } # check if E.inv is ok if(inherits(E.inv, "try-error")) { return(E.inv) } if(is.null(WLS.V)) { stop("lavaan ERROR: WLS.V/H is NULL, observed.information = hessian?") } Gamma <- vector("list", length = lavsamplestats@ngroups) # handle multiple groups tDVGVD <- matrix(0, ncol=ncol(E.inv), nrow=nrow(E.inv)) for(g in 1:lavsamplestats@ngroups) { fg <- lavsamplestats@nobs[[g]] /lavsamplestats@ntotal #fg1 <- (lavsamplestats@nobs[[g]]-1)/lavsamplestats@ntotal fg1 <- fg # fg twice for WLS.V, 1/fg1 once for GaMMA # if fg==fg1, there would be only one fg, as in Satorra 1999 p.8 # t(Delta) * WLS.V %*% Gamma %*% WLS.V %*% Delta WD <- WLS.V[[g]] %*% Delta[[g]] # to compute (incomplete) GAMMA, should we use # structured or unstructured mean/sigma? 
# # we use the same setting as to compute 'H' (the h1 information matrix) # so that at Omega = H if data is complete if(lavoptions$h1.information == "unstructured") { MU <- lavsamplestats@missing.h1[[g]]$mu SIGMA <- lavsamplestats@missing.h1[[g]]$sigma } else { MU <- lavimplied$mean[[g]] SIGMA <- lavimplied$cov[[g]] } # compute 'Gamma' (or Omega.beta) if(lavoptions$se == "two.stage") { # this is Savalei & Bentler (2009) if(lavoptions$information == "expected") { Info <- lav_mvnorm_missing_information_expected( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = MU, Sigma = SIGMA) } else { Info <- lav_mvnorm_missing_information_observed_samplestats( Yp = lavsamplestats@missing[[g]], Mu = MU, Sigma = SIGMA) } Gamma[[g]] <- lav_matrix_symmetric_inverse(Info) } else { # we assume "robust.two.stage" # NACOV is here incomplete Gamma # Savalei & Falk (2014) # Gamma[[g]] <- lav_mvnorm_missing_h1_omega_sw(Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Yp = lavsamplestats@missing[[g]], Mu = MU, Sigma = SIGMA, information = lavoptions$information) } # compute tDVGVD <- tDVGVD + fg*fg/fg1 * crossprod(WD, Gamma[[g]] %*% WD) } # g NVarCov <- (E.inv %*% tDVGVD %*% E.inv) # to be reused by lavaanTest attr(NVarCov, "Delta") <- Delta attr(NVarCov, "Gamma") <- Gamma #if(lavoptions$h1.information.se == lavoptions$h1.information.test) { attr(NVarCov, "E.inv") <- E.inv attr(NVarCov, "WLS.V") <- WLS.V #} NVarCov } lav_model_vcov <- function(lavmodel = NULL, lavsamplestats = NULL, lavoptions = NULL, lavdata = NULL, lavpartable = NULL, lavcache = NULL, lavimplied = NULL, use.ginv = FALSE) { likelihood <- lavoptions$likelihood information <- lavoptions$information se <- lavoptions$se verbose <- lavoptions$verbose mimic <- lavoptions$mimic # special cases if(se == "none" || se == "external") return(matrix(0,0,0)) # some require meanstructure (for now) if(se %in% c("first.order", "robust.sem", "robust.huber.white") && !lavoptions$meanstructure) { stop("se (", se, ") requires meanstructure (for now)") } if(se == "standard") { NVarCov <- lav_model_information(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, information = information, extra = FALSE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) } else if(se == "first.order") { NVarCov <- lav_model_information_firstorder(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, extra = TRUE, check.pd = FALSE, augmented = TRUE, inverted = TRUE, use.ginv = use.ginv) } else if(se == "robust.sem") { NVarCov <- lav_model_nvcov_robust_sem(lavmodel = lavmodel, lavsamplestats = lavsamplestats, mimic = mimic, lavcache = lavcache, lavdata = lavdata, use.ginv = use.ginv) } else if(se == "robust.huber.white") { NVarCov <- lav_model_nvcov_robust_sandwich(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, information = information, lavcache = lavcache, use.ginv = use.ginv) } else if(se %in% c("two.stage", "robust.two.stage")) { NVarCov <- lav_model_nvcov_two_stage(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavoptions = lavoptions, lavdata = lavdata, lavimplied = lavimplied, use.ginv = use.ginv) } else if(se == "bootstrap") { NVarCov <- try( lav_model_nvcov_bootstrap(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavoptions = lavoptions, lavdata = lavdata, lavcache = lavcache, lavpartable = lavpartable), silent=TRUE ) } else { warning("lavaan WARNING: unknown se type: ", se) } if(! inherits(NVarCov, "try-error") ) { # denominator! 
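# note (illustrative): the estimates' covariance matrix is NVarCov/N;
# for ML-type estimators with likelihood = "normal", N is the total
# sample size, otherwise (e.g. likelihood = "wishart") one degree of
# freedom is lost per group. A minimal sketch, assuming two groups of
# 150 observations each:
#
#     # likelihood == "normal" : N = 300      -> VarCov <- NVarCov / 300
#     # likelihood == "wishart": N = 300 - 2  -> VarCov <- NVarCov / 298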
if(lavmodel@estimator %in% c("ML","PML","FML") && likelihood == "normal") { N <- lavsamplestats@ntotal } else { N <- lavsamplestats@ntotal - lavsamplestats@ngroups } #if(lavmodle@estimator %in% c("PML", "MML")) { # VarCov <- NVarCov #} else { VarCov <- 1/N * NVarCov #} } else { warning("lavaan WARNING: could not compute standard errors!\n lavaan NOTE: this may be a symptom that the model is not identified.\n") VarCov <- NULL } VarCov } lav_model_vcov_se <- function(lavmodel, lavpartable, VCOV = NULL, BOOT = NULL) { # 0. special case if(is.null(VCOV)) { se <- rep(as.numeric(NA), lavmodel@nx.user) se[ lavpartable$free == 0L ] <- 0.0 return(se) } # 1. free parameters only x.var <- diag(VCOV) # check for negative values (what to do: NA or 0.0?) x.var[x.var < 0] <- as.numeric(NA) x.se <- sqrt( x.var ) GLIST <- lav_model_x2GLIST(lavmodel = lavmodel, x = x.se, type = "free") # se for full parameter table, but with 0.0 entries for def/ceq/cin # elements se <- lav_model_get_parameters(lavmodel = lavmodel, GLIST = GLIST, type = "user", extra = FALSE) # 2. fixed parameters -> se = 0.0 se[ which(lavpartable$free == 0L) ] <- 0.0 # 3. defined parameters: def.idx <- which(lavpartable$op == ":=") if(length(def.idx) > 0L) { if(!is.null(BOOT)) { BOOT.def <- apply(BOOT, 1L, lavmodel@def.function) if(length(def.idx) == 1L) { BOOT.def <- as.matrix(BOOT.def) } else { BOOT.def <- t(BOOT.def) } def.cov <- cov(BOOT.def ) } else { # regular delta method x <- lav_model_get_parameters(lavmodel = lavmodel, type = "free") JAC <- try(lav_func_jacobian_complex(func = lavmodel@def.function, x = x), silent=TRUE) if(inherits(JAC, "try-error")) { # eg. pnorm() JAC <- lav_func_jacobian_simple(func = lavmodel@def.function, x = x) } def.cov <- JAC %*% VCOV %*% t(JAC) } # check for negative se's diag.def.cov <- diag(def.cov) diag.def.cov[ diag.def.cov < 0 ] <- as.numeric(NA) se[def.idx] <- sqrt(diag.def.cov) } se } lavaan/R/lav_prelis.R0000644000176200001440000000454112465075714014217 0ustar liggesusers# small utility functions to deal with PRELIS # Y.R.: 11 dec 2012 prelis.read.cor <- function(file = "") { # read in numbers as characters txt <- scan(file, what="character", quiet=TRUE) # convert to numbers txt <- gsub("D", "e", txt) x <- as.numeric(txt) # create COR/COR matrix COR <- lav_matrix_lower2full(x, diagonal = TRUE) COR } prelis.read.acm <- function(file = "", rescale=1e-3) { # read in raw data -- ignore first three elements # first element: 123.456789 (check?) # second element: 2.72 version number of prelis # third element: almost zero?? 
zz <- file(file, "rb") raw <- readBin(zz, what = "double", n = 1e+05)[-c(1, 2, 3)] close(zz) # scale numbers raw <- raw*rescale ACM <- lav_matrix_lower2full(raw, diagonal = TRUE) ACM } prelis.write.data <- function(data, file = "prelis", na.rm = TRUE, labels = FALSE, std.ov = FALSE) { dfile <- paste(file, ".raw", sep = "") write.table(data, file = dfile, na = "-999999", col.names = FALSE, row.names = FALSE, quote = FALSE) if (labels) { lfile <- paste(file, ".lab", sep = "") write.table(unique(names(data)), file = lfile, row.names = F, col.names = F, quote = F) } } prelis.run <- function(X, type="OR", keep.files=FALSE) { label <- names(X) nvar <- ncol(X) # write raw data prelis.write.data(X, file="prelistmp") # write syntax txt <- paste("DA NI=", nvar, " NO=0 MI=-999999\n", sep="") txt <- paste(txt, "LA", sep="") tmp <- 0 for(i in 1:nvar) { if(tmp%%6 == 0) txt <- paste(txt, "\n", sep="") txt <- paste(txt, label[i], " ", sep="") tmp <- tmp+1 } txt <- paste(txt, "\n") txt <- paste(txt, "RA FI=prelistmp.raw\n", sep="") txt <- paste(txt, type, " ALL\n", sep="") txt <- paste(txt, "OU MA=PM SA=prelistmp.acm SM=prelistmp.cor\n", sep="") writeLines(txt, con="prelistmp.in") # run prelis system("prelis prelistmp.in prelistmp.out") # read in acm and cor ACM <- prelis.read.acm(file="prelistmp.acm") COR <- prelis.read.cor(file="prelistmp.cor") # clean up if(!keep.files) { unlink(c("prelistmp.in", "prelistmp.out", "prelistmp.acm", "prelistmp.cor", "prelistmp.FREQ", "prelistmp.raw")) } list(COR=COR, ACM=ACM) } lavaan/R/lav_polychor.R0000644000176200001440000005342013046373216014552 0ustar liggesusers# polychoric Y1 and Y2 are both ORDINAL variables # two-way frequency table pc_freq <- function(Y1, Y2) { # FIXME: for 2x2, we could use # array(tabulate((Y1-1) + (Y2-1)*2 + 1, nbins = 4L), dim=c(2L ,2L)) max.y1 <- max(Y1, na.rm=TRUE); max.y2 <- max(Y2, na.rm=TRUE) bin <- Y1 - 1L; bin <- bin + max.y1 * (Y2 - 1L); bin <- bin[!is.na(bin)] if (length(bin)) bin <- bin + 1L array(tabulate(bin, nbins = max.y1*max.y2), dim=c(max.y1, max.y2)) } # compute thresholds (no eXo) pc_th <- function(Y, freq=NULL, prop=NULL) { if(is.null(prop)) { if(is.null(freq)) freq <- tabulate(Y) prop <- freq / sum(freq) } prop1 <- prop[-length(prop)] qnorm(cumsum(prop1)) } pc_PI <- function(rho, th.y1, th.y2) { nth.y1 <- length(th.y1); nth.y2 <- length(th.y2) pth.y1 <- pnorm(th.y1); pth.y2 <- pnorm(th.y2) # catch special case: rho = 0.0 if(rho == 0.0) { rowPI <- diff(c(0,pth.y1,1)) colPI <- diff(c(0,pth.y2,1)) PI.ij <- outer(rowPI, colPI) return(PI.ij) } # prepare for a single call to pbinorm upper.y <- rep(th.y2, times=rep.int(nth.y1, nth.y2)) upper.x <- rep(th.y1, times=ceiling(length(upper.y))/nth.y1) #rho <- rep(rho, length(upper.x)) # only one rho here BI <- pbivnorm::pbivnorm(x=upper.x, y=upper.y, rho=rho) #BI <- pbinorm1(upper.x=upper.x, upper.y=upper.y, rho=rho) dim(BI) <- c(nth.y1, nth.y2) BI <- rbind(0, BI, pth.y2, deparse.level = 0) BI <- cbind(0, BI, c(0, pth.y1, 1), deparse.level = 0) # get probabilities nr <- nrow(BI); nc <- ncol(BI) PI <- BI[-1L,-1L] - BI[-1L,-nc] - BI[-nr,-1L] + BI[-nr,-nc] # all elements should be strictly positive PI[PI < .Machine$double.eps] <- .Machine$double.eps PI } pc_PHI <- function(rho, th.y1, th.y2) { TH.Y1 <- c(-Inf, th.y1, Inf); TH.Y2 <- c(-Inf, th.y2, Inf) nr <- length(TH.Y1) - 1L; nc <- length(TH.Y2) - 1L phi <- matrix(0, nr, nc) for(i in seq_len(nr)) { for(j in seq_len(nc)) { p1 <- p2 <- p3 <- p4 <- 0 if(i < nr && j < nc) p1 <- dbinorm(TH.Y1[i +1L], TH.Y2[j +1L], rho) if(i > 1L && j < nc) p2 
<- dbinorm(TH.Y1[i-1L+1L], TH.Y2[j +1L], rho) if(i < nr && j > 1L) p3 <- dbinorm(TH.Y1[i +1L], TH.Y2[j-1L+1L], rho) if(i > 1L && j > 1L) p4 <- dbinorm(TH.Y1[i-1L+1L], TH.Y2[j-1L+1L], rho) phi[i,j] <- (p1 - p2 - p3 + p4) } } phi } pc_gnorm <- function(rho, th.y1, th.y2) { # note: Olsson 1979 A2 contains an error!! guv <- function(u, v, rho) { R <- (1 - rho*rho) ( u*v*R - rho*((u*u) - 2*rho*u*v + (v*v)) + rho*R ) / (R*R) } TH.Y1 <- c(-Inf, th.y1, Inf); TH.Y2 <- c(-Inf, th.y2, Inf) nr <- length(TH.Y1) - 1L; nc <- length(TH.Y2) - 1L gnorm <- matrix(0, nr, nc) for(i in seq_len(nr)) { for(j in seq_len(nc)) { g1 <- g2 <- g3 <- g4 <- 0 if(i < nr && j < nc) { u <- TH.Y1[i +1L]; v <- TH.Y2[j +1L] g1 <- dbinorm(u, v, rho) * guv(u,v,rho) } if(i > 1L && j < nc) { u <- TH.Y1[i-1L+1L]; v <- TH.Y2[j +1L] g2 <- dbinorm(u, v, rho) * guv(u,v,rho) } if(i < nr && j > 1L) { u <- TH.Y1[i +1L]; v <- TH.Y2[j-1L+1L] g3 <- dbinorm(u, v, rho) * guv(u,v,rho) } if(i > 1L && j > 1L) { u <- TH.Y1[i-1L+1L]; v <- TH.Y2[j-1L+1L] g4 <- dbinorm(u, v, rho) * guv(u,v,rho) } gnorm[i,j] <- (g1 - g2 - g3 + g4) } } gnorm } # (summed) loglikelihood pc_logl <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL, freq=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavProbit(y=Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(y=Y2, X=eXo) # if no eXo, use shortcut (grouped) if(length(fit.y1$slope.idx) == 0L) { if(is.null(freq)) freq <- pc_freq(fit.y1$y,fit.y2$y) # grouped lik PI <- pc_PI(rho, th.y1=fit.y1$theta[fit.y1$th.idx], th.y2=fit.y2$theta[fit.y2$th.idx]) if(all(PI > 0)) logl <- sum( freq * log(PI) ) else logl <- -Inf } else { lik <- pc_lik(Y1=Y1, Y2=Y2, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) if(all(lik > 0, na.rm = TRUE)) logl <- sum( log(lik), na.rm = TRUE ) else logl <- -Inf } logl } # pc_lik, with user-specified parameters pc_lik2 <- function(Y1, Y2, eXo=NULL, rho, fit.y1=NULL, fit.y2=NULL, th.y1=NULL, th.y2=NULL, sl.y1=NULL, sl.y2=NULL) { R <- sqrt(1 - rho*rho) if(is.null(fit.y1)) fit.y1 <- lavProbit(y=Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(y=Y2, X=eXo) y1.update <- y2.update <- FALSE if(!is.null(th.y1)) { # update thresholds fit.y1 y1.update <- TRUE fit.y1$theta[fit.y1$th.idx] <- th.y1 } if(!is.null(th.y2)) { # update thresholds fit.y1 y2.update <- TRUE fit.y2$theta[fit.y2$th.idx] <- th.y2 } if(!is.null(sl.y1)) { # update slopes y1.update <- TRUE fit.y1$theta[fit.y1$slope.idx] <- sl.y1 } if(!is.null(sl.y2)) { # update slopes y2.update <- TRUE fit.y2$theta[fit.y2$slope.idx] <- sl.y2 } if(y1.update) tmp <- fit.y1$lik() if(y2.update) tmp <- fit.y2$lik() if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y if(missing(eXo) && length(fit.y1$slope.idx) > 0L) eXo <- fit.y1$X # lik lik <- pc_lik(rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) lik } # individual likelihoods pc_lik <- function(Y1, Y2, eXo=NULL, rho=NULL, fit.y1=NULL, fit.y2=NULL) { stopifnot(!is.null(rho)) if(is.null(fit.y1)) fit.y1 <- lavProbit(y=Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(y=Y2, X=eXo) # if no eXo, use shortcut (grouped) if(length(fit.y1$slope.idx) == 0L) { # probability per cell PI <- pc_PI(rho, th.y1=fit.y1$theta[fit.y1$th.idx], th.y2=fit.y2$theta[fit.y2$th.idx]) lik <- PI[ cbind(fit.y1$y, fit.y2$y) ] } else { # individual likelihoods # pbivnorm package is MUCH faster (loop in fortran) # lik <- ( pbivnorm(x=fit.y1$z1, y=fit.y2$z1, rho=rho) - # pbivnorm(x=fit.y1$z2, y=fit.y2$z1, rho=rho) - # pbivnorm(x=fit.y1$z1, y=fit.y2$z2, rho=rho) + # pbivnorm(x=fit.y1$z2, y=fit.y2$z2, rho=rho) ) # take care of 
missing values if(fit.y1$missing.values || fit.y2$missing.values) { lik <- rep(as.numeric(NA), length(fit.y1$z1)) missing.idx <- unique(c(fit.y1$missing.idx, fit.y2$missing.idx)) fit.y1.z1 <- fit.y1$z1; fit.y1.z1[missing.idx] <- 0 fit.y2.z1 <- fit.y2$z1; fit.y2.z1[missing.idx] <- 0 fit.y1.z2 <- fit.y1$z2; fit.y1.z2[missing.idx] <- 0 fit.y2.z2 <- fit.y2$z2; fit.y2.z2[missing.idx] <- 0 lik <- pbinorm(upper.x=fit.y1.z1, upper.y=fit.y2.z1, lower.x=fit.y1.z2, lower.y=fit.y2.z2, rho=rho) lik[missing.idx] <- NA } else { lik <- pbinorm(upper.x=fit.y1$z1, upper.y=fit.y2$z1, lower.x=fit.y1$z2, lower.y=fit.y2$z2, rho=rho) } } lik } # loglikelihood (x-version) pc_logl_x <- function(x, Y1, Y2, eXo=NULL, nth.y1, nth.y2, freq=NULL) { nexo <- ifelse(is.null(eXo), 0L, ncol(eXo)); S <- seq_len stopifnot(length(x) == (1L + nth.y1 + nth.y2 + 2*nexo)) rho = x[1L] th.y1 = x[1L + S(nth.y1)] th.y2 = x[1L + nth.y1 + S(nth.y2)] sl.y1 = x[1L + nth.y1 + nth.y2 + S(nexo)] sl.y2 = x[1L + nth.y1 + nth.y2 + nexo + S(nexo)] fit.y1 <- lavProbit(y=Y1, X=eXo) fit.y1$theta[fit.y1$th.idx] <- th.y1 fit.y1$theta[fit.y1$slope.idx] <- sl.y1 fit.y1$lik() fit.y2 <- lavProbit(y=Y2, X=eXo) fit.y2$theta[fit.y2$th.idx] <- th.y2 fit.y2$theta[fit.y2$slope.idx] <- sl.y2 fit.y2$lik() pc_logl(Y1=Y1, Y2=Y2, eXo=eXo, rho=rho, fit.y1=fit.y1, fit.y2=fit.y2, freq=freq) } # polychoric correlation # # zero.add is a vector: first element is for 2x2 tables only, second element # for general tables # zero.keep.margins is only used for 2x2 tables pc_cor_TS <- function(Y1, Y2, eXo=NULL, fit.y1=NULL, fit.y2=NULL, freq=NULL, method="nlminb", zero.add = c(0.5, 0.0), control=list(), zero.keep.margins = TRUE, zero.cell.warn = FALSE, zero.cell.flag = FALSE, verbose=FALSE, Y1.name=NULL, Y2.name=NULL) { # cat("DEBUG: method = ", method, "\n") if(is.null(fit.y1)) fit.y1 <- lavProbit(y=Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(y=Y2, X=eXo) if(missing(Y1)) Y1 <- fit.y1$y else as.integer(Y1) if(missing(Y2)) Y2 <- fit.y2$y else as.integer(Y2) if(missing(eXo) && length(fit.y1$slope.idx) > 0L) eXo <- fit.y1$X stopifnot(min(Y1, na.rm=TRUE) == 1L, min(Y2, na.rm=TRUE) == 1L, method %in% c("nlminb", "BFGS", "nlminb.hessian", "optimize")) # empty cells or not empty.cells <- FALSE # exo or not? exo <- ifelse(length(fit.y1$slope.idx) > 0L, TRUE, FALSE) # thresholds th.y1 <- fit.y1$theta[fit.y1$th.idx] th.y2 <- fit.y2$theta[fit.y2$th.idx] # freq if(!exo) { if(is.null(freq)) freq <- pc_freq(fit.y1$y,fit.y2$y) nr <- nrow(freq); nc <- ncol(freq) # check for empty cells if(any(freq == 0L)) { empty.cells <- TRUE if(zero.cell.warn) { if(!is.null(Y1.name) && !is.null(Y2.name)) { warning("lavaan WARNING: empty cell(s) in bivariate table of ", Y1.name, " x ", Y2.name) } else { warning("lavaan WARNING: empty cell(s) in bivariate table") } } } # treat 2x2 tables if(nr == 2L && nc == 2L) { idx <- which(freq == 0L) # catch 2 empty cells: perfect correlation! 
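# note (illustrative): e.g. the (hypothetical) table
#
#     #          Y2=1  Y2=2
#     #   Y1=1     10     0
#     #   Y1=2      0    15
#
# has both off-diagonal cells empty, which is only consistent with an
# underlying correlation of +1; with both diagonal cells empty instead,
# the correlation is -1. The check below returns these boundary values: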
if(length(idx) == 2L) { warning("lavaan WARNING: two empty cells in 2x2 table") if(freq[1,1] > 0L) { rho <- 1.0 if(zero.cell.flag) { attr(rho, "zero.cell.flag") <- empty.cells } return(rho) } else { rho <- -1.0 if(zero.cell.flag) { attr(rho, "zero.cell.flag") <- empty.cells } return(rho) } } else if(length(idx) == 1L && zero.add[1] > 0.0) { if(zero.keep.margins) { # add + compensate to preserve margins if(idx == 1L || idx == 4L) { # main diagonal freq[1,1] <- freq[1,1] + zero.add[1] freq[2,2] <- freq[2,2] + zero.add[1] freq[2,1] <- freq[2,1] - zero.add[1] freq[1,2] <- freq[1,2] - zero.add[1] } else { freq[1,1] <- freq[1,1] - zero.add[1] freq[2,2] <- freq[2,2] - zero.add[1] freq[2,1] <- freq[2,1] + zero.add[1] freq[1,2] <- freq[1,2] + zero.add[1] } } else { freq[idx] <- freq[idx] + zero.add[1] } } # general table } else { if(any(freq == 0L) && zero.add[2] > 0.0) { # general table: just add zero.add to the empty cell(s) freq[freq == 0] <- zero.add[2] } } # catch special cases for 2x2 tables if(nr == 2L && nc == 2L) { # 1. a*d == b*c storage.mode(freq) <- "numeric" # to avoid integer overflow if(freq[1,1]*freq[2,2] == freq[1,2]*freq[2,1]) { rho <- 0.0 if(zero.cell.flag) { attr(rho, "zero.cell.flag") <- empty.cells } return(rho) } # 2. equal margins (th1 = th2 = 0) if(th.y1[1] == 0L && th.y2[1] == 0L) { # see eg Brown & Benedetti 1977 eq 2 rho <- - cos( 2*pi*freq[1,1]/sum(freq) ) if(zero.cell.flag) { attr(rho, "zero.cell.flag") <- empty.cells } return(rho) } } } objectiveFunction <- function(x) { logl <- pc_logl(rho=tanh(x[1L]), fit.y1=fit.y1, fit.y2=fit.y2, freq=freq) -logl # to minimize! } gradientFunction <- function(x) { rho <- tanh(x[1L]) if(!exo) { PI <- pc_PI(rho, th.y1, th.y2) phi <- pc_PHI(rho, th.y1, th.y2) dx.rho <- sum(freq/PI * phi) } else { lik <- pc_lik(rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) dx <- ( dbinorm(fit.y1$z1, fit.y2$z1, rho) - dbinorm(fit.y1$z2, fit.y2$z1, rho) - dbinorm(fit.y1$z1, fit.y2$z2, rho) + dbinorm(fit.y1$z2, fit.y2$z2, rho) ) / lik dx.rho <- sum(dx, na.rm = TRUE) } -dx.rho * 1/(cosh(x)*cosh(x)) # dF/drho * drho/dx, dtanh = 1/cosh(x)^2 } #hessianFunction2 <- function(x) { # numDeriv::hessian(func=objectiveFunction, x=x) #} # OLSSON 1979 A2 + A3 (no EXO!!) 
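# note (illustrative): the optimization above runs on x = atanh(rho), so
# that rho = tanh(x) always stays inside (-1, 1); by the chain rule, the
# gradient in rho is multiplied by drho/dx = 1/cosh(x)^2 = 1 - rho^2, e.g.
#
#     # rho <- 0.5; x <- atanh(rho)
#     # 1/(cosh(x)*cosh(x))   # 0.75 == 1 - rho^2
#
# the analytic Hessian below applies the same transformation twice
# (the u1*u1 and u2 terms):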
hessianFunction <- function(x) { rho <- tanh(x[1L]) PI <- pc_PI(rho, th.y1, th.y2) phi <- pc_PHI(rho, th.y1, th.y2) gnorm <- pc_gnorm(rho, th.y1, th.y2) H <- sum(freq/PI * gnorm) - sum(freq/(PI*PI) * (phi*phi)) # to compensate for tanh # u=f(x), d^2y/dx^2 = d^2y/du^2 * (du/dx)^2 + dy/du * d^2u/dx^2 # dtanh = 1/cosh(x)^2 # dtanh_2 = 8*exp(2*x)*(1-exp(2*x))/(exp(2*x)+1)^3 grad <- sum(freq/PI * phi) u1 <- 1/(cosh(x)*cosh(x)) tmp3 <- (exp(2*x)+1) * (exp(2*x)+1) * (exp(2*x)+1) u2 <- 8*exp(2*x)*(1-exp(2*x))/tmp3 H <- H * (u1*u1) + grad * u2 dim(H) <- c(1L,1L) # for nlminb -H } # starting value # catch tetrachoric case #if(!exo && (nr == 2L && nc == 2L) && !any(freq == 0)) { # Divgi 1979 initial value # h <- max(abs(th.y1), abs(th.y2)); k <- min(abs(th.y1), abs(th.y2)) # h can not be zero; # if(h == 0) h <- 1e-5 # R <- (freq[1,1]*freq[2,2])/(freq[1,2]*freq[2,1]) # D <- k*(.79289 + 4.28981/(1+3.30231*h));D <- D*sign(th.y1)*sign(th.y2) # C <- 0.07557*h + (h-k)^2 * (0.51141/(h+2.05793) - 0.07557/h) # B <- 0.5/(1 + (h^2 + k^2)*(0.82281-1.03514*(k/sqrt(h^2+k^2)))) # A <- 0.5/(1 + (h^2 + k^2)*(0.12454-0.27102*(1-h/sqrt(h^2+k^2)))) # alpha <- A + B*(-1 + 1/(1 + C*(log(R)-D)^2)) # rho.init <- cos(pi/(1+R^alpha)) #} else { rho.init <- cor(Y1,Y2, use="pairwise.complete.obs") #} # check range of rho.init is within [-1,+1] if(abs(rho.init) >= 1.0) { rho.init <- 0.0 } # default values control.nlminb <- list(eval.max=20000L, iter.max=10000L, trace=ifelse(verbose, 1L, 0L), #abs.tol=1e-20, ### important!! fx never negative abs.tol=(.Machine$double.eps * 10), rel.tol=ifelse(method == "nlminb", 1e-10, 1e-7), x.tol=1.5e-8, xf.tol=2.2e-14) control.nlminb <- modifyList(control.nlminb, control) control <- control.nlminb[c("eval.max", "iter.max", "trace", "abs.tol", "rel.tol", "x.tol", "xf.tol")] if(method == "nlminb") { out <- nlminb(start=atanh(rho.init), objective=objectiveFunction, gradient=gradientFunction, scale=10, control=control) if(out$convergence != 0L) warning("no convergence") rho <- tanh(out$par) } else if(method == "BFGS") { # NOTE: known to fail if rho.init is too far from final value # seems to be better with parscale = 0.1?? out <- optim(par = atanh(rho.init), fn = objectiveFunction, gr = gradientFunction, control = list(parscale = 0.1, reltol = 1e-10, trace = ifelse(verbose, 1L, 0L), REPORT = 1L, abstol=(.Machine$double.eps * 10)), method = "BFGS") if(out$convergence != 0L) warning("no convergence") rho <- tanh(out$par) } else if(method == "optimize") { # not atanh/tanh transform objectiveFunction2 <- function(x) { logl <- pc_logl(rho=x[1L], fit.y1=fit.y1, fit.y2=fit.y2, freq=freq) -logl # to minimize! } out <- optimize(f = objectiveFunction2, interval = c(-0.9999,0.9999)) rho <- out$minimum } else if(method == "nlminb.hessian") { stopifnot(!exo) out <- nlminb(start=atanh(rho.init), objective=objectiveFunction, gradient=gradientFunction, hessian=hessianFunction, scale=100, # not needed? 
control=control) if(out$convergence != 0L) warning("no convergence") rho <- tanh(out$par) } if(zero.cell.flag) { attr(rho, "zero.cell.flag") <- empty.cells } rho } pc_cor_gradient_noexo <- function(Y1, Y2, rho, th.y1=NULL, th.y2=NULL, freq=NULL) { R <- sqrt(1- rho*rho) TH.Y1 <- c(-Inf, th.y1, Inf); TH.Y2 <- c(-Inf, th.y2, Inf) dth.y1 <- dnorm(th.y1); dth.y2 <- dnorm(th.y2) if(is.null(freq)) freq <- pc_freq(Y1, Y2) # rho PI <- pc_PI(rho, th.y1, th.y2) phi <- pc_PHI(rho, th.y1, th.y2) dx.rho <- sum(freq/PI * phi) # th.y2 PI.XY.inv <- 1/PI[ cbind(Y1,Y2) ] dx.th.y2 <- matrix(0, length(Y2), length(th.y2)) for(m in 1:length(th.y2)) { ki <- dth.y2[m] * pnorm((TH.Y1[Y1+1L ]-rho*th.y2[m])/R) ki1 <- dth.y2[m] * pnorm((TH.Y1[Y1+1L-1L]-rho*th.y2[m])/R) DpiDth <- ifelse(Y2 == m, (ki - ki1), ifelse(Y2 == m+1, (-ki + ki1), 0)) dx.th.y2[,m] <- PI.XY.inv * DpiDth } } pc_cor_scores <- function(Y1, Y2, eXo=NULL, rho, fit.y1=NULL, fit.y2=NULL, th.y1=NULL, th.y2=NULL, sl.y1=NULL, sl.y2=NULL, na.zero=FALSE) { # check if rho > R <- sqrt(1 - rho*rho) if(is.null(fit.y1)) fit.y1 <- lavProbit(y=Y1, X=eXo) if(is.null(fit.y2)) fit.y2 <- lavProbit(y=Y2, X=eXo) y1.update <- y2.update <- FALSE if(!is.null(th.y1)) { # update thresholds fit.y1 y1.update <- TRUE fit.y1$theta[fit.y1$th.idx] <- th.y1 } if(!is.null(th.y2)) { # update thresholds fit.y1 y2.update <- TRUE fit.y2$theta[fit.y2$th.idx] <- th.y2 } if(!is.null(sl.y1)) { # update slopes y1.update <- TRUE fit.y1$theta[fit.y1$slope.idx] <- sl.y1 } if(!is.null(sl.y2)) { # update slopes y2.update <- TRUE fit.y2$theta[fit.y2$slope.idx] <- sl.y2 } if(y1.update) tmp <- fit.y1$lik() if(y2.update) tmp <- fit.y2$lik() if(missing(Y1)) Y1 <- fit.y1$y if(missing(Y2)) Y2 <- fit.y2$y if(missing(eXo) && length(fit.y1$slope.idx) > 0L) eXo <- fit.y1$X # lik lik <- pc_lik(rho=rho, fit.y1=fit.y1, fit.y2=fit.y2) # th.y1 if(identical(R, 0.0)) { y1.Z1 <- dnorm(fit.y1$z1) * 0.5 y1.Z2 <- dnorm(fit.y1$z2) * 0.5 } else { y1.Z1 <- ( dnorm(fit.y1$z1) * pnorm( (fit.y2$z1-rho*fit.y1$z1)/R) - dnorm(fit.y1$z1) * pnorm( (fit.y2$z2-rho*fit.y1$z1)/R) ) y1.Z2 <- ( dnorm(fit.y1$z2) * pnorm( (fit.y2$z1-rho*fit.y1$z2)/R) - dnorm(fit.y1$z2) * pnorm( (fit.y2$z2-rho*fit.y1$z2)/R) ) } dx.th.y1 <- (fit.y1$Y1*y1.Z1 - fit.y1$Y2*y1.Z2) / lik if(na.zero) { dx.th.y1[is.na(dx.th.y1)] <- 0 } # th.y2 if(identical(R, 0.0)) { y2.Z1 <- dnorm(fit.y2$z1) * 0.5 y2.Z2 <- dnorm(fit.y2$z2) * 0.5 } else { y2.Z1 <- ( dnorm(fit.y2$z1) * pnorm( (fit.y1$z1-rho*fit.y2$z1)/R) - dnorm(fit.y2$z1) * pnorm( (fit.y1$z2-rho*fit.y2$z1)/R) ) y2.Z2 <- ( dnorm(fit.y2$z2) * pnorm( (fit.y1$z1-rho*fit.y2$z2)/R) - dnorm(fit.y2$z2) * pnorm( (fit.y1$z2-rho*fit.y2$z2)/R) ) } dx.th.y2 <- (fit.y2$Y1*y2.Z1 - fit.y2$Y2*y2.Z2) / lik if(na.zero) { dx.th.y2[is.na(dx.th.y2)] <- 0 } dx.sl.y1 <- dx.sl.y2 <- NULL if(length(fit.y1$slope.idx) > 0L) { # sl.y1 dx.sl.y1 <- (y1.Z2 - y1.Z1) * eXo / lik if(na.zero) { dx.sl.y1[is.na(dx.sl.y1)] <- 0 } # sl.y2 dx.sl.y2 <- (y2.Z2 - y2.Z1) * eXo / lik if(na.zero) { dx.sl.y2[is.na(dx.sl.y2)] <- 0 } } # rho if(length(fit.y1$slope.idx) == 0L) { phi <- pc_PHI(rho, th.y1=fit.y1$theta[fit.y1$th.idx], th.y2=fit.y2$theta[fit.y2$th.idx]) #PP <- phi/PI dx <- phi[cbind(Y1,Y2)] } else { dx <- ( dbinorm(fit.y1$z1, fit.y2$z1, rho) - dbinorm(fit.y1$z2, fit.y2$z1, rho) - dbinorm(fit.y1$z1, fit.y2$z2, rho) + dbinorm(fit.y1$z2, fit.y2$z2, rho) ) } dx.rho <- dx / lik if(na.zero) { dx.rho[is.na(dx.rho)] <- 0 } list(dx.th.y1=dx.th.y1, dx.th.y2=dx.th.y2, dx.sl.y1=dx.sl.y1, dx.sl.y2=dx.sl.y2, dx.rho=dx.rho) } 
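# example (illustrative, commented out): a minimal two-step polychoric
# estimate using the helpers above -- NOT part of the package API;
# 'Y1'/'Y2' are hypothetical ordinal variables obtained by thresholding
# a bivariate normal (true rho = 0.5); this mirrors the
# method = "optimize" branch of pc_cor_TS()
#
# set.seed(1234)
# Z <- MASS::mvrnorm(500L, mu = c(0, 0),
#                    Sigma = matrix(c(1, 0.5, 0.5, 1), 2L, 2L))
# Y1 <- cut(Z[,1], breaks = c(-Inf, -0.5, 0.5, Inf), labels = FALSE)
# Y2 <- cut(Z[,2], breaks = c(-Inf, 0, Inf), labels = FALSE)
# # step 1: thresholds from the univariate margins
# th.y1 <- pc_th(Y1); th.y2 <- pc_th(Y2)
# # step 2: maximize the grouped loglik over rho only
# freq <- pc_freq(Y1, Y2)
# out <- optimize(function(rho) -sum(freq * log(pc_PI(rho, th.y1, th.y2))),
#                 interval = c(-0.999, 0.999))
# out$minimum   # estimated rho; should be close to 0.5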
lavaan/R/lav_partable_independence.R0000644000176200001440000002432613052637331017207 0ustar liggesusers# generate parameter table for an independence model lav_partable_independence <- function(lavobject = NULL, lavdata = NULL, lavoptions = NULL, lavsamplestats = NULL, # optional user-provided sample stats sample.cov = NULL, sample.mean = NULL, sample.th = NULL, sample.th.idx = NULL) { # grab everything from lavaan lavobject if(!is.null(lavobject)) { stopifnot(inherits(lavobject, "lavaan")) lavdata <- lavobject@Data lavoptions <- lavobject@Options lavsamplestats <- lavobject@SampleStats } # conditional.x ? check res.cov[[1]] slot conditional.x <- FALSE if(!is.null(lavsamplestats) && !is.null(lavsamplestats@res.cov[[1]])) { conditional.x <- TRUE } else if(!is.null(lavoptions) && lavoptions$conditional.x) { conditional.x <- TRUE } # if user-based moments are given, use these if(is.null(sample.cov) && !is.null(lavsamplestats)) { if(conditional.x) { sample.cov <- lavsamplestats@res.cov } else { sample.cov <- lavsamplestats@cov } } if(is.null(sample.mean) && !is.null(lavsamplestats)) { if(conditional.x) { sample.mean <- lavsamplestats@res.int } else { sample.mean <- lavsamplestats@mean } } if(is.null(sample.th) && !is.null(lavsamplestats)) { if(conditional.x) { sample.th <- lavsamplestats@res.th } else { sample.th <- lavsamplestats@th } } if(is.null(sample.th.idx) && !is.null(lavsamplestats)) { sample.th.idx <- lavsamplestats@th.idx } ov.names = lavdata@ov.names ov = lavdata@ov meanstructure = lavoptions$meanstructure parameterization = lavoptions$parameterization # what with fixed.x? if(lavoptions$mimic %in% c("lavaan", "Mplus")) { fixed.x = lavoptions$fixed.x ov.names.x = lavdata@ov.names.x } else if(lavoptions$mimic == "EQS") { # always ignore fixed.x fixed.x = FALSE ov.names.x = NULL } else if(lavoptions$mimic == "LISREL") { # always ignore fixed.x??? CHECKME!! fixed.x = FALSE ov.names.x = NULL } ngroups <- length(ov.names) lhs <- rhs <- op <- character(0) group <- free <- exo <- integer(0) ustart <- numeric(0) categorical <- any(ov$type == "ordered") for(g in 1:ngroups) { # a) VARIANCES (all ov's, if !conditional.x, also exo's) nvar <- length(ov.names[[g]]) lhs <- c(lhs, ov.names[[g]]) op <- c(op, rep("~~", nvar)) rhs <- c(rhs, ov.names[[g]]) group <- c(group, rep(g, nvar)) free <- c(free, rep(1L, nvar)) exo <- c(exo, rep(0L, nvar)) # starting values if(!is.null(sample.cov)) { ustart <- c(ustart, diag(sample.cov[[g]])) } else { ustart <- c(ustart, rep(as.numeric(NA), nvar)) } # ordered? 
fix variances, add thresholds ord.names <- character(0L) if(categorical) { ord.names <- ov$name[ ov$type == "ordered" ] # only for this group ord.names <- ov.names[[g]][ which(ov.names[[g]] %in% ord.names) ] if(length(ord.names) > 0L) { # fix variances to 1.0 idx <- which(lhs %in% ord.names & op == "~~" & lhs == rhs) ustart[idx] <- 1.0 free[idx] <- 0L # add thresholds lhs.th <- character(0); rhs.th <- character(0) for(o in ord.names) { nth <- ov$nlev[ ov$name == o ] - 1L if(nth < 1L) next lhs.th <- c(lhs.th, rep(o, nth)) rhs.th <- c(rhs.th, paste("t", seq_len(nth), sep="")) } nel <- length(lhs.th) lhs <- c(lhs, lhs.th) rhs <- c(rhs, rhs.th) op <- c(op, rep("|", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(1L, nel)) exo <- c(exo, rep(0L, nel)) # starting values if(!is.null(sample.th) && !is.null(sample.th.idx)) { th.start <- sample.th[[g]][ sample.th.idx[[g]] > 0L ] ustart <- c(ustart, th.start) } else { ustart <- c(ustart, rep(as.numeric(NA), nel)) } # add delta if(parameterization == "theta") { lhs.delta <- character(0); rhs.delta <- character(0) lhs.delta <- ov.names[[g]] nel <- length(lhs.delta) lhs <- c(lhs, lhs.delta) rhs <- c(rhs, lhs.delta) op <- c(op, rep("~*~", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(0L, nel)) exo <- c(exo, rep(0L, nel)) delta.start <- rep(1, nel) ustart <- c(ustart, delta.start) } # add mean/intercept, but fix to zero lhs.int <- ord.names nel <- length(lhs.int) rhs.int <- rep("", nel) lhs <- c(lhs, lhs.int) rhs <- c(rhs, rhs.int) op <- c(op, rep("~1", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(0L, nel)) exo <- c(exo, rep(0L, nel)) int.start <- rep(0, nel) ustart <- c(ustart, int.start) } } # meanstructure? if(meanstructure) { # auto-remove ordinal variables ov.int <- ov.names[[g]] idx <- which(ov.int %in% ord.names) if(length(idx)) ov.int <- ov.int[-idx] nel <- length(ov.int) lhs <- c(lhs, ov.int) op <- c(op, rep("~1", nel)) rhs <- c(rhs, rep("", nel)) group <- c(group, rep(g, nel)) free <- c(free, rep(1L, nel)) exo <- c(exo, rep(0L, nel)) # starting values if(!is.null(sample.mean)) { sample.int.idx <- match(ov.int, ov.names[[g]]) ustart <- c(ustart, sample.mean[[g]][sample.int.idx]) } else { ustart <- c(ustart, rep(as.numeric(NA), length(ov.int))) } } # fixed.x exogenous variables? 
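# note (illustrative): when fixed.x = TRUE, the variances, means and
# covariances of the exogenous covariates are kept at their sample
# values (free = 0, exo = 1), so they add no free parameters to the
# independence model; e.g. (hypothetical, assuming a fitted object 'fit'):
#
#     # indep <- lav_partable_independence(lavobject = fit)
#     # subset(as.data.frame(indep), exo == 1L)   # the fixed-x rows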
if(!conditional.x && (nx <- length(ov.names.x[[g]])) > 0L) { # fix variances exo.idx <- which(rhs %in% ov.names.x[[g]] & lhs %in% ov.names.x[[g]] & op == "~~" & group == g) if(fixed.x) { exo[exo.idx] <- 1L free[exo.idx] <- 0L } # fix means exo.idx <- which(lhs %in% ov.names.x[[g]] & op == "~1" & group == g) if(fixed.x) { exo[exo.idx] <- 1L free[exo.idx] <- 0L } # add covariances pstar <- nx*(nx-1)/2 if(pstar > 0L) { # only if more than 1 variable tmp <- utils::combn(ov.names.x[[g]], 2) lhs <- c(lhs, tmp[1,]) # to fill upper.tri op <- c(op, rep("~~", pstar)) rhs <- c(rhs, tmp[2,]) group <- c(group, rep(g, pstar)) if(fixed.x) { free <- c(free, rep(0L, pstar)) exo <- c(exo, rep(1L, pstar)) } else { free <- c(free, rep(1L, pstar)) exo <- c(exo, rep(0L, pstar)) } # starting values if(!is.null(sample.cov)) { rhs.idx <- match(tmp[1,], ov.names[[g]]) lhs.idx <- match(tmp[2,], ov.names[[g]]) ustart <- c(ustart, sample.cov[[g]][ cbind(rhs.idx, lhs.idx) ]) } else { ustart <- c(ustart, rep(as.numeric(NA), pstar)) } } } if(conditional.x && (nx <- length(ov.names.x[[g]])) > 0L) { # add regressions lhs <- c(lhs, rep("dummy", nx)) op <- c( op, rep("~", nx)) rhs <- c(rhs, ov.names.x[[g]]) # add dummy latent lhs <- c(lhs,"dummy"); op <- c(op, "=~"); rhs <- c(rhs, "dummy") lhs <- c(lhs,"dummy"); op <- c(op, "~~"); rhs <- c(rhs, "dummy") exo <- c(exo, rep(1L, nx)); exo <- c(exo, c(0L,0L)) group <- c(group, rep(g, nx + 2L)) free <- c(free, rep(0L, nx + 2L)) ustart <- c(ustart, rep(0, nx + 2L)) if(meanstructure) { lhs <- c(lhs,"dummy"); op <- c(op, "~1"); rhs <- c(rhs, "") exo <- c(exo, 0L) group <- c(group, g) free <- c(free, 0L) ustart <- c(ustart, 0) } } } # ngroups # free counter idx.free <- which(free > 0) free[idx.free] <- 1:length(idx.free) LIST <- list( id = 1:length(lhs), lhs = lhs, op = op, rhs = rhs, user = rep(1L, length(lhs)), block = group, # for now group = group, mod.idx = rep(0L, length(lhs)), free = free, ustart = ustart, exo = exo, label = rep("", length(lhs)) #eq.id = rep(0L, length(lhs)), #unco = free ) LIST } lavaan/R/lav_constraints.R0000644000176200001440000002632712623632010015256 0ustar liggesuserslav_constraints_parse <- function(partable = NULL, constraints = NULL, theta = NULL, debug = FALSE) { # just in case we do not have a $free column in partable if(is.null(partable$free)) { partable$free <- seq_len(length(partable$lhs)) } # from the partable: free parameters if(!is.null(theta)) { # nothing to do } else if(!is.null(partable$est)) { theta <- partable$est[ partable$free > 0L ] } else if(!is.null(partable$start)) { theta <- partable$start[ partable$free > 0L ] } else { theta <- rep(0, length(partable$lhs)) } # number of free parameters npar <- length(theta) # parse the constraints if(is.null(constraints)) { LIST <- NULL } else if(!is.character(constraints)) { stop("lavaan ERROR: constraints should be a string") } else { FLAT <- lavParseModelString( constraints ) CON <- attr(FLAT, "constraints") LIST <- list() if(length(CON) > 0L) { lhs = unlist(lapply(CON, "[[", "lhs")) op = unlist(lapply(CON, "[[", "op")) rhs = unlist(lapply(CON, "[[", "rhs")) LIST$lhs <- c(LIST$lhs, lhs) LIST$op <- c(LIST$op, op) LIST$rhs <- c(LIST$rhs, rhs) } else { stop("lavaan ERROR: no constraints found in constraints argument") } } # variable definitions def.function <- lav_partable_constraints_def(partable, con = LIST, debug = debug) # construct ceq/ciq functions ceq.function <- lav_partable_constraints_ceq(partable, con = LIST, debug = debug) # linear or nonlinear? 
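# note (illustrative): the classification below evaluates the constraint
# Jacobian at two random points; a row that is identical at both points
# is treated as linear. E.g., for theta = c(x1, x2):
#
#     # x1 - x2  == 0  -> Jacobian row (1, -1) everywhere    -> linear
#     # x1^2 - x2 == 0 -> Jacobian row (2*x1, -1), depends on x -> nonlinear
#
# (a probabilistic check; two random points suffice in practice)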
ceq.linear.idx <- lav_constraints_linear_idx(func = ceq.function, npar = npar) ceq.nonlinear.idx <- lav_constraints_nonlinear_idx(func = ceq.function, npar = npar) # inequalities cin.function <- lav_partable_constraints_ciq(partable, con = LIST, debug = debug) # linear or nonlinear? cin.linear.idx <- lav_constraints_linear_idx(func = cin.function, npar = npar) cin.nonlinear.idx <- lav_constraints_nonlinear_idx(func = cin.function, npar = npar) # Jacobians if(!is.null(body(ceq.function))) { ceq.JAC <- try(lav_func_jacobian_complex(func = ceq.function, x = theta), silent=TRUE) if(inherits(ceq.JAC, "try-error")) { # eg. pnorm() ceq.JAC <- lav_func_jacobian_simple(func = ceq.function, x = theta) } # constants # do we have a non-zero 'rhs' elements? FIXME!!! is this reliable?? ceq.rhs <- -1 * ceq.function( numeric(npar) ) # evaluate constraints ceq.theta <- ceq.function(theta) } else { ceq.JAC <- matrix(0, nrow = 0L, ncol = npar) ceq.rhs <- numeric(0L) ceq.theta <- numeric(0L) } if(!is.null(body(cin.function))) { cin.JAC <- try(lav_func_jacobian_complex(func = cin.function, x = theta), silent=TRUE) if(inherits(cin.JAC, "try-error")) { # eg. pnorm() cin.JAC <- lav_func_jacobian_simple(func = cin.function, x = theta) } # constants # do we have a non-zero 'rhs' elements? FIXME!!! is this reliable?? cin.rhs <- -1 * cin.function( numeric(npar) ) # evaluate constraints cin.theta <- cin.function(theta) } else { cin.JAC <- matrix(0, nrow = 0L, ncol = npar) cin.rhs <- numeric(0L) cin.theta <- numeric(0L) } # shortcut flags ceq.linear.flag <- length(ceq.linear.idx) > 0L ceq.nonlinear.flag <- length(ceq.nonlinear.idx) > 0L ceq.flag <- ceq.linear.flag || ceq.nonlinear.flag cin.linear.flag <- length(cin.linear.idx) > 0L cin.nonlinear.flag <- length(cin.nonlinear.idx) > 0L cin.flag <- cin.linear.flag || cin.nonlinear.flag ceq.only.flag <- ceq.flag && !cin.flag cin.only.flag <- cin.flag && !ceq.flag ceq.linear.only.flag <- ( ceq.linear.flag && !ceq.nonlinear.flag && !cin.flag ) # additional info if ceq.linear.flag if(ceq.linear.flag) { ## NEW: 18 nov 2014: handle general *linear* constraints ## ## see Nocedal & Wright (2006) 15.3 ## - from x to x.red: ## x.red <- MASS::ginv(Q2) %*% (x - Q1 %*% solve(t(R)) %*% b) ## or ## x.red <- as.numeric((x - b %*% qr.coef(QR,diag(npar))) %*% Q2) ## ## - from x.red to x ## x <- as.numeric(Q1 %*% solve(t(R)) %*% b + Q2 %*% x.red) ## or ## x <- as.numeric(b %*% qr.coef(QR, diag(npar))) + ## as.numeric(Q2 %*% x.red) ## ## we write eq.constraints.K = Q2 ## eq.constraints.k0 = b %*% qr.coef(QR, diag(npar))) # compute range+null space of the jacobion (JAC) of the constraint # matrix #JAC <- lav_func_jacobian_complex(func = ceq.function, # x = lavpartable$start[lavpartable$free > 0L] QR <- qr(t(ceq.JAC)) ranK <- QR$rank Q <- qr.Q(QR, complete = TRUE) # Q1 <- Q[,1:ranK, drop = FALSE] # range space # Q2 <- Q[,-seq_len(ranK), drop = FALSE] # null space # R <- qr.R(QR) ceq.JAC.NULL <- Q[,-seq_len(ranK), drop = FALSE] if(all(ceq.rhs == 0)) { ceq.rhs.NULL <- numeric(npar) } else { tmp <- qr.coef(QR, diag(npar)) NA.idx <- which(is.na(rowSums(tmp))) # catch NAs if(length(NA.idx) > 0L) { tmp[NA.idx,] <- 0 } ceq.rhs.NULL <- as.numeric(ceq.rhs %*% tmp) } } else { ceq.JAC.NULL <- matrix(0,0L,0L) ceq.rhs.NULL <- numeric(0L) } # dummy jacobian 'function' ceq.jacobian <- function() NULL cin.jacobian <- function() NULL OUT <- list(def.function = def.function, ceq.function = ceq.function, ceq.JAC = ceq.JAC, ceq.jacobian = ceq.jacobian, ceq.rhs = ceq.rhs, ceq.theta = ceq.theta, ceq.linear.idx = 
ceq.linear.idx, ceq.nonlinear.idx = ceq.nonlinear.idx, ceq.linear.flag = ceq.linear.flag, ceq.nonlinear.flag = ceq.nonlinear.flag, ceq.flag = ceq.flag, ceq.linear.only.flag = ceq.linear.only.flag, ceq.JAC.NULL = ceq.JAC.NULL, ceq.rhs.NULL = ceq.rhs.NULL, cin.function = cin.function, cin.JAC = cin.JAC, cin.jacobian = cin.jacobian, cin.rhs = cin.rhs, cin.theta = cin.theta, cin.linear.idx = cin.linear.idx, cin.nonlinear.idx = cin.nonlinear.idx, cin.linear.flag = cin.linear.flag, cin.nonlinear.flag = cin.nonlinear.flag, cin.flag = cin.flag, cin.only.flag = cin.only.flag) OUT } lav_constraints_linear_idx <- function(func = NULL, npar = NULL) { if(is.null(func) || is.null(body(func))) return(integer(0L)) # seed 1: rnorm A0 <- lav_func_jacobian_complex(func = func, x = rnorm(npar)) # seed 2: rnorm A1 <- lav_func_jacobian_complex(func = func, x = rnorm(npar)) A0minA1 <- A0 - A1 linear <- apply(A0minA1, 1, function(x) all(x == 0)) which(linear) } lav_constraints_nonlinear_idx <- function(func = NULL, npar = NULL) { if(is.null(func) || is.null(body(func))) return(integer(0L)) # seed 1: rnorm A0 <- lav_func_jacobian_complex(func = func, x = rnorm(npar)) # seed 2: rnorm A1 <- lav_func_jacobian_complex(func = func, x = rnorm(npar)) A0minA1 <- A0 - A1 linear <- apply(A0minA1, 1, function(x) all(x == 0)) which(!linear) } # FIXME: is there a more elegant/robust way to do this?? lav_constraints_check_linear <- function(model) { # seed 1: rnorm A.ceq <- A.cin <- matrix(0, model@nx.free, 0) if(!is.null(body(model@ceq.function))) A.ceq <- t(lav_func_jacobian_complex(func=model@ceq.function, x=rnorm(model@nx.free))) if(!is.null(body(model@cin.function))) A.cin <- t(lav_func_jacobian_complex(func=model@cin.function, x=rnorm(model@nx.free))) A0 <- cbind(A.ceq, A.cin) # seed 2: rnorm A.ceq <- A.cin <- matrix(0, model@nx.free, 0) if(!is.null(body(model@ceq.function))) A.ceq <- t(lav_func_jacobian_complex(func=model@ceq.function, x=rnorm(model@nx.free))) if(!is.null(body(model@cin.function))) A.cin <- t(lav_func_jacobian_complex(func=model@cin.function, x=rnorm(model@nx.free))) A1 <- cbind(A.ceq, A.cin) A0minA1 <- all.equal(A0, A1) if(is.logical(A0minA1) && A0minA1 == TRUE) return(TRUE) else return(FALSE) } # check if the equality constraints are 'simple' (a == b) lav_constraints_check_simple <- function(lavmodel = NULL) { ones <- (lavmodel@ceq.JAC == 1 | lavmodel@ceq.JAC == -1) simple <- all(lavmodel@ceq.rhs == 0) && all(apply(lavmodel@ceq.JAC != 0, 1, sum) == 2) && all(apply(ones, 1, sum) == 2) && length(lavmodel@ceq.nonlinear.idx) == 0 # TRUE or FALSE simple } lav_constraints_R2K <- function(lavmodel = NULL, R = NULL) { # constraint matrix if(!is.null(lavmodel)) { R <- lavmodel@ceq.JAC } stopifnot(!is.null(R)) npar.full <- NCOL(R) npar.red <- npar.full - NROW(R) K <- diag(npar.full) for(i in 1:NROW(R)) { idx1 <- which(R[i,] == 1) idx2 <- which(R[i,] == -1) K[idx2, idx1] <- 1 } # remove redundant columns neg.idx <- which(colSums(R) < 0) K <- K[,-neg.idx] K } lav_constraints_lambda_pre <- function(lavobject = NULL, method = "Don") { # compute factor 'pre' so that pre %*% g = lambda method <- tolower(method) R <- lavobject@Model@con.jac[,] if(is.null(R) || length(R) == 0L) { return( numeric(0L) ) } INFO <- lavTech(lavobject, "information.first.order") npar <- nrow(INFO) # Don 1985 if(method == "don") { R.plus <- MASS::ginv(R) # construct augmented matrix Z <- rbind( cbind(INFO, t(R)), cbind(R, matrix(0,nrow=nrow(R), ncol=nrow(R))) ) Z.plus <- MASS::ginv(Z) P.star <- Z.plus[1:npar, 1:npar] PRE <- t(R.plus) %*% 
(diag(npar) - INFO %*% P.star) # Bentler EQS manual } else if(method == "bentler") { INFO.inv <- solve(INFO) PRE <- solve( R %*% INFO.inv %*% t(R) ) %*% R %*% INFO.inv } PRE } lavaan/R/lav_model_gradient.R0000644000176200001440000012221513052777427015700 0ustar liggesusers# model gradient lav_model_gradient <- function(lavmodel = NULL, GLIST = NULL, lavsamplestats = NULL, lavdata = NULL, lavcache = NULL, type = "free", verbose = FALSE, forcePD = TRUE, group.weight = TRUE, Delta = NULL, m.el.idx = NULL, x.el.idx = NULL) { nmat <- lavmodel@nmat estimator <- lavmodel@estimator representation <- lavmodel@representation meanstructure <- lavmodel@meanstructure categorical <- lavmodel@categorical group.w.free <- lavmodel@group.w.free fixed.x <- lavmodel@fixed.x conditional.x <- lavmodel@conditional.x num.idx <- lavmodel@num.idx th.idx <- lavmodel@th.idx nx.free <- lavmodel@nx.free # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST if(estimator == "REML") warning("analytical gradient not implement; use numerical approximation") # group.weight # FIXME --> block.weight if(group.weight) { if(estimator %in% c("ML","PML","FML","MML","REML","NTRLS")) { group.w <- (unlist(lavsamplestats@nobs)/lavsamplestats@ntotal) } else { # FIXME: double check! group.w <- ((unlist(lavsamplestats@nobs)-1)/lavsamplestats@ntotal) } } else { group.w <- rep(1.0, lavmodel@nblocks) } # do we need WLS.est? if(estimator %in% c("WLS", "DWLS", "ULS", "GLS", "NTRLS")) { # always compute WLS.est WLS.est <- lav_model_wls_est(lavmodel = lavmodel, GLIST = GLIST) #, # cov.x = lavsamplestats@cov.x) } if(estimator %in% c("ML", "PML", "FML", "REML", "NTRLS")) { # compute moments for all groups #if(conditional.x) { # Sigma.hat <- computeSigmaHatJoint(lavmodel = lavmodel, # GLIST = GLIST, lavsamplestats = lavsamplestats, # extra = (estimator %in% c("ML", "REML","NTRLS"))) #} else { Sigma.hat <- computeSigmaHat(lavmodel = lavmodel, GLIST = GLIST, extra = (estimator %in% c("ML", "REML", "NTRLS"))) #} # ridge here? if(meanstructure && !categorical) { #if(conditional.x) { # Mu.hat <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST, # lavsamplestats = lavsamplestats) #} else { Mu.hat <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST) #} } else if(categorical) { TH <- computeTH(lavmodel = lavmodel, GLIST = GLIST) } if(conditional.x) { PI <- computePI(lavmodel = lavmodel, GLIST = GLIST) } if(estimator == "PML") { if(lavmodel@nexo > 0L) { PI <- computePI(lavmodel = lavmodel) } else { PI <- vector("list", length = lavmodel@nblocks) } } if(group.w.free) { GW <- computeGW(lavmodel = lavmodel, GLIST = GLIST) } } else if(estimator == "MML") { TH <- computeTH( lavmodel = lavmodel, GLIST = GLIST) THETA <- computeTHETA(lavmodel = lavmodel, GLIST = GLIST) GW <- computeGW( lavmodel = lavmodel, GLIST = GLIST) } # four approaches (FIXME!!!! merge this!) # - ML approach: using Omega (and Omega.mu) # Omega = 'POST' = Sigma.inv %*% (S - Sigma) %*% t(Sigma.inv) # (still 2x faster than Delta method) # - WLS/DWLS/GLS: using Delta + WLS.V; support for fixed.x, conditional.x # - (ML)/NTRLS: using Delta, no support for fixed.x, conditional.x # - PML/FML/MML: custom # 1. 
ML approach if( (estimator == "ML" || estimator == "REML") && !lavmodel@conditional.x ) { if(meanstructure) { Omega <- computeOmega(Sigma.hat=Sigma.hat, Mu.hat=Mu.hat, lavsamplestats=lavsamplestats, estimator=estimator, meanstructure=TRUE, conditional.x = conditional.x) Omega.mu <- attr(Omega, "mu") } else { Omega <- computeOmega(Sigma.hat=Sigma.hat, Mu.hat=NULL, lavsamplestats=lavsamplestats, estimator=estimator, meanstructure=FALSE, conditional.x = conditional.x) Omega.mu <- vector("list", length = lavmodel@nblocks) } # compute DX (for all elements in every model matrix) DX <- vector("list", length=length(GLIST)) for(g in 1:lavmodel@nblocks) { # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] mm.names <- names( GLIST[mm.in.group] ) if(representation == "LISREL") { DX.group <- derivative.F.LISREL(GLIST[mm.in.group], Omega[[g]], Omega.mu[[g]]) # FIXME!!! # add empty gamma if(lavmodel@conditional.x) { DX.group$gamma <- lavmodel@GLIST$gamma } # only save what we need DX[mm.in.group] <- DX.group[ mm.names ] } else { stop("only representation LISREL has been implemented for now") } # weight by group if(lavmodel@nblocks > 1L) { for(mm in mm.in.group) { DX[[mm]] <- group.w[g] * DX[[mm]] } } } # extract free parameters if(type == "free") { dx <- numeric( nx.free ) for(g in 1:lavmodel@nblocks) { mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] for(mm in mm.in.group) { m.free.idx <- lavmodel@m.free.idx[[mm]] x.free.idx <- lavmodel@x.free.idx[[mm]] dx[x.free.idx] <- DX[[mm]][m.free.idx] } } # handle equality constraints #if(lavmodel@eq.constraints && constraints) { # dx <- as.numeric( t(lavmodel@eq.constraints.K) %*% dx ) #} } else { dx <- DX # handle equality constraints ### FIXME!!!! TODO!!!! } } else # ML # 2. using Delta - *LS family if(estimator %in% c("WLS", "DWLS", "ULS", "GLS", "NTGLS")) { if(type != "free") { if(is.null(Delta)) stop("FIXME: Delta should be given if type != free") #stop("FIXME: WLS gradient with type != free needs fixing!") } else { Delta <- computeDelta(lavmodel = lavmodel, GLIST. 
= GLIST) } for(g in 1:lavmodel@nblocks) { #diff <- as.matrix(lavsamplestats@WLS.obs[[g]] - WLS.est[[g]]) #group.dx <- -1 * ( t(Delta[[g]]) %*% lavsamplestats@WLS.V[[g]] %*% diff) # 0.5-17: use crossprod twice; treat DWLS/ULS special if(estimator == "WLS" || estimator == "GLS" || estimator == "NTRLS") { # full weight matrix diff <- lavsamplestats@WLS.obs[[g]] - WLS.est[[g]] # full weight matrix if(estimator == "GLS" || estimator == "WLS") { WLS.V <- lavsamplestats@WLS.V[[g]] group.dx <- -1 * crossprod(Delta[[g]], crossprod(WLS.V, diff)) } else if(estimator == "NTRLS") { stopifnot(!conditional.x) #WLS.V <- lav_samplestats_Gamma_inverse_NT( # ICOV = attr(Sigma.hat[[g]],"inv")[,,drop=FALSE], # COV = Sigma.hat[[g]][,,drop=FALSE], # MEAN = Mu.hat[[g]], # x.idx = lavsamplestats@x.idx[[g]], # fixed.x = fixed.x, # conditional.x = conditional.x, # meanstructure = meanstructure, # slopestructure = conditional.x) S <- lavsamplestats@cov[[g]] Sigma <- Sigma.hat[[g]] Sigma.inv <- attr(Sigma, "inv") nvar <- NROW(Sigma) if(meanstructure) { MEAN <- lavsamplestats@mean[[g]]; Mu <- Mu.hat[[g]] POST.Sigma <- lav_matrix_duplication_pre( matrix((Sigma.inv %*% (S - Sigma) %*% t(Sigma.inv)) %*% (diag(nvar) + (S - Sigma) %*% Sigma.inv) + (Sigma.inv %*% tcrossprod(MEAN - Mu) %*% Sigma.inv), ncol = 1) ) POST.Mu <- as.numeric(2 * Sigma.inv %*% (MEAN - Mu)) POST <- c(POST.Mu, POST.Sigma) } else { POST <- lav_matrix_duplication_pre( matrix((Sigma.inv %*% (S - Sigma) %*% t(Sigma.inv)) %*% (diag(nvar) + (S - Sigma) %*% Sigma.inv), ncol = 1)) } group.dx <- as.numeric( -1 * crossprod(Delta[[g]], POST) ) } } else if(estimator == "DWLS" || estimator == "ULS") { # diagonal weight matrix diff <- lavsamplestats@WLS.obs[[g]] - WLS.est[[g]] group.dx <- -1 * crossprod(Delta[[g]], lavsamplestats@WLS.VD[[g]] * diff) } group.dx <- group.w[g] * group.dx if(g == 1) { dx <- group.dx } else { dx <- dx + group.dx } } # g if(type == "free") { # nothing to do } else { # make a GLIST dx <- lav_model_x2GLIST(lavmodel = lavmodel, x = dx, type = "custom", setDelta = FALSE, m.el.idx = m.el.idx, x.el.idx = x.el.idx) } } # WLS else if(estimator == "ML" && lavmodel@conditional.x) { if(type != "free") { if(is.null(Delta)) stop("FIXME: Delta should be given if type != free") #stop("FIXME: WLS gradient with type != free needs fixing!") } else { Delta <- computeDelta(lavmodel = lavmodel, GLIST. 
= GLIST) } for(g in 1:lavmodel@nblocks) { # augmented mean.x + cov.x matrix mean.x <- lavsamplestats@mean.x[[g]] cov.x <- lavsamplestats@cov.x[[g]] C3 <- rbind(c(1,mean.x), cbind(mean.x, cov.x + tcrossprod(mean.x))) Sigma <- Sigma.hat[[g]] Mu.g <- Mu.hat[[g]] PI.g <- PI[[g]] Sigma.inv <- attr(Sigma, "inv") nvar <- NROW(Sigma) S <- lavsamplestats@res.cov[[g]] # beta OBS <- t( cbind(lavsamplestats@res.int[[g]], lavsamplestats@res.slopes[[g]]) ) EST <- t( cbind(Mu.g, PI.g) ) #obs.beta <- c(lavsamplestats@res.int[[g]], # lav_matrix_vec(lavsamplestats@res.slopes[[g]])) #est.beta <- c(Mu.g, lav_matrix_vec(PI.g)) #beta.COV <- C3 %x% Sigma.inv #a <- t(obs.beta - est.beta) #b <- as.matrix(obs.beta - est.beta) #K <- lav_matrix_commutation(m = nvar, n = nvar) #AB <- (K %x% diag(NROW(C3)*NROW(C3))) %*% # (diag(nvar) %x% lav_matrix_vec(C3) %x% diag(nvar)) #K <- lav_matrix_commutation(m = nvar, n = NROW(C3)) #AB <- ( diag(NROW(C3)) %x% K %x% diag(nvar) ) %*% # (lav_matrix_vec(C3) %x% diag( nvar * nvar) ) #POST.beta <- 2 * beta.COV %*% (obs.beta - est.beta) d.BETA <- C3 %*% (OBS - EST) %*% Sigma.inv # NOTE: the vecr here, unlike lav_mvreg_dlogl_beta # this is because DELTA has used vec(t(BETA)), # instead of vec(BETA) POST.beta <- 2 * lav_matrix_vecr(d.BETA) #POST.sigma1 <- lav_matrix_duplication_pre( # (Sigma.inv %x% Sigma.inv) %*% t(AB) %*% (t(a) %x% b) ) # Sigma #POST.sigma2 <- lav_matrix_duplication_pre( # matrix( lav_matrix_vec( # Sigma.inv %*% (S - Sigma) %*% t(Sigma.inv)), ncol = 1L)) W.tilde <- S + t(OBS - EST) %*% C3 %*% (OBS - EST) d.SIGMA <- (Sigma.inv - Sigma.inv %*% W.tilde %*% Sigma.inv) d.vechSigma <- as.numeric( lav_matrix_duplication_pre( as.matrix(lav_matrix_vec(d.SIGMA)) ) ) POST.sigma <- -1 * d.vechSigma #POST <- c(POST.beta, POST.sigma1 + POST.sigma2) POST <- c(POST.beta, POST.sigma) group.dx <- as.numeric( -1 * crossprod(Delta[[g]], POST) ) group.dx <- group.w[g] * group.dx if(g == 1) { dx <- group.dx } else { dx <- dx + group.dx } } # g if(type == "free") { # nothing to do } else { # make a GLIST dx <- lav_model_x2GLIST(lavmodel = lavmodel, x = dx, type = "custom", setDelta = FALSE, m.el.idx = m.el.idx, x.el.idx = x.el.idx) } } # ML + conditional.x else if(estimator == "PML" || estimator == "FML" || estimator == "MML") { if(type != "free") { stop("FIXME: type != free in lav_model_gradient for estimator PML") } else { Delta <- computeDelta(lavmodel = lavmodel, GLIST. 
= GLIST) } for(g in 1:lavmodel@nblocks) { #print(GLIST) #print(lav_model_get_parameters(lavmodel = lavmodel, GLIST = GLIST)) #print(Sigma.hat[[g]]) #print(TH[[g]]) #cat("*****\n") # compute partial derivative of logLik with respect to # thresholds/means, slopes, variances, correlations if(estimator == "PML") { d1 <- pml_deriv1(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = th.idx[[g]], num.idx = num.idx[[g]], X = lavdata@X[[g]], lavcache = lavcache[[g]], eXo = lavdata@eXo[[g]], PI = PI[[g]], missing = lavdata@missing) # chain rule (fmin) group.dx <- as.numeric(t(d1) %*% Delta[[g]]) } else if(estimator == "FML") { d1 <- fml_deriv1(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = th.idx[[g]], num.idx = num.idx[[g]], X = lavdata@X[[g]], lavcache = lavcache[[g]]) # chain rule (fmin) group.dx <- as.numeric(t(d1) %*% Delta[[g]])/lavsamplestats@nobs[[g]] } else if(estimator == "MML") { group.dx <- lav_model_gradient_mml(lavmodel = lavmodel, GLIST = GLIST, THETA = THETA[[g]], TH = TH[[g]], group = g, lavdata = lavdata, sample.mean = lavsamplestats@mean[[g]], sample.mean.x = lavsamplestats@mean.x[[g]], lavcache = lavcache) } # group weights (if any) group.dx <- group.w[g] * group.dx if(g == 1) { dx <- group.dx } else { dx <- dx + group.dx } } # g } else { stop("lavaan ERROR: no analytical gradient available for estimator ", estimator) } # group.w.free for ML if(lavmodel@group.w.free && estimator %in% c("ML","MML","FML","PML","REML")) { #est.prop <- unlist( computeGW(lavmodel = lavmodel, GLIST = GLIST) ) #obs.prop <- unlist(lavsamplestats@group.w) # FIXME: G2 based -- ML and friends only!! #dx.GW <- - (obs.prop - est.prop) # poisson version est.freq <- exp(unlist(computeGW(lavmodel = lavmodel, GLIST = GLIST))) obs.freq <- unlist(lavsamplestats@group.w) * lavsamplestats@ntotal dx.GW <- - (obs.freq - est.freq) # divide by N (to be consistent with the rest of lavaan) dx.GW <- dx.GW / lavsamplestats@ntotal # remove last element (fixed LAST group to zero) # dx.GW <- dx.GW[-length(dx.GW)] # fill in in dx gw.mat.idx <- which(names(lavmodel@GLIST) == "gw") gw.x.idx <- unlist( lavmodel@x.free.idx[gw.mat.idx] ) dx[gw.x.idx] <- dx.GW } # dx is 1xnpar matrix of LIST (type != "free") if(is.matrix(dx)) { dx <- as.numeric(dx) } dx } # for testing purposes only #computeDeltaNumerical <- function(lavmodel = NULL, GLIST = NULL, g = 1L) { # # # state or final? # if(is.null(GLIST)) GLIST <- lavmodel@GLIST # # compute.moments <- function(x) { # GLIST <- lav_model_x2GLIST(lavmodel = NULL, x=x, type="free") # Sigma.hat <- computeSigmaHat(lavmodel = NULL, GLIST = GLIST) # S.vec <- lav_matrix_vech(Sigma.hat[[g]]) # if(lavmodel@meanstructure) { # Mu.hat <- computeMuHat(lavmodel = NULL, GLIST=GLIST) # out <- c(Mu.hat[[g]], S.vec) # } else { # out <- S.vec # } # out # } # # x <- lav_model_get_parameters(lavmodel = NULL, GLIST=GLIST, type="free") # Delta <- lav_func_jacobian_complex(func=compute.moments, x = x) # # Delta #} ### FIXME: should we here also: ### - weight for groups? (no, for now) ### - handle equality constraints? (yes, for now) computeDelta <- function(lavmodel = NULL, GLIST. = NULL, m.el.idx. = NULL, x.el.idx. 
= NULL) { representation <- lavmodel@representation categorical <- lavmodel@categorical conditional.x <- lavmodel@conditional.x group.w.free <- lavmodel@group.w.free nmat <- lavmodel@nmat nblocks <- lavmodel@nblocks nvar <- lavmodel@nvar num.idx <- lavmodel@num.idx th.idx <- lavmodel@th.idx nexo <- lavmodel@nexo parameterization <- lavmodel@parameterization # number of thresholds per group (if any) nth <- sapply(th.idx, function(x) sum(x > 0L)) # state or final? if(is.null(GLIST.)) GLIST <- lavmodel@GLIST else GLIST <- GLIST. # type = "free" or something else? type <- "nonfree" m.el.idx <- m.el.idx.; x.el.idx <- x.el.idx. if(is.null(m.el.idx) && is.null(x.el.idx)) type <- "free" # number of rows in DELTA.group pstar <- integer(nblocks) for(g in 1:nblocks) { pstar[g] <- as.integer(nvar[g] * (nvar[g] + 1) / 2) if(lavmodel@meanstructure) { pstar[g] <- nvar[g] + pstar[g] # first the means, then sigma } if(categorical) { pstar[g] <- pstar[g] - nvar[g] # remove variances pstar[g] <- pstar[g] - nvar[g] # remove means pstar[g] <- pstar[g] + nth[g] # add thresholds pstar[g] <- pstar[g] + length(num.idx[[g]]) # add num means pstar[g] <- pstar[g] + length(num.idx[[g]]) # add num vars } if(conditional.x && nexo[g] > 0L) { pstar[g] <- pstar[g] + (nvar[g] * nexo[g]) # add slopes } if(group.w.free) { pstar[g] <- pstar[g] + 1L # add group weight } } # number of columns in DELTA + m.el.idx/x.el.idx if(type == "free") { NCOL <- lavmodel@nx.free m.el.idx <- x.el.idx <- vector("list", length=length(GLIST)) for(mm in 1:length(GLIST)) { m.el.idx[[mm]] <- lavmodel@m.free.idx[[mm]] x.el.idx[[mm]] <- lavmodel@x.free.idx[[mm]] # handle symmetric matrices if(lavmodel@isSymmetric[mm]) { # since we use 'x.free.idx', only symmetric elements # are duplicated (not the equal ones, only in x.free.free) dix <- duplicated(x.el.idx[[mm]]) if(any(dix)) { m.el.idx[[mm]] <- m.el.idx[[mm]][!dix] x.el.idx[[mm]] <- x.el.idx[[mm]][!dix] } } } } else { ## FIXME: this does *not* take into account symmetric ## matrices; hence NCOL will be too large, and empty ## columns will be added ## this is ugly, but it doesn't hurt ## alternative could be: ## NCOL <- sum(unlist(lapply(x.el.idx, function(x) length(unique(x))))) #NCOL <- sum(unlist(lapply(m.el.idx, length))) NCOL <- sum(unlist(lapply(x.el.idx, function(x) length(unique(x))))) # sanity check #nx <- sum(unlist(lapply(x.el.idx, length))) #stopifnot(NCOL == nx) } # compute Delta Delta <- vector("list", length=nblocks) for(g in 1:nblocks) { Delta.group <- matrix(0, nrow=pstar[g], ncol=NCOL) # which mm belong to group g? mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] # label rows of Delta.group --- FIXME!!! #if(categorical) { # # 1. th (means interleaved?) # # 2. pi # # 3. 
var num + cor #} else { # if(meanstructure) { # } #} #if(group.w.free) { #} # if theta, do some preparation if(parameterization == "theta") { sigma.hat <- computeSigmaHat.LISREL(MLIST=GLIST[mm.in.group], delta=FALSE) dsigma <- diag(sigma.hat) # dcor/dcov for sigma R <- lav_deriv_cov2cor(sigma.hat, num.idx = lavmodel@num.idx[[g]]) theta.var.idx <- lav_matrix_diagh_idx(nvar[g]) } for(mm in mm.in.group) { mname <- names(lavmodel@GLIST)[mm] # skip empty ones if(!length(m.el.idx[[mm]])) next # get Delta columns for this model matrix if(representation == "LISREL") { # Sigma DELTA <- dxSigma <- derivative.sigma.LISREL(m = mname, idx = m.el.idx[[mm]], MLIST = GLIST[ mm.in.group ], delta = parameterization == "delta") if(categorical && parameterization == "theta") { DELTA <- R %*% DELTA } if(categorical) { # reorder: first variances (of numeric), then covariances cov.idx <- lav_matrix_vech_idx(nvar[g]) covd.idx <- lav_matrix_vech_idx(nvar[g], diagonal = FALSE) var.idx <- which(is.na(match(cov.idx, covd.idx)))[num.idx[[g]]] cor.idx <- match(covd.idx, cov.idx) DELTA <- rbind(DELTA[var.idx,,drop=FALSE], DELTA[cor.idx,,drop=FALSE]) } if(!categorical) { if(lavmodel@meanstructure) { DELTA.mu <- derivative.mu.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) if(conditional.x && lavmodel@nexo[g] > 0L) { DELTA.pi <- derivative.pi.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) DELTA <- rbind(DELTA.mu, DELTA.pi, DELTA) } else { DELTA <- rbind(DELTA.mu, DELTA) } } else { if(conditional.x && lavmodel@nexo[g] > 0L) { DELTA.pi <- derivative.pi.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) DELTA <- rbind(DELTA.pi, DELTA) } else { # nothing to do } } } else if(categorical) { DELTA.th <- derivative.th.LISREL(m=mname, idx=m.el.idx[[mm]], th.idx=th.idx[[g]], MLIST=GLIST[ mm.in.group ], delta = TRUE) if(parameterization == "theta") { # dy/ddsigma = -0.5/(ddsigma*sqrt(ddsigma)) dDelta.dx <- ( dxSigma[theta.var.idx,,drop=FALSE] * -0.5 / (dsigma*sqrt(dsigma)) ) dth.dDelta <- derivative.th.LISREL(m = "delta", idx = 1:nvar[g], MLIST = GLIST[ mm.in.group ], th.idx = th.idx[[g]]) # add dth.dDelta %*% dDelta.dx no.num.idx <- which(th.idx[[g]] > 0) DELTA.th[no.num.idx,] <- DELTA.th[no.num.idx,,drop=FALSE] + (dth.dDelta %*% dDelta.dx)[no.num.idx,,drop=FALSE] } if(lavmodel@nexo[g] > 0L) { DELTA.pi <- derivative.pi.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) if(parameterization == "theta") { dpi.dDelta <- derivative.pi.LISREL(m = "delta", idx = 1:nvar[g], MLIST = GLIST[ mm.in.group ]) # add dpi.dDelta %*% dDelta.dx no.num.idx <- which(!seq.int(1L,nvar[g]) %in% num.idx[[g]]) no.num.idx <- rep(seq.int(0,nexo[g]-1) * nvar[g], each=length(no.num.idx)) + no.num.idx DELTA.pi[no.num.idx,] <- DELTA.pi[no.num.idx,,drop=FALSE] + (dpi.dDelta %*% dDelta.dx)[no.num.idx,,drop=FALSE] } DELTA <- rbind(DELTA.th, DELTA.pi, DELTA) } else { DELTA <- rbind(DELTA.th, DELTA) } } if(group.w.free) { DELTA.gw <- derivative.gw.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) DELTA <- rbind(DELTA.gw, DELTA) } } else { stop("representation", representation, "not implemented yet") } Delta.group[ ,x.el.idx[[mm]]] <- DELTA } # mm # save(Delta.group, file=paste0("delta_NO_EQ",g,".Rdata")) # if type == "free" take care of equality constraints #if(type == "free" && lavmodel@eq.constraints) { # Delta.group <- Delta.group %*% lavmodel@eq.constraints.K #} #Delta.eq <- Delta.group # save(Delta.eq, file=paste0("delta_NO_EQ",g,".Rdata")) Delta[[g]] <- Delta.group } # g Delta } 
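# not run -- a hedged numerical sanity check for computeDelta(), in the
# same spirit as the commented-out computeDeltaNumerical() above; this
# sketch assumes a fitted single-group, complete-data model 'fit' with a
# meanstructure (so the rows of Delta are ordered as c(Mu, vech(Sigma))),
# and uses only the internal helpers that appear in this file:
#
#   lavmodel <- fit@Model
#   moments <- function(x) {
#       GLIST <- lav_model_x2GLIST(lavmodel = lavmodel, x = x, type = "free")
#       Mu    <- computeMuHat(lavmodel = lavmodel, GLIST = GLIST)[[1]]
#       Sigma <- computeSigmaHat(lavmodel = lavmodel, GLIST = GLIST)[[1]]
#       c(Mu, lav_matrix_vech(Sigma))
#   }
#   x <- lav_model_get_parameters(lavmodel = lavmodel, type = "free")
#   Delta.num <- lav_func_jacobian_complex(func = moments, x = x)
#   max(abs(Delta.num - computeDelta(lavmodel = lavmodel)[[1]]))  # ~ 0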
computeDeltaDx <- function(lavmodel = NULL, GLIST = NULL, target = "lambda") { # state or final? if(is.null(GLIST)) GLIST <- lavmodel@GLIST representation <- lavmodel@representation nmat <- lavmodel@nmat nblocks <- lavmodel@nblocks num.idx <- lavmodel@num.idx th.idx <- lavmodel@th.idx # number of columns in DELTA + m.el.idx/x.el.idx type <- "free" #if(type == "free") { NCOL <- lavmodel@nx.free m.el.idx <- x.el.idx <- vector("list", length=length(GLIST)) for(mm in 1:length(GLIST)) { m.el.idx[[mm]] <- lavmodel@m.free.idx[[mm]] x.el.idx[[mm]] <- lavmodel@x.free.idx[[mm]] # handle symmetric matrices if(lavmodel@isSymmetric[mm]) { # since we use 'x.free.idx', only symmetric elements # are duplicated (not the equal ones, only in x.free.free) dix <- duplicated(x.el.idx[[mm]]) if(any(dix)) { m.el.idx[[mm]] <- m.el.idx[[mm]][!dix] x.el.idx[[mm]] <- x.el.idx[[mm]][!dix] } } } #} else { # NCOL <- sum(unlist(lapply(x.el.idx, function(x) length(unique(x))))) #} # compute Delta per group Delta <- vector("list", length=nblocks) for(g in 1:nblocks) { mm.in.group <- 1:nmat[g] + cumsum(c(0,nmat))[g] Delta.group <- NULL for(mm in mm.in.group) { mname <- names(lavmodel@GLIST)[mm] # skip empty ones if(!length(m.el.idx[[mm]])) next # get Delta columns for this model matrix if(representation == "LISREL") { if(target == "lambda") { DELTA <- derivative.lambda.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "th") { DELTA <- derivative.th.LISREL(m=mname, th.idx = th.idx[[g]], idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ], delta=TRUE) } else if(target == "mu") { DELTA <- derivative.mu.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "nu") { DELTA <- derivative.nu.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "tau") { DELTA <- derivative.tau.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "theta") { DELTA <- derivative.theta.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "gamma") { DELTA <- derivative.gamma.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "beta") { DELTA <- derivative.beta.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "alpha") { DELTA <- derivative.alpha.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "psi") { DELTA <- derivative.psi.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ]) } else if(target == "sigma") { DELTA <- derivative.sigma.LISREL(m=mname, idx=m.el.idx[[mm]], MLIST=GLIST[ mm.in.group ], delta=TRUE) } else { stop("lavaan ERROR: target ", target, " not implemented yet") } # initialize? if(is.null(Delta.group)) { Delta.group <- matrix(0, nrow=nrow(DELTA), ncol=NCOL) } Delta.group[ ,x.el.idx[[mm]]] <- DELTA } } # mm #if(type == "free" && lavmodel@eq.constraints) { # Delta.group <- Delta.group %*% lavmodel@eq.constraints.K #} Delta[[g]] <- Delta.group } # g Delta } computeOmega <- function(Sigma.hat=NULL, Mu.hat=NULL, lavsamplestats=NULL, estimator="ML", meanstructure=FALSE, conditional.x = FALSE) { # nblocks nblocks <- length(Sigma.hat) Omega <- vector("list", length = nblocks) Omega.mu <- vector("list", length = nblocks) for(g in 1:nblocks) { # ML if(estimator == "ML" || estimator == "REML") { if(attr(Sigma.hat[[g]], "po") == FALSE) { # FIXME: WHAT IS THE BEST THING TO DO HERE?? 
# CURRENTLY: stop warning("lav_model_gradient: Sigma.hat is not positive definite\n") Sigma.hat.inv <- MASS::ginv(Sigma.hat[[g]]) Sigma.hat.log.det <- log(.Machine$double.eps) } else { Sigma.hat.inv <- attr(Sigma.hat[[g]], "inv") Sigma.hat.log.det <- attr(Sigma.hat[[g]], "log.det") } if(!lavsamplestats@missing.flag) { # complete data if(meanstructure) { if(conditional.x) { diff <- lavsamplestats@res.int[[g]] - Mu.hat[[g]] W.tilde <- lavsamplestats@res.cov[[g]] + tcrossprod(diff) } else { diff <- lavsamplestats@mean[[g]] - Mu.hat[[g]] W.tilde <- lavsamplestats@cov[[g]] + tcrossprod(diff) } # Browne 1995 eq 4.55 Omega.mu[[g]] <- t(t(diff) %*% Sigma.hat.inv) Omega[[g]] <- ( Sigma.hat.inv %*% (W.tilde - Sigma.hat[[g]]) %*% Sigma.hat.inv ) } else { if(conditional.x) { W.tilde <- lavsamplestats@res.cov[[g]] } else { W.tilde <- lavsamplestats@cov[[g]] } Omega[[g]] <- ( Sigma.hat.inv %*% (W.tilde - Sigma.hat[[g]]) %*% Sigma.hat.inv ) } } else { # missing data M <- lavsamplestats@missing[[g]] nvar <- ncol(lavsamplestats@cov[[g]]) OMEGA <- matrix(0, nvar, nvar) OMEGA.MU <- matrix(0, nvar, 1) for(p in 1:length(M)) { SX <- M[[p]][["SY"]] MX <- M[[p]][["MY"]] nobs <- M[[p]][["freq"]] var.idx <- M[[p]][["var.idx"]] Sigma.inv <- inv.chol(Sigma.hat[[g]][var.idx, var.idx], logdet=FALSE) Mu <- Mu.hat[[g]][var.idx] W.tilde <- SX + tcrossprod(MX - Mu) OMEGA.MU[var.idx, 1] <- ( OMEGA.MU[var.idx, 1] + nobs/lavsamplestats@ntotal * t(t(MX - Mu) %*% Sigma.inv) ) OMEGA[var.idx, var.idx] <- ( OMEGA[var.idx, var.idx] + nobs/lavsamplestats@ntotal * (Sigma.inv %*% (W.tilde - Sigma.hat[[g]][var.idx,var.idx]) %*% Sigma.inv ) ) } Omega.mu[[g]] <- OMEGA.MU Omega[[g]] <- OMEGA } # missing # GLS } else if(estimator == "GLS") { W.inv <- lavsamplestats@icov[[g]] W <- lavsamplestats@cov[[g]] Omega[[g]] <- (lavsamplestats@nobs[[g]]-1)/lavsamplestats@nobs[[g]] * (W.inv %*% (W - Sigma.hat[[g]]) %*% W.inv) if(meanstructure) { diff <- as.matrix(lavsamplestats@mean[[g]] - Mu.hat[[g]]) Omega.mu[[g]] <- t( t(diff) %*% W.inv ) } } } # g if(meanstructure) attr(Omega, "mu") <- Omega.mu Omega } lav_model_gradient_DD <- function(lavmodel, GLIST = NULL, group = 1L) { if(is.null(GLIST)) GLIST <- lavmodel@GLIST #### FIX th + mu!!!!! Delta.lambda <- computeDeltaDx(lavmodel, GLIST=GLIST, target="lambda")[[group]] Delta.tau <- computeDeltaDx(lavmodel, GLIST=GLIST, target="tau" )[[group]] Delta.nu <- computeDeltaDx(lavmodel, GLIST=GLIST, target="nu" )[[group]] Delta.theta <- computeDeltaDx(lavmodel, GLIST=GLIST, target="theta" )[[group]] Delta.beta <- computeDeltaDx(lavmodel, GLIST=GLIST, target="beta" )[[group]] Delta.psi <- computeDeltaDx(lavmodel, GLIST=GLIST, target="psi" )[[group]] Delta.alpha <- computeDeltaDx(lavmodel, GLIST=GLIST, target="alpha" )[[group]] Delta.gamma <- computeDeltaDx(lavmodel, GLIST=GLIST, target="gamma" )[[group]] ov.y.dummy.ov.idx <- lavmodel@ov.y.dummy.ov.idx[[group]] ov.x.dummy.ov.idx <- lavmodel@ov.x.dummy.ov.idx[[group]] ov.y.dummy.lv.idx <- lavmodel@ov.y.dummy.lv.idx[[group]] ov.x.dummy.lv.idx <- lavmodel@ov.x.dummy.lv.idx[[group]] ov.dummy.idx <- c(ov.y.dummy.ov.idx, ov.x.dummy.ov.idx) lv.dummy.idx <- c(ov.y.dummy.lv.idx, ov.x.dummy.lv.idx) th.idx <- lavmodel@th.idx[[group]] num.idx <- lavmodel@num.idx[[group]] ord.idx <- unique( th.idx[th.idx > 0L] ) # fix Delta's... 
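# ('fixing' here means: for observed variables that were internally
#  promoted to dummy latent variables (the ov.y/ov.x dummies above), the
#  relevant derivative rows live in the lv matrices (alpha, beta, psi);
#  below they are copied back into the corresponding ov blocks
#  (nu, lambda, theta) before the per-matrix DD blocks are assembled)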
mm.in.group <- 1:lavmodel@nmat[group] + cumsum(c(0,lavmodel@nmat))[group] MLIST <- GLIST[ mm.in.group ] DD <- list() nvar <- lavmodel@nvar nfac <- ncol(MLIST$lambda) - length(lv.dummy.idx) # DD$theta theta.idx <- lav_matrix_diagh_idx(nvar) DD$theta <- Delta.theta[theta.idx,,drop=FALSE] if(length(ov.dummy.idx) > 0L) { psi.idx <- lav_matrix_diagh_idx( ncol(MLIST$psi) )[lv.dummy.idx] DD$theta[ov.dummy.idx,] <- Delta.psi[psi.idx,,drop=FALSE] } # num only? FIXME or just all of them? DD$theta <- DD$theta[num.idx,,drop=FALSE] # DD$nu DD$nu <- Delta.nu if(length(ov.dummy.idx) > 0L) { DD$nu[ov.dummy.idx,] <- Delta.alpha[lv.dummy.idx,] } DD$nu <- DD$nu[num.idx,,drop=FALSE] # needed? # DD$lambda nr <- nvar; nc <- nfac lambda.idx <- nr*((1:nc) - 1L) + rep(1:nvar, each=nc) DD$lambda <- Delta.lambda[lambda.idx,,drop=FALSE] if(length(ov.dummy.idx) > 0L) { nr <- nrow(MLIST$beta); nc <- nfac # only the first 1:nfac columns # beta.idx <- rep(nr*((1:nc) - 1L), each=length(lv.dummy.idx)) + rep(lv.dummy.idx, times=nc) ## FIXME beta.idx <- rep(nr*((1:nc) - 1L), times=length(lv.dummy.idx)) + rep(lv.dummy.idx, each=nc) #l.idx <- inr*((1:nc) - 1L) + rep(ov.dummy.idx, each=nc) ## FIXME # l.idx <- rep(nr*((1:nc) - 1L), each=length(ov.dummy.idx)) + rep(ov.dummy.idx, times=nc) l.idx <- rep(nr*((1:nc) - 1L), times=length(ov.dummy.idx)) + rep(ov.dummy.idx, each=nc) DD$lambda[match(l.idx, lambda.idx),] <- Delta.beta[beta.idx,,drop=FALSE] } # DD$KAPPA DD$kappa <- Delta.gamma if(length(ov.dummy.idx) > 0L) { nr <- nrow(MLIST$gamma); nc <- ncol(MLIST$gamma) kappa.idx <- nr*((1:nc) - 1L) + rep(lv.dummy.idx, each=nc) DD$kappa <- DD$kappa[kappa.idx,,drop=FALSE] } # DD$GAMMA if(!is.null(MLIST$gamma)) { nr <- nrow(MLIST$gamma); nc <- ncol(MLIST$gamma) lv.idx <- 1:nfac # MUST BE ROWWISE! gamma.idx <- rep(nr*((1:nc) - 1L), times=length(lv.idx)) + rep(lv.idx, each=nc) DD$gamma <- Delta.gamma[gamma.idx,,drop=FALSE] } # DD$BETA if(!is.null(MLIST$beta)) { nr <- nc <- nrow(MLIST$beta) lv.idx <- 1:nfac # MUST BE ROWWISE! beta.idx <- rep(nr*((1:nfac) - 1L), times=nfac) + rep(lv.idx, each=nfac) DD$beta <- Delta.beta[beta.idx,,drop=FALSE] } ## DD$psi DD$psi <- Delta.psi if(length(lv.dummy.idx) > 0L) { nr <- nc <- nrow(MLIST$psi) lv.idx <- 1:nfac # MUST BE ROWWISE! psi.idx <- rep(nr*((1:nfac) - 1L), times=nfac) + rep(lv.idx, each=nfac) DD$psi <- DD$psi[psi.idx,,drop=FALSE] } ## DD$tau if(!is.null(MLIST$tau)) { DD$tau <- Delta.tau } DD } lavaan/R/lav_bootstrap.R0000644000176200001440000003655513043403126014731 0ustar liggesusers# main function used by various bootstrap related functions # this function draws the bootstrap samples, and estimates the # free parameters for each bootstrap sample # # return COEF matrix of size R x npar (R = number of bootstrap samples) # # Ed. 9 mar 2012 # # Notes: - faulty runs are simply ignored (with a warning) # - default R=1000 # # Updates: - now we have a separate @data slot, we only need to transform once # for the bollen.stine bootstrap (13 dec 2011) # - bug fix: we need to 'update' the fixed.x variances/covariances # for each bootstrap draw!! # # Question: if fixed.x=TRUE, should we not keep X fixed, and bootstrap Y # only, conditional on X?? How to implement the conditional part? bootstrapLavaan <- function(object, R = 1000L, type = "ordinary", verbose = FALSE, FUN = "coef", warn = -1L, return.boot = FALSE, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, iseed = NULL, h0.rmsea = NULL, ...) { # checks type. 
<- tolower(type) # overwritten if nonparametric stopifnot(inherits(object, "lavaan"), type. %in% c("nonparametric", "ordinary", "bollen.stine", "parametric", "yuan")) if(type. == "nonparametric") type. <- "ordinary" # check if options$se is not bootstrap, otherwise, we get an infinite loop if(object@Options$se == "bootstrap") stop("lavaan ERROR: se == \"bootstrap\"; please refit model with another option for \"se\"") # check if options$test is not bollen.stine if(object@Options$test == "bollen.stine") stop("lavaan ERROR: test == \"bollen.stine\"; please refit model with another option for \"test\"") # check for conditional.x = TRUE if(object@Model@conditional.x) { stop("lavaan ERROR: this function is not (yet) available if conditional.x = TRUE") } lavoptions. <- list(parallel = parallel, ncpus = ncpus, cl = cl, iseed = iseed) bootstrap.internal(object = object, lavdata. = NULL, lavmodel. = NULL, lavsamplestats. = NULL, lavoptions. = lavoptions., lavpartable. = NULL, R = R, type = type., verbose = verbose, FUN = FUN, warn = warn, return.boot = return.boot, h0.rmsea = h0.rmsea, ...) } # we need an internal version to be called from VCOV and lav_model_test # when there is no lavaan object yet! bootstrap.internal <- function(object = NULL, lavdata. = NULL, lavmodel. = NULL, lavsamplestats. = NULL, lavoptions. = NULL, lavpartable. = NULL, R = 1000L, type = "ordinary", verbose = FALSE, FUN = "coef", warn = 0L, return.boot = FALSE, h0.rmsea = NULL, ...) { # warning: avoid use of 'options', 'sample' (both are used as functions # below... # options -> opt # sample -> samp # object slots if(!is.null(object)) { lavdata <- object@Data lavmodel <- object@Model lavsamplestats <- object@SampleStats lavoptions <- object@Options lavpartable <- object@ParTable FUN <- match.fun(FUN) t0 <- FUN(object, ...) t.star <- matrix(as.numeric(NA), R, length(t0)) colnames(t.star) <- names(t0) } else { # internal version! lavdata <- lavdata. lavmodel <- lavmodel. lavsamplestats <- lavsamplestats. lavoptions <- lavoptions. lavpartable <- lavpartable. lavoptions$se <- "none"; lavoptions$test <- "standard" lavoptions$verbose <- FALSE if(FUN == "coef") { t.star <- matrix(as.numeric(NA), R, lavmodel@nx.free) lavoptions$test <- "none" } else if(FUN == "test") { t.star <- matrix(as.numeric(NA), R, 1L) } else if(FUN == "coeftest") { t.star <- matrix(as.numeric(NA), R, lavmodel@nx.free + 1L) } } parallel <- lavoptions$parallel ncpus <- lavoptions$ncpus cl <- lavoptions$cl iseed <- lavoptions$iseed # prepare old_options <- options(); options(warn = warn) # the next 10 lines are borrowed from the boot package have_mc <- have_snow <- FALSE if (parallel != "no" && ncpus > 1L) { if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows" else if (parallel == "snow") have_snow <- TRUE if (!have_mc && !have_snow) ncpus <- 1L } # only if we return the seed #if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) #seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) # bollen.stine, yuan, or parametric: we need the Sigma.hat values if(type == "bollen.stine" || type == "parametric" || type == "yuan") { Sigma.hat <- computeSigmaHat(lavmodel = lavmodel) Mu.hat <- computeMuHat(lavmodel = lavmodel) } # can we use the original data, or do we need to transform it first? 
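# (for bollen.stine, each group's data matrix Y is transformed so that it
#  satisfies H0 exactly: Z = (Y - Ybar) %*% S^{-1/2} %*% Sigma.hat^{1/2},
#  plus Mu.hat if there is a meanstructure, where S is the sample
#  covariance matrix; the code below implements this via
#  lav_matrix_symmetric_sqrt())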
if(type == "bollen.stine" || type == "yuan") { # check if data is complete if(lavoptions$missing != "listwise") stop("lavaan ERROR: bollen.stine/yuan bootstrap not available for missing data") dataX <- vector("list", length=lavdata@ngroups) } else { dataX <- lavdata@X } dataeXo <- lavdata@eXo # if bollen.stine, transform data here if(type == "bollen.stine") { for(g in 1:lavsamplestats@ngroups) { sigma.sqrt <- lav_matrix_symmetric_sqrt(Sigma.hat[[g]]) S.inv.sqrt <- lav_matrix_symmetric_sqrt(lavsamplestats@icov[[g]]) # center (needed???) X <- scale(lavdata@X[[g]], center=TRUE, scale=FALSE) # transform X <- X %*% S.inv.sqrt %*% sigma.sqrt # add model-based mean if(lavmodel@meanstructure) X <- scale(X, center=(-1*Mu.hat[[g]]), scale=FALSE) # transformed data dataX[[g]] <- X } } # if yuan, transform data here if(type == "yuan") { # page numbers refer to Yuan et al, 2007 # Define a function to find appropriate value of a # (p. 272); code supplied 16 jun 2016 by Cheng & Wu search.a <- function(F0, d, p) { if (F0 == 0) { a0 <- 0 return(a0) } max.a <- 1 / (1 - min(d)) - 1e-3 # starting value; Yuan p. 272 a0 <- min(sqrt(2 * F0 / sum((d - 1)^2)), max.a) # See Yuan p. 280 for (i in 1:50) { dia <- a0 * d + (1 - a0) g1 <- -sum(log(dia)) + sum(dia) - p dif <- g1 - F0 if(abs(dif) < 1e-6) return(a0) g2 <- a0 * sum((d - 1)^2 / dia) a0 <- min(max(a0 - dif/g2, 0), max.a) } # if search fails to converge in 50 iterations warning("lavaan WARNING: yuan bootstrap search for `a` did not converge. h0.rmsea may be too large.") a0 } # Now use g.a within each group for(g in 1:lavsamplestats@ngroups) { S <- lavsamplestats@cov[[g]] # test is in Fit slot ghat <- object@test[[1]]$stat.group[[g]] df <- object@test[[1]]$df Sigmahat <- Sigma.hat[[g]] nmv <- nrow(Sigmahat) n <- lavdata@nobs[[g]] # Calculate tauhat_1, middle p. 267. # Yuan et al note that tauhat_1 could be negative; # if so, we need to let S.a = Sigmahat. (see middle p 275) ifelse(length(h0.rmsea)==0, tau.hat <- (ghat - df)/(n-1), # middle p 267 tau.hat <- df*(h0.rmsea*h0.rmsea)) # middle p 273 if (tau.hat >= 0) { # from Cheng and Wu EL <- t(chol(Sigmahat)) ESE <- forwardsolve(EL, t(forwardsolve(EL, S))) d <- eigen(ESE, symmetric = TRUE, only.values = TRUE)$values # Find a to minimize g.a a <- search.a(tau.hat, d, nmv) # Calculate S_a (p. 267) S.a <- a*S + (1 - a)*Sigmahat } else { S.a <- Sigmahat } # Transform the data (p. 263) S.a.sqrt <- lav_matrix_symmetric_sqrt(S.a) S.inv.sqrt <- lav_matrix_symmetric_sqrt(lavsamplestats@icov[[g]]) X <- lavdata@X[[g]] X <- X %*% S.inv.sqrt %*% S.a.sqrt # transformed data dataX[[g]] <- X } } # run bootstraps fn <- function(b) { if(type == "bollen.stine" || type == "ordinary" || type == "yuan") { # take a bootstrap sample for each group for(g in 1:lavsamplestats@ngroups) { stopifnot(lavsamplestats@nobs[[g]] > 1L) boot.idx <- sample(x=lavsamplestats@nobs[[g]], size=lavsamplestats@nobs[[g]], replace=TRUE) dataX[[g]] <- dataX[[g]][boot.idx,,drop=FALSE] if(!is.null(dataeXo[[g]])) dataeXo[[g]] <- dataeXo[[g]][boot.idx,,drop=FALSE] } } else { # parametric! for(g in 1:lavsamplestats@ngroups) { dataX[[g]] <- MASS::mvrnorm(n = lavsamplestats@nobs[[g]], Sigma = Sigma.hat[[g]], mu = Mu.hat[[g]]) } } # verbose if(verbose) cat(" ... 
bootstrap draw number:", sprintf("%4d", b)) bootSampleStats <- try(lav_samplestats_from_data( lavdata = NULL, DataX = dataX, DataeXo = dataeXo, DataOv = lavdata@ov, DataOvnames = lavdata@ov.names, DataOvnamesx = lavdata@ov.names.x, missing = lavoptions$missing, rescale = (lavoptions$estimator == "ML" && lavoptions$likelihood == "normal"), estimator = lavoptions$estimator, mimic = lavoptions$mimic, meanstructure = lavoptions$meanstructure, conditional.x = lavoptions$conditional.x, group.w.free = lavoptions$group.w.free, #missing.h1 = (FUN != "coef"), # not if fixed.x, otherwise starting values fails! missing.h1 = TRUE, verbose = FALSE), silent=TRUE) if(inherits(bootSampleStats, "try-error")) { if(verbose) cat(" FAILED: creating sample statistics\n") options(old_options) return(NULL) } # just in case we need the new X in the data slot (lm!) lavdata@X <- dataX # adjust model slot if fixed.x variances/covariances # have changed: ### FIXME ##### #if(lavmodel@fixed.x && length(vnames(partable, "ov.x")) > 0L) { # for(g in 1:lavsamplestats@ngroups) { # # } #} if(lavmodel@fixed.x && length(vnames(lavpartable, "ov.x")) > 0L) { model.boot <- NULL } else { model.boot <- lavmodel } # fit model on bootstrap sample fit.boot <- lavaan(slotOptions = lavoptions, slotParTable = lavpartable, slotModel = model.boot, slotSampleStats = bootSampleStats, slotData = lavdata) if(!fit.boot@optim$converged) { if(verbose) cat(" FAILED: no convergence\n") options(old_options) return(NULL) } # extract information we need if(is.null(object)) { # internal use only! if(FUN == "coef") { out <- fit.boot@optim$x } else if(FUN == "test") { out <- fit.boot@test[[1L]]$stat } else if(FUN == "coeftest") { out <- c(fit.boot@optim$x, fit.boot@test[[1L]]$stat) } } else { # general use out <- try(FUN(fit.boot, ...), silent=TRUE) } if(inherits(out, "try-error")) { if(verbose) cat(" FAILED: applying FUN to fit.boot\n") options(old_options) return(NULL) } if(verbose) cat(" OK -- niter = ", sprintf("%3d", fit.boot@optim$iterations), " fx = ", sprintf("%13.9f", fit.boot@optim$fx), "\n") out } # this is from the boot function in package boot RR <- R res <- if (ncpus > 1L && (have_mc || have_snow)) { if (have_mc) { parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus) } else if (have_snow) { list(...) # evaluate any promises if (is.null(cl)) { cl <- parallel::makePSOCKcluster(rep("localhost", ncpus)) if(RNGkind()[1L] == "L'Ecuyer-CMRG") parallel::clusterSetRNGStream(cl, iseed = iseed) res <- parallel::parLapply(cl, seq_len(RR), fn) parallel::stopCluster(cl) res } else parallel::parLapply(cl, seq_len(RR), fn) } } else lapply(seq_len(RR), fn) # handle errors and fill in container error.idx <- integer(0) for(b in seq_len(RR)) { if(!is.null(res[[b]]) && length(res[[b]]) > 0L) { t.star[b, ] <- res[[b]] } else { error.idx <- c(error.idx, b) } } # handle errors if(length(error.idx) > 0L) { warning("lavaan WARNING: only ", (R-length(error.idx)), " bootstrap draws were successful") t.star <- t.star[-error.idx,,drop=FALSE] attr(t.star, "error.idx") <- error.idx } else { if(verbose) cat("Number of successful bootstrap draws:", (R - length(error.idx)), "\n") } # NOT DONE YET if(return.boot) { # mimic output boot function } # restore options options(old_options) t.star } lavaan/R/lav_partable_from_lm.R0000644000176200001440000000315112465075714016222 0ustar liggesusers# build a bare-bones parameter table from a fitted lm object lav_partable_from_lm <- function(object, est = FALSE, label = FALSE, as.data.frame. 
= FALSE) { # sanity check if(class(object) != "lm") { stop("object must be of class lm") } objectTerms <- terms(object) responseIndex <- attr(objectTerms, "response") varNames <- as.character(attr(objectTerms, "variables"))[-1] responseName <- varNames[responseIndex] predCoef <- coef(object) predNames <- names(predCoef) lhs <- rep(responseName, length(predNames)) op <- rep("~", length(predNames)) rhs <- predNames # intercept? if(attr(objectTerms, "intercept")) { int.idx <- which(rhs == "(Intercept)") op[int.idx] <- "~1" rhs[int.idx] <- "" } # always add residual variance? #lhs <- c(lhs, responseName) # op <- c(op, "~~") #rhs <- c(rhs, responseName) # construct minimal partable partable <- list(lhs = lhs, op = op, rhs = rhs) # include 'est' column? if(est) { #partable$est <- c(as.numeric(predCoef), # sum(resid(object)^2) / object$df.residual) partable$est <- as.numeric(predCoef) } # include 'label' column? if(label) { # partable$label <- c(predNames, responseName) partable$label <- predNames # convert all ':' to '.' partable$label <- gsub("[:()]", ".", partable$label) } # convert to data.frame? if(as.data.frame.) { partable <- as.data.frame(partable, stringsAsFactors = FALSE) } partable } lavaan/R/lav_test_LRT.R0000644000176200001440000002403613042164101014377 0ustar liggesusers# compare two nested models, by default using the chi-square # difference test # - in 0.5-16, SB.classic = TRUE is the default again (for now) # - in 0.5-18, SB.classic is replaced by 'method', with the following # options: # method = "default" (we choose a default method, based on the estimator) # method = "Satorra.2000" # method = "Satorra.Bentler.2001" # method = "Satorra.Bentler.2010" # method = "mean.var.adjusted.PLRT" lavTestLRT <- function(object, ..., method = "default", A.method = "exact", H1 = TRUE, type = "Chisq", model.names = NULL) { if(object@optim$npar > 0L && !object@optim$converged) stop("lavaan ERROR: model did not converge") type <- tolower(type) method <- tolower( gsub("[-_\\.]", "", method ) ) # NOTE: if we add additional arguments, it is not the same generic # anova() function anymore, and match.call will be screwed up mcall <- match.call(expand.dots = TRUE) dots <- list(...) modp <- if(length(dots)) sapply(dots, is, "lavaan") else logical(0) # some general properties (taken from the first model) estimator <- object@Options$estimator likelihood <- object@Options$likelihood ngroups <- object@Data@ngroups nobs <- object@SampleStats@nobs ntotal <- object@SampleStats@ntotal # shortcut for single argument (just plain LRT) if(!any(modp)) { if(type == "cf") { warning("lavaan WARNING: `type' argument is ignored for a single model") } aic <- bic <- c(NA, NA) if(estimator == "ML") { aic <- c(NA, AIC(object)) bic <- c(NA, BIC(object)) } val <- data.frame(Df = c(0, object@test[[1L]]$df), AIC = aic, BIC = bic, Chisq = c(0, object@test[[1L]]$stat), "Chisq diff" = c(NA, object@test[[1L]]$stat), "Df diff" = c(NA, object@test[[1L]]$df), "Pr(>Chisq)" = c(NA, object@test[[1L]]$pvalue), row.names = c("Saturated", "Model"), check.names = FALSE) attr(val, "heading") <- "Chi Square Test Statistic (unscaled)\n" class(val) <- c("anova", class(val)) return(val) } # list of models mods <- c(list(object), dots[modp]) if(!is.null(model.names)) { names(mods) <- model.names } else { names(mods) <- sapply(as.list(mcall)[which(c(FALSE, TRUE, modp))], deparse) } ## put them in order (using number of free parameters) #nfreepar <- sapply(mods, function(x) x@optim$npar) #if(any(duplicated(nfreepar))) { ## FIXME: what to do here? 
# # what, same number of free parameters? # # maybe, we need to count number of constraints # ncon <- sapply(mods, function(x) { nrow(x@Model@con.jac) }) # nfreepar <- nfreepar - ncon #} # put them in order (using degrees of freedom) ndf <- sapply(mods, function(x) x@test[[1]]$df) mods <- mods[order(ndf)] # here come the checks if(TRUE) { # 1. same set of observed variables? ov.names <- lapply(mods, function(x) { sort(lavNames(x)) }) OV <- ov.names[[1L]] # the observed variable names of the first model if(!all(sapply(ov.names, function(x) identical(x, OV)))) { warning("lavaan WARNING: some models are based on a different set of observed variables") } ## wow FIXME: we may need to reorder the rows/columns first!! #COVS <- lapply(mods, function(x) slot(slot(x, "Sample"), "cov")[[1]]) #if(!all(sapply(COVS, all.equal, COVS[[1]]))) { # stop("lavaan ERROR: models must be fit to the same data") #} # 2. nested models? *different* npars? # TODO! # 3. all meanstructure? mean.structure <- sapply(mods, inspect, "meanstructure") if(sum(mean.structure) > 0L && sum(mean.structure) < length(mean.structure)) { warning("lavaan WARNING: not all models have a meanstructure") } } mods.scaled <- unlist( lapply(mods, function(x) { any(c("satorra.bentler", "yuan.bentler", "mean.var.adjusted", "scaled.shifted") %in% unlist(sapply(slot(x, "test"), "[", "test")) ) })) if(all(mods.scaled)) { scaled <- TRUE # which type? TEST <- object@test[[2]]$test } else if(!all(mods.scaled)) { scaled <- FALSE TEST <- "standard" } else { stop("lavaan ERROR: some models (but not all) have scaled test statistics") } # which models have used a MEANSTRUCTURE? mods.meanstructure <- sapply(mods, function(x) { unlist(slot(slot(x, "Model"), "meanstructure"))}) if(all(mods.meanstructure)) { meanstructure <- "ok" } else if(sum(mods.meanstructure) == 0) { meanstructure <- "ok" } else { stop("lavaan ERROR: some models (but not all) have a meanstructure") } # collect statistics for each model if(type == "chisq") { Df <- sapply(mods, function(x) slot(x, "test")[[1]]$df) } else if(type == "cf") { Df <- rep(as.numeric(NA), length(mods)) } else { stop("lavaan ERROR: test type unknown: ", type) } if(type == "chisq") { STAT <- sapply(mods, function(x) slot(x, "test")[[1]]$stat) } else if(type == "cf") { tmp <- lapply(mods, lavTablesFitCf) STAT <- unlist(tmp) Df <- unlist(lapply(tmp, attr, "DF")) } else { stop("lavaan ERROR: test type unknown: ", type) } # difference statistics STAT.delta <- c(NA, diff(STAT)) Df.delta <- c(NA, diff(Df)) # correction for scaled test statistics if(type == "chisq" && scaled) { # select method if(method == "default") { if(estimator == "PML") { method <- "mean.var.adjusted.PLRT" } else if(TEST %in% c("satorra.bentler", "yuan.bentler")) { method <- "satorra.bentler.2001" } else { method <- "satorra.2000" } } else if(method == "meanvaradjustedplrt") { method <- "mean.var.adjusted.PLRT" stopifnot(estimator == "PML") } else if(method == "satorra2000") { method <- "satorra.2000" } else if(method == "satorrabentler2001") { method <- "satorra.bentler.2001" } else if(method == "satorrabentler2010") { method <- "satorra.bentler.2010" } else { stop("lavaan ERROR: unknown method for scaled difference test: ", method) } if(method == "satorra.bentler.2001") { # use formula from Satorra & Bentler 2001 for(m in seq_len(length(mods) - 1L)) { out <- lav_test_diff_SatorraBentler2001(mods[[m]], mods[[m+1]]) STAT.delta[m+1] <- out$T.delta Df.delta[m+1] <- out$df.delta } } else if (method == "mean.var.adjusted.PLRT") { for(m in 
seq_len(length(mods) - 1L)) { out <- ctr_pml_plrt_nested(mods[[m]], mods[[m+1]]) STAT.delta[m+1] <- out$FSMA.PLRT Df.delta[m+1] <- out$adj.df } } else if(method == "satorra.bentler.2010") { for(m in seq_len(length(mods) - 1L)) { out <- lav_test_diff_SatorraBentler2010(mods[[m]], mods[[m+1]]) STAT.delta[m+1] <- out$T.delta Df.delta[m+1] <- out$df.delta } } else if(method == "satorra.2000") { for(m in seq_len(length(mods) - 1L)) { if(TEST %in% c("satorra.bentler", "yuan.bentler")) { Satterthwaite <- FALSE } else { Satterthwaite <- TRUE } out <- lav_test_diff_Satorra2000(mods[[m]], mods[[m+1]], H1 = TRUE, Satterthwaite = Satterthwaite, A.method = A.method) STAT.delta[m+1] <- out$T.delta Df.delta[m+1] <- out$df.delta } } } # Pvalue Pvalue.delta <- pchisq(STAT.delta, Df.delta, lower.tail = FALSE) aic <- bic <- rep(NA, length(mods)) if(estimator == "ML") { aic <- sapply(mods, FUN=AIC) bic <- sapply(mods, FUN=BIC) } else if(estimator == "PML") { OUT <- lapply(mods, ctr_pml_aic_bic) aic <- sapply(OUT, "[[", "PL_AIC") bic <- sapply(OUT, "[[", "PL_BIC") } if(estimator == "PML") { val <- data.frame(Df = Df, PL_AIC = aic, PL_BIC = bic, Chisq = STAT, "Chisq diff" = STAT.delta, "Df diff" = Df.delta, "Pr(>Chisq)" = Pvalue.delta, row.names = names(mods), check.names = FALSE) } else { val <- data.frame(Df = Df, AIC = aic, BIC = bic, Chisq = STAT, "Chisq diff" = STAT.delta, "Df diff" = Df.delta, "Pr(>Chisq)" = Pvalue.delta, row.names = names(mods), check.names = FALSE) } if(type == "chisq") { if(scaled) { attr(val, "heading") <- paste("Scaled Chi Square Difference Test (method = \"", method, "\")\n", sep="") } else { attr(val, "heading") <- "Chi Square Difference Test\n" } } else if(type == "cf") { colnames(val)[c(3,4)] <- c("Cf", "Cf diff") attr(val, "heading") <- "Cf Difference Test\n" } class(val) <- c("anova", class(val)) return(val) } lavaan/R/lav_object_post_check.R0000644000176200001440000000562712743716004016370 0ustar liggesusers# check if a fitted model is admissible lav_object_post_check <- function(object, verbose = FALSE) { stopifnot(inherits(object, "lavaan")) lavpartable <- object@ParTable lavmodel <- object@Model lavdata <- object@Data var.ov.ok <- var.lv.ok <- result.ok <- TRUE # 1a. check for negative variances ov var.idx <- which(lavpartable$op == "~~" & lavpartable$lhs %in% lavNames(object, "ov") & lavpartable$lhs == lavpartable$rhs) if(length(var.idx) > 0L && any(lavpartable$est[var.idx] < 0.0)) { result.ok <- var.ov.ok <- FALSE warning("lavaan WARNING: some estimated ov variances are negative") } # 1b. check for negative variances lv var.idx <- which(lavpartable$op == "~~" & lavpartable$lhs %in% lavNames(object, "lv") & lavpartable$lhs == lavpartable$rhs) if(length(var.idx) > 0L && any(lavpartable$est[var.idx] < 0.0)) { result.ok <- var.lv.ok <- FALSE warning("lavaan WARNING: some estimated lv variances are negative") } # 2. is cov.lv (PSI) positive definite? (only if we did not already warn # for negative variances) if(var.lv.ok && length(lavNames(lavpartable, type="lv.regular")) > 0L) { ETA <- lavTech(object, "cov.lv") for(g in 1:lavdata@ngroups) { txt.group <- ifelse(lavdata@ngroups > 1L, paste(" in group ", g, sep=""), "") eigvals <- eigen(ETA[[g]], symmetric=TRUE, only.values=TRUE)$values if(any(eigvals < -1 * .Machine$double.eps^(3/4))) { warning( "lavaan WARNING: covariance matrix of latent variables\n", " is not positive definite", txt.group, ";\n", " use inspect(fit,\"cov.lv\") to investigate.") result.ok <- FALSE } } } # 3. 
is THETA positive definite (but only for numeric variables) # and if we not already warned for negative ov variances if(var.ov.ok) { THETA <- lavTech(object, "theta") for(g in 1:lavdata@ngroups) { num.idx <- lavmodel@num.idx[[g]] if(length(num.idx) > 0L) { txt.group <- ifelse(lavdata@ngroups > 1L, paste(" in group ", g, sep=""), "") eigvals <- eigen(THETA[[g]][num.idx, num.idx, drop=FALSE], symmetric = TRUE, only.values = TRUE)$values if(any(eigvals < -1 * .Machine$double.eps^(3/4))) { warning( "lavaan WARNING: the covariance matrix of the residuals of the observed\n", " variables (theta) is not positive definite", txt.group, ";\n", " use inspect(fit,\"theta\") to investigate.") result.ok <- FALSE } } } } result.ok } lavaan/R/lav_fabin.R0000644000176200001440000001542112743462644014000 0ustar liggesusers# full inverse of a square matrix using Gauss-Jordan elimination # without pivoting (interchanging rows) # # not for real use; only to check the GaussJordanPivot function # this function will fail if a pivot is (near)zero! GaussJordanInverseNoPivoting <- function(A.) { A <- A. # we will change it p <- (d <- dim(A))[1L] if (!is.numeric(A) || length(d) != 2L || p != d[2L]) stop("'A' is not a square numeric matrix") # the code below is based on Matlab code presented in Figure 1 # of the following paper: # # "Efficient Matrix Inversion via Gauss-Jordan Elimination and its # Parralelization" (1998). # Authors: Quintana, Quintana, Sun & van de Geijn # retrieved from www.cs.utexas.edu/users/plapack/papers/inverse-tr.ps for(k in 1:p) { pivot <- A[k,k] # column scaling A[-k,k] <- -A[-k,k]/pivot; A[k,k] <- 0.0 # rank-1 update A <- A + tcrossprod(A[,k], A[k,]) # row scaling A[k,-k] <- A[k,-k]/pivot; A[k,k] <- 1.0/pivot } A } # just for fun, *with* pivoting of rows GaussJordanInverse <- function(A.) { A <- A. p <- (d <- dim(A))[1L] if (!is.numeric(A) || length(d) != 2L || p != d[2L]) stop("'A' is not a square numeric matrix") ipiv <- 1:p for(k in 1:p) { # look for largest element in the column tmp <- A[,k]; if(k > 1L) tmp[1:(k-1)] <- 0.0; k.max <- which.max(tmp) pivot <- A[k.max,k] # pivoting rows? if(k != k.max) { tmp <- A[k,]; A[k,] <- A[k.max,]; A[k.max,] <- tmp tmp <- ipiv[k]; ipiv[k] <- ipiv[k.max]; ipiv[k.max] <- tmp } # column scaling A[-k,k] <- -A[-k,k]/pivot; A[k,k] <- 0.0 # rank-1 update A <- A + tcrossprod(A[,k], A[k,]) # row scaling A[k,-k] <- A[k,-k]/pivot; A[k,k] <- 1.0/pivot } # backward permutation A[,ipiv] <- A A } # perform a single 'Gauss-Jordan' pivot on a # square matrix 'A' using a diagonal element A[k,k] # as a pivot element GaussJordanPivot <- function(A., k=1L) { A <- A. 
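# in block form, with pivot = A[k,k], a = A[k,-k], b = A[-k,k] and
# C = A[-k,-k], the three steps below map
#
#     [ pivot  a ]        [  1/pivot    a/pivot             ]
#     [ b      C ]  -->   [ -b/pivot    C - (b %*% a)/pivot ]
#
# (a sweep-type operation; applying it for k = 1..p yields the full
# inverse, as in GaussJordanInverseNoPivoting() above)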
p <- (d <- dim(A))[1L] if (!is.numeric(A) || length(d) != 2L || p != d[2L]) stop("'A' is not a square numeric matrix") pivot <- A[k,k] # column scaling A[-k,k] <- -A[-k,k]/pivot; A[k,k] <- 0.0 # rank-1 update A <- A + tcrossprod(A[,k], A[k,]) # row scaling A[k,-k] <- A[k,-k]/pivot; A[k,k] <- 1.0/pivot A } # fabin3 (2sls) for a single factor (that is what Mplus is doing) # S is a selection of ov.names corresponding with the indicators # of the (single) factor # 'Hagglund' version (slow) fabin3.uni2 <- function(S) { # Gosta Hagglund (Psychometrika, 1982, 47) nvar <- ncol(S) if(nvar < 3) { return( rep(1, nvar) ) } out <- numeric( nvar ); out[1L] <- 1.0 for(i in 2:nvar) { idx3 <- (1:nvar)[-c(i, 1L)] s23 <- S[i, idx3] S31 <- S[idx3, 1L]; S13 <- S[1L, idx3] S33 <- S[idx3,idx3] out[i] <- ( s23 %*% solve(S33) %*% S31 %*% solve(S13 %*% solve(S33) %*% S31) ) } out } # 'Jennrich' version (fast) fabin3.uni <- function(S) { nvar <- ncol(S) if(nvar < 3) { return( rep(1, nvar) ) } # zero out references S[1L, 1L] <- 0.0 S.tilde <- try(solve(S), silent = TRUE) if(inherits(S.tilde, "try-error")) { return( rep(1, nvar) ) } out <- numeric( nvar ); out[1L] <- 1.0 for(i in 2:nvar) { S.bar <- GaussJordanPivot(S.tilde[c(i,1L), c(i,1L)], k=1L) out[i] <- -S.bar[1,-1] - (S[i,1L] %*% S.bar[-1,-1]) } out } fabin2 <- function(S, ref.idx=NULL) { # Gosta Hagglund (Psychometrika, 1982, 47) nvar <- ncol(S); nfac <- length(ref.idx) stopifnot(nvar >= 3*nfac) out <- matrix(0, nvar, nfac) for(i in 1:nvar) { if(i %in% ref.idx) { out[i, ref.idx == i] <- 1.0 next } idx3 <- (1:nvar)[-c(i, ref.idx)] s23 <- S[i, idx3] S31 <- S[idx3, ref.idx]; S13 <- S[ref.idx, idx3] out[i,] <- s23 %*% S31 %*% solve(S13 %*% S31) } out } fabin3 <- function(S, ref.idx=NULL) { # Gosta Hagglund (Psychometrika, 1982, 47) nvar <- ncol(S); nfac <- length(ref.idx) stopifnot(nvar >= 3*nfac) out <- matrix(0, nvar, nfac) for(i in 1:nvar) { if(i %in% ref.idx) { out[i, ref.idx == i] <- 1.0 next } idx3 <- (1:nvar)[-c(i, ref.idx)] s23 <- S[i, idx3] S31 <- S[idx3, ref.idx]; S13 <- S[ref.idx, idx3] S33 <- S[idx3,idx3] out[i,] <- ( s23 %*% solve(S33) %*% S31 %*% solve(S13 %*% solve(S33) %*% S31) ) } out } # using the 'jennrich' computations (much faster!) fabin3.jennrich <- function(S, ref.idx=NULL) { # Robert I. 
Jennrich (Psychometrika, 1987, 52) nvar <- ncol(S); nfac <- length(ref.idx) stopifnot(nvar >= 3*nfac) # zero out references S[ref.idx, ref.idx] <- 0.0 S.tilde <- solve(S) out <- matrix(0, nvar, nfac) for(i in 1:nvar) { if(i %in% ref.idx) { out[i, ref.idx == i] <- 1.0 next } S.bar <- GaussJordanPivot(S.tilde[c(i,ref.idx), c(i,ref.idx)], k=1L) out[i,] <- -S.bar[1,-1] - (S[i,ref.idx] %*% S.bar[-1,-1]) } out } # Hagglund table 1 JoreskogLawley1968.COR <- matrix(c( 1.000, 0.411, 0.479, 0.401, 0.370, 0.393, 0.078, 0.389, 0.411, 0.411, 1.000, 0.463, 0.223, 0.198, 0.244, -0.042, 0.169, 0.324, 0.479, 0.463, 1.000, 0.231, 0.272, 0.357, -0.126, 0.153, 0.307, 0.401, 0.223, 0.231, 1.000, 0.659, 0.688, 0.215, 0.221, 0.256, 0.370, 0.198, 0.272, 0.659, 1.000, 0.649, 0.293, 0.279, 0.324, 0.393, 0.244, 0.357, 0.688, 0.649, 1.000, 0.226, 0.298, 0.294, 0.078, -0.042, -0.126, 0.215, 0.293, 0.226, 1.000, 0.602, 0.446, 0.389, 0.169, 0.153, 0.221, 0.279, 0.298, 0.602, 1.000, 0.630, 0.411, 0.324, 0.307, 0.256, 0.324, 0.294, 0.446, 0.630, 1.000), 9, 9, byrow=TRUE) # round(fabin2(JoreskogLawley1968.COR, ref.idx=c(1,4,7)), 3) # round(fabin3(JoreskogLawley1968.COR, ref.idx=c(1,4,7)), 3) # Jennrich Emmett.1949 <- matrix(c( 1.0, 0.523, 0.395, 0.471, 0.346, 0.426, 0.576, 0.434, 0.639, 0.523, 1.0, 0.479, 0.506, 0.418, 0.462, 0.547, 0.283, 0.645, 0.395, 0.479, 1.0, 0.355, 0.270, 0.254, 0.452, 0.219, 0.504, 0.471, 0.506, 0.355, 1.0, 0.691, 0.791, 0.443, 0.285, 0.505, 0.346, 0.418, 0.270, 0.691, 1.0, 0.679, 0.383, 0.149, 0.409, 0.426, 0.462, 0.254, 0.791, 0.679, 1.0, 0.372, 0.314, 0.472, 0.576, 0.547, 0.452, 0.443, 0.383, 0.372, 1.0, 0.385, 0.680, 0.434, 0.283, 0.219, 0.285, 0.149, 0.314, 0.385, 1.0, 0.470, 0.639, 0.645, 0.504, 0.505, 0.409, 0.472, 0.680, 0.470, 1.0), 9, 9, byrow=TRUE) colnames(Emmett.1949) <- rownames(Emmett.1949) <- paste("x",1:9,sep="") # round( fabin3(Emmett.1949, ref.idx=c(1,4,7)), 3) lavaan/R/lav_partable.R0000644000176200001440000004554313052630105014502 0ustar liggesusers# constructor for the ltavParTable model description # # initial version: YR 22/05/2009 # major revision: YR 02/11/2010: - FLATTEN the model syntax and turn it into a # data.frame, with a "modifiers" attribute # - add default elements here # - check for duplicate elements # - allow for every possible model... # - since 0.4-5 # - the end result is a full description of # a model (but no matrix representation) # - 14 Jan 2014: merge 02lavaanUser.R with lav_partable.R # move syntax-based code to lav_syntax.R # - 26 April 2016: handle multiple 'blocks' (levels, classes, groups, ...) lavaanify <- lavParTable <- function( model = NULL, meanstructure = FALSE, int.ov.free = FALSE, int.lv.free = FALSE, orthogonal = FALSE, std.lv = FALSE, conditional.x = FALSE, fixed.x = TRUE, parameterization = "delta", constraints = NULL, auto = FALSE, model.type = "sem", auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = FALSE, auto.cov.lv.x = FALSE, auto.cov.y = FALSE, auto.th = FALSE, auto.delta = FALSE, varTable = NULL, ngroups = 1L, group.equal = NULL, group.partial = NULL, group.w.free = FALSE, debug = FALSE, warn = TRUE, as.data.frame. 
= TRUE) { # check if model is already FLAT or a full parameter table if(is.list(model) && !is.null(model$lhs)) { if(is.null(model$mod.idx)) { warning("lavaan WARNING: input already looks like a parameter table; returning as is") return(model) } else { FLAT <- model } } else { # parse the model syntax and flatten the user-specified model # return a data.frame, where each line is a model element (rhs, op, lhs) FLAT <- lavParseModelString(model.syntax=model, warn=warn, debug=FALSE) } # user-specified *modifiers* are returned as an attribute MOD <- attr(FLAT, "modifiers"); attr(FLAT, "modifiers") <- NULL # user-specified *constraints* are returned as an attribute CON <- attr(FLAT, "constraints"); attr(FLAT, "constraints") <- NULL # extra constraints? if(!is.null(constraints) && nchar(constraints) > 0L) { FLAT2 <- lavParseModelString(model.syntax=constraints, warn=warn) CON2 <- attr(FLAT2, "constraints"); rm(FLAT2) CON <- c(CON, CON2) } if(debug) { cat("[lavaan DEBUG]: FLAT (flattened user model):\n") print(FLAT) cat("[lavaan DEBUG]: MOD (modifiers):\n") print( str(MOD) ) cat("[lavaan DEBUG]: CON (constraints):\n") print( str(CON) ) } # check for wrongly specified variances/covariances/intercepts # of exogenous variables in model syntax (if fixed.x=TRUE) if(fixed.x) { # we ignore the groups here! # we only call this function for the warning message tmp <- lav_partable_vnames(FLAT, "ov.x", warn = TRUE); rm(tmp) } # auto=TRUE? if(auto && model.type == "sem") { # mimic sem/cfa auto behavior if(model.type == "sem") { int.ov.free = TRUE int.lv.free = FALSE auto.fix.first = !std.lv auto.fix.single = TRUE auto.var = TRUE auto.cov.lv.x = TRUE auto.cov.y = TRUE auto.th = TRUE auto.delta = TRUE } else if(model.type == "growth") { model.type = "growth" int.ov.free = FALSE int.lv.free = TRUE auto.fix.first = !std.lv auto.fix.single = TRUE auto.var = TRUE auto.cov.lv.x = TRUE auto.cov.y = TRUE auto.th = TRUE auto.delta = TRUE } } # check for meanstructure if(any(FLAT$op == "~1")) meanstructure <- TRUE # check for block identifiers in the syntax (op = ":") n.block.flat <- length(which(FLAT$op == ":")) # for each non-empty `block' in n.block.flat, produce a USER if(n.block.flat > 0L) { # what are the block lhs labels? BLOCKS <- tolower(FLAT$lhs[FLAT$op == ":"]) BLOCK.lhs <- unique(BLOCKS) # block op == ":" indices BLOCK.op.idx <- which(FLAT$op == ":") # check for wrong spelled 'group' lhs if(length(grep("group", BLOCK.lhs)) > 1L) { warning("lavaan WARNING: ambiguous block identifiers for group:", "\n\t\t ", paste(BLOCK.lhs[grep("group", BLOCK.lhs)], collapse = ", ")) } # no empty :rhs fields allowed! if( any( nchar(FLAT$rhs[BLOCK.op.idx]) == 0L ) ) { empty.idx <- nchar(FLAT$rhs[BLOCK.op.idx]) == 0L txt <- paste(FLAT$lhs[BLOCK.op.idx][empty.idx], ":") stop("lavaan ERROR: syntax contains block identifiers with ", "missing numbers/labels:\n\t\t", txt) } # check for 'group' (needed?) if("group" %in% BLOCK.lhs) { # how many group blocks? 
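# (a group block starts at each 'group:' line in the model syntax, e.g.
#      group: 1
#        y ~ x
#      group: 2
#        y ~ x
#  gives n.group.flat == 2 below, which must agree with the number of
#  groups implied by the data)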
group.block.idx <- FLAT$op == ":" & FLAT$lhs == "group" n.group.flat <- length( unique(FLAT$rhs[group.block.idx]) ) if(n.group.flat > 0L && n.group.flat != ngroups) { stop("lavaan ERROR: syntax defines ", n.group.flat, " groups; ", "data (or argument ngroups) suggests ", ngroups, " groups") } } # split the FLAT data.frame per `block', create LIST # for each `block', and rbind them together, adding block columns FLAT <- as.data.frame(FLAT, stringsAsFactors = FALSE) BLOCK.op.idx <- c(BLOCK.op.idx, nrow(FLAT) + 1L) BLOCK.rhs <- rep("0", length(BLOCK.lhs)) block.id <- 0L for(block in seq_len(n.block.flat)) { # fill BLOC.rhs value block.lhs <- FLAT$lhs[BLOCK.op.idx[block]] block.rhs <- FLAT$rhs[BLOCK.op.idx[block]] BLOCK.rhs[ which(block.lhs == BLOCK.lhs) ] <- block.rhs # another block identifier? if(BLOCK.op.idx[block+1] - BLOCK.op.idx[block] == 1L) { next } block.id <- block.id + 1L FLAT.block <- FLAT[(BLOCK.op.idx[block]+1L):(BLOCK.op.idx[block+1]-1L),] # rm 'block' column (if any) in FLAT.block FLAT.block$block <- NULL LIST.block <- lav_partable_flat(FLAT.block, blocks = BLOCK.lhs, block.id = block.id, meanstructure = meanstructure, int.ov.free = int.ov.free, int.lv.free = int.lv.free, orthogonal = orthogonal, std.lv = std.lv, conditional.x = conditional.x, fixed.x = fixed.x, parameterization = parameterization, auto.fix.first = auto.fix.first, auto.fix.single = auto.fix.single, auto.var = auto.var, auto.cov.lv.x = auto.cov.lv.x, auto.cov.y = auto.cov.y, auto.th = auto.th, auto.delta = auto.delta, varTable = varTable, group.equal = NULL, group.w.free = group.w.free, ngroups = 1L) LIST.block <- as.data.frame(LIST.block, stringsAsFactors = FALSE) # add block columns with current values in BLOCK.rhs for(b in seq_len(length(BLOCK.lhs))) { block.lhs <- BLOCK.lhs[b] block.rhs <- BLOCK.rhs[b] LIST.block[block.lhs] <- rep(block.rhs, length(LIST.block$lhs)) } if(!exists("LIST")) { LIST <- LIST.block } else { LIST.block$id <- LIST.block$id + max(LIST$id) LIST <- rbind(LIST, LIST.block) } } LIST <- as.list(LIST) # convert block columns to integers if possible for(b in seq_len(length(BLOCK.lhs))) { block.lhs <- BLOCK.lhs[b] block.rhs <- BLOCK.rhs[b] tmp <- try(scan(text = LIST[[block.lhs]], what = integer(), quiet = TRUE), silent = TRUE) if(class(tmp) == "integer") { LIST[[block.lhs]] <- tmp } } } else { LIST <- lav_partable_flat(FLAT, blocks = "group", meanstructure = meanstructure, int.ov.free = int.ov.free, int.lv.free = int.lv.free, orthogonal = orthogonal, std.lv = std.lv, conditional.x = conditional.x, fixed.x = fixed.x, parameterization = parameterization, auto.fix.first = auto.fix.first, auto.fix.single = auto.fix.single, auto.var = auto.var, auto.cov.lv.x = auto.cov.lv.x, auto.cov.y = auto.cov.y, auto.th = auto.th, auto.delta = auto.delta, varTable = varTable, group.equal = group.equal, group.w.free = group.w.free, ngroups = ngroups) } if(debug) { cat("[lavaan DEBUG]: parameter LIST without MODIFIERS:\n") print( as.data.frame(LIST, stringsAsFactors=FALSE) ) } # apply user-specified modifiers if(length(MOD)) { for(el in 1:length(MOD)) { idx <- which(LIST$mod.idx == el) # for each group # 0.5-21: check is idx exists # perhaps the corresponding element was duplicated, and removed if(length(idx) == 0L) { next } MOD.fixed <- MOD[[el]]$fixed MOD.start <- MOD[[el]]$start MOD.label <- MOD[[el]]$label MOD.prior <- MOD[[el]]$prior # check for single argument if multiple groups if(ngroups > 1L && length(idx) > 1L) { # Ok, this is not very consistent: # A) here we force same behavior across groups 
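# (e.g. with two groups, 'f =~ 0.8*x2' recycles the single fixed value
#  0.8 to both groups: one value, same meaning everywhere)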
if(length(MOD.fixed) == 1L) MOD.fixed <- rep(MOD.fixed, ngroups) if(length(MOD.start) == 1L) MOD.start <- rep(MOD.start, ngroups) if(length(MOD.prior) == 1L) MOD.prior <- rep(MOD.prior, ngroups) # B) here we do NOT! otherwise, it would imply an equality # constraint... # except if group.equal="loadings"! if(length(MOD.label) == 1L) { if("loadings" %in% group.equal) { MOD.label <- rep(MOD.label, ngroups) } else { MOD.label <- c(MOD.label, rep("", (ngroups-1L)) ) } } } # check for wrong number of arguments if multiple groups nidx <- length(idx) if( (!is.null(MOD.fixed) && nidx != length(MOD.fixed)) || (!is.null(MOD.start) && nidx != length(MOD.start)) || (!is.null(MOD.prior) && nidx != length(MOD.prior)) || (!is.null(MOD.label) && nidx != length(MOD.label)) ) { el.idx <- which(LIST$mod.idx == el)[1L] stop("lavaan ERROR: wrong number of arguments in modifier (", paste(MOD.label, collapse=","), ") of element ", LIST$lhs[el.idx], LIST$op[el.idx], LIST$rhs[el.idx]) } # apply modifiers if(!is.null(MOD.fixed)) { # two options: constant or NA na.idx <- which(is.na(MOD.fixed)) not.na.idx <- which(!is.na(MOD.fixed)) # constant LIST$ustart[idx][not.na.idx] <- MOD.fixed[not.na.idx] LIST$free[ idx][not.na.idx] <- 0L # NA* modifier LIST$free[ idx][na.idx] <- 1L # eg factor loading LIST$ustart[idx][na.idx] <- as.numeric(NA) } if(!is.null(MOD.start)) { LIST$ustart[idx] <- MOD.start } if(!is.null(MOD.prior)) { # do we already have a `prior' column? if not, create one if(is.null(LIST$prior)) { LIST$prior <- character( length(LIST$lhs) ) } LIST$prior[idx] <- MOD.prior } if(!is.null(MOD.label)) { LIST$label[idx] <- MOD.label } } } # remove mod.idx column LIST$mod.idx <- NULL if(debug) { cat("[lavaan DEBUG]: parameter LIST with MODIFIERS:\n") print( as.data.frame(LIST, stringsAsFactors=FALSE) ) } # get 'virtual' parameter labels if(n.block.flat > 1L) { blocks <- BLOCK.lhs } else { blocks <- "group" } LABEL <- lav_partable_labels(partable = LIST, blocks = blocks, group.equal = group.equal, group.partial = group.partial) if(debug) { cat("[lavaan DEBUG]: parameter LIST with LABELS:\n") tmp <- LIST; tmp$LABEL <- LABEL print( as.data.frame(tmp, stringsAsFactors=FALSE) ) } # handle user-specified equality constraints # lavaan 0.5-18 # - rewrite 'LABEL-based' equality constraints as == constraints # - create plabel: internal labels, based on id # - create CON entries, using these internal labels LIST$plabel <- paste(".p", LIST$id, ".", sep="") idx.eq.label <- which(duplicated(LABEL)) if(length(idx.eq.label) > 0L) { CON.idx <- length(CON) # add 'user' column CON <- lapply(CON, function(x) {x$user <- 1L; x} ) for(idx in idx.eq.label) { eq.label <- LABEL[idx] all.idx <- which(LABEL == eq.label) # all same-label parameters ref.idx <- all.idx[1L] # the first one only # two possibilities: # 1. all.idx contains a fixed parameter: in this case, # we fix them all (hopefully to the same value) # 2. all.idx contains only free parameters # 1. fixed? if(any(LIST$free[all.idx] == 0L)) { # which one is fixed? (only pick the first!) fixed.all <- all.idx[ LIST$free[all.idx] == 0L ] fixed.idx <- fixed.all[1] # only pick the first! # sanity check: are all ustart values equal? ustart1 <- LIST$ustart[ fixed.idx ] if(! all(ustart1 == LIST$ustart[fixed.all]) ) { warning("lavaan WARNING: equality constraints involve fixed parameters with different values; only the first one will be used") } fixed.idx <- fixed.all[1] # only pick the first! 
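# (i.e. when one label is shared by both a fixed and a free parameter,
#  the fixed value wins: every parameter carrying that label is fixed to
#  the value of the first fixed one)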
# fix current 'idx' LIST$ustart[idx] <- LIST$ustart[fixed.idx] LIST$free[idx] <- 0L # not free anymore, since it must # be equal to the 'fixed' parameter # (Note: Mplus ignores this) # just in case: if ref.idx is not equal to fixed.idx, # fix this one too LIST$ustart[ref.idx] <- LIST$ustart[fixed.idx] LIST$free[ref.idx] <- 0L } else { # 2. ref.idx is a free parameter # user-label? #if(nchar(LIST$label[ref.idx]) > 0) { # lhs.lab <- LIST$label[ref.idx] #} else { # lhs.lab <- PLABEL[ref.idx] #} CON.idx <- CON.idx + 1L CON[[CON.idx]] <- list(op = "==", lhs = LIST$plabel[ref.idx], rhs = LIST$plabel[idx], user = 2L) # just to trick semTools, also add something in the label # colum, *if* it is empty for(i in all.idx) { if(nchar(LIST$label[i]) == 0L) { LIST$label[i] <- LIST$plabel[ ref.idx ] } } } } } if(debug) { print(CON) } # count free parameters idx.free <- which(LIST$free > 0) LIST$free[idx.free] <- seq_along(idx.free) # backwards compatibility... if(!is.null(LIST$unco)) { LIST$unco[idx.free] <- seq_along(idx.free) } # handle constraints (if any) (NOT per group, but overall - 0.4-11) if(length(CON) > 0L) { nCon <- length(CON) IDX <- length(LIST$id) + seq_len(nCon) # grow LIST with length(CON) extra rows LIST <- lapply(LIST, function(x) { if(is.character(x)) { c(x, rep("", nCon)) } else { c(x, rep(NA, nCon)) } }) # fill in some columns LIST$id[IDX] <- IDX LIST$lhs[IDX] <- unlist(lapply(CON, "[[", "lhs")) LIST$op[IDX] <- unlist(lapply(CON, "[[", "op")) LIST$rhs[IDX] <- unlist(lapply(CON, "[[", "rhs")) LIST$user[IDX] <- unlist(lapply(CON, "[[", "user")) # zero is nicer? LIST$free[IDX] <- rep(0L, nCon) LIST$exo[IDX] <- rep(0L, nCon) LIST$block[IDX] <- rep(0L, nCon) if(!is.null(LIST$group)) { if(is.character(LIST$group)) { LIST$group[IDX] <- rep("", nCon) } else { LIST$group[IDX] <- rep(0L, nCon) } } if(!is.null(LIST$level)) { if(is.character(LIST$level)) { LIST$level[IDX] <- rep("", nCon) } else { LIST$level[IDX] <- rep(0L, nCon) } } if(!is.null(LIST$class)) { if(is.character(LIST$class)) { LIST$class[IDX] <- rep("", nCon) } else { LIST$class[IDX] <- rep(0L, nCon) } } } # put lhs of := elements in label column def.idx <- which(LIST$op == ":=") LIST$label[def.idx] <- LIST$lhs[def.idx] if(debug) { cat("[lavaan DEBUG] lavParTable\n") print( as.data.frame(LIST) ) } # data.frame? if(as.data.frame.) { LIST <- as.data.frame(LIST, stringsAsFactors = FALSE) } LIST } lavaan/R/00generic.R0000644000176200001440000000205213053065676013626 0ustar liggesusers# for blavaan setGeneric("fitMeasures", function(object, fit.measures = "all", baseline.model = NULL) standardGeneric("fitMeasures")) setGeneric("fitmeasures", function(object, fit.measures = "all", baseline.model = NULL) standardGeneric("fitmeasures")) # S3 generics inspect <- function(object, what = "free", ...) 
{ UseMethod("inspect", object) } lavInspect <- function(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) { UseMethod("lavInspect", object) } lavTech <- function(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) { UseMethod("lavTech", object) } lavaan/R/lav_test.R0000644000176200001440000007161113045674063013677 0ustar liggesuserstestStatisticSatorraBentler <- function(lavsamplestats=lavsamplestats, E.inv, Delta, WLS.V, Gamma, x.idx=list(integer(0)), test.UGamma.eigvals = FALSE) { # UG = Gamma %*% [V - V %*% Delta %*% E.inv %*% tDelta %*% V] # = Gamma %*% V - Gamma %*% V %*% Delta %*% E.inv %*% tDelta %*% V # = Gamma %*% A1 - Gamma %*% A1 %*% Delta %*% E.inv %*% tDelta %*% A1 # (B1 = A1 %*% Gamma %*% A1) # = B1 %*% A1.inv - B1 %*% A1.inv %*% Delta %*% E.inv %*% tDelta %*% A1 # # if only the trace is needed, we can reduce the rhs (after the minus) # to B1 %*% Delta %*% E.inv %*% tDelta (eliminating A1 and A1.inv) # we write it like this to allow for fixed.x covariates which affect A1 # and B1 if(is.null(Gamma)) { Gamma <- lavsamplestats@NACOV } nss <- ncol(Gamma[[1]]) ngroups <- lavsamplestats@ngroups UG.group <- vector("list", length=ngroups) trace.UGamma2 <- numeric(ngroups) for(g in 1:ngroups) { fg <- lavsamplestats@nobs[[g]] /lavsamplestats@ntotal fg1 <- (lavsamplestats@nobs[[g]]-1)/lavsamplestats@ntotal Gamma.g <- Gamma[[g]] / fg ## ?? check this Delta.g <- Delta[[g]] # just for testing: regular UG: # WLS.Vg <- WLS.V[[g]] * fg # UG1 <- Gamma.g %*% (WLS.Vg - WLS.Vg %*% Delta[[g]] %*% E.inv %*% t(Delta[[g]]) %*% WLS.Vg) # diagonal WLS.V? we check for this since 0.5-17 diagonal <- FALSE if(is.matrix(WLS.V[[g]])) { A1 <- WLS.V[[g]] * fg B1 <- A1 %*% Gamma.g %*% A1 } else { diagonal <- TRUE a1 <- WLS.V[[g]] * fg # numeric vector! B1 <- Gamma.g * tcrossprod(a1) } # mask independent 'fixed-x' variables # note: this only affects the saturated H1 model if(length(x.idx[[g]]) > 0L) { # we should not be here if we have conditional.x = TRUE nvar <- ncol(lavsamplestats@cov[[g]]) idx <- eliminate.pstar.idx(nvar=nvar, el.idx=x.idx[[g]], meanstructure=TRUE, type="all") if(diagonal) { a1 <- a1[idx] } else { A1 <- A1[idx,idx] } B1 <- B1[idx,idx] Delta.g <- Delta.g[idx,] } if(diagonal) { a1.inv <- 1/a1 # if fixed.x = TRUE zero.idx <- which(a1 == 0.0) a1.inv[zero.idx] <- 0.0 tmp <- t(a1.inv * B1) - (B1 %*% Delta.g %*% tcrossprod(E.inv, Delta.g)) } else { A1.inv <- solve(A1) tmp <- (B1 %*% A1.inv) - (B1 %*% Delta.g %*% tcrossprod(E.inv, Delta.g)) } # sanity check 1: sum(diag(UG1)) - sum(diag(tmp)) # sanity check 2: sum(diag(UG1 %*% UG1)) - sum(diag(tmp %*% tmp)) trace.UGamma2[g] <- sum(tmp * t(tmp)) UG.group[[g]] <- tmp } # NOTE: if A, B, C are matrices # tr(A+B+C) = tr(A) + tr(B) + tr(C) # # BUT: # tr( (A+B+C)^2 ) != tr(A^2) + tr(B^2) + tr(C^2) # it would seem that we need the latter... (trace.UGamma3) for MLMV and # friends UG <- UG.group[[1]] if(ngroups > 1L) { for(g in 2:ngroups) { UG <- UG + UG.group[[g]] } } # trace trace.UGamma <- sum(diag(UG)) # for mean and variance adjusted tr UG^2 (per group) trace.UGamma4 <- trace.UGamma2 trace.UGamma2 <- sum(trace.UGamma2) # at least for MLMV in Mplus # this is what is needed when multiple # groups are used?? attr(trace.UGamma, "trace.UGamma2") <- trace.UGamma2 attr(trace.UGamma, "trace.UGamma4") <- trace.UGamma4 # testing only -- alternative interpretation of tr UG^2 # tUG <- t(UG); trace.UGamma3 <- sum(UG * tUG) # seems wrong?
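# editor's note (comments only, so the function body is untouched): the
# caller turns the tr(UG) value returned here into the Satorra-Bentler
# scaling factor c = tr(UG)/df and the scaled statistic T.scaled = T/c.
# A numeric sketch with made-up values:
#
#   trace.UGamma <- 18.4; df <- 12; T.stat <- 30.2
#   c.sb <- trace.UGamma / df    # scaling factor, here ~1.533
#   T.scaled <- T.stat / c.sb    # ~19.7, referred to a chisq with df = 12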
# attr(trace.UGamma, "trace.UGamma3") <- trace.UGamma3 # eigen values # this was for the lavaan.survey pval.pFsum() function # but for large problems, this can take a loooong time; not needed anymore if(test.UGamma.eigvals) { attr(trace.UGamma, "eigenvalues") <- Re(eigen(UG, only.values=TRUE)$values) } trace.UGamma } testStatisticYuanBentler <- function(lavsamplestats=lavsamplestats, A1.group=NULL, B1.group=NULL, Delta=NULL, E.inv=NULL, x.idx=list(integer(0))) { # we always assume a meanstructure meanstructure <- TRUE trace.UGamma <- numeric( lavsamplestats@ngroups ) trace.h1 <- numeric( lavsamplestats@ngroups ) trace.h0 <- numeric( lavsamplestats@ngroups ) for(g in 1:lavsamplestats@ngroups) { A1 <- A1.group[[g]] B1 <- B1.group[[g]] # mask independent 'fixed-x' variables # note: this only affects the saturated H1 model if(length(x.idx[[g]]) > 0L) { nvar <- ncol(lavsamplestats@cov[[g]]) idx <- eliminate.pstar.idx(nvar=nvar, el.idx=x.idx[[g]], meanstructure=meanstructure, type="all") A1 <- A1[idx,idx] B1 <- B1[idx,idx] } A1.inv <- solve(A1) trace.h1[g] <- sum( B1 * t( A1.inv ) ) trace.h0[g] <- sum( (B1 %*% Delta[[g]] %*% E.inv %*% t(Delta[[g]])) ) trace.UGamma[g] <- abs(trace.h1[g] - trace.h0[g]) } # traces trace.UGamma <- sum(trace.UGamma) #tUG <- t(UG); trace.UGamma2 <- sum(UG * tUG) #attr(trace.UGamma, "trace.UGamma2") <- trace.UGamma2 attr(trace.UGamma, "h1") <- trace.h1 attr(trace.UGamma, "h0") <- trace.h0 trace.UGamma } testStatisticYuanBentler.Mplus <- function(lavsamplestats=lavsamplestats, lavdata=lavdata, information="observed", B0.group=NULL, E.inv=NULL, x.idx=list(integer(0))) { # typical for Mplus: # - do NOT use the YB formula, but use an approximation # relying on A0 ~= Delta'*A1*Delta and the same for B0 # we always assume a meanstructure meanstructure <- TRUE ngroups <- lavsamplestats@ngroups trace.UGamma <- numeric( lavsamplestats@ngroups ) trace.h1 <- numeric( lavsamplestats@ngroups ) trace.h0 <- numeric( lavsamplestats@ngroups ) for(g in 1:lavsamplestats@ngroups) { # if lavdata is complete, A1.22 is simply 0.5*D'(S.inv x S.inv)D if(lavsamplestats@missing.flag) { if(information == "expected") { A1 <- lav_mvnorm_missing_information_expected( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } else { A1 <- lav_mvnorm_missing_information_observed_samplestats( Yp = lavsamplestats@missing[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } } else { # data complete, under h1, expected == observed A1 <- lav_mvnorm_h1_information_observed_samplestats( sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) } if(lavsamplestats@missing.flag) { B1 <- lav_mvnorm_missing_information_firstorder( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = lavsamplestats@missing.h1[[g]]$mu, Sigma = lavsamplestats@missing.h1[[g]]$sigma) } else { B1 <- lav_mvnorm_h1_information_firstorder( Y = lavdata@X[[g]], Gamma = lavsamplestats@NACOV[[g]], sample.cov = lavsamplestats@cov[[g]], sample.cov.inv = lavsamplestats@icov[[g]]) } # mask independent 'fixed-x' variables # note: this only affects the saturated H1 model if(length(x.idx[[g]]) > 0L) { nvar <- ncol(lavsamplestats@cov[[g]]) idx <- eliminate.pstar.idx(nvar=nvar, el.idx=x.idx[[g]], meanstructure=meanstructure, type="all") A1 <- A1[idx,idx] B1 <- B1[idx,idx] } A1.inv <- solve(A1) trace.h1[g] <- sum( B1 * t( A1.inv ) ) trace.h0[g] <- ( lavsamplestats@nobs[[g]]/lavsamplestats@ntotal * sum( B0.group[[g]] * 
t(E.inv) ) ) trace.UGamma[g] <- (trace.h1[g] - trace.h0[g]) } # we take the sum here trace.UGamma <- sum(trace.UGamma) attr(trace.UGamma, "h1") <- trace.h1 attr(trace.UGamma, "h0") <- trace.h0 trace.UGamma } lav_model_test <- function(lavmodel = NULL, lavpartable = NULL, lavsamplestats = NULL, lavoptions = NULL, x = NULL, VCOV = NULL, lavcache = NULL, lavdata = NULL, test.UGamma.eigvals = FALSE) { mimic <- lavoptions$mimic test <- lavoptions$test information <- lavoptions$information estimator <- lavoptions$estimator TEST <- list() # degrees of freedom df <- lav_partable_df(lavpartable) # handle equality constraints (note: we ignore inequality constraints, # active or not!) # we use the rank of con.jac (even if the constraints are nonlinear) if(nrow(lavmodel@con.jac) > 0L) { ceq.idx <- attr(lavmodel@con.jac, "ceq.idx") if(length(ceq.idx) > 0L) { neq <- qr(lavmodel@con.jac[ceq.idx,,drop=FALSE])$rank df <- df + neq } } if(test == "none" || df < 0L || estimator == "MML") { TEST[[1]] <- list(test=test, stat=as.numeric(NA), stat.group=as.numeric(NA), df=df, refdistr="unknown", pvalue=as.numeric(NA)) # just in case TEST[[2]] <- list(test=test, stat=as.numeric(NA), stat.group=as.numeric(NA), df=df, refdistr="unknown", pvalue=as.numeric(NA)) return(TEST) } if(lavoptions$estimator == "PML" && test != "none") { PML <- ctr_pml_plrt(lavobject = NULL, lavmodel = lavmodel, lavdata = lavdata, lavoptions = lavoptions, x = x, VCOV = VCOV, lavcache = lavcache, lavsamplestats = lavsamplestats, lavpartable = lavpartable) # get chi.group from PML, since we compare to `unrestricted' model, # NOT observed data chisq.group <- PML$PLRTH0Sat.group } else { # get fx.group fx <- attr(x, "fx") fx.group <- attr(fx, "fx.group") # always compute `standard' test statistic ## FIXME: the NFAC is now implicit in the computation of fx... NFAC <- 2 * unlist(lavsamplestats@nobs) if(lavoptions$estimator == "ML" && lavoptions$likelihood == "wishart") { # first divide by two NFAC <- NFAC / 2 NFAC <- NFAC - 1 NFAC <- NFAC * 2 } chisq.group <- fx.group * NFAC } # check for negative values chisq.group[which(chisq.group < 0)] <- 0.0 # global test statistic chisq <- sum(chisq.group) # reference distribution: always chi-square, except for the # non-robust version of ULS if(estimator == "ULS" || estimator == "PML") { refdistr <- "unknown" pvalue <- as.numeric(NA) } else { refdistr <- "chisq" # pvalue ### FIXME: what if df=0? NA? or 1? or 0? # this is not trivial, since # 1 - pchisq(0, df=0) = 1 # but # 1 - pchisq(0.00000000001, df=0) = 0 # and # 1 - pchisq(0, df=0, ncp=0) = 0 # # This is due to different definitions of limits (from the left, # or from the right) # # From 0.5-17 onwards, we will use NA if df=0, to be consistent if(df == 0) { pvalue <- as.numeric(NA) } else { pvalue <- 1 - pchisq(chisq, df) } } TEST[[1]] <- list(test="standard", stat=chisq, stat.group=chisq.group, df=df, refdistr=refdistr, pvalue=pvalue) if(df == 0 && test %in% c("satorra.bentler", "yuan.bentler", "mean.var.adjusted", "scaled.shifted")) { TEST[[2]] <- list(test=test, stat=chisq, stat.group=chisq.group, df=df, refdistr=refdistr, pvalue=pvalue, scaling.factor=as.numeric(NA)) return(TEST) } # do we already know E.inv is singular? 
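# editor's note (comments only): lav_model_vcov() stores the inverted
# information matrix as an attribute of VCOV; when that inversion failed,
# the attribute holds a "try-error" object instead of a matrix,
# conceptually (E here is a hypothetical information matrix):
#
#   E.inv <- try(solve(E), silent = TRUE)
#   inherits(E.inv, "try-error")   # TRUE if E was (near-)singular
#
# in that case no scaled statistic can be computed and we return early.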
E.inv <- attr(VCOV, "E.inv") if(!is.null(E.inv) && inherits(E.inv, "try-error")) { TEST[[2]] <- list(test=test, stat=chisq, stat.group=chisq.group, df=df, refdistr=refdistr, pvalue=pvalue, scaling.factor=as.numeric(NA)) return(TEST) } # some require meanstructure (for now) if(test %in% c("satorra.bentler", "yuan.bentler") && !lavoptions$meanstructure) { stop("test (", test, ") requires meanstructure (for now)") } # fixed.x idx if(lavmodel@fixed.x && estimator == "ML" && !lavmodel@conditional.x) { x.idx <- lavsamplestats@x.idx } else { x.idx <- vector("list", length=lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { x.idx[[g]] <- integer(0L) } } if(lavoptions$estimator == "PML") { if(test == "standard") { # nothing to do } else if(test == "mean.var.adjusted") { TEST[[2]] <- list(test = test, stat = PML$stat, stat.group = TEST[[1]]$stat.group*PML$scaling.factor, df = PML$df, pvalue = PML$p.value, scaling.factor = 1/PML$scaling.factor, shift.parameter = as.numeric(NA), trace.UGamma = as.numeric(NA), trace.UGamma4 = as.numeric(NA), trace.UGamma2 = as.numeric(NA), UGamma.eigenvalues = as.numeric(NA)) } else { warning("test option ", test, " not available for estimator PML") } } else if(test %in% c("satorra.bentler", "mean.var.adjusted", "scaled.shifted") && df > 0 && lavoptions$estimator != "PML") { # try to extract attr from VCOV (if present) E.inv <- attr(VCOV, "E.inv") Delta <- attr(VCOV, "Delta") WLS.V <- attr(VCOV, "WLS.V") Gamma <- attr(VCOV, "Gamma") # if not present (perhaps se.type="standard" or se.type="none") # we need to compute these again if(is.null(E.inv) || is.null(Delta) || is.null(WLS.V)) { # this happens, for example, when we compute the independence # model if(mimic == "Mplus" && estimator == "ML") { # special treatment for Mplus E <- lav_model_information_expected_MLM(lavmodel = lavmodel, augmented = FALSE, inverted = FALSE, lavsamplestats=lavsamplestats, extra = TRUE) } else { E <- lav_model_information_expected(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, extra = TRUE) } E.inv <- try(lav_model_information_augment_invert(lavmodel, information = E, inverted = TRUE), silent=TRUE) if(inherits(E.inv, "try-error")) { TEST[[2]] <- list(test=test, stat=as.numeric(NA), stat.group=rep(as.numeric(NA), lavsamplestats@ngroups), df=df, refdistr=refdistr, pvalue=as.numeric(NA), scaling.factor=as.numeric(NA)) warning("lavaan WARNING: could not compute scaled test statistic\n") return(TEST) } Delta <- attr(E, "Delta") WLS.V <- attr(E, "WLS.V") } # if(mimic == "Mplus" && estimator == "ML") { ### TESTING ONLY FOR FIXED.X !!! 
### # if(length(x.idx) > 0L) { # cat("\n\nDEBUG FIXED.X\n\n\n") # augUser <- user # idx <- which(augUser$exo > 0L) # augUser$exo[ idx ] <- 0L # augUser$free[ idx ] <- max(augUser$free) + 1:length(idx) # augUser$unco[idx ] <- max(augUser$unco) + 1:length(idx) # augModel <- lav_model(lavpartable = augUser, # representation = lavmodel@representation, # conditional.x = lavoptions$conditional.x, # link = lavmodel@link, # debug = FALSE) # # Delta <- computeDelta(lavmodel = augModel) # E <- lav_model_information_expected_MLM(object, lavsamplestats=lavsamplestats, # Delta=Delta) # fixed.x.idx <- max(lavpartable$free) + 1:length(idx) # free.idx <- 1:max(lavpartable$free) # E[free.idx, fixed.x.idx] <- 0.0 # E[fixed.x.idx, free.idx] <- 0.0 # E.inv <- solve(E) # x.idx <- integer(0) # } # } trace.UGamma <- testStatisticSatorraBentler(lavsamplestats = lavsamplestats, E.inv = E.inv, Delta = Delta, WLS.V = WLS.V, Gamma = Gamma, x.idx = x.idx) trace.UGamma2 <- attr(trace.UGamma, "trace.UGamma2") # trace.UGamma3 <- attr(trace.UGamma, "trace.UGamma3") trace.UGamma4 <- attr(trace.UGamma, "trace.UGamma4") if(test.UGamma.eigvals) { UGamma.eigenvalues <- attr(trace.UGamma, "eigenvalues") } else { UGamma.eigenvalues <- numeric(0L) } attributes(trace.UGamma) <- NULL # adjust df? if(test == "mean.var.adjusted") { if(mimic == "Mplus") { df <- floor(trace.UGamma^2/trace.UGamma2 + 0.5) } else { # more precise, fractional df df <- trace.UGamma^2 / trace.UGamma2 } } else if(test == "satorra.bentler") { trace.UGamma2 <- as.numeric(NA) } if(test == "scaled.shifted") { # this is the T3 statistic as used by Mplus 6 and higher # see 'Simple Second Order Chi-Square Correction' 2010 # www.statmodel.com # however, for multiple groups, Mplus reports something else # YR. 30 Aug 2012 -- after much trial and error, it turns out # that the shift-parameter (b) is weighted (while a is not)?? # however, the chisq.square per group are different; only # the sum seems ok?? 
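# editor's worked sketch of the a/b computation below (comments only; the
# numbers are invented): with df = 10, tr(UG) = 14, tr(UG^2) = 24 and two
# equally sized groups (fg = c(.5, .5)):
#
#   a <- sqrt(10 / 24)               # ~0.645  (multiplicative scaling)
#   b <- c(.5, .5) * (10 - a * 14)   # ~0.48 per group (shift parameter)
#   # T3 = sum(a * T.group + b), referred to a chisq with df = 10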
fg <- unlist(lavsamplestats@nobs)/lavsamplestats@ntotal a <- sqrt(df/trace.UGamma2) #dprime <- trace.UGamma^2 / trace.UGamma2 #shift.parameter <- fg * (df - sqrt(df * dprime)) shift.parameter <- fg * (df - a*trace.UGamma) scaling.factor <- 1/a stat.group <- (chisq.group * a + shift.parameter) chisq.scaled <- sum(stat.group) pvalue.scaled <- 1 - pchisq(chisq.scaled, df) } else { scaling.factor <- trace.UGamma/df if(scaling.factor < 0) scaling.factor <- as.numeric(NA) stat.group <- chisq.group / scaling.factor chisq.scaled <- sum(stat.group) pvalue.scaled <- 1 - pchisq(chisq.scaled, df) shift.parameter <- as.numeric(NA) } TEST[[2]] <- list(test = test, stat = chisq.scaled, stat.group = stat.group, df = df, pvalue = pvalue.scaled, scaling.factor = scaling.factor, shift.parameter = shift.parameter, trace.UGamma = trace.UGamma, trace.UGamma4 = trace.UGamma4, trace.UGamma2 = trace.UGamma2, UGamma.eigenvalues = UGamma.eigenvalues) } else if(test == "yuan.bentler" && df > 0) { # try to extract attr from VCOV (if present) E.inv <- attr(VCOV, "E.inv") B0.group <- attr(VCOV, "B0.group") if(is.null(E.inv)) { # if se="standard", information is probably expected # change it to observed if(lavoptions$se != "robust.mlr") information <- "observed" E.inv <- lav_model_information(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, information = information, extra = FALSE, augmented = TRUE, inverted = TRUE) } if(inherits(E.inv, "try-error")) { TEST[[2]] <- list(test=test, stat=as.numeric(NA), stat.group=rep(as.numeric(NA), lavsamplestats@ngroups), df=df, refdistr=refdistr, pvalue=as.numeric(NA), scaling.factor=as.numeric(NA)) warning("lavaan WARNING: could not compute scaled test statistic\n") return(TEST) } if(mimic == "Mplus" || mimic == "lavaan") { if(is.null(B0.group)) { B0 <- lav_model_information_firstorder(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, extra = TRUE, check.pd = FALSE, augmented = FALSE, inverted = FALSE) B0.group <- attr(B0, "B0.group") } trace.UGamma <- testStatisticYuanBentler.Mplus(lavsamplestats = lavsamplestats, lavdata = lavdata, information = information, B0.group = B0.group, E.inv = E.inv, x.idx = x.idx) } else { Delta <- computeDelta(lavmodel = lavmodel) Sigma.hat <- computeSigmaHat(lavmodel = lavmodel) if(lavmodel@meanstructure) { Mu.hat <- computeMuHat(lavmodel = lavmodel) } A1.group <- vector("list", length=lavsamplestats@ngroups) B1.group <- vector("list", length=lavsamplestats@ngroups) for(g in 1:lavsamplestats@ngroups) { if(lavsamplestats@missing.flag) { out <- lav_mvnorm_missing_information_both( Y = lavdata@X[[g]], Mp = lavdata@Mp[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]], information = information) A1.group[[g]] <- out$Abeta B1.group[[g]] <- out$Bbeta } else { if(information == "expected") { A1.group[[g]] <- lav_mvnorm_information_expected( Y = lavdata@X[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]]) } else { A1.group[[g]] <- lav_mvnorm_information_observed_samplestats( sample.mean = lavsamplestats@mean[[g]], sample.cov = lavsamplestats@cov[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]]) } B1.group[[g]] <- lav_mvnorm_information_firstorder( Y = lavdata@X[[g]], Mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]]) } } trace.UGamma <- testStatisticYuanBentler(lavsamplestats=lavsamplestats, A1.group=A1.group, B1.group=B1.group, Delta=Delta, E.inv=E.inv, x.idx=x.idx) } scaling.factor <- sum(trace.UGamma) / df if(scaling.factor < 0) scaling.factor <- as.numeric(NA) chisq.scaled <- 
sum(chisq.group / scaling.factor) pvalue.scaled <- 1 - pchisq(chisq.scaled, df) ndat <- lav_partable_ndat(lavpartable) npar <- lav_partable_npar(lavpartable) scaling.factor.h1 <- sum( attr(trace.UGamma, "h1") ) / ndat scaling.factor.h0 <- sum( attr(trace.UGamma, "h0") ) / npar TEST[[2]] <- list(test=test, stat=chisq.scaled, stat.group=(chisq.group / scaling.factor), df=df, pvalue=pvalue.scaled, scaling.factor=scaling.factor, scaling.factor.h1=scaling.factor.h1, scaling.factor.h0=scaling.factor.h0, trace.UGamma=trace.UGamma) } else if(test == "bootstrap" || test == "bollen.stine") { # check if we have bootstrap lavdata BOOT.TEST <- attr(VCOV, "BOOT.TEST") if(is.null(BOOT.TEST)) { if(!is.null(lavoptions$bootstrap)) { R <- lavoptions$bootstrap } else { R <- 1000L } boot.type <- "bollen.stine" BOOT.TEST <- bootstrap.internal(object = NULL, lavmodel. = lavmodel, lavsamplestats. = lavsamplestats, lavpartable. = lavpartable, lavoptions. = lavoptions, lavdata. = lavdata, R = R, verbose = lavoptions$verbose, type = boot.type, FUN = "test", warn = -1L) BOOT.TEST <- drop(BOOT.TEST) } # bootstrap p-value boot.larger <- sum(BOOT.TEST > chisq) boot.length <- length(BOOT.TEST) pvalue.boot <- boot.larger/boot.length TEST[[2]] <- list(test="bootstrap", stat=chisq, stat.group=chisq.group, df=df, pvalue=pvalue.boot, boot.T=BOOT.TEST, boot.larger=boot.larger, boot.length=boot.length) } TEST } lavaan/R/xxx_lavaan.R0000644000176200001440000012143413054011247014213 0ustar liggesusers# main user-visible cfa/sem/growth functions # # initial version: YR 25/03/2009 # added lavoptions YR 02/08/2010 # major revision: YR 9/12/2010: - new workflow (since 0.4-5) # - merge cfa/sem/growth functions # YR 25/02/2012: changed data slot (from list() to S4); data@X contains data # YR 26 Jan 2017: use '...' to capture the never-ending list of options lavaan <- function(# user-specified model: can be syntax, parameter Table, ... model = NULL, # data (second argument, most used) data = NULL, # variable information ordered = NULL, # summary data sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, # multiple groups? group = NULL, # multiple levels? cluster = NULL, # constraints constraints = '', # user-specified variance matrices WLS.V = NULL, NACOV = NULL, # full slots from previous fits slotOptions = NULL, slotParTable = NULL, slotSampleStats = NULL, slotData = NULL, slotModel = NULL, slotCache = NULL, # options (dotdotdot) ... ) { # start timer start.time0 <- start.time <- proc.time()[3]; timing <- list() # 0a. store call mc <- match.call(expand.dots = TRUE) # handle dotdotdot dotdotdot <- list(...) # backwards compatibility, control= argument (<0.5-23) if(!is.null(dotdotdot$control)) { # optim.method if(!is.null(dotdotdot$control$optim.method)) { dotdotdot$optim.method <- dotdotdot$control$optim.method } # cor.optim.method if(!is.null(dotdotdot$control$cor.optim.method)) { dotdotdot$optim.method.cor <- dotdotdot$control$cor.optim.method } # control$optim.force.converged if(!is.null(dotdotdot$control$optim.force.converged)) { dotdotdot$optim.force.converged <- dotdotdot$control$optim.force.converged } # gradient if(!is.null(dotdotdot$control$gradient)) { dotdotdot$optim.gradient <- dotdotdot$control$gradient } if(!is.null(dotdotdot$gradient)) { dotdotdot$optim.gradient <- dotdotdot$gradient } # init_nelder_mead if(!is.null(dotdotdot$control$init_nelder_mead)) { dotdotdot$optim.init_nelder_mead <- dotdotdot$control$init_nelder_mead } } ###################### #### 1. ov.names #### ###################### # 1a.
get ov.names and ov.names.x (per group) -- needed for lavData() if(!is.null(slotParTable)) { FLAT <- slotParTable } else if(is.character(model)) { FLAT <- lavParseModelString(model) } else if(inherits(model, "lavaan")) { # hm, a lavaan model; let's try to extract the parameter table # and see what happens FLAT <- parTable(model) } else if(is.list(model)) { # two possibilities: either model is already lavaanified # or it is something else... # look for the bare minimum columns: lhs - op - rhs if(!is.null(model$lhs) && !is.null(model$op) && !is.null(model$rhs) && !is.null(model$free)) { # ok, we have something that looks like a parameter table # FIXME: we need to check for redundant arguments # (but if cfa/sem was used, we can not trust the call) # redundant <- c("meanstructure", "int.ov.free", "int.lv.free", # "fixed.x", "orthogonal", "std.lv", "parameterization", # "auto.fix.first", "auto.fix.single", "auto.var", # "auto.cov.lv.x", "auto.cov.y", "auto.th", "auto.delta") FLAT <- model # fix semTools issue here? for auxiliary() which does not use # block column yet if(!is.null(FLAT$block)) { N <- length(FLAT$lhs) if(length(FLAT$block) != N) { FLAT$block <- FLAT$group } if(any(is.na(FLAT$block))) { FLAT$block <- FLAT$group } } else if(!is.null(FLAT$group)) { FLAT$block <- FLAT$group } } else { bare.minimum <- c("lhs", "op", "rhs", "free") missing.idx <- is.na(match(bare.minimum, names(model))) missing.txt <- paste(bare.minimum[missing.idx], collapse = ", ") stop("lavaan ERROR: model is a list, but not a parameterTable?", "\n lavaan NOTE: ", "missing column(s) in parameter table: [", missing.txt, "]") } } # group blocks? if(any(FLAT$op == ":" & FLAT$lhs == "group")) { # here, we only need to figure out: # - ngroups # - ov's per group # - FIXME: we need a more efficient way, avoiding lavaanify/vnames group.idx <- which(FLAT$op == ":" & FLAT$lhs == "group") tmp.group.values <- unique(FLAT$rhs[group.idx]) tmp.ngroups <- length(tmp.group.values) tmp.lav <- lavaanify(FLAT, ngroups = tmp.ngroups) ov.names <- ov.names.y <- ov.names.x <- vector("list", length = tmp.ngroups) for(g in seq_len(tmp.ngroups)) { ov.names[[g]] <- unique(unlist(lav_partable_vnames(tmp.lav, type = "ov", group = tmp.group.values[g]))) ov.names.y[[g]] <- unique(unlist(lav_partable_vnames(tmp.lav, type = "ov.nox", group = tmp.group.values[g]))) ov.names.x[[g]] <- unique(unlist(lav_partable_vnames(tmp.lav, type = "ov.x", group = tmp.group.values[g]))) } } else { # no blocks: same set of variables per group/block ov.names <- lav_partable_vnames(FLAT, type = "ov") ov.names.y <- lav_partable_vnames(FLAT, type = "ov.nox") ov.names.x <- lav_partable_vnames(FLAT, type = "ov.x") } # sanity check ordered argument (just in case, add lhs variables names) ordered <- unique(c(ordered, lavNames(FLAT, "ov.ord"))) ####################### #### 2. lavoptions #### ####################### if(!is.null(slotOptions)) { lavoptions <- slotOptions } else { # load default options opt <- lav_options_default() # modifyList opt <- modifyList(opt, dotdotdot) # categorical mode? 
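# editor's illustration (comments only): the checks below switch on
# categorical mode when the syntax contains a "|" (threshold) operator,
# when an ordered= argument is given, or when the data contain ordered
# factors. With hypothetical variable names:
#
#   fit <- cfa(model, data = dat, ordered = c("y1", "y2", "y3"))
#   # or equivalently: dat$y1 <- ordered(dat$y1); cfa(model, data = dat)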
if(any(FLAT$op == "|")) { opt$categorical <- TRUE } else if(!is.null(data) && length(ordered) > 0L) { opt$categorical <- TRUE } else if(is.data.frame(data) && lav_dataframe_check_ordered(frame = data, ov.names = ov.names.y)) { opt$categorical <- TRUE } else { opt$categorical <- FALSE } # constraints if(nchar(constraints) > 0L) { opt$information <- "observed" } # meanstructure if(any(FLAT$op == "~1")) { opt$meanstructure <- TRUE } if(!is.null(group) && is.null(dotdotdot$meanstructure)) { opt$meanstructure <- TRUE } # conditional.x if( (is.list(ov.names.x) && sum(sapply(ov.names.x, FUN = length)) == 0L) || (is.character(ov.names.x) && length(ov.names.x) == 0L) ) { # if explicitly set to TRUE, give warning if(is.logical(dotdotdot$conditional.x) && dotdotdot$conditional.x) { warning("lavaan WARNING: no exogenous covariates; conditional.x will be set to FALSE") } opt$conditional.x <- FALSE } # fixed.x if( (is.list(ov.names.x) && sum(sapply(ov.names.x, FUN = length)) == 0L) || (is.character(ov.names.x) && length(ov.names.x) == 0L) ) { # if explicitly set to TRUE, give warning if(is.logical(dotdotdot$fixed.x) && dotdotdot$fixed.x) { # ok, we respect this: keep fixed.x = TRUE } else { opt$fixed.x <- FALSE } } # fill in remaining "default" values lavoptions <- lav_options_set(opt) } timing$InitOptions <- (proc.time()[3] - start.time) start.time <- proc.time()[3] # some additional checks for estimator="PML" if(lavoptions$estimator == "PML") { ovy <- unique( unlist(ov.names.y) ) ovx <- unique( unlist(ov.names.x) ) if(!is.null(slotData)) { ov.types <- slotData@ov$type[ slotData@ov$name %in% ovy ] } else { ov.types <- lav_dataframe_check_vartype(data, ov.names=ov.names.y) } # ordered argument? if(length(ordered) > 0L) { ord.idx <- which(ovy %in% ordered) ov.types[ord.idx] <- "ordered" } # 0. at least some variables must be ordinal if(!any(ov.types == "ordered")) { stop("lavaan ERROR: estimator=\"PML\" is only available if some variables are ordinal") } # 1. all variables must be ordinal (for now) # (the mixed continuous/ordinal case will be added later) if(any(ov.types != "ordered")) { stop("lavaan ERROR: estimator=\"PML\" can not handle mixed continuous and ordinal data (yet)") } # 2. we can not handle exogenous covariates yet #if(length(ovx) > 0L) { # stop("lavaan ERROR: estimator=\"PML\" can not handle exogenous covariates (yet)") #} } ##################### #### 3. lavdata #### ##################### if(!is.null(slotData)) { lavdata <- slotData } else { if(lavoptions$conditional.x) { ov.names <- ov.names.y } lavdata <- lavData(data = data, group = group, cluster = cluster, ov.names = ov.names, ov.names.x = ov.names.x, ordered = ordered, sample.cov = sample.cov, sample.mean = sample.mean, sample.nobs = sample.nobs, lavoptions = lavoptions) } # what have we learned from the data? 
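# editor's note (comments only): lavData() classifies the input as one of
#   "none"   -- no data at all: fitting is skipped, se/test set to "none"
#   "moment" -- only sample.cov/sample.mean/sample.nobs: no bootstrap, and
#               robust estimators need user-supplied NACOV/WLS.V
#   "full"   -- raw data: everything is available
# e.g., a call like cfa(model, sample.cov = S, sample.nobs = 300), with S
# a hypothetical covariance matrix, yields data.type "moment".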
if(lavdata@data.type == "none") { lavoptions$do.fit <- FALSE lavoptions$start <- "simple" lavoptions$se <- "none" lavoptions$test <- "none" } else if(lavdata@data.type == "moment") { # catch here some options that will not work with moments if(lavoptions$se == "bootstrap") { stop("lavaan ERROR: bootstrapping requires full data") } if(lavoptions$estimator %in% c("MLM", "MLMV", "MLMVS", "MLR", "ULSM", "ULSMV", "ULSMVS") && is.null(NACOV)) { stop("lavaan ERROR: estimator ", lavoptions$estimator, " requires full data or user-provided NACOV") } if(lavoptions$estimator %in% c("WLS", "WLSM", "WLSMV", "WLSMVS", "DWLS") && is.null(WLS.V)) { stop("lavaan ERROR: estimator ", lavoptions$estimator, " requires full data or user-provided WLS.V") } } timing$InitData <- (proc.time()[3] - start.time) start.time <- proc.time()[3] if(lavoptions$debug) { print(str(lavdata)) } # if lavdata@nlevels > 1L, adapt start option (for now) # until we figure out how to handle groups+blocks if(lavdata@nlevels > 1L) { lavoptions$start <- "simple" } ######################## #### 4. lavpartable #### ######################## if(!is.null(slotParTable)) { lavpartable <- slotParTable } else if(is.character(model)) { # check FLAT before we proceed if(lavoptions$debug) { print(as.data.frame(FLAT)) } # catch ~~ of fixed.x covariates if fixed.x = TRUE if(lavoptions$fixed.x) { tmp <- try(vnames(FLAT, type = "ov.x", ov.x.fatal = TRUE), silent = TRUE) if(inherits(tmp, "try-error")) { warning("lavaan WARNING: syntax contains parameters involving exogenous covariates; switching to fixed.x = FALSE") lavoptions$fixed.x <- FALSE } } if(lavoptions$conditional.x) { tmp <- vnames(FLAT, type = "ov.x", ov.x.fatal = TRUE) } lavpartable <- lavaanify(model = FLAT, constraints = constraints, varTable = lavdata@ov, ngroups = lavdata@ngroups, meanstructure = lavoptions$meanstructure, int.ov.free = lavoptions$int.ov.free, int.lv.free = lavoptions$int.lv.free, orthogonal = lavoptions$orthogonal, conditional.x = lavoptions$conditional.x, fixed.x = lavoptions$fixed.x, std.lv = lavoptions$std.lv, parameterization = lavoptions$parameterization, auto.fix.first = lavoptions$auto.fix.first, auto.fix.single = lavoptions$auto.fix.single, auto.var = lavoptions$auto.var, auto.cov.lv.x = lavoptions$auto.cov.lv.x, auto.cov.y = lavoptions$auto.cov.y, auto.th = lavoptions$auto.th, auto.delta = lavoptions$auto.delta, group.equal = lavoptions$group.equal, group.partial = lavoptions$group.partial, group.w.free = lavoptions$group.w.free, debug = lavoptions$debug, warn = lavoptions$warn, as.data.frame. = FALSE) } else if(inherits(model, "lavaan")) { lavpartable <- parTable(model) } else if(is.list(model)) { # we already checked this when creating FLAT # but we may need to complete it lavpartable <- as.list(FLAT) # in case model is a data.frame # complete table lavpartable <- lav_partable_complete(lavpartable) } else { stop("lavaan ERROR: model [type = ", class(model), "] is not of type character or list") } if(lavoptions$debug) { print(as.data.frame(lavpartable)) } # at this point, we should check if the partable is complete # or not; this is especially relevant if the lavaan() function # was used, but the user has forgotten some variances/intercepts... junk <- lav_partable_check(lavpartable, categorical = lavoptions$categorical, warn = TRUE) # 4b. get partable attributes lavpta <- lav_partable_attributes(lavpartable) timing$ParTable <- (proc.time()[3] - start.time) start.time <- proc.time()[3] ########################### #### 5. 
lavsamplestats #### ########################## if(!is.null(slotSampleStats)) { lavsamplestats <- slotSampleStats } else if(lavdata@data.type == "full") { lavsamplestats <- lav_samplestats_from_data( lavdata = lavdata, missing = lavoptions$missing, rescale = (lavoptions$estimator %in% c("ML","REML","NTRLS") && lavoptions$likelihood == "normal"), estimator = lavoptions$estimator, mimic = lavoptions$mimic, meanstructure = lavoptions$meanstructure, conditional.x = lavoptions$conditional.x, fixed.x = lavoptions$fixed.x, group.w.free = lavoptions$group.w.free, missing.h1 = (lavoptions$missing != "listwise"), WLS.V = WLS.V, NACOV = NACOV, se = lavoptions$se, information = lavoptions$information, ridge = lavoptions$ridge, optim.method = lavoptions$optim.method.cor, zero.add = lavoptions$zero.add, zero.keep.margins = lavoptions$zero.keep.margins, zero.cell.warn = lavoptions$zero.cell.warn, debug = lavoptions$debug, verbose = lavoptions$verbose) } else if(lavdata@data.type == "moment") { lavsamplestats <- lav_samplestats_from_moments( sample.cov = sample.cov, sample.mean = sample.mean, sample.nobs = sample.nobs, ov.names = lavpta$vnames$ov, estimator = lavoptions$estimator, mimic = lavoptions$mimic, meanstructure = lavoptions$meanstructure, group.w.free = lavoptions$group.w.free, WLS.V = WLS.V, NACOV = NACOV, ridge = lavoptions$ridge, rescale = lavoptions$sample.cov.rescale) } else { # no data lavsamplestats <- new("lavSampleStats", ngroups=lavdata@ngroups, nobs=as.list(rep(0L, lavdata@ngroups)), cov.x=vector("list",length=lavdata@ngroups), th.idx=lavpta$vidx$th.mean, missing.flag=FALSE) } timing$Sample <- (proc.time()[3] - start.time) start.time <- proc.time()[3] if(lavoptions$debug) { print(str(lavsamplestats)) } ##################### #### 6. lavstart #### ##################### if(!is.null(slotModel)) { lavmodel <- slotModel # FIXME #lavaanStart <- lav_model_get_parameters(lavmodel, type="user") #lavpartable$start <- lavaanStart timing$Start <- (proc.time()[3] - start.time) start.time <- proc.time()[3] timing$Model <- (proc.time()[3] - start.time) start.time <- proc.time()[3] } else { # check if we have been given a full parameter table as model= input if(!is.null(lavpartable$est) && lavoptions$start == "default") { # check if all 'est' values look ok # this is not the case, e.g., if partables have been merged, as # in semTools' auxiliary() function # check for zero free variances and NA values zero.idx <- which(lavpartable$free > 0L & lavpartable$op == "~~" & lavpartable$lhs == lavpartable$rhs & lavpartable$est == 0) if(length(zero.idx) > 0L || any(is.na(lavpartable$est))) { lavpartable$start <- lav_start(start.method = lavoptions$start, lavpartable = lavpartable, lavsamplestats = lavsamplestats, model.type = lavoptions$model.type, mimic = lavoptions$mimic, debug = lavoptions$debug) } else { lavpartable$start <- lavpartable$est } } else { START <- lav_start(start.method = lavoptions$start, lavpartable = lavpartable, lavsamplestats = lavsamplestats, model.type = lavoptions$model.type, mimic = lavoptions$mimic, debug = lavoptions$debug) # sanity check if("start" %in% lavoptions$check) { START <- lav_start_check_cov(lavpartable = lavpartable, start = START) } lavpartable$start <- START } timing$Start <- (proc.time()[3] - start.time) start.time <- proc.time()[3] ##################### #### 7.
lavmodel #### ##################### lavmodel <- lav_model(lavpartable = lavpartable, lavoptions = lavoptions, th.idx = lavsamplestats@th.idx) timing$Model <- (proc.time()[3] - start.time) start.time <- proc.time()[3] # if no data, call lav_model_set_parameters once (for categorical case) if(lavdata@data.type == "none" && lavmodel@categorical) { lavmodel <- lav_model_set_parameters(lavmodel = lavmodel, x = lav_model_get_parameters(lavmodel)) } } # slotModel ##################### #### 8. lavcache #### ##################### if(!is.null(slotCache)) { lavcache <- slotCache } else { # prepare cache -- stuff needed for estimation, but also post-estimation lavcache <- vector("list", length=lavdata@ngroups) if(lavoptions$estimator == "PML") { TH <- computeTH(lavmodel) BI <- lav_tables_pairwise_freq_cell(lavdata) # handle option missing = "available.cases" if(lavoptions$missing == "available.cases" || lavoptions$missing == "doubly.robust") { UNI <- lav_tables_univariate_freq_cell(lavdata) } # checks for missing = "doubly.robust" if (lavoptions$missing == "doubly.robust") { # check whether the probabilities pairwiseProbGivObs and # univariateProbGivObs are given by the user if(is.null(lavoptions$control$pairwiseProbGivObs)) { stop("lavaan ERROR: could not find `pairwiseProbGivObs' in control() list") } if(is.null(lavoptions$control$univariateProbGivObs)) { stop("lavaan ERROR: could not find `univariateProbGivObs' in control() list") } } for(g in 1:lavdata@ngroups) { if(is.null(BI$group) || max(BI$group) == 1L) { bifreq <- BI$obs.freq binobs <- BI$nobs } else { idx <- which(BI$group == g) bifreq <- BI$obs.freq[idx] binobs <- BI$nobs[idx] } LONG <- LongVecInd(no.x = ncol(lavdata@X[[g]]), all.thres = TH[[g]], index.var.of.thres = lavmodel@th.idx[[g]]) lavcache[[g]] <- list(bifreq = bifreq, nobs = binobs, LONG = LONG) # available cases if(lavoptions$missing == "available.cases" || lavoptions$missing == "doubly.robust") { if(is.null(UNI$group) || max(UNI$group) == 1L) { unifreq <- UNI$obs.freq uninobs <- UNI$nobs } else { idx <- which(UNI$group == g) unifreq <- UNI$obs.freq[idx] uninobs <- UNI$nobs[idx] } lavcache[[g]]$unifreq <- unifreq lavcache[[g]]$uninobs <- uninobs uniweights.casewise <- rowSums( is.na( lavdata@X[[g]] ) ) lavcache[[g]]$uniweights.casewise <- uniweights.casewise # weights per response category per variable in the same # order as unifreq; i.e.
w_ia, i=1,...,p, (p variables), # a=1,...,Ci, (Ci response categories for variable i), # a running faster than i tmp.uniweights <- apply(lavdata@X[[g]], 2, function(x){ tapply(uniweights.casewise, as.factor(x), sum, na.rm=TRUE) } ) if( is.matrix(tmp.uniweights) ) { lavcache[[g]]$uniweights <- c(tmp.uniweights) } if( is.list(tmp.uniweights) ) { lavcache[[g]]$uniweights <- unlist(tmp.uniweights) } } # "available.cases" or "doubly.robust" # doubly.robust only if (lavoptions$missing == "doubly.robust") { # add the user-provided probabilities # pairwiseProbGivObs and univariateProbGivObs to the Cache lavcache[[g]]$pairwiseProbGivObs <- lavoptions$control$pairwiseProbGivObs[[g]] lavcache[[g]]$univariateProbGivObs <- lavoptions$control$univariateProbGivObs[[g]] # compute different index vectors that will help with the # computations ind.vec <- as.data.frame(LONG[1:5] ) ind.vec <- ind.vec[ ((ind.vec$index.thres.var1.of.pair!=0) & (ind.vec$index.thres.var2.of.pair!=0)) , ] idx.cat.y1 <- ind.vec$index.thres.var1.of.pair idx.cat.y2 <- ind.vec$index.thres.var2.of.pair idx.pairs <- ind.vec$index.pairs.extended lavcache[[g]]$idx.pairs <- idx.pairs idx.cat.y1.split <- split(idx.cat.y1, idx.pairs) idx.cat.y2.split <- split(idx.cat.y2, idx.pairs) lavcache[[g]]$idx.cat.y1.split <- idx.cat.y1.split lavcache[[g]]$idx.cat.y2.split <- idx.cat.y2.split # generate the variable and category index vectors which # keep track of which variables and categories the # elements of vector probY1Gy2 refer to nlev <- lavdata@ov$nlev nvar <- length(nlev) idx.var.matrix <- matrix(1:nvar, nrow=nvar, ncol=nvar) idx.diag <- diag( matrix(1:(nvar*nvar), nrow=nvar, ncol=nvar) ) idx.Y1Gy2.matrix <- rbind(t(idx.var.matrix)[-idx.diag], idx.var.matrix [-idx.diag]) no.pairs.Y1Gy2 <- ncol(idx.Y1Gy2.matrix) idx.cat.Y1 <- unlist(lapply(1:no.pairs.Y1Gy2, function(x) { rep( 1:nlev[ idx.Y1Gy2.matrix[1,x] ], times= nlev[ idx.Y1Gy2.matrix[2,x] ] )} ) ) idx.cat.Gy2 <- unlist(lapply(1:no.pairs.Y1Gy2, function(x) { rep( 1:nlev[ idx.Y1Gy2.matrix[2,x] ], each= nlev[ idx.Y1Gy2.matrix[1,x] ] )} ) ) dim.pairs <- unlist(lapply(1:no.pairs.Y1Gy2, function(x) { nlev[ idx.Y1Gy2.matrix[1,x] ] * nlev[ idx.Y1Gy2.matrix[2,x] ] }) ) idx.Y1 <- unlist( mapply(rep, idx.Y1Gy2.matrix[1,], each=dim.pairs) ) idx.Gy2 <- unlist( mapply(rep, idx.Y1Gy2.matrix[2,], each=dim.pairs) ) lavcache[[g]]$idx.Y1 <- idx.Y1 lavcache[[g]]$idx.Gy2 <- idx.Gy2 lavcache[[g]]$idx.cat.Y1 <- idx.cat.Y1 lavcache[[g]]$idx.cat.Gy2 <- idx.cat.Gy2 # the vector below keeps track of the variable each column # of the matrix univariateProbGivObs refers to lavcache[[g]]$id.uniPrGivObs <- sort( c( unique(lavmodel@th.idx[[g]]) , lavmodel@th.idx[[g]] ) ) } # doubly.robust } # g } # copy response patterns to cache -- FIXME!! (data not included # in Model only functions) if(lavdata@data.type == "full" && !is.null(lavdata@Rp[[1L]])) { for(g in 1:lavdata@ngroups) { lavcache[[g]]$pat <- lavdata@Rp[[g]]$pat } } } # If estimator = MML, store Gauss-Hermite nodes/weights if(lavoptions$estimator == "MML") { for(g in 1:lavdata@ngroups) { # count only the ones with non-normal indicators #nfac <- lavpta$nfac.nonnormal[[g]] nfac <- lavpta$nfac[[g]] lavcache[[g]]$GH <- lav_integration_gauss_hermite(n = lavoptions$integration.ngh, dnorm = TRUE, mean = 0, sd = 1, ndim = nfac) #lavcache[[g]]$DD <- lav_model_gradient_DD(lavmodel, group = g) } } ############################ #### 10.
est + lavoptim #### ############################ x <- NULL if(lavoptions$do.fit && lavoptions$estimator != "none" && lavmodel@nx.free > 0L) { x <- lav_model_estimate(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavoptions = lavoptions, lavcache = lavcache) lavmodel <- lav_model_set_parameters(lavmodel, x = x) # store parameters in @ParTable$est lavpartable$est <- lav_model_get_parameters(lavmodel = lavmodel, type = "user", extra = TRUE) if(!is.null(attr(x, "con.jac"))) lavmodel@con.jac <- attr(x, "con.jac") if(!is.null(attr(x, "con.lambda"))) lavmodel@con.lambda <- attr(x, "con.lambda") # check if model has converged or not if(!attr(x, "converged") && lavoptions$warn) { warning("lavaan WARNING: model has NOT converged!") } } else { x <- numeric(0L) attr(x, "iterations") <- 0L; attr(x, "converged") <- FALSE attr(x, "control") <- lavoptions$control attr(x, "fx") <- lav_model_objective(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache) lavpartable$est <- lavpartable$start } # should we fake/force convergence? (eg. to enforce the # computation of a test statistic) if(lavoptions$optim.force.converged) { attr(x, "converged") <- TRUE } # store optimization info in lavoptim lavoptim <- list() x2 <- x; attributes(x2) <- NULL lavoptim$x <- x2 lavoptim$npar <- length(x) lavoptim$iterations <- attr(x, "iterations") lavoptim$converged <- attr(x, "converged") fx.copy <- fx <- attr(x, "fx"); attributes(fx) <- NULL lavoptim$fx <- fx lavoptim$fx.group <- attr(fx.copy, "fx.group") if(!is.null(attr(fx.copy, "logl.group"))) { lavoptim$logl.group <- attr(fx.copy, "logl.group") lavoptim$logl <- sum(lavoptim$logl.group) } else { lavoptim$logl.group <- as.numeric(NA) lavoptim$logl <- as.numeric(NA) } lavoptim$control <- attr(x, "control") ######################## #### 11. lavimplied #### ######################## lavimplied <- lav_model_implied(lavmodel) timing$Estimate <- (proc.time()[3] - start.time) start.time <- proc.time()[3] ############################### #### 12. lavvcov + lavboot #### ############################### VCOV <- NULL if(lavoptions$se != "none" && lavoptions$se != "external" && lavmodel@nx.free > 0L && attr(x, "converged")) { if(lavoptions$verbose) { cat("Computing VCOV for se =", lavoptions$se, "...") } VCOV <- lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, lavoptions = lavoptions, lavdata = lavdata, lavpartable = lavpartable, lavcache = lavcache, lavimplied = lavimplied) if(lavoptions$verbose) { cat(" done.\n") } } # extract bootstrap results (if any) if(!is.null(attr(VCOV, "BOOT.COEF"))) { lavboot <- list() lavboot$coef <- attr(VCOV, "BOOT.COEF") } else { lavboot <- list() } # store VCOV in vcov # strip all attributes but 'dim' tmp.attr <- attributes(VCOV) VCOV1 <- VCOV attributes(VCOV1) <- tmp.attr["dim"] lavvcov <- list(se = lavoptions$se, information = lavoptions$information, vcov = VCOV1) # store se in partable if(lavoptions$se != "external") { lavpartable$se <- lav_model_vcov_se(lavmodel = lavmodel, lavpartable = lavpartable, VCOV = VCOV, BOOT = lavboot$coef) } else { if(is.null(lavpartable$se)) { lavpartable$se <- lav_model_vcov_se(lavmodel = lavmodel, lavpartable = lavpartable, VCOV = NULL, BOOT = NULL) warning("lavaan WARNING: se = \"external\" but parameter table does not contain a `se' column") } } timing$VCOV <- (proc.time()[3] - start.time) start.time <- proc.time()[3] ##################### #### 13. 
lavtest #### ##################### TEST <- NULL if(lavoptions$test != "none" && attr(x, "converged")) { if(lavoptions$verbose) { cat("Computing TEST for test =", lavoptions$test, "...") } TEST <- lav_model_test(lavmodel = lavmodel, lavpartable = lavpartable, lavsamplestats = lavsamplestats, lavoptions = lavoptions, x = x, VCOV = VCOV, lavdata = lavdata, lavcache = lavcache) if(lavoptions$verbose) { cat(" done.\n") } } else { TEST <- list(list(test="none", stat=NA, stat.group=rep(NA, lavdata@ngroups), df=NA, refdistr="unknown", pvalue=NA)) } # store test in lavtest lavtest <- TEST timing$TEST <- (proc.time()[3] - start.time) start.time <- proc.time()[3] #################### #### 14. lavfit #### #################### lavfit <- lav_model_fit(lavpartable = lavpartable, lavmodel = lavmodel, x = x, VCOV = VCOV, TEST = TEST) timing$total <- (proc.time()[3] - start.time0) #################### #### 15. lavaan #### #################### lavaan <- new("lavaan", call = mc, # match.call timing = timing, # list Options = lavoptions, # list ParTable = lavpartable, # list pta = lavpta, # list Data = lavdata, # S4 class SampleStats = lavsamplestats, # S4 class Model = lavmodel, # S4 class Cache = lavcache, # list Fit = lavfit, # S4 class boot = lavboot, # list optim = lavoptim, # list implied = lavimplied, # list vcov = lavvcov, # list test = lavtest, # list external = list() # empty list ) # post-fitting check if("post" %in% lavoptions$check && lavTech(lavaan, "converged")) { lavInspect(lavaan, "post.check") } lavaan } # cfa + sem cfa <- sem <- function(# user-specified model: can be syntax, parameter Table model = NULL, # data (second argument, most used) data = NULL, # variable information ordered = NULL, # summary data sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, # multiple groups? group = NULL, # multiple levels? cluster = NULL, # constraints constraints = '', # user-specified variance matrices WLS.V = NULL, NACOV = NULL, # options (dotdotdot) ...) { mc <- match.call(expand.dots = TRUE) # set model.type mc$model.type = as.character( mc[[1L]] ) if(length(mc$model.type) == 3L) { mc$model.type <- mc$model.type[3L] } dotdotdot <- list(...) if(!is.null(dotdotdot$std.lv)) { std.lv <- dotdotdot$std.lv } else { std.lv <- FALSE } # default options for sem/cfa call mc$int.ov.free = TRUE mc$int.lv.free = FALSE mc$auto.fix.first = !std.lv mc$auto.fix.single = TRUE mc$auto.var = TRUE mc$auto.cov.lv.x = TRUE mc$auto.cov.y = TRUE mc$auto.th = TRUE mc$auto.delta = TRUE # call mother function mc[[1L]] <- quote(lavaan::lavaan) eval(mc, parent.frame()) } # simple growth models growth <- function(# user-specified model: can be syntax, parameter Table model = NULL, # data (second argument, most used) data = NULL, # variable information ordered = NULL, # summary data sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, # multiple groups? group = NULL, # multiple levels? cluster = NULL, # constraints constraints = '', # user-specified variance matrices WLS.V = NULL, NACOV = NULL, # options (dotdotdot) ...) { mc <- match.call(expand.dots = TRUE) # set model.type mc$model.type = as.character( mc[[1L]] ) if(length(mc$model.type) == 3L) { mc$model.type <- mc$model.type[3L] } dotdotdot <- list(...) 
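# editor's note (comments only): growth() differs from cfa()/sem() only in
# the intercept defaults set just below -- observed intercepts are fixed
# to zero (int.ov.free = FALSE) while the latent growth factors get free
# means (int.lv.free = TRUE). A typical call, using the Demo.growth data
# that ships with lavaan:
#
#   m <- ' i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
#          s =~ 0*t1 + 1*t2 + 2*t3 + 3*t4 '
#   fit <- growth(m, data = Demo.growth)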
if(!is.null(dotdotdot$std.lv)) { std.lv <- dotdotdot$std.lv } else { std.lv <- FALSE } # default options for growth call mc$model.type = "growth" mc$int.ov.free = FALSE mc$int.lv.free = TRUE mc$auto.fix.first = !std.lv mc$auto.fix.single = TRUE mc$auto.var = TRUE mc$auto.cov.lv.x = TRUE mc$auto.cov.y = TRUE mc$auto.th = TRUE mc$auto.delta = TRUE # call mother function mc[[1L]] <- quote(lavaan::lavaan) eval(mc, parent.frame()) } lavaan/R/ctr_estfun.R0000644000176200001440000001674012660665273014231 0ustar liggesusers# contributed by Ed Merkle (17 Jan 2013) # YR 12 Feb 2013: small changes to match the results of lav_model_gradient # in the multiple group case # YR 30 May 2014: handle 1-variable case (fixing apply in lines 56, 62, 108) # YR 05 Nov 2015: add remove.duplicated = TRUE, to cope with strucchange in # case of simple equality constraints # YR 19 Nov 2015: if constraints have been used, compute case-wise Lagrange # multipliers, and define the scores as: SC + (t(R) lambda) # YR 05 Feb 2016: catch conditional.x = TRUE: no support (for now), until # we can use the generic 0.6 infrastructure for scores, # including the missing-values case # YR 16 Feb 2016: adapt to changed @Mp slot elements; add remove.empty.cases= # argument estfun.lavaan <- lavScores <- function(object, scaling = FALSE, ignore.constraints = FALSE, remove.duplicated = TRUE, remove.empty.cases = TRUE) { stopifnot(inherits(object, "lavaan")) # what if estimator != ML? # avoid hard error (using stop); throw a warning, and return an empty matrix if(object@Options$estimator != "ML") { warning("lavaan WARNING: scores only available if estimator is ML") return(matrix(0,0,0)) } # check if conditional.x = TRUE if(object@Model@conditional.x) { stop("lavaan ERROR: scores not available (yet) if conditional.x = TRUE") } # shortcuts lavdata <- object@Data lavmodel <- object@Model lavsamplestats <- object@SampleStats lavoptions <- object@Options ## number of variables/sample size #ntab <- unlist(lavsamplestats@nobs) ## change in 0.5-17: we keep the 'empty cases' ## and 'fill' in the scores at their 'case.idx' ## later, we remove the 'empty rows' #ntot <- max( object@Data@case.idx[[ object@Data@ngroups ]] ) ntab <- unlist(lavdata@norig) ntot <- sum(ntab) npar <- length(coef(object)) #if(object@Model@eq.constraints) { # npar <- NCOL(object@Model@eq.constraints.K) #} Score.mat <- matrix(NA, ntot, npar) for(g in 1:lavsamplestats@ngroups) { if (lavsamplestats@ngroups > 1){ moments <- fitted(object)[[g]] } else { moments <- fitted(object) } Sigma.hat <- moments$cov if(lavoptions$likelihood == "wishart") { N1 <- lavsamplestats@nobs[[g]]/(lavsamplestats@nobs[[g]] - 1) } else { N1 <- 1 } if(!lavsamplestats@missing.flag) { # complete data #if(lavmodel@meanstructure) { # mean structure nvar <- ncol(lavsamplestats@cov[[g]]) Mu.hat <- moments$mean X <- lavdata@X[[g]] Sigma.inv <- inv.chol(Sigma.hat, logdet=FALSE) group.w <- (unlist(lavsamplestats@nobs)/lavsamplestats@ntotal) J <- matrix(1, 1L, ntab[g]) ## FIXME: needed? better maybe rowSums/colSums?
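# editor's note (comments only): for complete data, the H1 'scores' built
# below are, per case i, based on
#   d logL_i / d mu          = Sigma^{-1} (y_i - mu)
#   d logL_i / d vech(Sigma) = the vech part of
#       Sigma^{-1} ((y_i - mu)(y_i - mu)' - Sigma) Sigma^{-1}
# the code stores the sign-flipped versions (dx.Mu, dx.Sigma), flips the
# sign back later when filling Score.mat, and uses J2 to halve the
# diagonal entries to account for the vech parameterization.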
J2 <- matrix(1, nvar, nvar) diag(J2) <- 0.5 if(lavmodel@meanstructure) { ## scores.H1 (H1 = saturated model) mean.diff <- t(t(X) - Mu.hat %*% J) dx.Mu <- -1 * mean.diff %*% Sigma.inv dx.Sigma <- t(matrix(apply(mean.diff, 1L, function(x) lav_matrix_vech(- J2 * (Sigma.inv %*% (tcrossprod(x)*N1 - Sigma.hat) %*% Sigma.inv))), ncol=nrow(mean.diff))) scores.H1 <- cbind(dx.Mu, dx.Sigma) } else { mean.diff <- t(t(X) - lavsamplestats@mean[[g]] %*% J) dx.Sigma <- t(matrix(apply(mean.diff, 1L, function(x) lav_matrix_vech(- J2 * (Sigma.inv %*% (tcrossprod(x)*N1 - Sigma.hat) %*% Sigma.inv))), ncol=nrow(mean.diff))) scores.H1 <- dx.Sigma } ## FIXME? Seems like we would need group.w even in the ## complete-data case: ##if(scaling){ ## scores.H1 <- group.w[g] * scores.H1 ##} #} else { # ## no mean structure # stop("Score calculation with no mean structure is not implemented.") #} } else { # incomplete data nsub <- ntab[g] M <- lavsamplestats@missing[[g]] Mp <- lavdata@Mp[[g]] #pat.idx <- match(MP1$id, MP1$order) group.w <- (unlist(lavsamplestats@nobs)/lavsamplestats@ntotal) Mu.hat <- moments$mean nvar <- ncol(lavsamplestats@cov[[g]]) score.sigma <- matrix(0, nsub, nvar*(nvar+1)/2) score.mu <- matrix(0, nsub, nvar) for(p in 1:length(M)) { ## Data #X <- M[[p]][["X"]] case.idx <- Mp$case.idx[[p]] var.idx <- M[[p]][["var.idx"]] X <- lavdata@X[[g]][case.idx,var.idx,drop = FALSE] nobs <- M[[p]][["freq"]] ## Which unique entries of covariance matrix are estimated? ## (Used to keep track of scores in score.sigma) var.idx.mat <- tcrossprod(var.idx) Sigma.idx <- which(var.idx.mat[lower.tri(var.idx.mat, diag=T)]==1) J <- matrix(1, 1L, nobs) #[var.idx] J2 <- matrix(1, nvar, nvar)[var.idx, var.idx, drop = FALSE] diag(J2) <- 0.5 Sigma.inv <- inv.chol(Sigma.hat[var.idx, var.idx, drop = FALSE], logdet=FALSE) Mu <- Mu.hat[var.idx] mean.diff <- t(t(X) - Mu %*% J) ## Scores for missing pattern p within group g score.mu[case.idx,var.idx] <- -1 * mean.diff %*% Sigma.inv score.sigma[case.idx,Sigma.idx] <- t(matrix(apply(mean.diff, 1L, function(x) lav_matrix_vech(- J2 * (Sigma.inv %*% (tcrossprod(x) - Sigma.hat[var.idx,var.idx,drop = FALSE]) %*% Sigma.inv)) ), ncol=nrow(mean.diff)) ) } scores.H1 <- cbind(score.mu, score.sigma) if(scaling){ scores.H1 <- group.w[g] * scores.H1 } } # missing Delta <- computeDelta(lavmodel = lavmodel)[[g]] #if(lavmodel@eq.constraints) { # Delta <- Delta %*% lavmodel@eq.constraints.K # + lavmodel@eq.constraints.k0 # #x <- as.numeric(lavmodel@eq.constraints.K %*% x) + # # lavmodel@eq.constraints.k0 #} wi <- lavdata@case.idx[[g]] Score.mat[wi,] <- -scores.H1 %*% Delta if(scaling){ Score.mat[wi,] <- (-1/ntot) * Score.mat[wi,] } } # g # handle empty rows if(remove.empty.cases) { #empty.idx <- which( apply(Score.mat, 1L, # function(x) sum(is.na(x))) == ncol(Score.mat) ) empty.idx <- unlist(lapply(lavdata@Mp, "[[", "empty.idx")) if(length(empty.idx) > 0L) { Score.mat <- Score.mat[-empty.idx,,drop=FALSE] } } # provide column names colnames(Score.mat) <- names(coef(object)) # handle general constraints, so that the sum of the columns equals zero if(!ignore.constraints && sum(lavmodel@ceq.linear.idx, lavmodel@ceq.nonlinear.idx, lavmodel@cin.linear.idx, lavmodel@cin.nonlinear.idx) > 0) { R <- object@Model@con.jac[,] PRE <- lav_constraints_lambda_pre(object) #LAMBDA <- -1 * t(PRE %*% t(Score.mat)) #RLAMBDA <- t(t(R) %*% t(LAMBDA)) Score.mat <- Score.mat - t( t(R) %*% PRE %*% t(Score.mat) ) } # handle simple equality constraints if(remove.duplicated && lavmodel@eq.constraints) { simple.flag <- 
lav_constraints_check_simple(lavmodel) if(simple.flag) { K <- lav_constraints_R2K(lavmodel) Score.mat <- Score.mat %*% K } else { warning("lavaan WARNING: remove.duplicated is TRUE, but equality constraints do not appear to be simple; returning full scores") } } Score.mat } lavaan/R/lav_mvnorm.R0000644000176200001440000004244413040167043014226 0ustar liggesusers# the multivariate normal distribution # 1) loglikelihood (from raw data, or sample statistics) # 2) derivatives with respect to mu, Sigma, vech(Sigma) # 3) casewise scores with respect to mu, vech(Sigma), mu + vech(Sigma) # 4) hessian mu + vech(Sigma) # 5) information h0 mu + vech(Sigma) # 5a: (unit) expected information # 5b: (unit) observed information # 5c: (unit) first.order information # 6) inverted information h0 mu + vech(Sigma) # 6a: (unit) inverted expected information # 6b: / # 6c: / # 7) ACOV h0 mu + vech(Sigma) # 7a: 1/N * inverted expected information # 7b: 1/N * inverted observed information # 7c: 1/N * inverted first-order information # 7d: sandwich acov # YR 07 Feb 2016: first version # YR 24 Mar 2016: added firstorder information, hessian logl # YR 19 Jan 2017: added lav_mvnorm_inverted_information_expected # 0. densities lav_mvnorm_dmvnorm <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sigma.inv = NULL, Sinv.method = "eigen", log = TRUE) { if(is.matrix(Y)) { if(is.null(Mu) && is.null(Sigma) && is.null(Sigma.inv)) { out <- lav_mvnorm_loglik_data_z(Y = Y, casewise = TRUE) } else { out <- lav_mvnorm_loglik_data(Y = Y, Mu = Mu, Sigma = Sigma, casewise = TRUE, Sinv.method = Sinv.method) } } else { # just one P <- length(Y); LOG.2PI <- log(2 * pi) if(is.null(Mu) && is.null(Sigma) && is.null(Sigma.inv)) { # mahalanobis distance DIST <- sum(Y * Y) out <- -(P * LOG.2PI + DIST)/2 } else { if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") } else { logdet <- attr(Sigma.inv, "logdet") if(is.null(logdet)) { # compute - ln|Sigma.inv| ev <- eigen(Sigma.inv, symmetric = TRUE, only.values = TRUE) logdet <- -1 * sum(log(ev$values)) } } # mahalanobis distance Yc <- Y - Mu DIST <- sum(Yc %*% Sigma.inv * Yc) out <- -(P * LOG.2PI + logdet + DIST)/2 } } if(!log) { out <- exp(out) } out } # 1. 
likelihood # 1a: input is raw data # (note casewise = TRUE same as: dmvnorm(Y, mean, sigma, log = TRUE)) lav_mvnorm_loglik_data <- function(Y = NULL, Mu = NULL, Sigma = NULL, casewise = FALSE, Sinv.method = "eigen") { P <- NCOL(Y); N <- NROW(Y); Mu <- as.numeric(Mu) if(casewise) { LOG.2PI <- log(2 * pi) # invert Sigma if(Sinv.method == "chol") { cS <- chol(Sigma); icS <- backsolve(cS, diag(P)) Yc <- t( t(Y) - Mu ) DIST <- rowSums((Yc %*% icS)^2) logdet <- -2 * sum(log(diag(icS))) } else { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") # mahalanobis distance Yc <- t( t(Y) - Mu ) DIST <- rowSums(Yc %*% Sigma.inv * Yc) } loglik <- -(P * LOG.2PI + logdet + DIST)/2 } else { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) sample.mean <- colMeans(Y) sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean) loglik <- lav_mvnorm_loglik_samplestats(sample.mean = sample.mean, sample.cov = sample.cov, sample.nobs = N, Mu = Mu, Sigma.inv = Sigma.inv) } loglik } # 1b: input are sample statistics (mean, cov, N) only lav_mvnorm_loglik_samplestats <- function(sample.mean = NULL, sample.cov = NULL, sample.nobs = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { P <- length(sample.mean); N <- sample.nobs Mu <- as.numeric(Mu); sample.mean <- as.numeric(sample.mean) LOG.2PI <- log(2 * pi) if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") } else { logdet <- attr(Sigma.inv, "logdet") if(is.null(logdet)) { # compute - ln|Sigma.inv| ev <- eigen(Sigma.inv, symmetric = TRUE, only.values = TRUE) logdet <- -1 * sum(log(ev$values)) } } # tr(Sigma^{-1} %*% S) DIST1 <- sum(Sigma.inv * sample.cov) # (ybar - mu)^T %*% Sigma.inv %*% (ybar - mu) Diff <- as.numeric(sample.mean - Mu) DIST2 <- sum(as.numeric(crossprod(Diff, Sigma.inv)) * Diff) loglik <- -N/2 * (P * LOG.2PI + logdet + DIST1 + DIST2) loglik } # 1c special case: Mu = 0, Sigma = I lav_mvnorm_loglik_data_z <- function(Y = NULL, casewise = FALSE) { P <- NCOL(Y); N <- NROW(Y); LOG.2PI <- log(2 * pi) if(casewise) { DIST <- rowSums(Y * Y) loglik <- -(P * LOG.2PI + DIST)/2 } else { sample.mean <- colMeans(Y) sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean) DIST1 <- sum(diag(sample.cov)) DIST2 <- sum(sample.mean * sample.mean) loglik <- -N/2 * (P * LOG.2PI + DIST1 + DIST2) } loglik } # 2. 
Derivatives

# 2a: derivative logl with respect to mu
lav_mvnorm_dlogl_dmu <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                 Sinv.method = "eigen", Sigma.inv = NULL) {
    Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # subtract 'Mu' from Y
    Yc <- t( t(Y) - Mu )

    # derivative
    dmu <- as.numeric(Sigma.inv %*% colSums(Yc))

    dmu
}

# 2b: derivative logl with respect to Sigma (full matrix, ignoring symmetry)
lav_mvnorm_dlogl_dSigma <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                    Sinv.method = "eigen", Sigma.inv = NULL) {
    N <- NROW(Y); Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # subtract 'Mu' from Y
    Yc <- t( t(Y) - Mu )

    # W.tilde
    W.tilde <- crossprod(Yc) / N

    # derivative
    dSigma <- -(N/2)* (Sigma.inv - (Sigma.inv %*% W.tilde %*% Sigma.inv))

    dSigma
}

# 2c: derivative logl with respect to vech(Sigma)
lav_mvnorm_dlogl_dvechSigma <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                        Sinv.method = "eigen",
                                        Sigma.inv = NULL) {
    N <- NROW(Y); Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # subtract 'Mu' from Y
    Yc <- t( t(Y) - Mu )

    # W.tilde
    W.tilde <- crossprod(Yc) / N

    # derivative (avoiding kronecker product)
    dSigma <- -(N/2)* (Sigma.inv - (Sigma.inv %*% W.tilde %*% Sigma.inv))
    dvechSigma <- as.numeric( lav_matrix_duplication_pre(
                      as.matrix(lav_matrix_vec(dSigma)) ) )

    dvechSigma
}

# 3. Casewise scores

# 3a: casewise scores with respect to mu
lav_mvnorm_scores_mu <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                 Sinv.method = "eigen", Sigma.inv = NULL) {
    Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # subtract Mu
    Yc <- t( t(Y) - Mu )

    # postmultiply with Sigma.inv
    SC <- Yc %*% Sigma.inv

    SC
}

# 3b: casewise scores with respect to vech(Sigma)
lav_mvnorm_scores_vech_sigma <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                         Sinv.method = "eigen",
                                         Sigma.inv = NULL) {
    P <- NCOL(Y); Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # vech(Sigma.inv)
    isigma <- lav_matrix_vech(Sigma.inv)

    # subtract Mu
    Yc <- t( t(Y) - Mu )

    # postmultiply with Sigma.inv
    Yc <- Yc %*% Sigma.inv

    # tcrossprod
    idx1 <- lav_matrix_vech_col_idx(P); idx2 <- lav_matrix_vech_row_idx(P)
    Z <- Yc[,idx1] * Yc[,idx2]

    # subtract isigma from each row
    SC <- t( t(Z) - isigma )

    # adjust for vech
    SC[,lav_matrix_diagh_idx(P)] <- SC[,lav_matrix_diagh_idx(P)] / 2

    SC
}

# 3c: casewise scores with respect to mu + vech(Sigma)
lav_mvnorm_scores_mu_vech_sigma <- function(Y = NULL, Mu = NULL, Sigma = NULL,
                                            Sinv.method = "eigen",
                                            Sigma.inv = NULL) {
    P <- NCOL(Y); Mu <- as.numeric(Mu)
    if(is.null(Sigma.inv)) {
        # invert Sigma
        Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE,
                                                  Sinv.method = Sinv.method)
    }

    # vech(Sigma.inv)
    isigma <- lav_matrix_vech(Sigma.inv)

    # subtract Mu
    Yc <- t( t(Y) - Mu )

    # postmultiply with Sigma.inv
    Yc <- Yc %*% Sigma.inv

    # tcrossprod
    idx1 <- lav_matrix_vech_col_idx(P); idx2 <- lav_matrix_vech_row_idx(P)
    Z <- Yc[,idx1] * Yc[,idx2]

    # subtract isigma from each row
    SC <- t( t(Z) - isigma )

    # adjust for lav_matrix_duplication_pre (not vech!)
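    # (note: the halving below makes each row equal to
    #  t(D) %*% lav_matrix_vec(S.i), where D is the duplication matrix and
    #  S.i = 0.5 * (Sigma.inv %*% tcrossprod(y.i - Mu) %*% Sigma.inv -
    #  Sigma.inv) is case i's full-matrix derivative: off-diagonal vech
    #  positions already absorb both symmetric cells of S.i, diagonal
    #  positions only one.
    #  A handy numerical check (a sketch, not run; Y, Mu, Sigma are
    #  placeholder data/parameter values):
    #    SC <- lav_mvnorm_scores_mu_vech_sigma(Y = Y, Mu = Mu, Sigma = Sigma)
    #    all.equal(colSums(SC),
    #        c(lav_mvnorm_dlogl_dmu(Y = Y, Mu = Mu, Sigma = Sigma),
    #          lav_mvnorm_dlogl_dvechSigma(Y = Y, Mu = Mu, Sigma = Sigma))))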
SC[,lav_matrix_diagh_idx(P)] <- SC[,lav_matrix_diagh_idx(P)] / 2 cbind(Yc, SC) } # 4. hessian of logl # 4a: hessian logl Mu and vech(Sigma) from raw data lav_mvnorm_logl_hessian_data <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- NROW(Y) # observed information observed <- lav_mvnorm_information_observed_data(Y = Y, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) -N*observed } # 4b: hessian Mu and vech(Sigma) from samplestats lav_mvnorm_logl_hessian_samplestats <- function(sample.mean = NULL, sample.cov = NULL, sample.nobs = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- sample.nobs # observed information observed <- lav_mvnorm_information_observed_samplestats(sample.mean = sample.mean, sample.cov = sample.cov, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) -N*observed } # 5) Information h0 # 5a: unit expected information h0 Mu and vech(Sigma) lav_mvnorm_information_expected <- function(Y = NULL, # unused! Mu = NULL, # unused! Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL, meanstructure = TRUE) { if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } I22 <- 0.5 * lav_matrix_duplication_pre_post(Sigma.inv %x% Sigma.inv) if(meanstructure) { I11 <- Sigma.inv out <- lav_matrix_bdiag(I11, I22) } else { out <- I22 } out } # 5b: unit observed information h0 lav_mvnorm_information_observed_data <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- NROW(Y) # sample statistics sample.mean <- colMeans(Y) sample.cov <- 1/N*crossprod(Y) - tcrossprod(sample.mean) lav_mvnorm_information_observed_samplestats(sample.mean = sample.mean, sample.cov = sample.cov, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) } # 5b-bis: observed information h0 from sample statistics lav_mvnorm_information_observed_samplestats <- function(sample.mean = NULL, sample.cov = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { sample.mean <- as.numeric(sample.mean); Mu <- as.numeric(Mu) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } W.tilde <- sample.cov + tcrossprod(sample.mean - Mu) I11 <- Sigma.inv I21 <- lav_matrix_duplication_pre( (Sigma.inv %*% (sample.mean - Mu)) %x% Sigma.inv ) I12 <- t(I21) AAA <- Sigma.inv %*% (2*W.tilde - Sigma) %*% Sigma.inv I22 <- (1/2) * lav_matrix_duplication_pre_post(Sigma.inv %x% AAA) rbind( cbind(I11, I12), cbind(I21, I22) ) } # 5c: unit first-order information h0 lav_mvnorm_information_firstorder <- function(Y = NULL, Mu = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL, meanstructure = TRUE) { N <- NROW(Y) if(meanstructure) { SC <- lav_mvnorm_scores_mu_vech_sigma(Y = Y, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) } else { SC <- lav_mvnorm_scores_vech_sigma(Y = Y, Mu = Mu, Sigma = Sigma, Sinv.method = Sinv.method, Sigma.inv = Sigma.inv) } crossprod(SC)/N } # 6: inverted information h0 # 6a: inverted unit expected information h0 Mu and vech(Sigma) lav_mvnorm_inverted_information_expected <- function(Y = NULL, # unused! Mu = NULL, # unused! 
Sigma = NULL, meanstructure = TRUE) { I22 <- 2 * lav_matrix_duplication_ginv_pre_post(Sigma %x% Sigma) if(meanstructure) { I11 <- Sigma out <- lav_matrix_bdiag(I11, I22) } else { out <- I22 } out } # 6b: inverted unit observed information h0 # one could use the inverse of a partitioned matrix, but that does not # seem to help much... unless we can find an expression for solve(I22) # 6c: inverted unit first-order information h0 # / # 7) ACOV h0 mu + vech(Sigma) # not implemented, as too trivial # 7a: 1/N * inverted expected information # 7b: 1/N * inverted observed information # 7c: 1/N * inverted first-order information # 7d: sandwich acov lavaan/R/lav_bootstrap_lrt.R0000644000176200001440000003376212743731003015613 0ustar liggesusersbootstrapLRT <- function (h0 = NULL, h1 = NULL, R = 1000L, type = "bollen.stine", verbose = FALSE, return.LRT = FALSE, double.bootstrap = "no", double.bootstrap.R = 500L, double.bootstrap.alpha = 0.05, warn = -1L, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, iseed = NULL) { # checks type <- tolower(type) stopifnot(inherits(h0, "lavaan"), inherits(h1, "lavaan"), type %in% c("bollen.stine", "parametric", "yuan", "nonparametric", "ordinary"), double.bootstrap %in% c("no", "FDB", "standard")) if(type == "nonparametric") type <- "ordinary" # check for conditional.x = TRUE if(h0@Model@conditional.x) { stop("lavaan ERROR: this function is not (yet) available if conditional.x = TRUE") } old_options <- options(); options(warn = warn) # prepare LRT <- rep(as.numeric(NA), R) if((h1@optim$fx - h0@optim$fx) > (.Machine$double.eps * 10)) { # restricted fit should not be better! cat(" ... h0@optim$fx = ", h0@optim$fx, "h1@optim$fx = ", h1@optim$fx, "h0 should not be better!\n") options(old_options) return(NULL) } LRT.original <- abs(anova(h0, h1)$`Chisq diff`[2L]) # abs only needed because df may be the same for both models! if(double.bootstrap == "FDB") { LRT.2 <- numeric(R) } else if(double.bootstrap == "standard") { plugin.pvalues <- numeric(R) } # prepare for parallel processing if(missing(parallel)) parallel <- "no" parallel <- match.arg(parallel) have_mc <- have_snow <- FALSE if(parallel != "no" && ncpus > 1L) { if(parallel == "multicore") have_mc <- .Platform$OS.type != "windows" else if(parallel == "snow") have_snow <- TRUE if(!have_mc && !have_snow) ncpus <- 1L } #data data <- h0@Data #Compute covariance matrix and additional mean vector if(type == "bollen.stine" || type == "parametric" || type == "yuan") { Sigma.hat <- computeSigmaHat(lavmodel = h0@Model) Mu.hat <- computeMuHat(lavmodel = h0@Model) } # can we use the original data, or do we need to transform it first? if(type == "bollen.stine" || type == "yuan") { # check if data is complete if(h0@Options$missing != "listwise") stop("lavaan ERROR: bollen.stine/yuan bootstrap not available for missing data") dataX <- vector("list", length=data@ngroups) } else { dataX <- data@X } #Bollen-Stine data transformation if(type == "bollen.stine") { for(g in 1:h0@Data@ngroups) { sigma.sqrt <- lav_matrix_symmetric_sqrt( Sigma.hat[[g]]) S.inv.sqrt <- lav_matrix_symmetric_sqrt(h0@SampleStats@icov[[g]]) # center X <- scale(data@X[[g]], center = TRUE, scale = FALSE) # transform X <- X %*% S.inv.sqrt %*% sigma.sqrt # add model based mean if (h0@Model@meanstructure) X <- scale(X, center = (-1 * Mu.hat[[g]]), scale = FALSE) # transformed data dataX[[g]] <- X } } # Yuan et al data transformation if(type == "yuan") { # page numbers refer to Yuan et al, 2007 # Define a function to find appropriate value of a # (p. 
272) g.a <- function(a, Sigmahat, Sigmahat.inv, S, tau.hat, p){ S.a <- a*S + (1-a)*Sigmahat tmp.term <- S.a %*% Sigmahat.inv res1 <- (sum(diag(tmp.term)) - log(det(tmp.term)) - p) - tau.hat res <- res1*res1 # From p 272 attr(res, "gradient") <- sum(diag((S - Sigmahat) %*% (Sigmahat.inv - chol2inv(chol(S.a))))) res } # Now use g.a within each group for(g in 1:h0@Data@ngroups) { S <- h0@SampleStats@cov[[g]] # test is in Fit slot ghat <- h0@test[[1]]$stat.group[[g]] df <- h0@test[[1]]$df Sigmahat <- Sigma.hat[[g]] Sigmahat.inv <- inv.chol(Sigmahat) nmv <- nrow(Sigmahat) n <- data@nobs[[g]] # Calculate tauhat_1, middle p. 267. # Yuan et al note that tauhat_1 could be negative; # if so, we need to let S.a = Sigmahat. (see middle p 275) tau.hat <- (ghat - df)/(n-1) if (tau.hat >= 0){ # Find a to minimize g.a a <- optimize(g.a, c(0,1), Sigmahat, Sigmahat.inv, S, tau.hat, nmv)$minimum # Calculate S_a (p. 267) S.a <- a*S + (1-a)*Sigmahat } else { S.a <- Sigmahat } # Transform the data (p. 263) S.a.sqrt <- lav_matrix_symmetric_sqrt(S.a) S.inv.sqrt <- lav_matrix_symmetric_sqrt(h0@SampleStats@icov[[g]]) X <- data@X[[g]] X <- X %*% S.inv.sqrt %*% S.a.sqrt # transformed data dataX[[g]] <- X } } # run bootstraps fn <- function(b) { if (type == "bollen.stine" || type == "ordinary" || type == "yuan") { # take a bootstrap sample for each group for(g in 1:h0@Data@ngroups) { stopifnot(h0@SampleStats@nobs[[g]] > 1L) boot.idx <- sample(x = h0@SampleStats@nobs[[g]], size = h0@SampleStats@nobs[[g]], replace = TRUE) dataX[[g]] <- dataX[[g]][boot.idx,,drop=FALSE] } } else { # parametric! for(g in 1:h0@Data@ngroups) { dataX[[g]] <- MASS::mvrnorm(n = h0@SampleStats@nobs[[g]], mu = Mu.hat[[g]], Sigma = Sigma.hat[[g]]) } } # verbose if (verbose) cat(" ... bootstrap draw number: ", b, "\n") #Get sample statistics bootSampleStats <- try(lav_samplestats_from_data( lavdata = NULL, DataX = dataX, DataOv = data@ov, DataOvnames = data@ov.names, missing = h0@Options$missing, rescale = (h0@Options$estimator == "ML" && h0@Options$likelihood =="normal"), estimator = h0@Options$estimator, mimic = h0@Options$mimic, meanstructure = h0@Options$meanstructure, conditional.x = h0@Options$conditional.x, group.w.free = h0@Options$group.w.free, missing.h1 = TRUE, verbose = FALSE), silent=TRUE) if (inherits(bootSampleStats, "try-error")) { if (verbose) cat(" FAILED: creating h0@SampleStats statistics\n") options(old_options) return(NULL) } # just in case we need the new X in the data slot (lm!) data@X <- dataX if (verbose) cat(" ... ... model h0: ") h0@Options$verbose <- FALSE h0@Options$se <- "none" h0@Options$test <- "standard" #Fit h0 model fit.h0 <- lavaan(slotOptions = h0@Options, slotParTable = h0@ParTable, slotSampleStats = bootSampleStats, slotData = data) if (!fit.h0@optim$converged) { if (verbose) cat(" FAILED: no convergence\n") options(old_options) return(NULL) } if (verbose) cat(" ok -- niter = ", fit.h0@optim$iterations, " fx = ", fit.h0@optim$fx, "\n") if (verbose) cat(" ... ... 
model h1: ") h1@Options$verbose <- FALSE h1@Options$se <- "none" h1@Options$test <- "standard" #Fit h1 model fit.h1 <- lavaan(slotOptions = h1@Options, slotParTable = h1@ParTable, slotSampleStats = bootSampleStats, slotData = data) if (!fit.h1@optim$converged) { if (verbose) cat(" FAILED: no convergence -- niter = ", fit.h1@optim$iterations, " fx = ", fit.h1@optim$fx,"\n") options(old_options) return(NULL) } if (verbose) cat(" ok -- niter = ", fit.h1@optim$iterations, " fx = ", fit.h1@optim$fx, "\n") # store LRT if((fit.h1@optim$fx - fit.h0@optim$fx) > (.Machine$double.eps * 10)) { #if((fit.h1@optim$fx - fit.h0@optim$fx) > 0.0) { if (verbose) cat(" ... ... LRT = h0 > h1, delta = ", fit.h1@optim$fx - fit.h0@optim$fx, "\n") options(old_options) return(NULL) } else { lrt.boot <- abs(anova(fit.h1, fit.h0)$`Chisq diff`[2L]) if (verbose) cat(" ... ... LRT = ", lrt.boot, "\n") } #double bootstrap if (double.bootstrap == "standard") { if (verbose) cat(" ... ... calibrating p.value - ") plugin.pvalue <- bootstrapLRT(h0 = fit.h0, h1 = fit.h1, R = double.bootstrap.R, type = type, verbose = FALSE, return.LRT = FALSE, #FALSE warn = warn, parallel = parallel, ncpus = ncpus, cl = cl, double.bootstrap = "no") if (verbose) cat(sprintf("%5.3f", plugin.pvalue), "\n") attr(lrt.boot, "plugin.pvalue") <- plugin.pvalue } else if (double.bootstrap == "FDB") { #Fast double bootstrap plugin.pvalue <- bootstrapLRT(h0 = fit.h0, h1 = fit.h1, R = 1L, type = type, verbose = FALSE, warn = warn, return.LRT = TRUE, #TRUE parallel = parallel, ncpus = ncpus, cl = cl, double.bootstrap = "no") LRT.2 <- attr(plugin.pvalue, "LRT") if (verbose) cat(" ... ... LRT2 = ", LRT.2, "\n") attr(lrt.boot, "LRT.2") <- LRT.2 } lrt.boot } #Parallel processing RR <- sum(R) res <- if (ncpus > 1L && (have_mc || have_snow)) { if (have_mc) { parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus) } else if (have_snow) { if (is.null(cl)) { cl <- parallel::makePSOCKcluster(rep("localhost", ncpus)) # if (RNGkind()[1L] == "L'Ecuyer-CMRG") parallel::clusterSetRNGStream(cl, iseed = iseed) # res <- parallel::parLapply(cl, seq_len(RR), fn) # parallel::stopCluster(cl) # res } else parallel::parLapply(cl, seq_len(RR), fn) } } else lapply(seq_len(RR), fn) error.idx <- integer(0) for (b in seq_len(RR)) { if (!is.null(res[[b]])) { LRT[b] <- res[[b]] if (double.bootstrap == "standard") { plugin.pvalues[b] <- attr(res[[b]], "plugin.pvalue") } else if (double.bootstrap == "FDB") { LRT.2[b] <- attr(res[[b]], "LRT.2") } } else { error.idx <- c(error.idx, b) } } #Error handling if (length(error.idx) > 0L) { warning("lavaan WARNING: only ", (R - length(error.idx)), " bootstrap draws were successful") LRT <- LRT[-error.idx] if(length(LRT) == 0) LRT <- as.numeric(NA) if (double.bootstrap == "standard") { plugin.pvalues <- plugin.pvalues[-error.idx] attr(LRT, "error.idx") <- error.idx } if (double.bootstrap == "FDB") { LRT.2 <- LRT.2[-error.idx] attr(LRT.2, "error.idx") <- error.idx } } else { if (verbose) cat("Number of successful bootstrap draws:", (R - length(error.idx)), "\n") } pvalue <- sum(LRT > LRT.original) / length(LRT) if (return.LRT) { attr(pvalue, "LRT.original") <- LRT.original attr(pvalue, "LRT") <- LRT } if (double.bootstrap == "FDB") { Q <- (1 - pvalue) lrt.q <- quantile(LRT.2, Q, na.rm = TRUE) adj.pvalue <- sum(LRT > lrt.q) / length(LRT) attr(pvalue, "lrt.q") <- lrt.q attr(pvalue, "adj.pvalue") <- adj.pvalue if (return.LRT) { attr(pvalue, "LRT.original") <- LRT.original attr(pvalue, "LRT") <- LRT attr(pvalue, "LRT2") <- LRT.2 } } else if 
(double.bootstrap == "standard") { adj.alpha <- quantile(plugin.pvalues, double.bootstrap.alpha, na.rm=TRUE) attr(pvalue, "adj.alpha") <- adj.alpha adj.pvalue <- sum(plugin.pvalues < pvalue) / length(plugin.pvalues) attr(pvalue, "plugin.pvalues") <- plugin.pvalues attr(pvalue, "adj.pvalue") <- adj.pvalue } # restore options options(old_options) pvalue } lavaan/R/lav_tables.R0000644000176200001440000012557213046374253014177 0ustar liggesusers# construct 1D, 2D or pattern-based frequency tables # YR. 10 April 2013 # Notes: # - we do NOT make a distinction here between unordered and ordered categorical # variables # - object can be a matrix (most likely with integers), a full data frame, # a fitted lavaan object, or a lavData object # - 11 May 2013: added collapse=TRUE, min.std.resid options (suggested # by Myrsini Katsikatsou # - 11 June 2013: added dimension, to get one-way and two-way (three-way?) # tables # - 20 Sept 2013: - allow for sample-based or model-based cell probabilities # re-organize/re-name to provide a more consistent interface # rows in the output can be either: cells, tables or patterns # - dimension=0 equals type="pattern # - collapse=TRUE is replaced by type="table" # - changed names of statistics: std.resid is now GR.average # - added many more statistics; some based on the model, some # on the unrestricted model # - 8 Nov 2013: - skip empty cells for G2, instead of adding 0.5 to obs # - 7 Feb 2016: - take care of conditional.x = TRUE lavTables <- function(object, # what type of table? dimension = 2L, type = "cells", # if raw data, additional attributes categorical = NULL, group = NULL, # which statistics / fit indices? statistic = "default", G2.min = 3.0, # needed for G2.{p/n}large X2.min = 3.0, # needed for X2.{p/n}large # pvalues for statistics? p.value = FALSE, # Bonferonni # alpha.adj = FALSE, # output format output = "data.frame", patternAsString = TRUE) { # check input if(! (dimension == 0L || dimension == 1L || dimension == 2L) ) { stop("lavaan ERROR: dimension must be 0, 1 or 2 for pattern, one-way or two-way tables") } stopifnot(type %in% c("cells", "table", "pattern")) if(type == "pattern") { dimension <- 0L } # extract or create lavdata lavdata <- lavData(object, ordered = categorical, group = group) # is 'object' a lavaan object? 
lavobject <- NULL if(inherits(object, "lavaan")) { lavobject <- object } # case 1: response patterns if(dimension == 0L) { out <- lav_tables_pattern(lavobject = lavobject, lavdata = lavdata, statistic = statistic, patternAsString = patternAsString) # output format if(output == "data.frame") { class(out) <- c("lavaan.data.frame", "data.frame") } else { warning("lavaan WARNING: output option `", output, "' is not available; ignored.") } # case 2: one-way/univariate } else if(dimension == 1L) { out <- lav_tables_oneway(lavobject = lavobject, lavdata = lavdata, statistic = statistic) # output format if(output == "data.frame") { class(out) <- c("lavaan.data.frame", "data.frame") } else { warning("lavaan WARNING: output option `", output, "' is not available; ignored.") } # case 3a: two-way/pairwise/bivariate + cells } else if(dimension == 2L && type == "cells") { out <- lav_tables_pairwise_cells(lavobject = lavobject, lavdata = lavdata, statistic = statistic) # output format if(output == "data.frame") { class(out) <- c("lavaan.data.frame", "data.frame") } else if(output == "table") { out <- lav_tables_cells_format(out, lavdata = lavdata) } else { warning("lavaan WARNING: output option `", output, "' is not available; ignored.") } # case 3b: two-way/pairwise/bivariate + collapsed table } else if(dimension == 2L && (type == "table" || type == "tables")) { out <- lav_tables_pairwise_table(lavobject = lavobject, lavdata = lavdata, statistic = statistic, G2.min = G2.min, X2.min = X2.min, p.value = p.value) # output format if(output == "data.frame") { class(out) <- c("lavaan.data.frame", "data.frame") } else if(output == "table") { out <- lav_tables_table_format(out, lavdata = lavdata, lavobject = lavobject) } else { warning("lavaan WARNING: output option `", output, "' is not available; ignored.") } } if( (is.data.frame(out) && nrow(out) == 0L) || (is.list(out) && length(out) == 0L)) { # empty table (perhaps, no categorical variables) return(invisible(out)) } out } # shortcut, always dim=2, type="cells" #lavTablesFit <- function(object, # # if raw data, additional attributes # categorical = NULL, # group = NULL, # # which statistics / fit indices? # statistic = "default", # G2.min = 3.0, # X2.min = 3.0, # # pvalues for statistics? # p.value = FALSE, # # output format # output = "data.frame") { # # lavTables(object = object, dimension = 2L, type = "table", # categorical = categorical, group = group, # statistic = statistic, # G2.min = G2.min, X2.min = X2.min, p.value = p.value, # output = output, patternAsString = FALSE) #} #lavTables1D <- function(object, # # if raw data, additional attributes # categorical = NULL, # group = NULL, # # which statistics / fit indices? 
# statistic = "default", # # output format # output = "data.frame") { # # lavTables(object = object, dimension = 1L, # categorical = categorical, group = group, # statistic = statistic, p.value = FALSE, # output = output, patternAsString = FALSE) #} lav_tables_pattern <- function(lavobject = NULL, lavdata = NULL, statistic = NULL, patternAsString = TRUE) { # this only works if we have 'categorical' variables cat.idx <- which(lavdata@ov$type %in% c("ordered","factor")) if(length(cat.idx) == 0L) { warning("lavaan WARNING: no categorical variables are found") return(data.frame(pattern=character(0L), nobs=integer(0L), obs.freq=integer(0L), obs.prop=numeric(0L))) } # no support yet for mixture of endogenous ordered + numeric variables if(!is.null(lavobject) && length(lavNames(lavobject, "ov.nox")) > length(cat.idx)) { warning("lavaan WARNING: some endogenous variables are not categorical") return(data.frame(pattern=character(0L), nobs=integer(0L), obs.freq=integer(0L), obs.prop=numeric(0L))) } # default statistics if(!is.null(lavobject)) { if(length(statistic) == 1L && statistic == "default") { statistic <- c("G2", "X2") } else { stopifnot(statistic %in% c("G2.un", "X2.un", "G2", "X2")) } } else { # only data if(length(statistic) == 1L && statistic == "default") { # if data, none by default statistic <- character(0L) } else { stopifnot(statistic %in% c("G2.un", "X2.un")) } } # first, create basic table with response patterns for(g in 1:lavdata@ngroups) { pat <- lav_data_resp_patterns(lavdata@X[[g]])$pat obs.freq <- as.integer( rownames(pat) ) if(patternAsString) { pat <- data.frame(pattern = apply(pat, 1, paste, collapse=""), stringsAsFactors = FALSE) } else { pat <- as.data.frame(pat, stringsAsFactors = FALSE) names(pat) <- lavdata@ov.names[[g]] } #pat$id <- 1:nrow(pat) if(lavdata@ngroups > 1L) { pat$group <- rep(g, nrow(pat)) } NOBS <- sum(obs.freq) pat$nobs <- rep(NOBS, nrow(pat)) pat$obs.freq <- obs.freq rownames(pat) <- NULL if(g == 1L) { out <- pat } else { out <- rbind(out, pat) } } out$obs.prop <- out$obs.freq/out$nobs if(any(c("X2.un", "G2.un") %in% statistic)) { # not a good statistic... we only have uni+bivariate information warning("lavaan WARNING: limited information used for thresholds and correlations; but X2/G2 assumes full information") PI <- lav_tables_resp_pi(lavobject = lavobject, lavdata = lavdata, est = "h1") out$est.prop.un <- unlist(PI) if("G2.un" %in% statistic) { out$G2.un <- lav_tables_stat_G2(out$obs.prop, out$est.prop.un, out$nobs) } if("X2.un" %in% statistic) { out$X2.un <- lav_tables_stat_X2(out$obs.prop, out$est.prop.un, out$nobs) } } if(any(c("X2", "G2") %in% statistic)) { if(lavobject@Options$estimator %in% c("FML")) { # ok, nothing to say } else if(lavobject@Options$estimator %in% c("WLS","DWLS","PML","ULS")) { warning("lavaan WARNING: estimator ", lavobject@Options$estimator, " is not using full information while est.prop is using full information") } else { stop("lavaan ERROR: estimator ", lavobject@Options$estimator, " is not supported.") } PI <- lav_tables_resp_pi(lavobject = lavobject, lavdata = lavdata, est = "h0") out$est.prop <- unlist(PI) if("G2" %in% statistic) { out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop, out$nobs) } if("X2" %in% statistic) { out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop, out$nobs) } } # remove nobs? 
# out$nobs <- NULL out } # pairwise tables, rows = table cells lav_tables_pairwise_cells <- function(lavobject = NULL, lavdata = NULL, statistic = character(0L)) { # this only works if we have at least two 'categorical' variables cat.idx <- which(lavdata@ov$type %in% c("ordered","factor")) if(length(cat.idx) == 0L) { warning("lavaan WARNING: no categorical variables are found") return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L), nobs=integer(0L), row=integer(0L), col=integer(0L), obs.freq=integer(0L), obs.prop=numeric(0L))) } if(length(cat.idx) == 1L) { warning("lavaan WARNING: at least two categorical variables are needed") return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L), nobs=integer(0L), row=integer(0L), col=integer(0L), obs.freq=integer(0L), obs.prop=numeric(0L))) } # default statistics if(!is.null(lavobject)) { if(length(statistic) == 1L && statistic == "default") { statistic <- c("X2") } else { stopifnot(statistic %in% c("cor", "th", "X2","G2", "cor.un", "th.un", "X2.un","G2.un")) } } else { if(length(statistic) == 1L && statistic == "default") { # if data, none by default statistic <- character(0L) } else { stopifnot(statistic %in% c("cor.un", "th.un", "X2.un","G2.un")) } } # initial table, observed cell frequencies out <- lav_tables_pairwise_freq_cell(lavdata = lavdata, as.data.frame. = TRUE) out$obs.prop <- out$obs.freq/out$nobs if(any(c("cor.un", "th.un", "X2.un", "G2.un") %in% statistic)) { PI <- lav_tables_pairwise_sample_pi(lavobject = lavobject, lavdata = lavdata) out$est.prop.un <- unlist(PI) if("G2.un" %in% statistic) { out$G2.un <- lav_tables_stat_G2(out$obs.prop, out$est.prop.un, out$nobs) } if("X2.un" %in% statistic) { out$X2.un <- lav_tables_stat_X2(out$obs.prop, out$est.prop.un, out$nobs) } if("cor.un" %in% statistic) { COR <- attr(PI, "COR") cor.all <- unlist(lapply(COR, function(x) x[lower.tri(x, diag=FALSE)])) out$cor.un <- cor.all[out$id] } } if(any(c("cor", "th", "X2", "G2") %in% statistic)) { PI <- lav_tables_pairwise_model_pi(lavobject = lavobject) out$est.prop <- unlist(PI) if("G2" %in% statistic) { out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop, out$nobs) } if("X2" %in% statistic) { out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop, out$nobs) } if("cor" %in% statistic) { COR <- attr(PI, "COR") cor.all <- unlist(lapply(COR, function(x) x[lower.tri(x, diag=FALSE)])) out$cor <- cor.all[out$id] } } out } # G2 statistic lav_tables_stat_G2 <- function(obs.prop = NULL, est.prop = NULL, nobs = NULL) { # not defined if out$obs.prop is (close to) zero zero.idx <- which(obs.prop < .Machine$double.eps) if(length(zero.idx)) { obs.prop[zero.idx] <- as.numeric(NA) } # the usual G2 formula G2 <- 2*nobs*(obs.prop*log(obs.prop/est.prop)) G2 } # X2 (aka X2) statistic lav_tables_stat_X2 <- function(obs.prop = NULL, est.prop = NULL, nobs = NULL) { res.prop <- obs.prop-est.prop X2 <- nobs*(res.prop*res.prop)/est.prop X2 } # pairwise tables, rows = tables lav_tables_pairwise_table <- function(lavobject = NULL, lavdata = NULL, statistic = character(0L), G2.min = 3.0, X2.min = 3.0, p.value = FALSE) { # default statistics if(!is.null(lavobject)) { if(length(statistic) == 1L && statistic == "default") { statistic <- c("X2", "X2.average") } else { stopifnot(statistic %in% c("X2","G2","X2.un","G2.un", "cor", "cor.un", "RMSEA.un", "RMSEA", "G2.average", "G2.nlarge", "G2.plarge", "X2.average", "X2.nlarge", "X2.plarge")) } } else { if(length(statistic) == 1L && statistic == "default") { # if data, none by default statistic <- 
character(0L) } else { stopifnot(statistic %in% c("cor.un", "X2.un","G2.un", "RMSEA.un")) } } # identify 'categorical' variables #cat.idx <- which(lavdata@ov$type %in% c("ordered","factor")) # pairwise tables #pairwise.tables <- utils::combn(vartable$name[cat.idx], m=2L) #pairwise.tables <- rbind(seq_len(ncol(pairwise.tables)), # pairwise.tables) #ntables <- ncol(pairwise.tables) # initial table, observed cell frequencies #out <- as.data.frame(t(pairwise.tables)) #names(out) <- c("id", "lhs", "rhs") # collapse approach stat.cell <- character(0) if(any(c("G2","G2.average","G2.plarge","G2.nlarge") %in% statistic)) { stat.cell <- c(stat.cell, "G2") } if(any(c("X2","X2.average","X2.plarge","X2.nlarge") %in% statistic)) { stat.cell <- c(stat.cell, "X2") } if("G2" %in% statistic || "RMSEA" %in% statistic) { stat.cell <- c(stat.cell, "G2") } if("X2.un" %in% statistic) { stat.cell <- c(stat.cell, "X2.un") } if("G2.un" %in% statistic || "RMSEA.un" %in% statistic) { stat.cell <- c(stat.cell, "G2.un") } if("cor.un" %in% statistic) { stat.cell <- c(stat.cell, "cor.un") } if("cor" %in% statistic) { stat.cell <- c(stat.cell, "cor") } # get table with table cells out.cell <- lav_tables_pairwise_cells(lavobject = lavobject, lavdata = lavdata, statistic = stat.cell) # only 1 row per table row.idx <- which(!duplicated(out.cell$id)) if(is.null(out.cell$group)) { out <- out.cell[row.idx,c("lhs","rhs","nobs"),drop=FALSE] } else { out <- out.cell[row.idx,c("lhs","rhs","group", "nobs"),drop=FALSE] } # df if(length(statistic) > 0L) { nrow <- tapply(out.cell$row, INDEX=out.cell$id, FUN=max) ncol <- tapply(out.cell$col, INDEX=out.cell$id, FUN=max) out$df <- nrow*ncol - nrow - ncol } # cor if("cor" %in% statistic) { out$cor <- out.cell[row.idx, "cor"] } # cor.un if("cor.un" %in% statistic) { out$cor.un <- out.cell[row.idx, "cor.un"] } # X2 if("X2" %in% statistic) { out$X2 <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) if(p.value) { out$X2.pval <- pchisq(out$X2, df=out$df, lower.tail=FALSE) } } if("X2.un" %in% statistic) { out$X2.un <- tapply(out.cell$X2.un, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) if(p.value) { out$X2.un.pval <- pchisq(out$X2.un, df=out$df, lower.tail=FALSE) } } # G2 if("G2" %in% statistic) { out$G2 <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) if(p.value) { out$G2.pval <- pchisq(out$G2, df=out$df, lower.tail=FALSE) } } if("G2.un" %in% statistic) { out$G2.un <- tapply(out.cell$G2.un, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) if(p.value) { out$G2.un.pval <- pchisq(out$G2.un, df=out$df, lower.tail=FALSE) } } if("RMSEA" %in% statistic) { G2 <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) # note: there seems to be a mistake in Appendix 1 eqs 43/44 of Joreskog # SSI paper (2005) 'SEM with ordinal variables using LISREL' # 2*N*d should N*d out$RMSEA <- sqrt( pmax(0, (G2 - out$df)/ (out$nobs*out$df) ) ) if(p.value) { # note: MUST use 1 - pchisq (instead of lower.tail = FALSE) # because for ncp > 80, routine only computes lower tail out$RMSEA.pval <- 1.0 - pchisq(G2, ncp = 0.1*0.1*out$nobs*out$df, df=out$df, lower.tail = TRUE) } } if("RMSEA.un" %in% statistic) { G2 <- tapply(out.cell$G2.un, INDEX=out.cell$id, FUN=sum, na.rm=TRUE) # note: there seems to be a mistake in Appendix 1 eqs 43/44 of Joreskog # SSI paper (2005) 'SEM with ordinal variables using LISREL' # 2*N*d should N*d out$RMSEA.un <- sqrt( pmax(0, (G2 - out$df)/ (out$nobs*out$df) ) ) if(p.value) { # note: MUST use 1 - pchisq (instead of lower.tail = FALSE) # because for ncp > 80, routine only computes 
lower tail out$RMSEA.un.pval <- 1.0 - pchisq(G2, ncp = 0.1*0.1*out$nobs*out$df, df=out$df, lower.tail = TRUE) } } if("G2.average" %in% statistic) { out$G2.average <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=mean, na.rm=TRUE) } if("G2.nlarge" %in% statistic) { out$G2.min <- rep(G2.min, length(out$lhs)) out$G2.nlarge <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=function(x) sum(x > G2.min, na.rm=TRUE) ) } if("G2.plarge" %in% statistic) { out$G2.min <- rep(G2.min, length(out$lhs)) out$G2.plarge <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=function(x) sum(x > G2.min, na.rm=TRUE)/length(x) ) } if("X2.average" %in% statistic) { out$X2.average <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=mean, na.rm=TRUE) } if("X2.nlarge" %in% statistic) { out$X2.min <- rep(X2.min, length(out$lhs)) out$X2.nlarge <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=function(x) sum(x > X2.min, na.rm=TRUE) ) } if("X2.plarge" %in% statistic) { out$X2.min <- rep(X2.min, length(out$lhs)) out$X2.plarge <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=function(x) sum(x > X2.min, na.rm=TRUE)/length(x) ) } out } lav_tables_oneway <- function(lavobject = NULL, lavdata = NULL, statistic = NULL) { # shortcuts vartable <- lavdata@ov X <- lavdata@X # identify 'categorical' variables cat.idx <- which(vartable$type %in% c("ordered","factor")) ncat <- length(cat.idx) # do we have any categorical variables? if(length(cat.idx) == 0L) { warning("lavaan WARNING: no categorical variables are found") return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L), nobs=integer(0L), obs.freq=integer(0L), obs.prop=numeric(0L), est.prop=numeric(0L), X2=numeric(0L))) } else { labels <- strsplit(vartable$lnam[cat.idx], "\\|") } # ok, we have an overview of all categorical variables in the data ngroups <- length(X) # for each group, for each categorical variable, collect information TABLES <- vector("list", length=ngroups) for(g in 1:ngroups) { TABLES[[g]] <- lapply(seq_len(ncat), FUN=function(x) { idx <- cat.idx[x] nrow <- vartable$nlev[idx] ncell<- nrow nvar <- length(lavdata@ov.names[[g]]) id <- (g-1)*nvar + x # compute observed frequencies FREQ <- tabulate(X[[g]][,idx], nbins = ncell) list( id = rep.int(id, ncell), lhs = rep.int(vartable$name[idx], ncell), # op = rep.int("freq", ncell), rhs = labels[[x]], group = rep.int(g, ncell), nobs = rep.int(sum(FREQ), ncell), obs.freq = FREQ, obs.prop = FREQ/sum(FREQ) ) }) } for(g in 1:ngroups) { TABLE <- TABLES[[g]] TABLE <- lapply(TABLE, as.data.frame, stringsAsFactors=FALSE) if(g == 1L) { out <- do.call(rbind, TABLE) } else { out <- rbind(out, do.call(rbind, TABLE)) } } if(g == 1) { # remove group column out$group <- NULL } # default statistics if(!is.null(lavobject)) { if(length(statistic) == 1L && statistic == "default") { statistic <- c("X2") } else { stopifnot(statistic %in% c("th.un", "th", "G2", "X2")) } # sample based # note, there is no G2.un or X2.un: always saturated! 
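    # (illustration, not run: for an ordinal variable with observed category
    #  proportions p = c(.2, .5, .3), the two sample thresholds are
    #  qnorm(cumsum(p)[-3]), i.e. qnorm(c(.2, .7)); below, an extra +Inf is
    #  appended per variable, so that successive differences of pnorm()
    #  recover the category proportions again)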
if("th.un" %in% statistic) { # sample based th <- unlist(lapply(1:lavdata@ngroups, function(x) { TH <- lavobject@SampleStats@th[[x]][ lavobject@SampleStats@th.idx[[x]] > 0 ] TH.IDX <- lavobject@SampleStats@th.idx[[x]][ lavobject@SampleStats@th.idx[[x]] > 0 ] unname(unlist(tapply(TH, INDEX=TH.IDX, function(y) c(y,Inf)))) })) # overwrite obs.prop # NOTE: if we have exogenous variables, obs.prop will NOT # correspond with qnorm(th) out$obs.prop <- unname(unlist(tapply(th, INDEX=out$id, FUN=function(x) (pnorm(c(x,Inf)) - pnorm(c(-Inf,x)))[-(length(x)+1)] ))) out$th.un <- th } # model based if(any(c("th","G2","X2") %in% statistic)) { # model based th.h0 <- unlist(lapply(1:lavdata@ngroups, function(x) { if(lavobject@Model@conditional.x) { TH <- lavobject@implied$res.th[[x]][ lavobject@SampleStats@th.idx[[x]] > 0 ] } else { TH <- lavobject@implied$th[[x]][ lavobject@SampleStats@th.idx[[x]] > 0 ] } TH.IDX <- lavobject@SampleStats@th.idx[[x]][ lavobject@SampleStats@th.idx[[x]] > 0 ] unname(unlist(tapply(TH, INDEX=TH.IDX, function(x) c(x,Inf)))) })) est.prop <- unname(unlist(tapply(th.h0, INDEX=out$id, FUN=function(x) (pnorm(c(x,Inf)) - pnorm(c(-Inf,x)))[-(length(x)+1)] ))) out$est.prop <- est.prop if("th" %in% statistic) { out$th <- th.h0 } if("G2" %in% statistic) { out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop, out$nobs) } if("X2" %in% statistic) { out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop, out$nobs) } } } else { if(length(statistic) == 1L && statistic == "default") { # if data, none by default statistic <- character(0L) } else { stopifnot(statistic %in% c("th.un")) } if("th.un" %in% statistic) { out$th.un <- unlist(tapply(out$obs.prop, INDEX=out$id, FUN=function(x) qnorm(cumsum(x)))) } } out } # compute pairwise (two-way) frequency tables lav_tables_pairwise_freq_cell <- function(lavdata = NULL, as.data.frame. = TRUE) { # shortcuts vartable <- as.data.frame(lavdata@ov, stringsAsFactors = FALSE) X <- lavdata@X ov.names <- lavdata@ov.names ngroups <- lavdata@ngroups # identify 'categorical' variables cat.idx <- which(vartable$type %in% c("ordered","factor")) # do we have any categorical variables? if(length(cat.idx) == 0L) { stop("lavaan ERROR: no categorical variables are found") } else if(length(cat.idx) == 1L) { stop("lavaan ERROR: at least two categorical variables are needed") } # pairwise tables pairwise.tables <- utils::combn(vartable$name[cat.idx], m=2L) pairwise.tables <- rbind(pairwise.tables, seq_len(ncol(pairwise.tables))) ntables <- ncol(pairwise.tables) # for each group, for each pairwise table, collect information TABLES <- vector("list", length=ngroups) for(g in 1:ngroups) { TABLES[[g]] <- apply(pairwise.tables, MARGIN=2, FUN=function(x) { idx1 <- which(vartable$name == x[1]) idx2 <- which(vartable$name == x[2]) id <- (g-1)*ntables + as.numeric(x[3]) nrow <- vartable$nlev[idx1] ncol <- vartable$nlev[idx2] ncell <- nrow*ncol # compute two-way observed frequencies Y1 <- X[[g]][,idx1] Y2 <- X[[g]][,idx2] # FREQ <- table(Y1, Y2) # we loose missings; useNA is ugly FREQ <- pc_freq(Y1, Y2) list( id = rep.int(id, ncell), lhs = rep.int(x[1], ncell), # op = rep.int("table", ncell), rhs = rep.int(x[2], ncell), group = rep.int(g, ncell), nobs = rep.int(sum(FREQ), ncell), row = rep.int(seq_len(ncol), times=nrow), col = rep(seq_len(nrow), each=ncol), obs.freq = lav_matrix_vec(FREQ) # col by col! ) }) } if(as.data.frame.) 
{ for(g in 1:ngroups) { TABLE <- TABLES[[g]] TABLE <- lapply(TABLE, as.data.frame, stringsAsFactors=FALSE) if(g == 1) { out <- do.call(rbind, TABLE) } else { out <- rbind(out, do.call(rbind, TABLE)) } } if(g == 1) { # remove group column out$group <- NULL } } else { if(ngroups == 1L) { out <- TABLES[[1]] } else { out <- TABLES } } out } # low-level function to compute expected proportions per cell # object lav_tables_pairwise_model_pi <- function(lavobject = NULL) { stopifnot(lavobject@Model@categorical) # shortcuts lavmodel <- lavobject@Model implied <- lavobject@implied ngroups <- lavobject@Data@ngroups ov.types <- lavobject@Data@ov$type th.idx <- lavobject@Model@th.idx Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov else implied$cov TH <- if(lavmodel@conditional.x) implied$res.th else implied$th PI <- vector("list", length=ngroups) for(g in 1:ngroups) { Sigmahat <- Sigma.hat[[g]] cors <- Sigmahat[lower.tri(Sigmahat)] if(any(abs(cors) > 1)) { warning("lavaan WARNING: some model-implied correlations are larger than 1.0") } nvar <- nrow(Sigmahat) # shortcut for all ordered - tablewise if(all(ov.types == "ordered") && !is.null(lavobject@Cache[[g]]$LONG)) { #FREQ.OBS <- c(FREQ.OBS, lavobject@Cache[[g]]$bifreq) LONG2 <- LongVecTH.Rho(no.x = nvar, all.thres = TH[[g]], index.var.of.thres = th.idx[[g]], rho.xixj = cors) # get expected probability per table, per pair PI[[g]] <- pairwiseExpProbVec(ind.vec = lavobject@Cache[[g]]$LONG, th.rho.vec=LONG2) } else { PI.group <- integer(0) # order! first i, then j, lav_matrix_vec(table)! for(i in seq_len(nvar-1L)) { for(j in (i+1L):nvar) { if(ov.types[i] == "ordered" && ov.types[j] == "ordered") { PI.table <- pc_PI(rho = Sigmahat[i,j], th.y1 = TH[[g]][ th.idx[[g]] == i ], th.y2 = TH[[g]][ th.idx[[g]] == j ]) PI.group <- c(PI.group, lav_matrix_vec(PI.table)) } } } PI[[g]] <- PI.group } } # g # add COR/TH/TH.IDX attr(PI, "COR") <- Sigma.hat attr(PI, "TH") <- TH attr(PI, "TH.IDX") <- th.idx PI } # low-level function to compute expected proportions per cell # using sample-based correlations + thresholds # # object can be either lavData or lavaan class lav_tables_pairwise_sample_pi <- function(lavobject = NULL, lavdata = NULL) { # get COR, TH and th.idx if(!is.null(lavobject)) { if(lavobject@Model@conditional.x) { COR <- lavobject@SampleStats@res.cov TH <- lavobject@SampleStats@res.th } else { COR <- lavobject@SampleStats@cov TH <- lavobject@SampleStats@th } TH.IDX <- lavobject@SampleStats@th.idx } else if(!is.null(lavdata)) { fit.un <- lavCor(object = lavdata, se = "none", output = "fit") if(lavobject@Model@conditional.x) { COR <- fit.un@SampleStats@res.cov TH <- fit.un@SampleStats@res.th } else { COR <- fit.un@SampleStats@cov TH <- fit.un@SampleStats@th } TH.IDX <- fit.un@SampleStats@th.idx } else { stop("lavaan ERROR: both lavobject and lavdata are NULL") } lav_tables_pairwise_sample_pi_cor(COR = COR, TH = TH, TH.IDX = TH.IDX) } # low-level function to compute expected proportions per cell lav_tables_pairwise_sample_pi_cor <- function(COR = NULL, TH = NULL, TH.IDX = NULL) { ngroups <- length(COR) PI <- vector("list", length=ngroups) for(g in 1:ngroups) { Sigmahat <- COR[[g]] cors <- Sigmahat[lower.tri(Sigmahat)] if(any(abs(cors) > 1)) { warning("lavaan WARNING: some model-implied correlations are larger than 1.0") } nvar <- nrow(Sigmahat) th.idx <- TH.IDX[[g]] # reconstruct ov.types ov.types <- rep("numeric", nvar) ord.idx <- unique(th.idx[th.idx > 0]) ov.types[ord.idx] <- "ordered" PI.group <- integer(0) # order! 
first i, then j, lav_matrix_vec(table)! for(i in seq_len(nvar-1L)) { for(j in (i+1L):nvar) { if(ov.types[i] == "ordered" && ov.types[j] == "ordered") { PI.table <- pc_PI(rho = Sigmahat[i,j], th.y1 = TH[[g]][ th.idx == i ], th.y2 = TH[[g]][ th.idx == j ]) PI.group <- c(PI.group, lav_matrix_vec(PI.table)) } } } PI[[g]] <- PI.group } # g # add COR/TH/TH.IDX attr(PI, "COR") <- COR attr(PI, "TH") <- TH attr(PI, "TH.IDX") <- TH.IDX PI } # low-level function to compute expected proportions per PATTERN # using sample-based correlations + thresholds # # object can be either lavData or lavaan class # # only valid if estimator = FML, POM or NOR # lav_tables_resp_pi <- function(lavobject = NULL, lavdata = NULL, est = "h0") { # shortcuts ngroups <- lavdata@ngroups lavmodel <- lavobject@Model implied <- lavobject@implied # h0 or unrestricted? if(est == "h0") { Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov else implied$cov TH <- if(lavmodel@conditional.x) implied$res.th else implied$th TH.IDX <- lavobject@SampleStats@th.idx } else { if(is.null(lavobject)) { fit.un <- lavCor(object = lavdata, se = "none", output = "fit") Sigma.hat <- if(fit.un@Model@conditional.x) fit.un@implied$res.cov else fit.un@implied$cov TH <- if(fit.un@Model@conditional.x) fit.un@implied$res.th else fit.un@implied$th TH.IDX <- fit.un@SampleStats@th.idx } else { if(lavobject@Model@conditional.x) { Sigma.hat <- lavobject@SampleStats@res.cov TH <- lavobject@SampleStats@res.th } else { Sigma.hat <- lavobject@SampleStats@cov TH <- lavobject@SampleStats@th } TH.IDX <- lavobject@SampleStats@th.idx } } PI <- vector("list", length=ngroups) for(g in 1:ngroups) { Sigmahat <- Sigma.hat[[g]] cors <- Sigmahat[lower.tri(Sigmahat)] if(any(abs(cors) > 1)) { warning("lavaan WARNING: some model-implied correlations are larger than 1.0") } nvar <- nrow(Sigmahat) th.idx <- TH.IDX[[g]] MEAN <- rep(0, nvar) # reconstruct ov.types ov.types <- rep("numeric", nvar) ord.idx <- unique(th.idx[th.idx > 0]) ov.types[ord.idx] <- "ordered" if(all(ov.types == "ordered")) { # get patterns ## FIXME GET it if(!is.null(lavdata@Rp[[g]]$pat)) { PAT <- lavdata@Rp[[g]]$pat } else { PAT <- lav_data_resp_patterns( lavdata@X[[g]] )$pat } npatterns <- nrow(PAT) freq <- as.numeric( rownames(PAT) ) PI.group <- numeric(npatterns) TH.VAR <- lapply(1:nvar, function(x) c(-Inf, TH[[g]][th.idx==x], +Inf)) # FIXME!!! ok to set diagonal to 1.0? 
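            # (each pattern probability below is a multivariate normal
            #  rectangle probability Pr(lower < Y* <= upper), with the bounds
            #  taken from the thresholds that bracket each observed category;
            #  sadmvn() from the mnormt package performs the numerical
            #  integration)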
diag(Sigmahat) <- 1.0 for(r in 1:npatterns) { # compute probability for each pattern lower <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x] ]) upper <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x] + 1L ]) # handle missing values na.idx <- which(is.na(PAT[r,])) if(length(na.idx) > 0L) { lower <- lower[-na.idx] upper <- upper[-na.idx] MEAN.r <- MEAN[-na.idx] Sigmahat.r <- Sigmahat[-na.idx, -na.idx, drop=FALSE] } else { MEAN.r <- MEAN Sigmahat.r <- Sigmahat } PI.group[r] <- sadmvn(lower, upper, mean=MEAN.r, varcov=Sigmahat.r) } } else { # case-wise PI.group <- rep(as.numeric(NA), lavdata@nobs[[g]]) warning("lavaan WARNING: casewise PI not implemented") } PI[[g]] <- PI.group } # g PI } lav_tables_table_format <- function(out, lavdata = lavdata, lavobject = lavobject) { # determine column we need NAMES <- names(out) stat.idx <- which(NAMES %in% c("cor", "cor.un", "G2", "G2.un", "X2", "X2.un", "RMSEA", "RMSEA.un", "G2.average", "G2.plarge", "G2.nlarge", "X2.average", "X2.plarge", "X2.nlarge")) if(length(stat.idx) == 0) { if(!is.null(out$obs.freq)) { stat.idx <- which(NAMES == "obs.freq") } else if(!is.null(out$nobs)) { stat.idx <- which(NAMES == "nobs") } UNI <- NULL } else if(length(stat.idx) > 1) { stop("lavaan ERROR: more than one statistic for table output: ", paste(NAMES[stat.idx], collapse=" ")) } else { # univariate version of same statistic if(NAMES[stat.idx] == "G2.average") { UNI <- lavTables(lavobject, dimension = 1L, statistic="G2") } else if(NAMES[stat.idx] == "X2.average") { UNI <- lavTables(lavobject, dimension = 1L, statistic="X2") } else { UNI <- NULL } } OUT <- vector("list", length=lavdata@ngroups) for(g in 1:lavdata@ngroups) { if(lavdata@ngroups == 1L) { # no group column STAT <- out[[stat.idx]] } else { STAT <- out[[stat.idx]][ out$group == g ] } RN <- lavdata@ov.names[[g]] OUT[[g]] <- getCov(STAT, diagonal = FALSE, lower = FALSE, names = RN) # change diagonal elements: replace by univariate stat # if possible diag(OUT[[g]]) <- as.numeric(NA) if(!is.null(UNI)) { if(!is.null(UNI$group)) { idx <- which( UNI$group == g ) } else { idx <- 1:length(UNI$lhs) } if(NAMES[stat.idx] == "G2.average") { diag(OUT[[g]]) <- tapply(UNI$G2[idx], INDEX=UNI$id[idx], FUN=mean) } else if(NAMES[stat.idx] == "X2.average") { diag(OUT[[g]]) <- tapply(UNI$X2[idx], INDEX=UNI$id[idx], FUN=mean) } } else if(NAMES[stat.idx] %in% c("cor", "cor.un")) { diag(OUT[[g]]) <- 1 } class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix") } if(lavdata@ngroups > 1L) { names(OUT) <- lavdata@group.label out <- OUT } else { out <- OUT[[1]] } out } lav_tables_cells_format <- function(out, lavdata = lavdata, drop.list.single.group = FALSE) { OUT <- vector("list", length=lavdata@ngroups) if(is.null(out$group)) { out$group <- rep(1L, length(out$lhs)) } # do we have a statistic? 
# determine column we need NAMES <- names(out) stat.idx <- which(NAMES %in% c("cor", "cor.un", "G2", "G2.un", "X2", "X2.un", "RMSEA", "RMSEA.un", "G2.average", "G2.plarge", "G2.nlarge", "X2.average", "X2.plarge", "X2.nlarge")) if(length(stat.idx) == 0) { statistic <- "obs.freq" } else if(length(stat.idx) > 1) { stop("lavaan ERROR: more than one statistic for table output: ", paste(NAMES[stat.idx], collapse=" ")) } else { statistic <- NAMES[stat.idx] } for(g in 1:lavdata@ngroups) { case.idx <- which( out$group == g ) ID.group <- unique( out$id[ out$group == g] ) TMP <-lapply(ID.group, function(x) { Tx <- out[out$id == x,] M <- matrix(Tx[,statistic], max(Tx$row), max(Tx$col)) rownames(M) <- unique(Tx$row) colnames(M) <- unique(Tx$col) class(M) <- c("lavaan.matrix", "matrix") M }) names(TMP) <- unique(paste(out$lhs[case.idx], out$rhs[case.idx], sep="_")) OUT[[g]] <- TMP } if(lavdata@ngroups == 1L && drop.list.single.group) { OUT <- OUT[[1]] } else { if(length(lavdata@group.label) > 0L) { names(OUT) <- unlist(lavdata@group.label) } } OUT } lavaan/R/lav_test_diff.R0000644000176200001440000003615513053107534014664 0ustar liggesusers# various ways to compute a (scaled) difference chi-square test statistic lav_test_diff_Satorra2000 <- function(m1, m0, H1 = TRUE, A.method = "delta", A = NULL, Satterthwaite = FALSE, debug = FALSE) { # extract information from m1 and m2 T1 <- m1@test[[1]]$stat r1 <- m1@test[[1]]$df T0 <- m0@test[[1]]$stat r0 <- m0@test[[1]]$df # m = difference between the df's m <- r0 - r1 Gamma <- lavTech(m1, "Gamma") # the same for m1 and m0 if(H1) { WLS.V <- lavTech(m1, "WLS.V") PI <- computeDelta(m1@Model) P <- lavTech(m1, "information") # needed? (yes, if H1 already has eq constraints) P.inv <- lav_model_information_augment_invert(m1@Model, information = P, inverted = TRUE) if(inherits(P.inv, "try-error")) { return(list(T.delta = NA, scaling.factor = NA, df.delta = NA)) } #P.inv <- solve(P) # compute 'A' matrix # NOTE: order of parameters may change between H1 and H0, so be # careful! if(is.null(A)) { A <- lav_test_diff_A(m1, m0, method = A.method, reference = "H1") if(debug) print(A) } } else { stop("not ready yet") WLS.V <- lavTech(m0, "WLS.V") PI <- computeDelta(m0@Model) P <- lavTech(m0, "information") # needed? P.inv <- lav_model_information_augment_invert(m0@Model, information = P, inverted = TRUE) if(inherits(P.inv, "try-error")) { return(list(T.delta = NA, scaling.factor = NA, df.delta = NA)) } #P.inv <- solve(P) # compute 'A' matrix # NOTE: order of parameters may change between H1 and H0, so be # careful! 
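        # (A is the jacobian of the constraint function a(delta) that carries
        #  the H1 parameter vector into the H0 restrictions; see Satorra
        #  (2000) and lav_test_diff_A() below. Given A, the scaling factor
        #  follows from the per-group traces of U %*% Gamma computed next.)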
if(is.null(A)) { # m1, m0 OR m0, m1 (works for delta, but not for exact) A <- lav_test_diff_A(m1, m0, method = A.method, reference = "H0") if(debug) print(A) } } # compute tr UG per group ngroups <- m1@SampleStats@ngroups UG.group <- vector("list", length=ngroups) # safety check: A %*% P.inv %*% t(A) should NOT contain all-zero # rows/columns APA <- A %*% P.inv %*% t(A) cSums <- colSums(APA) rSums <- rowSums(APA) empty.idx <- which( abs(cSums) < .Machine$double.eps^0.5 & abs(rSums) < .Machine$double.eps ^0.5 ) if(length(empty.idx) > 0) { A <- A[-empty.idx,, drop = FALSE] } # PAAPAAP PAAPAAP <- P.inv %*% t(A) %*% solve(A %*% P.inv %*% t(A)) %*% A %*% P.inv trace.UGamma <- numeric(ngroups) trace.UGamma2 <- numeric(ngroups) for(g in 1:ngroups) { UG.group <- WLS.V[[g]] %*% Gamma[[g]] %*% WLS.V[[g]] %*% PI[[g]] %*% PAAPAAP %*% t(PI[[g]]) trace.UGamma[g] <- sum(diag(UG.group)) if(Satterthwaite) { trace.UGamma2[g] <- sum(diag(UG.group %*% UG.group)) } } # compute scaling factor fg <- unlist(m1@SampleStats@nobs)/m1@SampleStats@ntotal if(Satterthwaite) { cd <- sum(fg * trace.UGamma2) / sum(fg * trace.UGamma) df.delta <- (sum(fg * trace.UGamma))^2 / sum(fg * trace.UGamma2) } else { cd <- 1/m * sum(fg * trace.UGamma) df.delta <- m } # compute scaled difference test T.delta <- (T0 - T1)/cd list(T.delta = T.delta, scaling.factor = cd, df.delta = df.delta) } lav_test_diff_SatorraBentler2001 <- function(m1, m0) { # extract information from m1 and m2 T1 <- m1@test[[1]]$stat r1 <- m1@test[[1]]$df c1 <- m1@test[[2]]$scaling.factor if(r1 == 0) { # saturated model c1 <- 1 } T0 <- m0@test[[1]]$stat r0 <- m0@test[[1]]$df c0 <- m0@test[[2]]$scaling.factor # m = difference between the df's m = r0 - r1 # compute c_d cd <- (r0 * c0 - r1 * c1) / m # warn if cd is negative if(cd < 0) { warning("lavaan WARNING: scaling factor is negative") cd <- as.numeric(NA) } # compute scaled difference test T.delta <- (T0 - T1)/cd list(T.delta = T.delta, scaling.factor = cd, df.delta = m) } lav_test_diff_SatorraBentler2010 <- function(m1, m0, H1 = FALSE) { ### FIXME: check if models are nested at the parameter level!!! # extract information from m1 and m2 T1 <- m1@test[[1]]$stat r1 <- m1@test[[1]]$df c1 <- m1@test[[2]]$scaling.factor if(r1 == 0) { # saturated model c1 <- 1 } T0 <- m0@test[[1]]$stat r0 <- m0@test[[1]]$df c0 <- m0@test[[2]]$scaling.factor if(r0 == 0) { # should never happen c0 <- 1 } # m = difference between the df's m = r0 - r1 # generate `M10' model if(H1) { # M0 with M1 parameters M01 <- lav_test_diff_m10(m0, m1, test = TRUE) c01 <- M01@test[[2]]$scaling.factor # compute c_d # cd.01 <- (r0 * c01 - r1 * c0) / m ??? cd <- (r0 * c0 - r1 * c01) / m } else { # M1 with M0 parameters (as in Satorra & Bentler 2010) M10 <- lav_test_diff_m10(m1, m0, test = TRUE) c10 <- M10@test[[2]]$scaling.factor # compute c_d cd <- (r0 * c0 - r1 * c10) / m } # compute scaled difference test T.delta <- (T0 - T1)/cd list(T.delta = T.delta, scaling.factor = cd, df.delta = m, T.delta.unscaled = (T0 - T1)) } # create a new model 'm10', where we use model 'm1', but we # inject it with the values of 'm0' lav_test_diff_m10 <- function(m1, m0, test = FALSE) { # switch of verbose/se/test Options <- m1@Options Options$verbose <- FALSE # should we compute se/test statistics? 
if(!test) { Options$se <- "none"; Options$test <- "none" } PT.M0 <- m0@ParTable PT.M1 <- m1@ParTable # `extend' PT.M1 partable to include all `fixed-to-zero parameters' PT.M1.FULL <- lav_partable_full(PT.M1, free = TRUE, start = TRUE) PT.M1.extended <- lav_partable_merge(PT.M1, PT.M1.FULL, remove.duplicated = TRUE, warn = FALSE) # `extend' PT.M0 partable to include all `fixed-to-zero parameters' PT.M0.FULL <- lav_partable_full(PT.M0, free = TRUE, start = TRUE) PT.M0.extended <- lav_partable_merge(PT.M0, PT.M0.FULL, remove.duplicated = TRUE, warn = FALSE) # `extend' PE of M0 to include all `fixed-to-zero parameters' PE.M0 <- parameterEstimates(m0, remove.eq = FALSE, remove.ineq = FALSE, remove.system.eq = FALSE, remove.def = FALSE) PE.M0.FULL <- lav_partable_full(PE.M0) PE.M0.extended <- lav_partable_merge(PE.M0, PE.M0.FULL, remove.duplicated = TRUE, warn = FALSE) # FIXME: # - check if H0 does not contain additional parameters... Options$optim.method = "none" Options$optim.force.converged = TRUE m10 <- lavaan(model = PT.M1.extended, start = PE.M0.extended, slotOptions = Options, slotSampleStats = m1@SampleStats, slotData = m1@Data, slotCache = m1@Cache) m10 } # compute the `A' matrix: the jacobian of the constraint function a(\delta) # (see Satorra 2000) # # # lav_test_diff_A <- function(m1, m0, method = "exact", reference = "H1") { # FIXME!!!! if(method == "exact") { if(reference == "H1") { af <- lav_test_diff_af_h1(m1 = m1, m0 = m0) xx <- m1@optim$x } else { # evaluate under H0 stop("not ready yet") #af <- .test_compute_partable_A_diff_h0(m1 = m1, m0 = m0) xx <- m0@optim$x } A <- try(lav_func_jacobian_complex(func = af, x = xx), silent = TRUE) if(inherits(A, "try-error")) { A <- lav_func_jacobian_simple(func = af, x = xx) } } else if(method == "delta") { # use a numeric approximation of `A' Delta1.list <- computeDelta(m1@Model) Delta0.list <- computeDelta(m0@Model) Delta1 <- do.call(rbind, Delta1.list) Delta0 <- do.call(rbind, Delta0.list) # take into account equality constraints m0 if(m0@Model@eq.constraints) { # the normalization creates a lot of distortion... Delta0 <- Delta0 %*% m0@Model@eq.constraints.K } # take into account equality constraints m1 if(m1@Model@eq.constraints) { # we need a better solution here... warning("lavaan WARNING: H1 contains equality constraints; this routine can not handle this (yet)") } # take into account equality constraints m1 #tDelta1Delta1 <- crossprod(Delta1) #tDelta1Delta1.inv <- # lav_model_information_augment_invert(m1@Model, # information = tDelta1Delta1, # inverted = TRUE) #H <- solve(t(Delta1) %*% Delta1) %*% t(Delta1) %*% Delta0 #H <- tDelta1Delta1.inv %*% t(Delta1) %*% Delta0 ## still wrong? # ## Delta1 not corrected H <- solve(t(Delta1) %*% Delta1) %*% t(Delta1) %*% Delta0 A <- t(lav_matrix_orthogonal_complement(H)) } A } # for each parameter in H1 (m1), see if we have somehow constrained # this parameter under H0 (m0) # # since we work 'under H0', we need to use the labels/constraints/def # as they appear in H0. Unfortunately, the order of the parameters, and # even the (p)labels may be different in the two models... # # Therefore, we will attempt to: # - change the 'order' of the 'free' column in m0, so that they map to # to the 'x' that we will provide from H1 # - the plabels used in "==" constraints must be renamed, if necessary # lav_test_diff_af_h1 <- function(m1, m0) { PT.M0 <- parTable(m0) PT.M1 <- parTable(m1) # select .p*. 
parameters only M0.p.idx <- which(grepl("\\.p", PT.M0$plabel)); np0 <- length(M0.p.idx) M1.p.idx <- which(grepl("\\.p", PT.M1$plabel)); np1 <- length(M1.p.idx) # check if parameter space is the same if(np0 != np1) { stop("lavaan ERROR: unconstrained parameter set is not the same in m0 and m1") } # split partable in 'parameter' and 'constraints' section PT.M0.part1 <- PT.M0[ M0.p.idx,] PT.M0.part2 <- PT.M0[-M0.p.idx,] PT.M1.part1 <- PT.M1[ M1.p.idx,] PT.M1.part2 <- PT.M1[-M1.p.idx,] #figure out relationship between m0 and m1 p1.id <- lav_partable_map_id_p1_in_p2(PT.M0.part1, PT.M1.part1) p0.free.idx <- which(PT.M0.part1$free > 0) # change 'free' order in m0 # NOTE: this only works all the free parameters in h0 are also free # in h1 (and if not, they will become fixed in h0) PT.M0.part1$free[p0.free.idx] <- PT.M1.part1$free[ PT.M0.part1$id[p1.id][p0.free.idx] ] # paste back PT.M0 <- rbind(PT.M0.part1, PT.M0.part2) PT.M1 <- rbind(PT.M1.part1, PT.M1.part2) # `extend' PT.M1 partable to include all `fixed-to-zero parameters' PT.M1.FULL <- lav_partable_full(PT.M1, free = TRUE, start = TRUE) PT.M1.extended <- lav_partable_merge(PT.M1, PT.M1.FULL, remove.duplicated = TRUE, warn = FALSE) # `extend' PT.M0 partable to include all `fixed-to-zero parameters' PT.M0.FULL <- lav_partable_full(PT.M0, free = TRUE, start = TRUE) PT.M0.extended <- lav_partable_merge(PT.M0, PT.M0.FULL, remove.duplicated = TRUE, warn = FALSE) p1 <- PT.M1.extended; np1 <- length(p1$lhs) p0 <- PT.M0.extended; np0 <- length(p0$lhs) con.function <- function() NULL formals(con.function) <- alist(.x.=, ...=) BODY.txt <- paste("{\nout <- numeric(0L)\n", sep = "") # first handle def + == constraints # but FIRST, remove == constraints that also appear in H1!!! # remove equivalent eq constraints from p0 P0 <- p0 p0.eq.idx <- which(p0$op == "==") p1.eq.idx <- which(p1$op == "==") p0.remove.idx <- integer(0L) if(length(p0.eq.idx) > 0L) { for(i in seq_along(p0.eq.idx)) { # e0 in p0 e0 <- p0.eq.idx[i] lhs <- p0$lhs[e0]; rhs <- p0$rhs[e0] # do we have an equivalent constraint in H1? # NOTE!! the (p)labels may differ # SO, we will use an 'empirical' approach: if we fill in (random) # values, and work out the constraint, do we get identical values? 
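            # (for instance, the same equality constraint may be written with
            #  different plabels in H0 and H1 -- hypothetical labels, say
            #  '.p2. == .p5.' in H0 versus '.p1. == .p4.' in H1 for the same
            #  two underlying parameters -- so labels cannot be compared
            #  directly, but the evaluated constraint values can)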
# if yes, constraint is equivalent, and we should NOT add it here if(length(p1.eq.idx) > 0) { # generate random parameter values xx1 <- rnorm( length(M1.p.idx) ) xx0 <- xx1[ p1.id ] con.h0.value <- m0@Model@ceq.function(xx0)[i] con.h1.values <- m1@Model@ceq.function(xx1) if(con.h0.value %in% con.h1.values) { p0.remove.idx <- c(p0.remove.idx, e0) } } } } if(length(p0.remove.idx) > 0L) { P0 <- P0[-p0.remove.idx,] } # only for the UNIQUE equality constraints in H0, generate syntax DEFCON.txt <- lav_partable_constraints_ceq(P0, txtOnly=TRUE) BODY.txt <- paste(BODY.txt, DEFCON.txt, "\n", sep="") # for each parameter in p1, we 'check' is it is fixed to a constant in p0 ncon <- length( which(P0$op == "==") ) for(i in seq_len(np1)) { # p in p1 lhs <- p1$lhs[i]; op <- p1$op[i]; rhs <- p1$rhs[i]; group <- p1$group[i] # ignore '==', '<', '>' and ':=' for now if(op == "==" || op == ">" || op == "<" || op == ":=") next # search for corresponding parameter in p0 p0.idx <- which(p0$lhs == lhs & p0$op == op & p0$rhs == rhs & p0$group == group) if(length(p0.idx) == 0L) { stop("lavaan ERROR: parameter in H1 not found in H0: ", paste(lhs, op, rhs, "(group = ", group, ")", sep=" ")) } # 4 possibilities: p is free/fixed in p1, p is free/fixed in p0 if(p1$free[i] == 0L) { if(p0$free[p0.idx] == 0L) { # match, nothing to do } else { warning("lavaan WARNING: fixed parameter in H1 is free in H0: ", paste("\"", lhs, " ", op, " ", rhs, "\" (group = ", group, ")", sep="")) } } else { if(p0$free[p0.idx] == 0L) { # match, this is a contrained parameter in H0 ncon <- ncon + 1L BODY.txt <- paste(BODY.txt, "out[", ncon, "] = .x.[", p1$free[i], "] - ", p0$ustart[p0.idx], "\n", sep="") next } else { # match, nothing to do } } } # wrap function BODY.txt <- paste(BODY.txt, "return(out)\n}\n", sep="") body(con.function) <- parse(file="", text=BODY.txt) con.function } lavaan/R/lav_mvreg.R0000644000176200001440000002532613036472335014040 0ustar liggesusers# the multivariate linear model using maximum likelihood # - loglikelihood (from raw data, or sample statitics) # - derivatives with respect to Beta, Sigma, vech(Sigma) # - casewise scores with respect to Beta, vech(Sigma), Beta + vech(Sigma) # - (unit) information of Beta + vech(Sigma) # - hessian of Beta + vech(Sigma) # 1. input is raw data lav_mvreg_loglik_data <- function(Y = NULL, X = NULL, # includes intercept Beta = NULL, Sigma = NULL, casewise = FALSE, Sinv.method = "eigen") { Q <- NCOL(Y); N <- NROW(Y) if(casewise) { LOG.2PI <- log(2 * pi) # invert Sigma if(Sinv.method == "chol") { cS <- chol(Sigma); icS <- backsolve(cS, diag(Q)) logdet <- -2 * sum(log(diag(icS))) RES <- Y - X %*% Beta DIST <- rowSums((RES %*% icS)^2) } else { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") RES <- Y - X %*% Beta DIST <- rowSums(RES %*% Sigma.inv * RES) } loglik <- -(Q * LOG.2PI + logdet + DIST)/2 } else { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") RES <- Y - X %*% Beta # TOTAL <- TR( (Y - X%*%Beta) %*% Sigma.inv %*% t(Y - X%*%Beta) ) TOTAL <- sum( rowSums(RES %*% Sigma.inv * RES) ) loglik <- -(N*Q/2)*log(2*pi) - (N/2)*logdet - (1/2)*TOTAL } loglik } # 2. 
input are sample statistics (beta, cov, N) only lav_mvreg_loglik_samplestats <- function(sample.res.beta = NULL, sample.res.cov = NULL, sample.XX = NULL, sample.nobs = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { Q <- NCOL(sample.res.cov); N <- sample.nobs LOG.2PI <- log(2 * pi) if(is.null(Sigma.inv)) { Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = TRUE, Sinv.method = Sinv.method) logdet <- attr(Sigma.inv, "logdet") } else { logdet <- attr(Sigma.inv, "logdet") if(is.null(logdet)) { # compute - ln|Sigma.inv| ev <- eigen(Sigma.inv, symmetric = TRUE, only.values = TRUE) logdet <- -1 * sum(log(ev$values)) } } # tr(Sigma^{-1} %*% S) DIST1 <- sum(Sigma.inv * sample.res.cov) # tr( Sigma^{-1} (B-beta)' X'X (B-beta) Diff <- sample.res.beta - Beta DIST2 <- sum(Sigma.inv * crossprod(Diff, (1/N)*sample.XX) %*% Diff) loglik <- -(N/2) * (Q*log(2*pi) + logdet + DIST1 + DIST2) loglik } # derivative logl with respect to Beta # version 1: using Y/X # lav_matrix_vec( t(X) %*% RES %*% Sigma.inv ) # version 2: using B/S # lav_matrix_vec(XX %*% (B - Beta) %*% Sigma.inv) lav_mvreg_dlogl_dbeta <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # substract 'X %*% Beta' from Y RES <- Y - X %*% Beta # derivative dbeta <- as.numeric( t(X) %*% RES %*% Sigma.inv ) dbeta } # derivative logl with respect to Sigma (full matrix, ignoring symmetry) lav_mvreg_dlogl_dSigma <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- NROW(Y) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # substract 'X %*% Beta' from Y RES <- Y - X %*% Beta # W.tilde W.tilde <- crossprod(RES)/N # derivative dSigma <- -(N/2)* (Sigma.inv - (Sigma.inv %*% W.tilde %*% Sigma.inv)) dSigma } # derivative logl with respect to vech(Sigma) lav_mvreg_dlogl_dvechSigma <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- NROW(Y) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # substract 'X %*% Beta' from Y RES <- Y - X %*% Beta # W.tilde W.tilde <- crossprod(RES)/N # derivative dSigma <- -(N/2)* (Sigma.inv - (Sigma.inv %*% W.tilde %*% Sigma.inv)) dvechSigma <- as.numeric( lav_matrix_duplication_pre( as.matrix(lav_matrix_vec(dSigma)) ) ) dvechSigma } # casewise scores with respect to Beta lav_mvreg_scores_beta <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { Q <- NCOL(Y); P <- NCOL(X) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # substract Mu RES <- Y - X %*% Beta # post-multiply with Sigma.inv RES <- RES %*% Sigma.inv SC.Beta <- X[, rep(1:P, times = Q), drop = FALSE] * RES[,rep(1:Q, each = P), drop = FALSE] SC.Beta } # casewise scores with respect to vech(Sigma) lav_mvreg_scores_vech_sigma <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { Q <- NCOL(Y) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # vech(Sigma.inv) isigma <- lav_matrix_vech(Sigma.inv) # substract 
X %*% Beta RES <- Y - X %*% Beta # postmultiply with Sigma.inv RES <- RES %*% Sigma.inv # tcrossprod idx1 <- lav_matrix_vech_col_idx(Q) idx2 <- lav_matrix_vech_row_idx(Q) Z <- RES[,idx1] * RES[,idx2] # substract isigma from each row SC <- t( t(Z) - isigma ) # adjust for vech (and avoiding the 1/2 factor) SC[,lav_matrix_diagh_idx(Q)] <- SC[,lav_matrix_diagh_idx(Q)] / 2 SC } # casewise scores with respect to beta + vech(Sigma) lav_mvreg_scores_beta_vech_sigma <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { Q <- NCOL(Y); P <- NCOL(X) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } # vech(Sigma.inv) isigma <- lav_matrix_vech(Sigma.inv) # substract X %*% Beta RES <- Y - X %*% Beta # postmultiply with Sigma.inv RES <- RES %*% Sigma.inv SC.Beta <- X[, rep(1:P, times = Q), drop = FALSE] * RES[,rep(1:Q, each = P), drop = FALSE] # tcrossprod idx1 <- lav_matrix_vech_col_idx(Q) idx2 <- lav_matrix_vech_row_idx(Q) Z <- RES[,idx1] * RES[,idx2] # substract isigma from each row SC <- t( t(Z) - isigma ) # adjust for vech (and avoiding the 1/2 factor) SC[,lav_matrix_diagh_idx(Q)] <- SC[,lav_matrix_diagh_idx(Q)] / 2 cbind(SC.Beta, SC) } # information Beta and vech(Sigma) lav_mvreg_information_beta_vech_sigma_samplestats <- function(Sigma.inv = NULL, sample.XX = NULL, sample.nobs = NULL) { XXN <- (1/sample.nobs) * sample.XX I11 <- Sigma.inv %x% XXN I22 <- 0.5 * lav_matrix_duplication_pre_post(Sigma.inv %x% Sigma.inv) lav_matrix_bdiag(I11, I22) } # hessian Beta and vech(Sigma) lav_mvreg_hessian_beta_vech_sigma <- function(Y = NULL, X = NULL, Beta = NULL, Sigma = NULL, Sinv.method = "eigen", Sigma.inv = NULL) { N <- NROW(Y) if(is.null(Sigma.inv)) { # invert Sigma Sigma.inv <- lav_matrix_symmetric_inverse(S = Sigma, logdet = FALSE, Sinv.method = Sinv.method) } RES <- Y - X %*% Beta W.tilde <- 1/N * crossprod(RES) H11 <- Sigma.inv %x% ((1/N) * crossprod(X)) H21 <- lav_matrix_duplication_pre( Sigma.inv %x% (Sigma.inv %*% ((1/N) * crossprod(RES, X))) ) H12 <- t(H21) AAA <- Sigma.inv %*% (2*W.tilde - Sigma) %*% Sigma.inv H22 <- (1/2) * lav_matrix_duplication_pre_post(Sigma.inv %x% AAA) H <- -N * rbind( cbind(H11, H12), cbind(H21, H22) ) H } lavaan/R/lav_partable_vnames.R0000644000176200001440000005046413053256674016071 0ustar liggesusers# lav_partable_names # # YR. 29 june 2013 # - as separate file; used to be in utils-user.R # - lav_partable_names (aka 'vnames') allows multiple options in 'type' # returning them all as a list (or just a vector if only 1 type is needed) # public version lavNames <- function(object, type = "ov", ...) { if(inherits(object, "lavaan") || inherits(object, "lavaanList")) { partable <- object@ParTable } else if(class(object) == "list" || inherits(object, "data.frame")) { partable <- object } else if(class(object) == "character") { # just a model string? partable <- lavParseModelString(object) } lav_partable_vnames(partable, type = type, ...) 
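    # Example usage (a sketch; the model syntax below is hypothetical):
    #   HS.model <- ' visual =~ x1 + x2 + x3 '
    #   lavNames(HS.model, type = "ov")   # observed: "x1" "x2" "x3"
    #   lavNames(HS.model, type = "lv")   # latent:   "visual"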
} # alias for backwards compatibility lavaanNames <- lavNames # return variable names in a partable # - the 'type' argument determines the status of the variable: observed, # latent, endo/exo/...; default = "ov", but most used is type = "all" # - the 'group' argument either selects a single group (if group is an integer) # or returns a list per group # - the 'level' argument either selects a single level (if level is an integer) # or returns a list per level # - the 'block' argument either selects a single block (if block is an integer) # or returns a list per block lav_partable_vnames <- function(partable, type = NULL, ..., warn = FALSE, ov.x.fatal = FALSE) { # check for empy table if(length(partable$lhs) == 0) return(character(0L)) # dotdotdot dotdotdot <- list(...) type.list <- c("ov", # observed variables (ov) "ov.x", # (pure) exogenous observed variables "ov.nox", # non-exogenous observed variables "ov.model", # modeled observed variables (joint vs cond) "ov.y", # (pure) endogenous variables (dependent only) "ov.num", # numeric observed variables "ov.ord", # ordinal observed variables "ov.ind", # observed indicators of latent variables "ov.orphan", # lonely observed intercepts/variances "ov.interaction", # interaction terms (with colon) "th", # thresholds ordinal only "th.mean", # thresholds ordinal + numeric variables "lv", # latent variables "lv.regular", # latent variables (defined by =~ only) "lv.formative",# latent variables (defined by <~ only) "lv.x", # (pure) exogenous variables "lv.y", # (pure) endogenous variables "lv.nox", # non-exogenous latent variables "lv.nonnormal",# latent variables with non-normal indicators "lv.interaction", # interaction terms "eqs.y", # y's in regression "eqs.x" # x's in regression ) # sanity check stopifnot(is.list(partable), !missing(type), type %in% c(type.list, "all")) if(length(type) == 1L && type == "all") { type <- type.list } # ALWAYS need `block' column -- create one if missing if(is.null(partable$block)) { partable$block <- rep(1L, length(partable$lhs)) } # nblocks -- block column is integer only nblocks <- lav_partable_nblocks(partable) # per default, use full partable block.select <- lav_partable_block_values(partable) # check for ... selection argument(s) ndotdotdot <- length(dotdotdot) if(ndotdotdot > 0L) { dot.names <- names(dotdotdot) block.select <- rep(TRUE, length(partable$lhs)) for(dot in seq_len(ndotdotdot)) { # selection variable? block.var <- dot.names[dot] block.val <- dotdotdot[[block.var]] # do we have this 'block.var' in partable? 
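            # (for example -- a hypothetical call -- lav_partable_vnames(PT,
            #  "ov", group = 2L) arrives here with block.var == "group" and
            #  block.val == 2L; we then verify that the partable has a
            #  'group' column containing the value 2)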
if(is.null(partable[[block.var]])) {
                stop("lavaan ERROR: selection variable `", block.var,
                     "' not found in the parameter table.")
            } else {
                if(!all(block.val %in% partable[[block.var]])) {
                    stop("lavaan ERROR: ", block.var,
                         " column does not contain value `", block.val, "'")
                }
                block.select <- ( block.select &
                                  partable[[block.var]] %in% block.val )
            }
        } # dot
        block.select <- unique(partable$block[block.select])

        if(length(block.select) == 0L) {
            warning("lavaan WARNING: no blocks selected.")
        }
    }

    # output: list per block
    OUT                <- vector("list", length = nblocks)
    OUT$ov             <- vector("list", length = nblocks)
    OUT$ov.x           <- vector("list", length = nblocks)
    OUT$ov.nox         <- vector("list", length = nblocks)
    OUT$ov.model       <- vector("list", length = nblocks)
    OUT$ov.y           <- vector("list", length = nblocks)
    OUT$ov.num         <- vector("list", length = nblocks)
    OUT$ov.ord         <- vector("list", length = nblocks)
    OUT$ov.ind         <- vector("list", length = nblocks)
    OUT$ov.orphan      <- vector("list", length = nblocks)
    OUT$ov.interaction <- vector("list", length = nblocks)
    OUT$th             <- vector("list", length = nblocks)
    OUT$th.mean        <- vector("list", length = nblocks)
    OUT$lv             <- vector("list", length = nblocks)
    OUT$lv.regular     <- vector("list", length = nblocks)
    OUT$lv.formative   <- vector("list", length = nblocks)
    OUT$lv.x           <- vector("list", length = nblocks)
    OUT$lv.y           <- vector("list", length = nblocks)
    OUT$lv.nox         <- vector("list", length = nblocks)
    OUT$lv.nonnormal   <- vector("list", length = nblocks)
    OUT$lv.interaction <- vector("list", length = nblocks)
    OUT$eqs.y          <- vector("list", length = nblocks)
    OUT$eqs.x          <- vector("list", length = nblocks)

    for(b in block.select) {

        # always compute lv.names
        lv.names <- unique( partable$lhs[ partable$block == b &
                                          (partable$op == "=~" |
                                           partable$op == "<~") ] )

        # determine lv interactions
        int.names <- unique(partable$rhs[ partable$block == b &
                                          grepl(":", partable$rhs) ] )
        n.int <- length(int.names)
        if(n.int > 0L) {
            ok.idx <- logical(n.int)
            for(iv in seq_len(n.int)) {
                NAMES <- strsplit(int.names[iv], ":", fixed = TRUE)[[1L]]

                # three scenarios:
                # - both variables are latent (ok)
                # - both variables are observed (ignore)
                # - only one latent (warn??) 
-> upgrade observed to latent # thus if at least one is in lv.names, we treat it as a # latent interaction if(sum(NAMES %in% lv.names) > 0L) { ok.idx[iv] <- TRUE } } lv.interaction <- int.names[ok.idx] lv.names <- c(lv.names, lv.interaction) } else { lv.interaction <- character(0L) } # store lv if("lv" %in% type) { OUT$lv[[b]] <- lv.names } # regular latent variables ONLY (ie defined by =~ only) if("lv.regular" %in% type) { out <- unique( partable$lhs[ partable$block == b & partable$op == "=~" ] ) OUT$lv.regular[[b]] <- out } # interaction terms involving latent variables (only) if("lv.interaction" %in% type) { OUT$lv.interaction[[b]] <- lv.interaction } # formative latent variables ONLY (ie defined by <~ only) if("lv.formative" %in% type) { out <- unique( partable$lhs[ partable$block == b & partable$op == "<~" ] ) OUT$lv.formative[[b]] <- out } # eqs.y if(!(length(type) == 1L && type %in% c("lv", "lv.regular", "lv.nonnormal"))) { eqs.y <- unique( partable$lhs[ partable$block == b & partable$op == "~" ] ) } # store eqs.y if("eqs.y" %in% type) { OUT$eqs.y[[b]] <- eqs.y } # eqs.x if(!(length(type) == 1L && type %in% c("lv", "lv.regular", "lv.nonnormal","lv.x"))) { eqs.x <- unique( partable$rhs[ partable$block == b & (partable$op == "~" | partable$op == "<~") ] ) } # store eqs.x if("eqs.x" %in% type) { OUT$eqs.x[[b]] <- eqs.x } # v.ind -- indicators of latent variables if(!(length(type) == 1L && type %in% c("lv", "lv.regular", "lv.nonnormal"))) { v.ind <- unique( partable$rhs[ partable$block == b & partable$op == "=~" ] ) } # ov.* if(!(length(type) == 1L && type %in% c("lv", "lv.regular", "lv.nonnormal", "lv.x","lv.y"))) { # 1. indicators, which are not latent variables themselves ov.ind <- v.ind[ !v.ind %in% lv.names ] # 2. dependent ov's ov.y <- eqs.y[ !eqs.y %in% c(lv.names, ov.ind) ] # 3. independent ov's ov.x <- eqs.x[ !eqs.x %in% c(lv.names, ov.ind, ov.y) ] } # observed variables # easy approach would be: everything that is not in lv.names, # but the main purpose here is to 'order' the observed variables # according to 'type' (indicators, ov.y, ov.x, orphans) if(!(length(type) == 1L && type %in% c("lv", "lv.regular", "lv.nonnormal", "lv.x","lv.y"))) { # 4. orphaned covariances ov.cov <- c(partable$lhs[ partable$block == b & partable$op == "~~" & !partable$lhs %in% lv.names ], partable$rhs[ partable$block == b & partable$op == "~~" & !partable$rhs %in% lv.names ]) # 5. orphaned intercepts/thresholds ov.int <- partable$lhs[ partable$block == b & (partable$op == "~1" | partable$op == "|") & !partable$lhs %in% lv.names ] ov.tmp <- c(ov.ind, ov.y, ov.x) ov.extra <- unique(c(ov.cov, ov.int)) ov.names <- c(ov.tmp, ov.extra[ !ov.extra %in% ov.tmp ]) } # store ov? 
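        # (as an illustration -- hypothetical model syntax -- for
        #  ' f =~ y1 + y2 \n y3 ~ f + x1 ', ov.names is ordered as:
        #  indicators first (y1, y2), then dependent y's (y3), then
        #  exogenous x's (x1))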
if("ov" %in% type) { OUT$ov[[b]] <- ov.names } if("ov.ind" %in% type) { OUT$ov.ind[[b]] <- ov.ind } if("ov.interaction" %in% type) { ov.int.names <- ov.names[ grepl(":", ov.names) ] n.int <- length(ov.int.names) if(n.int > 0L) { ov.names.noint <- ov.names[!ov.names %in% ov.int.names] ok.idx <- logical(n.int) for(iv in seq_len(n.int)) { NAMES <- strsplit(ov.int.names[iv], ":", fixed = TRUE)[[1L]] # two scenario's: # - both variables are in ov.names.noint (ok) # - at least one variables is NOT in ov.names.noint (ignore) if(all(NAMES %in% ov.names.noint)) { ok.idx[iv] <- TRUE } } ov.interaction <- ov.int.names[ok.idx] } else { ov.interaction <- character(0L) } OUT$ov.interaction[[b]] <- ov.interaction } # exogenous `x' covariates if(any(type %in% c("ov.x","ov.nox","ov.num", "ov.model", "th.mean","lv.nonnormal"))) { # correction: is any of these ov.names.x mentioned as a variance, # covariance, or intercept? # this should trigger a warning in lavaanify() if(is.null(partable$user)) { # FLAT! partable$user <- rep(1L, length(partable$lhs)) } vars <- c( partable$lhs[ partable$block == b & partable$op == "~1" & partable$user == 1 ], partable$lhs[ partable$block == b & partable$op == "~~" & partable$user == 1 ], partable$rhs[ partable$block == b & partable$op == "~~" & partable$user == 1 ] ) idx.no.x <- which(ov.x %in% vars) if(length(idx.no.x)) { if(ov.x.fatal) { stop("lavaan ERROR: model syntax contains variance/covariance/intercept formulas\n involving (an) exogenous variable(s): [", paste(ov.x[idx.no.x], collapse=" "), "];\n Please remove them and try again.") } if(warn) { warning("lavaan WARNING: model syntax contains variance/covariance/intercept formulas\n involving (an) exogenous variable(s): [", paste(ov.x[idx.no.x], collapse=" "), "];\n Please use fixed.x=FALSE or leave them alone") } ov.x <- ov.x[-idx.no.x] } ov.tmp.x <- ov.x # extra if(!is.null(partable$exo)) { ov.cov <- c(partable$lhs[ partable$block == b & partable$op == "~~" & partable$exo == 1L], partable$rhs[ partable$block == b & partable$op == "~~" & partable$exo == 1L]) ov.int <- partable$lhs[ partable$block == b & partable$op == "~1" & partable$exo == 1L ] ov.extra <- unique(c(ov.cov, ov.int)) ov.tmp.x <- c(ov.tmp.x, ov.extra[ !ov.extra %in% ov.tmp.x ]) } ov.names.x <- ov.tmp.x } # store ov.x? if("ov.x" %in% type) { OUT$ov.x[[b]] <- ov.names.x } # story ov.orphan? if("ov.orphan" %in% type) { OUT$ov.orphan[[b]] <- ov.extra } # ov's withouth ov.x if(any(type %in% c("ov.nox", "ov.num", "ov.model", "th.mean", "lv.nonnormal"))) { ov.names.nox <- ov.names[! ov.names %in% ov.names.x ] } # store ov.nox if("ov.nox" %in% type) { OUT$ov.nox[[b]] <- ov.names.nox } # store ov.model if("ov.model" %in% type) { # if no conditional.x, this is just ov # else, this is ov.nox if(any( partable$block == b & partable$op == "~" & partable$exo == 1L )) { OUT$ov.model[[b]] <- ov.names.nox } else { OUT$ov.model[[b]] <- ov.names } } # ov's strictly ordered if(any(type %in% c("ov.ord", "th", "th.mean", "ov.num", "lv.nonnormal"))) { tmp <- unique(partable$lhs[ partable$block == b & partable$op == "|" ]) ord.names <- ov.names[ ov.names %in% tmp ] } if("ov.ord" %in% type) { OUT$ov.ord[[b]] <- ord.names } # ov's strictly numeric (but no x) if(any(type %in% c("ov.num", "lv.nonnormal"))) { ov.num <- ov.names.nox[! 
ov.names.nox %in% ord.names ] } if("ov.num" %in% type) { OUT$ov.num[[b]] <- ov.num } # nonnormal lv's if("lv.nonnormal" %in% type) { # regular lv's lv.reg <- unique( partable$lhs[ partable$block == b & partable$op == "=~" ] ) if(length(lv.reg) > 0L) { out <- unlist( lapply(lv.reg, function(x) { # get indicators for this lv tmp.ind <- unique( partable$rhs[ partable$block == b & partable$op == "=~" & partable$lhs == x ] ) if(!all(tmp.ind %in% ov.num)) { return(x) } else { return(character(0)) } }) ) OUT$lv.nonnormal[[b]] <- out } else { OUT$lv.nonnormal[[b]] <- character(0) } } if(any(c("th","th.mean") %in% type)) { TH.lhs <- partable$lhs[ partable$block == b & partable$op == "|" ] TH.rhs <- partable$rhs[ partable$block == b & partable$op == "|" ] } # threshold if("th" %in% type) { if(length(ord.names) > 0L) { # return in the right order (following ord.names!) out <- unlist(lapply(ord.names, function(x) { idx <- which(x == TH.lhs) TH <- unique(paste(TH.lhs[idx], "|", TH.rhs[idx], sep="")) # make sure the th's are in increasing order sort(TH) })) } else { out <- character(0L) } OUT$th[[b]] <- out } # thresholds and mean/intercepts of numeric variables if("th.mean" %in% type) { if(length(ov.names.nox) > 0L) { # return in the right order (following ov.names.nox!) out <- unlist(lapply(ov.names.nox, function(x) { if(x %in% ord.names) { idx <- which(x == TH.lhs) TH <- unique(paste(TH.lhs[idx], "|", TH.rhs[idx], sep="")) # make sure the th's are in increasing order sort(TH) } else { x } })) } else { out <- character(0L) } OUT$th.mean[[b]] <- out } # exogenous lv's if(any(c("lv.x","lv.nox") %in% type)) { tmp <- lv.names[ !lv.names %in% c(v.ind, eqs.y) ] lv.names.x <- lv.names[ lv.names %in% tmp ] } if("lv.x" %in% type) { OUT$lv.x[[b]] <- lv.names.x } # dependent ov (but not also indicator or x) if("ov.y" %in% type) { tmp <- eqs.y[ !eqs.y %in% c(v.ind, eqs.x, lv.names) ] OUT$ov.y[[b]] <- ov.names[ ov.names %in% tmp ] } # dependent lv (but not also indicator or x) if("lv.y" %in% type) { tmp <- eqs.y[ !eqs.y %in% c(v.ind, eqs.x) & eqs.y %in% lv.names ] OUT$lv.y[[b]] <- lv.names[ lv.names %in% tmp ] } # non-exogenous latent variables if("lv.nox" %in% type) { OUT$lv.nox[[b]] <- lv.names[! 
lv.names %in% lv.names.x ] } } # to mimic old behaviour, if length(type) == 1L if(length(type) == 1L) { OUT <- OUT[[type]] # to mimic old behaviour, if specific block is requested if(ndotdotdot == 0L) { OUT <- unique(unlist(OUT)) } else if(length(block.select) == 1L) { OUT <- OUT[[block.select]] } else { OUT <- OUT[block.select] } } else { OUT <- OUT[type] } OUT } # alias for backward compatibility vnames <- lav_partable_vnames lavaan/R/lav_model_estimate.R0000644000176200001440000004441513043403631015702 0ustar liggesusers# model estimation lav_model_estimate <- function(lavmodel = NULL, lavsamplestats = NULL, lavdata = NULL, lavoptions = NULL, lavcache = list(), do.fit = TRUE) { estimator <- lavoptions$estimator verbose <- lavoptions$verbose debug <- lavoptions$debug ngroups <- lavsamplestats@ngroups if(lavsamplestats@missing.flag || estimator == "PML") { group.weight <- FALSE } else { group.weight <- TRUE } # temp test if(lavoptions$partrace) { # fx + parameter values PENV <- new.env() PENV$PARTRACE <- matrix(NA, nrow=0, ncol=lavmodel@nx.free + 1L) } # function to be minimized minimize.this.function <- function(x, verbose=FALSE, infToMax=FALSE) { #cat("DEBUG: x = ", x, "\n") # current strategy: forcePD is by default FALSE, except # if missing patterns are used #if(any(lavsamplestats@missing.flag)) { # forcePD <- TRUE #} else { forcePD <- FALSE #} # transform variances back #x[lavmodel@x.free.var.idx] <- tan(x[lavmodel@x.free.var.idx]) # update GLIST (change `state') and make a COPY! if(lavmodel@eq.constraints) { x <- as.numeric(lavmodel@eq.constraints.K %*% x) + lavmodel@eq.constraints.k0 } GLIST <- lav_model_x2GLIST(lavmodel, x=x) fx <- lav_model_objective(lavmodel = lavmodel, GLIST = GLIST, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, verbose = verbose, forcePD = forcePD) # only for PML: divide by N (to speed up convergence) if(estimator == "PML") { fx <- fx / lavsamplestats@ntotal } if(debug || verbose) { cat("Objective function = ", sprintf("%18.16f", fx), "\n", sep="") } if(debug) { #cat("Current unconstrained parameter values =\n") #tmp.x <- lav_model_get_parameters(lavmodel, GLIST=GLIST, type="unco") #print(tmp.x); cat("\n") cat("Current free parameter values =\n"); print(x); cat("\n") } if(lavoptions$partrace) { PENV$PARTRACE <- rbind(PENV$PARTRACE, c(fx, x)) } # for L-BFGS-B if(infToMax && is.infinite(fx)) fx <- 1e20 fx } first.derivative.param <- function(x, verbose=FALSE, infToMax=FALSE) { # transform variances back #x[lavmodel@x.free.var.idx] <- tan(x[lavmodel@x.free.var.idx]) # update GLIST (change `state') and make a COPY! if(lavmodel@eq.constraints) { x <- as.numeric(lavmodel@eq.constraints.K %*% x) + lavmodel@eq.constraints.k0 } GLIST <- lav_model_x2GLIST(lavmodel, x=x) dx <- lav_model_gradient(lavmodel = lavmodel, GLIST = GLIST, lavsamplestats = lavsamplestats, lavdata = lavdata, lavcache = lavcache, type = "free", group.weight = group.weight, ### check me!! 
verbose = verbose, forcePD = TRUE) if(debug) { cat("Gradient function (analytical) =\n"); print(dx); cat("\n") } #print( dx %*% lavmodel@eq.constraints.K ) #stop("for now") # handle linear equality constraints if(lavmodel@eq.constraints) { dx <- as.numeric( dx %*% lavmodel@eq.constraints.K ) } # only for PML: divide by N (to speed up convergence) if(estimator == "PML") { dx <- dx / lavsamplestats@ntotal } if(debug) { cat("Gradient function (analytical, after eq.constraints.K) =\n"); print(dx); cat("\n") } dx } first.derivative.param.numerical <- function(x, verbose=FALSE) { # transform variances back #x[lavmodel@x.free.var.idx] <- tan(x[lavmodel@x.free.var.idx]) # numerical approximation using the Richardson method npar <- length(x) h <- 10e-6 dx <- numeric( npar ) ## FIXME: call lav_model_objective directly!! for(i in 1:npar) { x.left <- x.left2 <- x.right <- x.right2 <- x x.left[i] <- x[i] - h; x.left2[i] <- x[i] - 2*h x.right[i] <- x[i] + h; x.right2[i] <- x[i] + 2*h fx.left <- minimize.this.function(x.left) fx.left2 <- minimize.this.function(x.left2) fx.right <- minimize.this.function(x.right) fx.right2 <- minimize.this.function(x.right2) dx[i] <- (fx.left2 - 8*fx.left + 8*fx.right - fx.right2)/(12*h) } #dx <- lavGradientC(func=minimize.this.function, x=x) # does not work if pnorm is involved... (eg PML) if(debug) { cat("Gradient function (numerical) =\n"); print(dx); cat("\n") } dx } # starting values start.x <- lav_model_get_parameters(lavmodel) if(lavmodel@eq.constraints) { start.x <- as.numeric( (start.x - lavmodel@eq.constraints.k0) %*% lavmodel@eq.constraints.K ) } if(debug) { #cat("start.unco = ", lav_model_get_parameters(lavmodel, type="unco"), "\n") cat("start.x = ", start.x, "\n") } # check if the initial values produce a positive definite Sigma # to begin with -- but only for estimator="ML" #if(estimator %in% c("ML","PML","FML","MML")) { if(estimator %in% c("ML","FML","MML")) { Sigma.hat <- computeSigmaHat(lavmodel, extra=TRUE, debug=lavoptions$debug) for(g in 1:ngroups) { if(!attr(Sigma.hat[[g]], "po")) { group.txt <- ifelse(ngroups > 1, paste(" in group ",g,".",sep=""), ".") if(debug) print(Sigma.hat[[g]]) stop("lavaan ERROR: initial model-implied matrix (Sigma) is not positive definite;\n check your model and/or starting parameters", group.txt) # FIXME: should we stop here?? or try anyway? x <- start.x fx <- as.numeric(NA) attr(fx, "fx.group") <- rep(as.numeric(NA), ngroups) attr(x, "converged") <- FALSE attr(x, "iterations") <- 0L attr(x, "control") <- lavoptions@control attr(x, "fx") <- fx return(x) } } } # scaling factors # FIXME: what is the best way to set the scale?? # current strategy: if startx > 1.0, we rescale by using # 1/startx SCALE <- rep(1.0, length(start.x)) #idx <- which(abs(start.x) > 10.0) idx <- which(abs(start.x) > 1.0) if(length(idx) > 0L) SCALE[idx] <- abs(1.0/start.x[idx]) #idx <- which(abs(start.x) < 1.0 & start.x != 0.0) #if(length(idx) > 0L) SCALE[idx] <- abs(1.0/start.x[idx]) if(debug) { cat("SCALE = ", SCALE, "\n") } # transforming variances using atan (or another sigmoid function?) # FIXME: better approach? #start.x[lavmodel@x.free.var.idx] <- atan(start.x[lavmodel@x.free.var.idx]) # first some nelder mead steps? (default = FALSE) INIT_NELDER_MEAD <- lavoptions$optim.init_nelder_mead # gradient: analytic, numerical or NULL? 
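    # sketch of the mapping implemented below:
    #   optim.gradient = "analytic"  -> analytic gradient (also the default
    #                                   when the option is NULL/not set)
    #   optim.gradient = "numerical" -> numerical approximation (Richardson-
    #                                   type central differences)
    #   optim.gradient = "NULL"      -> no gradient function is passed, and
    #                                   the optimizer approximates it itself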
if(is.character(lavoptions$optim.gradient)) { if(lavoptions$optim.gradient %in% c("analytic","analytical")) { GRADIENT <- first.derivative.param } else if(lavoptions$optim.gradient %in% c("numerical", "numeric")) { GRADIENT <- first.derivative.param.numerical } else if(lavoptions$optim.gradient %in% c("NULL", "null")) { GRADIENT <- NULL } else { warning("lavaan WARNING: gradient should be analytic, numerical or NULL") } } else if(is.logical(lavoptions$optim.gradient)) { if(lavoptions$optim.gradient) { GRADIENT <- first.derivative.param } else { GRADIENT <- NULL } } else if(is.null(lavoptions$optim.gradient)) { GRADIENT <- first.derivative.param } # optimizer if(length(lavmodel@ceq.nonlinear.idx) == 0L && length(lavmodel@cin.linear.idx) == 0L && length(lavmodel@cin.nonlinear.idx) == 0L) { if(is.null(lavoptions$optim.method)) { OPTIMIZER <- "NLMINB" #OPTIMIZER <- "BFGS" # slightly slower, no bounds; better scaling! #OPTIMIZER <- "L-BFGS-B" # trouble with Inf values for fx! } else { OPTIMIZER <- toupper(lavoptions$optim.method) stopifnot(OPTIMIZER %in% c("NLMINB", "BFGS", "L-BFGS-B", "NONE")) } } else { OPTIMIZER <- "NLMINB.CONSTR" } if(INIT_NELDER_MEAD) { if(verbose) cat("Initial Nelder-Mead step:\n") trace <- 0L; if(verbose) trace <- 1L optim.out <- optim(par=start.x, fn=minimize.this.function, method="Nelder-Mead", #control=list(maxit=10L, # parscale=SCALE, # trace=trace), hessian=FALSE, verbose=verbose) cat("\n") start.x <- optim.out$par } if(OPTIMIZER == "NLMINB") { if(verbose) cat("Quasi-Newton steps using NLMINB:\n") #if(debug) control$trace <- 1L; control.nlminb <- list(eval.max=20000L, iter.max=10000L, trace=0L, #abs.tol=1e-20, ### important!! fx never negative abs.tol=(.Machine$double.eps * 10), rel.tol=1e-10, #step.min=2.2e-14, # in =< 0.5-12 step.min=1.0, # 1.0 in < 0.5-21 step.max=1.0, x.tol=1.5e-8, xf.tol=2.2e-14) control.nlminb <- modifyList(control.nlminb, lavoptions$control) control <- control.nlminb[c("eval.max", "iter.max", "trace", "step.min", "step.max", "abs.tol", "rel.tol", "x.tol", "xf.tol")] #cat("DEBUG: control = "); print(str(control.nlminb)); cat("\n") optim.out <- nlminb(start=start.x, objective=minimize.this.function, gradient=GRADIENT, control=control, scale=SCALE, verbose=verbose) if(verbose) { cat("convergence status (0=ok): ", optim.out$convergence, "\n") cat("nlminb message says: ", optim.out$message, "\n") cat("number of iterations: ", optim.out$iterations, "\n") cat("number of function evaluations [objective, gradient]: ", optim.out$evaluations, "\n") } iterations <- optim.out$iterations x <- optim.out$par if(optim.out$convergence == 0) { converged <- TRUE } else { converged <- FALSE } } else if(OPTIMIZER == "BFGS") { # warning: Bollen example with estimator=GLS does NOT converge! # (but WLS works!) # - BB.ML works too control.bfgs <- list(trace=0L, fnscale=1, parscale=SCALE, ## or not? 
ndeps=1e-3, maxit=10000, abstol=1e-20, reltol=1e-10, REPORT=1L) control.bfgs <- modifyList(control.bfgs, lavoptions$control) control <- control.bfgs[c("trace", "fnscale", "parscale", "ndeps", "maxit", "abstol", "reltol", "REPORT")] #trace <- 0L; if(verbose) trace <- 1L optim.out <- optim(par=start.x, fn=minimize.this.function, gr=GRADIENT, method="BFGS", control=control, hessian=FALSE, verbose=verbose) if(verbose) { cat("convergence status (0=ok): ", optim.out$convergence, "\n") cat("optim BFGS message says: ", optim.out$message, "\n") #cat("number of iterations: ", optim.out$iterations, "\n") cat("number of function evaluations [objective, gradient]: ", optim.out$counts, "\n") } #iterations <- optim.out$iterations iterations <- optim.out$counts[1] x <- optim.out$par if(optim.out$convergence == 0L) { converged <- TRUE } else { converged <- FALSE } } else if(OPTIMIZER == "L-BFGS-B") { # warning, does not cope with Inf values!! control.lbfgsb <- list(trace=0L, fnscale=1, parscale=SCALE, ## or not? ndeps=1e-3, maxit=10000, REPORT=1L, lmm=5L, factr=1e7, pgtol=0) control.lbfgsb <- modifyList(control.lbfgsb, lavoptions$control) control <- control.lbfgsb[c("trace", "fnscale", "parscale", "ndeps", "maxit", "REPORT", "lmm", "factr", "pgtol")] optim.out <- optim(par=start.x, fn=minimize.this.function, gr=GRADIENT, method="L-BFGS-B", control=control, hessian=FALSE, verbose=verbose, infToMax=TRUE) if(verbose) { cat("convergence status (0=ok): ", optim.out$convergence, "\n") cat("optim L-BFGS-B message says: ", optim.out$message, "\n") #cat("number of iterations: ", optim.out$iterations, "\n") cat("number of function evaluations [objective, gradient]: ", optim.out$counts, "\n") } #iterations <- optim.out$iterations iterations <- optim.out$counts[1] x <- optim.out$par if(optim.out$convergence == 0L) { converged <- TRUE } else { converged <- FALSE } } else if(OPTIMIZER == "NLMINB.CONSTR") { ocontrol <- list(verbose=verbose) if(!is.null(lavoptions$control$control.outer)) { ocontrol <- c(lavoptions$control$control.outer, verbose=verbose) } control.nlminb <- list(eval.max=20000L, iter.max=10000L, trace=0L, #abs.tol=1e-20, abs.tol=(.Machine$double.eps * 10), rel.tol=1e-9) # 1e-10 seems 'too strict' control.nlminb <- modifyList(control.nlminb, lavoptions$control) control <- control.nlminb[c("eval.max", "iter.max", "trace", "abs.tol", "rel.tol")] cin <- cin.jac <- ceq <- ceq.jac <- NULL if(!is.null(body(lavmodel@cin.function))) cin <- lavmodel@cin.function if(!is.null(body(lavmodel@cin.jacobian))) cin.jac <- lavmodel@cin.jacobian if(!is.null(body(lavmodel@ceq.function))) ceq <- lavmodel@ceq.function if(!is.null(body(lavmodel@ceq.jacobian))) ceq.jac <- lavmodel@ceq.jacobian trace <- FALSE; if(verbose) trace <- TRUE optim.out <- nlminb.constr(start = start.x, objective=minimize.this.function, gradient=GRADIENT, control=control, scale=SCALE, verbose=verbose, cin = cin, cin.jac = cin.jac, ceq = ceq, ceq.jac = ceq.jac, control.outer = ocontrol ) if(verbose) { cat("convergence status (0=ok): ", optim.out$convergence, "\n") cat("nlminb.constr message says: ", optim.out$message, "\n") cat("number of outer iterations: ", optim.out$outer.iterations, "\n") cat("number of inner iterations: ", optim.out$iterations, "\n") cat("number of function evaluations [objective, gradient]: ", optim.out$evaluations, "\n") } iterations <- optim.out$iterations x <- optim.out$par if(optim.out$convergence == 0) { converged <- TRUE } else { converged <- FALSE } } else if(OPTIMIZER == "NONE") { x <- start.x iterations <- 0L converged <- 
TRUE control <- list() optim.out <- list() } fx <- minimize.this.function(x) # to get "fx.group" attribute # transform back if(lavmodel@eq.constraints) { x <- as.numeric(lavmodel@eq.constraints.K %*% x) + lavmodel@eq.constraints.k0 } # transform variances back #x[lavmodel@x.free.var.idx] <- tan(x[lavmodel@x.free.var.idx]) attr(x, "converged") <- converged attr(x, "iterations") <- iterations attr(x, "control") <- control attr(x, "fx") <- fx if(!is.null(optim.out$con.jac)) attr(x, "con.jac") <- optim.out$con.jac if(!is.null(optim.out$lambda)) attr(x, "con.lambda") <- optim.out$lambda if(lavoptions$partrace) { attr(x, "partrace") <- PENV$PARTRACE } x } # backwards compatibility # estimateModel <- lav_model_estimate lavaan/R/zzz.R0000644000176200001440000000045612465075714012715 0ustar liggesusers.onAttach <- function(libname, pkgname) { version <- read.dcf(file=system.file("DESCRIPTION", package=pkgname), fields="Version") packageStartupMessage("This is ",paste(pkgname, version)) packageStartupMessage(pkgname, " is BETA software! Please report any bugs.") } lavaan/R/lav_test_Wald.R0000644000176200001440000000441212743732111014632 0ustar liggesusers# classic Wald test # # NOTE: does not handle redundant constraints yet! lavTestWald <- function(object, constraints = NULL, verbose = FALSE) { if(object@optim$npar > 0L && !object@optim$converged) stop("lavaan ERROR: model did not converge") if(is.null(constraints) || nchar(constraints) == 0L) { stop("lavaan ERROR: constraints are empty") } # remove == constraints from parTable PT <- as.data.frame(object@ParTable, stringsAsFactors = FALSE) eq.idx <- which(PT$op == "==") if(length(eq.idx) > 0L) { PT <- PT[-eq.idx,] } partable <- as.list(PT) # parse constraints FLAT <- lavParseModelString( constraints ); CON <- attr(FLAT, "constraints") LIST <- list() if(length(CON) > 0L) { lhs = unlist(lapply(CON, "[[", "lhs")) op = unlist(lapply(CON, "[[", "op")) rhs = unlist(lapply(CON, "[[", "rhs")) LIST$lhs <- c(LIST$lhs, lhs) LIST$op <- c(LIST$op, op) LIST$rhs <- c(LIST$rhs, rhs) } else { stop("lavaan ERROR: no equality constraints found in constraints argument") } # theta = free parameters only theta <- object@optim$x # build constraint function ceq.function <- lav_partable_constraints_ceq(partable = partable, con = LIST, debug = FALSE) # compute jacobian restrictions JAC <- try(lav_func_jacobian_complex(func = ceq.function, x = theta), silent=TRUE) if(inherits(JAC, "try-error")) { # eg. 
pnorm() JAC <- lav_func_jacobian_simple(func = ceq.function, x = theta) } if(verbose) { cat("Restriction matrix (jacobian):\n"); print(JAC); cat("\n") } # linear restriction theta.r <- ceq.function( theta ) if(verbose) { cat("Restricted theta values:\n"); print(theta.r); cat("\n") } # get VCOV VCOV <- vcov(object, labels = FALSE) # restricted vcov info.r <- JAC %*% VCOV %*% t(JAC) # Wald test statistic Wald <- as.numeric(t(theta.r) %*% solve( info.r ) %*% theta.r) # df Wald.df <- nrow(JAC) # p-value based on chisq Wald.pvalue <- 1 - pchisq(Wald, df=Wald.df) list(stat=Wald, df=Wald.df, p.value=Wald.pvalue, se=object@Options$se) } lavaan/R/ctr_informative_testing.R0000644000176200001440000003461512726446531017013 0ustar liggesusers# This code is contributed by Leonard Vanbrabant InformativeTesting <- function(model = NULL, data, constraints = NULL, R = 1000L, type = "bollen.stine", return.LRT = TRUE, double.bootstrap = "standard", double.bootstrap.R = 249, double.bootstrap.alpha = 0.05, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, verbose = FALSE, ...){ fit.B1 <- sem(model, ..., data = data, se = "none", test = "standard") fit.B0 <- fit.A1 <- sem(model, ..., data = data, se = "none", test = "standard", constraints = constraints) con.idx <- (max(fit.B1@ParTable$id) + 1L):max(fit.A1@ParTable$id) user.equal <- fit.A1@ParTable user.equal$op[con.idx] <- "==" fit.A0 <- sem(user.equal, ..., data = data, se = "none", test = "standard") lrt.bootA <- bootstrapLRT(fit.A0, fit.A1, R = R, type = type, verbose = verbose, return.LRT = return.LRT, double.bootstrap = double.bootstrap, double.bootstrap.R = double.bootstrap.R, double.bootstrap.alpha = double.bootstrap.alpha, parallel = parallel, ncpus = ncpus, cl = cl) lrt.bootB <- bootstrapLRT(fit.B0, fit.B1, R = R, type = type, verbose = verbose, return.LRT = return.LRT, double.bootstrap = double.bootstrap, double.bootstrap.R = double.bootstrap.R, double.bootstrap.alpha = double.bootstrap.alpha, parallel = parallel, ncpus = ncpus, cl = cl) output <- list(fit.A0 = fit.A0, fit.A1 = fit.A1, fit.B1 = fit.B1, lrt.bootA = lrt.bootA, lrt.bootB = lrt.bootB, double.bootstrap = double.bootstrap, double.bootstrap.alpha = double.bootstrap.alpha, return.LRT = return.LRT, type = type) class(output) <- "InformativeTesting" return(output) } print.InformativeTesting <- function(x, digits = max(3, getOption("digits") - 3), ...) 
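# Example usage (a sketch; the data set and constraint labels are
# hypothetical):
#   model <- ' y ~ b1*x1 + b2*x2 '
#   constraints <- ' b1 > 0
#                    b2 > 0 '
#   out <- InformativeTesting(model, data = mydata,
#                             constraints = constraints, R = 1000L)
#   out   # dispatches to the print method below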
{
  object <- x

  cat("\nInformativeTesting: Order/Inequality Constrained Hypothesis Testing:\n\n")
  cat("  Variable names in model      :", unlist(object$fit.A1@Data@ov.names[1]), "\n")
  cat("  Number of variables          :", object$fit.A1@Model@nvar[1], "\n")
  cat("  Number of groups             :", object$fit.A1@Data@ngroups, "\n")
  cat("  Used sample size per group   :", unlist(object$fit.A1@Data@nobs), "\n")
  cat("  Used sample size             :", sum(unlist(object$fit.A1@Data@nobs)), "\n")
  cat("  Total sample size            :", sum(unlist(object$fit.A1@Data@norig)), "\n\n")
  cat("  Estimator                    :", object$fit.A1@Options$estimator, "\n")
  cat("  Missing data                 :", object$fit.A1@Options$missing, "\n")
  cat("  Bootstrap method             :", object$type, "\n")
  cat("  Double bootstrap method      :", object$double.bootstrap, "\n")

  dbtype <- object$double.bootstrap

  # original LRT for hypothesis test Type A
  TsA <- attr(object$lrt.bootA, "LRT.original")
  # original LRT for hypothesis test Type B
  TsB <- attr(object$lrt.bootB, "LRT.original")

  # unadjusted pvalues for Ts
  pvalueA <- object$lrt.bootA[1]
  pvalueB <- object$lrt.bootB[1]

  alpha <- object$double.bootstrap.alpha

  ###
  if (dbtype == "no") {
    cat("\n\n  Type A test: H0: all restrictions active (=)", "\n",
        "              vs. H1: at least one restriction strictly true (>)", "\n")
    cat("         Test statistic: ", format(round(TsA, digits), nsmall = digits),
        ", unadjusted p-value: ", if (pvalueA < 1e-04) {
          "<0.0001"
        } else {
          format(round(pvalueA, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n\n", sep = "")
    cat("  Type B test: H0: all restrictions true", "\n",
        "              vs. H1: at least one restriction false", "\n")
    cat("         Test statistic: ", format(round(TsB, digits), nsmall = digits),
        ", unadjusted p-value: ", if (pvalueB < 1e-04) {
          "<0.0001"
        } else {
          format(round(pvalueB, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n", sep = "")
  } else if (dbtype == "FDB") {
    # adjusted pvalues for Ts
    adj.pvalueA <- attr(object$lrt.bootA, "adj.pvalue")
    adj.pvalueB <- attr(object$lrt.bootB, "adj.pvalue")

    cat("\n\n  Type A test: H0: all restrictions active (=)", "\n",
        "              vs. H1: at least one restriction strictly true (>)", "\n")
    cat("         Test statistic: ", format(round(TsA, digits), nsmall = digits),
        ", adjusted p-value: ", if (adj.pvalueA < 1e-04) {
          "<0.0001"
        } else {
          format(round(adj.pvalueA, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n\n", sep = "")
    cat("  Type B test: H0: all restrictions true", "\n",
        "              vs. H1: at least one restriction false", "\n")
    cat("         Test statistic: ", format(round(TsB, digits), nsmall = digits),
        ", adjusted p-value: ", if (adj.pvalueB < 1e-04) {
          "<0.0001"
        } else {
          format(round(adj.pvalueB, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n", sep = "")
  } else if (dbtype == "standard") {
    # adjusted nominal levels
    adj.alphaA <- attr(object$lrt.bootA, "adj.alpha")
    adj.alphaB <- attr(object$lrt.bootB, "adj.alpha")

    # adjusted pvalues for Ts
    adj.pvalueA <- attr(object$lrt.bootA, "adj.pvalue")
    adj.pvalueB <- attr(object$lrt.bootB, "adj.pvalue")

    cat("\n\n  Type A test: H0: all restrictions active (=)", "\n",
        "              vs. H1: at least one restriction strictly true (>)", "\n")
    cat("         Test statistic: ", format(round(TsA, digits), nsmall = digits),
        ", adjusted p-value: ", if (adj.pvalueA < 1e-04) {
          "<0.0001"
        } else {
          format(round(adj.pvalueA, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n", sep = "")
    cat("                ", "unadjusted p-value: ", if (pvalueA < 1e-04) {
          "<0.0001"
        } else {
          format(round(pvalueA, digits), nsmall = digits)
        }, " (alpha = ", format(round(adj.alphaA, digits), nsmall = digits),
        ") ", "\n\n", sep = "")
    cat("  Type B test: H0: all restrictions true", "\n",
        "              vs. H1: at least one restriction false", "\n")
    cat("         Test statistic: ", format(round(TsB, digits), nsmall = digits),
        ", adjusted p-value: ", if (adj.pvalueB < 1e-04) {
          "<0.0001"
        } else {
          format(round(adj.pvalueB, digits), nsmall = digits)
        }, " (alpha = ", alpha, ") ", "\n", sep = "")
    cat("                ", "unadjusted p-value: ", if (pvalueB < 1e-04) {
          "<0.0001"
        } else {
          format(round(pvalueB, digits), nsmall = digits)
        }, " (alpha = ", format(round(adj.alphaB, digits), nsmall = digits),
        ") ", "\n\n", sep = "")
  }

  if (dbtype == "no") {
    cat("\n  No double bootstrap method is set. The results may be spurious.\n\n")
  }
}


plot.InformativeTesting <- function(x, ..., type = c("lr", "ppv"),
                                    main = "main", xlab = "xlabel",
                                    ylab = "Frequency", freq = TRUE,
                                    breaks = 15, cex.main = 1, cex.lab = 1,
                                    cex.axis = 1, col = "grey",
                                    border = par("fg"), vline = TRUE,
                                    vline.col = c("red", "blue"), lty = c(1,2),
                                    lwd = 1, legend = TRUE, bty = "o",
                                    cex.legend = 1, loc.legend = "topright") {
  object <- x
  return.LRT <- object$return.LRT
  double.bootstrap <- object$double.bootstrap
  double.bootstrap.alpha <- object$double.bootstrap.alpha
  pvalue <- c(object$lrt.bootA[1], object$lrt.bootB[1])

  par(mfrow = c(1, 2))
  if (length(type) == 2) {
    par(mfrow = c(2, 2))
  }

  if (return.LRT && (type == "lr" || length(type) == 2)) {
    lrt.obs <- c(attr(object$lrt.bootA, "LRT.original"),
                 attr(object$lrt.bootB, "LRT.original"))
    lrt.A <- attr(object$lrt.bootA, "LRT")
    lrt.B <- attr(object$lrt.bootB, "LRT")
    if (length(lrt.A) - length(lrt.B) < 0L) {
      lrt <- as.data.frame(cbind(c(lrt.A, rep(as.numeric(NA),
                                              length(lrt.B) - length(lrt.A))),
                                 lrt.B))
    } else {
      lrt <- as.data.frame(cbind(lrt.A,
                                 c(lrt.B, rep(as.numeric(NA),
                                              length(lrt.A) - length(lrt.B)))))
    }
    names(lrt) <- c("lrt.A", " lrt.B")

    if (xlab == "xlabel") {
      xlab.lrt <- c("Bootstrapped LR values")
    }
    if (main == "main") {
      main.lrt <- c("Distr. of LR values - Type A", "Distr. 
of LR values - Type B") } for (i in 1:2) { plot <- hist(lrt[,i], plot = FALSE, breaks = breaks) plot(plot, ..., freq = freq, main = main.lrt[i], xlab = xlab.lrt, ylab = ylab, cex.axis = cex.axis, cex.main = cex.main, cex.lab = cex.lab, col = col, border = border, axes = FALSE, xaxt = "n") axis(side = 1) axis(side = 2) box(lty = 1, col = "black") if (vline) { abline(v = lrt.obs[i], col = vline.col[1], lty = lty[1], lwd = lwd) } if (legend) { ppvalue <- sprintf("%.2f", pvalue[i]) obs.lrt <- sprintf("%.2f", lrt.obs[i]) ppval <- paste0("plug-in p value = ", ppvalue) obs.lrt <- paste0("observed LR = ", obs.lrt) legend.obj <- c(obs.lrt, ppval) if (!vline) { legend(loc.legend, legend.obj, lty = c(0, 0), lwd = lwd, cex = cex.legend, bty = bty) } else { legend(loc.legend, legend.obj, lty = c(lty[1], 0), col = vline.col[1], lwd = lwd, cex = cex.legend, bty = bty) } } } } if (double.bootstrap == "standard" && (type == "ppv" || length(type) == 2)) { ppvalue.A <- attr(object$lrt.bootA, "plugin.pvalues") ppvalue.B <- attr(object$lrt.bootB, "plugin.pvalues") adj.a <- c(quantile(ppvalue.A, double.bootstrap.alpha), quantile(ppvalue.B, double.bootstrap.alpha)) adj.ppv <- c(attr(object$lrt.bootA, "adj.pvalue"), attr(object$lrt.bootB, "adj.pvalue")) if (length(ppvalue.A) - length(ppvalue.B) < 0L) { ppv <- as.data.frame(cbind(c(ppvalue.A, rep(NA, length(ppvalue.B) - length(ppvalue.A))), ppvalue.B)) } else { ppv <- as.data.frame(cbind(ppvalue.A, c(ppvalue.B, rep(NA, length(ppvalue.A) - length(ppvalue.B))))) } names(ppv) <- c("ppA", "ppB") if (xlab == "xlabel") { xlab.ppv <- c("Bootstrapped plug-in p-values") } if (main == "main") { main.ppv <- c("Distr. of plug-in p-values - Type A", "Distr. of plug-in p-values - Type B") } for (i in 1:2) { plot <- hist(ppv[,i], plot = FALSE, breaks=breaks) plot(plot, ..., freq = freq, main = main.ppv[i], xlab = xlab.ppv, ylab = ylab, cex.axis = cex.axis, cex.main = cex.main, cex.lab = cex.lab, col = col, border = border, axes = FALSE, xaxt = "n") axis(side = 1, at = seq(0,1,0.1)) axis(side = 2) box(lty = 1, col = "black") if (vline) { abline(v = adj.a[i], col = vline.col[1], lty = lty[1], lwd = lwd) abline(v = adj.ppv[i], col = vline.col[2], lty = lty[2], lwd = lwd) } if (legend) { adj.alpha <- sprintf("%.2f", adj.a[i]) adj.pval <- sprintf("%.2f", adj.ppv[i]) adja <- paste0("Adjusted alpha = ", adj.alpha) adjp <- paste0("Adjusted p-value = ", adj.pval) legend.obj <- c(adja, adjp) if (!vline) { legend(loc.legend, legend.obj, lty = 0, col = vline.col, lwd = lwd, cex = cex.legend, bty = bty) } else { legend(loc.legend, legend.obj, lty = lty, col = vline.col, lwd = lwd, cex = cex.legend, bty = bty) } } } } } lavaan/R/lav_model_lik.R0000644000176200001440000001476213043400667014656 0ustar liggesusers# casewise likelihoods # closed-form marginal likelihood # - classic SEM models, continous observed variables only lav_model_lik_ml <- function(lavmodel = NULL, GLIST = NULL, lavdata = NULL, lavsamplestats = NULL) { } # marginal ML lav_model_lik_mml <- function(lavmodel = NULL, THETA = NULL, TH = NULL, GLIST = NULL, group = 1L, lavdata = NULL, sample.mean = NULL, sample.mean.x = NULL, lavcache = NULL) { conditional.x <- lavmodel@conditional.x # data for this group X <- lavdata@X[[group]]; nobs <- nrow(X); nvar <- ncol(X) eXo <- lavdata@eXo[[group]] # MLIST (for veta and yhat) mm.in.group <- 1:lavmodel@nmat[group] + cumsum(c(0,lavmodel@nmat))[group] MLIST <- GLIST[ mm.in.group ] # quadrature points GH <- lavcache[[group]]$GH; nGH <- nrow(GH$x) nfac <- ncol(GH$x) # compute VETAx (latent 
lv only) lv.dummy.idx <- c(lavmodel@ov.y.dummy.lv.idx[[group]], lavmodel@ov.x.dummy.lv.idx[[group]]) VETAx <- computeVETAx.LISREL(MLIST = MLIST, lv.dummy.idx = lv.dummy.idx) #VETAx <- computeVETAx.LISREL(MLIST = MLIST) # check for negative values? if(any(diag(VETAx) < 0)) { warning("lavaan WARNING: --- VETAx contains negative values") print(VETAx) return(0) } # cholesky? #if(is.null(lavmodel@control$cholesky)) { CHOLESKY <- TRUE #} else { # CHOLESKY <- as.logical(lavmodel@control$cholesky) #if(nfac > 1L && !CHOLESKY) { # warning("lavaan WARNING: CHOLESKY is OFF but nfac > 1L") #} #} if(!CHOLESKY) { # we should still 'scale' the factors, if std.lv=FALSE ETA.sd <- sqrt( diag(VETAx) ) } else { # cholesky takes care of scaling tchol.VETA <- try(chol(VETAx), silent = TRUE) if(inherits(tchol.VETA, "try-error")) { warning("lavaan WARNING: --- VETAx not positive definite") print(VETAx) return(0) } if(!is.null(MLIST$alpha) || !is.null(MLIST$gamma)) { if(conditional.x) { EETAx <- computeEETAx.LISREL(MLIST = MLIST, eXo = eXo, N = nobs, sample.mean = sample.mean, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[group]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[group]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[group]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[group]]) } else { EETA <- computeEETA.LISREL(MLIST = MLIST, mean.x = sample.mean.x, sample.mean = sample.mean, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[group]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[group]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[group]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[group]]) } #if(length(lv.dummy.idx) > 0L) { # EETAx <- EETAx[,-lv.dummy.idx,drop=FALSE] #} } } # compute (log)lik for each node, for each observation SUM.LOG.FY <- matrix(0, nrow=nGH, ncol=nobs) for(q in 1:nGH) { # current value(s) for ETA #eta <- matrix(0, nrow = 1, ncol = ncol(MLIST$lambda)) # non-dummy elements -> quadrature points #eta[1L, -lv.dummy.idx] <- GH$x[q,,drop=FALSE] XQ <- GH$x[q,,drop=FALSE] # rescale/unwhiten if(CHOLESKY) { # un-orthogonalize XQ <- XQ %*% tchol.VETA } else { # no unit scale? 
(un-standardize) XQ <- sweep(XQ, MARGIN=2, STATS=ETA.sd, FUN="*") } eta <- matrix(0, nrow = 1, ncol = ncol(MLIST$lambda)) if(length(lv.dummy.idx) > 0L) { eta[, -lv.dummy.idx] <- XQ } else { eta <- XQ } # eta_i = alpha + BETA eta_i + GAMMA eta_i + error # # - direct effect of BETA is already in VETAx, and hence tchol.VETA # - need to add alpha, and GAMMA eta_i if(!is.null(MLIST$alpha) || !is.null(MLIST$gamma)) { if(conditional.x) { eta <- sweep(EETAx, MARGIN=2, STATS=eta, FUN="+") } else { eta <- eta + EETA } } # compute yhat for this node (eta) if(lavmodel@conditional.x) { yhat <- computeEYetax.LISREL(MLIST = MLIST, eXo = eXo, ETA = eta, sample.mean = sample.mean, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[group]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[group]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[group]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[group]]) } else { yhat <- computeEYetax3.LISREL(MLIST = MLIST, ETA = eta, sample.mean = sample.mean, mean.x = sample.mean.x, ov.y.dummy.ov.idx = lavmodel@ov.y.dummy.ov.idx[[group]], ov.x.dummy.ov.idx = lavmodel@ov.x.dummy.ov.idx[[group]], ov.y.dummy.lv.idx = lavmodel@ov.y.dummy.lv.idx[[group]], ov.x.dummy.lv.idx = lavmodel@ov.x.dummy.lv.idx[[group]]) } # compute fy.var, for this node (eta): P(Y_i = y_i | eta_i, x_i) log.fy.var <- lav_predict_fy_internal(X = X, yhat = yhat, TH = TH, THETA = THETA, num.idx = lavmodel@num.idx[[group]], th.idx = lavmodel@th.idx[[group]], link = lavmodel@link, log. = TRUE) # if log, fy is just the sum of log.fy.var log.fy <- apply(log.fy.var, 1L, sum) # store log likelihoods for this node SUM.LOG.FY[q,] <- log.fy } # integration lik <- as.numeric( t(GH$w) %*% exp(SUM.LOG.FY) ) # avoid underflow idx <- which(lik < exp(-600)) if(length(idx) > 0L) { lik[idx] <- exp(-600) } lik } lavaan/R/lav_tables_mvb.R0000644000176200001440000000644212465075714015041 0ustar liggesusers# tools for the multivariate Bernoulli distribution # # see: # # Maydeu-Olivares & Joe (2005). Limited- and Full-Information Estimation and # Goodness-of-Fit Testing in 2^n Contingency Tables: A Unified Framework. # Journal of the American Statistical Association, 100, 1009--1020. # YR. 15 April 2014 -- first version # compute higher-order joint moments (Teugels 1991) # PROP must be an array, with dim = rep(2L, nitems) lav_tables_mvb_getPiDot <- function(PROP, order. = nitems) { # number of items/dimensions nitems <- length(dim(PROP)) # compute 'pi dot' up to order = order. pidot <- unlist( lapply(1:order., function(Order) { IDX <- utils::combn(1:nitems, Order) tmp <- apply(IDX, 2L, function(idx) as.numeric(apply(PROP, idx, sum))[1L]) tmp }) ) pidot } # compute 'T' matrix, so that pidot = T %*% prop lav_tables_mvb_getT <- function(nitems = 3L, order. = nitems, rbind. = FALSE) { # index matrix INDEX <- array(1:(2^nitems), dim = rep(2L, nitems)) T.r <- lapply(1:order., function(Order) { IDX <- utils::combn(1:nitems, Order) TT <- matrix(0L, ncol(IDX), 2^nitems) TT <- do.call("rbind", lapply(1:ncol(IDX), function(i) { TRue <- as.list(rep(TRUE, nitems)); TRue[ IDX[,i] ] <- 1L ARGS <- c(list(INDEX), TRue) T1 <- integer( 2^nitems ) T1[ as.vector(do.call("[", ARGS)) ] <- 1L T1 })) TT }) if(rbind.) 
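    # (when rbind. = TRUE, the per-order blocks are stacked into a single
    #  matrix, so that the stacked pidot vector satisfies pidot = T %*% prop;
    #  see lav_tables_mvb_test() below)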
{ T.r <- do.call("rbind", T.r) } T.r } # simple test function to check that pidot = T %*% prop lav_tables_mvb_test <- function(nitems = 3L, verbose = FALSE) { freq <- sample( 5:50, 2^nitems, replace=TRUE) prop <- freq/sum(freq) TABLE <- array(freq, dim=rep(2, nitems)) PROP <- array(prop, dim=rep(2, nitems)) # note: freq is always as.numeric(TABLE) # prop is always as.numeric(PROP) pidot <- lav_tables_mvb_getPiDot(PROP) T.r <- lav_tables_mvb_getT(nitems = nitems, order. = nitems, rbind. = TRUE) if(verbose) { out <- cbind(as.numeric(T.r %*% prop), pidot) colnames(out) <- c("T * prop", "pidot") print(out) } all.equal(pidot, as.numeric(T.r %*% prop)) } # L_r test of Maydeu-Olivares & Joe (2005) eq (4) lav_tables_mvb_Lr <- function(nitems = 0L, obs.prop = NULL, est.prop = NULL, nobs = 0L, order. = 2L) { # recreate tables obs.PROP <- array(obs.prop, dim = rep(2L, nitems)) est.PROP <- array(est.prop, dim = rep(2L, nitems)) # compute {obs,est}.prop.dot obs.prop.dot <- lav_tables_mvb_getPiDot(obs.PROP, order. = order.) est.prop.dot <- lav_tables_mvb_getPiDot(est.PROP, order. = order.) # compute T.r T.r <- lav_tables_mvb_getT(nitems = nitems, order. = order., rbind. = TRUE) # compute GAMMA based on est.prop GAMMA <- diag(est.prop) - tcrossprod(est.prop) # compute XI XI <- T.r %*% GAMMA %*% t(T.r) # compute Lr diff.dot <- obs.prop.dot - est.prop.dot Lr <- as.numeric(nobs * t(diff.dot) %*% solve(XI) %*% diff.dot) df <- 2^nitems - 1L p.value <- 1 - pchisq(Lr, df = df) # return list list(Lr = Lr, df = df, p.value = p.value) } lavaan/R/ctr_pairwise_fit.R0000644000176200001440000001343712771473712015420 0ustar liggesusers# This code is written by YR (using lavaan components), but based on # research code written by Mariska Barendse (Groningen/Amsterdam, NL) # # September 2013 # # Three fit indices for the PML estimator (if all categorical, no exo) # - Cp(max) # - CF # - CM # FIXME: how to handle multiple groups?? # Mariska Barendse Cp statistic #lav_tables_fit_Cp <- function(object, alpha = 0.05) { # # out <- lavTablesFit(object, statistic = "G2", p.value = TRUE) # # # Bonferonni adjusted p-value # ntests <- length(out$lhs) # out$alpha.adj <- alpha / ntests # #out$pval <- pchisq(out$G2, df=out$df, lower.tail = FALSE) # # # remove G2.h0.pval # #out$G2.h0.pval <- NULL # # out #} lavTablesFitCp <- function(object, alpha = 0.05) { lavdata <- object@Data if(!all(lavdata@ov$type == "ordered")) { return(list(G2=as.numeric(NA), df=as.numeric(NA), p.value=as.numeric(NA), p.value.Bonferroni=as.numeric(NA))) } TF <- lavTables(object, dimension = 2L, type = "table", statistic = "G2", p.value = TRUE) # Bonferonni adjusted p-value ntests <- length(TF$lhs) TF$alpha.adj <- alpha / ntests out <- subset(TF, TF$G2.pval < TF$alpha.adj) # find largest G2 max.idx <- which(TF$G2 == max(TF$G2)) extra <- list(G2=unname(TF$G2[max.idx]), df=unname(TF$df[max.idx]), lhs=TF$lhs[max.idx], rhs=TF$rhs[max.idx], group=TF$group[max.idx], p.value=unname(TF$G2.pval[max.idx]), ntests=ntests, p.value.Bonferroni=unname(TF$G2.pval[max.idx]*length(TF$lhs))) attr(out, "CpMax") <- extra class(out) <- c("lavaan.tables.fit.Cp", "lavaan.data.frame", "data.frame") out } print.lavaan.tables.fit.Cp <- function(x, ...) 
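# Example usage (a sketch; 'fit' is a hypothetical lavaan fit, estimated
# with estimator = "PML" on all-ordered indicators):
#   fit <- cfa(model, data = mydata, estimator = "PML")
#   lavTablesFitCp(fit)   # printed via the method below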
{ cat("CP-values that are significant at a Bonferroni adjusted level of significance\n") tmp <- x class(tmp) <- c("lavaan.data.frame", "data.frame") print(tmp) } # Mariska Barendse CF statistic lavTablesFitCf <- function(object) { # check object class if(!inherits(object, "lavaan")) { stop("lavaan ERROR: object must be an object of class lavaan") } lavdata <- object@Data lavpta <- object@pta lavmodel <- object@Model lavcache <- object@Cache implied <- object@implied CF.group <- rep(as.numeric(NA), lavdata@ngroups) DF.group <- rep(as.numeric(NA), lavdata@ngroups) # check if all ordered if(!all(lavdata@ov$type == "ordered")) { CF <- as.numeric(NA) attr(CF, "CF.group") <- CF.group attr(CF, "DF.group") <- DF.group return(CF) } # ord var in this group ov.ord <- unique(unlist(lavpta$vnames$ov.ord)) ov.idx <- which(ov.ord %in% lavdata@ov$name) ov.nlev <- lavdata@ov$nlev[ ov.idx ] Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov else implied$cov TH <- if(lavmodel@conditional.x) implied$res.th else implied$th DF <- prod(ov.nlev) - object@optim$npar - 1L for(g in seq_len(lavdata@ngroups)) { F.group <- estimator.FML(Sigma.hat = Sigma.hat[[g]], TH = TH[[g]], th.idx = lavmodel@th.idx[[g]], num.idx = lavmodel@num.idx[[g]], X = lavdata@X[[g]], lavcache = lavcache[[g]]) CF.group[g] <- 2*lavdata@nobs[[g]]*F.group } # check for negative values CF.group[CF.group < 0] <- 0.0 # global test statistic CF <- sum(CF.group) attr(CF, "CF.group") <- CF.group attr(CF, "DF") <- DF attr(CF, "rpat.observed") <- sapply(lavdata@Rp, "[[", "npatterns") attr(CF, "rpat.total") <- sapply(lavdata@Rp, "[[", "total.patterns") attr(CF, "rpat.empty") <- sapply(lavdata@Rp, "[[", "empty.patterns") class(CF) <- c("lavaan.tables.fit.Cf", "numeric") CF } print.lavaan.tables.fit.Cf <- function(x, ...) { cat("Total response patterns: ", attr(x, "rpat.total"), "\n") cat("Observed response patterns: ", attr(x, "rpat.observed"), "\n") cat("Empty response patterns: ", attr(x, "rpat.empty"), "\n") cat("Cf results may be biased because of large numbers of empty cells in the multivariate contingency table\n") cat("Cf-value, overall:\n") CF <- unclass(x); attributes(CF) <- NULL print(CF) CF.group <- attr(x, "CF.group") if(length(CF.group) > 1L) { cat("Cf-value, per group:\n") print(CF.group) } cat("Degrees of freedom\n") print(attr(x, "DF")) } lavTablesFitCm <- function(object) { lavdata <- object@Data lavoptions <- object@Options CF.h0 <- lavTablesFitCf(object) # fit unrestricted model h1 <- lavCor(lavdata, estimator = lavoptions$estimator, se = "none", test = "none", output = "lavaan") CF.h1 <- lavTablesFitCf(h1) CF.h0.group <- attr(CF.h0, "CF.group") CF.h1.group <- attr(CF.h1, "CF.group") DF.h0 <- attr(CF.h0, "DF") DF.h1 <- attr(CF.h1, "DF") attributes(CF.h0) <- NULL attributes(CF.h1) <- NULL CM <- CF.h0 - CF.h1 attr(CM, "CM.group") <- CF.h0.group - CF.h1.group attr(CM, "DF") <- DF.h0 - DF.h1 class(CM) <- c("lavaan.tables.fit.Cm", "numeric") CM } print.lavaan.tables.fit.Cm <- function(x, ...) 
{ #cat("The percentage of empty cells\n") #weet niet goed want FML werkt niet #cat("CM results may be a little biased because of large numbers of empty cells in the multivariate contingency table\n") cat("Cm-value, overall:\n") CM <- unclass(x); attributes(CM) <- NULL print(CM) CM.group <- attr(x, "CM.group") if(length(CM.group) > 1L) { cat("Cm-value, per group:\n") print(CM.group) } cat("Degrees of freedom:\n") print(attr(x, "DF")) } lavaan/R/lav_data.R0000644000176200001440000010305513054004370013613 0ustar liggesusers# constructor for the 'lavData' class # # the lavData class describes how the data looks like # - do we have a full data frame, or only sample statistics? # (TODO: allow for patterns + freq, if data is categorical) # - variable type ("numeric", "ordered", ...) # - how many groups, how many observations, ... # - what about missing patterns? # # initial version: YR 14 April 2012 # YR 23 feb 2017: blocks/levels/groups, but everything is group-based! # FIXME: if nlevels > 1L, and ngroups > 1L, we should check that # group is at the upper-level # extract the data we need for this particular model lavData <- function(data = NULL, # data.frame group = NULL, # multiple groups? cluster = NULL, # clusters? ov.names = NULL, # variables needed in model ov.names.x = character(0), # exo variables ordered = NULL, # ordered variables sample.cov = NULL, # sample covariance(s) sample.mean = NULL, # sample mean vector(s) sample.nobs = NULL, # sample nobs lavoptions = lavOptions(), # lavoptions allow.single.case = FALSE # allow single case (for newdata in predict) ) { # get info from lavoptions # group.labels group.label <- lavoptions$group.label if(is.null(group.label)) { group.label <- character(0L) } # level.labels level.label <- lavoptions$level.label if(is.null(level.label)) { level.label <- character(0L) } # std.ov? std.ov <- lavoptions$std.ov if(is.null(std.ov)) { std.ov <- FALSE } # missing? missing <- lavoptions$missing if(is.null(missing) || missing == "default") { missing <- "listwise" } # warn? warn <- lavoptions$warn if(is.null(warn)) { warn <- TRUE } # four scenarios: # 0) data is already a lavData object: do nothing # 1) data is full data.frame (or a matrix) # 2) data are sample statistics only # 3) no data at all # 1) full data if(!is.null(data)) { # catch lavaan/lavData objects if(inherits(data, "lavData")) { return(data) } else if(inherits(data, "lavaan")) { return(data@Data) } # catch matrix if(!is.data.frame(data)) { # is it a matrix? if(is.matrix(data)) { if(nrow(data) == ncol(data)) { # perhaps it is a covariance matrix? stop("lavaan WARNING: data argument looks like a covariance matrix; please use the sample.cov argument instead") } else { # or perhaps it is a data matrix? #if(warn) { # warning("lavaan WARNING: data argument has been coerced to a data.frame") #} ### FIXME, we should avoid as.data.frame() and handle ### data matrices directly data <- as.data.frame(data, stringsAsFactors = FALSE) } } else { stop("lavaan ERROR: data object of class ", class(data)) } } lavData <- lav_data_full(data = data, group = group, cluster = cluster, group.label = group.label, level.label = level.label, ov.names = ov.names, ordered = ordered, ov.names.x = ov.names.x, std.ov = std.ov, missing = missing, warn = warn, allow.single.case = allow.single.case) sample.cov <- NULL # not needed, but just in case } # 2) sample moments if(is.null(data) && !is.null(sample.cov)) { # for now: no levels!! 
nlevels <- 1L # we also need the number of observations (per group) if(is.null(sample.nobs)) stop("lavaan ERROR: please specify number of observations") # list? if(is.list(sample.cov)) { # multiple groups, multiple cov matrices if(!is.null(sample.mean)) { stopifnot(length(sample.mean) == length(sample.cov)) } # multiple groups, multiple cov matrices ngroups <- length(sample.cov) LABEL <- names(sample.cov) if(is.null(group.label) || length(group.label) == 0L) { if(is.null(LABEL)) group.label <- paste("Group ", 1:ngroups, sep="") else group.label <- LABEL } else { if(is.null(LABEL)) { stopifnot(length(group.label) == ngroups) } else { # FIXME!!!! # check if they match } } } else { ngroups <- 1L; group.label <- character(0) if(!is.matrix(sample.cov)) stop("lavaan ERROR: sample.cov must be a matrix or a list of matrices") sample.cov <- list(sample.cov) } # get ov.names if (is.null(ov.names)) { ov.names <- lapply(sample.cov, row.names) } else if (!is.list(ov.names)) { # duplicate ov.names for each group tmp <- ov.names; ov.names <- vector("list", length = ngroups) ov.names[1:ngroups] <- list(tmp) } else { if (length(ov.names) != ngroups) stop("lavaan ERROR: ov.names assumes ", length(ov.names), " groups; data contains ", ngroups, " groups") # nothing to do } # handle ov.names.x if(!is.list(ov.names.x)) { tmp <- ov.names.x; ov.names.x <- vector("list", length = ngroups) ov.names.x[1:ngroups] <- list(tmp) } else { if(length(ov.names.x) != ngroups) stop("lavaan ERROR: ov.names.x assumes ", length(ov.names.x), " groups; data contains ", ngroups, " groups") } ov <- list() ov$name <- unique( unlist(c(ov.names, ov.names.x)) ) nvar <- length(ov$name) ov$idx <- rep(NA, nvar) ov$nobs <- rep(sample.nobs, nvar) ov$type <- rep("numeric", nvar) # if std.ov = TRUE, give a warning (suggested by Peter Westfall) if(std.ov) { warning("lavaan WARNING: std.ov argument is ignored if only sample statistics are provided.") } # construct lavData object lavData <- new("lavData", data.type = "moment", ngroups = ngroups, group = character(0L), nlevels = 1L, # for now cluster = character(0L), group.label = group.label, level.label = character(0L), nobs = as.list(sample.nobs), norig = as.list(sample.nobs), ov.names = ov.names, ov.names.x = ov.names.x, ordered = as.character(ordered), ov = ov, std.ov = FALSE, missing = "listwise", case.idx = vector("list", length = ngroups), Mp = vector("list", length = ngroups), Rp = vector("list", length = ngroups), Lp = vector("list", length = ngroups), X = vector("list", length = ngroups), eXo = vector("list", length = ngroups) ) } # 3) data.type = "none": both data and sample.cov are NULL if(is.null(data) && is.null(sample.cov)) { # no levels nlevels <- 1L if(is.null(sample.nobs)) sample.nobs <- 0L sample.nobs <- as.list(sample.nobs) ngroups <- length(unlist(sample.nobs)) if(ngroups > 1L) { group.label <- paste("Group ", 1:ngroups, sep="") } else { group.label <- character(0) } # handle ov.names if(is.null(ov.names)) { warning("lavaan WARNING: ov.names is NULL") ov.names <- character(0L) } if(!is.list(ov.names)) { tmp <- ov.names; ov.names <- vector("list", length = ngroups) ov.names[1:ngroups] <- list(tmp) } else { if (length(ov.names) != ngroups) stop("lavaan ERROR: ov.names assumes ", length(ov.names), " groups; sample.nobs suggests ", ngroups, " groups") # nothing to do } # handle ov.names.x if(!is.list(ov.names.x)) { tmp <- ov.names.x; ov.names.x <- vector("list", length = ngroups) ov.names.x[1:ngroups] <- list(tmp) } ov <- list() ov$name <- unique( unlist(c(ov.names, ov.names.x)) 
) nvar <- length(ov$name) ov$idx <- rep(NA, nvar) ov$nobs <- rep(0L, nvar) ov$type <- rep("numeric", nvar) # construct lavData object lavData <- new("lavData", data.type = "none", ngroups = ngroups, group = character(0L), nlevels = 1L, # for now cluster = character(0L), group.label = group.label, level.label = character(0L), nobs = sample.nobs, norig = sample.nobs, ov.names = ov.names, ov.names.x = ov.names.x, ordered = as.character(ordered), ov = ov, missing = "listwise", case.idx = vector("list", length = ngroups), Mp = vector("list", length = ngroups), Rp = vector("list", length = ngroups), Lp = vector("list", length = ngroups), X = vector("list", length = ngroups), eXo = vector("list", length = ngroups) ) } lavData } # handle full data lav_data_full <- function(data = NULL, # data.frame group = NULL, # multiple groups? cluster = NULL, group.label = NULL, # custom group labels? level.label = NULL, ov.names = NULL, # variables needed # in model ordered = NULL, # ordered variables ov.names.x = character(0), # exo variables std.ov = FALSE, # standardize ov's? missing = "listwise", # remove missings? warn = TRUE, # produce warnings? allow.single.case = FALSE # allow single case? ) { # number of groups and group labels if(!is.null(group) && length(group) > 0L) { if(!(group %in% names(data))) { stop("lavaan ERROR: grouping variable ", sQuote(group), " not found;\n ", "variable names found in data frame are:\n ", paste(names(data), collapse=" ")) } # note: by default, we use the order as in the data; # not as in levels(data[,group]) if(length(group.label) == 0L) { group.label <- unique(as.character(data[[group]])) if(warn && any(is.na(group.label))) { warning("lavaan WARNING: group variable ", sQuote(group), " contains missing values\n", sep="") } group.label <- group.label[!is.na(group.label)] } else { group.label <- unique(as.character(group.label)) # check if user-provided group labels exist LABEL <- unique(as.character(data[[group]])) idx <- match(group.label, LABEL) if(warn && any(is.na(idx))) { warning("lavaan WARNING: some group.labels do not appear ", "in the grouping variable: ", paste(group.label[which(is.na(idx))], collapse=" ")) } group.label <- group.label[!is.na(idx)] # any groups left? if(length(group.label) == 0L) stop("lavaan ERROR: no group levels left; check the group.label argument") } ngroups <- length(group.label) } else { if(warn && length(group.label) > 0L) warning("lavaan WARNING: `group.label' argument", " will be ignored if `group' argument is missing") ngroups <- 1L group.label <- character(0L) group <- character(0L) } # cluster # number of levels and level labels if(!is.null(cluster) && length(cluster) > 0L) { # cluster variable in data? if(!all(cluster %in% names(data))) { # which one did we not find? 
not.ok <- which(!cluster %in% names(data)) stop("lavaan ERROR: cluster variable(s) ", sQuote(cluster[not.ok]), " not found;\n ", "variable names found in data frame are:\n ", paste(names(data), collapse = " ")) } # default level.labels if(length(level.label) == 0L) { level.label <- c("within", cluster) } else { # check if length(level.label) = 1 + length(cluster) if(length(level.label) != length(cluster) + 1L) { stop("lavaan ERROR: length(level.label) != length(cluster) + 1L") } # nothing to do } # check for missing values in cluster variable(s) for(cl in 1:length(cluster)) { if(warn && any(is.na(data[[cluster[cl]]]))) { warning("lavaan WARNING: cluster variable ", sQuote(cluster[cl]), " contains missing values\n", sep = "") } } nlevels <- length(level.label) } else { if(warn && length(level.label) > 0L) warning("lavaan WARNING: `level.label' argument", " will be ignored if `cluster' argument is missing") nlevels <- 1L level.label <- character(0L) cluster <- character(0L) } # ov.names (still needed???) if(is.null(ov.names)) { ov.names <- names(data) # remove 'group' name from ov.names if(length(group) > 0L) { group.idx <- which(ov.names == group) ov.names <- ov.names[-group.idx] } # remove 'cluster' names from ov.names if(length(cluster) > 0L) { cluster.idx <- which(ov.names %in% cluster) ov.names <- ov.names[-cluster.idx] } } # check ov.names vs ngroups if(ngroups > 1L) { if(is.list(ov.names)) { if(length(ov.names) != ngroups) stop("lavaan ERROR: ov.names assumes ", length(ov.names), " groups; data contains ", ngroups, " groups") } else { tmp <- ov.names ov.names <- vector("list", length = ngroups) ov.names[1:ngroups] <- list(tmp) } if(is.list(ov.names.x)) { if(length(ov.names.x) != ngroups) stop("lavaan ERROR: ov.names assumes ", length(ov.names.x), " groups; data contains ", ngroups, " groups") } else { tmp <- ov.names.x ov.names.x <- vector("list", length = ngroups) ov.names.x[1:ngroups] <- list(tmp) } } else { if(is.list(ov.names)) { if(length(ov.names) > 1L) stop("lavaan ERROR: model syntax defines multiple groups; data suggests a single group") } else { ov.names <- list(ov.names) } if(is.list(ov.names.x)) { if(length(ov.names.x) > 1L) stop("lavaan ERROR: model syntax defines multiple groups; data suggests a single group") } else { ov.names.x <- list(ov.names.x) } } # check if all ov.names can be found in the data.frame for(g in 1:ngroups) { # does the data contain all the observed variables # needed in the user-specified model for this group ov.all <- unique(c(ov.names[[g]], ov.names.x[[g]])) # no overlap if categ # handle interactions ov.int.names <- ov.all[ grepl(":", ov.all) ] n.int <- length(ov.int.names) if(n.int > 0L) { ov.names.noint <- ov.all[!ov.all %in% ov.int.names] for(iv in seq_len(n.int)) { NAMES <- strsplit(ov.int.names[iv], ":", fixed = TRUE)[[1L]] if(all(NAMES %in% ov.names.noint)) { # add this interaction term to the data.frame, unless # it already exists if(is.null(data[[ ov.int.names[iv] ]])) { data[[ ov.int.names[iv] ]] <- data[[NAMES[1L]]] * data[[NAMES[2L]]] } } } } # check for missing observed variables idx.missing <- which(!(ov.all %in% names(data))) if(length(idx.missing)) { stop("lavaan ERROR: missing observed variables in dataset: ", paste(ov.all[idx.missing], collapse=" ")) } } # here, we know for sure all ov.names exist in the data.frame # create varTable # FIXME: should we add the 'group'/'cluster' variable (no for now) ov <- lav_dataframe_vartable(frame = data, ov.names = ov.names, ov.names.x = ov.names.x, ordered = ordered, as.data.frame. 
= FALSE) # do some checking # check for unordered factors (but only if nlev > 2) if("factor" %in% ov$type) { f.names <- ov$name[ov$type == "factor" & ov$nlev > 2L] if(warn && any(f.names %in% unlist(ov.names))) warning(paste("lavaan WARNING: unordered factor(s) with more than 2 levels detected in data:", paste(f.names, collapse=" "))) } # check for ordered exogenous variables if("ordered" %in% ov$type[ov$name %in% unlist(ov.names.x)]) { f.names <- ov$name[ov$type == "ordered" & ov$name %in% unlist(ov.names.x)] if(warn && any(f.names %in% unlist(ov.names.x))) warning(paste("lavaan WARNING: exogenous variable(s) declared as ordered in data:", paste(f.names, collapse=" "))) } # check for zero-cases idx <- which(ov$nobs == 0L | ov$var == 0) if(length(idx) > 0L) { OV <- as.data.frame(ov) rn <- rownames(OV) rn[idx] <- paste(rn[idx], "***", sep="") rownames(OV) <- rn print(OV) stop("lavaan ERROR: some variables have no values (only missings) or no variance") } # check for single cases (no variance!) idx <- which(ov$nobs == 1L | (ov$type == "numeric" & !is.finite(ov$var))) if(!allow.single.case && length(idx) > 0L) { OV <- as.data.frame(ov) rn <- rownames(OV) rn[idx] <- paste(rn[idx], "***", sep="") rownames(OV) <- rn print(OV) stop("lavaan ERROR: some variables have only 1 observation or no finite variance") } # check for ordered variables with only 1 level idx <- which(ov$type == "ordered" & ov$nlev == 1L) if(length(idx) > 0L) { OV <- as.data.frame(ov) rn <- rownames(OV) rn[idx] <- paste(rn[idx], "***", sep="") rownames(OV) <- rn print(OV) stop("lavaan ERROR: ordered variable(s) has/have only 1 level") } # check for mix small/large variances (NOT including exo variables) if(!std.ov && !allow.single.case && warn && any(ov$type == "numeric")) { num.idx <- which(ov$type == "numeric" & ov$exo == 0L) if(length(num.idx) > 0L) { min.var <- min(ov$var[num.idx]) max.var <- max(ov$var[num.idx]) rel.var <- max.var/min.var if(rel.var > 1000) { warning("lavaan WARNING: some observed variances are (at least) a factor 1000 times larger than others; use varTable(fit) to investigate") } } } # check for all-exogenous variables (eg in f <~ x1 + x2 + x3) if(all(ov$exo == 1L)) { warning("lavaan WARNING: all observed variables are exogenous; model may not be identified") } # prepare empty lists # group-based case.idx <- vector("list", length = ngroups) Mp <- vector("list", length = ngroups) Rp <- vector("list", length = ngroups) norig <- vector("list", length = ngroups) nobs <- vector("list", length = ngroups) X <- vector("list", length = ngroups) eXo <- vector("list", length = ngroups) Lp <- vector("list", length = ngroups) # collect information per upper-level group for(g in 1:ngroups) { # extract variables in correct order ov.idx <- ov$idx[match(ov.names[[g]], ov$name)] exo.idx <- ov$idx[match(ov.names.x[[g]], ov$name)] all.idx <- unique(c(ov.idx, exo.idx)) # extract cases per group if(ngroups > 1L || length(group.label) > 0L) { if(missing == "listwise") { case.idx[[g]] <- which(data[[group]] == group.label[g] & complete.cases(data[all.idx])) nobs[[g]] <- length(case.idx[[g]]) norig[[g]] <- length(which(data[[group]] == group.label[g])) } else if(missing == "pairwise" && length(exo.idx) > 0L) { case.idx[[g]] <- which(data[[group]] == group.label[g] & complete.cases(data[exo.idx])) nobs[[g]] <- length(case.idx[[g]]) norig[[g]] <- length(which(data[[group]] == group.label[g])) } else { case.idx[[g]] <- which(data[[group]] == group.label[g]) nobs[[g]] <- norig[[g]] <- length(case.idx[[g]]) } } else { if(missing == 
"listwise") { case.idx[[g]] <- which(complete.cases(data[all.idx])) nobs[[g]] <- length(case.idx[[g]]) norig[[g]] <- nrow(data) } else if(missing == "pairwise" && length(exo.idx) > 0L) { case.idx[[g]] <- which(complete.cases(data[exo.idx])) nobs[[g]] <- length(case.idx[[g]]) norig[[g]] <- nrow(data) } else { case.idx[[g]] <- 1:nrow(data) nobs[[g]] <- norig[[g]] <- length(case.idx[[g]]) } } # extract data X[[g]] <- data.matrix( data[case.idx[[g]], ov.idx, drop = FALSE] ) dimnames(X[[g]]) <- NULL ### copy? # construct integers for user-declared 'ordered' factors # FIXME: is this really (always) needed??? # (but still better than doing lapply(data[,idx], ordered) which # generated even more copies) user.ordered.names <- ov$name[ov$type == "ordered" & ov$user == 1L] user.ordered.idx <- which(ov.names[[g]] %in% user.ordered.names) if(length(user.ordered.idx) > 0L) { for(i in user.ordered.idx) { X[[g]][,i] <- as.numeric(as.factor(X[[g]][,i])) } } ## FIXME: ## - why also in X? (for samplestats, for now) if(length(exo.idx) > 0L) { eXo[[g]] <- data.matrix(data[case.idx[[g]], exo.idx, drop = FALSE]) dimnames(eXo[[g]]) <- NULL } else { eXo[g] <- list(NULL) } # standardize observed variables? numeric only! if(std.ov) { num.idx <- which(ov.names[[g]] %in% ov$name & ov$type == "numeric") if(length(num.idx) > 0L) { X[[g]][,num.idx] <- scale(X[[g]][,num.idx,drop = FALSE])[,,drop = FALSE] # three copies are made!!!!! } if(length(exo.idx) > 0L) { eXo[[g]] <- scale(eXo[[g]])[,,drop = FALSE] } } # missing data if(missing != "listwise") { # get missing patterns Mp[[g]] <- lav_data_missing_patterns(X[[g]], sort.freq = TRUE, coverage = TRUE) # checking! if(length(Mp[[g]]$empty.idx) > 0L) { empty.case.idx <- Mp[[g]]$empty.idx if(warn) { warning("lavaan WARNING: some cases are empty and will be ignored:\n ", paste(empty.case.idx, collapse=" ")) } } if(warn && any(Mp[[g]]$coverage < 0.1)) { warning("lavaan WARNING: due to missing values, some pairwise combinations have less than 10% coverage") } # in case we had observations with only missings nobs[[g]] <- NROW(X[[g]]) - length(Mp[[g]]$empty.idx) } # response patterns (categorical only, no exogenous variables) all.ordered <- all(ov.names[[g]] %in% ov$name[ov$type == "ordered"]) if(length(exo.idx) == 0L && all.ordered) { Rp[[g]] <- lav_data_resp_patterns(X[[g]]) } # warn if we have a small number of observations (but NO error!) 
if( !allow.single.case && warn && nobs[[g]] < (nvar <- length(ov.idx)) ) { txt <- "" if(ngroups > 1L) txt <- paste(" in group ", g, sep="") warning("lavaan WARNING: small number of observations (nobs < nvar)", txt, "\n nobs = ", nobs[[g]], " nvar = ", nvar) } # cluster information if(nlevels > 1L) { # extract cluster variable(s), for this group clus <- data.matrix(data[case.idx[[g]], cluster]) Lp[[g]] <- lav_data_cluster_patterns(Y = X[[g]], clus = clus, cluster = cluster) } } # groups, at first level lavData <- new("lavData", data.type = "full", ngroups = ngroups, group = group, nlevels = nlevels, cluster = cluster, group.label = group.label, level.label = level.label, std.ov = std.ov, nobs = nobs, norig = norig, ov.names = ov.names, ov.names.x = ov.names.x, #ov.types = ov.types, #ov.idx = ov.idx, ordered = as.character(ordered), ov = ov, case.idx = case.idx, missing = missing, X = X, eXo = eXo, Mp = Mp, Rp = Rp, Lp = Lp ) lavData } # get missing patterns lav_data_missing_patterns <- function(Y, sort.freq = FALSE, coverage = FALSE) { # construct TRUE/FALSE matrix: TRUE if value is observed OBS <- !is.na(Y) # empty cases empty.idx <- which(rowSums(OBS) == 0L) # this is what we did in < 0.6 #if(length(empty.idx) > 0L) { # OBS <- OBS[-empty.idx,,drop = FALSE] #} # pattern of observed values per observation case.id <- apply(1L * OBS, 1L, paste, collapse = "") # remove empty patterns if(length(empty.idx)) { case.id.nonempty <- case.id[-empty.idx] } else { case.id.nonempty <- case.id } # sort non-empty patterns (from high occurrence to low occurrence) if(sort.freq) { TABLE <- sort(table(case.id.nonempty), decreasing = TRUE) } else { TABLE <- table(case.id.nonempty) } # unique pattern ids pat.id <- names(TABLE) # number of patterns pat.npatterns <- length(pat.id) # case idx per pattern pat.case.idx <- lapply(seq_len(pat.npatterns), function(p) which(case.id == pat.id[p])) # unique pattern frequencies pat.freq <- as.integer(TABLE) # first occurrence of each pattern pat.first <- match(pat.id, case.id) # TRUE/FALSE for each pattern pat.obs <- OBS[pat.first,,drop = FALSE] # observed per pattern Mp <- list(npatterns = pat.npatterns, id = pat.id, freq = pat.freq, case.idx = pat.case.idx, pat = pat.obs, empty.idx = empty.idx) if(coverage) { # FIXME: if we have empty cases, include them in N? # no for now Mp$coverage <- crossprod(OBS) / sum(pat.freq) #Mp$coverage <- crossprod(OBS) / NROW(Y) } Mp } # get response patterns (ignore empty cases!) lav_data_resp_patterns <- function(Y) { # construct TRUE/FALSE matrix: TRUE if value is observed OBS <- !is.na(Y) # empty cases empty.idx <- which(rowSums(OBS) == 0L) # remove empty cases if(length(empty.idx) > 0L) { Y <- Y[-empty.idx,,drop = FALSE] } ntotal <- nrow(Y); nvar <- ncol(Y) # identify, label and sort response patterns id <- apply(Y, MARGIN = 1, paste, collapse = "") # sort patterns (from high occurrence to low occurrence) TABLE <- sort(table(id), decreasing = TRUE) order <- names(TABLE) npatterns <- length(TABLE) pat <- Y[match(order, id), , drop = FALSE] row.names(pat) <- as.character(TABLE) # handle NA? Y[is.na(Y)] <- -9 total.patterns <- prod(apply(Y, 2, function(x) length(unique(x)))) empty.patterns <- total.patterns - npatterns # return a list #out <- list(nobs=ntotal, nvar=nvar, # id=id, npatterns=npatterns, # order=order, pat=pat) # only return pat out <- list(npatterns=npatterns, pat=pat, total.patterns=total.patterns, empty.patterns=empty.patterns) out } # get cluster information # - cluster can be a vector! # - clus can contain multiple columns! 
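# a small illustration of the returned structure (a hedged sketch, kept as
# a comment and not executed as part of the package; all values below are
# hypothetical):
#   clus <- cbind(c(1, 1, 2, 2, 2))    # one clustering variable, 5 cases
#   lav_data_cluster_patterns(Y = matrix(0, nrow = 5, ncol = 2),
#                             clus = clus, cluster = "school")
#   # -> $nclusters[[1]] == 2L; $cluster.size[[1]] == c(2L, 3L);
#   #    $cluster.id[[1]] == c(1, 2)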
lav_data_cluster_patterns <- function(Y = NULL, clus = NULL, cluster = NULL) { # how many levels? nlevels <- length(cluster) # check clus stopifnot(ncol(clus) == nlevels, nrow(Y) == nrow(clus)) cluster.size <- vector("list", length = nlevels) cluster.id <- vector("list", length = nlevels) nclusters <- vector("list", length = nlevels) # for each clustering variable for(l in 1:nlevels) { cluster.size[[l]] <- as.integer(table(clus[,l])) cluster.id[[l]] <- unique(clus[,l]) nclusters[[l]] <- length(cluster.size[[l]]) } out <- list(cluster = cluster, clus = clus, nclusters = nclusters, cluster.size = cluster.size, cluster.id = cluster.id) out } setMethod("show", "lavData", function(object) { # print 'lavData' object lav_data_print_short(object) }) lav_data_print_short <- function(object) { # flag listwise <- object@missing == "listwise" if(object@ngroups == 1L) { if(listwise) { cat(sprintf(" %-40s", ""), sprintf(" %10s", "Used"), sprintf(" %10s", "Total"), "\n", sep="") } t0.txt <- sprintf(" %-40s", "Number of observations") t1.txt <- sprintf(" %10i", object@nobs[[1L]]) t2.txt <- ifelse(listwise, sprintf(" %10i", object@norig[[1L]]), "") cat(t0.txt, t1.txt, t2.txt, "\n", sep="") } else { if(listwise) { cat(sprintf(" %-40s", ""), sprintf(" %10s", "Used"), sprintf(" %10s", "Total"), "\n", sep="") } t0.txt <- sprintf(" %-40s", "Number of observations per group") cat(t0.txt, "\n") for(g in 1:object@ngroups) { t.txt <- sprintf(" %-40s %10i", object@group.label[[g]], object@nobs[[g]]) t2.txt <- ifelse(listwise, sprintf(" %10i", object@norig[[g]]), "") cat(t.txt, t2.txt, "\n", sep="") } } cat("\n") # missing patterns? if(!is.null(object@Mp[[1L]])) { if(object@ngroups == 1L) { t0.txt <- sprintf(" %-40s", "Number of missing patterns") t1.txt <- sprintf(" %10i", object@Mp[[1L]]$npatterns) cat(t0.txt, t1.txt, "\n\n", sep="") } else { t0.txt <- sprintf(" %-40s", "Number of missing patterns per group") cat(t0.txt, "\n") for(g in 1:object@ngroups) { t.txt <- sprintf(" %-40s %10i", object@group.label[[g]], object@Mp[[g]]$npatterns) cat(t.txt, "\n", sep="") } cat("\n") } } } lavaan/R/lav_fsr_croon.R0000644000176200001440000000334213051523100014664 0ustar liggesusers# use the `Croon' method to correct the covariance matrix # of the factor scores lav_fsr_croon_correction <- function(FS.COV, LVINFO, fs.method = "bartlett") { # ngroups ngroups <- length(FS.COV) # FSR.COV FSR.COV <- FS.COV for(g in 1:ngroups) { # number of factors - lv.names nfac <- nrow(FS.COV[[g]]) lv.names <- names(LVINFO[[g]]) # correct covariances only if(fs.method != "bartlett") { for(i in 1:(nfac-1)) { LHS <- lv.names[i] A.y <- LVINFO[[g]][[LHS]]$fsm lambda.y <- LVINFO[[g]][[LHS]]$lambda for(j in (i+1):nfac) { RHS <- lv.names[j] A.x <- LVINFO[[g]][[RHS]]$fsm lambda.x <- LVINFO[[g]][[RHS]]$lambda # always 1 if Bartlett A.xy <- as.numeric(crossprod(A.x %*% lambda.x, A.y %*% lambda.y)) # corrected covariance FSR.COV[[g]][i,j] <- FSR.COV[[g]][j,i] <- FS.COV[[g]][LHS,RHS] / A.xy } } } # correct variances for(i in 1:nfac) { RHS <- lv.names[i] A.x <- LVINFO[[g]][[RHS]]$fsm lambda.x <- LVINFO[[g]][[RHS]]$lambda theta.x <- LVINFO[[g]][[RHS]]$theta if(fs.method == "bartlett") { A.xx <- 1.0 } else { A.xx <- as.numeric(crossprod(A.x %*% lambda.x)) } offset.x <- as.numeric(A.x %*% theta.x %*% t(A.x)) FSR.COV[[g]][i,i] <- (FS.COV[[g]][RHS, RHS] - offset.x)/A.xx } } # g FSR.COV } lavaan/MD50000644000176200001440000002345713054131615012036 0ustar liggesusers85cbe9317110b969f43eb18821870a64 *DESCRIPTION 1b6f179a29336540a0a803347c8ea008 *NAMESPACE 
2c06e981ca673126bd34646156f9c1c8 *R/00class.R d02be42658ab06953315c502439d65c6 *R/00generic.R 7dc16c76951b954557a170601dd51415 *R/01RefClass_00lavRefModel.R efe636d8bc46031f30e74622c0f4567e *R/01RefClass_01lavOptim.R c8818fa21dd499ecdbceda5a339e8d15 *R/01RefClass_02lavML.R db43385c92c53d05d6f2a8d20b7858e8 *R/ctr_estfun.R d6920d8404718f56582ffdff50498771 *R/ctr_informative_testing.R 50747b931a9bfe8c2ee13350bb766718 *R/ctr_modelcov.R 8743ac3ff8d84595ea44d777e482b867 *R/ctr_mplus2lavaan.R 2e4bf3cf21a238f4570930e8cb3e9747 *R/ctr_pairwise_fit.R 4a668097de5d79baef8f841c4908c518 *R/ctr_pairwise_table.R 2980be79922355e9a78f252ebf532958 *R/ctr_pml_doubly_robust_utils.R 96d2dcdbdffad48d2566f8053cdf453b *R/ctr_pml_plrt.R 1cd5c477a8fdf7c01252f67a3a51189e *R/ctr_pml_plrt2.R 37899115aaac5dac12a2b6ce2a68a6a3 *R/ctr_pml_plrt_nested.R 2a06adcd6bb5668afb71b893ff8691ef *R/ctr_pml_utils.R f237c1cc3b7746be5ad12ee67400d958 *R/lav_binorm.R 581485ed7e1fe3d65e64ee016754ad42 *R/lav_bootstrap.R 78a3bf33927df957e13fdcb1b21ad7b2 *R/lav_bootstrap_lrt.R acf7fe7756d4c2a9a40e8374ba95a75c *R/lav_constraints.R 6ed2ae59edd0513bc33489486579e1ec *R/lav_cor.R 1c9d1e953439b5f563ad577a5f68342d *R/lav_data.R 08a44f9b08c49498e396b076bf0766ac *R/lav_dataframe.R f59abdc945b1904dddf8d758064ca052 *R/lav_export.R 674dd495eece3854c7c37d3a1b7fd1ce *R/lav_export_bugs.R 43f593152b2d73dfc42678fb83173e15 *R/lav_export_mplus.R 3cda47a41dcc8bee79bd7ec952b61fab *R/lav_fabin.R 55a3b989fc4a661c52fd201913d41e92 *R/lav_fiml.R 927c6bdf214002c6e1d957aa1196a1da *R/lav_fit.R 66a27539c225fe6e29a9b3a5c0e43ee9 *R/lav_fit_measures.R 1181145124aa1612919339bc3b41f8f3 *R/lav_fsr_croon.R 1f6e6e42bbc82d84c6ceda3b61c5cfab *R/lav_func_deriv.R f6683b0da25bcfee057bc0d1b2732b9d *R/lav_graphics.R a04d2242134013d8b110738012999274 *R/lav_integrate.R 6894f2a2baebf698e15bfb0bdb06d91d *R/lav_lavaanList_inspect.R 47e479a777b7f69a49414b1e8532afaa *R/lav_lavaanList_methods.R 2b9193ae7b7bc3a5872214581126d0f0 *R/lav_lavaanList_multipleGroups.R 3608cb1844aabc56699cdf28b74a47ae *R/lav_lavaanList_multipleImputation.R b5fa0e59f276b50a5db71e28fb7bd852 *R/lav_lavaanList_simulate.R 6c02af13d17c94163c95f490d2a6502b *R/lav_matrix.R e9dc97111fa485af0835417e16a3a932 *R/lav_model.R ce5e150be5da2ab82257c75b1b6c6618 *R/lav_model_compute.R 642d8e64bc125f07ea2bb5366602a89e *R/lav_model_estimate.R 4118d18d3b91c222cd024acf27e02336 *R/lav_model_gradient.R 2e8276f6c234811092b8c974bbf414ff *R/lav_model_gradient_mml.R 66a94bb894225a3eca573b7f21af358b *R/lav_model_gradient_pml.R 61d3d322cc7fdde173d547ee16581964 *R/lav_model_hessian.R eeb577f2bf89e64fd0b813060adde9ab *R/lav_model_implied.R c42abb1a90e517543a15be92d8eea3e2 *R/lav_model_information.R 99eff636976f651be125d6fa18c36e21 *R/lav_model_lik.R 1a67a3244a1cbe1f09c7fceba1f52187 *R/lav_model_objective.R ea82deadd29d3f191e9154f4f55c8e22 *R/lav_model_utils.R 436e23c22cfb243aacc837503ac06ead *R/lav_model_vcov.R 7c78c87ce2b1b628bc22844b71313185 *R/lav_model_wls.R 0bbd69945544e58c1c62c60f55716da1 *R/lav_modification.R a5f097cfa2a38f95e77ad182b12e5736 *R/lav_mplus.R 4a8583ac4144633a40c0c1cc9bfeb65e *R/lav_muthen1984.R eaa2bd1288baf4cf8bc6de11b1d00b6f *R/lav_mvnorm.R 5080539e20975211859becc9f1c5939d *R/lav_mvnorm_h1.R 5b36cd325116e17614702f7c38f9b9e2 *R/lav_mvnorm_missing.R b83484bb56718b8b07959b2ae1f4d8ee *R/lav_mvnorm_missing_h1.R 6d65915876942b351b1cb3fe2ac59219 *R/lav_mvreg.R e43f0a4520aeb774aa19e95a6aca69bc *R/lav_nlminb_constr.R 5d83a269e546be51e48a808545133f01 *R/lav_norm.R 3562b9127f2a6e2c7ddf3ca81f7eabdb *R/lav_object_generate.R 
f3ea17468cb9580f5f45734ae3e3247c *R/lav_object_inspect.R eb100380277725a9cfd938574a461503 *R/lav_object_methods.R 3dbae6edf3f95ec4eccc2b2dcf4a5821 *R/lav_object_post_check.R 4bea7cbb90e9b9681c73c7f35b589016 *R/lav_objective.R 9d1b727a7a04ebc600e4f03811150073 *R/lav_ols.R 104ccc9befb7265a06952eca792d1c2e *R/lav_options.R ff133e16f8499b7e436b69038f2aa12b *R/lav_partable.R a0fecf6d3aeb22d937b6a2f0e72e8a9d *R/lav_partable_attributes.R 922cf90400fef61246c15a8bf416f2ee *R/lav_partable_check.R cacb1dc8b9b8f0811af79883befca31f *R/lav_partable_complete.R 69f229fe4207f87ec584b7b7f824aef0 *R/lav_partable_constraints.R 2b8c7cec7785acdf0214fcddf704e458 *R/lav_partable_flat.R 5df28f620e9b4079812a097ba195a346 *R/lav_partable_from_lm.R 4c1181cf31b9326918ee9362ca11ce9d *R/lav_partable_full.R eb9ba742eda2cab0d379ff1c18219d38 *R/lav_partable_independence.R 628e181964bd94246c8aa00e70b95110 *R/lav_partable_labels.R 09d977c10cc73bb9bbc4fe9040545dde *R/lav_partable_merge.R c6db81581a81ef93df9bb0b0d5c6adb6 *R/lav_partable_subset.R 170bfea2ae33549738620ef54fe3862c *R/lav_partable_unrestricted.R d27638ad54696d03ac432afc410454b7 *R/lav_partable_utils.R 1d59a968e40e904a9e3de8642ed84415 *R/lav_partable_vnames.R 0e9330efc762a1624539e3de8f107826 *R/lav_pearson.R 89f78a64372a29e8137f558e79a1fd63 *R/lav_polychor.R 1082355ff850feeb16b3f91b6f63e4c1 *R/lav_polyserial.R 40059db7654ebcf91e7ed780b4274d8b *R/lav_predict.R 5772816536e8e78943c8f9356f9a6b19 *R/lav_prelis.R b8c4aeb8fc7c892a2be954c849e393b3 *R/lav_print.R 9f8025fb464c03dacfa18692d76cdba7 *R/lav_probit.R 5b7d428b3d7be51a57191b0680d6cff0 *R/lav_representation.R c9ccab2abb5c999b7335fcb4b9765c32 *R/lav_representation_lisrel.R ef1f7fb540578a364b5d02ba8750eda2 *R/lav_residuals.R 185c104eec587587f65a94fb8fb29bf0 *R/lav_samplestats.R 13b7ff1768cccafa70f47edecc9b5756 *R/lav_samplestats_gamma.R bb369a476bca2e7c8183f579d30b8777 *R/lav_samplestats_icov.R 257a1eb16e8c380cfc617889561bd7ce *R/lav_samplestats_igamma.R cee93731b368f057118e8b8d8e3f9d9f *R/lav_samplestats_step1.R a9d3cf9cec393eeecf892ff581307612 *R/lav_samplestats_step2.R 2a34b9538206daf1d2706aa642f4af67 *R/lav_samplestats_wls_obs.R 24574624218d7d39683b0d441b124e18 *R/lav_simulate.R dcc301411cd8bd4f1ab5017f03a42e8c *R/lav_standardize.R 980d2ee512e4161479c24262e2407649 *R/lav_start.R 5a98227abbbcc60879ee20cff7d42c71 *R/lav_syntax.R e53cfe7dbe16fb84f466f9092ac106c4 *R/lav_syntax_independence.R 8862508aaa88781c225712d60a4f83cb *R/lav_tables.R 2ef06fa1d328ec66c323c95fd1badeae *R/lav_tables_mvb.R 1f8a1698fb32a035ecb925122d1f3938 *R/lav_test.R f1ed09c297b98070e4143c1e0d9fb0bb *R/lav_test_LRT.R 4fab9b5a0a3c04bdb02c2149496854da *R/lav_test_Wald.R b84a4f37d8bd35e5fdb307822af124c5 *R/lav_test_diff.R 63260bbd2c47479ee96751ce547e1da7 *R/lav_test_satorra_bentler.R 4534f85e5ae8a2c000372b940175f735 *R/lav_test_score.R 4747e171a70889c14521a768151b8311 *R/lav_utils.R e55602f251d703e85bd891db1ab23bb2 *R/lavaan-deprecated.R 9f98e4dd8b7f1d46765d3343feaf4169 *R/xxx_fsr.R 6064003b1b158e6dc73d12e3dffc72aa *R/xxx_lavaan.R 80c919eccb8bdf6b832a929c8cac94b8 *R/xxx_lavaanList.R ab268a2eeb7c39f66ee65c3cf2dd3edc *R/xxx_prelav.R 63eac6a2eb3399ebccd1558b3124353d *R/zzz.R 3b9220c0c6ba9e0d5a7afac5ed96d94c *R/zzz_OLDNAMES.R 9e007dd29a2cc3ba2bbf789337706826 *README 8dcf021a8c1e3bec0e544d6d6fcf35a1 *data/Demo.growth.rda 140160c6aed2f9a94529130d0c3706d8 *data/FacialBurns.rda b5da5b64e3d3e91c1bcef8f3923bdfd9 *data/HolzingerSwineford1939.rda 5fc7b4e0adf386955e92d32a1ac248cc *data/PoliticalDemocracy.rda aa532929bbb274547c8a26c60d0f6df0 *inst/CITATION 
bb73dca4c8e777c91e0e07ba4a081e38 *man/Demo.growth.Rd db13fee7bf3038ffe5c8cb7dbb5592af *man/FacialBurns.Rd 32975e756b249b0d9e096c520e01cd82 *man/HolzingerSwineford1939.Rd cf2455a356ece4902222b7a4597c5b9e *man/InformativeTesting.Rd 798b9cb26b058c6656e3333315cfa184 *man/PoliticalDemocracy.Rd 9738b5515aac6b61f5c3c000b01a9b21 *man/bootstrap.Rd 0345234b227b87194ae179f7f6a049fe *man/cfa.Rd 86e7e1309c7ec3a0199e2ec03007a34b *man/estfun.Rd 1952397a65d3bb19089f213561c1a998 *man/fitMeasures.Rd 1f07df92df54b4f7be7de71e1f370da4 *man/fsr.Rd e10ce2f596f1e9105b12d1e9860aacfd *man/getCov.Rd 9cebe1d44273925ea3e6b3ab02cf2d72 *man/growth.Rd 73e825fa761127c81205c806be420ffa *man/inspectSampleCov.Rd b33382dcd6e15dc2d62e130c0006959b *man/lavCor.Rd 225af6a64d1104380844614ca4191caa *man/lavExport.Rd 82d47428d8b8f014665079c5d7ad3303 *man/lavInspect.Rd ef260cb8a8feaffdba84528cdc1b3f4b *man/lavListInspect.Rd dd828fdc7f100a867061aa464b3a55b0 *man/lavMatrixRepresentation.Rd fff3a2754f2e4996ade52d8eb794ab44 *man/lavNames.Rd 69d6a8669b54b60dabdc42d61df1a16d *man/lavOptions.Rd 299c95c112718f7d852ba8c33b3f5f68 *man/lavParTable.Rd 557cbd44a5edac922e6284934b269478 *man/lavPredict.Rd d2e8961bb9e6e83fc8bc38c476f8ccc1 *man/lavTables.Rd 964c3b180c0f99409f7ba85e332899e9 *man/lavTablesFit.Rd 7fa0dff6a902cb32a81badad0d99c4db *man/lavTestLRT.Rd 387144a2126e7bfbecdef35b8282c1a7 *man/lavTestScore.Rd b2e463238f7fe7bfd43176d5443bbd2d *man/lavTestWald.Rd 81f283668573ca1a58784c8478f50be4 *man/lav_constraints.Rd f057078c4e6dbf973f4461ca1a560dc2 *man/lav_func.Rd 7b73fb60ab06c6e5ada8216f47d9c3ee *man/lav_matrix.Rd af2579cb02a596b3a204f2f5abb6b8d9 *man/lav_model.Rd ba05e4d7d9017d8dbb9111b1c1b81db9 *man/lav_partable.Rd b3a44227f7af430828548addfd6f6b51 *man/lavaan-class.Rd 12c96de013f21abae8a64ae3b83e4845 *man/lavaan-deprecated.Rd 2a9ce97b038dacf567ef297f72f6cfd2 *man/lavaan.Rd 76ace308ab03a4f3000c7d6a92028418 *man/lavaanList-class.Rd c8e07abdc279f4d2c142ccbac61717fe *man/lavaanList.Rd 86a16eee44d8d394b0d9cf59a438365f *man/model.syntax.Rd 3b1606f152eb91e705cfa4ed28271b80 *man/modificationIndices.Rd daa58c2293dc757b81235068df0a7a51 *man/mplus2lavaan.Rd 9e2d7388f9de4c8314af3a679b5c4471 *man/mplus2lavaan.modelSyntax.Rd 54df69ab414febb1b03b4232ed7b533b *man/parameterEstimates.Rd 0c7105b43a2ca7c3b1aa0793507580ca *man/plot.InformativeTesting.Rd 6029d7b0733788cafaf13f427bc35e19 *man/sem.Rd 4ad763d50981425d29ba3af2fab22dc2 *man/simulateData.Rd 392ce24890e602302ceddd4a88624386 *man/standardizedSolution.Rd 31fb942eea99dbd852bd8ea3f2ed0a7b *man/varTable.Rd 57845942affbefe30b1e0fce405a406b *tests/testthat.R 3a80335c74d1ce33a58a807939e1d1c4 *tests/testthat/helper-skip_level.R cbc5444212e37b23305b07a1681a0328 *tests/testthat/test-lav_matrix.R a12db7b5aafb3df1955d471eb1314a7f *tests/testthat/test-lav_mvnorm.R af2a8848a7d40dbed3ec9fa7ea99c517 *tests/testthat/test-skip_example.R lavaan/README0000644000176200001440000000233312104004704012365 0ustar liggesuserslavaan is a free, open source R package for latent variable analysis. You can use lavaan to estimate a large variety of multivariate statistical models, including path analysis, confirmatory factor analysis, structural equation modeling and growth curve models. The lavaan package is developed to provide useRs, researchers and teachers a free open-source, but commercial-quality package for latent variable modeling. The long-term goal of lavaan is to implement all the state-of-the-art capabilities that are currently available in commercial packages. 
However, lavaan is still under development, and much work still needs to be done. To get a first impression of how lavaan works in practice, consider the following example of a SEM model (the Political Democracy Example from Bollen's 1989 book): library(lavaan) model <- ' # latent variable definitions ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 dem65 =~ y5 + y6 + y7 + y8 # regressions dem60 ~ ind60 dem65 ~ ind60 + dem60 # residual covariances y1 ~~ y5 y2 ~~ y4 + y6 y3 ~~ y7 y4 ~~ y8 y6 ~~ y8 ' fit <- sem(model, data=PoliticalDemocracy) summary(fit) More information can be found on the website: http://lavaan.org lavaan/DESCRIPTION0000644000176200001440000000510713054131615013224 0ustar liggesusersPackage: lavaan Title: Latent Variable Analysis Version: 0.5-23.1097 Authors@R: c(person(given = "Yves", family = "Rosseel", role = c("aut", "cre"), email = "Yves.Rosseel@UGent.be"), person(given = "Daniel", family = "Oberski", role = "ctb", email = "daniel.oberski@gmail.com"), person(given = "Jarrett", family = "Byrnes", role = "ctb", email = "byrnes@nceas.ucsb.edu"), person(given = "Leonard", family = "Vanbrabant", role = "ctb", email = "Leonard.Vanbrabant@ugent.be"), person(given = "Victoria", family = "Savalei", role = "ctb", email = "vsavalei@ubc.ca"), person(given = "Ed", family = "Merkle", role = "ctb", email = "merklee@missouri.edu"), person(given = "Michael", family = "Hallquist", role = "ctb", email = "michael.hallquist at gmail.com"), person(given = "Mijke", family = "Rhemtulla", role = "ctb", email = "mijke@ku.edu"), person(given = "Myrsini", family = "Katsikatsou", role = "ctb", email = "mirtok2@gmail.com"), person(given = "Mariska", family = "Barendse", role = "ctb", email = "m.t.barendse@rug.nl"), person(given = "Michael", family = "Chow", role = "ctb", email = "machow@princeton.edu"), person(given = "Terrence", family = "Jorgensen", role = "ctb") ) Description: Fit a variety of latent variable models, including confirmatory factor analysis, structural equation modeling and latent growth curve models. 
Depends: R(>= 3.1.0) Imports: methods, stats4, stats, utils, graphics, MASS, mnormt, pbivnorm, quadprog, numDeriv Suggests: testthat License: GPL (>= 2) LazyData: yes URL: http://lavaan.org NeedsCompilation: no Packaged: 2017-02-24 13:08:31 UTC; yves Author: Yves Rosseel [aut, cre], Daniel Oberski [ctb], Jarrett Byrnes [ctb], Leonard Vanbrabant [ctb], Victoria Savalei [ctb], Ed Merkle [ctb], Michael Hallquist [ctb], Mijke Rhemtulla [ctb], Myrsini Katsikatsou [ctb], Mariska Barendse [ctb], Michael Chow [ctb], Terrence Jorgensen [ctb] Maintainer: Yves Rosseel <Yves.Rosseel@UGent.be> Repository: CRAN Date/Publication: 2017-02-24 23:28:29 lavaan/man/0000755000176200001440000000000013054027040012262 5ustar liggesuserslavaan/man/lav_model.Rd0000644000176200001440000000336013042204301014513 0ustar liggesusers\name{lav_model} \alias{lav_model_get_parameters} \alias{lav_model_set_parameters} \alias{lav_model_implied} \alias{lav_model_vcov_se} \title{lavaan model functions} \description{Utility functions related to internal model representation (lavmodel)} \usage{ # set/get free parameters lav_model_set_parameters(lavmodel, x = NULL) lav_model_get_parameters(lavmodel, GLIST = NULL, type = "free", extra = TRUE) # compute model-implied statistics lav_model_implied(lavmodel) # compute standard errors lav_model_vcov_se(lavmodel, lavpartable, VCOV = NULL, BOOT = NULL) } \arguments{ \item{lavmodel}{An internal representation of a lavaan model.} \item{x}{Numeric. A vector containing the values of all the free model parameters.} \item{GLIST}{List. A list of model matrices, similar to the output of \code{lavInspect(object, "est")}.} \item{type}{Character string. If \code{"free"}, only return the free model parameters. If \code{"user"}, return all the parameters (free and fixed) as they appear in the user-specified parameter table.} 
Importantly, all other variables will be treated as numeric (unless they are declared as ordered in the original data.frame.)} \item{sample.cov}{Numeric matrix. A sample variance-covariance matrix. The rownames and/or colnames must contain the observed variable names. For a multiple group analysis, a list with a variance-covariance matrix for each group. Note that if maximum likelihood estimation is used and \code{likelihood="normal"}, the user provided covariance matrix is internally rescaled by multiplying it with a factor (N-1)/N, to ensure that the covariance matrix has been divided by N. This can be turned off by setting the \code{sample.cov.rescale} argument to \code{FALSE}.} \item{sample.mean}{A sample mean vector. For a multiple group analysis, a list with a mean vector for each group.} \item{sample.nobs}{Number of observations if the full data frame is missing and only sample moments are given. For a multiple group analysis, a list or a vector with the number of observations for each group.} \item{group}{A variable name in the data frame defining the groups in a multiple group analysis.} \item{cluster}{Not used yet.} \item{constraints}{Additional (in)equality constraints not yet included in the model syntax. See \code{\link{model.syntax}} for more information.} \item{WLS.V}{A user provided weight matrix to be used by estimator \code{"WLS"}; if the estimator is \code{"DWLS"}, only the diagonal of this matrix will be used. For a multiple group analysis, a list with a weight matrix for each group. The elements of the weight matrix should be in the following order (if all data is continuous): first the means (if a meanstructure is involved), then the lower triangular elements of the covariance matrix including the diagonal, ordered column by column. In the categorical case: first the thresholds (including the means for continuous variables), then the slopes (if any), the variances of continuous variables (if any), and finally the lower triangular elements of the correlation/covariance matrix excluding the diagonal, ordered column by column.} \item{NACOV}{A user provided matrix containing the elements of (N times) the asymptotic variance-covariance matrix of the sample statistics. For a multiple group analysis, a list with an asymptotic variance-covariance matrix for each group. See the \code{WLS.V} argument for information about the order of the elements.} \item{...}{Many more additional options can be defined, using 'name = value'. See \code{\link{lavOptions}} for a complete list.} } \details{ The \code{sem} function is a wrapper for the more general \code{\link{lavaan}} function, but setting the following default options: \code{int.ov.free = TRUE}, \code{int.lv.free = FALSE}, \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, \code{auto.cov.lv.x = TRUE}, \code{auto.th = TRUE}, \code{auto.delta = TRUE}, and \code{auto.cov.y = TRUE}. } \value{ An object of class \code{\linkS4class{lavaan}}, for which several methods are available, including a \code{summary} method. } \references{ Yves Rosseel (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. 
URL http://www.jstatsoft.org/v48/i02/.} \seealso{ \code{\link{lavaan}} } \examples{ ## The industrialization and Political Democracy Example ## Bollen (1989), page 332 model <- ' # latent variable definitions ind60 =~ x1 + x2 + x3 dem60 =~ y1 + a*y2 + b*y3 + c*y4 dem65 =~ y5 + a*y6 + b*y7 + c*y8 # regressions dem60 ~ ind60 dem65 ~ ind60 + dem60 # residual correlations y1 ~~ y5 y2 ~~ y4 + y6 y3 ~~ y7 y4 ~~ y8 y6 ~~ y8 ' fit <- sem(model, data=PoliticalDemocracy) summary(fit, fit.measures=TRUE) } lavaan/man/lavTestLRT.Rd0000644000176200001440000000632512760602654014576 0ustar liggesusers\name{lavTestLRT} \alias{lavTestLRT} \alias{lavtestLRT} \alias{LRT} \alias{lavLRTTest} \alias{lavLRT} \alias{anova} \title{LRT test} \description{ LRT test for comparing (nested) lavaan models.} \usage{ lavTestLRT(object, ..., method = "default", A.method = "exact", H1 = TRUE, type = "Chisq", model.names = NULL) anova(object, ...) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{...}{additional objects of class \code{\linkS4class{lavaan}}.} \item{method}{Character string. The possible options are \code{"satorra.bentler.2001"}, \code{"satorra.bentler.2010"} and \code{"satorra.2000"}. See details.} \item{H1}{Not used yet.} \item{A.method}{Character string. The possible options are \code{"exact"} and \code{"delta"}. This is only used when method = \code{"satorra.2000"}. It determines how the Jacobian of the constraint function (the matrix A) will be computed.} \item{type}{Character. If \code{"Chisq"}, the test statistic for each model is the (scaled or unscaled) model fit test statistic. If \code{"Cf"}, the test statistic for each model is computed by the \code{\link{lavTablesFitCf}} function.} \item{model.names}{Character vector. If provided, use these model names in the first column of the anova table.} } \value{ An object of class anova. When given a single argument, it simply returns the test statistic of this model. When given a sequence of objects, this function tests the models against one another in the order specified. } \details{ The \code{anova} function for lavaan objects simply calls the \code{lavTestLRT} function, which has a few additional arguments. If \code{type = "Chisq"} and the test statistics are scaled, a special scaled difference test statistic is computed. If method is \code{"satorra.bentler.2001"}, a simple approximation described in Satorra \& Bentler (2001) is used. In some settings, this can lead to a negative test statistic. To ensure a positive test statistic, we can use the method proposed by Satorra \& Bentler (2010). Alternatively, when method is \code{"satorra.2000"}, the original formulas of Satorra (2000) are used. Note that for the Satorra (2000) method, the models must be nested in the parameter sense, while for the other methods, they only need to be nested in the covariance matrix sense. } \references{ Satorra, A. (2000). Scaled and adjusted restricted tests in multi-sample analysis of moment structures. In Heijmans, R.D.H., Pollock, D.S.G. & Satorra, A. (eds.), Innovations in multivariate statistical analysis. A Festschrift for Heinz Neudecker (pp.233-247). London: Kluwer Academic Publishers. Satorra, A., & Bentler, P. M. (2001). A scaled difference chi-square test statistic for moment structure analysis. Psychometrika, 66(4), 507-514. Satorra, A., & Bentler, P. M. (2010). Ensuring positiveness of the scaled difference chi-square test statistic. Psychometrika, 75(2), 243-248. 
} \examples{ HS.model <- ' visual =~ x1 + b1*x2 + x3 textual =~ x4 + b2*x5 + x6 speed =~ x7 + b3*x8 + x9 ' fit1 <- cfa(HS.model, data = HolzingerSwineford1939) fit0 <- cfa(HS.model, data = HolzingerSwineford1939, orthogonal = TRUE) lavTestLRT(fit1, fit0) } lavaan/man/bootstrap.Rd0000644000176200001440000001272112743506572014605 0ustar liggesusers\name{bootstrapLavaan} \alias{bootstrapLavaan} \alias{bootstrapLRT} \title{Bootstrapping a Lavaan Model} \description{Bootstrap the LRT, or any other statistic (or vector of statistics) you can extract from a fitted lavaan object.} \usage{ bootstrapLavaan(object, R = 1000L, type = "ordinary", verbose = FALSE, FUN = "coef", warn = -1L, return.boot = FALSE, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, iseed = NULL, h0.rmsea = NULL, ...) bootstrapLRT(h0 = NULL, h1 = NULL, R = 1000L, type="bollen.stine", verbose = FALSE, return.LRT = FALSE, double.bootstrap = "no", double.bootstrap.R = 500L, double.bootstrap.alpha = 0.05, warn = -1L, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, iseed = NULL) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{h0}{An object of class \code{\linkS4class{lavaan}}. The restricted model.} \item{h1}{An object of class \code{\linkS4class{lavaan}}. The unrestricted model.} \item{R}{Integer. The number of bootstrap draws.} \item{type}{If \code{"ordinary"} or \code{"nonparametric"}, the usual (naive) bootstrap method is used. If \code{"bollen.stine"}, the data is first transformed such that the null hypothesis holds exactly in the resampling space. If \code{"yuan"}, the data is first transformed by combining data and theory (model), such that the resampling space is closer to the population space. If \code{"parametric"}, the parametric bootstrap approach is used; currently, this is only valid for continuous data following a multivariate normal distribution. See references for more details.} \item{FUN}{A function which when applied to the \code{\linkS4class{lavaan}} object returns a vector containing the statistic(s) of interest. The default is \code{FUN="coef"}, returning the estimated values of the free parameters in the model.} \item{\dots}{Other named arguments for \code{FUN} which are passed unchanged each time it is called.} \item{verbose}{If \code{TRUE}, show information for each bootstrap draw.} \item{warn}{Sets the handling of warning messages. See \code{\link{options}}.} \item{return.boot}{Not used for now.} \item{return.LRT}{If \code{TRUE}, return the LRT values as an attribute to the pvalue.} \item{parallel}{The type of parallel operation to be used (if any). If missing, the default is \code{"no"}. } \item{ncpus}{Integer: number of processes to be used in parallel operation: typically one would choose this to be the number of available CPUs. } \item{cl}{An optional \pkg{parallel} or \pkg{snow} cluster for use if \code{parallel = "snow"}. If not supplied, a cluster on the local machine is created for the duration of the \code{bootstrapLavaan} or \code{bootstrapLRT} call.} \item{iseed}{An integer to set the seed. Or NULL if no reproducible seeds are needed. To make this work, make sure the first RNGkind() element is \code{"L'Ecuyer-CMRG"}. You can check this by typing \code{RNGkind()} in the console. You can set it by typing \code{RNGkind("L'Ecuyer-CMRG")}, before the bootstrap functions are called.} \item{h0.rmsea}{Only used if \code{type="yuan"}. 
Allows one to do the Yuan bootstrap under the hypothesis that the population RMSEA equals a specified value.} \item{double.bootstrap}{If \code{"standard"}, the genuine double bootstrap is used to compute an additional set of plug-in p-values for each bootstrap sample. If \code{"FDB"}, the fast double bootstrap is used to compute second level LRT-values for each bootstrap sample. If \code{"no"}, no double bootstrap is used. The default is set to \code{"no"}.} \item{double.bootstrap.R}{Integer. The number of bootstrap draws to be used for the double bootstrap.} \item{double.bootstrap.alpha}{The significance level to compute the adjusted alpha based on the plugin p-values.} } \author{Yves Rosseel and Leonard Vanbrabant. Ed Merkle contributed Yuan's bootstrap. Improvements to Yuan's bootstrap were contributed by Hao Wu and Chuchu Cheng.} \details{The FUN function can return either a scalar or a numeric vector. This function can be an existing function (for example \code{coef}) or can be a custom defined function. For example: \preformatted{ myFUN <- function(x) { # require(lavaan) modelImpliedCov <- fitted(x)$cov vech(modelImpliedCov) } } If \code{parallel="snow"}, it is imperative that \code{require(lavaan)} is included in the custom function.} \references{ Bollen, K. and Stine, R. (1992) Bootstrapping Goodness of Fit Measures in Structural Equation Models. Sociological Methods and Research, 21, 205--229. Yuan, K.-H., Hayashi, K., & Yanagihara, H. (2007). A class of population covariance matrices in the bootstrap approach to covariance structure analysis. Multivariate Behavioral Research, 42, 261--281. } \examples{ # fit the Holzinger and Swineford (1939) example HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939, se="none") # get the test statistic for the original sample T.orig <- fitMeasures(fit, "chisq") # bootstrap to get bootstrap test statistics # we only generate 10 bootstrap samples in this example; in practice # you may wish to use a much higher number T.boot <- bootstrapLavaan(fit, R=10, type="bollen.stine", FUN=fitMeasures, fit.measures="chisq") # compute a bootstrap based p-value pvalue.boot <- length(which(T.boot > T.orig))/length(T.boot) } lavaan/man/lavTestScore.Rd0000644000176200001440000000634512531360230015177 0ustar liggesusers\name{lavTestScore} \alias{lavTestScore} \alias{lavtestscore} \alias{score} \alias{Score} \alias{lavScoreTest} \title{Score test} \description{ Score test (or Lagrange Multiplier test) for releasing one or more fixed or constrained parameters in a model.} \usage{ lavTestScore(object, add = NULL, release = NULL, univariate = TRUE, cumulative = FALSE, epc = FALSE, verbose = FALSE, warn = TRUE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{add}{Either a character string (typically between single quotes) or a parameter table containing additional (currently fixed-to-zero) parameters for which the score test must be computed.} \item{release}{Vector of Integers. The indices of the constraints that should be released. The indices correspond to the order of the equality constraints as they appear in the parameter table.} \item{univariate}{Logical. If \code{TRUE}, compute the univariate score statistics, one for each constraint.} \item{cumulative}{Logical. 
If \code{TRUE}, order the univariate score statistics from large to small, and compute a series of multivariate score statistics, each time adding an additional constraint.} \item{epc}{Logical. If \code{TRUE}, and we are releasing existing constraints, compute the expected parameter changes for the existing (free) parameters, for each released constraint.} \item{verbose}{Logical. Not used for now.} \item{warn}{Logical. If \code{TRUE}, print out warnings if they occur.} } \details{ This function can be used to compute both multivariate and univariate score tests. There are two modes: 1) releasing fixed-to-zero parameters (using the \code{add} argument), and 2) releasing existing equality constraints (using the \code{release} argument). The two modes cannot be used simultaneously. When adding new parameters, they should not already be part of the model (i.e. not listed in the parameter table). If you want to test for a parameter that was explicitly fixed to a constant (say to zero), it is better to label the parameter, and use an explicit equality constraint. } \value{ A list containing at least three elements: the Score test statistic (stat), the degrees of freedom (df), and a p-value under the chi-square distribution (p.value). If univariate tests were requested, an additional element (TS.univariate) contains a numeric vector of univariate score statistics. If cumulative tests were requested, an additional element (TS.order) shows the order of the univariate test statistics, and an element (TS.cumulative) contains a numeric vector of cumulative multivariate score statistics. } \references{ Bentler, P. M., & Chou, C. P. (1993). Some new covariance structure model improvement statistics. Sage Focus Editions, 154, 235-255. } \examples{ HS.model <- ' visual =~ x1 + b1*x2 + x3 textual =~ x4 + b2*x5 + x6 speed =~ x7 + b3*x8 + x9 b1 == b2 b2 == b3 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) # test 1: release the two equality constraints lavTestScore(fit, cumulative = TRUE) # test 2: the score test for adding two (currently fixed # to zero) cross-loadings newpar = ' visual =~ x9 textual =~ x3 ' lavTestScore(fit, add = newpar) } lavaan/man/HolzingerSwineford1939.Rd0000644000176200001440000000361312736245312016736 0ustar liggesusers\name{HolzingerSwineford1939} \alias{HolzingerSwineford1939} \docType{data} \title{ Holzinger and Swineford Dataset (9 Variables) } \description{ The classic Holzinger and Swineford (1939) dataset consists of mental ability test scores of seventh- and eighth-grade children from two different schools (Pasteur and Grant-White). In the original dataset (available in the \code{MBESS} package), there are scores for 26 tests. However, a smaller subset with 9 variables is more widely used in the literature (for example in Joreskog's 1969 paper, which also uses the 145 subjects from the Grant-White school only). } \usage{data(HolzingerSwineford1939)} \format{ A data frame with 301 observations of 15 variables.
\describe{ \item{\code{id}}{Identifier} \item{\code{sex}}{Gender} \item{\code{ageyr}}{Age, year part} \item{\code{agemo}}{Age, month part} \item{\code{school}}{School (Pasteur or Grant-White)} \item{\code{grade}}{Grade} \item{\code{x1}}{Visual perception} \item{\code{x2}}{Cubes} \item{\code{x3}}{Lozenges} \item{\code{x4}}{Paragraph comprehension} \item{\code{x5}}{Sentence completion} \item{\code{x6}}{Word meaning} \item{\code{x7}}{Speeded addition} \item{\code{x8}}{Speeded counting of dots} \item{\code{x9}}{Speeded discrimination straight and curved capitals} } } \source{ This dataset was originally retrieved from \verb{http://web.missouri.edu/~kolenikovs/stata/hs-cfa.dta} (link no longer active) and converted to an R dataset. } \references{ Holzinger, K., and Swineford, F. (1939). A study in factor analysis: The stability of a bifactor solution. Supplementary Educational Monograph, no. 48. Chicago: University of Chicago Press. Joreskog, K. G. (1969). A general approach to confirmatory maximum likelihood factor analysis. \emph{Psychometrika}, 34, 183-202. } \seealso{ \code{\link{cfa}} } \examples{ head(HolzingerSwineford1939) } lavaan/man/model.syntax.Rd0000644000176200001440000004063212743507773015217 0ustar liggesusers\name{model.syntax} \alias{model.syntax} \alias{lavaanify} \alias{lavParTable} \alias{lavpartable} \alias{lavPartable} \alias{parseModelString} \alias{lavParseModelString} \title{The Lavaan Model Syntax} \description{ The lavaan model syntax describes a latent variable model. The function \code{lavaanify} turns it into a table that represents the full model as specified by the user. We refer to this table as the parameter table.} \usage{ lavaanify(model = NULL, meanstructure = FALSE, int.ov.free = FALSE, int.lv.free = FALSE, orthogonal = FALSE, std.lv = FALSE, conditional.x = FALSE, fixed.x = TRUE, parameterization = "delta", constraints = NULL, auto = FALSE, model.type = "sem", auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = FALSE, auto.cov.lv.x = FALSE, auto.cov.y = FALSE, auto.th = FALSE, auto.delta = FALSE, varTable = NULL, ngroups = 1L, group.equal = NULL, group.partial = NULL, group.w.free = FALSE, debug = FALSE, warn = TRUE, as.data.frame. = TRUE) lavParTable(model = NULL, meanstructure = FALSE, int.ov.free = FALSE, int.lv.free = FALSE, orthogonal = FALSE, std.lv = FALSE, conditional.x = FALSE, fixed.x = TRUE, parameterization = "delta", constraints = NULL, auto = FALSE, model.type = "sem", auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = FALSE, auto.cov.lv.x = FALSE, auto.cov.y = FALSE, auto.th = FALSE, auto.delta = FALSE, varTable = NULL, ngroups = 1L, group.equal = NULL, group.partial = NULL, group.w.free = FALSE, debug = FALSE, warn = TRUE, as.data.frame. = TRUE) lavParseModelString(model.syntax = '', as.data.frame.=FALSE, warn=TRUE, debug=FALSE) } \arguments{ \item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax; see details for more information. Alternatively, a parameter table (e.g., the output of \code{lavParseModelString}) is also accepted.} \item{model.syntax}{The model syntax specifying the model.
Must be a literal string.} \item{meanstructure}{If \code{TRUE}, intercepts/means will be added to the model for both observed and latent variables.} \item{int.ov.free}{If \code{FALSE}, the intercepts of the observed variables are fixed to zero.} \item{int.lv.free}{If \code{FALSE}, the intercepts of the latent variables are fixed to zero.} \item{orthogonal}{If \code{TRUE}, the exogenous latent variables are assumed to be uncorrelated.} \item{std.lv}{If \code{TRUE}, the metric of each latent variable is determined by fixing their variances to 1.0. If \code{FALSE}, the metric of each latent variable is determined by fixing the factor loading of the first indicator to 1.0.} \item{conditional.x}{If \code{TRUE}, we set up the model conditional on the exogenous `x' covariates; the model-implied sample statistics only include the non-x variables. If \code{FALSE}, the exogenous `x' variables are modeled jointly with the other variables, and the model-implied statistics reflect both sets of variables.} \item{fixed.x}{If \code{TRUE}, the exogenous `x' covariates are considered fixed variables and the means, variances and covariances of these variables are fixed to their sample values. If \code{FALSE}, they are considered random, and the means, variances and covariances are free parameters.} \item{parameterization}{Currently only used if data is categorical. If \code{"delta"}, the delta parameterization is used. If \code{"theta"}, the theta parameterization is used.} \item{constraints}{Additional (in)equality constraints. See details for more information.} \item{auto}{If \code{TRUE}, the default values are used for the auto.* arguments, depending on the value of \code{model.type}.} \item{model.type}{Either \code{"sem"} or \code{"growth"}; only used if \code{auto=TRUE}.} \item{auto.fix.first}{If \code{TRUE}, the factor loading of the first indicator is set to 1.0 for every latent variable.} \item{auto.fix.single}{If \code{TRUE}, the residual variance (if included) of an observed indicator is set to zero if it is the only indicator of a latent variable.} \item{auto.var}{If \code{TRUE}, the residual variances and the variances of exogenous latent variables are included in the model and set free.} \item{auto.cov.lv.x}{If \code{TRUE}, the covariances of exogenous latent variables are included in the model and set free.} \item{auto.cov.y}{If \code{TRUE}, the covariances of dependent variables (both observed and latent) are included in the model and set free.} \item{auto.th}{If \code{TRUE}, thresholds for limited dependent variables are included in the model and set free.} \item{auto.delta}{If \code{TRUE}, response scaling parameters for limited dependent variables are included in the model and set free.} \item{varTable}{The variable table containing information about the observed variables in the model.} \item{ngroups}{The number of (independent) groups.} \item{group.equal}{A vector of character strings. Only used in a multiple group analysis. Can be one or more of the following: \code{"loadings"}, \code{"intercepts"}, \code{"means"}, \code{"regressions"}, \code{"residuals"} or \code{"covariances"}, specifying the pattern of equality constraints across multiple groups.} \item{group.partial}{A vector of character strings containing the labels of the parameters which should be free in all groups (thereby overriding the group.equal argument for some specific parameters).} \item{group.w.free}{Logical. If \code{TRUE}, the group frequencies are considered to be free parameters in the model.
In this case, a Poisson model is fitted to estimate the group frequencies. If \code{FALSE} (the default), the group frequencies are fixed to their observed values.} \item{warn}{If \code{TRUE}, some (possibly harmless) warnings are printed out.} \item{as.data.frame.}{If \code{TRUE}, return the list of model parameters as a \code{data.frame}.} \item{debug}{If \code{TRUE}, debugging information is printed out.} } \details{ The model syntax consists of one or more formula-like expressions, each one describing a specific part of the model. The model syntax can be read from a file (using \code{\link{readLines}}), or can be specified as a literal string enclosed by single quotes as in the example below. \preformatted{myModel <- ' # 1. latent variable definitions f1 =~ y1 + y2 + y3 f2 =~ y4 + y5 + y6 f3 =~ y7 + y8 + y9 + y10 f4 =~ y11 + y12 + y13 ! this is also a comment # 2. regressions f1 ~ f3 + f4 f2 ~ f4 y1 + y2 ~ x1 + x2 + x3 # 3. (co)variances y1 ~~ y1 y2 ~~ y4 + y5 f1 ~~ f2 # 4. intercepts f1 ~ 1; y5 ~ 1 # 5. thresholds y11 | t1 + t2 + t3 y12 | t1 y13 | t1 + t2 # 6. scaling factors y11 ~*~ y11 y12 ~*~ y12 y13 ~*~ y13 # 7. formative factors f5 <~ z1 + z2 + z3 + z4 ' } Blank lines and comments can be used in between the formulas, and formulas can be split over multiple lines. Both the sharp (#) and the exclamation (!) characters can be used to start a comment. Multiple formulas can be placed on a single line if they are separated by a semicolon (;). There can be seven types of formula-like expressions in the model syntax: \enumerate{ \item Latent variable definitions: The \code{"=~"} operator can be used to define (continuous) latent variables. The name of the latent variable is on the left of the \code{"=~"} operator, while the terms on the right, separated by \code{"+"} operators, are the indicators of the latent variable. The operator \code{"=~"} can be read as ``is manifested by''. \item Regressions: The \code{"~"} operator specifies a regression. The dependent variable is on the left of a \code{"~"} operator and the independent variables, separated by \code{"+"} operators, are on the right. These regression formulas are similar to the way ordinary linear regression formulas are used in R, but they may include latent variables. Interaction terms are currently not supported. \item Variance-covariances: The \code{"~~"} (`double tilde') operator specifies (residual) variances of an observed or latent variable, or a set of covariances between one variable, and several other variables (either observed or latent). Several variables, separated by \code{"+"} operators can appear on the right. This way, several pairwise (co)variances involving the same left-hand variable can be expressed in a single expression. The distinction between variances and residual variances is made automatically. \item Intercepts: A special case of a regression formula can be used to specify an intercept (or a mean) of either an observed or a latent variable. The variable name is on the left of a \code{"~"} operator. On the right is only the number \code{"1"} representing the intercept. Including an intercept formula in the model automatically implies \code{meanstructure = TRUE}. The distinction between intercepts and means is made automatically. \item Thresholds: The \code{"|"} operator can be used to define the thresholds of categorical endogenous variables (on the left hand side of the operator). 
By convention, the thresholds (on the right hand side, separated by the \code{"+"} operator) are named \code{"t1"}, \code{"t2"}, etcetera. \item Scaling factors: The \code{"~*~"} operator defines a scale factor. The variable name on the left hand side must be the same as the variable name on the right hand side. Scale factors are used in the Delta parameterization, in a multiple group analysis when factor indicators are categorical. \item Formative factors: The \code{"<~"} operator can be used to define a formative factor (on the right hand side of the operator), in a similar way as a reflective factor is defined (using the \code{"=~"} operator). This is just syntactic sugar to define a phantom latent variable (equivalent to using \code{"f =~ 0"}). In addition, the (residual) variance of the formative factor is fixed to zero. } Usually, only a single variable name appears on the left side of an operator. However, if multiple variable names are specified, separated by the \code{"+"} operator, the formula is repeated for each element on the left side (as for example in the third regression formula in the example above). The only exception is the scaling factor formula, where only a single element is allowed on the left hand side. In the right-hand side of these formula-like expressions, each element can be modified (using the \code{"*"} operator) by either a numeric constant, an expression resulting in a numeric constant, an expression resulting in a character vector, or one of three special functions: \code{start()}, \code{label()} and \code{equal()}. This provides the user with a mechanism to fix parameters, to provide alternative starting values, to label the parameters, and to define equality constraints among model parameters. All \code{"*"} expressions are referred to as \emph{modifiers}. They are explained in more detail in the following sections. } \section{Fixing parameters}{ It is often desirable to fix a model parameter that is otherwise (by default) free. Any parameter in a model can be fixed by using a modifier resulting in a numerical constraint. Here are some examples: \itemize{ \item Fixing the regression coefficient of the predictor \code{x2}: \preformatted{y ~ x1 + 2.4*x2 + x3} \item Specifying an orthogonal (zero) covariance between two latent variables: \preformatted{f1 ~~ 0*f2} \item Specifying an intercept and a linear slope in a growth model: \preformatted{i =~ 1*y11 + 1*y12 + 1*y13 + 1*y14 s =~ 0*y11 + 1*y12 + 2*y13 + 3*y14} } Instead of a numeric constant, one can use a mathematical function that returns a numeric constant, for example \code{sqrt(10)}. Multiplying with \code{NA} will force the corresponding parameter to be free. } \section{Starting values}{ User-provided starting values can be given by using the special function \code{start()}, containing a numeric constant. For example: \preformatted{y ~ x1 + start(1.0)*x2 + x3} Note that if a starting value is provided, the parameter is not automatically considered to be free. } \section{Parameter labels and equality constraints}{ Each free parameter in a model is automatically given a name (or label). The name given to a model parameter consists of three parts, coerced to a single character vector. The first part is the name of the variable in the left-hand side of the formula where the parameter was implied. The middle part is based on the special `operator' used in the formula. This can be either one of \code{"=~"}, \code{"~"} or \code{"~~"}.
The third part is the name of the variable in the right-hand side of the formula where the parameter was implied, or \code{"1"} if it is an intercept. The three parts are pasted together in a single string. For example, the name of the fixed regression coefficient in the regression formula \code{y ~ x1 + 2.4*x2 + x3} is the string \code{"y~x2"}. The name of the parameter corresponding to the covariance between two latent variables in the formula \code{f1 ~~ f2} is the string \code{"f1~~f2"}. Although this automatic labeling of parameters is convenient, the user may specify their own labels for specific parameters simply by pre-multiplying the corresponding term (on the right hand side of the operator only) by a character string (starting with a letter). For example, in the formula \code{f1 =~ x1 + x2 + mylabel*x3}, the parameter corresponding to the factor loading of \code{x3} will be named \code{"mylabel"}. An alternative way to specify the label is as follows: \code{f1 =~ x1 + x2 + label("mylabel")*x3}, where the label is the argument of the special function \code{label()}; this can be useful if the label contains a space, or an operator (like "~"). There are two ways to constrain a parameter to be equal to another target parameter. If you have specified your own labels, you can use the fact that \emph{equal labels imply equal parameter values}. If you rely on automatic parameter labels, you can use the special function \code{equal()}. The argument of \code{equal()} is the (automatic or user-specified) name of the target parameter. For example, in the confirmatory factor analysis example below, the intercepts of the three indicators of each latent variable are constrained to be equal to each other. For the first three, we have used the default names. For the last three, we have provided a custom label for the \code{y2a} intercept. \preformatted{model <- ' # two latent variables with fixed loadings f1 =~ 1*y1a + 1*y1b + 1*y1c f2 =~ 1*y2a + 1*y2b + 1*y2c # intercepts constrained to be equal # using the default names y1a ~ 1 y1b ~ equal("y1a~1") * 1 y1c ~ equal("y1a~1") * 1 # intercepts constrained to be equal # using a custom label y2a ~ int2*1 y2b ~ int2*1 y2c ~ int2*1 ' } } \section{Multiple groups}{ In a multiple group analysis, modifiers that contain a single constant must be replaced by a vector, having the same length as the number of groups. The only exception is numerical constants (for fixing values): if you provide only a single number, the same number will be used for all groups. However, it is safer (and cleaner) to specify the same number of elements as the number of groups. For example, if there are two groups: \preformatted{ HS.model <- ' visual =~ x1 + 0.5*x2 + c(0.6, 0.8)*x3 textual =~ x4 + start(c(1.2, 0.6))*x5 + x6 speed =~ x7 + x8 + c(x9.group1, x9.group2)*x9 ' } In this example, the factor loading of the `x2' indicator is fixed to the value 0.5 for all groups. However, the factor loadings of the `x3' indicator are fixed to 0.6 and 0.8 for group 1 and group 2 respectively. The same logic is used for all modifiers. Note that character vectors can contain unquoted strings. } \section{Multiple modifiers}{ In the model syntax, you can specify a variable more than once on the right hand side of an operator; therefore, several `modifiers' can be applied simultaneously; for example, if you want to fix the value of a parameter and also label that parameter, you can use something like: \preformatted{ f1 =~ x1 + x2 + 4*x3 + x3.loading*x3} } \references{ Yves Rosseel (2012).
lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/.} lavaan/man/lav_func.Rd0000644000176200001440000000442112616436177014367 0ustar liggesusers\name{lav_func} \alias{lav_func_gradient_complex} \alias{lav_func_gradient_simple} \alias{lav_func_jacobian_complex} \alias{lav_func_jacobian_simple} \title{Utility Functions: Gradient and Jacobian} \description{Utility functions for computing the gradient of a scalar-valued function or the Jacobian of a vector-valued function by numerical approximation.} \usage{ lav_func_gradient_complex(func, x, h = .Machine$double.eps, ..., check.scalar = TRUE, fallback.simple = TRUE) lav_func_jacobian_complex(func, x, h = .Machine$double.eps, ..., fallback.simple = TRUE) lav_func_gradient_simple(func, x, h = sqrt(.Machine$double.eps), ..., check.scalar = TRUE) lav_func_jacobian_simple(func, x, h = sqrt(.Machine$double.eps), ...) } \arguments{ \item{func}{A real-valued function returning a numeric scalar or a numeric vector.} \item{x}{A numeric vector: the point(s) at which the gradient/Jacobian of the function should be computed.} \item{h}{Numeric value representing a small change in `x' when computing the gradient/Jacobian.} \item{...}{Additional arguments to be passed to the function `func'.} \item{check.scalar}{Logical. If TRUE, check if the function is scalar-valued.} \item{fallback.simple}{Logical. If TRUE, and the function evaluation fails, we call the corresponding simple (non-complex) method instead.} } \details{ The complex versions use complex numbers to gain more precision, while retaining the simplicity (and speed) of the simple forward method (see references). These functions were added to lavaan (around 2012) when the complex functionality was not part of the numDeriv package. They were used internally, and made public in 0.5-17 per request of other package developers. } \references{ Squire, W. and Trapp, G. (1998). Using Complex Variables to Estimate Derivatives of Real Functions. SIAM Review, 40(1), 110-112. } \examples{ # very accurate complex method lav_func_gradient_complex(func = exp, x = 1) - exp(1) # less accurate forward method lav_func_gradient_simple(func = exp, x = 1) - exp(1) # very accurate complex method diag(lav_func_jacobian_complex(func = exp, x = c(1,2,3))) - exp(c(1,2,3)) # less accurate forward method diag(lav_func_jacobian_simple(func = exp, x = c(1,2,3))) - exp(c(1,2,3)) } lavaan/man/lavTestWald.Rd0000644000176200001440000000314112251337020015002 0ustar liggesusers\name{lavTestWald} \alias{lavTestWald} \alias{lavtestwald} \alias{wald} \alias{Wald} \alias{lavWaldTest} \title{Wald test} \description{ Wald test for testing a linear hypothesis about the parameters of a fitted lavaan object.} \usage{ lavTestWald(object, constraints = NULL, verbose = FALSE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{constraints}{A character string (typically between single quotes) containing one or more equality constraints. See examples for more details.} \item{verbose}{Logical. If \code{TRUE}, print out the restriction matrix and the estimated restricted values.} } \details{ The constraints are specified using the \code{"=="} operator. Both the left-hand side and the right-hand side of the equality can contain a linear combination of model parameters, or a constant (like zero). The model parameters must be specified by their user-specified labels.
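For example (a minimal sketch, assuming that, as in the example below, the labels \code{b1} and \code{b2} have been attached to two model parameters):
\preformatted{
# test whether the two labeled parameters are equal
lavTestWald(fit, constraints = "b1 - b2 == 0")
}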
Names of defined parameters (using the \code{":="} operator) can be included too. } \value{ A list containing three elements: the Wald test statistic (stat), the degrees of freedom (df), and a p-value under the chi-square distribution (p.value). } \examples{ HS.model <- ' visual =~ x1 + b1*x2 + x3 textual =~ x4 + b2*x5 + x6 speed =~ x7 + b3*x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) # test 1: test about a single parameter # this is the 'chi-square' version of the # z-test from the summary() output lavTestWald(fit, constraints = "b1 == 0") # test 2: several constraints con = ' 2*b1 == b3 b2 - b3 == 0 ' lavTestWald(fit, constraints = con) } lavaan/man/lavTables.Rd0000644000176200001440000001242712354535466014516 0ustar liggesusers\name{lavTables} \alias{lavTables} \title{lavaan frequency tables} \description{ Frequency tables for categorical variables and related statistics.} \usage{ lavTables(object, dimension = 2L, type = "cells", categorical = NULL, group = NULL, statistic = "default", G2.min = 3, X2.min = 3, p.value = FALSE, output = "data.frame", patternAsString = TRUE) } \arguments{ \item{object}{Either a \code{data.frame}, or an object of class \code{\linkS4class{lavaan}}.} \item{dimension}{Integer. If 0L, display all response patterns. If 1L, display one-dimensional (one-way) tables; if 2L, display two-dimensional (two-way or pairwise) tables. For the latter, we can change the information per row: if \code{type = "cells"}, each row is a cell in a pairwise table; if \code{type = "table"}, each row is a table.} \item{type}{If \code{"cells"}, display information for each cell in the (one-way or two-way) table. If \code{"table"}, display information per table. If \code{"pattern"}, display response patterns (implying \code{"dimension = 0L"}).} \item{categorical}{Only used if \code{object} is a \code{data.frame}. Specify variables that need to be treated as categorical.} \item{group}{Only used if \code{object} is a \code{data.frame}. Specify a grouping variable.} \item{statistic}{Either a character string, or a vector of character strings requesting one or more statistics for each cell, pattern or table. Always available are \code{X2} and \code{G2} for the Pearson and LRT based goodness-of-fit statistics. A distinction is made between the unrestricted and restricted model. The statistics based on the former have an extension \code{*.un}, as in \code{X2.un} and \code{G2.un}. If object is a \code{data.frame}, the unrestricted versions of the statistics are the only ones available. For one-way tables, additional statistics are the thresholds (\code{th.un} and \code{th}). For two-way tables and \code{type = "table"}, the following statistics are available: \code{X2}, \code{G2}, \code{cor} (polychoric correlation), \code{RMSEA} and the corresponding unrestricted versions (\code{X2.un} etc). Additional statistics are \code{G2.average}, \code{G2.nlarge} and \code{G2.plarge} statistics based on the cell values \code{G2}: \code{G2.average} is the average of the \code{G2} values in each cell of the two-way table; \code{G2.nlarge} is the number of cells with a \code{G2} value larger than \code{G2.min}, and \code{G2.plarge} is the proportion of cells with a \code{G2} value larger than \code{G2.min}. A similar set of statistics based on \code{X2} is also available. If \code{"default"}, the selection of statistics (if any) depends on the \code{dim} and \code{type} arguments, and if the object is a \code{data.frame} or a fitted lavaan object.} \item{G2.min}{Numeric. 
All cells with a G2 statistic larger than this number are considered `large', as reflected in the (optional) \code{"G2.plarge"} and \code{"G2.nlarge"} columns.} \item{X2.min}{Numeric. All cells with an X2 statistic larger than this number are considered `large', as reflected in the (optional) \code{"X2.plarge"} and \code{"X2.nlarge"} columns.} \item{p.value}{Logical. If \code{TRUE}, p-values are computed for requested statistics (e.g., G2 or X2) if possible.} \item{output}{If \code{"data.frame"}, the output is presented as a data.frame where each row is either a cell, a table, or a response pattern, depending on the \code{"type"} argument. If \code{"table"}, the output is presented as a table (or matrix) or a list of tables. Only a single statistic can be shown in this case, and if the \code{statistic} is empty, the observed frequencies are shown.} \item{patternAsString}{Logical. Only used for response patterns (dimension = 0L). If \code{TRUE}, response patterns are displayed as a compact string. If \code{FALSE}, as many columns as observed variables are displayed.} } \value{ If \code{output = "data.frame"}, the output is presented as a data.frame where each row is either a cell, a table, or a response pattern, depending on the \code{"type"} argument. If \code{output = "table"} (only for two-way tables), a list of tables (if \code{type = "cells"}) where each list element corresponds to a pairwise table, or if \code{type = "table"}, a single table (per group). In both cases, the table entries are determined by the (single) \code{statistic} argument. } \references{ Joreskog, K.G. & Moustaki, I. (2001). Factor analysis of ordinal variables: A comparison of three approaches. Multivariate Behavioral Research, 36, 347-387. } \seealso{\code{\link{varTable}}.} \examples{ HS9 <- HolzingerSwineford1939[,c("x1","x2","x3","x4","x5", "x6","x7","x8","x9")] HSbinary <- as.data.frame( lapply(HS9, cut, 2, labels=FALSE) ) # using the data only lavTables(HSbinary, dim = 0L, categorical = names(HSbinary)) lavTables(HSbinary, dim = 1L, categorical = names(HSbinary), stat=c("th.un")) lavTables(HSbinary, dim = 2L, categorical = names(HSbinary), type = "table") # fit a model HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HSbinary, ordered=names(HSbinary)) lavTables(fit, 1L) lavTables(fit, 2L, type="cells") lavTables(fit, 2L, type="table", stat=c("cor.un", "G2", "cor")) lavTables(fit, 2L, type="table", output="table", stat="X2") } lavaan/man/lavParTable.Rd0000644000176200001440000000134612142446075014764 0ustar liggesusers\name{parTable} \alias{parameterTable} \alias{parametertable} \alias{parTable} \alias{partable} \title{Parameter Table} \description{ Show the parameter table of a fitted model.} \usage{ parameterTable(object) parTable(object) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} } \value{ A \code{data.frame} containing the model parameters. This is simply the output of the \code{\link{lavaanify}} function coerced to a \code{data.frame} (with \code{stringsAsFactors = FALSE}).
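Because the result is a plain \code{data.frame}, standard subsetting can be used to inspect parts of the table. A minimal sketch (assuming \code{fit} is a fitted lavaan object, as in the example below):
\preformatted{
pt <- parTable(fit)
# keep only the rows of free parameters
pt[pt$free > 0L, c("lhs", "op", "rhs", "free", "est")]
}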
} \seealso{\code{\link{lavaanify}}.} \examples{ HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) parTable(fit) } lavaan/man/lav_matrix.Rd0000644000176200001440000002444012642240620014726 0ustar liggesusers\name{lav_matrix} \alias{lav_matrix_vec} \alias{lav_matrix_vecr} \alias{lav_matrix_vech} \alias{lav_matrix_vechr} \alias{lav_matrix_vechu} \alias{lav_matrix_vechru} \alias{lav_matrix_vech_idx} \alias{lav_matrix_vech_row_idx} \alias{lav_matrix_vech_col_idx} \alias{lav_matrix_vechr_idx} \alias{lav_matrix_vechu_idx} \alias{lav_matrix_vechru_idx} \alias{lav_matrix_vech_reverse} \alias{lav_matrix_vechru_reverse} \alias{lav_matrix_upper2full} \alias{lav_matrix_vechr_reverse} \alias{lav_matrix_vechu_reverse} \alias{lav_matrix_lower2full} \alias{lav_matrix_diag_idx} \alias{lav_matrix_diagh_idx} \alias{lav_matrix_antidiag_idx} \alias{lav_matrix_duplication} \alias{lav_matrix_duplication_pre} \alias{lav_matrix_duplication_post} \alias{lav_matrix_duplication_pre_post} \alias{lav_matrix_duplication_ginv} \alias{lav_matrix_duplication_ginv_pre} \alias{lav_matrix_duplication_ginv_post} \alias{lav_matrix_duplication_ginv_pre_post} \alias{lav_matrix_commutation} \alias{lav_matrix_commutation_pre} \alias{lav_matrix_commutation_mn_pre} \alias{lav_matrix_symmetric_sqrt} \alias{lav_matrix_orthogonal_complement} \alias{lav_matrix_bdiag} \alias{lav_matrix_trace} \title{Utility Functions: Matrices and Vectors} \description{Utility functions for matrix and vector operations.} \usage{ # matrix to vector lav_matrix_vec(A) lav_matrix_vecr(A) lav_matrix_vech(S, diagonal = TRUE) lav_matrix_vechr(S, diagonal = TRUE) # matrix/vector indices lav_matrix_vech_idx(n = 1L, diagonal = TRUE) lav_matrix_vech_row_idx(n = 1L, diagonal = TRUE) lav_matrix_vech_col_idx(n = 1L, diagonal = TRUE) lav_matrix_vechr_idx(n = 1L, diagonal = TRUE) lav_matrix_vechru_idx(n = 1L, diagonal = TRUE) lav_matrix_diag_idx(n = 1L) lav_matrix_diagh_idx(n = 1L) lav_matrix_antidiag_idx(n = 1L) # vector to matrix lav_matrix_vech_reverse(x, diagonal = TRUE) lav_matrix_vechru_reverse(x, diagonal = TRUE) lav_matrix_upper2full(x, diagonal = TRUE) lav_matrix_vechr_reverse(x, diagonal = TRUE) lav_matrix_vechu_reverse(x, diagonal = TRUE) lav_matrix_lower2full(x, diagonal = TRUE) # the duplication matrix lav_matrix_duplication(n = 1L) lav_matrix_duplication_pre(A = matrix(0,0,0)) lav_matrix_duplication_post(A = matrix(0,0,0)) lav_matrix_duplication_pre_post(A = matrix(0,0,0)) lav_matrix_duplication_ginv(n = 1L) lav_matrix_duplication_ginv_pre(A = matrix(0,0,0)) lav_matrix_duplication_ginv_post(A = matrix(0,0,0)) lav_matrix_duplication_ginv_pre_post(A = matrix(0,0,0)) # the commutation matrix lav_matrix_commutation(m = 1L, n = 1L) lav_matrix_commutation_pre(A = matrix(0,0,0)) lav_matrix_commutation_mn_pre(A, m = 1L, n = 1L) # other matrix operations lav_matrix_symmetric_sqrt(S = matrix(0,0,0)) lav_matrix_orthogonal_complement(A = matrix(0,0,0)) lav_matrix_bdiag(...) lav_matrix_trace(..., check = TRUE) } \arguments{ \item{A}{A general matrix.} \item{S}{A symmetric matrix.} \item{diagonal}{Logical. If TRUE, include the diagonal.} \item{n}{Integer. When it is the only argument, the dimension of a square matrix. If m is also provided, the number of columns of the matrix.} \item{m}{Integer. The number of rows of a matrix.} \item{x}{Numeric. A vector.} \item{...}{One or more matrices, or a list of matrices.} \item{check}{Logical.
If \code{check = TRUE}, we check if the (final) matrix is square.} } \details{ These are a collection of lower-level matrix/vector related functions that are used throughout the lavaan code. They are made public per request of package developers. Below is a brief description of what they do: The \code{lav_matrix_vec} function implements the vec operator (for 'vectorization') and transforms a matrix into a vector by stacking the columns of the matrix one underneath the other. The \code{lav_matrix_vecr} function is similar to the \code{lav_matrix_vec} function but transforms a matrix into a vector by stacking the rows of the matrix one underneath the other. The \code{lav_matrix_vech} function implements the vech operator (for 'half vectorization') and transforms a symmetric matrix into a vector by stacking the columns of the matrix one underneath the other, but eliminating all supradiagonal elements. If diagonal = FALSE, the diagonal elements are also eliminated. The \code{lav_matrix_vechr} function is similar to the \code{lav_matrix_vech} function but transforms a matrix into a vector by stacking the rows of the matrix one underneath the other, eliminating all supradiagonal elements. The \code{lav_matrix_vech_idx} function returns the vector indices of the lower triangular elements of a symmetric matrix of size n, column by column. The \code{lav_matrix_vech_row_idx} function returns the row indices of the lower triangular elements of a symmetric matrix of size n. The \code{lav_matrix_vech_col_idx} function returns the column indices of the lower triangular elements of a symmetric matrix of size n. The \code{lav_matrix_vechr_idx} function returns the vector indices of the lower triangular elements of a symmetric matrix of size n, row by row. The \code{lav_matrix_vechu_idx} function returns the vector indices of the upper triangular elements of a symmetric matrix of size n, column by column. The \code{lav_matrix_vechru_idx} function returns the vector indices of the upper triangular elements of a symmetric matrix of size n, row by row. The \code{lav_matrix_diag_idx} function returns the vector indices of the diagonal elements of a symmetric matrix of size n. The \code{lav_matrix_diagh_idx} function returns the vector indices of the diagonal elements of the lower part of a symmetric matrix of size n. The \code{lav_matrix_antidiag_idx} function returns the vector indices of the anti-diagonal elements of a symmetric matrix of size n. The \code{lav_matrix_vech_reverse} function (alias: \code{lav_matrix_vechru_reverse} and \code{lav_matrix_upper2full}) creates a symmetric matrix, given only upper triangular elements, row by row. If diagonal = FALSE, a diagonal with zero elements is added. The \code{lav_matrix_vechr_reverse} (alias: \code{lav_matrix_vechu_reverse} and \code{lav_matrix_lower2full}) creates a symmetric matrix, given only the lower triangular elements, row by row. If diagonal = FALSE, a diagonal with zero elements is added. The \code{lav_matrix_duplication} function generates the duplication matrix for a symmetric matrix of size n. This matrix duplicates the elements in vech(S) to create vec(S) (where S is symmetric). This matrix is very sparse, and should probably never be explicitly created. Use one of the functions below. The \code{lav_matrix_duplication_pre} function computes the product of the transpose of the duplication matrix and a matrix A. The A matrix should have n*n rows, where n is an integer. The duplication matrix is not explicitly created.
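As a small illustration of how the vech/vec functions relate to the duplication matrix (a sketch; any symmetric matrix will do):
\preformatted{
S <- cov(HolzingerSwineford1939[, c("x1", "x2", "x3")])
D <- lav_matrix_duplication(n = 3)
# the duplication matrix maps vech(S) onto vec(S)
all.equal(as.numeric(D \%*\% lav_matrix_vech(S)), lav_matrix_vec(S))
# t(D) \%*\% A, without explicitly forming the duplication matrix
all.equal(lav_matrix_duplication_pre(diag(9)), t(D))
}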
The \code{lav_matrix_duplication_post} function computes the product of a matrix A with the duplication matrix. The A matrix should have n*n columns, where n is an integer. The duplication matrix is not explicitly created. The \code{lav_matrix_duplication_pre_post} function first pre-multiplies a matrix A with the transpose of the duplication matrix, and then post multiplies the result again with the duplication matrix. A must be square matrix with n*n rows and columns, where n is an integer. The duplication matrix is not explicitly created. The \code{lav_matrix_duplication_ginv} function computes the generalized inverse of the duplication matrix. The matrix removes the duplicated elements in vec(S) to create vech(S). This matrix is very sparse, and should probably never be explicitly created. Use one of the functions below. The \code{lav_matrix_duplication_ginv_pre} function computes the product of the generalized inverse of the duplication matrix and a matrix A with n*n rows, where n is an integer. The generalized inverse of the duplication matrix is not explicitly created. The \code{lav_matrix_duplication_ginv_post} function computes the product of a matrix A (with n*n columns, where n is an integer) and the transpose of the generalized inverse of the duplication matrix. The generalized inverse of the duplication matrix is not explicitly created. The \code{lav_matrix_duplication_ginv_pre_post} function first pre-multiplies a matrix A with the generalized inverse of the duplication matrix, and then post-multiplies the result with the transpose of that generalized inverse. The matrix A must be square with n*n rows and columns, where n is an integer. The generalized inverse of the duplication matrix is not explicitly created. The \code{lav_matrix_commutation} function computes the commutation matrix, a permutation matrix that transforms vec(A) (with m rows and n columns) into vec(t(A)). The \code{lav_matrix_commutation_pre} function computes the product of the commutation matrix with a matrix A, without explicitly creating the commutation matrix. The matrix A must have n*n rows, where n is an integer. The \code{lav_matrix_commutation_mn_pre} function computes the product of the commutation matrix with a matrix A, without explicitly creating the commutation matrix. The matrix A must have m*n rows, where m and n are integers. The \code{lav_matrix_symmetric_sqrt} function computes the square root of a positive definite symmetric matrix (using an eigen decomposition). If some of the eigenvalues are negative, they are silently fixed to zero. The \code{lav_matrix_orthogonal_complement} function computes an orthogonal complement of the matrix A, using a QR decomposition. The \code{lav_matrix_bdiag} function constructs a block diagonal matrix from its arguments. The \code{lav_matrix_trace} function computes the trace (the sum of the diagonal elements) of a single (square) matrix, or if multiple matrices are provided (either as a list, or as multiple arguments), we first compute their product (which must result in a square matrix), and then we compute the trace; if \code{check = TRUE}, we check if the (final) matrix is square. } \references{ Magnus, J. R. and H. Neudecker (1999). Matrix Differential Calculus with Applications in Statistics and Econometrics, Second Edition, John Wiley.
} \examples{ # upper elements of a 3 by 3 symmetric matrix (row by row) x <- c(30, 16, 5, 10, 3, 1) # construct full symmetric matrix S <- lav_matrix_upper2full(x) # compute the normal theory `Gamma' matrix given a covariance # matrix (S), using the formula: Gamma = 2 * D^{+} (S %x% S) t(D^{+}) Gamma.NT <- 2 * lav_matrix_duplication_ginv_pre_post(S \%x\% S) Gamma.NT } lavaan/man/lav_partable.Rd0000644000176200001440000001163113052615413015214 0ustar liggesusers\name{lav_partable} \alias{lav_partable_independence} \alias{lav_partable_unrestricted} \alias{lav_partable_df} \alias{lav_partable_ndat} \alias{lav_partable_npar} \alias{lav_partable_labels} \alias{lav_partable_from_lm} \alias{lav_partable_complete} \alias{lav_partable_attributes} \alias{lav_partable_merge} \title{lavaan partable functions} \description{Utility functions related to the parameter table (partable)} \usage{ # extract information from a parameter table lav_partable_df(partable) lav_partable_ndat(partable) lav_partable_npar(partable) lav_partable_attributes(partable, pta = NULL) # generate parameter labels lav_partable_labels(partable, blocks = "group", group.equal = "", group.partial = "", type = "user") # generate parameter table for specific models lav_partable_independence(lavobject = NULL, lavdata = NULL, lavoptions = NULL, lavsamplestats = NULL, sample.cov = NULL, sample.mean = NULL, sample.th = NULL, sample.th.idx = NULL) lav_partable_unrestricted(lavobject = NULL, lavdata = NULL, lavoptions = NULL, lavsamplestats = NULL, sample.cov = NULL, sample.mean = NULL, sample.slopes = NULL, sample.th = NULL, sample.th.idx = NULL) lav_partable_from_lm(object, est = FALSE, label = FALSE, as.data.frame. = FALSE) # complete a parameter table only containing a few columns (lhs,op,rhs) lav_partable_complete(partable = NULL, start = TRUE) # merge two parameter tables lav_partable_merge(pt1 = NULL, pt2 = NULL, remove.duplicated = FALSE, fromLast = FALSE, warn = TRUE) } \arguments{ \item{partable}{A parameter table. see \code{\link{lavParTable}} for more information.} \item{blocks}{Character vector. Which columns in the parameter table should be taken to distinguish between different blocks/groups of parameters (and hence be given different labels)?} \item{group.equal}{The same options can be used here as in the fitting functions. Parameters that are constrained to be equal across groups will be given the same label.} \item{group.partial}{A vector of character strings containing the labels of the parameters which should be free in all groups.} \item{type}{Character string. Can be either `user' or `free' to select all entries or only the free parameters from the parameter table respectively.} \item{lavobject}{An object of class `lavaan'. If this argument is provided, it should be the only argument. All the values for the other arguments are extracted from this object.} \item{lavdata}{An object of class `lavData'. The Data slot from a lavaan object.} \item{lavoptions}{A named list. The Options slot from a lavaan object.} \item{lavsamplestats}{An object of class `lavSampleStats'. The SampleStats slot from a lavaan object.} \item{sample.cov}{Optional list of numeric matrices. Each list element contains a sample variance-covariance matrix for this group. If provided, these values will be used as starting values.} \item{sample.mean}{Optional list of numeric vectors. Each list element contains a sample mean vector for this group.
If provided, these values will be used as starting values.} \item{sample.slopes}{Optional list of numeric matrices. Each list element contains the sample slopes for this group (only used when \code{conditional.x = TRUE}). If provided, these values will be used as starting values.} \item{sample.th}{Optional list of numeric vectors. Each list element contains a vector of sample thresholds for this group. If provided (and also sample.th.idx is provided), these values will be used as starting values.} \item{sample.th.idx}{Optional list of integers. Each list contains the threshold indices for this group.} \item{est}{Logical. If TRUE, include the fitted estimates in the parameter table.} \item{label}{Logical. If TRUE, include parameter labels in the parameter table.} \item{as.data.frame.}{Logical. If TRUE, return the parameter table as a data.frame.} \item{object}{An object of class \code{lm}.} \item{start}{Logical. If TRUE, include a start column, based on the simple method for generating starting values.} \item{pta}{A list containing parameter attributes.} \item{pt1}{A parameter table.} \item{pt2}{A parameter table.} \item{remove.duplicated}{Logical. If \code{TRUE}, remove duplicated elements when merging two parameter tables.} \item{fromLast}{Logical. If \code{TRUE}, duplicated elements are considered from the bottom of the merged parameter table.} \item{warn}{Logical. If \code{TRUE}, a warning is produced when duplicated elements are found when merging two parameter tables.} } \examples{ # generate syntax for an independence model HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) lav <- lav_partable_independence(fit) as.data.frame(lav, stringsAsFactors = FALSE) # how many free parameters? lav_partable_npar(lav) # how many sample statistics? lav_partable_ndat(lav) # how many degrees of freedom? lav_partable_df(lav) } lavaan/man/standardizedSolution.Rd0000644000176200001440000000445413054027023016772 0ustar liggesusers\name{standardizedSolution} \alias{standardizedSolution} \alias{standardizedsolution} \title{Standardized Solution} \description{ Standardized solution of a latent variable model.} \usage{ standardizedSolution(object, type = "std.all", se = TRUE, zstat = TRUE, pvalue = TRUE, remove.eq = TRUE, remove.ineq = TRUE, remove.def = FALSE, GLIST = NULL, est = NULL) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{type}{If \code{"std.lv"}, the standardized estimates are based on the variances of the (continuous) latent variables only. If \code{"std.all"}, the standardized estimates are based on the variances of both (continuous) observed and latent variables. If \code{"std.nox"}, the standardized estimates are based on the variances of both (continuous) observed and latent variables, but not the variances of exogenous covariates.} \item{se}{Logical. If TRUE, standard errors for the standardized parameters will be computed, together with a z-statistic and a p-value.} \item{zstat}{Logical. If \code{TRUE}, an extra column is added containing the so-called z-statistic, which is simply the value of the estimate divided by its standard error.} \item{pvalue}{Logical. If \code{TRUE}, an extra column is added containing the pvalues corresponding to the z-statistic, evaluated under a standard normal distribution.} \item{remove.eq}{Logical. If TRUE, filter the output by removing all rows containing equality constraints, if any.} \item{remove.ineq}{Logical.
If TRUE, filter the output by removing all rows containing inequality constraints, if any.} \item{remove.def}{Logical. If TRUE, filter the output by removing all rows containing parameter definitions, if any.} \item{GLIST}{List of model matrices. If provided, they will be used instead of the GLIST inside the object@Model slot.} \item{est}{Numeric. Parameter values (as in the `est' column of a parameter table). If provided, they will be used instead of the parameters that can be extracted from object.} } \value{ A data.frame containing standardized model parameters. } \examples{ HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) standardizedSolution(fit) } lavaan/man/lavExport.Rd0000644000176200001440000000347212142662055014553 0ustar liggesusers\name{lavExport} \alias{lavExport} \title{lavaan Export} \description{ Export a fitted lavaan object to an external program.} \usage{ lavExport(object, target = "lavaan", prefix = "sem", dir.name = "lavExport", export = TRUE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{target}{The target program. Current options are \code{"lavaan"} and \code{"Mplus"}.} \item{prefix}{The prefix used to create the input files; the name of the input file has the pattern \sQuote{prefix dot target dot in}; the name of the data file has the pattern \sQuote{prefix dot target dot raw}.} \item{dir.name}{The directory name (including a full path) where the input files will be written.} \item{export}{If \code{TRUE}, the files are written to the output directory (\code{dir.name}). If \code{FALSE}, only the syntax is generated as a character string.} } \value{ If \code{export = TRUE}, a directory (called \code{lavExport} by default) will be created, typically containing a data file, and an input file so that the same analysis can be run using an external program. If \code{export = FALSE}, a character string containing the model syntax only for the target program. } \details{ This function was mainly created to quickly generate an Mplus syntax file to compare the results between Mplus and lavaan. The target \code{"lavaan"} can be useful to create a full model syntax as needed for the \code{lavaan()} function. More targets (perhaps for \code{LISREL} or \code{EQS}) will be added in future releases. } \seealso{\code{\link{lavaanify}}, \code{\link{mplus2lavaan}}} \examples{ HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) out <- lavExport(fit, target = "Mplus", export=FALSE) cat(out) } lavaan/man/lavOptions.Rd0000644000176200001440000004052413043110360014710 0ustar liggesusers\name{lavOptions} \alias{lavOptions} \alias{lavoptions} \title{lavaan Options} \description{ Show the default options used by the \code{lavaan()} function. The options can be changed by passing 'name = value' arguments to the \code{lavaan()} function call, where they will be added to the '...' argument. } \usage{ lavOptions(x = NULL, default = NULL, mimic = "lavaan") } \arguments{ \item{x}{Character. A character string holding an option name, or a character string vector holding multiple option names. All option names are converted to lower case.} \item{default}{If a single option is specified but not available, this value is returned.} \item{mimic}{Character.
Adjust the option list for this mimic flag.} } \details{ This is the full list of options that are accepted by the \code{lavaan()} function, organized in several sections: Model features (always available): \describe{ \item{\code{meanstructure}:}{If \code{TRUE}, the means of the observed variables enter the model. If \code{"default"}, the value is set based on the user-specified model, and/or the values of other arguments.} \item{\code{int.ov.free}:}{If \code{FALSE}, the intercepts of the observed variables are fixed to zero.} \item{\code{int.lv.free}:}{If \code{FALSE}, the intercepts of the latent variables are fixed to zero.} \item{\code{conditional.x}:}{If \code{TRUE}, we set up the model conditional on the exogenous `x' covariates; the model-implied sample statistics only include the non-x variables. If \code{FALSE}, the exogenous `x' variables are modeled jointly with the other variables, and the model-implied statistics reflect both sets of variables. If \code{"default"}, the value is set depending on the estimator, and whether or not the model involves categorical endogenous variables.} \item{\code{fixed.x}:}{If \code{TRUE}, the exogenous `x' covariates are considered fixed variables and the means, variances and covariances of these variables are fixed to their sample values. If \code{FALSE}, they are considered random, and the means, variances and covariances are free parameters. If \code{"default"}, the value is set depending on the mimic option.} \item{\code{orthogonal}:}{If \code{TRUE}, the exogenous latent variables are assumed to be uncorrelated.} \item{\code{std.lv}:}{If \code{TRUE}, the metric of each latent variable is determined by fixing their (residual) variances to 1.0. If \code{FALSE}, the metric of each latent variable is determined by fixing the factor loading of the first indicator to 1.0.} \item{\code{parameterization}:}{Currently only used if data is categorical. If \code{"delta"}, the delta parameterization is used. If \code{"theta"}, the theta parameterization is used.} } Model features (only available for the \code{lavaan()} function): \describe{ \item{\code{auto.fix.first}:}{If \code{TRUE}, the factor loading of the first indicator is set to 1.0 for every latent variable.} \item{\code{auto.fix.single}:}{If \code{TRUE}, the residual variance (if included) of an observed indicator is set to zero if it is the only indicator of a latent variable.} \item{\code{auto.var}:}{If \code{TRUE}, the residual variances and the variances of exogenous latent variables are included in the model and set free.} \item{\code{auto.cov.lv.x}:}{If \code{TRUE}, the covariances of exogenous latent variables are included in the model and set free.} \item{\code{auto.cov.y}:}{If \code{TRUE}, the covariances of dependent variables (both observed and latent) are included in the model and set free.} \item{\code{auto.th}:}{If \code{TRUE}, thresholds for limited dependent variables are included in the model and set free.} \item{\code{auto.delta}:}{If \code{TRUE}, response scaling parameters for limited dependent variables are included in the model and set free.} } Data options: \describe{ \item{\code{std.ov}:}{If \code{TRUE}, all observed variables are standardized before entering the analysis.} \item{\code{missing}:}{If \code{"listwise"}, cases with missing values are removed listwise from the data frame before analysis.
If \code{"direct"} or \code{"ml"} or \code{"fiml"} and the estimator is maximum likelihood, Full Information Maximum Likelihood (FIML) estimation is used using all available data in the data frame. This is only valid if the data are missing completely at random (MCAR) or missing at random (MAR). If \code{"default"}, the value is set depending on the estimator and the mimic option.} } Data summary options: \describe{ \item{\code{sample.cov.rescale}:}{If \code{TRUE}, the sample covariance matrix provided by the user is internally rescaled by multiplying it with a factor (N-1)/N. If \code{"default"}, the value is set depending on the estimator and the likelihood option: it is set to \code{TRUE} if maximum likelihood estimation is used and \code{likelihood="normal"}, and \code{FALSE} otherwise.} \item{\code{ridge}:}{Numeric. Small constant used for ridging. Only used if the sample covariance matrix is non positive definite.} } Multiple group options: \describe{ \item{\code{group.label}:}{A character vector. The user can specify which group (or factor) levels need to be selected from the grouping variable, and in which order. If missing, all grouping levels are selected, in the order as they appear in the data.} \item{\code{group.equal}:}{A vector of character strings. Only used in a multiple group analysis. Can be one or more of the following: \code{"loadings"}, \code{"intercepts"}, \code{"means"}, \code{"thresholds"}, \code{"regressions"}, \code{"residuals"}, \code{"residual.covariances"}, \code{"lv.variances"} or \code{"lv.covariances"}, specifying the pattern of equality constraints across multiple groups.} \item{\code{group.partial}:}{A vector of character strings containing the labels of the parameters which should be free in all groups (thereby overriding the group.equal argument for some specific parameters).} \item{\code{group.w.free}:}{Logical. If \code{TRUE}, the group frequencies are considered to be free parameters in the model. In this case, a Poisson model is fitted to estimate the group frequencies. If \code{FALSE} (the default), the group frequencies are fixed to their observed values.} } Estimation options: \describe{ \item{\code{estimator}:}{The estimator to be used. Can be one of the following: \code{"ML"} for maximum likelihood, \code{"GLS"} for generalized least squares, \code{"WLS"} for weighted least squares (sometimes called ADF estimation), \code{"ULS"} for unweighted least squares and \code{"DWLS"} for diagonally weighted least squares. These are the main options that affect the estimation. For convenience, the \code{"ML"} option can be extended as \code{"MLM"}, \code{"MLMV"}, \code{"MLMVS"}, \code{"MLF"}, and \code{"MLR"}. The estimation will still be plain \code{"ML"}, but now with robust standard errors and a robust (scaled) test statistic. For \code{"MLM"}, \code{"MLMV"}, \code{"MLMVS"}, classic robust standard errors are used (\code{se="robust.sem"}); for \code{"MLF"}, standard errors are based on first-order derivatives (\code{se="first.order"}); for \code{"MLR"}, `Huber-White' robust standard errors are used (\code{se="robust.huber.white"}).
In addition, \code{"MLM"} will compute a Satorra-Bentler scaled (mean adjusted) test statistic (\code{test="satorra.bentler"}) , \code{"MLMVS"} will compute a mean and variance adjusted test statistic (Satterthwaite style) (\code{test="mean.var.adjusted"}), \code{"MLMV"} will compute a mean and variance adjusted test statistic (scaled and shifted) (\code{test="scaled.shifted"}), and \code{"MLR"} will compute a test statistic which is asymptotically equivalent to the Yuan-Bentler T2-star test statistic. Analogously, the estimators \code{"WLSM"} and \code{"WLSMV"} imply the \code{"DWLS"} estimator (not the \code{"WLS"} estimator) with robust standard errors and a mean or mean and variance adjusted test statistic. Estimators \code{"ULSM"} and \code{"ULSMV"} imply the \code{"ULS"} estimator with robust standard errors and a mean or mean and variance adjusted test statistic.} \item{\code{likelihood}:}{Only relevant for ML estimation. If \code{"wishart"}, the wishart likelihood approach is used. In this approach, the covariance matrix has been divided by N-1, and both standard errors and test statistics are based on N-1. If \code{"normal"}, the normal likelihood approach is used. Here, the covariance matrix has been divided by N, and both standard errors and test statistics are based on N. If \code{"default"}, it depends on the mimic option: if \code{mimic="lavaan"} or \code{mimic="Mplus"}, normal likelihood is used; otherwise, wishart likelihood is used.} \item{\code{link}:}{Currently only used if estimator is MML. If \code{"logit"}, a logit link is used for binary and ordered observed variables. If \code{"probit"}, a probit link is used. If \code{"default"}, it is currently set to \code{"probit"} (but this may change).} \item{\code{information}:}{If \code{"expected"}, the expected information matrix is used (to compute the standard errors). If \code{"observed"}, the observed information matrix is used. If \code{"default"}, the value is set depending on the estimator and the mimic option.} \item{\code{se}:}{If \code{"standard"}, conventional standard errors are computed based on inverting the (expected or observed) information matrix. If \code{"first.order"}, standard errors are computed based on first-order derivatives. If \code{"robust.sem"}, conventional robust standard errors are computed. If \code{"robust.huber.white"}, standard errors are computed based on the `mlr' (aka pseudo ML, Huber-White) approach. If \code{"robust"}, either \code{"robust.sem"} or \code{"robust.huber.white"} is used depending on the estimator, the mimic option, and whether the data are complete or not. If \code{"boot"} or \code{"bootstrap"}, bootstrap standard errors are computed using standard bootstrapping (unless Bollen-Stine bootstrapping is requested for the test statistic; in this case bootstrap standard errors are computed using model-based bootstrapping). If \code{"none"}, no standard errors are computed.} \item{\code{test}:}{If \code{"standard"}, a conventional chi-square test is computed. If \code{"Satorra.Bentler"}, a Satorra-Bentler scaled test statistic is computed. If \code{"Yuan.Bentler"}, a Yuan-Bentler scaled test statistic is computed. If \code{"mean.var.adjusted"} or \code{"Satterthwaite"}, a mean and variance adjusted test statistic is compute. If \code{"scaled.shifted"}, an alternative mean and variance adjusted test statistic is computed (as in Mplus version 6 or higher). 
If \code{"boot"} or \code{"bootstrap"} or \code{"Bollen.Stine"}, the Bollen-Stine bootstrap is used to compute the bootstrap probability value of the test statistic. If \code{"default"}, the value depends on the values of other arguments.} \item{\code{bootstrap}:}{Number of bootstrap draws, if bootstrapping is used.} \item{\code{do.fit}:}{If \code{FALSE}, the model is not fit, and the current starting values of the model parameters are preserved.} } Optimization options: \describe{ \item{\code{control}:}{A list containing control parameters passed to the external optimizer. By default, lavaan uses \code{"nlminb"}. See the manpage of \code{\link{nlminb}} for an overview of the control parameters. If another (external) optimizer is selected, see the manpage for that optimizer to see the possible control parameters.} \item{\code{optim.method}:}{Character. The optimizer that should be used. For unconstrained optimization (the model syntax does not include any "==", ">" or "<" operators), the available options are \code{"nlminb"} (the default), \code{"BFGS"} and \code{"L-BFGS-B"}. For constrained optimization, the only available option is \code{"nlminb.constr"}.} } Categorical estimation options: \describe{ \item{\code{zero.add}:}{A numeric vector containing two values. These values affect the calculation of polychoric correlations when some frequencies in the bivariate table are zero. The first value only applies for 2x2 tables. The second value for larger tables. This value is added to the zero frequency in the bivariate table. If \code{"default"}, the value is set depending on the \code{"mimic"} option. By default, lavaan uses \code{zero.add = c(0.5. 0.0)}.} \item{\code{zero.keep.margins}:}{Logical. This argument only affects the computation of polychoric correlations for 2x2 tables with an empty cell, and where a value is added to the empty cell. If \code{TRUE}, the other values of the frequency table are adjusted so that all margins are unaffected. If \code{"default"}, the value is set depending on the \code{"mimic"}. The default is \code{TRUE}.} \item{\code{zero.cell.warn}:}{Logical. Only used if some observed endogenous variables are categorical. If \code{TRUE}, give a warning if one or more cells of a bivariate frequency table are empty.} } Starting values options: \describe{ \item{\code{start}:}{If it is a character string, the two options are currently \code{"simple"} and \code{"Mplus"}. In the first case, all parameter values are set to zero, except the factor loadings (set to one), the variances of latent variables (set to 0.05), and the residual variances of observed variables (set to half the observed variance). If \code{"Mplus"}, we use a similar scheme, but the factor loadings are estimated using the fabin3 estimator (tsls) per factor. If \code{start} is a fitted object of class \code{\linkS4class{lavaan}}, the estimated values of the corresponding parameters will be extracted. If it is a model list, for example the output of the \code{paramaterEstimates()} function, the values of the \code{est} or \code{start} or \code{ustart} column (whichever is found first) will be extracted.} } Check options: \describe{ \item{\code{check}:}{Character vector. 
If \code{check} includes \code{"start"}, the starting values are checked for possibly inconsistent values (for example, values implying correlations larger than one); if \code{check} includes \code{"post"}, a check is performed after (post) fitting, to check if the solution is admissible.} }
Verbosity options: \describe{ \item{\code{verbose}:}{If \code{TRUE}, the function value is printed out during each iteration.} \item{\code{warn}:}{If \code{TRUE}, some (possibly harmless) warnings are printed out during the iterations.} \item{\code{debug}:}{If \code{TRUE}, debugging information is printed out.} }
Miscellaneous: \describe{ \item{\code{model.type}:}{Set the model type: possible values are \code{"cfa"}, \code{"sem"} or \code{"growth"}. This may affect how starting values are computed, and may be used to alter the terminology used in the summary output, or the layout of path diagrams that are based on a fitted lavaan object.} \item{\code{mimic}:}{If \code{"Mplus"}, an attempt is made to mimic the Mplus program. If \code{"EQS"}, an attempt is made to mimic the EQS program. If \code{"default"}, the value is (currently) set to \code{"lavaan"}, which is very close to \code{"Mplus"}.} \item{\code{representation}:}{If \code{"LISREL"}, the classical LISREL matrix representation is used to represent the model (using the all-y variant). No other options are available (for now).} } } \seealso{ \code{\link{lavaan}} } \examples{ lavOptions() lavOptions("std.lv") lavOptions(c("std.lv", "orthogonal")) } lavaan/man/Demo.growth.Rd0000644000176200001440000000205712104004704014747 0ustar liggesusers\name{Demo.growth} \alias{Demo.growth} \docType{data} \title{ Demo dataset for illustrating a linear growth model. } \description{ A toy dataset containing measures on 4 time points (t1, t2, t3 and t4), two predictors (x1 and x2) influencing the random intercept and slope, and a time-varying covariate (c1, c2, c3 and c4). } \usage{data(Demo.growth)} \format{ A data frame of 400 observations of 10 variables. \describe{ \item{\code{t1}}{Measured value at time point 1} \item{\code{t2}}{Measured value at time point 2} \item{\code{t3}}{Measured value at time point 3} \item{\code{t4}}{Measured value at time point 4} \item{\code{x1}}{Predictor 1 influencing intercept and slope} \item{\code{x2}}{Predictor 2 influencing intercept and slope} \item{\code{c1}}{Time-varying covariate time point 1} \item{\code{c2}}{Time-varying covariate time point 2} \item{\code{c3}}{Time-varying covariate time point 3} \item{\code{c4}}{Time-varying covariate time point 4} } } \seealso{ \code{\link{growth}} } \examples{ head(Demo.growth) } lavaan/man/InformativeTesting.Rd0000644000176200001440000001365212726446531016410 0ustar liggesusers\name{InformativeTesting} \alias{InformativeTesting} \alias{informativetesting} \title{Testing Order/Inequality Constrained Hypotheses in SEM} \description{Testing order/inequality constrained hypotheses in SEM.} \usage{ InformativeTesting(model = NULL, data, constraints = NULL, R = 1000L, type = "bollen.stine", return.LRT = TRUE, double.bootstrap = "standard", double.bootstrap.R = 249L, double.bootstrap.alpha = 0.05, parallel = c("no", "multicore", "snow"), ncpus = 1L, cl = NULL, verbose = FALSE, \dots) } \arguments{ \item{model}{Model syntax specifying the model.
See \code{\link{model.syntax}} for more information.} \item{data}{The data frame containing the observed variables being used to fit the model.} \item{constraints}{The imposed inequality constraints on the model.} \item{R}{Integer; number of bootstrap draws. The default value is set to 1000.} \item{type}{If \code{"parametric"}, the parametric bootstrap is used. If \code{"bollen.stine"}, the semi-nonparametric Bollen-Stine bootstrap is used. The default is set to \code{"bollen.stine"}.} \item{return.LRT}{Logical; if \code{TRUE}, the function returns bootstrapped LRT-values.} \item{double.bootstrap}{If \code{"standard"} (default), the genuine double bootstrap is used to compute an additional set of plug-in p-values for each bootstrap sample. If \code{"no"}, no double bootstrap is used. If \code{"FDB"}, the fast double bootstrap is used to compute second-level LRT-values for each bootstrap sample. Note that \code{"FDB"} is experimental and should not be used by inexperienced users.} \item{double.bootstrap.R}{Integer; number of double bootstrap draws. The default value is set to 249.} \item{double.bootstrap.alpha}{The significance level to compute the adjusted alpha based on the plug-in p-values. Only used if \code{double.bootstrap = "standard"}. The default value is set to 0.05.} \item{parallel}{The type of parallel operation to be used (if any). If missing, the default is \code{"no"}.} \item{ncpus}{Integer: number of processes to be used in parallel operation: typically one would choose this to be the number of available CPUs.} \item{cl}{An optional parallel or snow cluster for use if \code{parallel = "snow"}. If not supplied, a cluster on the local machine is created for the duration of the \code{InformativeTesting} call.} \item{verbose}{Logical; if \code{TRUE}, information is shown at each bootstrap draw.} \item{...}{Other named arguments from the lavaan package which are passed to the function. For example \code{"group"} in a multiple group model.} } \details{The following hypothesis tests are available: \itemize{ \item Type A: Test H0: all restrictions with equalities ("=") active against HA: at least one inequality restriction (">") strictly true. \item Type B: Test H0: all restrictions with inequalities (">") (including some equalities ("=")) active against HA: at least one restriction false (some equality restrictions may be maintained).} } \value{An object of class InformativeTesting for which print and plot methods are available.} %\seealso{} \examples{
\dontrun{
#########################
### real data example ###
#########################
# Multiple group path model for facial burns example.
# model syntax with starting values.
burns.model <- 'Selfesteem ~ Age + c(m1, f1)*TBSA + HADS +
                start(-.10, -.20)*TBSA
                HADS ~ Age + c(m2, f2)*TBSA + RUM +
                start(.10, .20)*TBSA '

# constraints syntax
burns.constraints <- 'f2 > 0 ; m1 < 0
                      m2 > 0 ; f1 < 0
                      f2 > m2 ; f1 < m1'

# we only generate 2 bootstrap samples in this example; in practice
# you may wish to use a much higher number.
# the double bootstrap was switched off; in practice you probably
# want to set it to "standard".
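# a hedged aside: for larger R, the bootstrap could be parallelized via
# the 'parallel' and 'ncpus' arguments documented above, for example
#   parallel = "multicore", ncpus = 2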
example1 <- InformativeTesting(model = burns.model, data = FacialBurns,
                               R = 2, constraints = burns.constraints,
                               double.bootstrap = "no", group = "Sex")
example1

##########################
### artificial example ###
##########################
# Simple ANOVA model with 3 groups (N = 20 per group)
set.seed(1234)
Y <- cbind(c(rnorm(20,0,1), rnorm(20,0.5,1), rnorm(20,1,1)))
grp <- c(rep("1", 20), rep("2", 20), rep("3", 20))
Data <- data.frame(Y, grp)

# create model matrix
fit.lm <- lm(Y ~ grp, data = Data)
mfit <- fit.lm$model
mm <- model.matrix(mfit)

Y <- model.response(mfit)
X <- data.frame(mm[,2:3])
names(X) <- c("d1", "d2")
Data.new <- data.frame(Y, X)

# model
model <- 'Y ~ 1 + a1*d1 + a2*d2'

# fit without constraints
fit <- sem(model, data = Data.new)

# constraints syntax: mu1 < mu2 < mu3
constraints <- ' a1 > 0
                 a1 < a2 '

# we only generate 10 bootstrap samples in this example; in practice
# you may wish to use a much higher number, say > 1000. The double
# bootstrap is not necessary in case of a univariate ANOVA model.
example2 <- InformativeTesting(model = model, data = Data.new,
                               start = parTable(fit),
                               R = 10L, double.bootstrap = "no",
                               constraints = constraints)
example2
} } \references{ Van de Schoot, R., Hoijtink, H., & Dekovic, M. (2010). Testing inequality constrained hypotheses in SEM models. \emph{Structural Equation Modeling}, \bold{17}, 443-463. Van de Schoot, R., & Strohmeier, D. (2011). Testing informative hypotheses in SEM increases power: An illustration contrasting classical and Bayesian approaches. \emph{International Journal of Behavioral Development}, \bold{35}, 180-190. Silvapulle, M. J., & Sen, P. K. (2005). \emph{Constrained Statistical Inference}. Wiley, New York. } \author{ Leonard Vanbrabant \email{lgf.vanbrabant@gmail.com} } lavaan/man/parameterEstimates.Rd0000644000176200001440000000761212760033020016414 0ustar liggesusers\name{parameterEstimates} \alias{parameterEstimates} \alias{parameterestimates} \title{Parameter Estimates} \description{ Parameter estimates of a latent variable model.} \usage{ parameterEstimates(object, se = TRUE, zstat = TRUE, pvalue = TRUE, ci = TRUE, level = 0.95, boot.ci.type = "perc", standardized = FALSE, fmi = FALSE, remove.system.eq = TRUE, remove.eq = TRUE, remove.ineq = TRUE, remove.def = FALSE, rsquare = FALSE, add.attributes = FALSE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{se}{Logical. If \code{TRUE}, include a column containing the standard errors. If \code{FALSE}, this implies \code{zstat}, \code{pvalue} and \code{ci} are also \code{FALSE}.} \item{zstat}{Logical. If \code{TRUE}, an extra column is added containing the so-called z-statistic, which is simply the value of the estimate divided by its standard error.} \item{pvalue}{Logical. If \code{TRUE}, an extra column is added containing the p-values corresponding to the z-statistic, evaluated under a standard normal distribution.} \item{ci}{If \code{TRUE}, confidence intervals are added to the output.} \item{level}{The confidence level required.} \item{boot.ci.type}{If bootstrapping was used, the type of interval required. The value should be one of \code{"norm"}, \code{"basic"}, \code{"perc"}, or \code{"bca.simple"}. For the first three options, see the help page of the \code{\link[boot]{boot.ci}} function in the boot package. The \code{"bca.simple"} option produces intervals using the adjusted bootstrap percentile (BCa) method, but with no correction for acceleration (only for bias).} \item{standardized}{Logical.
If \code{TRUE}, standardized estimates are added to the output.} \item{fmi}{Logical. If \code{TRUE}, an extra column is added containing the fraction of missing information for each estimated parameter. Only available if \code{estimator="ML"}, \code{missing="(fi)ml"}, and \code{se="standard"}. See references for more information.} \item{remove.eq}{Logical. If \code{TRUE}, filter the output by removing all rows containing user-specified equality constraints, if any.} \item{remove.system.eq}{Logical. If \code{TRUE}, filter the output by removing all rows containing system-generated equality constraints, if any.} \item{remove.ineq}{Logical. If \code{TRUE}, filter the output by removing all rows containing inequality constraints, if any.} \item{remove.def}{Logical. If \code{TRUE}, filter the output by removing all rows containing parameter definitions, if any.} \item{rsquare}{Logical. If \code{TRUE}, add additional rows containing the rsquare values (in the \code{est} column) of all endogenous variables in the model. Both the \code{lhs} and \code{rhs} column contain the name of the endogenous variable, while the \code{op} column contains \code{r2}, to indicate that the values in the \code{est} column are rsquare values.} \item{add.attributes}{Logical. If \code{TRUE}, add a class attribute (class \code{lavaan.parameterEstimates}) and other attributes to be used by the print function for this class (\code{print.lavaan.parameterEstimates}). This is used by the \code{summary()} function, to prettify the output.} } \value{ A data.frame containing the estimated parameters, standard errors, and (by default) z-values, p-values, and the lower and upper values of the confidence intervals. If requested, extra columns are added with standardized versions of the parameter estimates. } \references{ Savalei, V., & Rhemtulla, M. (2012). On obtaining estimates of the fraction of missing information from FIML. Structural Equation Modeling: A Multidisciplinary Journal, 19(3), 477-494. } \examples{ HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) parameterEstimates(fit) } lavaan/man/estfun.Rd0000644000176200001440000000373512675011004014066 0ustar liggesusers\name{estfun} \alias{estfun.lavaan} \alias{lavScores} \title{Extract Empirical Estimating Functions} \description{ A function for extracting the empirical estimating functions of a fitted lavaan model. This is the derivative of the objective function with respect to the parameter vector, evaluated at the observed (case-wise) data. In other words, this function returns the case-wise scores, evaluated at the fitted model parameters. } \usage{ estfun.lavaan(object, scaling = FALSE, ignore.constraints = FALSE, remove.duplicated = TRUE, remove.empty.cases = TRUE) lavScores(object, scaling = FALSE, ignore.constraints = FALSE, remove.duplicated = TRUE, remove.empty.cases = TRUE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{scaling}{If \code{TRUE}, the scores are scaled to reflect the specific objective function used by lavaan. If \code{FALSE} (the default), the objective function is the loglikelihood function assuming multivariate normality.} \item{ignore.constraints}{Logical. If \code{TRUE}, the scores do not reflect the (equality or inequality) constraints.
If \code{FALSE}, the scores are computed by taking the unconstrained scores, and adding the term \code{t(R) lambda}, where \code{lambda} are the (case-wise) Lagrange multipliers, and \code{R} is the Jacobian of the constraint function. Only in the latter case will the sum of the columns be (almost) equal to zero.} \item{remove.duplicated}{If \code{TRUE}, and all the equality constraints have a simple form (eg. a == b), the unconstrained scores are post-multiplied with a transformation matrix in order to remove the duplicated parameters.} \item{remove.empty.cases}{If \code{TRUE}, empty cases with only missing values will be removed from the output.} } \value{An n x k matrix corresponding to n observations and k parameters.} \author{Ed Merkle; the \code{remove.duplicated}, \code{ignore.constraints} and \code{remove.empty.cases} arguments were added by Yves Rosseel} lavaan/man/fsr.Rd0000644000176200001440000000613013052017616013351 0ustar liggesusers\name{fsr} \alias{fsr} \title{Factor Score Regression} \description{ Fit a SEM model using factor score regression.} \usage{ fsr(model = NULL, data = NULL, cmd = "sem", fsr.method = "Croon", fs.method = "Bartlett", fs.scores = FALSE, Gamma.NT = TRUE, lvinfo = FALSE, ...) } \arguments{ \item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.} \item{data}{An optional data frame containing the observed variables used in the model. If some variables are declared as ordered factors, lavaan will treat them as ordinal variables.} \item{cmd}{Character. Which command is used to run the sem models. The possible choices are \code{"sem"} or \code{"lavaan"}, determining how we deal with default options.} \item{fsr.method}{Character. Factor score regression method. Possible options are \code{"naive"}, \code{"Skrondal-Laake"}, and \code{"Croon"}.} \item{fs.method}{Character. Factor score estimation method. Possible options are \code{"Bartlett"} and \code{"regression"}.} \item{fs.scores}{Logical. If \code{TRUE}, explicitly compute factor scores; if \code{FALSE}, only compute the mean vector and variance matrix of the factor scores.} \item{Gamma.NT}{Logical. Only needed when \code{se="robust.sem"} and data are missing; if \code{TRUE}, compute Gamma (N times the variance matrix of the sample statistics) assuming normality.} \item{lvinfo}{Logical. If \code{TRUE}, return latent variable information as an attribute to the output.} \item{...}{Further arguments that we pass to the \code{"cfa"}, \code{"sem"} or \code{"lavaan"} functions.} } \details{ The \code{fsr} function implements a two-step procedure to estimate the parameters of the structural (regression) part of a SEM model. In a first step, factor scores are computed for each latent variable. In a second step, the latent variables are replaced by the factor scores, and a path analysis is used to estimate all remaining model parameters. Special techniques are used in order to ensure (approximately) unbiased estimation of point estimates and standard errors. } \value{ An object of class \code{\linkS4class{lavaan}}, for which several methods are available, including a \code{summary} method. } \references{ Devlieger, I., Mayer, A., & Rosseel, Y. (2015). Hypothesis Testing Using Factor Score Regression: A Comparison of Four Methods. Educational and Psychological Measurement.
http://epm.sagepub.com/content/early/2015/09/29/0013164415607618.abstract } \seealso{ \code{\link{lavaan}}, \code{\link{sem}}, \code{\link{lavPredict}} } \examples{ ## The Industrialization and Political Democracy Example ## Bollen (1989), page 332, simplified model <- ' # latent variable definitions ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 # regressions dem60 ~ ind60 ' fit <- fsr(model, data = PoliticalDemocracy, fsr.method = "Skrondal-Laake") summary(fit) } lavaan/man/lavaan.Rd0000644000176200001440000001230013043110661014011 0ustar liggesusers\name{lavaan} \alias{lavaan} \title{Fit a Latent Variable Model} \description{ Fit a latent variable model.} \usage{ lavaan(model = NULL, data = NULL, ordered = NULL, sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, group = NULL, cluster = NULL, constraints = "", WLS.V = NULL, NACOV = NULL, slotOptions = NULL, slotParTable = NULL, slotSampleStats = NULL, slotData = NULL, slotModel = NULL, slotCache = NULL, ...) } \arguments{ \item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.} \item{data}{An optional data frame containing the observed variables used in the model. If some variables are declared as ordered factors, lavaan will treat them as ordinal variables.} \item{ordered}{Character vector. Only used if the data is in a data.frame. Treat these variables as ordered (ordinal) variables, if they are endogenous in the model. Importantly, all other variables will be treated as numeric (unless they are declared as ordered in the original data.frame).} \item{sample.cov}{Numeric matrix. A sample variance-covariance matrix. The rownames and/or colnames must contain the observed variable names. For a multiple group analysis, a list with a variance-covariance matrix for each group. Note that if maximum likelihood estimation is used and \code{likelihood="normal"}, the user-provided covariance matrix is internally rescaled by multiplying it with a factor (N-1)/N, to ensure that the covariance matrix has been divided by N. This can be turned off by setting the \code{sample.cov.rescale} argument to \code{FALSE}.} \item{sample.mean}{A sample mean vector. For a multiple group analysis, a list with a mean vector for each group.} \item{sample.nobs}{Number of observations if the full data frame is missing and only sample moments are given. For a multiple group analysis, a list or a vector with the number of observations for each group.} \item{group}{A variable name in the data frame defining the groups in a multiple group analysis.} \item{cluster}{Not used yet.} \item{constraints}{Additional (in)equality constraints not yet included in the model syntax. See \code{\link{model.syntax}} for more information.} \item{WLS.V}{A user-provided weight matrix to be used by estimator \code{"WLS"}; if the estimator is \code{"DWLS"}, only the diagonal of this matrix will be used. For a multiple group analysis, a list with a weight matrix for each group. The elements of the weight matrix should be in the following order (if all data is continuous): first the means (if a meanstructure is involved), then the lower triangular elements of the covariance matrix including the diagonal, ordered column by column.
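For the all-continuous case, a hedged sketch of this ordering (\code{sample.mean} and \code{sample.cov} stand in for the user's own sample moments):
\preformatted{
## with a meanstructure, the rows/columns of WLS.V follow the order:
## c(sample.mean, lav_matrix_vech(sample.cov))
## where lav_matrix_vech() stacks the lower triangle (including the
## diagonal) column by column
}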
In the categorical case: first the thresholds (including the means for continuous variables), then the slopes (if any), the variances of continuous variables (if any), and finally the lower triangular elements of the correlation/covariance matrix excluding the diagonal, ordered column by column.} \item{NACOV}{A user-provided matrix containing the elements of (N times) the asymptotic variance-covariance matrix of the sample statistics. For a multiple group analysis, a list with an asymptotic variance-covariance matrix for each group. See the \code{WLS.V} argument for information about the order of the elements.} \item{slotOptions}{Options slot from a fitted lavaan object. If provided, no new Options slot will be created by this call.} \item{slotParTable}{ParTable slot from a fitted lavaan object. If provided, no new ParTable slot will be created by this call.} \item{slotSampleStats}{SampleStats slot from a fitted lavaan object. If provided, no new SampleStats slot will be created by this call.} \item{slotData}{Data slot from a fitted lavaan object. If provided, no new Data slot will be created by this call.} \item{slotModel}{Model slot from a fitted lavaan object. If provided, no new Model slot will be created by this call.} \item{slotCache}{Cache slot from a fitted lavaan object. If provided, no new Cache slot will be created by this call.} \item{...}{Many more additional options can be defined, using 'name = value'. See \code{\link{lavOptions}} for a complete list.} } \value{ An object of class \code{\linkS4class{lavaan}}, for which several methods are available, including a \code{summary} method. } \references{ Yves Rosseel (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/.} \seealso{ \code{\link{cfa}}, \code{\link{sem}}, \code{\link{growth}} } \examples{ # The Holzinger and Swineford (1939) example HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- lavaan(HS.model, data=HolzingerSwineford1939, auto.var=TRUE, auto.fix.first=TRUE, auto.cov.lv.x=TRUE) summary(fit, fit.measures=TRUE) } lavaan/man/lavaan-deprecated.Rd0000644000176200001440000000354412506010276016123 0ustar liggesusers\name{lavaan-deprecated} \alias{lavaan-deprecated} \alias{vech} \alias{vechru} \alias{vech.reverse} \alias{vechru.reverse} \alias{upper2full} \alias{vechr} \alias{vechu} \alias{vechr.reverse} \alias{vechu.reverse} \alias{lower2full} \alias{duplicationMatrix} \alias{commutationMatrix} \alias{sqrtSymmetricMatrix} \title{Deprecated Functions in the lavaan package} \description{These functions have been renamed, or have been removed. They are still included in this version for convenience, but they may eventually be removed.} \usage{ vech(S, diagonal = TRUE) vechr(S, diagonal = TRUE) vechu(S, diagonal = TRUE) vechru(S, diagonal = TRUE) vech.reverse(x, diagonal = TRUE) vechru.reverse(x, diagonal = TRUE) vechr.reverse(x, diagonal = TRUE) vechu.reverse(x, diagonal = TRUE) lower2full(x, diagonal = TRUE) upper2full(x, diagonal = TRUE) duplicationMatrix(n = 1L) commutationMatrix(m = 1L, n = 1L) sqrtSymmetricMatrix(S) } \arguments{ \item{S}{A symmetric matrix.} \item{x}{A numeric vector containing the lower triangular or upper triangular elements of a symmetric matrix, possibly including the diagonal elements.} \item{diagonal}{Logical. If \code{TRUE}, the diagonal is included. If \code{FALSE}, the diagonal is not included.} \item{n}{Integer.
Dimension of the symmetric matrix, or column dimension of a non-square matrix.} \item{m}{Integer. Row dimension of a matrix.} } \details{ The \code{vech} function has been renamed \code{lav_matrix_vech}. The \code{vech.reverse} function has been renamed \code{lav_matrix_vech_reverse}. The \code{lower2full} function has been renamed \code{lav_matrix_lower2full}. The \code{duplicationMatrix} function has been renamed \code{lav_matrix_duplication}. The \code{commutationMatrix} function has been renamed \code{lav_matrix_commutation}. The \code{sqrtSymmetricMatrix} function has been renamed \code{lav_matrix_symmetric_sqrt}. } lavaan/man/lavaanList-class.Rd0000644000176200001440000000620212736255423015770 0ustar liggesusers\name{lavaanList-class} \docType{class} \alias{lavaanList-class} \alias{coef,lavaanList-method} \alias{summary,lavaanList-method} \title{Class For Representing A List of (Fitted) Latent Variable Models} \description{The \code{lavaanList} class represents a collection of (fitted) latent variable models, for a (potentially large) number of datasets. It contains information about the model (which is always the same), and for every dataset a set of (user-specified) slots from a regular lavaan object.} \section{Objects from the Class}{ Objects can be created via the \code{\link{cfaList}}, \code{\link{semList}}, or \code{\link{lavaanList}} functions. } \section{Slots}{ \describe{ \item{\code{call}:}{The function call as returned by \code{match.call()}.} \item{\code{Options}:}{Named list of options that were provided by the user, or filled-in automatically.} \item{\code{ParTable}:}{Named list describing the model parameters. Can be coerced to a data.frame. In the documentation, this is called the `parameter table'.} \item{\code{pta}:}{Named list containing parameter table attributes.} \item{\code{Data}:}{Object of internal class \code{"Data"}: information about the data.} \item{\code{Model}:}{Object of internal class \code{"Model"}: the internal (matrix) representation of the model.} \item{\code{timingList}:}{List. Timing slot per dataset.} \item{\code{ParTableList}:}{List. ParTable slot per dataset.} \item{\code{DataList}:}{List. Data slot per dataset.} \item{\code{SampleStatsList}:}{List. SampleStats slot per dataset.} \item{\code{CacheList}:}{List. Cache slot per dataset.} \item{\code{vcovList}:}{List. vcov slot per dataset.} \item{\code{testList}:}{List. test slot per dataset.} \item{\code{optimList}:}{List. optim slot per dataset.} \item{\code{impliedList}:}{List. implied slot per dataset.} \item{\code{funList}:}{List. fun slot per dataset.} \item{\code{external}:}{List. Empty slot to be used by add-on packages.} } } \section{Methods}{ \describe{ \item{coef}{\code{signature(object = "lavaanList", type = "free")}: Returns the estimates of the parameters in the model as the columns in a matrix; each column corresponds to a different dataset. If \code{type="free"}, only the free parameters are returned. If \code{type="user"}, all parameters listed in the parameter table are returned, including constrained and fixed parameters.} \item{summary}{\code{signature(object = "lavaanList", header = TRUE, estimates = TRUE, nd = 3L)}: Print a summary of the collection of fitted models. If \code{header = TRUE}, the header section is printed. If \code{estimates = TRUE}, print the parameter estimates section. The argument \code{nd} determines the number of digits after the decimal point to be printed (currently only in the parameter estimates section).
Nothing is returned (use \code{parameterEstimates} or another extractor function to extract information from this object).} } } \seealso{ \code{\link{cfaList}}, \code{\link{semList}}, \code{\link{lavaanList}} } lavaan/man/PoliticalDemocracy.Rd0000644000176200001440000000367712736245224016341 0ustar liggesusers\name{PoliticalDemocracy} \alias{PoliticalDemocracy} \docType{data} \title{ Industrialization And Political Democracy Dataset } \description{ The `famous' Industrialization and Political Democracy dataset. This dataset is used throughout Bollen's 1989 book (see pages 12, 17, 36 in chapter 2, pages 228 and following in chapter 7, pages 321 and following in chapter 8). The dataset contains various measures of political democracy and industrialization in developing countries. } \usage{data(PoliticalDemocracy)} \format{ A data frame of 75 observations of 11 variables. \describe{ \item{\code{y1}}{Expert ratings of the freedom of the press in 1960} \item{\code{y2}}{The freedom of political opposition in 1960} \item{\code{y3}}{The fairness of elections in 1960} \item{\code{y4}}{The effectiveness of the elected legislature in 1960} \item{\code{y5}}{Expert ratings of the freedom of the press in 1965} \item{\code{y6}}{The freedom of political opposition in 1965} \item{\code{y7}}{The fairness of elections in 1965} \item{\code{y8}}{The effectiveness of the elected legislature in 1965} \item{\code{x1}}{The gross national product (GNP) per capita in 1960} \item{\code{x2}}{The inanimate energy consumption per capita in 1960} \item{\code{x3}}{The percentage of the labor force in industry in 1960} } } \source{ The dataset was retrieved from \verb{http://web.missouri.edu/~kolenikovs/Stat9370/democindus.txt} (link no longer valid; see discussion on SEMNET 18 Jun 2009) } \references{ Bollen, K. A. (1989). \emph{Structural Equations with Latent Variables.} Wiley Series in Probability and Mathematical Statistics. New York: Wiley. Bollen, K. A. (1979). Political democracy and the timing of development. \emph{American Sociological Review}, 44, 572-587. Bollen, K. A. (1980). Issues in the comparative measurement of political democracy. \emph{American Sociological Review}, 45, 370-390. } \examples{ head(PoliticalDemocracy) } lavaan/man/lavCor.Rd0000644000176200001440000001230012251325715014003 0ustar liggesusers\name{lavCor} \alias{lavCor} \title{Polychoric, polyserial and Pearson correlations} \description{ Fit an unrestricted model to compute polychoric, polyserial and/or Pearson correlations.} \usage{ lavCor(object, ordered = NULL, group = NULL, missing = "listwise", ov.names.x = NULL, se = "none", estimator = "two.step", ..., output = "cor") } \arguments{ \item{object}{Either a \code{data.frame}, or an object of class \code{\linkS4class{lavaan}}. If the input is a \code{data.frame}, and some variables are declared as ordered factors, lavaan will treat them as ordinal variables.} \item{ordered}{Character vector. Only used if \code{object} is a \code{data.frame}. Treat these variables as ordered (ordinal) variables. Importantly, all other variables will be treated as numeric (unless they are declared as ordered in the original data frame).} \item{group}{Only used if \code{object} is a \code{data.frame}. Specify a grouping variable.} \item{missing}{If \code{"listwise"}, cases with missing values are removed listwise from the data frame.
If \code{"direct"} or \code{"ml"} or \code{"fiml"} and the estimator is maximum likelihood, an EM algorithm is used to estimate the unrestricted covariance matrix (and mean vector). If \code{"pairwise"}, pairwise deletion is used. If \code{"default"}, the value is set depending on the estimator and the mimic option.} \item{ov.names.x}{Only used if \code{object} is a \code{data.frame}. Specify variables that need to be treated as exogenous. Only used if at least one variable is declared as ordered.} \item{se}{Only used if \code{output} (see below) contains standard errors. See the \code{\link{lavaan}} function for possible options.} \item{estimator}{If \code{"none"} or \code{"two.step"} or \code{"two.stage"}, only starting values are computed for the correlations (and thresholds), without any further estimation. If all variables are continuous, the starting values are the sample covariances (converted to correlations if \code{output = "cor"}). If at least one variable is ordered, the thresholds are computed using univariate information only. The polychoric and/or polyserial correlations are computed in a second stage, keeping the values of the thresholds constant. If an estimator (other than \code{"two.step"} or \code{"two.stage"}) is specified (for example \code{estimator = "PML"}), these starting values are further updated by fitting the unrestricted model using the chosen estimator. See the \code{\link{lavaan}} function for alternative estimators.} \item{...}{Optional parameters that are passed to the \code{\link{lavaan}} function.} \item{output}{If \code{"cor"}, the function returns the correlation matrix only. If \code{"cov"}, the function returns the covariance matrix (this only makes a difference if at least one variable is numeric). If \code{"th"} or \code{"thresholds"}, only the thresholds are returned. If \code{"sampstat"}, the output equals the result of \code{inspect(fit, "sampstat")} where fit is the unrestricted model. If \code{"est"} or \code{"pe"} or \code{"parameterEstimates"}, the output equals the result of \code{parameterEstimates(fit)}. Finally, if output is \code{"fit"} or \code{"lavaan"}, the function returns an object of class \code{\linkS4class{lavaan}}.} } \details{ This function is a wrapper around the \code{\link{lavaan}} function, but where the model is defined as the unrestricted model. The following free parameters are included: all covariances/correlations among the variables, variances for continuous variables, means for continuous variables, thresholds for ordered variables, and if exogenous variables are included (\code{ov.names.x} is not empty) while some variables are ordered, also the regression slopes enter the model. } \value{ By default, if \code{output = "cor"} or \code{output = "cov"}, a symmetric matrix (of class \code{"lavaan.matrix.symmetric"}, which only affects the way the matrix is printed). If \code{output = "th"}, a named vector of thresholds. If \code{output = "fit"} or \code{output = "lavaan"}, an object of class \code{\linkS4class{lavaan}}. } \references{ Olsson, U. (1979). Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika, 44(4), 443-460. Olsson, U., Drasgow, F., & Dorans, N. J. (1982). The polyserial correlation coefficient. Psychometrika, 47(3), 337-347. 
} \seealso{ \code{\link{lavaan}} } \examples{ # Holzinger and Swineford (1939) example HS9 <- HolzingerSwineford1939[,c("x1","x2","x3","x4","x5", "x6","x7","x8","x9")] # Pearson correlations lavCor(HS9) # ordinal version, with three categories HS9ord <- as.data.frame( lapply(HS9, cut, 3, labels=FALSE) ) # polychoric correlations, two-stage estimation lavCor(HS9ord, ordered=names(HS9ord)) # thresholds only lavCor(HS9ord, ordered=names(HS9ord), output = "th") # polychoric correlations, with standard errors lavCor(HS9ord, ordered=names(HS9ord), se = "standard", output="est") # polychoric correlations, full output fit.un <- lavCor(HS9ord, ordered=names(HS9ord), se = "standard", output="fit") summary(fit.un) } lavaan/man/lavInspect.Rd0000644000176200001440000004041413053066670014677 0ustar liggesusers\name{lavInspect} \alias{lavInspect} \alias{inspect} \alias{lavTech} \title{Inspect or extract information from a fitted lavaan object} \description{ The \code{lavInspect()} and \code{lavTech()} functions can be used to inspect/extract information that is stored inside (or can be computed from) a fitted lavaan object. Note: the (older) \code{inspect()} function is now simply a shortcut for \code{lavInspect()} with default arguments. } \usage{ lavInspect(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) lavTech(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) inspect(object, what = "free", ...) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{what}{Character. What needs to be inspected/extracted? See Details for a full list. Note: the \code{what} argument is not case-sensitive (everything is converted to lower case).} \item{add.labels}{If \code{TRUE}, variable names are added to the vectors and/or matrices.} \item{add.class}{If \code{TRUE}, vectors are given the `lavaan.vector' class; matrices are given the `lavaan.matrix' class, and symmetric matrices are given the `lavaan.matrix.symmetric' class. This only affects the way they are printed on the screen.} \item{list.by.group}{Logical. Only used when the output consists of model matrices. If \code{TRUE}, the model matrices are nested within groups. If \code{FALSE}, a flattened list is returned containing all model matrices, with repeated names for multiple groups.} \item{drop.list.single.group}{If \code{FALSE}, the results are returned as a list, where each element corresponds to a group (even if there is only a single group). If \code{TRUE}, the list will be unlisted if there is only a single group.} \item{...}{Additional arguments. Not used by lavaan, but by other packages.} } \details{ The \code{lavInspect()} and \code{lavTech()} functions only differ in the way they return the results. The \code{lavInspect()} function will prettify the output by default, while \code{lavTech()} will not attempt to prettify the output by default. The (older) \code{inspect()} function is a simplified version of \code{lavInspect()} with only the first two arguments. Below is a list of possible values for the \code{what} argument, organized in several sections: Model matrices: \describe{ \item{\code{"free"}:}{A list of model matrices. The non-zero integers represent the free parameters. The numbers themselves correspond to the position of the free parameter in the parameter vector.
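A hedged sketch of inspecting this numbering (reusing the \code{fit} object from the example at the bottom of this page):
\preformatted{
## one matrix per group; non-zero entries = free-parameter positions:
## lavInspect(fit, "free")
}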
This determines the order of the model parameters in the output of, for example, \code{coef()} and \code{vcov()}.} \item{\code{"partable"}:}{A list of model matrices. The non-zero integers represent both the fixed parameters (for example, factor loadings fixed at 1.0), and the free parameters if we ignore any equality constraints. They correspond with all entries (fixed or free) in the parameter table. See \code{\link{parTable}}.} \item{\code{"se"}:}{A list of model matrices. The non-zero numbers represent the standard errors for the free parameters in the model. If two parameters are constrained to be equal, they will have the same standard error. Aliases: \code{"std.err"} and \code{"standard.errors"}.} \item{\code{"start"}:}{A list of model matrices. The values represent the starting values for all model parameters. Alias: \code{"starting.values"}.} \item{\code{"est"}:}{A list of model matrices. The values represent the estimated model parameters. Aliases: \code{"estimates"}, \code{"coef"}, \code{"coefficients"} and \code{"x"}.} \item{\code{"dx.free"}:}{A list of model matrices. The values represent the gradient (first derivative) values of the model parameters. If two parameters are constrained to be equal, they will have the same gradient value.} \item{\code{"dx.all"}:}{A list of model matrices. The values represent the first derivative with respect to all possible matrix elements. Currently, this is only available when the estimator is \code{"ML"} or \code{"GLS"}.} \item{\code{"std"}:}{A list of model matrices. The values represent the (completely) standardized model parameters (the variances of both the observed and the latent variables are set to unity). Aliases: \code{"std.all"}, \code{"standardized"}.} \item{\code{"std.lv"}:}{A list of model matrices. The values represent the standardized model parameters (only the variances of the latent variables are set to unity).} \item{\code{"std.nox"}:}{A list of model matrices. The values represent the (completely) standardized model parameters (the variances of both the observed and the latent variables are set to unity; however, the variances of any observed exogenous variables are not set to unity; hence no-x).} } Information about the data (including missing patterns): \describe{ \item{\code{"data"}:}{A matrix containing the observed variables that have been used to fit the model. No column/row names are provided. The columns correspond to the output of \code{lavNames(object)}, while the rows correspond to the output of \code{lavInspect(object, "case.idx")}.} \item{\code{"group"}:}{A character string. The group variable in the data.frame (if any).} \item{\code{"ngroups"}:}{Integer. The number of groups.} \item{\code{"group.label"}:}{A character vector. The group labels.} \item{\code{"cluster"}:}{A character vector. The cluster variable(s) in the data.frame (if any).} \item{\code{"ordered"}:}{A character vector. The ordered variables.} \item{\code{"nobs"}:}{Integer vector. The number of observations in each group that were used in the analysis.} \item{\code{"norig"}:}{Integer vector. The original number of observations in each group.} \item{\code{"ntotal"}:}{Integer. The total number of observations that were used in the analysis. If there is just a single group, this is the same as the \code{"nobs"} option; if there are multiple groups, this is the sum of the \code{"nobs"} numbers for each group.} \item{\code{"case.idx"}:}{The case/observation numbers that were used in the analysis.
In the case of multiple groups: a list of numbers.} \item{\code{"empty.idx"}:}{The case/observation numbers of those cases/observations that contained missing values only (at least for the observed variables that were included in the model). In the case of multiple groups: a list of numbers.} \item{\code{"patterns"}:}{A binary matrix. The rows of the matrix are the missing data patterns where 1 and 0 denote non-missing and missing values for the corresponding observed variables respectively (or \code{TRUE} and \code{FALSE} if \code{lavTech()} is used). If the data are complete (no missing values), there will be only a single pattern. In the case of multiple groups: a list of pattern matrices.} \item{\code{"coverage"}:}{A symmetric matrix where each element contains the proportion of observed datapoints for the corresponding pair of observed variables. In the case of multiple groups: a list of coverage matrices.} } Observed sample statistics: \describe{ \item{\code{"sampstat"}:}{Observed sample statistics. Aliases: \code{"samp"}, \code{"sample"}, \code{"samplestatistics"}. In the presence of missing values, the sample covariance matrix is computed using \code{use="pairwise"}, while listwise deletion is used for the means (and thresholds, if any).} \item{\code{"sampstat.h1"}:}{If all variables are continuous, and \code{missing = "ml"} (or \code{"fiml"}), the EM algorithm is used to compute an estimate of the sample covariance matrix and mean vector under the unrestricted (H1) model. Aliases: \code{"h1"}, \code{"missing.h1"}.} \item{\code{"wls.obs"}:}{The observed sample statistics (covariance elements, intercepts/thresholds, etc.) in a single vector.} \item{\code{"wls.v"}:}{The weight vector as used in weighted least squares estimation.} \item{\code{"gamma"}:}{N times the asymptotic variance matrix of the sample statistics. Alias: \code{"sampstat.nacov"}.} } Model features: \describe{ \item{\code{"meanstructure"}:}{Logical. \code{TRUE} if a meanstructure was included in the model.} \item{\code{"categorical"}:}{Logical. \code{TRUE} if categorical endogenous variables were part of the model.} \item{\code{"fixed.x"}:}{Logical. \code{TRUE} if the exogenous x-covariates are treated as fixed.} \item{\code{"parameterization"}:}{Character. Either \code{"delta"} or \code{"theta"}.} } Model-implied sample statistics: \describe{ \item{\code{"cov.lv"}:}{The model-implied variance-covariance matrix of the latent variables. Alias: \code{"veta"} [for V(eta)].} \item{\code{"cor.lv"}:}{The model-implied correlation matrix of the latent variables.} \item{\code{"mean.lv"}:}{The model-implied mean vector of the latent variables. Alias: \code{"eeta"} [for E(eta)].} \item{\code{"cov.ov"}:}{The model-implied variance-covariance matrix of the observed variables. Aliases: \code{"sigma"}, \code{"sigma.hat"}.} \item{\code{"cor.ov"}:}{The model-implied correlation matrix of the observed variables.} \item{\code{"mean.ov"}:}{The model-implied mean vector of the observed variables. Aliases: \code{"mu"}, \code{"mu.hat"}.} \item{\code{"cov.all"}:}{The model-implied variance-covariance matrix of both the observed and latent variables.} \item{\code{"cor.all"}:}{The model-implied correlation matrix of both the observed and latent variables.} \item{\code{"th"}:}{The model-implied thresholds. Alias: \code{"thresholds"}.} \item{\code{"wls.est"}:}{The model-implied sample statistics (covariance elements, intercepts/thresholds, etc.)
in a single vector.} \item{\code{"vy"}:}{The model-implied unconditional variances of the observed variables.} \item{\code{"rsquare"}:}{The R-square value for all endogenous variables. Aliases: \code{"r-square"}, \code{"r2"}.} } Optimizer information: \describe{ \item{\code{"converged"}:}{Logical. \code{TRUE} if the optimizer has converged; \code{FALSE} otherwise.} \item{\code{"iterations"}:}{Integer. The number of iterations used by the optimizer.} \item{\code{"optim"}:}{List. All available information regarding the optimization results.} } Gradient, Hessian, observed, expected and first.order information matrices: \describe{ \item{\code{"gradient"}:}{Numeric vector containing the first derivatives of the discrepancy function with respect to the (free) model parameters.} \item{\code{"hessian"}:}{Matrix containing the second derivatives of the discrepancy function with respect to the (free) model parameters.} \item{\code{"information"}:}{Matrix containing either the observed or the expected information matrix (depending on the information option of the fitted model).} \item{\code{"information.expected"}:}{Matrix containing the expected information matrix for the free model parameters.} \item{\code{"information.observed"}:}{Matrix containing the observed information matrix for the free model parameters.} \item{\code{"information.first.order"}:}{Matrix containing the first.order information matrix for the free model parameters. This is the outer product of the gradient elements (the first derivative of the discrepancy function with respect to the (free) model parameters). Alias: \code{"first.order"}.} \item{\code{"augmented.information"}:}{Matrix containing either the observed or the expected augmented (or bordered) information matrix (depending on the information option of the fitted model). Only relevant if constraints have been used in the model.} \item{\code{"augmented.information.expected"}:}{Matrix containing the expected augmented (or bordered) information matrix. Only relevant if constraints have been used in the model.} \item{\code{"augmented.information.observed"}:}{Matrix containing the observed augmented (or bordered) information matrix. Only relevant if constraints have been used in the model.} \item{\code{"augmented.information.first.order"}:}{Matrix containing the first.order augmented (or bordered) information matrix. Only relevant if constraints have been used in the model.} \item{\code{"inverted.information"}:}{Matrix containing either the observed or the expected inverted information matrix (depending on the information option of the fitted model).} \item{\code{"inverted.information.expected"}:}{Matrix containing the inverted expected information matrix for the free model parameters.} \item{\code{"inverted.information.observed"}:}{Matrix containing the inverted observed information matrix for the free model parameters.} \item{\code{"inverted.information.first.order"}:}{Matrix containing the inverted first.order information matrix for the free model parameters.} } Variance covariance matrix of the model parameters: \describe{ \item{\code{"vcov"}:}{Matrix containing the variance covariance matrix of the estimated model parameters.} \item{\code{"vcov.std.all"}:}{Matrix containing the variance covariance matrix of the standardized estimated model parameters. Standardization is done with respect to both observed and latent variables.} \item{\code{"vcov.std.lv"}:}{Matrix containing the variance covariance matrix of the standardized estimated model parameters.
Standardization is done with respect to the latent variables only.} \item{\code{"vcov.std.nox"}:}{Matrix containing the variance covariance matrix of the standardized estimated model parameters. Standardization is done with respect to both observed and latent variables, but ignoring any exogenous observed covariates.} } Miscellaneous: \describe{ \item{\code{"UGamma"}:}{Matrix containing the product of 'U' and 'Gamma' matrices as used by the Satorra-Bentler correction. The trace of this matrix, divided by the degrees of freedom, gives the scaling factor.} \item{\code{"list"}:}{The parameter table. The same output as given by \code{parTable()}.} \item{\code{"fit"}:}{The fit measures. Aliases: \code{"fitmeasures"}, \code{"fit.measures"}, \code{"fit.indices"}. The same output as given by \code{fitMeasures()}.} \item{\code{"mi"}:}{The modification indices. Aliases: \code{"modindices"}, \code{"modification.indices"}. The same output as given by \code{modindices()}.} \item{\code{"options"}:}{List. The option list.} \item{\code{"call"}:}{List. The call as returned by match.call, coerced to a list.} \item{\code{"timing"}:}{List. The timing (in milliseconds) of various lavaan subprocedures.} \item{\code{"test"}:}{List. All available information regarding the (goodness-of-fit) test statistic(s).} \item{\code{"post.check"}:}{Post-fitting check if the solution is admissible. A warning is raised if negative variances are found, or if either \code{lavInspect(fit, "cov.lv")} or \code{lavInspect(fit, "theta")} return a non-positive definite matrix.} \item{\code{"zero.cell.tables"}:}{List. List of bivariate frequency tables where at least one cell is empty.} } } \seealso{ \code{\link{lavaan}} } \examples{ # fit model HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939, group = "school") # extract information lavInspect(fit, "sampstat") lavTech(fit, "sampstat") } lavaan/man/lavPredict.Rd0000644000176200001440000000454212727771522014664 0ustar liggesusers\name{lavPredict} \alias{lavPredict} \alias{lavpredict} \title{Predict the values of latent variables (and their indicators).} \description{ The \code{lavPredict()} function can be used to compute (or `predict') estimated values for latent variables, and given these values, the model-implied values for the indicators of these latent variables.} \usage{ lavPredict(object, type = "lv", newdata = NULL, method = "EBM", se.fit = FALSE, label = TRUE, fsm = FALSE, optim.method = "nlminb") } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{type}{A character string. If \code{"lv"}, estimated values for the latent variables in the model are computed. If \code{"ov"}, model predicted values for the indicators of the latent variables in the model are computed.} \item{newdata}{An optional data.frame, containing the same variables as the data.frame used when fitting the model in object.} \item{method}{A character string. In the linear case (when the indicators are continuous), the possible options are \code{"regression"} or \code{"Bartlett"}. In the categorical case, the only option (for now) is \code{"EBM"} for the Empirical Bayes Modal approach.} \item{se.fit}{Not used yet.} \item{label}{Logical. If \code{TRUE}, the columns are labeled.} \item{fsm}{Logical. If \code{TRUE}, return the factor score matrix as an attribute. Only for numeric data.} \item{optim.method}{Character string. Only used in the categorical case.
If \code{"nlminb"} (the default), the \code{nlminb()} function is used for the optimization. If \code{"BFGS"}, the \code{optim()} function is used with the BFGS method.}
}
\details{
The \code{predict()} function calls the \code{lavPredict()} function with its default options.

If there are no latent variables in the model, \code{type = "ov"} will simply return the values of the observed variables. Note that this function cannot be used to `predict' values of dependent variables, given the values of independent variables (in the regression sense). In other words, the structural component is completely ignored (for now).
}
\seealso{
\code{\link{lavaan}}
}
\examples{
# fit model
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)
head(lavPredict(fit))
head(lavPredict(fit, type = "ov"))
}
lavaan/man/plot.InformativeTesting.Rd0000644000176200001440000001362512726532720017367 0ustar liggesusers\name{InformativeTesting methods}
\alias{print.InformativeTesting}
\alias{plot.InformativeTesting}
\title{Methods for output InformativeTesting()}
\description{The print function shows the results of hypothesis tests Type A and Type B. The plot function plots the distributions of bootstrapped LRT values and plug-in p-values.}
\usage{
\method{print}{InformativeTesting}(x, digits = max(3, getOption("digits") - 3), ...)

\method{plot}{InformativeTesting}(x, ..., type = c("lr","ppv"),
  main = "main", xlab = "xlabel", ylab = "Frequency", freq = TRUE,
  breaks = 15, cex.main = 1, cex.lab = 1, cex.axis = 1, col = "grey",
  border = par("fg"), vline = TRUE, vline.col = c("red", "blue"),
  lty = c(1,2), lwd = 1, legend = TRUE, bty = "o",
  cex.legend = 1, loc.legend = "topright")
}
\arguments{
\item{x}{object of class "InformativeTesting".}
\item{digits}{the number of significant digits to use when printing.}
\item{...}{Currently not used.}
\item{type}{If \code{"lr"}, a distribution of the first-level bootstrapped LR values is plotted. If \code{"ppv"}, a distribution of the bootstrapped plug-in p-values is plotted.}
\item{main}{The main title(s) for the plot(s).}
\item{xlab}{A label for the x axis, default depends on input type.}
\item{ylab}{A label for the y axis.}
\item{freq}{Logical; if TRUE, the histogram graphic is a representation of frequencies, the counts component of the result; if \code{FALSE}, probability densities, component density, are plotted (so that the histogram has a total area of one). The default is set to \code{TRUE}.}
\item{breaks}{see \code{\link[graphics]{hist}}}
\item{cex.main}{The magnification to be used for main titles relative to the current setting of cex.}
\item{cex.lab}{The magnification to be used for x and y labels relative to the current setting of cex.}
\item{cex.axis}{The magnification to be used for axis annotation relative to the current setting of cex.}
\item{col}{A colour to be used to fill the bars. The default is \code{"grey"}.}
\item{border}{Color for rectangle border(s). The default is \code{par("fg")}.}
\item{vline}{Logical; if \code{TRUE} a vertical line is drawn at the observed LRT value. If \code{double.bootstrap = "FDB"}, a vertical line is drawn at the 1-p* quantile of the second-level LRT values, where p* is the first-level bootstrapped p-value.}
\item{vline.col}{Color(s) for the vline.LRT.}
\item{lty}{The line type.
Line types can either be specified as an integer (0=blank, 1=solid (default), 2=dashed, 3=dotted, 4=dotdash, 5=longdash, 6=twodash) or as one of the character strings "blank", "solid", "dashed", "dotted", "dotdash", "longdash", or "twodash", where "blank" uses 'invisible lines' (i.e., does not draw them).}
\item{lwd}{The line width, a positive number, defaulting to 1.}
\item{legend}{Logical; if \code{TRUE} a legend is added to the plot.}
\item{bty}{A character string which determines the type of box which is drawn about plots. If bty is one of "o" (the default), "l", "7", "c", "u", or "]" the resulting box resembles the corresponding upper case letter. A value of "n" suppresses the box.}
\item{cex.legend}{A numerical value giving the amount by which the legend text and symbols should be magnified relative to the default. This starts as 1 when a device is opened, and is reset when the layout is changed.}
\item{loc.legend}{The location of the legend, specified by a single keyword from the list \code{"bottomright"}, \code{"bottom"}, \code{"bottomleft"}, \code{"left"}, \code{"topleft"}, \code{"top"}, \code{"topright"}, \code{"right"} and \code{"center"}.}
}
\examples{
\dontrun{
#########################
### real data example ###
#########################
# Multiple group path model for facial burns example.

# model syntax with starting values.
burns.model <- 'Selfesteem ~ Age + c(m1, f1)*TBSA + HADS +
                             start(-.10, -.20)*TBSA
                HADS ~ Age + c(m2, f2)*TBSA + RUM +
                       start(.10, .20)*TBSA '

# constraints syntax
burns.constraints <- 'f2 > 0  ; m1 < 0
                      m2 > 0  ; f1 < 0
                      f2 > m2 ; f1 < m1'

# we only generate 2 bootstrap samples in this example; in practice
# you may wish to use a much higher number.
# the double bootstrap was switched off; in practice you probably
# want to set it to "standard".
example1 <- InformativeTesting(model = burns.model, data = FacialBurns,
                               R = 2, constraints = burns.constraints,
                               double.bootstrap = "no", group = "Sex")
example1
plot(example1)

##########################
### artificial example ###
##########################
# Simple ANOVA model with 3 groups (N = 20 per group)
set.seed(1234)
Y <- cbind(c(rnorm(20,0,1), rnorm(20,0.5,1), rnorm(20,1,1)))
grp <- c(rep("1", 20), rep("2", 20), rep("3", 20))
Data <- data.frame(Y, grp)

# create model matrix
fit.lm <- lm(Y ~ grp, data = Data)
mfit <- fit.lm$model
mm <- model.matrix(mfit)

Y <- model.response(mfit)
X <- data.frame(mm[,2:3])
names(X) <- c("d1", "d2")
Data.new <- data.frame(Y, X)

# model
model <- 'Y ~ 1 + a1*d1 + a2*d2'

# fit without constraints
fit <- sem(model, data = Data.new)

# constraints syntax: mu1 < mu2 < mu3
constraints <- ' a1 > 0
                 a1 < a2 '

# we only generate 10 bootstrap samples in this example; in practice
# you may wish to use a much higher number, say > 1000. The double
# bootstrap is not necessary in case of a univariate ANOVA model.
example2 <- InformativeTesting(model = model, data = Data.new,
                               start = parTable(fit),
                               R = 10L, double.bootstrap = "no",
                               constraints = constraints)
example2
# plot(example2)
}
}
\author{
Leonard Vanbrabant
\email{lgf.vanbrabant@gmail.com}
}
lavaan/man/growth.Rd0000644000176200001440000001144213043341140014063 0ustar liggesusers\name{growth}
\alias{growth}
\title{Fit Growth Curve Models}
\description{
Fit a Growth Curve model.}
\usage{
growth(model = NULL, data = NULL, ordered = NULL, sample.cov = NULL,
       sample.mean = NULL, sample.nobs = NULL, group = NULL,
       cluster = NULL, constraints = "", WLS.V = NULL, NACOV = NULL,
       ...)
} \arguments{ \item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.} \item{data}{An optional data frame containing the observed variables used in the model. If some variables are declared as ordered factors, lavaan will treat them as ordinal variables.} \item{ordered}{Character vector. Only used if the data is in a data.frame. Treat these variables as ordered (ordinal) variables, if they are endogenous in the model. Importantly, all other variables will be treated as numeric (unless they are declared as ordered in the original data.frame.)} \item{sample.cov}{Numeric matrix. A sample variance-covariance matrix. The rownames and/or colnames must contain the observed variable names. For a multiple group analysis, a list with a variance-covariance matrix for each group. Note that if maximum likelihood estimation is used and \code{likelihood="normal"}, the user provided covariance matrix is internally rescaled by multiplying it with a factor (N-1)/N, to ensure that the covariance matrix has been divided by N. This can be turned off by setting the \code{sample.cov.rescale} argument to \code{FALSE}.} \item{sample.mean}{A sample mean vector. For a multiple group analysis, a list with a mean vector for each group.} \item{sample.nobs}{Number of observations if the full data frame is missing and only sample moments are given. For a multiple group analysis, a list or a vector with the number of observations for each group.} \item{group}{A variable name in the data frame defining the groups in a multiple group analysis.} \item{cluster}{Not used yet.} \item{constraints}{Additional (in)equality constraints not yet included in the model syntax. See \code{\link{model.syntax}} for more information.} \item{WLS.V}{A user provided weight matrix to be used by estimator \code{"WLS"}; if the estimator is \code{"DWLS"}, only the diagonal of this matrix will be used. For a multiple group analysis, a list with a weight matrix for each group. The elements of the weight matrix should be in the following order (if all data is continuous): first the means (if a meanstructure is involved), then the lower triangular elements of the covariance matrix including the diagonal, ordered column by column. In the categorical case: first the thresholds (including the means for continuous variables), then the slopes (if any), the variances of continuous variables (if any), and finally the lower triangular elements of the correlation/covariance matrix excluding the diagonal, ordered column by column.} \item{NACOV}{A user provided matrix containing the elements of (N times) the asymptotic variance-covariance matrix of the sample statistics. For a multiple group analysis, a list with an asymptotic variance-covariance matrix for each group. See the \code{WLS.V} argument for information about the order of the elements.} \item{...}{Many more additional options can be defined, using 'name = value'. 
See \code{\link{lavOptions}} for a complete list.} } \details{ The \code{growth} function is a wrapper for the more general \code{\link{lavaan}} function, using the following default arguments: \code{meanstructure = TRUE}, \code{int.ov.free = FALSE}, \code{int.lv.free = TRUE}, \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, \code{auto.cov.lv.x = TRUE}, \code{auto.th = TRUE}, \code{auto.delta = TRUE}, and \code{auto.cov.y = TRUE}. } \value{ An object of class \code{\linkS4class{lavaan}}, for which several methods are available, including a \code{summary} method. } \references{ Yves Rosseel (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/.} \seealso{ \code{\link{lavaan}} } \examples{ ## linear growth model with a time-varying covariate model.syntax <- ' # intercept and slope with fixed coefficients i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4 s =~ 0*t1 + 1*t2 + 2*t3 + 3*t4 # regressions i ~ x1 + x2 s ~ x1 + x2 # time-varying covariates t1 ~ c1 t2 ~ c2 t3 ~ c3 t4 ~ c4 ' fit <- growth(model.syntax, data=Demo.growth) summary(fit) } lavaan/man/fitMeasures.Rd0000644000176200001440000000263712601547543015064 0ustar liggesusers\name{fitMeasures} \alias{fitMeasures} \alias{fitmeasures} \alias{fitMeasures,lavaan-method} \alias{fitmeasures,lavaan-method} \alias{fitindices} \title{Fit Measures for a Latent Variable Model} \description{ This function computes a variety of fit measures to assess the global fit of a latent variable model.} \usage{ fitMeasures(object, fit.measures = "all", baseline.model = NULL) fitmeasures(object, fit.measures = "all", baseline.model = NULL) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{fit.measures}{If \code{"all"}, all fit measures available will be returned. If only a single or a few fit measures are specified by name, only those are computed and returned.} \item{baseline.model}{If not NULL, an object of class \code{\linkS4class{lavaan}}, representing a user-specified baseline model. If a baseline model is provided, all fit indices relying on a baseline model (eg. CFI or TLI) will use the test statistics from this user-specified baseline model, instead of the default baseline model.} } \value{ A named numeric vector of fit measures. } %\details{ %The following fit measures are available: % %} \examples{ HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) fitMeasures(fit) fitMeasures(fit, "cfi") fitMeasures(fit, c("chisq", "df", "pvalue", "cfi", "rmsea")) } lavaan/man/getCov.Rd0000644000176200001440000000551212104004704014000 0ustar liggesusers\name{getCov} \alias{getCov} \alias{cor2cov} \alias{char2num} \title{Utility Functions For Covariance Matrices} \description{Convenience functions to deal with covariance and correlation matrices.} \usage{ getCov(x, lower = TRUE, diagonal = TRUE, sds = NULL, names = paste("V", 1:nvar, sep="")) char2num(s) cor2cov(R, sds, names = NULL) } \arguments{ \item{x}{The elements of the covariance matrix. Either inside a character string or as a numeric vector. In the former case, the function \code{char2num} is used to convert the numbers (inside the character string) to numeric values.} \item{lower}{Logical. If \code{TRUE}, the numeric values in \code{x} are the lower-triangular elements of the (symmetric) covariance matrix only. 
If \code{FALSE}, \code{x} contains the upper triangular elements only. Note that we always assume the elements are provided row-wise!}
\item{diagonal}{Logical. If \code{TRUE}, the numeric values in \code{x} include the diagonal elements. If \code{FALSE}, a unit diagonal is assumed.}
\item{sds}{A numeric vector containing the standard deviations to be used to scale the elements in \code{x} or the correlation matrix \code{R} into a covariance matrix.}
\item{names}{The variable names of the observed variables.}
\item{s}{Character string containing numeric values; commas and semicolons are ignored.}
\item{R}{A correlation matrix, to be scaled into a covariance matrix.}
}
\details{
The \code{getCov} function is typically used to input the lower (or upper) triangular elements of a (symmetric) covariance matrix. In many examples found in handbooks, only those elements are shown. However, lavaan needs a full matrix to proceed.

The \code{cor2cov} function is the inverse of the \code{\link[stats]{cov2cor}} function, and scales a correlation matrix into a covariance matrix given the standard deviations of the variables. Optionally, variable names can be given.
}
\examples{
# The classic Wheaton et. al. (1977) model
# panel data on the stability of alienation
lower <- '
 11.834,
  6.947,    9.364,
  6.819,    5.091,   12.532,
  4.783,    5.028,    7.495,   9.986,
 -3.839,   -3.889,   -3.841,  -3.625,   9.610,
-21.899,  -18.831,  -21.748, -18.775,  35.522,  450.288 '

# convert to a full symmetric covariance matrix with names
wheaton.cov <- getCov(lower, names=c("anomia67","powerless67",
                      "anomia71", "powerless71","education","sei"))

# the model
wheaton.model <- '
  # measurement model
    ses     =~ education + sei
    alien67 =~ anomia67 + powerless67
    alien71 =~ anomia71 + powerless71
  # equations
    alien71 ~ alien67 + ses
    alien67 ~ ses
  # correlated residuals
    anomia67 ~~ anomia71
    powerless67 ~~ powerless71 '

# fitting the model
fit <- sem(wheaton.model, sample.cov=wheaton.cov, sample.nobs=932)

# showing the results
summary(fit, standardized=TRUE)
}
lavaan/man/mplus2lavaan.modelSyntax.Rd0000644000176200001440000000162413010632104017462 0ustar liggesusers\name{mplus2lavaan.modelSyntax}
\alias{mplus2lavaan.modelSyntax}
\title{Convert Mplus model syntax to lavaan}
\description{
Converts Mplus model syntax into lavaan model syntax.}
\usage{
mplus2lavaan.modelSyntax(syntax)
}
\arguments{
\item{syntax}{A character vector containing Mplus model syntax to be converted to lavaan model syntax. Note that parsing Mplus syntax often requires correct usage of newline characters. If \code{syntax} is a vector of multiple strings, these will be joined with newlines prior to conversion. Alternatively, \code{\\n} characters can be included inline in \code{syntax}.}
}
\value{
A character string of converted \code{lavaan} model syntax.
} \author{Michael Hallquist} \seealso{\code{\link{mplus2lavaan}}.} \examples{ \dontrun{ syntax <- ' f1 BY x1*1 x2 x3; x1 WITH x2; x3 (1); x2 (1); ' lavSyntax <- mplus2lavaan.modelSyntax(syntax) cat(lavSyntax) } } lavaan/man/lav_constraints.Rd0000644000176200001440000000515113031512272015765 0ustar liggesusers\name{lav_constraints} \alias{lav_constraints_parse} \alias{lav_partable_constraints_ceq} \alias{lav_partable_constraints_ciq} \alias{lav_partable_constraints_def} \title{Utility Functions: Constraints} \description{Utility functions for equality and inequality constraints.} \usage{ lav_constraints_parse(partable = NULL, constraints = NULL, theta = NULL, debug = FALSE) lav_partable_constraints_ceq(partable, con = NULL, debug = FALSE, txtOnly = FALSE) lav_partable_constraints_ciq(partable, con = NULL, debug = FALSE, txtOnly = FALSE) lav_partable_constraints_def(partable, con = NULL, debug = FALSE, txtOnly = FALSE) } \arguments{ \item{partable}{A lavaan parameter table.} \item{constraints}{A character string containing the constraints.} \item{theta}{A numeric vector. Optional vector with values for the model parameters in the parameter table.} \item{debug}{Logical. If TRUE, show debugging information.} \item{con}{An optional partable where the operator is one of `==', `>', `<' or `:='} \item{txtOnly}{Logical. If TRUE, only the body of the function is returned as a character string. If FALSE, a function is returned.} } \details{ This is a collection of lower-level constraint related functions that are used in the lavaan code. They are made public per request of package developers. Below is a brief description of what they do: The \code{lav_constraints_parse} function parses the constraints specification (provided as a string, see example), and generates a list with useful information about the constraints. The \code{lav_partable_constraints_ceq} function creates a function which takes the (unconstrained) parameter vector as input, and returns the slack values for each equality constraint. If the equality constraints hold perfectly, this function returns zeroes. The \code{lav_partable_constraints_ciq} function creates a function which takes the (unconstrained) parameter vector as input, and returns the slack values for each inequality constraint. The \code{lav_partable_constraints_def} function creates a function which takes the (unconstrained) parameter vector as input, and returns the computed values of the defined parameters. } \examples{ myModel <- 'x1 ~ a*x2 + b*x3 + c*x4' myParTable <- lavaanify(myModel, as.data.frame. = FALSE) con <- ' a == 2*b b - c == 5 ' conInfo <- lav_constraints_parse(myParTable, constraints = con) myModel2 <- 'x1 ~ a*x2 + b*x3 + c*x4 a == 2*b b - c == 5 ' ceq <- lav_partable_constraints_ceq(partable = lavaanify(myModel2)) ceq( c(2,3,4) ) } lavaan/man/lavNames.Rd0000644000176200001440000000522013052523531014322 0ustar liggesusers\name{lavNames} \alias{lavNames} \alias{lavaanNames} \title{lavaan Names} \description{ Extract variables names from a fitted lavaan object.} \usage{ lavNames(object, type = "ov", ...) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaan}}.} \item{type}{Character. The type of variables whose names should be extracted. See details for a complete list.} \item{...}{Additional selection variables. 
For example \code{"group = 2L"} (in a multiple-group analysis) only considers the variables included in the model for the second group.}
}
\details{
The order of the variable names, as returned by \code{lavNames}, determines the order in which the variables are listed in the parameter table, and therefore also in the summary output.

The following variable types are available:

\itemize{
  \item \code{"ov"}: observed variables
  \item \code{"ov.x"}: (pure) exogenous observed variables (no mediators)
  \item \code{"ov.nox"}: non-exogenous observed variables
  \item \code{"ov.model"}: modelled observed variables (joint vs conditional)
  \item \code{"ov.y"}: (pure) endogenous variables (dependent only) (no mediators)
  \item \code{"ov.num"}: numeric observed variables
  \item \code{"ov.ord"}: ordinal observed variables
  \item \code{"ov.ind"}: observed indicators of latent variables
  \item \code{"ov.orphan"}: lonely observed variables (only intercepts/variances appear in the model syntax)
  \item \code{"ov.interaction"}: interaction terms (defined by the colon operator)
  \item \code{"th"}: threshold names; ordinal variables only
  \item \code{"th.mean"}: threshold names; ordinal + numeric variables (if any)
  \item \code{"lv"}: latent variables
  \item \code{"lv.regular"}: latent variables (defined by =~ only)
  \item \code{"lv.formative"}: latent variables (defined by <~ only)
  \item \code{"lv.x"}: (pure) exogenous variables
  \item \code{"lv.y"}: (pure) endogenous variables
  \item \code{"lv.nox"}: non-exogenous latent variables
  \item \code{"lv.nonnormal"}: latent variables with non-normal indicators
  \item \code{"lv.interaction"}: interaction terms at the latent level
  \item \code{"eqs.y"}: variables that appear as dependent variables in a regression formula (but not indicators of latent variables)
  \item \code{"eqs.x"}: variables that appear as independent variables in a regression formula
}
}
\seealso{\code{\link{lavaanify}}, \code{\link{parTable}}}
\examples{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)
lavNames(fit, "ov")
}
lavaan/man/inspectSampleCov.Rd0000644000176200001440000000207513044137764016050 0ustar liggesusers\name{inspectSampleCov}
\alias{inspectSampleCov}
\title{Observed Variable Correlation Matrix from a Model and Data}
\description{
The lavaan model syntax describes a latent variable model. Often, the user wants to see the covariance matrix generated by their model for diagnostic purposes. However, their data may have far more columns of information than what is contained in their model.}
\usage{
inspectSampleCov(model, data, ...)
}
\arguments{
\item{model}{The model that will be fit by lavaan.}
\item{data}{The data frame being used to fit the model.}
\item{...}{Other arguments to \code{\link{sem}} for how to deal with multiple groups, missing values, etc.}
}
\author{Jarrett Byrnes}
\details{
One must supply both a model, coded with proper \code{\link{model.syntax}}, and a data frame from which a covariance matrix will be calculated. This function essentially calls \code{\link{sem}}, but doesn't fit the model, then uses \code{\link{lavInspect}} to get the sample covariance matrix and meanstructure.
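
For instance, a minimal sketch of typical usage (assuming the \code{HolzingerSwineford1939} data that ships with lavaan):

\preformatted{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6 '

# sample covariance matrix (and means) for the model variables only,
# even though the data frame contains many more columns
inspectSampleCov(HS.model, data = HolzingerSwineford1939)
}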
} \section{See also}{\code{\link{sem}}, \code{\link{lavInspect}} } lavaan/man/mplus2lavaan.Rd0000644000176200001440000000173513031513223015162 0ustar liggesusers\name{mplus2lavaan} \alias{mplus2lavaan} \alias{lavImport} \title{mplus to lavaan converter} \description{ Read in an Mplus input file, convert it to lavaan syntax, and fit the model.} \usage{ mplus2lavaan(inpfile, run = TRUE) } \arguments{ \item{inpfile}{The filename (including a full path) of the Mplus input file. The data (as referred to in the Mplus input file) should be in the same directory as the Mplus input file.} \item{run}{Whether to run the specified Mplus input syntax (\code{TRUE}) or only to parse and convert the syntax (\code{FALSE}).} } \value{ A \code{lavaan} object with the fitted results of the Mplus model. The parsed and converted Mplus syntax is preserved in the \code{@external} slot of the \code{lavaan} object in the \code{$mplus.inp} element. If \code{run} is \code{FALSE}, a \code{list} of converted syntax is returned. } \author{Michael Hallquist} \seealso{\code{\link{lavExport}}.} \examples{ \dontrun{ out <- mplus2lavaan("ex5.1.inp") summary(out) } } lavaan/man/cfa.Rd0000644000176200001440000001123713043110745013311 0ustar liggesusers\name{cfa} \alias{cfa} \title{Fit Confirmatory Factor Analysis Models} \description{ Fit a Confirmatory Factor Analysis (CFA) model.} \usage{ cfa(model = NULL, data = NULL, ordered = NULL, sample.cov = NULL, sample.mean = NULL, sample.nobs = NULL, group = NULL, cluster = NULL, constraints = "", WLS.V = NULL, NACOV = NULL, ...) } \arguments{ \item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.} \item{data}{An optional data frame containing the observed variables used in the model. If some variables are declared as ordered factors, lavaan will treat them as ordinal variables.} \item{ordered}{Character vector. Only used if the data is in a data.frame. Treat these variables as ordered (ordinal) variables, if they are endogenous in the model. Importantly, all other variables will be treated as numeric (unless they are declared as ordered in the original data.frame.)} \item{sample.cov}{Numeric matrix. A sample variance-covariance matrix. The rownames and/or colnames must contain the observed variable names. For a multiple group analysis, a list with a variance-covariance matrix for each group. Note that if maximum likelihood estimation is used and \code{likelihood="normal"}, the user provided covariance matrix is internally rescaled by multiplying it with a factor (N-1)/N, to ensure that the covariance matrix has been divided by N. This can be turned off by setting the \code{sample.cov.rescale} argument to \code{FALSE}.} \item{sample.mean}{A sample mean vector. For a multiple group analysis, a list with a mean vector for each group.} \item{sample.nobs}{Number of observations if the full data frame is missing and only sample moments are given. For a multiple group analysis, a list or a vector with the number of observations for each group.} \item{group}{A variable name in the data frame defining the groups in a multiple group analysis.} \item{cluster}{Not used yet.} \item{constraints}{Additional (in)equality constraints not yet included in the model syntax. 
See \code{\link{model.syntax}} for more information.} \item{WLS.V}{A user provided weight matrix to be used by estimator \code{"WLS"}; if the estimator is \code{"DWLS"}, only the diagonal of this matrix will be used. For a multiple group analysis, a list with a weight matrix for each group. The elements of the weight matrix should be in the following order (if all data is continuous): first the means (if a meanstructure is involved), then the lower triangular elements of the covariance matrix including the diagonal, ordered column by column. In the categorical case: first the thresholds (including the means for continuous variables), then the slopes (if any), the variances of continuous variables (if any), and finally the lower triangular elements of the correlation/covariance matrix excluding the diagonal, ordered column by column.} \item{NACOV}{A user provided matrix containing the elements of (N times) the asymptotic variance-covariance matrix of the sample statistics. For a multiple group analysis, a list with an asymptotic variance-covariance matrix for each group. See the \code{WLS.V} argument for information about the order of the elements.} \item{...}{Many more additional options can be defined, using 'name = value'. See \code{\link{lavOptions}} for a complete list.} } \details{ The \code{cfa} function is a wrapper for the more general \code{\link{lavaan}} function, using the following default arguments: \code{int.ov.free = TRUE}, \code{int.lv.free = FALSE}, \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, \code{auto.cov.lv.x = TRUE}, \code{auto.th = TRUE}, \code{auto.delta = TRUE}, and \code{auto.cov.y = TRUE}. } \value{ An object of class \code{\linkS4class{lavaan}}, for which several methods are available, including a \code{summary} method. } \references{ Yves Rosseel (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/.} \seealso{ \code{\link{lavaan}} } \examples{ ## The famous Holzinger and Swineford (1939) example HS.model <- ' visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ' fit <- cfa(HS.model, data=HolzingerSwineford1939) summary(fit, fit.measures=TRUE) } lavaan/man/lavaan-class.Rd0000644000176200001440000002154413044140123015121 0ustar liggesusers\name{lavaan-class} \docType{class} \alias{lavaan-class} \alias{coef,lavaan-method} \alias{vcov,lavaan-method} \alias{anova,lavaan-method} \alias{predict,lavaan-method} \alias{resid,lavaan-method} \alias{residuals,lavaan-method} \alias{fitted,lavaan-method} \alias{fitted.values,lavaan-method} %% NOTE: no white space here! \alias{nobs} %% not imported in < 2.13 \alias{nobs,lavaan-method} \alias{logLik,lavaan-method} \alias{update,lavaan-method} \alias{show,lavaan-method} \alias{summary,lavaan-method} \title{Class For Representing A (Fitted) Latent Variable Model} \description{The \code{lavaan} class represents a (fitted) latent variable model. It contains a description of the model as specified by the user, a summary of the data, an internal matrix representation, and if the model was fitted, the fitting results.} \section{Objects from the Class}{ Objects can be created via the \code{\link{cfa}}, \code{\link{sem}}, \code{\link{growth}} or \code{\link{lavaan}} functions. 
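
For example, a minimal sketch (using the \code{HolzingerSwineford1939} data that ships with lavaan):

\preformatted{
HS.model <- ' visual =~ x1 + x2 + x3 '
fit <- cfa(HS.model, data = HolzingerSwineford1939)
inherits(fit, "lavaan")  # TRUE: fit is a fitted lavaan object
}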
}
\section{Slots}{
\describe{
\item{\code{call}:}{The function call as returned by \code{match.call()}.}
\item{\code{timing}:}{The elapsed time (user+system) for various parts of the program as a list, including the total time.}
\item{\code{Options}:}{Named list of options that were provided by the user, or filled-in automatically.}
\item{\code{ParTable}:}{Named list describing the model parameters. Can be coerced to a data.frame. In the documentation, this is called the `parameter table'.}
\item{\code{pta}:}{Named list containing parameter table attributes.}
\item{\code{Data}:}{Object of internal class \code{"Data"}: information about the data.}
\item{\code{SampleStats}:}{Object of internal class \code{"SampleStats"}: sample statistics.}
\item{\code{Model}:}{Object of internal class \code{"Model"}: the internal (matrix) representation of the model.}
\item{\code{Cache}:}{List of objects that we try to compute only once, and reuse many times.}
\item{\code{Fit}:}{Object of internal class \code{"Fit"}: the results of fitting the model.}
\item{\code{boot}:}{List. Results and information about the bootstrap.}
\item{\code{optim}:}{List. Information about the optimization.}
\item{\code{implied}:}{List. Model implied statistics.}
\item{\code{vcov}:}{List. Information about the variance matrix (vcov) of the model parameters.}
\item{\code{test}:}{List. Different test statistics.}
\item{\code{external}:}{List. Empty slot to be used by add-on packages.}
}
}
\section{Methods}{
\describe{
\item{coef}{\code{signature(object = "lavaan", type = "free")}: Returns the estimates of the parameters in the model as a named numeric vector. If \code{type="free"}, only the free parameters are returned. If \code{type="user"}, all parameters listed in the parameter table are returned, including constrained and fixed parameters.}
\item{fitted.values}{\code{signature(object = "lavaan")}: Returns the implied moments of the model as a list with two elements (per group): \code{cov} for the implied covariance matrix, and \code{mean} for the implied mean vector. If only the covariance matrix was analyzed, the implied mean vector will be zero.}
\item{fitted}{\code{signature(object = "lavaan")}: an alias for \code{fitted.values}.}
\item{residuals}{\code{signature(object = "lavaan", type="raw")}: If \code{type="raw"}, this function returns the raw (=unstandardized) difference between the implied moments and the observed moments as a list of two elements: \code{cov} for the residual covariance matrix, and \code{mean} for the residual mean vector. If only the covariance matrix was analyzed, the residual mean vector will be zero. If \code{type="cor"}, the observed and model implied covariance matrices are first transformed to correlation matrices (using \code{cov2cor}), before the residuals are computed. If \code{type="normalized"}, the residuals are divided by the square root of an asymptotic variance estimate of the corresponding sample statistic (the variance estimate depends on the choice for the \code{se} argument). If \code{type="standardized"}, the residuals are divided by the square root of the difference between an asymptotic variance estimate of the corresponding sample statistic and an asymptotic variance estimate of the corresponding model-implied statistic. In the latter case, the residuals have a metric similar to z-values. On the other hand, they may often result in \code{NA} values; for these cases, it may be better to use the normalized residuals.
For more information about the normalized and standardized residuals, see the Mplus reference below.}
\item{resid}{\code{signature(object = "lavaan")}: an alias for \code{residuals}.}
\item{vcov}{\code{signature(object = "lavaan")}: returns the covariance matrix of the estimated parameters.}
\item{predict}{\code{signature(object = "lavaan")}: compute factor scores for all cases that are provided in the data frame. For complete data only.}
\item{anova}{\code{signature(object = "lavaan")}: returns model comparison statistics. This method is just a wrapper around the function \code{\link{lavTestLRT}}. If only a single argument (a fitted model) is provided, this model is compared to the unrestricted model. If two or more arguments (fitted models) are provided, the models are compared in a sequential order. Test statistics are based on the likelihood ratio test. For more details and further options, see the \code{\link{lavTestLRT}} page.}
\item{update}{\code{signature(object = "lavaan", model, ..., evaluate=TRUE)}: update a fitted lavaan object and evaluate it (unless \code{evaluate=FALSE}). Note that we use the environment that is stored within the lavaan object, which is not necessarily the parent frame.}
\item{nobs}{\code{signature(object = "lavaan")}: returns the effective number of observations used when fitting the model. In a multiple group analysis, this is the sum of all observations per group.}
\item{logLik}{\code{signature(object = "lavaan")}: returns the log-likelihood of the fitted model, if maximum likelihood estimation was used. The \code{\link[stats]{AIC}} and \code{\link[stats]{BIC}} methods automatically work via \code{logLik()}.}
\item{show}{\code{signature(object = "lavaan")}: Print a short summary of the model fit.}
\item{summary}{\code{signature(object = "lavaan", header = TRUE, fit.measures=FALSE, estimates = TRUE, ci = FALSE, fmi = FALSE, standardized = FALSE, rsquare=FALSE, std.nox = FALSE, modindices=FALSE, nd = 3L)}: Print a nice summary of the model estimates. If \code{header = TRUE}, the header section (including fit measures) is printed. If \code{fit.measures = TRUE}, additional fit measures are added to the header section. If \code{estimates = TRUE}, print the parameter estimates section. If \code{ci = TRUE}, add confidence intervals to the parameter estimates section. If \code{fmi = TRUE}, add the fmi (fraction of missing information) column, if it is available. If \code{standardized=TRUE}, the standardized solution is also printed. If \code{rsquare=TRUE}, the R-Square values for the dependent variables in the model are printed. If \code{std.nox = TRUE}, the \code{std.all} column contains the \code{std.nox} column from the parameterEstimates() output. If \code{modindices=TRUE}, modification indices are printed for all fixed parameters. The argument \code{nd} determines the number of digits after the decimal point to be printed (currently only in the parameter estimates section.) Nothing is returned (use \code{lavInspect} or another extractor function to extract information from a fitted model).}
}
}
\references{
Yves Rosseel (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/.

Standardized Residuals in Mplus.
Document retrieved from URL http://www.statmodel.com/download/StandardizedResiduals.pdf
}
\seealso{
\code{\link{cfa}}, \code{\link{sem}}, \code{\link{growth}}, \code{\link{fitMeasures}}, \code{\link{standardizedSolution}}, \code{\link{parameterEstimates}}, \code{\link{lavInspect}}, \code{\link{modindices}}
}
\examples{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)

summary(fit, standardized=TRUE, fit.measures=TRUE, rsquare=TRUE)
fitted(fit)
coef(fit)
resid(fit, type="normalized")
}
lavaan/man/lavaanList.Rd0000644000176200001440000001027113043663360014661 0ustar liggesusers\name{lavaanList}
\alias{lavaanList}
\alias{semList}
\alias{cfaList}
\title{Fit List of Latent Variable Models}
\description{
Fit the same latent variable model, for a (potentially large) number of datasets.}
\usage{
lavaanList(model = NULL, dataList = NULL, dataFunction = NULL,
    dataFunction.args = list(), ndat = length(dataList), cmd = "lavaan",
    ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE,
    parallel = c("no", "multicore", "snow"),
    ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL,
    iseed = NULL)

semList(model = NULL, dataList = NULL, dataFunction = NULL,
    dataFunction.args = list(), ndat = length(dataList),
    ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE,
    parallel = c("no", "multicore", "snow"),
    ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL,
    iseed = NULL)

cfaList(model = NULL, dataList = NULL, dataFunction = NULL,
    dataFunction.args = list(), ndat = length(dataList),
    ..., store.slots = c("partable"), FUN = NULL, show.progress = FALSE,
    parallel = c("no", "multicore", "snow"),
    ncpus = max(1L, parallel::detectCores() - 1L), cl = NULL,
    iseed = NULL)
}
\arguments{
\item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.}
\item{dataList}{List. Each element contains a full data frame containing the observed variables used in the model.}
\item{dataFunction}{Function. A function that generates a full data frame containing the observed variables used in the model. It can also return a matrix, if the columns are named.}
\item{dataFunction.args}{List. Optional list of arguments that are passed to the \code{dataFunction} function.}
\item{ndat}{Integer. The number of datasets that should be generated using the \code{dataFunction} function.}
\item{cmd}{Character. Which command is used to run the sem models. The possible choices are \code{"sem"}, \code{"cfa"} or \code{"lavaan"}, determining how we deal with default options.}
\item{\dots}{Other named arguments for the \code{lavaan} function.}
\item{store.slots}{Character vector. Which slots (from a lavaan object) should be stored for each dataset? The possible choices are \code{"timing"}, \code{"partable"}, \code{"data"}, \code{"samplestats"}, \code{"vcov"}, \code{"test"}, \code{"optim"} or \code{"implied"}. Finally, \code{"all"} selects all slots.}
\item{FUN}{Function. A function which when applied to the \code{\linkS4class{lavaan}} object returns the information of interest.}
\item{parallel}{The type of parallel operation to be used (if any). If missing, the default is \code{"no"}.}
\item{ncpus}{Integer.
The number of processes to be used in parallel operation: typically one would choose this to be the number of available CPUs.}
\item{cl}{An optional \pkg{parallel} or \pkg{snow} cluster for use if \code{parallel = "snow"}. If not supplied, a cluster on the local machine is created for the duration of the \code{lavaanList} call.}
\item{iseed}{An integer to set the seed, or NULL if no reproducible seeds are needed. To make this work, make sure the first RNGkind() element is \code{"L'Ecuyer-CMRG"}. You can check this by typing \code{RNGkind()} in the console. You can set it by typing \code{RNGkind("L'Ecuyer-CMRG")}, before the lavaanList functions are called.}
\item{show.progress}{If \code{TRUE}, show information for each dataset.}
}
\value{
An object of class \code{\linkS4class{lavaanList}}, for which several methods are available, including a \code{summary} method.
}
\seealso{
\code{\linkS4class{lavaanList}}
}
\examples{
# The Holzinger and Swineford (1939) example
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

# a data generating function
generateData <- function() simulateData(HS.model, sample.nobs = 100)

set.seed(1234)
fit <- semList(HS.model, dataFunction = generateData, ndat = 5,
               store.slots = "partable")

# show parameter estimates, per dataset
coef(fit)
}
lavaan/man/simulateData.Rd0000644000176200001440000001534413053021035015172 0ustar liggesusers\name{simulateData}
\alias{simulateData}
\title{Simulate Data From a Lavaan Model Syntax}
\description{Simulate data starting from a lavaan model syntax.}
\usage{
simulateData(model = NULL, model.type = "sem", meanstructure = FALSE,
    int.ov.free = TRUE, int.lv.free = FALSE, conditional.x = FALSE,
    fixed.x = FALSE, orthogonal = FALSE, std.lv = TRUE,
    auto.fix.first = FALSE, auto.fix.single = FALSE, auto.var = TRUE,
    auto.cov.lv.x = TRUE, auto.cov.y = TRUE, ..., sample.nobs = 500L,
    ov.var = NULL, group.label = paste("G", 1:nblocks, sep = ""),
    skewness = NULL, kurtosis = NULL, seed = NULL, empirical = FALSE,
    return.type = "data.frame", return.fit = FALSE, debug = FALSE,
    standardized = FALSE)
}
\arguments{
\item{model}{A description of the user-specified model. Typically, the model is described using the lavaan model syntax. See \code{\link{model.syntax}} for more information. Alternatively, a parameter table (eg. the output of the \code{lavaanify()} function) is also accepted.}
\item{model.type}{Set the model type: possible values are \code{"cfa"}, \code{"sem"} or \code{"growth"}. This may affect how starting values are computed, and may be used to alter the terminology used in the summary output, or the layout of path diagrams that are based on a fitted lavaan object.}
\item{meanstructure}{If \code{TRUE}, the means of the observed variables enter the model. If \code{"default"}, the value is set based on the user-specified model, and/or the values of other arguments.}
\item{int.ov.free}{If \code{FALSE}, the intercepts of the observed variables are fixed to zero.}
\item{int.lv.free}{If \code{FALSE}, the intercepts of the latent variables are fixed to zero.}
\item{conditional.x}{If \code{TRUE}, we set up the model conditional on the exogenous `x' covariates; the model-implied sample statistics only include the non-x variables. If \code{FALSE}, the exogenous `x' variables are modeled jointly with the other variables, and the model-implied statistics reflect both sets of variables.
If \code{"default"}, the value is set depending on the estimator, and whether or not the model involves categorical endogenous variables.}
\item{fixed.x}{If \code{TRUE}, the exogenous `x' covariates are considered fixed variables and the means, variances and covariances of these variables are fixed to their sample values. If \code{FALSE}, they are considered random, and the means, variances and covariances are free parameters. If \code{"default"}, the value is set depending on the mimic option.}
\item{orthogonal}{If \code{TRUE}, the exogenous latent variables are assumed to be uncorrelated.}
\item{std.lv}{If \code{TRUE}, the metric of each latent variable is determined by fixing their variances to 1.0. If \code{FALSE}, the metric of each latent variable is determined by fixing the factor loading of the first indicator to 1.0.}
\item{auto.fix.first}{If \code{TRUE}, the factor loading of the first indicator is set to 1.0 for every latent variable.}
\item{auto.fix.single}{If \code{TRUE}, the residual variance (if included) of an observed indicator is set to zero if it is the only indicator of a latent variable.}
\item{auto.var}{If \code{TRUE}, the residual variances and the variances of exogenous latent variables are included in the model and set free.}
\item{auto.cov.lv.x}{If \code{TRUE}, the covariances of exogenous latent variables are included in the model and set free.}
\item{auto.cov.y}{If \code{TRUE}, the covariances of dependent variables (both observed and latent) are included in the model and set free.}
\item{\dots}{additional arguments passed to the \code{\link{lavaan}} function.}
\item{sample.nobs}{Number of observations. If a vector, multiple datasets are created. If \code{return.type = "matrix"} or \code{return.type = "cov"}, a list of \code{length(sample.nobs)} is returned, with either the data or covariance matrices, each one based on the number of observations as specified in \code{sample.nobs}. If \code{return.type = "data.frame"}, all datasets are merged and a \code{group} variable is added to mimic a multiple group dataset.}
\item{ov.var}{The user-specified variances of the observed variables.}
\item{group.label}{The group labels that should be used if multiple groups are created.}
\item{skewness}{Numeric vector. The skewness values for the observed variables. Defaults to zero.}
\item{kurtosis}{Numeric vector. The kurtosis values for the observed variables. Defaults to zero.}
\item{seed}{Set random seed.}
\item{empirical}{Logical. If \code{TRUE}, the implied moments (Mu and Sigma) specify the empirical rather than the population mean and covariance matrix.}
\item{return.type}{If \code{"data.frame"}, a data.frame is returned. If \code{"matrix"}, a numeric matrix is returned (without any variable names). If \code{"cov"}, a covariance matrix is returned (without any variable names).}
\item{return.fit}{If \code{TRUE}, return the fitted model that has been used to generate the data as an attribute (called \code{"fit"}); this may be useful for inspection.}
\item{debug}{If \code{TRUE}, debugging information is displayed.}
\item{standardized}{If \code{TRUE}, the residual variances of the observed variables are set in such a way that the model-implied variances are unity. This allows regression coefficients and factor loadings (involving observed variables) to be specified in a standardized metric.}
}
\details{Model parameters can be specified by fixed values in the lavaan model syntax.
If no fixed values are specified, the value zero will be assumed, except for factor loadings and variances, which are set to unity by default. By default, multivariate normal data are generated. However, by providing skewness and/or kurtosis values, nonnormal multivariate data can be generated, using the Vale & Maurelli (1983) method.}
\value{The generated data. Either as a data.frame (if \code{return.type="data.frame"}), a numeric matrix (if \code{return.type="matrix"}), or a covariance matrix (if \code{return.type="cov"}).}
\examples{
# specify population model
population.model <- ' f1 =~ x1 + 0.8*x2 + 1.2*x3
                      f2 =~ x4 + 0.5*x5 + 1.5*x6
                      f3 =~ x7 + 0.1*x8 + 0.9*x9

                      f3 ~ 0.5*f1 + 0.6*f2 '

# generate data
set.seed(1234)
myData <- simulateData(population.model, sample.nobs=100L)

# population moments
fitted(sem(population.model))

# sample moments
round(cov(myData), 3)
round(colMeans(myData), 3)

# fit model
myModel <- ' f1 =~ x1 + x2 + x3
             f2 =~ x4 + x5 + x6
             f3 =~ x7 + x8 + x9
             f3 ~ f1 + f2 '

fit <- sem(myModel, data=myData)
summary(fit)
}
lavaan/man/lavTablesFit.Rd0000644000176200001440000001177612732205574015155 0ustar liggesusers\name{lavTablesFitCp}
\alias{lavTablesFitCp}
\alias{lavTablesFitCf}
\alias{lavTablesFitCm}
\title{Pairwise maximum likelihood fit statistics}
\description{
Three measures of fit for the pairwise maximum likelihood estimation method that are based on likelihood ratios (LR) are defined: \eqn{C_F}, \eqn{C_M}, and \eqn{C_P}. Subscript \eqn{F} signifies a comparison of model-implied proportions of full response patterns with observed sample proportions, subscript \eqn{M} signifies a comparison of model-implied proportions of full response patterns with the proportions implied by the assumption of multivariate normality, and subscript \eqn{P} signifies a comparison of model-implied proportions of pairs of item responses with the observed proportions of pairs of item responses.
}
\usage{
lavTablesFitCf(object)
lavTablesFitCp(object, alpha = 0.05)
lavTablesFitCm(object)
}
\arguments{
\item{object}{An object of class \code{\linkS4class{lavaan}}.}
\item{alpha}{The nominal level of significance of global fit.}
}
\references{
Barendse, M. T., Ligtvoet, R., Timmerman, M. E., & Oort, F. J. (2016). Structural Equation Modeling of Discrete data: Model Fit after Pairwise Maximum Likelihood. \emph{Frontiers in psychology, 7}, 1-8.

Joreskog, K. G., & Moustaki, I. (2001). Factor analysis of ordinal variables: A comparison of three approaches. \emph{Multivariate Behavioral Research, 36}, 347-387.
}
\details{
\subsection{\eqn{C_F}}{
The \eqn{C_F} statistic compares the log-likelihood of the model-implied proportions (\eqn{\pi_r}) with the observed proportions (\eqn{p_r}) of the full multivariate response patterns:
\deqn{
C_F = 2N\sum_{r}p_{r}\ln[p_{r}/\hat{\pi}_{r}],
}
which asymptotically has a chi-square distribution with
\deqn{
df_F = m^k - n - 1,
}
where \eqn{k} denotes the number of items with discrete response scales, \eqn{m} denotes the number of response options, and \eqn{n} denotes the number of parameters to be estimated. Notice that \eqn{C_F} results may be biased because of large numbers of empty cells in the multivariate contingency table.
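
As an illustration, the statistic itself is easy to compute once the observed and model-implied pattern proportions are available (a minimal sketch; \code{p}, \code{pi.hat} and \code{N} are hypothetical placeholders, not objects returned by lavaan):

\preformatted{
# p      : observed proportions of the full response patterns
# pi.hat : model-implied proportions of the same patterns
# N      : sample size
CF <- 2 * N * sum(p * log(p / pi.hat))
}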
}
\subsection{\eqn{C_M}}{
The \eqn{C_M} statistic is based on the \eqn{C_F} statistic, and compares the proportions implied by the model of interest (Model 1) with proportions implied by the assumption of an underlying multivariate normal distribution (Model 0):
\deqn{
C_M = C_{F1} - C_{F0},
}
where \eqn{C_{F0}} is \eqn{C_F} for Model 0 and \eqn{C_{F1}} is \eqn{C_F} for Model 1. Statistic \eqn{C_M} has a chi-square distribution with degrees of freedom
\deqn{
df_M = k(k-1)/2 + k(m-1) - n_{1},
}
where \eqn{k} denotes the number of items with discrete response scales, \eqn{m} denotes the number of response options, \eqn{k(k-1)/2} denotes the number of polychoric correlations, \eqn{k(m-1)} denotes the number of thresholds, and \eqn{n_1} is the number of parameters of the model of interest. Notice that \eqn{C_M} results may be biased because of large numbers of empty cells in the multivariate contingency table. However, bias may cancel out, as both Model 1 and Model 0 contain the same pattern of empty responses.
}
\subsection{\eqn{C_P}}{
With the \eqn{C_P} statistic we only consider pairs of responses, and compare observed sample proportions (\eqn{p}) with model-implied proportions of pairs of responses (\eqn{\pi}). For items \eqn{i} and \eqn{j} we obtain a pairwise likelihood ratio test statistic \eqn{C_{P_{ij}}}
\deqn{
C_{P_{ij}}=2N\sum_{c_i=1}^m \sum_{c_j=1}^m p_{c_i,c_j}\ln[p_{c_i,c_j}/\hat{\pi}_{c_i,c_j}],
}
where \eqn{m} denotes the number of response options and \eqn{N} denotes sample size. The \eqn{C_P} statistic has an asymptotic chi-square distribution with degrees of freedom equal to the information \eqn{(m^2 - 1)} minus the number of parameters (2(m-1) thresholds and 1 correlation),
\deqn{
df_P = m^{2} - 2(m - 1) - 2.
}
As \eqn{k} denotes the number of items, there are \eqn{k(k-1)/2} possible pairs of items. The \eqn{C_P} statistic should therefore be applied with a Bonferroni adjusted level of significance \eqn{\alpha^*}, with
\deqn{
\alpha^* = \alpha / (k(k-1)/2),
}
to keep the family-wise error rate at \eqn{\alpha}. The hypothesis of overall goodness-of-fit is tested at \eqn{\alpha} and rejected as soon as \eqn{C_P} is significant at \eqn{\alpha^*} for at least one pair of items. Notice that with dichotomous items, \eqn{m = 2}, and \eqn{df_P = 0}, so that the hypothesis cannot be tested.
}
}
\seealso{
\code{\link{lavTables}, \link{lavaan}}
}
\examples{
# Data
HS9 <- HolzingerSwineford1939[,c("x1","x2","x3","x4","x5",
                                 "x6","x7","x8","x9")]
HSbinary <- as.data.frame( lapply(HS9, cut, 2, labels=FALSE) )

# Single group example with one latent factor
HS.model <- ' trait =~ x1 + x2 + x3 + x4 '
fit <- cfa(HS.model, data=HSbinary[,1:4], ordered=names(HSbinary),
           estimator="PML")
lavTablesFitCm(fit)
lavTablesFitCp(fit)
lavTablesFitCf(fit)
}
\keyword{ pairwise maximum likelihood, discrete data, goodness of fit }
lavaan/man/modificationIndices.Rd0000644000176200001440000000564412531323750016534 0ustar liggesusers\name{modificationIndices}
\alias{modificationIndices}
\alias{modificationindices}
\alias{modindices}
\title{Modification Indices}
\description{
Modification indices of a latent variable model.}
\usage{
modificationIndices(object, standardized = TRUE, power = FALSE,
    delta = 0.1, alpha = 0.05, high.power = 0.75, sort. = FALSE,
    minimum.value = 0, maximum.number = nrow(LIST), free.remove = TRUE,
    na.remove = TRUE, op = NULL)

modindices(object, standardized = TRUE, power = FALSE, delta = 0.1,
    alpha = 0.05, high.power = 0.75, sort. = FALSE, minimum.value = 0,
    maximum.number = nrow(LIST), free.remove = TRUE, na.remove = TRUE,
    op = NULL)
}
\arguments{
\item{object}{An object of class \code{\linkS4class{lavaan}}.}
\item{standardized}{If \code{TRUE}, two extra columns (sepc.lv and sepc.all) will contain standardized values for the epc's. In the first column (sepc.lv), standardization is based on the variances of the (continuous) latent variables. In the second column (sepc.all), standardization is based on the variances of both (continuous) observed and latent variables. (Residual) covariances are standardized using (residual) variances.}
\item{power}{If \code{TRUE}, the (post-hoc) power is computed for each modification index, using the values of \code{delta} and \code{alpha}.}
\item{delta}{The value of the effect size, as used in the post-hoc power computation, currently using the unstandardized metric of the epc column.}
\item{alpha}{The significance level used for deciding if the modification index is statistically significant or not.}
\item{high.power}{If the computed power is higher than this cutoff value, the power is considered `high'. If not, the power is considered `low'. This affects the values in the 'decision' column in the output.}
\item{sort.}{Logical. If TRUE, sort the output using the modification index values. Higher values appear first.}
\item{minimum.value}{Numeric. Filter output and only show rows with a modification index value equal or higher than this minimum value.}
\item{maximum.number}{Integer. Filter output and only show the first \code{maximum.number} rows. Most useful when combined with the \code{sort.} option.}
\item{free.remove}{Logical. If TRUE, filter output by removing all rows corresponding to free (unconstrained) parameters in the original model.}
\item{na.remove}{Logical. If TRUE, filter output by removing all rows with NA values for the modification indices.}
\item{op}{Character string. Filter the output by selecting only those rows with operator \code{op}.}
}
\value{
A data.frame containing modification indices and EPC's.
}
\examples{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)
modindices(fit)
}
lavaan/man/FacialBurns.Rd0000644000176200001440000000162012104004704014746 0ustar liggesusers\name{FacialBurns}
\alias{FacialBurns}
\docType{data}
\title{Dataset for illustrating the InformativeTesting function.}
\description{
A dataset from the Dutch burn center (http://www.adbc.nl). The data were used to examine psychosocial functioning in patients with facial burn wounds. Psychosocial functioning was measured by anxiety and depression symptoms (HADS), and self-esteem (Rosenberg's self-esteem scale).}
\usage{data(FacialBurns)}
\format{
A data frame of 77 observations of 6 variables.
\describe{ \item{\code{Selfesteem}}{Rosenberg's self-esteem scale} \item{\code{HADS}}{Anxiety and depression scale} \item{\code{Age}}{Age measured in years, control variable} \item{\code{TBSA}}{Total Burned Surface Area} \item{\code{RUM}}{Rumination, control variable} \item{\code{Sex}}{Gender, grouping variable} } } \examples{ head(FacialBurns) } lavaan/man/lavListInspect.Rd0000644000176200001440000001235013043664351015530 0ustar liggesusers\name{lavListInspect} \alias{lavListInspect} \alias{lavListTech} \title{Inspect or extract information from a lavaanList object} \description{ The \code{lavListInspect()} and \code{lavListTech()} functions can be used to inspect/extract information that is stored inside (or can be computed from) a lavaanList object. } \usage{ lavListInspect(object, what = "free", add.labels = TRUE, add.class = TRUE, list.by.group = TRUE, drop.list.single.group = TRUE) lavListTech(object, what = "free", add.labels = FALSE, add.class = FALSE, list.by.group = FALSE, drop.list.single.group = FALSE) } \arguments{ \item{object}{An object of class \code{\linkS4class{lavaanList}}.} \item{what}{Character. What needs to be inspected/extracted? See Details for a full list. Note: the \code{what} argument is not case-sensitive (everything is converted to lower case.)} \item{add.labels}{If \code{TRUE}, variable names are added to the vectors and/or matrices.} \item{add.class}{If \code{TRUE}, vectors are given the `lavaan.vector' class; matrices are given the `lavaan.matrix' class, and symmetric matrices are given the `lavaan.matrix.symmetric' class. This only affects the way they are printed on the screen.} \item{list.by.group}{Logical. Only used when the output are model matrices. If \code{TRUE}, the model matrices are nested within groups. If \code{FALSE}, a flattened list is returned containing all model matrices, with repeated names for multiple groups.} \item{drop.list.single.group}{If \code{FALSE}, the results are returned as a list, where each element corresponds to a group (even if there is only a single group.) If \code{TRUE}, the list will be unlisted if there is only a single group.} } \details{ The \code{lavListInspect()} and \code{lavListTech()} functions only differ in the way they return the results. The \code{lavListInspect()} function will prettify the output by default, while the \code{lavListTech()} will not attempt to prettify the output by default. Below is a list of possible values for the \code{what} argument, organized in several sections: Model matrices: \describe{ \item{\code{"free"}:}{A list of model matrices. The non-zero integers represent the free parameters. The numbers themselves correspond to the position of the free parameter in the parameter vector. This determines the order of the model parameters in the output of for example \code{coef()} and \code{vcov()}.} \item{\code{"partable"}:}{A list of model matrices. The non-zero integers represent both the fixed parameters (for example, factor loadings fixed at 1.0), and the free parameters if we ignore any equality constraints. They correspond with all entries (fixed or free) in the parameter table. See \code{\link{parTable}}.} \item{\code{"start"}:}{A list of model matrices. The values represent the starting values for all model parameters. Alias: \code{"starting.values"}.} } Information about the data (including missing patterns): \describe{ \item{\code{"group"}:}{A character string. The group variable in the data.frame (if any).} \item{\code{"ngroups"}:}{Integer. 
The number of groups.}
\item{\code{"group.label"}:}{A character vector. The group labels.}
\item{\code{"cluster"}:}{A character vector. The cluster variable(s) in the data.frame (if any).}
\item{\code{"ordered"}:}{A character vector. The ordered variables.}
\item{\code{"nobs"}:}{Integer vector. The number of observations in each group that were used in the analysis (in each dataset).}
\item{\code{"norig"}:}{Integer vector. The original number of observations in each group (in each dataset).}
\item{\code{"ntotal"}:}{Integer. The total number of observations that were used in the analysis. If there is just a single group, this is the same as the \code{"nobs"} option; if there are multiple groups, this is the sum of the \code{"nobs"} numbers for each group (in each dataset).}
}

Model features:

\describe{
\item{\code{"meanstructure"}:}{Logical. \code{TRUE} if a meanstructure was included in the model.}
\item{\code{"categorical"}:}{Logical. \code{TRUE} if categorical endogenous variables were part of the model.}
\item{\code{"fixed.x"}:}{Logical. \code{TRUE} if the exogenous x-covariates are treated as fixed.}
\item{\code{"parameterization"}:}{Character. Either \code{"delta"} or \code{"theta"}.}
}

\describe{
\item{\code{"list"}:}{The parameter table. The same output as given by \code{parTable()}.}
\item{\code{"options"}:}{List. The option list.}
\item{\code{"call"}:}{List. The call as returned by \code{match.call()}, coerced to a list.}
}
}
\seealso{
\code{\link{lavaanList}}
}
\examples{
# fit model
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

# a data generating function
generateData <- function() simulateData(HS.model, sample.nobs = 100)

set.seed(1234)
fit <- semList(HS.model, dataFunction = generateData, ndat = 5,
               store.slots = "partable")

# extract information
lavListInspect(fit, "free")
lavListTech(fit, "free")
}
lavaan/man/lavMatrixRepresentation.Rd0000644000176200001440000000301612760606675017467 0ustar liggesusers\name{lavMatrixRepresentation}
\alias{lavMatrixRepresentation}
\title{lavaan matrix representation}
\description{
Extend the parameter table with a matrix representation.
}
\usage{
lavMatrixRepresentation(partable, representation = "LISREL",
                        add.attributes = FALSE, as.data.frame. = TRUE)
}
\arguments{
\item{partable}{A lavaan parameter table (as extracted by the \code{\link{parTable}} function, or generated by the \code{\link{lavPartable}} function).}
\item{representation}{Character. The matrix representation style. Currently, only the all-y version of the LISREL representation is supported.}
\item{add.attributes}{Logical. If \code{TRUE}, additional information about the model matrix representation is added as attributes.}
\item{as.data.frame.}{Logical. If \code{TRUE}, the extended parameter table is returned as a data.frame.}
}
\value{
A list or a data.frame containing the original parameter table, plus three columns: a \code{"mat"} column containing matrix names, and \code{"row"} and \code{"col"} columns for the row and column indices of the model parameters in the model matrices.
}
\seealso{\code{\link{lavParTable}}, \code{\link{parTable}}}
\examples{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)

# extract partable
partable <- parTable(fit)

# add matrix representation (and show only a few columns)
lavMatrixRepresentation(partable)[,c("lhs","op","rhs","mat","row","col")]
}
lavaan/man/varTable.Rd0000644000176200001440000000320012142446773014323 0ustar liggesusers\name{varTable}
\alias{varTable}
\alias{vartable}
\alias{variableTable}
\alias{variabletable}
\title{Variable Table}
\description{
Summary information about the variables included in either a data.frame or a fitted lavaan object.}
\usage{
varTable(object, ov.names = names(object), ov.names.x = NULL,
         ordered = NULL, factor = NULL, as.data.frame. = TRUE)
}
\arguments{
\item{object}{Either a data.frame, or an object of class \code{\linkS4class{lavaan}}.}
\item{ov.names}{Only used if object is a data.frame. A character vector containing the variables that need to be summarized.}
\item{ov.names.x}{Only used if object is a data.frame. A character vector containing additional variables that need to be summarized.}
\item{ordered}{Character vector. Which variables should be treated as ordered factors?}
\item{factor}{Character vector. Which variables should be treated as (unordered) factors?}
\item{as.data.frame.}{If \code{TRUE}, return the list as a data.frame.}
}
\value{
A \code{list} or \code{data.frame} containing summary information about the variables in a data.frame. If \code{object} is a fitted lavaan object, the summary information pertains to the observed variables that are included in the model. The summary information includes variable type (numeric, ordered, \ldots), the number of non-missing values, the mean and variance for numeric variables, the number of levels of ordered variables, and the labels for ordered variables.
}
\examples{
HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

fit <- cfa(HS.model, data=HolzingerSwineford1939)
varTable(fit)
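
# varTable() can also be applied to a raw data.frame directly; a minimal
# sketch based on the usage above, assuming (for illustration only) that
# we want x1 treated as an ordered factor:
varTable(HolzingerSwineford1939, ov.names = c("x1", "x2", "x3"),
         ordered = "x1")
}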