sem/0000755000175000017500000000000014126775115011201 5ustar nileshnileshsem/MD50000644000175000017500000001153514126775115011516 0ustar nileshnilesh6c3f3130f6674d0f15b055e56149ea53 *DESCRIPTION 6fd5f04c0ae824d6a23100edf0bd01b4 *NAMESPACE 55b80674bdba0ce2e4de86159433dbaa *NEWS 8847aeafd88a3c87be520511b7c30009 *R/anova.methods.R 511491887de6453cb0a5cd1382787b13 *R/bootSem.R 9781913d2d4ddf378f4a67dff2588a68 *R/cfa.R 2498e7096d81fc8d04d7a1f3cc25de12 *R/chisqNull.R 7836ee6204abab7585800999f69e5bf6 *R/csem.R 14578b3cfb3dd6236cbe19be9c98cefe *R/effects.R 8f3b53b89979418e1f648c682fa1e4f4 *R/from-matrixcalc.R f9a41ba3f0542636d7bbafb2068c997f *R/fscores.R b21bb0a8ce1342e86a47e3d900e5343c *R/icmethods.R b843cbe79e1a2cff80a78bd128138c3c *R/miSem.R ca15dcfec81d5513a0d1544f33312877 *R/modIndices.R 42558c43c84ad02789f78a89cb69e671 *R/multigroup.R d4f3de94e03ea9910f63d3d2555cdf16 *R/objectiveFIML.R 0237565343dccf23c450c2721605d3ae *R/objectiveFIML2.R 6bd1f1d099cc892d786742dcf268dfcd *R/objectiveGLS.R b71c84e322a34b3d654cc4709badb16e *R/objectiveGLS2.R c61060fe404ea72f9af42b110ecf0f9b *R/objectiveML.R 250d32c08e200ec466ab7d8d75b680fc *R/objectiveML2.R 3af2018d0f07faee29b74b1f3b14007a *R/optimizerNlm.R ce8734534902e16ae94ec7df1851785b *R/optimizerNlminb.R 3a98fcdfcfebb147856762b2b6eb5fe1 *R/optimizerOptim.R 542187b66c55ee301e0e86d4884f5320 *R/optimizerSem.R f7b1f6baf2fcf9841eb06373ea19c58c *R/pathDiagram.R 12062f699cc233aaac93b2973b791e0a *R/print.methods.R 56cf34eba0ff8466dd53c84a687f1e20 *R/ram.R b6761bac9f8a0bad80cb03c93edc06dd *R/rawMoments.R 99f39bbaa2b1332a5b98071139bcee2d *R/readMoments.R f6c99509287da072a47f5b324b98a4a2 *R/residuals.R fa8bde0c15ab075d119e02e3369b72c7 *R/robust.R d52bc4f6a2d0f0618163570b41eac22b *R/sem.R 93f4e9e335ed2b073e8f1e34e820e644 *R/specifyModel.R 24800e46cf9a596191f208fe0a4664b4 *R/standardizedCoefficients.R e9d74da3560a9970a141ca14be48f273 *R/startvalues.R bb0d605868b06e1bb7c9d2acda4aa428 *R/summary.methods.R bac40fa9079b65706eee65802482cbeb 
*R/tsls.R 5b3aaf75d5c4a1e5dd547ffbed4f0fa5 *R/update.R 503e74b7d42e6c0d9486febe49739763 *cleanup 8ae227af42addc469ac160300057d163 *data/Bollen.rda 5040697d08f2855983cba10d9001b849 *data/CNES.rda 25b362f1446ebc03303492bd19489cce *data/HS.data.rda 75586bdbf9352216b00627c825705771 *data/Klein.txt.bz2 f9fa983a237637c61801c9d8d849cdcc *data/Kmenta.txt.gz 0e5e82bcb8b1057b973dae3c097aab18 *data/Tests.rda 7e382b6314a1a9226b7536666a9e78e8 *inst/CHANGES d2cc9fecf94a4ec26090e69783fd8203 *inst/etc/GreekLetters.txt b292b20b397dcc0974b8b723f5d7d27b *inst/etc/M-McArdle.txt 9397e8325ff3e88642589790d0ad762e *inst/etc/R-Blau-Duncan.txt 06a3255987aa921ce484afaab101069d *inst/etc/R-DHP.txt 3e27dc00e04e11ce2a010ffdaa86d43f *inst/etc/R-Kerchoff.txt c0c076fedae61b6a12a5c2e67e761e3b *inst/etc/R-Thurstone.txt e965161ac9ff2aa625d0d77c849beda4 *inst/etc/S-Wheaton.txt 8aefee6fb790b6cb15859bbab0742547 *inst/etc/model-Blau-Duncan.txt 1a89f94545cf6a0e19bf396f2c9afd1d *inst/etc/model-Bollen.txt a417cb71b35b0a0a74ed89d88d853ba7 *inst/etc/model-DHP.txt d2ebc8ead28f05e25f14950fb21d10dc *inst/etc/model-HS.txt 4e98d4ca979caad38f502629c01b8ee4 *inst/etc/model-Kerchoff.txt a19b4cf4b68944fd2c98f3ab0342e4c2 *inst/etc/model-McArdle.txt ab484607fff050ac6389a3fce5a17ae4 *inst/etc/model-Tests.txt bfaa1e1feb720bd9c67498932eafd4e6 *inst/etc/model-Thurstone.txt 0f95e99fa972b9f9670984dfbd13eefb *inst/etc/model-Wheaton-1.txt 52b1d7fd5d57b87ffe394ee66456feef *inst/etc/model-Wheaton-2.txt 8454daf6904c568fbab381c0c74e55a2 *man/Bollen.Rd 3bb3d66e58652dde182ed40d56fca6e2 *man/CNES.Rd e5d8b9a0edf3f5638c295fccbecc7e95 *man/HS.data.Rd 6ae8eb1d60e0ce2be07ddbf466246a61 *man/Klein.Rd 518bbcccc0fe1dc3264bcdd2f1f8ed65 *man/Kmenta.Rd 71fd7a88c3a51553c1f5b6755f61bf25 *man/ML.methods.Rd c5d585b30fc4f4808e16904e3fe0f640 *man/Tests.Rd ffd592b205b2d45134b1d0daf4f9b98e *man/bootSem.Rd dee292347d7d2af36d5947bc9e3cb51e *man/effects.Rd 60d22fbcd9982c4d96732a1f575710d2 *man/fscores.Rd c60f9556597268e50e597c40e4eb8c91 
*man/information.criteria.Rd afdb8564866862c0e810fda48156e0b7 *man/miSem.Rd e84500c1703d967115c88b8bf640231d *man/modIndices.Rd 19774522f55f1f6bdbff0976deeff0fd *man/objective.functions.Rd 12104db120bb81220c71a8d76cb34f5c *man/optimizers.Rd 365bb7d42277e7b753f2adf7e5749451 *man/pathDiagram.Rd 491e32489cfd78eabfb6b00467f7ab40 *man/ram.Rd 1d4dca54ecf6b458c2314cd67411b031 *man/rawMoments.Rd ac238f7eae2bc845e73a7483b911b0ce *man/readMoments.Rd 2d310af7dc3a04021593595495898d9b *man/residuals.Rd 67a9be4cc7e4463aa0f0f949e811ac9a *man/sem-deprecated.Rd d4359105dd9fa409a31c96ef2070af7b *man/sem.Rd f8f25eebcaaeb3924f307bc66aa3f949 *man/specifyModel.Rd 6a4dda2f6c8d69abcc62d8c4de3da82f *man/standardizedCoefficients.Rd a1d9e03dcff9cea404a755ccbbe1af2d *man/tsls.Rd 07f06070ce4a1675791580f41a9b4d10 *src/Makevars b8e8870841139b5e6471bd7140e80c26 *src/csem.cpp 9f6cbfc66e5cbed2f4fc303070dd3a17 *src/csem.h 1c6c11f2fe85c7334eaeeb057b0fe5a3 *src/csemnlm.c f14c4c1dba9d4e0a6d0a9d425f802127 *src/csemnlm.h 9a89a4935d920e22e6ebe171d088e9fc *src/init.c a7d81ff2f83fdf2ebd2662435de48782 *src/uncmin.c 120ac4d5369777630e7f17282a9c02b6 *src/utils.h sem/DESCRIPTION0000644000175000017500000000344314126775115012713 0ustar nileshnileshPackage: sem Version: 3.1-13 Date: 2021-10-03 Title: Structural Equation Models Authors@R: c(person("John", "Fox", role = c("aut", "cre"), email = "jfox@mcmaster.ca"), person("Zhenghua", "Nie", role = "aut", email="zhenghua.nie@gmail.com"), person("Jarrett", "Byrnes", role = "aut", email="byrnes@nceas.ucsb.edu"), person("Michael", "Culbertson", role = "ctb"), person("Saikat", "DebRoy", role = "ctb"), person("Michael", "Friendly", role = "ctb"), person("Benjamin", "Goodrich", role = "ctb"), person(given="Richard H.", family="Jones", role = "ctb"), person("Adam", "Kramer", role = "ctb"), person("Georges", "Monette", role = "ctb"), person("Frederick", "Novomestky", role = "ctb"), person("R-Core", role = "ctb") ) Depends: R (>= 3.5.0), stats Imports: MASS, boot, mi (>= 
0.9-99), utils Suggests: polycor, DiagrammeR LazyLoad: yes LazyData: yes ByteCompile: yes Description: Functions for fitting general linear structural equation models (with observed and latent variables) using the RAM approach, and for fitting structural equations in observed-variable models by two-stage least squares. License: GPL (>= 2) URL: https://www.r-project.org, https://socialsciences.mcmaster.ca/jfox/ Author: John Fox [aut, cre], Zhenghua Nie [aut], Jarrett Byrnes [aut], Michael Culbertson [ctb], Saikat DebRoy [ctb], Michael Friendly [ctb], Benjamin Goodrich [ctb], Richard H. Jones [ctb], Adam Kramer [ctb], Georges Monette [ctb], Frederick Novomestky [ctb], R-Core [ctb] Maintainer: John Fox Repository: CRAN Repository/R-Forge/Project: sem Repository/R-Forge/Revision: 182 Repository/R-Forge/DateTimeStamp: 2021-10-03 20:22:56 Date/Publication: 2021-10-05 07:10:05 UTC NeedsCompilation: yes Packaged: 2021-10-03 20:28:07 UTC; rforge sem/man/0000755000175000017500000000000014126411066011744 5ustar nileshnileshsem/man/rawMoments.Rd0000644000175000017500000000500711701252753014373 0ustar nileshnilesh\name{rawMoments} \alias{rawMoments} \alias{rawMoments.formula} \alias{rawMoments.default} \alias{cov2raw} \alias{print.rawmoments} \title{Compute Raw Moments Matrix} \description{ Computes the \dQuote{uncorrected} sum-of-squares-and-products matrix divided by the number of observations. } \usage{ \method{rawMoments}{formula}(formula, data, subset, na.action, contrasts=NULL, ...) \method{rawMoments}{default}(object, na.rm=FALSE, ...) cov2raw(cov, mean, N, sd) \method{print}{rawmoments}(x, ...) 
} \arguments{ \item{object}{a one-sided model formula or an object coercible to a numeric matrix.} \item{formula}{a one-sided model formula specifying the model matrix for which raw moments are to be computed; note that a constant is included if it is not explicitly suppressed by putting \code{-1} in the formula.} \item{data}{an optional data frame containing the variables in the formula. By default the variables are taken from the environment from which \code{rawMoments} is called.} \item{subset}{an optional vector specifying a subset of observations to be used in computing moments.} \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. The default is set by the \code{na.action} option.} \item{contrasts}{an optional list. See the \code{contrasts.arg} argument of \code{\link[stats:model.matrix]{model.matrix.default}}}. \item{na.rm}{if \code{TRUE}, any data rows with missing data will be removed.} \item{cov}{a covariance or correlation matrix.} \item{mean}{a vector of means.} \item{N}{the number of observations on which the covariances or correlations are based.} \item{sd}{an optional vector of standard deviations, to be given if \code{cov} is a correlation matrix.} \item{x}{an object of class \code{rawmoments} to print.} \item{\dots}{arguments passed down.} } \value{ \code{rawMoments} and \code{cov2raw} return an object of class \code{rawmoments}, which is simply a matrix with an attribute \code{"N"} that contains the number of observations on which the moments are based. 
} \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ # the following are all equivalent (with the exception of the name of the intercept): rawMoments(cbind(1, Kmenta)) rawMoments(~ Q + P + D + F + A, data=Kmenta) Cov <- with(Kmenta, cov(cbind(Q, P, D, F, A))) cov2raw(Cov, colMeans(Kmenta), nrow(Kmenta)) } \keyword{manip} sem/man/Klein.Rd0000644000175000017500000000266111701252753013304 0ustar nileshnilesh\name{Klein} \alias{Klein} \docType{data} \title{Klein's Data on the U. S. Economy} \usage{Klein} \description{ Data for Klein's (1950) simple econometric model of the U. S. economy. The \code{Klein} data frame has 22 rows and 10 columns. } \format{ This data frame contains the following columns: \describe{ \item{Year}{1921--1941} \item{C}{consumption.} \item{P}{private profits.} \item{Wp}{private wages.} \item{I}{investment.} \item{K.lag}{capital stock, lagged one year.} \item{X}{equilibrium demand.} \item{Wg}{government wages.} \item{G}{government non-wage spending.} \item{T}{indirect business taxes and net exports.} } } \source{ Greene, W. H. (1993) \emph{Econometric Analysis, Second Edition.} Macmillan. } \references{ Klein, L. (1950) \emph{Economic Fluctuations in the United States 1921--1941.} Wiley. 
} \examples{ Klein$P.lag <- c(NA, Klein$P[-22]) Klein$X.lag <- c(NA, Klein$X[-22]) summary(tsls(C ~ P + P.lag + I(Wp + Wg), instruments=~1 + G + T + Wg + I(Year - 1931) + K.lag + P.lag + X.lag, data=Klein)) summary(tsls(I ~ P + P.lag + K.lag, instruments=~1 + G + T + Wg + I(Year - 1931) + K.lag + P.lag + X.lag, data=Klein)) summary(tsls(Wp ~ X + X.lag + I(Year - 1931), instruments=~1 + G + T + Wg + I(Year - 1931) + K.lag + P.lag + X.lag, data=Klein)) } \keyword{datasets} sem/man/ML.methods.Rd0000644000175000017500000001420012213674643014211 0ustar nileshnilesh\name{ML.methods} \alias{ML.methods} \alias{GLS.methods} \alias{anova.objectiveML} \alias{anova.objectiveFIML} \alias{logLik.objectiveML} \alias{logLik.objectiveFIML} \alias{AIC.objectiveML} \alias{AIC.objectiveFIML} \alias{AICc.objectiveML} \alias{AICc.objectiveFIML} \alias{BIC.objectiveML} \alias{BIC.objectiveFIML} \alias{CAIC.objectiveML} \alias{CAIC.objectiveFIML} \alias{deviance.objectiveML} \alias{deviance.objectiveFIML} \alias{print.objectiveML} \alias{print.objectiveGLS} \alias{print.objectiveFIML} \alias{summary.objectiveML} \alias{summary.objectiveGLS} \alias{summary.objectiveFIML} \alias{print.summary.objectiveML} \alias{print.msemObjectiveML} \alias{print.msemObjectiveGLS} \alias{summary.msemObjectiveML} \alias{summary.msemObjectiveGLS} \alias{deviance.msemObjectiveML} \alias{AIC.msemObjectiveML} \alias{AICc.msemObjectiveML} \alias{BIC.msemObjectiveML} \alias{deviance.msemObjectiveML} \alias{anova.msemObjectiveML} \alias{logLik.msemObjectiveML} \title{ Methods for sem Objects Fit Using the \code{objectiveML}, \code{objectiveGLS}, \code{objectiveFIML}, \code{msemObjectiveML}, and \code{msemObjectiveGLS} Objective Functions } \description{ These functions are for objects fit by \code{\link{sem}} using the \code{\link{objectiveML}} (multivariate-normal full-information maximum-likelihood), \code{link{objectiveFIML}} (multivariate-normal full-information maximum-likihood in the presence of missing 
data), \code{\link{objectiveGLS}} (generalized least squares), and \code{\link{msemObjectiveML}} (multigroup multivariate-normal FIML) objective functions. } \usage{ \method{anova}{objectiveML}(object, model.2, robust=FALSE, ...) \method{anova}{objectiveFIML}(object, model.2, ...) \method{logLik}{objectiveML}(object, ...) \method{logLik}{objectiveFIML}(object, saturated=FALSE, intercept="Intercept", iterlim=1000, ...) \method{deviance}{objectiveML}(object, ...) \method{deviance}{objectiveFIML}(object, saturated.logLik, ...) \method{deviance}{msemObjectiveML}(object, ...) \method{AIC}{objectiveML}(object, ..., k) \method{AIC}{objectiveFIML}(object, saturated.logLik, ..., k) \method{AIC}{msemObjectiveML}(object, ..., k) \method{AICc}{objectiveML}(object, ...) \method{AICc}{objectiveFIML}(object, saturated.logLik, ...) \method{AICc}{msemObjectiveML}(object, ...) \method{BIC}{objectiveML}(object, ...) \method{BIC}{objectiveFIML}(object, saturated.logLik, ...) \method{BIC}{msemObjectiveML}(object, ...) \method{CAIC}{objectiveML}(object, ...) \method{CAIC}{objectiveFIML}(object, saturated.logLik, ...) \method{print}{objectiveML}(x, ...) \method{print}{objectiveGLS}(x, ...) \method{print}{objectiveFIML}(x, saturated=FALSE, ...) \method{print}{msemObjectiveML}(x, ...) \method{print}{msemObjectiveGLS}(x, ...) \method{summary}{objectiveML}(object, digits=getOption("digits"), conf.level=.90, robust=FALSE, analytic.se=object$t <= 500, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR", "AIC", "AICc", "BIC", "CAIC"), ...) \method{summary}{objectiveFIML}(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("AIC", "AICc", "BIC", "CAIC"), saturated=FALSE, intercept="Intercept", saturated.logLik, ...) \method{summary}{objectiveGLS}(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR"), ...) 
\method{summary}{msemObjectiveML}(object, digits=getOption("digits"), conf.level=.90, robust=FALSE, analytic.se=object$t <= 500, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR", "AIC", "AICc", "BIC"), ...) \method{summary}{msemObjectiveGLS}(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR"), ...) } \arguments{ \item{object, model.2, x}{an object inheriting from class \code{objectiveML}, \code{objectiveGLS}, \code{objectiveFIML}, \code{msemObjectiveML}, or \code{msemObjectiveGLS}.} \item{robust}{if \code{TRUE}, compute robust standard errors or test.} \item{fit.indices}{a character vector of ``fit indices'' to report; the allowable values are those given in \bold{Usage} above, and vary by the objective function. If the argument isn't given then the fit indices reported are taken from the R \code{fit.indices} option; if this option isn't set, then only the AIC and BIC are reported for models fit with \code{objectiveML}, \code{objectiveFIML}, or \code{msemObjectiveML}, and no fit indices are reported for models fit with \code{objectiveGLS} or \code{msemObjectiveGLS}.} \item{k, \ldots}{ignored.} \item{digits}{digits to be printed.} \item{conf.level}{level for confidence interval for the RMSEA index (default is .9).} \item{analytic.se}{use analytic (as opposed to numeric) coefficient standard errors; default is \code{TRUE} where analytic standard errors are available if there are no more than 100 parameters in the model and \code{FALSE} otherwise.} \item{saturated}{if \code{TRUE} (the default is \code{FALSE}); compute the log-likelihood (and statistics that depend on it) for the saturated model when the objective function is FIML in the presence of missing data. 
This can be computationally costly.} \item{intercept}{the name of the intercept regressor in the raw data, to be used in calculating the saturated log-likelihood for the FIML estimator; the default is \code{"Intercept"}.} \item{saturated.logLik}{the log-likelihood for the saturated model, as returned by \code{logLik} with \code{saturated=TRUE}; if absent, this will be computed and the computation can be time-consuming.} \item{iterlim}{iteration limit used by the \code{nlm} optimizer to compute the saturated log-likelihood for the FIML estimator with missing data; defaults to \code{1000}.} } \author{John Fox \email{jfox@mcmaster.ca} and Jarrett Byrnes} \references{ See \code{\link{sem}}. } \seealso{ \code{\link{sem}}, \code{\link{objective.functions}}, \code{\link{modIndices.objectiveML}} } \keyword{models} sem/man/sem-deprecated.Rd0000644000175000017500000000347111701252753015124 0ustar nileshnilesh\name{sem-deprecated} \alias{sem-deprecated} \alias{boot.sem} \alias{mod.indices} \alias{normalized.residuals} \alias{path.diagram} \alias{raw.moments} \alias{read.moments} \alias{specify.model} \alias{standardized.coefficients} \alias{standardized.residuals} \alias{std.coef} \title{Deprecated Functions in the sem Package} \description{ These functions are provided for compatibility with older versions of the \pkg{sem} package only, and may be removed eventually. Although an effort has been made to insure backwards-compatibility, commands that worked in versions of the \pkg{sem} package prior to version 2.0-0 will not necessarily work in version 2.0-0 and beyond, or may not work in the same manner. } \usage{ boot.sem(...) mod.indices(...) normalized.residuals(...) path.diagram(...) raw.moments(...) read.moments(...) specify.model(...) standardized.coefficients(...) standardized.residuals(...) std.coef(...) 
} \arguments{ \item{\dots}{pass arguments down to replacements for deprecated functions.} } \details{ \code{boot.sem} is now a synonym for the \code{\link{bootSem}} function. \code{mod.indices} is now a synonym for \code{\link{modIndices}}. \code{normalized.residuals} is now a synonym for \code{\link{normalizedResiduals}}. \code{path.diagram} is now a synonym for \code{\link{pathDiagram}}. \code{raw.moments} is now a synonym for \code{\link{rawMoments}}. \code{read.moments} is now a synonym for \code{\link{readMoments}}. \code{specify.model} is now a synonym for \code{\link{specifyModel}}. \code{standardized.coefficients} and \code{std.coef} are now synonyms for the \code{\link{standardizedCoefficients}} and \code{\link{stdCoef}} functions. \code{standardized.residuals} is now a synonym for \code{\link{standardizedResiduals}}. }sem/man/information.criteria.Rd0000644000175000017500000000200111701252753016354 0ustar nileshnilesh\name{information.criteria} \alias{information.criteria} \alias{AICc} \alias{CAIC} \title{ Additional Information Criteria } \description{ These are generic functions for computing, respectively, the AICc (second-order corrected Akaike Information Criterion) and CAIC (consistent Akaike Information Criterion). } \usage{ AICc(object, ...) CAIC(object, ...) } \arguments{ \item{object}{an object for which an appropriate \code{AICc} or \code{CAIC} method exists.} \item{\ldots}{possible additional arguments for methods.} } \author{Jarrett Byrnes and John Fox \email{jfox@mcmaster.ca}} \references{ Burnham, K. P., and Anderson, D. R. (1998) \emph{Model Selection and Inference: A Practical Information-Theoretical Approach.} Springer. Bozdogan, H. (1987) Model selection and Akaike's information criterion (AIC). \emph{Psychometrika} bold{52}, 345--370. 
} \seealso{ \code{\link{AICc.objectiveML}}, \code{\link{CAIC.objectiveML}} } \keyword{models}sem/man/modIndices.Rd0000644000175000017500000001321712127622145014316 0ustar nileshnilesh\name{modIndices} \alias{modIndices} \alias{modIndices.objectiveML} \alias{modIndices.msemObjectiveML} \alias{print.modIndices} \alias{print.msemModIndices} \alias{summary.modIndices} \alias{summary.msemModIndices} \title{Modification Indices for Structural Equation Models} \description{ \code{mod.indices} calculates modification indices (score tests) and estimated parameter changes for the fixed and constrained parameters in a structural equation model fit by multinormal maximum likelihood. } \usage{ \method{modIndices}{objectiveML}(model, duplicated, deviance=NULL, ...) \method{modIndices}{msemObjectiveML}(model, ...) \method{print}{modIndices}(x, n.largest=5, ...) \method{print}{msemModIndices}(x, ...) \method{summary}{modIndices}(object, round=2, print.matrices=c("both", "par.change", "mod.indices"), ...) \method{summary}{msemModIndices}(object, ...) } \arguments{ \item{model}{an object of class \code{objectiveML} or \code{msemObjectiveML}, produced by the \code{\link{sem}} function.} \item{object, x}{an object of class \code{modIndices} or \code{msemModIndices}, produced by the \code{modIndices} function.} \item{n.largest}{number of modification indices to print in each of the \eqn{A} and \eqn{P} matrices of the RAM model.} \item{round}{number of places to the right of the decimal point in printing modification indices.} \item{print.matrices}{which matrices to print: estimated changes in the fixed parameters, modification indices, or both (the default).} \item{duplicated, deviance}{for internal use.} \item{\dots}{arguments to be passed down.} } \details{ Modification indices are one-df chi-square score (\dQuote{Lagrange-multiplier}) test statistics for the fixed and constrained parameters in a structural equation model. 
They may be regarded as an estimate of the improvement in the likelihood-ratio chi-square statistic for the model if the corresponding parameter is respecified as a free parameter. The \code{modIndices} function also estimates the change in the value of a fixed or constrained parameter if the parameter is respecified as free. When several parameters are set equal, modification indices and estimated changes are given for all but the first. Modification indices and estimated parameter changes for currently free parameters are given as \code{NA}. The method employed is described in Saris, Satorra, and Sorbom (1987) and Sorbom (1989). } \value{ \code{modIndices} returns an object of class \code{modIndices} with the following elements: \item{mod.A}{modification indices for the elements of the \eqn{A} matrix.} \item{mod.P}{modification indices for the elements of the \eqn{P} matrix.} \item{par.A}{estimated parameter changes for the elements of the \eqn{A} matrix.} \item{par.P}{estimated parameter changes for the elements of the \eqn{P} matrix.} } \references{ Sarris, W. E., Satorra, A., and Sorbom, D. (1987) The detection and correction of specification errors in structural equation models. Pp. 105--129 in Clogg, C. C. (ed.), \emph{Sociological Methodology 1987.} American Sociological Association. Sorbom, D. (1989) Model modification. \emph{Psychometrika} \bold{54}, 371--384. } \author{John Fox \email{jfox@mcmaster.ca} and Michael Culbertson} \seealso{\code{\link{sem}}} \examples{ # In the first example, readMoments() and specifyModel() read from the # input stream. This example cannot be executed via example() but can be entered # at the command prompt. The example is repeated using file input; # this example can be executed via example(). 
\dontrun{ # This example is adapted from the SAS manual S.wh <- readMoments(names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI')) 11.834 6.947 9.364 6.819 5.091 12.532 4.783 5.028 7.495 9.986 -3.839 -3.889 -3.841 -3.625 9.610 -21.899 -18.831 -21.748 -18.775 35.522 450.288 model.wh <- specifyModel() Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, NA, 0.833 Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, NA, 0.833 SES -> Education, NA, 1 SES -> SEI, lamb, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem.wh <- sem(model.wh, S.wh, 932) modIndices(sem.wh) } # The following example can be executed via example(): etc <- system.file(package="sem", "etc") # path to data and model files (S.wh <- readMoments(file=file.path(etc, "S-Wheaton.txt"), names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI'))) (model.wh <- specifyModel(file=file.path(etc, "model-Wheaton-1.txt"))) (sem.wh <- sem(model.wh, S.wh, 932)) modIndices(sem.wh) } \keyword{models} sem/man/objective.functions.Rd0000644000175000017500000000555312042646337016232 0ustar nileshnilesh\name{objective.functions} \alias{objective.functions} \alias{objectiveML} \alias{objectiveGLS} \alias{objectiveFIML} \alias{objectiveML2} \alias{objectiveGLS2} \alias{objectiveFIML2} \alias{msemObjectiveML} \alias{msemObjectiveML2} \alias{msemObjectiveGLS} %% \alias{msemObjectiveFIML} \title{ sem Objective-Function Builders } \description{ These functions return objective functions suitable for use with \code{\link{optimizers}} called by 
\code{\link{sem}}. The user would not normally call these functions directly, but rather supply one of them in the \code{objective} argument to \code{sem}. Users may also write their own objective functions. \code{objectiveML} and \code{objectiveML2} are for multinormal maximum-likelihood estimation; \code{objectiveGLS} and \code{objectiveGLS2} are for generalized least squares; and \code{objectiveFIML2} is for so-called ``full-information maximum-likelihood'' estimation in the presence of missing data. The FIML estimator provides the same estimates as the ML estimator when there is no missing data; it can be slow because it iterates over the unique patterns of missing data that occur in the data set. \code{objectiveML} and \code{objectiveGLS} use compiled code and are therefore substantially faster. \code{objectiveML2} and \code{objectiveGLS2} are provided primarily to illustrate how to write \code{sem} objective functions in R. \code{msemObjectiveML} uses compiled code is for fitting multi-group models by multinormal maximum likelihood; \code{msemObjectiveML2} is similar but doesn't use compiled code. \code{msemObjectiveGLS} uses compiled code and is for fitting multi-group models by generalized least squares. 
} \usage{ objectiveML(gradient=TRUE, hessian=FALSE) objectiveML2(gradient=TRUE) objectiveGLS(gradient=FALSE) objectiveGLS2(gradient=FALSE) objectiveFIML(gradient=TRUE, hessian=FALSE) objectiveFIML2(gradient=TRUE, hessian=FALSE) msemObjectiveML(gradient=TRUE) msemObjectiveML2(gradient=TRUE) msemObjectiveGLS(gradient=FALSE) %% msemObjectiveFIML(gradient=FALSE) } \arguments{ \item{gradient}{If \code{TRUE}, the object that's returned includes a function for computing an analytic gradient; there is at present no analytic gradient available for \code{objectiveFIML}, \code{objectiveGLS}, \code{objectiveGLS2}, or \code{msemObjectiveGL}.} \item{hessian}{If \code{TRUE}, the objected returned includes a function to compute an analytic Hessian; only avaiable for \code{objectiveML} and not generally recommended.} } \value{ These functions return an object of class \code{"semObjective"}, with up to two elements: \item{objective}{an objective function.} \item{gradient}{a gradient function.} } \author{John Fox \email{jfox@mcmaster.ca}} \references{ See \code{\link{sem}}. } \seealso{ \code{\link{sem}}, \code{\link{optimizers}} } \keyword{models} sem/man/specifyModel.Rd0000644000175000017500000005515113660067757014705 0ustar nileshnilesh\name{specifyModel} \alias{specifyModel} \alias{specifyEquations} \alias{cfa} \alias{multigroupModel} \alias{print.semmod} \alias{print.semmodList} \alias{classifyVariables} \alias{removeRedundantPaths} \alias{combineModels} \alias{combineModels.semmod} \alias{update.semmod} \alias{edit.semmod} \title{Specify a Structural Equation Model} \description{ Create the RAM specification of a structural equation model. } \usage{ specifyModel(file="", text, exog.variances=FALSE, endog.variances=TRUE, covs, suffix="", quiet=FALSE) specifyEquations(file="", text, ...) cfa(file="", text, covs=paste(factors, collapse=","), reference.indicators=TRUE, raw=FALSE, subscript=c("name", "number"), ...) 
multigroupModel(..., groups=names(models), allEqual=FALSE) classifyVariables(model) removeRedundantPaths(model, warn=TRUE) \method{combineModels}{semmod}(..., warn=TRUE) \method{update}{semmod}(object, file = "", text, ...) \method{edit}{semmod}(name, ...) \method{print}{semmod}(x, ...) \method{print}{semmodList}(x, ...) } \arguments{ \item{file}{The (quoted) file from which to read the model specification, including the path to the file if it is not in the current directory. If \code{""} (the default) and the \code{text} argument is not supplied, then the specification is read from the standard input stream, and is terminated by a blank line.} \item{text}{The model specification given as a character string, as an alternative to specifying the ]code{file} argument or reading the model specification from the input stream --- e.g., when the session is not interactive and there is no standard input.} \item{exog.variances}{If \code{TRUE} (the default is \code{FALSE}), free variance parameters are added for the exogenous variables that lack them.} \item{endog.variances}{If \code{TRUE} (the default), free error-variance parameters are added for the endogenous variables that lack them.} \item{covs}{optional: a character vector of one or more elements, with each element giving a string of variable names, separated by commas. Variances and covariances among all variables in each such string are added to the model. For confirmatory factor analysis models specified via \code{cfa}, \code{covs} defaults to all of the factors in the model, thus specifying all variances and covariances among these factors. 
\emph{Warning}: \code{covs="x1, x2"} and \code{covs=c("x1", "x2")} are \emph{not} equivalent: \code{covs="x1, x2"} specifies the variance of \code{x1}, the variance of \code{x2}, \emph{and} their covariance, while \code{covs=c("x1", "x2")} specifies the variance of \code{x1} and the variance of \code{x2} \emph{but not} their covariance.} \item{suffix}{a character string (defaulting to an empty string) to be appended to each parameter name; this can be convenient for specifying multiple-group models.} \item{reference.indicators}{if \code{FALSE}, the default, variances of factors are set to 1 by \code{cfa}; if \code{TRUE}, variances of factors are free parameters to estimate from the data, and instead the first factor loading for each factor is set to 1 to identify the model.} \item{raw}{if \code{TRUE} (the default is \code{FALSE}), a path from \code{Intercept} to each observed variable is added to the model, and the raw second moment for \code{Intercept} is fixed to \code{1}. The \code{sem} function should then be called with \code{raw=TRUE}, and either supplied with a data set (via the \code{data} argument) or a raw-moment matrix (via the \code{S} argument).} \item{subscript}{The \dQuote{subscripts} to be appended to \code{lam} to name factor-loading parameters, either \code{"name"} (the default) to use the names of observed variables, or \code{"number"} to number the parameters serially within each factor. 
Using \code{"number"} produces shorter parameter names.} \item{quiet}{if \code{FALSE}, the default, then the number of input lines is reported and a message is printed suggesting that \code{specifyEquations} or \code{cfa} be used.} \item{x, model, object, name}{An object of class \code{semmod} or \code{semmodList}, as produced by \code{specifyModel} or \code{multigroupModel}.} \item{warn}{print a warning if redundant paths are detected.} \item{...}{For \code{multigroupModel}, one or more optionally named arguments each of which is a \code{semmod} object produced, e.g., by \code{specifyModel}, \code{specifyEquations}, or \code{cfa}; if only one such model is given, then it will be used for all groups defined by the \code{groups} argument. If parameters have the same name in different groups, then they will be constrained to be equal. For \code{specifyEquations} and \code{cfa}, arguments (such as \code{covs}, in the case of \code{specifyEquations}) to be passed to \code{specifyModel}; for \code{combineModels}, \code{sem} objects; ignored in the \code{update} and \code{print} methods.} \item{groups}{a character vector of names for the groups in a multigroup model; taken by default from the names of the \code{...} arguments.} \item{allEqual}{if \code{FALSE} (the default), then if only one model object is given for a multigroup model, all corresponding parameters in the groups will be distinct; if \code{TRUE}, all corresponding parameters will be constrained to be equal.} } \details{ The principal functions for model specification are \code{specifyModel}, to specify a model in RAM (path) format via single- and double-headed arrows; \code{specifyEquations}, to specify a model in equation format, which is then translated by the function into RAM format; and \code{cfa}, for compact specification of simple confirmatory factor analysis models. 
\code{specifyModel}: Each line of the RAM specification for \code{specifyModel} consists of three (unquoted) entries, separated by commas: \describe{ \item{1. Arrow specification:}{This is a simple formula, of the form \code{A -> B} or, equivalently, \code{B <- A} for a regression coefficient (i.e., a single-headed or directional arrow); \code{A <-> A} for a variance or \code{A <-> B} for a covariance (i.e., a double-headed or bidirectional arrow). Here, \code{A} and \code{B} are variable names in the model. If a name does not correspond to an observed variable, then it is assumed to be a latent variable. Spaces can appear freely in an arrow specification, and there can be any number of hyphens in the arrows, including zero: Thus, e.g., \code{A->B}, \code{A --> B}, and \code{A>B} are all legitimate and equivalent.} \item{2. Parameter name:}{The name of the regression coefficient, variance, or covariance specified by the arrow. Assigning the same name to two or more arrows results in an equality constraint. Specifying the parameter name as \code{NA} produces a fixed parameter.} \item{3. Value:}{start value for a free parameter or value of a fixed parameter. If given as \code{NA} (or simply omitted), \code{sem} will compute the start value.} } Lines may end in a comment following \code{#}. \code{specifyEquations}: For \code{specifyEquations}, each input line is either a regression equation or the specification of a variance or covariance. Regression equations are of the form \code{y = par1*x1 + par2*x2 + ... + park*xk} where \code{y} and the \code{x}s are variables in the model (either observed or latent), and the \code{par}s are parameters. If a parameter is given as a numeric value (e.g., \code{1}) then it is treated as fixed. 
Note that no \dQuote{error} variable is included in the equation; \dQuote{error variances} are specified via either the \code{covs} argument, via \code{V(y) = par} (see immediately below), or are added automatically to the model when, as by default, \code{endog.variances=TRUE}. A regression equation may be split over more than one input by breaking at a \code{+}, so that \code{+} is either the last non-blank character on a line or the first non-blank character on the subsequent line. Variances are specified in the form \code{V(var) = par} and covariances in the form \code{C(var1, var2) = par}, where the \code{var}s are variables (observed or unobserved) in the model. The symbols \code{V} and \code{C} may be in either lower- or upper-case. If \code{par} is a numeric value (e.g., \code{1}) then it is treated as fixed. In conformity with the RAM model, a variance or covariance for an endogenous variable in the model is an \dQuote{error} variance or covariance. \emph{Warning}: If the \code{covs} argument to \code{specifyEquations} is used to specify variances and covariances, please be aware that \code{covs="x1, x2"} and \code{covs=c("x1", "x2")} are \emph{not} equivalent: \code{covs="x1, x2"} specifies the variance of \code{x1}, the variance of \code{x2}, \emph{and} their covariance, while \code{covs=c("x1", "x2")} specifies the variance of \code{x1} and the variance of \code{x2} \emph{but not} their covariance. To set a start value for a free parameter, enclose the numeric start value in parentheses after the parameter name, as \code{parameter(value)}. \code{cfa}: For \code{cfa}, each input line includes the names of the variables, separated by commas, that load on the corresponding factor; the name of the factor is given optionally at the beginning of the line, followed by a colon. If necessary, the variables that load on a factor may be continued across two or more input lines; in this case, each such line but the last must end in a comma. 
A variable may load on more than one factor (as long as the resulting model is identified, of course), but each factor may appear in only one input line (or set of input lines, if the variable list is continued onto the next line). Equality constraints for factor loadings can be set by using equal-signs (\code{=}) rather than commas to separate observed variable names. For example, \code{fac1: x1=x2=x3, x4=x5} sets the loadings for \code{x1}, \code{x2}, and \code{x3} equal to each other, and the loadings for \code{x4} and \code{x5} equal to each other. Equality constraints among error variances can similarly be specified by using \code{var:} or \code{variance:} at the beginning of a line (actually, any character string beginning with \code{var} will do, and thus no factor name may begin with the characters \code{var}). For example, \code{var: x1=x2=x3, x4=x5} sets the error variances for \code{x1}, \code{x2}, and \code{x3} equal to each other, and the error variances for \code{x4} and \code{x5} equal to each other. There may be several lines beginning with \code{var:}. If the argument \code{reference.indicators=FALSE}, the default, \code{cfa} will fix the variance of each factor to 1, and by default include covariances (i.e., correlations) among all pairs of factors. Alternatively, if \code{reference.indicators=TRUE}, then the factor variances are free parameters to be estimated from the data, and the first loading for each factor is set to 1 to identify the model. These two approaches produce equivalent models, with the same fit to the data, but alternative parametrizations. Specifying the argument \code{covs=NULL} implicitly fixes the factor intercorrelations to 0. See \code{\link{sem}} and the examples for further details on model specification. Other Functions: \code{classifyVariables} classifies the variables in a model as endogenous or exogenous. 
\code{combineModels} and \code{removeRedundantPaths} take \code{semmod} objects as arguments and do what their names imply. The \code{file} input argument to the \code{update} method for \code{semmod} objects, which by default comes from standard input, is a set of update directives, one per line. There are five kinds of directives. In each case the directive begins with the directive name, followed by one or more fields separated by commas. \describe{ \item{1. delete:}{Remove a path from the model. Example: \code{delete, RSES -> FGenAsp}} \item{2. add:}{Add a path to the model. Example (the \code{NA} for the start value is optional): \code{add, RSES -> FGenAsp, gam14, NA}} \item{3. replace:}{Replace every occurrence of the first string with the second in the variables and parameters of the model. This directive may be used, for example, to change one variable to another or to rename a parameter. Example: \code{replace, gam, gamma}, substitutes the string \code{"gamma"} for \code{"gam"} wherever the latter appears, presumably in parameter names.} \item{4. fix:}{Fix a parameter that was formerly free. Example: \code{fix, RGenAsp -> REdAsp, 1}} \item{5. free:}{Free a parameter that was formerly fixed. Example (the \code{NA} for the start value is optional): \code{free, RGenAsp -> ROccAsp, lam11, NA}} } The \code{edit} method for \code{semmod} objects opens the model in the R editor. } \value{ \code{specifyModel}, \code{specifyEquations}, \code{cfa}, \code{removeRedundantPaths}, \code{combineModels}, \code{update}, and \code{edit} return an object of class \code{semmod}, suitable as input for \code{\link{sem}}. \code{multigroupModel} returns an object of class \code{semmodList}, also suitable as input for \code{\link{sem}}. \code{classifyVariables} returns a list with two character vectors: \code{endogenous}, containing the names of endogenous variables in the model; and \code{exogenous}, containing the names of exogenous variables. 
} \author{John Fox \email{jfox@mcmaster.ca} and Jarrett Byrnes} \seealso{\code{\link{sem}}} \examples{ # example using the text argument: model.dhp <- specifyModel(text=" RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA ") model.dhp # same model in equation form: model.dhp.1 <- specifyEquations(covs="RGenAsp, FGenAsp", text=" RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES + gam14*FSES + beta12*FGenAsp FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + beta21*RGenAsp ROccAsp = 1*RGenAsp REdAsp = lam21(1)*RGenAsp # to illustrate setting start values FOccAsp = 1*FGenAsp FEdAsp = lam42(1)*FGenAsp ") model.dhp.1 # Note: The following examples can't be run via example() because the # default file argument requires that the model specification be entered # at the command prompt. The examples can be copied and run in an interactive # session in the R console, however.
\dontrun{ model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA model.dhp # an equivalent specification, allowing specifyModel() to generate # variance parameters for endogenous variables (and suppressing # the unnecessary trailing NAs): model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11 RIQ -> RGenAsp, gam12 RSES -> RGenAsp, gam13 FSES -> RGenAsp, gam14 RSES -> FGenAsp, gam23 FSES -> FGenAsp, gam24 FIQ -> FGenAsp, gam25 FParAsp -> FGenAsp, gam26 FGenAsp -> RGenAsp, beta12 RGenAsp -> FGenAsp, beta21 RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21 FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42 RGenAsp <-> FGenAsp, ps12 model.dhp # Another equivalent specification, telling specifyModel to add paths for # variances and covariance of RGenAsp and FGenAsp: model.dhp <- specifyModel(covs="RGenAsp, FGenAsp") RParAsp -> RGenAsp, gam11 RIQ -> RGenAsp, gam12 RSES -> RGenAsp, gam13 FSES -> RGenAsp, gam14 RSES -> FGenAsp, gam23 FSES -> FGenAsp, gam24 FIQ -> FGenAsp, gam25 FParAsp -> FGenAsp, gam26 FGenAsp -> RGenAsp, beta12 RGenAsp -> FGenAsp, beta21 RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21 FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42 model.dhp # The same model in equation format: model.dhp.1 <- specifyEquations(covs="RGenAsp, FGenAsp") RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES + gam14*FSES + beta12*FGenAsp FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + 
beta21*RGenAsp ROccAsp = 1*RGenAsp REdAsp = lam21(1)*RGenAsp # to illustrate setting start values FOccAsp = 1*FGenAsp FEdAsp = lam42(1)*FGenAsp model.dhp classifyVariables(model.dhp) # updating the model to impose equality constraints # and to rename the latent variables and gamma parameters model.dhp.eq <- update(model.dhp) delete, RSES -> FGenAsp delete, FSES -> FGenAsp delete, FIQ -> FGenAsp delete, FParAsp -> FGenAsp delete, RGenAsp -> FGenAsp add, RSES -> FGenAsp, gam14, NA add, FSES -> FGenAsp, gam13, NA add, FIQ -> FGenAsp, gam12, NA add, FParAsp -> FGenAsp, gam26, NA add, RGenAsp -> FGenAsp, beta12, NA replace, gam, gamma replace, Gen, General model.dhp.eq # A three-factor CFA model for the Thurstone mental-tests data, # specified three equivalent ways: R.thur <- readMoments(diag=FALSE, names=c('Sentences','Vocabulary', 'Sent.Completion','First.Letters','4.Letter.Words','Suffixes', 'Letter.Series','Pedigrees', 'Letter.Group')) .828 .776 .779 .439 .493 .46 .432 .464 .425 .674 .447 .489 .443 .59 .541 .447 .432 .401 .381 .402 .288 .541 .537 .534 .35 .367 .32 .555 .38 .358 .359 .424 .446 .325 .598 .452 # (1a) in CFA format: mod.cfa.thur.c <- cfa(reference.indicators=FALSE) FA: Sentences, Vocabulary, Sent.Completion FB: First.Letters, 4.Letter.Words, Suffixes FC: Letter.Series, Pedigrees, Letter.Group cfa.thur.c <- sem(mod.cfa.thur.c, R.thur, 213) summary(cfa.thur.c) # (1b) in CFA format, using reference indicators: mod.cfa.thur.r <- cfa() FA: Sentences, Vocabulary, Sent.Completion FB: First.Letters, 4.Letter.Words, Suffixes FC: Letter.Series, Pedigrees, Letter.Group cfa.thur.r <- sem(mod.cfa.thur.r, R.thur, 213) summary(cfa.thur.r) # (2) in equation format: mod.cfa.thur.e <- specifyEquations(covs="F1, F2, F3") Sentences = lam11*F1 Vocabulary = lam21*F1 Sent.Completion = lam31*F1 First.Letters = lam42*F2 4.Letter.Words = lam52*F2 Suffixes = lam62*F2 Letter.Series = lam73*F3 Pedigrees = lam83*F3 Letter.Group = lam93*F3 V(F1) = 1 V(F2) = 1 V(F3) = 1 cfa.thur.e <-
sem(mod.cfa.thur.e, R.thur, 213) summary(cfa.thur.e) # (3) in path format: mod.cfa.thur.p <- specifyModel(covs="F1, F2, F3") F1 -> Sentences, lam11 F1 -> Vocabulary, lam21 F1 -> Sent.Completion, lam31 F2 -> First.Letters, lam42 F2 -> 4.Letter.Words, lam52 F2 -> Suffixes, lam62 F3 -> Letter.Series, lam73 F3 -> Pedigrees, lam83 F3 -> Letter.Group, lam93 F1 <-> F1, NA, 1 F2 <-> F2, NA, 1 F3 <-> F3, NA, 1 cfa.thur.p <- sem(mod.cfa.thur.p, R.thur, 213) summary(cfa.thur.p) # The Thurstone CFA model with equality constraints on the # factor loadings and error variances mod.cfa.thur.ceq <- cfa(reference.indicators=FALSE) FA: Sentences = Vocabulary = Sent.Completion FB: First.Letters = 4.Letter.Words = Suffixes FC: Letter.Series = Pedigrees = Letter.Group var: Sentences = Vocabulary = Sent.Completion var: First.Letters = 4.Letter.Words = Suffixes var: Letter.Series = Pedigrees = Letter.Group cfa.thur.ceq <- sem(mod.cfa.thur.ceq, R.thur, 213) summary(cfa.thur.ceq) anova(cfa.thur.c, cfa.thur.ceq) pathDiagram(cfa.thur.ceq, ignore.double=FALSE, ignore.self=TRUE, min.rank="FA, FB, FC", edge.labels="values") # a multigroup CFA model fit to the Holzinger-Swineford # mental-tests data mod.hs <- cfa() spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg) # with cross-group equality constraints: mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE) sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm
+ wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg.eq) } } \keyword{models} sem/man/bootSem.Rd0000644000175000017500000001642414126410440013645 0ustar nileshnilesh\name{bootSem} \alias{bootSem} \alias{bootSem.sem} \alias{bootSem.msem} \alias{print.bootsem} \alias{summary.bootsem} \alias{print.summary.bootsem} \title{Bootstrap a Structural Equation Model} \description{ Bootstraps a structural equation model in an \code{sem} object (as returned by the \code{\link{sem}} function). } \usage{ bootSem(model, ...) \method{bootSem}{sem}(model, R=100, Cov=cov, data=model$data, max.failures=10, show.progress=TRUE, ...) \method{bootSem}{msem}(model, R=100, Cov=cov, data=model$data, max.failures=10, show.progress=TRUE, ...) \method{print}{bootsem}(x, digits=getOption("digits"), ...) \method{summary}{bootsem}(object, type=c("perc", "bca", "norm", "basic", "none"), level=0.95, ...) } \arguments{ \item{model}{an \code{sem} or \code{msem} object, produced by the \code{sem} function.} \item{R}{the number of bootstrap replications; the default is 100, which should be enough for computing standard errors, but not confidence intervals (except for the normal-theory intervals).} \item{Cov}{a function to compute the input covariance or moment matrix; the default is \code{\link{cov}}. Use \code{\link[stats]{cor}} if the model is fit to the correlation matrix. The function \code{\link[polycor]{hetcor}} in the \code{polycor} package will compute product-moment, polychoric, and polyserial correlations among mixed continuous and ordinal variables (see the first example below for an illustration).} \item{data}{in the case of a \code{sem} (i.e., single-group) \code{model}, a data set in a form suitable for \code{Cov}; for example, for the default \code{Cov=cov}, \code{data} may be a numeric data frame or a numeric matrix. 
In the case of an \code{msem} (i.e., multi-group) \code{model}, a list of data sets (again in the appropriate form), one for each group; in this case, bootstrapping is done within each group, treating the groups as strata. Note that the original observations are required, not just the covariance matrix of the observed variables in the model. The default is the data set stored in the \code{sem} object, which will be present only if the model was fit to a data set rather than to a covariance or moment matrix, and may not be in a form suitable for \code{Cov}.} \item{max.failures}{maximum number of consecutive convergence failures before \code{bootSem} gives up.} \item{show.progress}{display a text progress bar on the console tracing the bootstrap replications.} \item{x, object}{an object of class \code{bootsem}.} \item{digits}{controls the number of digits to print.} \item{type}{type of bootstrapped confidence intervals to compute; the default is \code{"perc"} (percentile); see \code{\link[boot]{boot.ci}} for details.} \item{level}{level for confidence intervals; default is \code{0.95}.} \item{...}{in \code{bootSem}, arguments to be passed to \code{\link{sem}}; otherwise ignored.} } \details{ \code{bootSem} implements the nonparametric bootstrap, assuming an independent random sample. Convergence failures in the bootstrap resamples are discarded (and a warning printed); more than \code{max.failures} consecutive convergence failures (default, 10) result in an error. You can use the \code{\link[boot]{boot}} function in the \pkg{boot} package for more complex sampling schemes and additional options. Bootstrapping is implemented by resampling the observations in \code{data}, recalculating the input covariance matrix with \code{Cov}, and refitting the model with \code{\link{sem}}, using the parameter estimates from the original sample as start-values. \bold{Warning:} the bootstrapping process can be very time-consuming. 
} \value{ \code{bootSem} returns an object of class \code{bootsem}, which inherits from class \code{boot}, supported by the \pkg{boot} package. The returned object contains the following components: \item{t0}{the estimated parameters in the model fit to the original data set.} \item{t}{a matrix containing the bootstrapped estimates, one bootstrap replication per row.} \item{data}{the data to which the model was fit.} \item{seed}{the value of \code{.Random.seed} when \code{bootSem} was called.} \item{statistic}{the function used to produce the bootstrap replications; this is always the local function \code{refit} from \code{bootSem}.} \item{sim}{always set to \code{"ordinary"}; see the documentation for the \code{\link[boot]{boot}} function.} \item{stype}{always set to \code{"i"}; see the documentation for the \code{\link[boot]{boot}} function.} \item{call}{the call of the \code{bootSem} function.} \item{weights}{a vector of length equal to the number of observations \eqn{N}, with entries \eqn{1/N}. For a multi-group model, the weights in group \eqn{j} are \eqn{1/N_j}, the inverse of the number of observations in the group.} \item{strata}{a vector of length \eqn{N} containing the number of the stratum to which each observation belongs; for a single-group model, all entries are 1.} } \references{ Davison, A. C., and Hinkley, D. V. (1997) \emph{Bootstrap Methods and their Application.} Cambridge. Efron, B., and Tibshirani, R. J. (1993) \emph{An Introduction to the Bootstrap.} Chapman and Hall. } \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link[boot]{boot}}, \code{\link{sem}}} \examples{ \dontrun{ # because of long execution time # A simple confirmatory factor-analysis model using polychoric correlations. # The polycor package is required for the hetcor function. if (require(polycor)){ # The following function returns correlations computed by hetcor, # and is used for the bootstrapping. 
hcor <- function(data) hetcor(data, std.err=FALSE)$correlations model.cnes <- specifyModel(text=" F -> MBSA2, lam1 F -> MBSA7, lam2 F -> MBSA8, lam3 F -> MBSA9, lam4 F <-> F, NA, 1 MBSA2 <-> MBSA2, the1 MBSA7 <-> MBSA7, the2 MBSA8 <-> MBSA8, the3 MBSA9 <-> MBSA9, the4 ") R.cnes <- hcor(CNES) sem.cnes <- sem(model.cnes, R.cnes, N=1529) summary(sem.cnes) } # Note: this can take a minute: set.seed(12345) # for reproducibility system.time(boot.cnes <- bootSem(sem.cnes, R=100, Cov=hcor, data=CNES)) summary(boot.cnes, type="norm") # cf., standard errors to those computed by summary(sem.cnes) } \dontrun{ # because of long execution time # An example bootstrapping a multi-group model mod.hs <- cfa(text=" spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet ") mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) # Note: this example can take several minutes or more; # you can decrease R if you just want to see how it works: set.seed(12345) # for reproducibility system.time(boot.mg <- bootSem(sem.mg, R=100)) summary(boot.mg, type="norm") # cf., standard errors to those computed by summary(sem.mg) } } \keyword{htest} \keyword{models} sem/man/fscores.Rd0000644000175000017500000001136612127622145013707 0ustar nileshnilesh\name{fscores} \alias{fscores} \alias{fscores.sem} \alias{fscores.msem} \title{Factor Scores for Latent Variables} \description{ Calculate factor scores or factor-score coefficients for the latent variables in a structural-equation model. } \usage{ \method{fscores}{sem}(model, data=model$data, center=TRUE, scale=FALSE, ...) 
\method{fscores}{msem}(model, data=model$data, center=TRUE, scale=FALSE, ...) } \arguments{ \item{model}{an object of class \code{"sem"} or \code{"msem"}, produced by the \code{\link{sem}} function.} \item{data}{an optional numeric data frame or matrix containing the observed variables in the model; if not \code{NULL}, the estimated factor scores are returned; if \code{NULL}, the factor-score \emph{coefficients} are returned. The default is the \code{data} element of \code{model}, which is non-\code{NULL} if the model was fit to a data set rather than a covariance or moment matrix.} \item{center}{if \code{TRUE}, the default, the means of the observed variables are subtracted prior to computing factor scores. One would normally use this option if the model is estimated from a covariance or correlation matrix among the observed variables.} \item{scale}{if \code{TRUE}, the possibly centered variables are divided by their root-mean-squares; the default is \code{FALSE}. One would normally use this option if the model is estimated from a correlation matrix among the observed variables. Centering and scaling are performed by the \code{\link[base]{scale}} function.} \item{\dots}{arguments to pass down.} } \details{ Factor-score coefficients are computed by the \dQuote{regression} method as \eqn{B = C^{-1} C^{*}}{C^-1 C*}, where \eqn{C} is the model-implied covariance or moment matrix among the observed variables and \eqn{C^{*}}{C*} is the matrix of model-implied covariances or moments between the observed and latent variables. } \value{ Either a matrix of estimated factor scores (if the \code{data} argument is supplied) or a matrix of factor-score coefficients (otherwise). In the case of an \code{"msem"} argument, a list of matrices is returned. } \references{ Bollen, K. A. (1989) \emph{Structural Equations With Latent Variables.} Wiley. 
} \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}, \code{\link[base]{scale}}} \examples{ # In the first example, readMoments() and specifyModel() read from the # input stream. This example cannot be executed via example() but can be entered # at the command prompt. The example is repeated using file input; # this example can be executed via example(). \dontrun{ S.wh <- readMoments(names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI')) 11.834 6.947 9.364 6.819 5.091 12.532 4.783 5.028 7.495 9.986 -3.839 -3.889 -3.841 -3.625 9.610 -21.899 -18.831 -21.748 -18.775 35.522 450.288 # This model in the SAS manual for PROC CALIS model.wh.1 <- specifyModel() Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, NA, 0.833 Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, NA, 0.833 SES -> Education, NA, 1 SES -> SEI, lamb, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem.wh.1 <- sem(model.wh.1, S.wh, 932) fscores(sem.wh.1) } # The following example can be executed via example(): etc <- system.file(package="sem", "etc") # path to data and model files (S.wh <- readMoments(file=file.path(etc, "S-Wheaton.txt"), names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI'))) (model.wh.1 <- specifyModel(file=file.path(etc, "model-Wheaton-1.txt"))) (sem.wh.1 <- sem(model.wh.1, S.wh, 932)) fscores(sem.wh.1) } \keyword{models} \keyword{manip} sem/man/Tests.Rd0000644000175000017500000000175612005517445013350 0ustar nileshnilesh\name{Tests} \alias{Tests} \docType{data} \title{ Six Mental Tests 
} \description{ These data are from the SAS manual and consist of six mental tests for 32 students, with some missing data. The three \code{x} variables are intended to load on a verbal factor, and the three \code{y} variables on a math factor. The data can be used to illustrate the estimation of a confirmatory factor analysis model by multinormal full-information maximum-likelihood in the presence of missing data. } \usage{Tests} \format{ A data frame with 32 observations on the following 6 variables. \describe{ \item{\code{x1}}{score on verbal test 1.} \item{\code{x2}}{score on verbal test 2.} \item{\code{x3}}{score on verbal test 3.} \item{\code{y1}}{score on math test 1.} \item{\code{y2}}{score on math test 2.} \item{\code{y3}}{score on math test 3.} } } \source{ Example 25.13 from \emph{SAS/STAT 9.22 User's Guide}, SAS Institute, 2010. } \keyword{datasets} sem/man/optimizers.Rd0000644000175000017500000000751212006744056014450 0ustar nileshnilesh\name{optimizers} \alias{optimizers} \alias{optimizerSem} \alias{optimizerNlm} \alias{optimizerOptim} \alias{optimizerNlminb} \alias{optimizerMsem} \alias{msemOptimizerNlm} \title{sem Optimizers} \description{ The default optimizer used by \code{\link{sem}} is \code{optimizerSem}, which employs compiled code and is integrated with the \code{\link{objectiveML}} and \code{\link{objectiveGLS}} objective functions; \code{optimizerSem}, written by Zhenghua Nie, is a modified version of the standard R \code{nlm} optimizer, which was written by Saikat DebRoy, R-core, and Richard H. Jones. The other functions call optimizers (\code{\link[stats]{nlm}}, \code{\link[stats]{optim}}, or \code{\link[stats]{nlminb}}), to fit structural equation models, and are called by the \code{\link{sem}} function. The user would not normally call these functions directly, but rather supply one of them in the \code{optimizer} argument to \code{sem}. Users may also write their own optimizer functions.
\code{msemOptimizerNlm} is for fitting multigroup models, and also adapts the \code{nlm} code. } \usage{ optimizerSem(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...) optimizerMsem(start, objective=msemObjectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn=FALSE, ...) optimizerNlm(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...) optimizerOptim(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, method="CG", ...) optimizerNlminb(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...) msemOptimizerNlm(start, objective=msemObjectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn=FALSE, ...) } \arguments{ \item{start}{a vector of start values for the parameters.} \item{objective}{the objective function to be optimized; see \link{objective.functions}.} \item{gradient}{\code{TRUE} if an analytic gradient is to be used (if one is available).} \item{maxiter}{the maximum number of iterations allowed.} \item{debug}{\code{TRUE} to show the iteration history and other available information about the optimization.} \item{par.size}{\code{"startvalues"} to have the optimizer scale the problem according to the magnitudes of the start values (ignored by \code{optimizerNlminb}).} \item{model.description}{a list with elements describing the structural-equation model (see the code for details).} \item{warn}{if \code{FALSE}, suppress warnings during the optimization.} \item{method}{the method to be employed by the \code{\link[stats]{optim}} optimizer; the default is \code{"CG"} (conjugate-gradient).} \item{\dots}{additional arguments for the \code{nlm}, \code{optim}, or \code{nlminb} optimizer.} } \value{ An object of class \code{"semResult"}, with elements: \item{convergence}{\code{TRUE} if the optimization apparently converged.} \item{iterations}{the
number of iterations required.} \item{par}{the vector of parameter estimates.} \item{vcov}{the estimated covariance matrix of the parameter estimates, based on a numeric Hessian; not supplied by \code{optimizerNlminb}.} \item{criterion}{the optimized value of the objective function.} \item{C}{the model-implied covariance or moment matrix at the parameter estimates.} \item{A}{the estimated \eqn{A} matrix.} \item{P}{the estimated \eqn{P} matrix.} } \author{John Fox \email{jfox@mcmaster.ca}, and Zhenghua Nie, in part adapting work by Saikat DebRoy, R-core, and Richard H. Jones.} \seealso{\code{\link{sem}}, \code{\link{objective.functions}}, \code{\link[stats]{nlm}}, \code{\link[stats]{optim}}, \code{\link[stats]{nlminb}}} \keyword{models} sem/man/standardizedCoefficients.Rd0000644000175000017500000000760712213674643017252 0ustar nileshnilesh\name{standardizedCoefficients} \alias{standardizedCoefficients} \alias{standardizedCoefficients.sem} \alias{standardizedCoefficients.msem} \alias{stdCoef} \title{Standardized Coefficients for Structural Equation Models} \description{ These functions calculate standardized regression coefficients for structural equation models. The function \code{stdCoef} is simply an abbreviation for \code{standardizedCoefficients}. } \usage{ standardizedCoefficients(object, ...) \method{standardizedCoefficients}{sem}(object, digits = getOption("digits"), oneheaded = TRUE, twoheaded = TRUE, ...) \method{standardizedCoefficients}{msem}(object, ...) stdCoef(...) } \arguments{ \item{object}{an object of class \code{sem} or \code{msem} returned by the \code{\link{sem}} function.} \item{digits}{number of digits for printed output.} \item{oneheaded}{standardize path coefficients? Default is \code{TRUE}.} \item{twoheaded}{standardize variances and covariances? Default is \code{TRUE}.} \item{...}{arguments to pass down.} } \value{ Returns a data frame with the coefficients, labelled both by parameter names and by arrows in the path diagram for the model. 
The \code{msem} (multigroup) method computes and prints the standardized coefficients for each group; it does not return a useful result. } \references{ Bollen, K. A. (1989) \emph{Structural Equations With Latent Variables.} Wiley. } \author{John Fox \email{jfox@mcmaster.ca} and Adam Kramer} \seealso{\code{\link{sem}}} \examples{ # In the first example, readMoments() and specifyModel() read from the # input stream. This example cannot be executed via example() but can be entered # at the command prompt. The example is repeated using file input; # this example can be executed via example(). \dontrun{ # Duncan, Haller, and Portes peer-influences model R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp')) standardizedCoefficients(sem.dhp) } # The following example can be executed via example(): etc <- system.file(package="sem", "etc") # path to data and model files (R.DHP <- 
readMoments(file=file.path(etc, "R-DHP.txt"), diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"))) (model.dhp <- specifyModel(file=file.path(etc, "model-DHP.txt"))) (sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp'))) standardizedCoefficients(sem.dhp) } \keyword{models} sem/man/HS.data.Rd0000644000175000017500000000434213660067757013500 0ustar nileshnilesh\name{HS.data} \alias{HS.data} \docType{data} \title{ Holizinger and Swineford's Data } \description{ This data set, for scores on a variety of tests, was originally in the MBESS package. A new version of the data set in that package doesn't appear to be identical to this one. } \usage{HS.data} \format{ A data frame with 301 observations on the following 32 variables. \describe{ \item{\code{id}}{a numeric vector} \item{\code{Gender}}{a factor with levels \code{Female} \code{Male}} \item{\code{grade}}{a numeric vector} \item{\code{agey}}{a numeric vector} \item{\code{agem}}{a numeric vector} \item{\code{school}}{a factor with levels \code{Grant-White} \code{Pasteur}} \item{\code{visual}}{a numeric vector} \item{\code{cubes}}{a numeric vector} \item{\code{paper}}{a numeric vector} \item{\code{flags}}{a numeric vector} \item{\code{general}}{a numeric vector} \item{\code{paragrap}}{a numeric vector} \item{\code{sentence}}{a numeric vector} \item{\code{wordc}}{a numeric vector} \item{\code{wordm}}{a numeric vector} \item{\code{addition}}{a numeric vector} \item{\code{code}}{a numeric vector} \item{\code{counting}}{a numeric vector} \item{\code{straight}}{a numeric vector} \item{\code{wordr}}{a numeric vector} \item{\code{numberr}}{a numeric vector} \item{\code{figurer}}{a numeric vector} \item{\code{object}}{a numeric vector} \item{\code{numberf}}{a numeric vector} \item{\code{figurew}}{a numeric vector} \item{\code{deduct}}{a numeric vector} \item{\code{numeric}}{a numeric vector} \item{\code{problemr}}{a 
numeric vector} \item{\code{series}}{a numeric vector} \item{\code{arithmet}}{a numeric vector} \item{\code{paperrev}}{a numeric vector} \item{\code{flagssub}}{a numeric vector} } } \source{ Originally from Holzinger and Swineford (1939). This copy is originally from version 4.6.0 of the MBESS package. } \references{ Holzinger, K. J. and Swineford, F. A. (1939). A study in factor analysis: The stability of a bi-factor solution. Supplementary Education Monographs, 48. University of Chicago. } \examples{ summary(HS.data) } \keyword{datasets} sem/man/Bollen.Rd0000644000175000017500000000277011701252753013456 0ustar nileshnilesh\name{Bollen} \alias{Bollen} \docType{data} \title{ Bollen's Data on Industrialization and Political Democracy } \description{ This data set includes four measures of democracy at two points in time, 1960 and 1965, and three measures of industrialization in 1960, for 75 developing countries. } \usage{Bollen} \format{ A data frame with 75 observations on the following 11 variables. \describe{ \item{\code{y1}}{freedom of the press, 1960} \item{\code{y2}}{freedom of political opposition, 1960} \item{\code{y3}}{fairness of elections, 1960} \item{\code{y4}}{effectivness of elected legislature, 1960} \item{\code{y5}}{freedom of the press, 1965} \item{\code{y6}}{freedom of political opposition, 1965} \item{\code{y7}}{fairness of elections, 1965} \item{\code{y8}}{effectivness of elected legislature, 1965} \item{\code{x1}}{GNP per capita, 1960} \item{\code{x2}}{energy consumption per capita, 1960} \item{\code{x3}}{percentage of labor force in industry, 1960} } } \details{ Variables \code{y1} through \code{y4} are intended to be indicators of the latent variable \emph{political democracy in 1960}; \code{y5} through \code{y8} indicators of \emph{political democracy in 1965}; and \code{x1} through \code{x3} indicators of \emph{industrialization in 1960}. } \source{ personal communication from Ken Bollen. } \references{ Bollen, K. A. 
(1989) \emph{Structural Equations With Latent Variables}. Wiley. } \keyword{datasets} sem/man/ram.Rd0000644000175000017500000000173112026676605013025 0ustar nileshnilesh\name{ram} \alias{ram} \title{RAM Matrix for a Structural-Equation Model} \description{ Print the labelled RAM definition matrix for a structural-equation model fit by \code{sem}. } \usage{ ram(object, digits=getOption("digits"), startvalues=FALSE) } \arguments{ \item{object}{an object of class \code{sem} returned by the \code{sem} function.} \item{digits}{number of digits for printed output.} \item{startvalues}{if \code{TRUE}, start values for parameters are printed; otherwise, the parameter estimates are printed; the default is \code{FALSE}.} } \value{ A data frame containing the labelled RAM definition matrix, which is normally just printed. } \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ # ------------- assumes that Duncan, Haller and Portes peer-influences model # ------------- has been fit and is in sem.dhp \dontrun{ ram(sem.dhp) } } \keyword{models} sem/man/CNES.Rd0000644000175000017500000000323611701252753012771 0ustar nileshnilesh\name{CNES} \alias{CNES} \docType{data} \title{Variables from the 1997 Canadian National Election Study} \description{ These variables are from the mailback questionnaire to the 1997 Canadian National Election Study, and are intended to tap attitude towards \dQuote{traditional values.} } \usage{CNES} \format{ A data frame with 1529 observations on the following 4 variables. 
\describe{ \item{\code{MBSA2}}{an ordered factor with levels \code{StronglyDisagree}, \code{Disagree}, \code{Agree}, and \code{StronglyAgree}, in response to the statement, \dQuote{We should be more tolerant of people who choose to live according to their own standards, even if they are very different from our own.}} \item{\code{MBSA7}}{an ordered factor with levels \code{StronglyDisagree}, \code{Disagree}, \code{Agree}, and \code{StronglyAgree}, in response to the statement, \dQuote{Newer lifestyles are contributing to the breakdown of our society.}} \item{\code{MBSA8}}{an ordered factor with levels \code{StronglyDisagree}, \code{Disagree}, \code{Agree}, and \code{StronglyAgree}, in response to the statement, \dQuote{The world is always changing and we should adapt our view of moral behaviour to these changes.}} \item{\code{MBSA9}}{an ordered factor with levels \code{StronglyDisagree}, \code{Disagree}, \code{Agree}, and \code{StronglyAgree}, in response to the statement, \dQuote{This country would have many fewer problems if there were more emphasis on traditional family values.}} } } \source{ York University Institute for Social Research. } \keyword{datasets} sem/man/effects.Rd0000644000175000017500000001001512127622145013650 0ustar nileshnilesh\name{effects.sem} \alias{effects.sem} \alias{effects.msem} \alias{print.semeffects} \alias{print.semeffectsList} \title{ Total, Direct, and Indirect Effects for Structural Equation Models } \description{ The \code{sem} method for the standard generic function \code{effects} computes total, direct, and indirect effects for a fitted structural equation model according to the method described in Fox (1980). } \usage{ \method{effects}{sem}(object, ...) \method{effects}{msem}(object, ...) \method{print}{semeffects}(x, digits = getOption("digits"), ...) \method{print}{semeffectsList}(x, digits = getOption("digits"), ...) 
} \arguments{ \item{object}{a fitted structural-equation model object produced by the \code{\link{sem}} function.} \item{x}{an object of class \code{semeffects} or \code{semeffectsList}, produced by \code{effects}.} \item{digits}{digits to print.} \item{\dots}{not used.} } \value{ \code{effect.sem} returns an object of class \code{semeffects} with \code{Total}, \code{Direct}, and \code{Indirect} elements. } \references{ Fox, J. (1980) Effect analysis in structural equation models: Extensions and simplified methods of computation. \emph{Sociological Methods and Research} \bold{9}, 3--28. } \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ \dontrun{ # These examples are from Fox (1980) # In the first pair of examples, readMoments() and specifyModel() read from the # input stream. These examples cannot be executed via example() but can be entered # at the command prompt. The Blau and Duncan example is repeated using file input; # this example can be executed via example(). 
# The recursive Blau and Duncan basic stratification model: # x1 is father's education, x2 father's SES, y3 respondent's education, # y4 SES of respondent's first job, y5 respondent's SES in 1962 R.bd <- readMoments(names=c("x1", "x2", "y3", "y4", "y5")) 1 .516 1 .453 .438 1 .332 .417 .538 1 .322 .405 .596 .541 1 mod.bd <- specifyModel() y3 <- x1, gam31 y3 <- x2, gam32 y4 <- x2, gam42 y4 <- y3, beta43 y5 <- x2, gam52 y5 <- y3, beta53 y5 <- y4, beta54 sem.bd <- sem(mod.bd, R.bd, N=20700, fixed.x=c("x1", "x2")) summary(sem.bd) effects(sem.bd) # The nonrecursive Duncan, Haller, and Portes peer-influences model for observed variables: R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 model.dhp <- specifyModel() RIQ -> ROccAsp, gam51, NA RSES -> ROccAsp, gam52, NA FSES -> FOccAsp, gam63, NA FIQ -> FOccAsp, gam64, NA FOccAsp -> ROccAsp, beta56, NA ROccAsp -> FOccAsp, beta65, NA ROccAsp <-> ROccAsp, ps55, NA FOccAsp <-> FOccAsp, ps66, NA ROccAsp <-> FOccAsp, ps56, NA # Note: The following generates a warning because not all of the variables # in the correlation matrix are used sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c('RIQ', 'RSES', 'FSES', 'FIQ')) summary(sem.dhp) effects(sem.dhp) } ## the following example may be executed via example() etc <- system.file(package="sem", "etc") # path to data and model files # The recursive Blau and Duncan basic stratification model: # x1 is father's education, x2 father's SES, y3 respondent's education, # y4 SES of respondent's first job, y5 respondent's SES in 1962 (R.bd <- readMoments(file=file.path(etc, "R-Blau-Duncan.txt"), names=c("x1", "x2", "y3", "y4", "y5"))) (mod.bd <- 
specifyModel(file=file.path(etc, "model-Blau-Duncan.txt"))) sem.bd <- sem(mod.bd, R.bd, N=20700, fixed.x=c("x1", "x2")) summary(sem.bd) effects(sem.bd) } \keyword{models} sem/man/residuals.Rd0000644000175000017500000001131212127622145014225 0ustar nileshnilesh\name{residuals.sem} \alias{residuals.sem} \alias{residuals.msem} \alias{standardizedResiduals} \alias{standardizedResiduals.sem} \alias{standardizedResiduals.msem} \alias{normalizedResiduals} \alias{normalizedResiduals.objectiveML} \alias{normalizedResiduals.objectiveGLS} \alias{normalizedResiduals.msemObjectiveML} \title{Residual Covariances for a Structural Equation Model} \description{ These functions compute residual covariances, variance-standardized residual covariances, and normalized residual covariances for the observed variables in a structural-equation model fit by \code{sem}. } \usage{ \method{residuals}{sem}(object, ...) \method{residuals}{msem}(object, ...) \method{standardizedResiduals}{sem}(object, ...) \method{standardizedResiduals}{msem}(object, ...) \method{normalizedResiduals}{objectiveML}(object, ...) \method{normalizedResiduals}{objectiveGLS}(object, ...) \method{normalizedResiduals}{msemObjectiveML}(object, ...) } \arguments{ \item{object}{an object inheriting from class \code{sem} or \code{msem} returned by the \code{\link{sem}} function.} \item{...}{not for the user.} } \details{ Residuals are defined as \eqn{S - C}, where \eqn{S} is the sample covariance matrix of the observed variables and \eqn{C} is the model-reproduced covariance matrix. The \emph{standardized} residual covariance for a pair of variables divides the residual covariance by the product of the sample standard deviations of the two variables, \eqn{(s_{ij} - c_{ij})/(s_{ii}s_{jj})^{1/2}}. 
The \emph{normalized} residual is given by \deqn{\frac{s_{ij}-c_{ij}} {[(c_{ii}c_{jj}-c_{ij}^2)/N^{*}]^{1/2}}}{% (s[ij] - c[ij])/[(c[ii]c[ii] + c[ij]^2)/N*]^[1/2]} where \eqn{N^{*}} is the number of observations minus one if the model is fit to a covariance matrix, or the number of observations if it is fit to a raw moment matrix. } \value{ Each function returns a matrix of residuals. } \references{ Bollen, K. A. (1989) \emph{Structural Equations With Latent Variables.} Wiley. } \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ # In the first example, readMoments() and specifyModel() read from the # input stream. This example cannot be executed via example() but can be entered # at the command prompt. The example is repeated using file input; # this example can be executed via example(). \dontrun{ # Duncan, Haller, and Portes peer-influences model R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 
'RSES', 'FSES', 'FIQ', 'FParAsp')) residuals(sem.dhp) normalizedResiduals(sem.dhp) standardizedResiduals(sem.dhp) # same as residuals because model is fit to correlations } # The following example can be executed via example(): etc <- system.file(package="sem", "etc") # path to data and model files (R.DHP <- readMoments(file=file.path(etc, "R-DHP.txt"), diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"))) (model.dhp <- specifyModel(file=file.path(etc, "model-DHP.txt"))) (sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp'))) residuals(sem.dhp) normalizedResiduals(sem.dhp) standardizedResiduals(sem.dhp) # same as residuals because model is fit to correlations } \keyword{models} sem/man/readMoments.Rd0000644000175000017500000000544712423534616014530 0ustar nileshnilesh\name{readMoments} \alias{readMoments} \title{Input a Covariance, Correlation, or Raw Moment Matrix} \description{ This functions makes it simpler to input covariance, correlation, and raw-moment matrices to be analyzed by the \code{\link{sem}} function. The matrix is input in lower-triangular form on as many lines as is convenient, omitting the above-diagonal elements. The elements on the diagonal may also optionally be omitted, in which case they are taken to be 1. } \usage{ readMoments(file="", text, diag=TRUE, names=as.character(paste("X", 1:n, sep = ""))) } \arguments{ \item{file}{The (quoted) file from which to read the moment matrix, including the path to the file if it is not in the current directory. 
If \code{""} (the default) and the \code{text} argument is absent, then the moment matrix is read from the standard input stream, and is terminated by a blank line.} \item{text}{The moment matrix given as a character string, as an alternative to specifying the \code{file} argument or reading the moments from the input stream --- e.g., when the session is not interactive and there is no standard input.} \item{diag}{If \code{TRUE} (the default), then the input matrix includes diagonal elements.} \item{names}{a character vector containing the names of the variables, to label the rows and columns of the moment matrix.} } \value{ Returns a lower-triangular matrix (i.e., with zeroes above the main diagonal) suitable for input to \code{sem}. } \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"), text=" .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 ") R.DHP #the following will work only in an interactive sessions: \dontrun{ R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 R.DHP } } \keyword{manip} sem/man/tsls.Rd0000644000175000017500000001054012213674643013227 0ustar nileshnilesh\name{tsls} \alias{tsls} \alias{tsls.formula} \alias{tsls.default} \alias{fitted.tsls} \alias{residuals.tsls} \alias{coef.tsls} \alias{vcov.tsls} 
\alias{anova.tsls} \alias{print.tsls} \alias{summary.tsls} \alias{print.summary.tsls} \title{Two-Stage Least Squares} \description{ Fits a regression equation, such as an equation in a structural-equation model, by two-stage least squares. This is equivalent to direct instrumental-variables estimation when the number of instruments is equal to the number of predictors. } \usage{ \method{tsls}{formula}(formula, instruments, data, subset, weights, na.action, contrasts=NULL, ...) \method{tsls}{default}(y, X, Z, w, names=NULL, ...) \method{print}{tsls}(x, ...) \method{summary}{tsls}(object, digits=getOption("digits"), ...) \method{print}{summary.tsls}(x, ...) \method{anova}{tsls}(object, model.2, s2, dfe, ...) \method{fitted}{tsls}(object, ...) \method{residuals}{tsls}(object, ...) \method{coef}{tsls}(object, ...) \method{vcov}{tsls}(object, ...) } \arguments{ \item{formula}{model formula for structural equation to be estimated; a regression constant is implied if not explicitly omitted.} \item{instruments}{one-sided model formula specifying instrumental variables.} \item{data}{an optional data frame containing the variables in the model. By default the variables are taken from the environment from which \code{tsls} is called.} \item{subset}{an optional vector specifying a subset of observations to be used in fitting the model.} \item{weights, w}{an optional vector of weights to be used in the fitting process; if specified should be a non-negative numeric vector with one entry for each observation, to be used to compute weighted 2SLS estimates.} \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. The default is set by the \code{na.action} option.} \item{contrasts}{an optional list. 
See the \code{contrasts.arg} argument of \code{\link[stats:model.matrix]{model.matrix.default}}.} \item{y}{Response-variable vector.} \item{X}{Matrix of predictors, including a constant (if one is in the model).} \item{Z}{Matrix of instrumental variables, including a constant (if one is in the model).} \item{names}{optional character vector of names for the columns of the \code{X} matrix.} \item{x, object, model.2}{objects of class \code{tsls} returned by \code{tsls.formula} (or of class \code{summary.tsls}), for \code{anova} containing nested models to be compared by an incremental \eqn{F}-test. One model should be nested in the other; the order of models is immaterial.} \item{s2}{an optional estimate of error variance for the denominator of the \eqn{F}-test. If missing, the error-variance estimate is taken from the larger model.} \item{dfe}{optional error degrees of freedom, to be specified when an estimate of error variance is given.} \item{digits}{number of digits for summary output.} \item{...}{arguments to be passed down.} } \value{ \code{tsls.formula} returns an object of class \code{tsls}, with the following components: \item{n}{number of observations.} \item{p}{number of parameters.} \item{coefficients}{parameter estimates.} \item{V}{estimated covariance matrix of coefficients.} \item{s}{residual standard error.} \item{residuals}{vector of residuals.} \item{response}{vector of response values.} \item{X}{model matrix.} \item{Z}{instrumental-variables matrix.} \item{response.name}{name of response variable, or expression evaluating to response.} \item{formula}{model formula.} \item{instruments}{one-sided formula for instrumental variables.} } \references{ Fox, J. (1979) Simultaneous equation models and two-stage least-squares. In Schuessler, K. F. (ed.) \emph{Sociological Methodology 1979}, Jossey-Bass. Greene, W. H. (1993) \emph{Econometric Analysis, Second Edition}, Macmillan. 
} \author{John Fox \email{jfox@mcmaster.ca}} \seealso{\code{\link{sem}}} \examples{ summary(tsls(Q ~ P + D, ~ D + F + A, data=Kmenta)) # demand equation summary(tsls(Q ~ P + F + A, ~ D + F + A, data=Kmenta)) # supply equation anova(tsls(Q ~ P + F + A, ~ D + F + A, data=Kmenta), tsls(Q ~ 1, ~ D + F + A, data=Kmenta)) } \keyword{models} sem/man/miSem.Rd0000644000175000017500000001117114126410440013301 0ustar nileshnilesh\name{miSem} \alias{miSem} \alias{miSem.semmod} \alias{miSem.semmodList} \alias{print.miSem} \alias{summary.miSem} \title{ Estimate a Structural Equation Model By Multiple Imputation } \description{ \code{miSem} uses the \code{\link{mi}} function in the \pkg{mi} package to generate multiple imputations of missing data, fitting the specified model to each completed data set. } \usage{ miSem(model, ...) \method{miSem}{semmod}(model, ..., data, formula = ~., raw=FALSE, fixed.x=NULL, objective=objectiveML, n.imp=5, n.chains=n.imp, n.iter=30, seed=sample(1e6, 1), mi.args=list(), show.progress=TRUE) \method{miSem}{semmodList}(model, ..., data, formula = ~., group, raw=FALSE, fixed.x=NULL, objective=msemObjectiveML, n.imp=5, n.chains=n.imp, n.iter=30, seed=sample(1e6, 1), mi.args=list(), show.progress=TRUE) \method{print}{miSem}(x, ...) \method{summary}{miSem}(object, digits=max(3, getOption("digits") - 2), ...) } \arguments{ \item{model}{ an SEM model-description object of class \code{semmod} or \code{semmodList}, created by \code{\link{specifyEquations}} \code{\link{cfa}}, or \code{\link{specifyModel}}, in the case of a multi-group model in combination with \code{\link{multigroupModel}}. } \item{\dots, formula, raw, fixed.x, objective, group}{ arguments to be passed to \code{\link{sem}}. } \item{data}{ an R data frame, presumably with some missing data (encoded as \code{NA}), containing the data for fitting the SEM, possibly along with other variables to use to obtain multiple imputations of missing values. 
In the case of a multi-group model, this must be a \emph{single} data frame. } \item{n.imp}{ number of imputations (default \code{5}). } \item{n.chains}{ number of Markov chains (default is the number of imputations). } \item{n.iter}{ number of iterations for the multiple-imputation process (default \code{30}). } \item{seed}{ seed for the random-number generator (default is an integer sampled from 1 to 1E6); stored in the resulting object. } \item{mi.args}{ other arguments to be passed to \code{\link{mi}}. } \item{show.progress}{ show a text progress bar on the console tracking model fitting to the multiple imputations; this is distinct from the progress of the multiple-imputation process, which is controlled by the \code{verbose} argument to \code{\link{mi}} (and which, although it defaults to \code{TRUE}, \emph{fails} to produce verbose output on Windows system, as of \pkg{mi} version 1.0). } \item{x, object}{ an object of class \code{"miSem"}. } \item{digits}{ for printing numbers. } } \value{ \code{miSem} returns an object of class \code{"miSem"} with the following components: \item{initial.fit}{an \code{sem} model object produced using \code{\link{objectiveFIML}} if \code{raw=TRUE}, or the objective function given by the \code{objective} argument otherwise.} \item{mi.fits}{a list of \code{sem} model objects, one for each imputed data set.} \item{imputation}{the object produced by \code{\link{complete}}, containing the completed imputed data sets.} \item{seed}{the seed used for the random number generator.} \item{mi.data}{the object returned by \code{\link{mi}}, containing the multiple imputations, and useful, e.g., for diagnostic checking of the imputation process.} } \references{ Yu-Sung Su, Andrew Gelman, Jennifer Hill, Masanao Yajima. (2011). ``Multiple imputation with diagnostics (mi) in R: Opening windows into the black box.'' \emph{Journal of Statistical Software} 45(2). 
} \author{ John Fox \email{jfox@mcmaster.ca} } \seealso{ \code{\link{sem}}, \code{\link{mi}} } \examples{ \dontrun{ # because of long execution time mod.cfa.tests <- cfa(raw=TRUE, text=" verbal: x1, x2, x3 math: y1, y2, y3 ") imps <- miSem(mod.cfa.tests, data=Tests, fixed.x="Intercept", raw=TRUE, seed=12345) summary(imps, digits=3) # introduce some missing data to the HS.data data set: HS <- HS.data[, c(2,7:10,11:15,20:25,26:30)] set.seed(12345) r <- sample(301, 100, replace=TRUE) c <- sample(2:21, 100, replace=TRUE) for (i in 1:100) HS[r[i], c[i]] <- NA mod.hs <- cfa(text=" spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet ") mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) system.time( # relatively time-consuming! imps.mg <- miSem(mod.mg, data=HS, group="Gender", seed=12345) ) summary(imps.mg, digits=3) } } \keyword{models} sem/man/pathDiagram.Rd0000644000175000017500000003503013660067757014475 0ustar nileshnilesh\name{pathDiagram} \alias{pathDiagram} \alias{pathDiagram.sem} \alias{pathDiagram.semmod} \alias{math} \title{Draw Path Diagram} \description{ \code{pathDiagram} creates a description of the path diagram for a structural-equation-model or SEM-specification object to be processed by the graph-drawing program \emph{dot}. } \usage{ pathDiagram(model, ...) 
\method{pathDiagram}{sem}(model, file = "pathDiagram", style = c("ram", "traditional"), output.type = c("html", "graphics", "dot"), graphics.fmt = "pdf", dot.options = NULL, size = c(8, 8), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 10), digits = 2, rank.direction = c("LR", "TB"), min.rank = NULL, max.rank = NULL, same.rank = NULL, variables = model$var.names, var.labels, parameters, par.labels, ignore.double = TRUE, ignore.self = FALSE, error.nodes = TRUE, edge.labels = c("names", "values", "both"), edge.colors = c("black", "black"), edge.weight = c("fixed", "proportional"), node.colors = c("transparent", "transparent", "transparent"), standardize = FALSE, ...) \method{pathDiagram}{semmod}(model, obs.variables, ...) math(text, html.only=FALSE, hat=FALSE) } \arguments{ \item{model}{a structural-equation-model or SEM-specification object produced by \code{sem}, or, respectively, \code{specifyEquations}, \code{specifyModel}, or \code{cfa}.} \item{\dots}{arguments passed down, e.g., from the \code{semmod} method to the \code{sem} method.} \item{file}{a file name, by default \code{"pathDiagram"}, given \emph{without} an extension, to which to write the \emph{dot} description of the path diagram if \code{output.type} \code{"graphics"} or \code{"dot"} is selected, and for the graphics output file (with appropriate extension) if \code{"graphics"} output is selected, in which case a "\code{.dot}" file and a graphics file of type specified by the \code{graphics.fmt} argument (below); \code{file} may include a path specification.} \item{style}{\code{"ram"} (the default) for a RAM path diagram including self-directed double-headed arrows representing variances, including error variances; or \code{"traditional"} for a path diagram including nodes representing error variables.} \item{output.type}{if \code{"html"} (the default), the path diagram will open in the user"s default web browser; if \code{"dot"}, a file containing \emph{dot} commands will be written; if 
\code{"graphics"}, both \code{.dot} and graphics files will be written.} \item{graphics.fmt}{a graphics format recognized by the \emph{dot} program; the default is \code{"pdf"}; \code{graphics.fmt} is also used for the extension of the graphics file that is created.} \item{dot.options}{options to be passed to the \emph{dot} program, given as a character string.} \item{size}{the size of the graph, in inches.} \item{node.font}{font name and point-size for printing variable names.} \item{edge.font}{font name and point-size for printing arrow names or values.} \item{digits}{number of digits after the decimal point (default, 2) to which to round parameter estimates.} \item{rank.direction}{draw graph left-to-right, \code{"LR"}, the default, or top-to-bottom, \code{"TB"}.} \item{min.rank}{a character string listing names of variables to be assigned minimum rank (order) in the graph; the names should be separated by commas.} \item{max.rank}{a character string listing names of variables to be assigned maximum rank in the graph; the names should be separated by commas.} \item{same.rank}{a character string or vector of character strings of variables to be assigned equivalent rank in the graph; names in each string should be separated by commas.} \item{variables}{variable names; defaults to the variable names in \code{model}. If specified, the variable names should be in the same order as in \code{model}.} \item{var.labels}{a character vector with labels to be used in lieu of (some of) the variables names, for greater flexibility in labelling nodes in the graph --- e.g., the labels can be created with the \code{math} function. The elements of the vector must have names corresponding to variables in the model.} \item{parameters}{parameter names; defaults to the parameter names in \code{model}. 
If specified, the parameter names should be in the same order as in \code{model}.} \item{par.labels}{a character vector with labels to be used in lieu of (some of) the parameter names, for greater flexibility in labelling edges in the graph --- e.g., the labels can be created with the \code{math} function. The elements of the vector must have names corresponding to parameters in the model.} \item{ignore.double}{if \code{TRUE}, the default, double-headed arrows, representing variances and covariances, are not graphed.} \item{ignore.self}{if \code{TRUE} (the default is \code{FALSE}), and \code{ignore.double=FALSE}, self-directed double-headed arrows representing error variances are suppressed; note that if \code{ignore.double=TRUE}, \emph{all} double-headed arrows, including self-directed arrows, are suppressed.} \item{error.nodes}{if \code{TRUE} (the default) and \code{style="traditional"}, show the nodes representing error variables.} \item{edge.labels}{\code{"names"} to label arrows with parameter names; \code{"values"} to label arrows with parameter estimates, or \code{"both"}.} \item{edge.colors}{two-element character vector giving colors of positive and negative arrows respectively; the default is \code{c("black", "black")}.} \item{edge.weight}{if \code{"proportional"} (the default is \code{"fixed"}), the thickness of edges is proportional to the absolute size of the corresponding parameter estimate; this is generally sensible only if \code{standardize=TRUE}.} \item{node.colors}{a two- or three-element character vector giving colors of nodes representing exogenous, endogenous, and error variables (for traditional path diagrams) consecutively; the default is \code{"transparent"} for all three; if two colors are given, error variables are colored as exogenous variables (the first color).} \item{standardize}{if \code{TRUE}, display standardized coefficients; default is \code{FALSE}.} \item{obs.variables}{a character vector with the names of the observed variables in the
model.} \item{text}{a character string or vector of character strings to be translated into node or edge label symbols. If a vector of character strings is supplied, then the elements of the vector should be named with the corresponding variable (node) or parameter (edge) name.} \item{html.only}{If \code{TRUE} (the default is \code{FALSE}), the character strings in \code{text} are to be treated as HTML character codes, in which case the prefix \code{"#"} and suffix \code{";"} are appended to each. Otherwise, \code{text} should only contain the names of lowercase or uppercase Greek letters, such as \code{"alpha"} or \code{"Alpha"}. The full set of Greek letters recognized is given in the file \code{Greek.txt} in the package's \code{etc} subdirectory -- or type \code{sem:::Greek} at the R command prompt. In either case, the symbols may be followed by numeric subscripts in curly braces consisting of numerals (e.g., \code{"beta_{12}"}), and/or numeric superscripts (e.g., \code{"sigma^{2}"}, \code{"sigma_{1}^{2}"}). Depending upon your OS, subscripts and superscripts may only work properly with HTML output from \code{pathDiagram}, not with graphics output produced by \emph{dot}.} \item{hat}{If \code{TRUE} (the default is \code{FALSE}), a hat (circumflex) is placed over the symbols in \code{text}; this feature doesn't produce a visually appealing result.} } \details{ \code{pathDiagram} creates a description of the path diagram for a structural-equation-model or SEM-specification object to be processed by the graph-drawing program \emph{dot}, which can be called automatically; see Koutsofios and North (2002) and \url{http://www.graphviz.org/}. To obtain graphics output directly, the \emph{dot} program must be on the system search path. Alternatively, \emph{HTML} output can be created in a web browser without an independent installation of \code{dot} using facilities in the \pkg{DiagrammeR} package.
The \code{math} function can be used to create node (variable) and edge (arrow) labels with symbols such as Greek letters, subscripts, and superscripts. The \code{semmod} method of \code{pathDiagram} sets up a call to the \code{sem} method. The various arguments to \code{pathDiagram} can be used to customize the diagram, but if there are too many constraints on node placement, \emph{dot} may fail to produce a graph or may produce a distorted graph. \code{pathDiagram} can create both RAM-style diagrams, in which variances are represented as self-directed arrows, and traditional path diagrams, in which error variables appear explicitly as nodes. As is conventional, latent variables (including error variables) are represented as ellipses and observed variables as rectangles; double-headed arrows represent covariances (and in RAM diagrams, variances) and single-headed arrows represent structural coefficients. } \value{ \code{pathDiagram} invisibly returns a character vector containing \emph{dot} commands. \code{math} returns a character vector containing suitable \code{HTML} markup. } \references{ Koutsofios, E., and North, S. C. (2002) Drawing graphs with \emph{dot}. \url{https://graphviz.org/documentation/}. 
} \author{John Fox \email{jfox@mcmaster.ca}, Adam Kramer, and Michael Friendly} \seealso{\code{\link{sem}}, \code{\link{specifyEquations}}, \code{\link{specifyModel}}, \code{\link{cfa}}} \examples{ if (interactive()) { # The Duncan, Haller, and Portes Peer-Influences Model R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"), text=" .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 ") model.dhp <- specifyModel(text=" RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA ") sem.dhp <- sem(model.dhp, R.DHP, 329, fixed.x=c("RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) pathDiagram(sem.dhp, min.rank="RIQ, RSES, RParAsp, FParAsp, FSES, FIQ", max.rank="ROccAsp, REdAsp, FEdAsp, FOccAsp", same.rank="RGenAsp, FGenAsp", edge.labels="values") pathDiagram(model.dhp, obs.variables=c("RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp", "ROccAsp", "REdAsp", "FOccAsp", "FEdAsp"), style="traditional", node.colors=c("pink", "lightblue", "lightgreen"), min.rank="RIQ, RSES, RParAsp, FParAsp, FSES, FIQ", max.rank="ROccAsp, REdAsp, FEdAsp, FOccAsp", same.rank="RGenAsp, FGenAsp", var.labels=c(RParAsp="Respondent Parental Aspiration", 
RIQ="Respondent IQ", RSES="Respondent SES", FSES="Friend SES", FIQ="Friend IQ", FParAsp="Friend Parental Aspiration", ROccAsp="Respondent Occupational Aspiration", REdAsp="Respondent Educational Aspiration", RGenAsp="Respondent General Aspiration", FOccAsp="Friend Occupational Aspiration", FEdAsp="Friend Educational Aspiration", FGenAsp="Friend General Aspiration", math(c(RGenAsp.error="xi_{1}", FGenAsp.error="xi_{2}", ROccAsp.error="epsilon_{1}", REdAsp.error="epsilon_{2}", FOccAsp.error="epsilon_{3}", FEdAsp.error="epsilon_{4}"))), par.labels=math(c(gam11="gamma_{11}", gam12="gamma_{12}", gam13="gamma_{13}", gam14="gamma_{14}", gam23="gamma_{23}", gam24="gamma_{24}", gam25="gamma_{25}", gam26="gamma_{26}", beta12="beta_{12}", beta21="beta_{21}", lam21="lambda_{21}", lam42="lambda_{42}", ps11="psi_{11}", ps22="psi_{22}", ps12="psi_{12}", theta1="theta_{1}", theta2="theta_{2}", theta3="theta_{3}", theta4="theta_{4}"))) # the following example contributed by Michael Friendly: union <- readMoments(diag=TRUE, names=c('y1', 'y2', 'y3', 'x1', 'x2'), text=" 14.610 -5.250 11.017 -8.057 11.087 31.971 -0.482 0.677 1.559 1.021 -18.857 17.861 28.250 7.139 215.662 ") union.mod <- specifyEquations(covs=c("x1, x2"), text=" y1 = gam12*x2 y2 = beta21*y1 + gam22*x2 y3 = beta31*y1 + beta32*y2 + gam31*x1 ") union.sem <- sem(union.mod, union, N=173) dot <- pathDiagram(union.sem, style="traditional", ignore.double=FALSE, error.nodes=FALSE, edge.labels="values", min.rank=c("Years", "Age"), max.rank=c("Sentiment", "Sentiment.error"), same.rank=c("Deference, Deference.error", "Activism, Activism.error"), variables=c("Deference", "Activism", "Sentiment", "Years", "Age"), edge.colors=c("black", "red"), node.colors = c("pink", "lightblue")) cat(paste(dot, collapse="\n")) # dot commands } } \keyword{dplot} \keyword{models} sem/man/Kmenta.Rd0000644000175000017500000000172511701252753013461 0ustar nileshnilesh\name{Kmenta} \alias{Kmenta} \docType{data} \title{Partly Artificial Data on the U. S. 
Economy} \usage{Kmenta} \description{ These are partly contrived data from Kmenta (1986), constructed to illustrate estimation of a simultaneous-equation model. The \code{Kmenta} data frame has 20 rows and 5 columns. } \format{ This data frame contains the following columns: \describe{ \item{Q}{ food consumption per capita. } \item{P}{ ratio of food prices to general consumer prices. } \item{D}{ disposable income in constant dollars. } \item{F}{ ratio of preceding year's prices received by farmers to general consumer prices. } \item{A}{ time in years. } } } \details{ The exogenous variables \code{D}, \code{F}, and \code{A} are based on real data; the endogenous variables \code{P} and \code{Q} were generated by simulation. } \source{ Kmenta, J. (1986) \emph{Elements of Econometrics, Second Edition}, Macmillan. } \keyword{datasets} sem/man/sem.Rd0000644000175000017500000015155313660067757013051 0ustar nileshnilesh\name{sem} \alias{sem} \alias{sem.semmod} \alias{sem.default} \alias{sem.semmodList} \alias{sem.msemmod} \alias{startvalues} \alias{startvalues2} \alias{coef.sem} \alias{vcov.sem} \alias{df.residual.sem} \alias{coef.msem} \alias{vcov.msem} \alias{df.residual.msem} \title{General Structural Equation Models} \description{ \code{sem} fits general structural equation models (with both observed and unobserved variables). Observed variables are also called \emph{indicators} or \emph{manifest variables}; unobserved variables are also called \emph{factors} or \emph{latent variables}. Normally, the generic function (\code{sem}) is called directly with a \code{semmod} first argument produced by \code{\link{specifyModel}}, \code{\link{specifyEquations}}, or \code{\link{cfa}}, invoking the \code{sem.semmod} method, which in turn sets up a call to the \code{sem.default} method; thus, the user may wish to specify arguments accepted by the \code{semmod} and \code{default} methods. 
Similarly, for a multigroup model, \code{sem} would normally be called with a \code{semmodList} object produced by \code{\link{multigroupModel}} as its first argument, and would then generate a call to the \code{msemmod} method. } \usage{ \method{sem}{semmod}(model, S, N, data, raw=identical(na.action, na.pass), obs.variables=rownames(S), fixed.x=NULL, formula= ~ ., na.action=na.omit, robust=!missing(data), debug=FALSE, optimizer=optimizerSem, objective=objectiveML, ...) \method{sem}{default}(model, S, N, raw=FALSE, data=NULL, start.fn=startvalues, pattern.number=NULL, valid.data.patterns=NULL, use.means=TRUE, param.names, var.names, fixed.x=NULL, robust=!is.null(data), semmod=NULL, debug=FALSE, analytic.gradient=!identical(objective, objectiveFIML), warn=FALSE, maxiter=1000, par.size=c("ones", "startvalues"), start.tol=1E-6, optimizer=optimizerSem, objective=objectiveML, cls, ...) \method{sem}{semmodList}(model, S, N, data, raw=FALSE, fixed.x=NULL, robust=!missing(data), formula, group="Group", debug=FALSE, ...) \method{sem}{msemmod}(model, S, N, start.fn=startvalues, group="Group", groups=names(model), raw=FALSE, fixed.x, param.names, var.names, debug=FALSE, analytic.gradient=TRUE, warn=FALSE, maxiter=5000, par.size = c("ones", "startvalues"), start.tol = 1e-06, start=c("initial.fit", "startvalues"), initial.maxiter=1000, optimizer = optimizerMsem, objective = msemObjectiveML, ...) startvalues(S, ram, debug=FALSE, tol=1E-6) startvalues2(S, ram, debug=FALSE, tol=1E-6) \method{coef}{sem}(object, standardized=FALSE, ...) \method{coef}{msem}(object, ...) \method{vcov}{sem}(object, robust=FALSE, analytic=inherits(object, "objectiveML") && object$t <= 500, ...) \method{vcov}{msem}(object, robust=FALSE, analytic=inherits(object, "msemObjectiveML") && object$t <= 500, ...) \method{df.residual}{sem}(object, ...) \method{df.residual}{msem}(object, ...) } \arguments{ \item{model}{RAM specification, which is a simple encoding of the path diagram for the model.
The model may be given either in symbolic form (as a \code{semmod} object, as returned by the \code{\link{specifyModel}}, \code{\link{specifyEquations}}, or \code{\link{cfa}} function, or as a character matrix), invoking \code{sem.semmod}, which calls \code{sem.default} after setting up the model, or (less conveniently) in numeric form, invoking \code{sem.default} directly, which is not recommended (see \bold{Details} below). The \code{model} argument may also be a multigroup-model specification, as produced by \code{\link{multigroupModel}}.} \item{S}{covariance matrix among observed variables; may be input as a symmetric matrix, or as a lower- or upper-triangular matrix. \code{S} may also be a raw (i.e., \dQuote{uncorrected}) moment matrix --- that is, a sum-of-squares-and-products matrix divided by \code{N}. This form of input is useful for fitting models with intercepts, in which case the moment matrix should include the mean square and cross-products for a unit variable all of whose entries are 1; of course, the raw mean square for the unit variable is 1. Raw-moment matrices may be computed by \code{\link{rawMoments}}. If the \code{ram} argument is given in symbolic form, then the observed-variable covariance or raw-moment matrix may contain variables that do not appear in the model, in which case a warning is printed. \code{S} may also be a list of covariance or moment matrices for each group in a multigroup model. As an alternative to specifying \code{S} the user may supply a data frame containing the data for the model (see the argument \code{data}).} \item{N}{number of observations on which the covariance matrix is based; for a multigroup model, a vector of group \eqn{N}s.} \item{data}{As a generally preferable alternative to specifying \code{S} and \code{N}, the user may supply a data frame containing the data to which the model is to be fit. 
In a multigroup model, the \code{data} argument may be a list of data frames or a single data frame; in the latter event, the factor given as the \code{group} argument is used to split the data into groups.} \item{start.fn}{a function to compute start values for the free parameters of the model; two functions are supplied, \code{startvalues} and an older version, \code{startvalues2}, the first of which is the default.} \item{na.action}{a function to process missing data, if raw data are supplied in the \code{data} argument. The default is \code{na.omit}, which returns only complete cases; specify \code{na.action=na.pass} to get FIML estimates in the presence of missing data from the \code{objectiveFIML} and \code{objectiveFIML2} objective functions.} \item{raw}{\code{TRUE} if \code{S} is a raw moment matrix or if a raw moment matrix --- as opposed to a covariance matrix --- is to be computed from \code{data}; the default is \code{FALSE} unless the \code{na.action} argument is set to \code{na.pass}.} \item{pattern.number, valid.data.patterns}{these arguments pass information about valid (i.e., non-missing) data patterns and normally would not be specified directly by the user.} \item{use.means}{When raw data are supplied and intercepts are included in the model, use the observed-variable means as start values for the intercepts; the default is \code{TRUE}.} \item{obs.variables}{names of observed variables, by default taken from the row names of the covariance or moment matrix \code{S}, which may be given directly or generated according to the \code{data} and \code{formula} arguments.} \item{fixed.x}{names (if the \code{ram} matrix is given in symbolic form) or indices (if it is in numeric form) of fixed exogenous variables.
Specifying these obviates the necessity of having to fix the variances and covariances among these variables (and produces correct degrees of freedom for the model chisquare).} \item{formula}{a one-sided formula, to be applied to \code{data} to generate the variables for which covariances or raw moments are computed. The default formula is \code{~.}, i.e., all of the variables in the data, including an implied intercept; if a covariance matrix is to be computed, the constant is suppressed. In a multigroup model, alternatively a list of one-sided formulas may be given, to be applied individually to the groups.} \item{robust}{In \code{sem}: if \code{TRUE}, then quantities are calculated that can be used to compute robust estimates of coefficient standard errors and robust tests when the model is fit by multinormal maximum likelihood; the default is \code{TRUE} when the \code{data} argument is given, and this option is only available when the \code{data} argument is given. In \code{vcov}: if \code{TRUE}, return a robust coefficient covariance matrix (if \code{object} contains the requisite information).} \item{semmod}{a \code{semmod} object containing the description of the model; optional, and normally supplied not directly by the user but via the \code{semmod} method for \code{sem}.} \item{debug}{if \code{TRUE}, some information is printed to help you debug the symbolic model specification; for example, if a variable name is misspelled, \code{sem} will assume that the variable is a (new) latent variable. Information about the optimization will also be printed, but details will vary with the optimizer employed. The default is \code{FALSE}.} \item{...}{arguments to be passed down, including from \code{sem.default} to the optimizer.} \item{param.names}{names of the \eqn{t} free parameters, given in their numerical order; default names are \code{Param1}, \ldots, \code{Param}\emph{t}.
Note: Should not be specified when the model is given in symbolic form.} \item{var.names}{names of the \eqn{m} entries of the \eqn{v} vector (typically the observed and latent variables --- see below), given in their numerical order; default names are \code{Var1}, \ldots, \code{Var}\emph{m}. Note: Should not be specified when the model is given in symbolic form.} \item{analytic.gradient}{if \code{TRUE} (the default, except for the \code{objectiveFIML} objective function, where, at present, an analytic gradient slows down the computation), then analytic first derivatives are used in the maximization of the likelihood if the optimizer employed will accept them; otherwise numeric derivatives are used, again if the optimizer will compute them.} \item{warn}{if \code{TRUE}, warnings produced by the optimization function will be printed. This should generally not be necessary, since \code{sem} prints its own warning, and saves information about convergence. The default is \code{FALSE}.} \item{maxiter}{the maximum number of iterations for the optimization of the objective function, to be passed to the optimizer.} \item{par.size}{the anticipated size of the free parameters; if \code{"ones"}, a vector of ones is used; if \code{"startvalues"}, taken from the start values. You can try changing this argument if you encounter convergence problems. The default is \code{"startvalues"} if the largest input variance is at least 100 times the smallest, and \code{"ones"} otherwise. Whether this argument is actually used depends upon the optimizer employed.} \item{start.tol, tol}{if the magnitude of an automatic start value is less than \code{start.tol}, then it is set to \code{start.tol}; defaults to 1E-6.} \item{optimizer}{a function to be used to minimize the objective function; the default for single-group models is \code{\link{optimizerSem}}.
Alternatives are \code{\link{optimizerNlm}}, which employs the standard R optimizer \code{\link[stats]{nlm}}; \code{\link{optimizerOptim}}, which employs \code{\link[stats]{optim}}; and \code{\link{optimizerNlminb}}, which uses \code{\link[stats]{nlminb}} --- or the user can supply an optimizer. For multigroup models, the default is \code{\link{optimizerMsem}}, and \code{\link{msemOptimizerNlm}}, based on \code{nlm}, is provided as an alternative.} \item{objective}{An objective function to be minimized, sometimes called a \dQuote{fit} function in the SEM literature. The default for single-group models is \code{\link{objectiveML}}, which produces maximum-likelihood estimates assuming multinormality. An alternative is \code{\link{objectiveGLS}}, which produces generalized least squares estimates --- or the user can supply an objective function to be minimized. For multigroup models, the default is \code{\link{msemObjectiveML}} for ML estimates and an alternative is \code{\link{msemObjectiveGLS}} for GLS estimates.} \item{cls}{primary class to be assigned to the result; normally this is not specified directly, but rather is inferred from the objective function.} \item{ram}{numeric RAM matrix.} \item{object}{an object of class \code{"sem"} or \code{"msem"}, returned by \code{sem}.} \item{standardized}{if \code{TRUE}, return standardized coefficients.} \item{analytic}{return an analytic (as opposed to numeric) estimate of the coefficient covariance matrix; at present only available for the \code{\link{objectiveML}} objective function.
The default is \code{TRUE} for this objective function if there are no more than 500 parameters and \code{FALSE} otherwise.} \item{group}{for a multigroup model, the quoted name of the group variable; if the \code{data} argument is given, and is a single data frame, then this should be a factor in the data set or a variable coercible to a factor, to be used to split the data into groups; otherwise, the name is arbitrary.} \item{groups}{a character vector giving the names of the groups; will be ignored if \code{group} is a factor in \code{data}.} \item{start}{if \code{"initial.fit"} (the default), start values for a multi-group model are computed by first fitting the intra-group models separately by group; if \code{"startvalues"}, then start values are computed as for a single-group model. In some cases, the intra-group models may not be identified even if the multi-group model is, and then \code{start="initial.fit"} should not be used.} \item{initial.maxiter}{if \code{start="initial.fit"} for a multi-group model, then \code{initial.maxiter} gives the maximum number of iterations for each initial intra-group fit.} } \details{ The model is set up using either RAM (\dQuote{reticular action model} -- don't ask!) notation -- a simple format for specifying general structural equation models by coding the \dQuote{arrows} in the path diagram for the model (see, e.g., McArdle and McDonald, 1984) -- typically using the \code{\link{specifyModel}} function; in equation format using the \code{\link{specifyEquations}} function; or, for a simple confirmatory factor analysis model, via the \code{\link{cfa}} function. In any case, the model is represented internally in RAM format. The variables in the \eqn{v} vector in the model (typically, the observed and unobserved variables, but not error variables) are numbered from 1 to \eqn{m}.
the RAM matrix contains one row for each (free or constrained) parameter of the model, and may be specified either in symbolic format or in numeric format. A symbolic \code{ram} matrix consists of three columns, as follows: \describe{ \item{1. Arrow specification:}{This is a simple formula, of the form \code{"A -> B"} or, equivalently, \code{"B <- A"} for a regression coefficient (i.e., a single-headed or directional arrow); \code{"A <-> A"} for a variance or \code{"A <-> B"} for a covariance (i.e., a double-headed or bidirectional arrow). Here, \code{A} and \code{B} are variable names in the model. If a name does not correspond to an observed variable, then it is assumed to be a latent variable. Spaces can appear freely in an arrow specification, and there can be any number of hyphens in the arrows, including zero: Thus, e.g., \code{"A->B"}, \code{"A --> B"}, and \code{"A>B"} are all legitimate and equivalent.} \item{2. Parameter name:}{The name of the regression coefficient, variance, or covariance specified by the arrow. Assigning the same name to two or more arrows results in an equality constraint. Specifying the parameter name as \code{NA} produces a fixed parameter.} \item{3. Value:}{start value for a free parameter or value of a fixed parameter. If given as \code{NA}, \code{sem} will compute the start value.} } It is simplest to construct the RAM matrix with the \code{\link{specifyModel}}, \code{\link{specifyEquations}}, or \code{\link{cfa}} function, all of which return an object of class \code{semmod}, and also incorporate some model-specification convenience shortcuts. This process is illustrated in the examples below. A numeric \code{ram} matrix consists of five columns, as follows: \describe{ \item{1. Number of arrow heads:}{1 (directed arrow) or 2 (covariance).} \item{2. Arrow \emph{to}:}{index of the variable at the head of a directional arrow, or at one end of a bidirectional arrow. 
Observed variables should be assigned the numbers 1 to \eqn{n}, where \eqn{n} is the number of rows/columns in the covariance matrix \code{S}, with the indices corresponding to the variables' positions in \code{S}. Variable indices above \eqn{n} represent latent variables.} \item{3. Arrow \emph{from}:}{the index of the variable at the tail of a directional arrow, or at the other end of a bidirectional arrow.} \item{4. Parameter number:}{free parameters are numbered from 1 to \eqn{t}, but do not necessarily appear in consecutive order. Fixed parameters are given the number 0. Equality constraints are specified by assigning two or more parameters the same number.} \item{5. Value:}{start value for a free parameter, or value of a fixed parameter. If given as \code{NA}, the program will compute a start value, by a slight modification of the method described by McDonald and Hartmann (1992). \emph{Note:} In some circumstances, some start values are selected randomly; this might produce small differences in the parameter estimates when the program is rerun.} } The numeric \code{ram} matrix is normally generated automatically, not specified directly by the user. For \code{specifyEquations}, each input line is either a regression equation or the specification of a variance or covariance. Regression equations are of the form \code{y = par1*x1 + par2*x2 + ... + park*xk} where \code{y} and the \code{x}s are variables in the model (either observed or latent), and the \code{par}s are parameters. If a parameter is given as a numeric value (e.g., \code{1}) then it is treated as fixed. Note that no \dQuote{error} variable is included in the equation; \dQuote{error variances} are specified via either the \code{covs} argument, via \code{V(y) = par} (see immediately below), or are added automatically to the model when, as by default, \code{endog.variances=TRUE}.
Variances are specified in the form \code{V(var) = par} and covariances in the form \code{C(var1, var2) = par}, where the \code{var}s are variables (observed or unobserved) in the model. The symbols \code{V} and \code{C} may be in either lower- or upper-case. If \code{par} is a numeric value (e.g., \code{1}) then it is treated as fixed. In conformity with the RAM model, a variance or covariance for an endogenous variable in the model is an \dQuote{error} variance or covariance. To set a start value for a free parameter, enclose the numeric start value in parentheses after the parameter name, as \code{parameter(value)}. \code{sem} fits the model by calling the optimizer specified in the \code{optimizer} argument to minimize the objective function specified in the \code{objective} argument. If the optimization fails to converge, a warning message is printed. The RAM formulation of the general structural equation model is given by the basic equation \deqn{v = Av + u} where \eqn{v} and \eqn{u} are vectors of random variables (observed or unobserved), and the parameter matrix \eqn{A} contains regression coefficients, symbolized by single-headed arrows in a path diagram. Another parameter matrix, \deqn{P = E(uu')} contains covariances among the elements of \eqn{u} (assuming that the elements of \eqn{u} have zero means). Usually \eqn{v} contains endogenous and exogenous observed and unobserved variables, but not error variables (see the examples below). The \code{startvalues} function may be called directly, but is usually called by \code{sem.default}; \code{startvalues2} is an older version of this function that may be used alternatively; see the \code{startvalues} argument to \code{sem}. 
} \value{ \code{sem} returns an object of class \code{c(}\emph{objective}\code{, "sem")}, where \emph{objective} is the name of the objective function that was optimized (e.g., \code{"objectiveML"}), with the following elements: \item{var.names}{vector of variable names.} \item{ram}{RAM matrix, including any rows generated for covariances among fixed exogenous variables; column 5 includes computed start values.} \item{S}{observed covariance matrix.} \item{J}{RAM selection matrix, \eqn{J}, which picks out observed variables.} \item{n.fix}{number of fixed exogenous variables.} \item{n}{number of observed variables.} \item{N}{number of observations.} \item{m}{number of variables (observed plus unobserved).} \item{t}{number of free parameters.} \item{raw}{\code{TRUE} if the model is fit to a raw moment matrix, \code{FALSE} otherwise.} \item{data}{the observed-variable data matrix, or \code{NULL} if data are not supplied.} \item{semmod}{the \code{semmod} specification object for the model, if one was supplied; otherwise \code{NULL}.} \item{optimizer}{the optimizer function.} \item{objective}{the objective function.} \item{coeff}{estimates of free parameters.} \item{vcov}{estimated asymptotic covariance matrix of parameter estimates, based on a numeric Hessian, if supplied by the optimizer; otherwise \code{NULL}.} \item{par.posn}{indices of free parameters.} \item{convergence}{\code{TRUE} or \code{FALSE}, depending upon whether the optimization apparently converged.} \item{iterations}{number of iterations performed.} \item{criterion}{value of the objective function at the minimum.} \item{C}{model-reproduced covariance matrix.} \item{A}{RAM \eqn{A} matrix.} \item{P}{RAM \eqn{P} matrix.} \item{adj.obj}{robust adjusted value of the objective function; \code{NULL} if \code{robust} is \code{FALSE}.} \item{robust.vcov}{robust estimated coefficient covariance matrix; \code{NULL} if \code{robust} is \code{FALSE}.} For multigroup models, \code{sem} returns an object of class 
\code{c("msemObjectiveML", "msem")}. } \section{Warning}{ A common error is to fail to specify variance or covariance terms in the model, which are denoted by double-headed arrows, \code{<->}. In general, every observed or latent variable in the model should be associated with a variance or error variance. This may be a free parameter to estimate or a fixed constant (as in the case of a latent exogenous variable for which you wish to fix the variance, e.g., to 1). Again in general, there will be an \emph{error variance} associated with each endogenous variable in the model (i.e., each variable to which at least one single-headed arrow points --- including observed indicators of latent variables), and a \emph{variance} associated with each exogenous variable (i.e., each variable that appears only at the tail of single-headed arrows, never at the head). To my knowledge, the only \emph{apparent} exception to this rule is for observed variables that are declared to be fixed exogenous variables. In this case, the program generates the necessary (fixed-constant) variances and covariances automatically. If there are missing variances, a warning message will be printed, and estimation will almost surely fail in some manner. Missing variances might well indicate that there are missing covariances too, but it is not possible to deduce this in a mechanical manner. The \code{\link{specifyModel}} function will by default supply error-variance parameters if these are missing. } \references{ Fox, J. (2006) Structural equation modeling with the sem package in R. \emph{Structural Equation Modeling} \bold{13}:465--486. Bollen, K. A. (1989) \emph{Structural Equations With Latent Variables.} Wiley. Bollen, K. A. and Long, J. S. (eds.) \emph{Testing Structural Equation Models}, Sage. McArdle, J. J. and Epstein, D. (1987) Latent growth curves within developmental structural equation models. \emph{Child Development} \bold{58}, 110--133. McArdle, J. J. and McDonald, R. P.
(1984) Some algebraic properties of the reticular action model. \emph{British Journal of Mathematical and Statistical Psychology} \bold{37}, 234--251. McDonald, R. P. and Hartmann, W. M. (1992) A procedure for obtaining initial values of parameters in the RAM model. \emph{Multivariate Behavioral Research} \bold{27}, 57--76. Raftery, A. E. (1993) Bayesian model selection in structural equation models. In Bollen, K. A. and Long, J. S. (eds.) \emph{Testing Structural Equation Models}, Sage. Raftery, A. E. (1995) Bayesian model selection in social research (with discussion). \emph{Sociological Methodology} \bold{25}, 111--196. Satorra, A. (2000) Scaled and adjusted restricted tests in multi-sample analysis of moment structures. pp. 233--247 in Heijmans, R.D.H., Pollock, D.S.G. & Satorra, A. (eds.) \emph{Innovations in Multivariate Statistical Analysis. A Festschrift for Heinz Neudecker }, Kluwer. } \author{John Fox \email{jfox@mcmaster.ca}, Zhenghua Nie, and Jarrett Byrnes} \seealso{\code{\link{rawMoments}}, \code{\link{startvalues}}, \code{\link{objectiveML}}, \code{\link{objectiveGLS}}, \code{\link{optimizerNlm}}, \code{\link{optimizerOptim}}, \code{\link{optimizerNlminb}}, \code{\link[stats]{nlm}}, \code{\link[stats]{optim}}, \code{\link[stats]{nlminb}}, \code{\link{specifyModel}}, \code{\link{specifyEquations}}, \code{\link{cfa}} } \examples{ # The following example illustrates the use the text argument to # readMoments() and specifyEquations(): R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"), text=" .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 ") model.dhp.1 <- specifyEquations(covs="RGenAsp, FGenAsp", text=" RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES 
+ gam14*FSES + beta12*FGenAsp FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + beta21*RGenAsp ROccAsp = 1*RGenAsp REdAsp = lam21(1)*RGenAsp # to illustrate setting start values FOccAsp = 1*FGenAsp FEdAsp = lam42(1)*FGenAsp ") sem.dhp.1 <- sem(model.dhp.1, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp')) summary(sem.dhp.1) # Note: The following set of examples can't be run via example() because the default file # argument of specifyeEquations, specifyModel(), and readMoments() requires that the model # specification and covariances, correlations, or raw moments be entered in an interactive # session at the command prompt. The examples can be copied and run in the R console, # however. See ?specifyModel and ?readMoments for further information. # These examples are repeated below using file input to specifyModel() and # readMoments(). The second version of the examples may be executed through example(). \dontrun{ # ------------- Duncan, Haller and Portes peer-influences model ---------------------- # A nonrecursive SEM with unobserved endogenous variables and fixed exogenous variables R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 # Fit the model using a symbolic ram specification model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11, NA RIQ -> RGenAsp, gam12, NA RSES -> RGenAsp, gam13, NA FSES -> RGenAsp, gam14, NA RSES -> FGenAsp, gam23, NA FSES -> FGenAsp, gam24, NA FIQ -> FGenAsp, gam25, NA FParAsp -> FGenAsp, gam26, NA FGenAsp -> RGenAsp, beta12, NA RGenAsp -> FGenAsp, beta21, NA RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21, NA FGenAsp -> FOccAsp, NA, 1 FGenAsp 
-> FEdAsp, lam42, NA RGenAsp <-> RGenAsp, ps11, NA FGenAsp <-> FGenAsp, ps22, NA RGenAsp <-> FGenAsp, ps12, NA ROccAsp <-> ROccAsp, theta1, NA REdAsp <-> REdAsp, theta2, NA FOccAsp <-> FOccAsp, theta3, NA FEdAsp <-> FEdAsp, theta4, NA # an equivalent specification, allowing specifyModel() to generate # variance parameters for endogenous variables (and suppressing the # unnecessary NAs): model.dhp <- specifyModel() RParAsp -> RGenAsp, gam11 RIQ -> RGenAsp, gam12 RSES -> RGenAsp, gam13 FSES -> RGenAsp, gam14 RSES -> FGenAsp, gam23 FSES -> FGenAsp, gam24 FIQ -> FGenAsp, gam25 FParAsp -> FGenAsp, gam26 FGenAsp -> RGenAsp, beta12 RGenAsp -> FGenAsp, beta21 RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21 FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42 RGenAsp <-> FGenAsp, ps12 # Another equivalent specification, telling specifyModel to add paths for # variances and covariance of RGenAsp and FGenAsp: model.dhp <- specifyModel(covs="RGenAsp, FGenAsp") RParAsp -> RGenAsp, gam11 RIQ -> RGenAsp, gam12 RSES -> RGenAsp, gam13 FSES -> RGenAsp, gam14 RSES -> FGenAsp, gam23 FSES -> FGenAsp, gam24 FIQ -> FGenAsp, gam25 FParAsp -> FGenAsp, gam26 FGenAsp -> RGenAsp, beta12 RGenAsp -> FGenAsp, beta21 RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21 FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42 # Yet another equivalent specification using specifyEquations(): model.dhp.1 <- specifyEquations(covs="RGenAsp, FGenAsp") RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES + gam14*FSES + beta12*FGenAsp FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + beta21*RGenAsp ROccAsp = 1*RGenAsp REdAsp = lam21(1)*RGenAsp # to illustrate setting start values FOccAsp = 1*FGenAsp FEdAsp = lam42(1)*FGenAsp sem.dhp.1 <- sem(model.dhp.1, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp')) summary(sem.dhp.1) # Fit the model using a numerical ram specification (not recommended!) 
ram.dhp <- matrix(c( # heads to from param start 1, 1, 11, 0, 1, 1, 2, 11, 1, NA, # lam21 1, 3, 12, 0, 1, 1, 4, 12, 2, NA, # lam42 1, 11, 5, 3, NA, # gam11 1, 11, 6, 4, NA, # gam12 1, 11, 7, 5, NA, # gam13 1, 11, 8, 6, NA, # gam14 1, 12, 7, 7, NA, # gam23 1, 12, 8, 8, NA, # gam24 1, 12, 9, 9, NA, # gam25 1, 12, 10, 10, NA, # gam26 1, 11, 12, 11, NA, # beta12 1, 12, 11, 12, NA, # beta21 2, 1, 1, 13, NA, # theta1 2, 2, 2, 14, NA, # theta2 2, 3, 3, 15, NA, # theta3 2, 4, 4, 16, NA, # theta4 2, 11, 11, 17, NA, # psi11 2, 12, 12, 18, NA, # psi22 2, 11, 12, 19, NA # psi12 ), ncol=5, byrow=TRUE) params.dhp <- c('lam21', 'lam42', 'gam11', 'gam12', 'gam13', 'gam14', 'gam23', 'gam24', 'gam25', 'gam26', 'beta12', 'beta21', 'theta1', 'theta2', 'theta3', 'theta4', 'psi11', 'psi22', 'psi12') vars.dhp <- c('ROccAsp', 'REdAsp', 'FOccAsp', 'FEdAsp', 'RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp', 'RGenAsp', 'FGenAsp') sem.dhp.2 <- sem(ram.dhp, R.DHP, 329, param.names=params.dhp, var.names=vars.dhp, fixed.x=5:10) summary(sem.dhp.2) # -------------------- Wheaton et al. alienation data ---------------------- S.wh <- readMoments(names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI')) 11.834 6.947 9.364 6.819 5.091 12.532 4.783 5.028 7.495 9.986 -3.839 -3.889 -3.841 -3.625 9.610 -21.899 -18.831 -21.748 -18.775 35.522 450.288 # This is the model in the SAS manual for PROC CALIS: A Recursive SEM with # latent endogenous and exogenous variables. # Curiously, both factor loadings for two of the latent variables are fixed. 
model.wh.1 <- specifyModel() Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, NA, 0.833 Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, NA, 0.833 SES -> Education, NA, 1 SES -> SEI, lamb, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem.wh.1 <- sem(model.wh.1, S.wh, 932) summary(sem.wh.1) # The same model in equation format: model.wh.1 <- specifyEquations() Anomia67 = 1*Alienation67 Powerless67 = 0.833*Alienation67 Anomia71 = 1*Alienation71 Powerless71 = 0.833*Alienation71 Education = 1*SES SEI = lamb*SES Alienation67 = gam1*SES Alienation71 = gam2*SES + beta*Alienation67 V(Anomia67) = the1 V(Anomia71) = the1 V(Powerless67) = the2 V(Powerless71) = the2 V(SES) = phi C(Anomia67, Anomia71) = the5 C(Powerless67, Powerless71) = the5 # The same model, but treating one loading for each latent variable as free # (and equal to each other). 
model.wh.2 <- specifyModel() Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, lamby, NA Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, lamby, NA SES -> Education, NA, 1 SES -> SEI, lambx, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem.wh.2 <- sem(model.wh.2, S.wh, 932) summary(sem.wh.2) # And again, in equation format: model.wh <- specifyEquations() Anomia67 = 1*Alienation67 Powerless67 = lamby*Alienation67 Anomia71 = 1*Alienation71 Powerless71 = lamby*Alienation71 Education = 1*SES SEI = lambx*SES Alienation67 = gam1*SES Alienation71 = gam2*SES + beta*Alienation67 V(Anomia67) = the1 V(Anomia71) = the1 V(Powerless67) = the2 V(Powerless71) = the2 V(SES) = phi C(Anomia67, Anomia71) = the5 C(Powerless67, Powerless71) = the5 # Compare the two models by a likelihood-ratio test: anova(sem.wh.1, sem.wh.2) # ----------------------- Thurstone data --------------------------------------- # Second-order confirmatory factor analysis, from the SAS manual for PROC CALIS R.thur <- readMoments(diag=FALSE, names=c('Sentences','Vocabulary', 'Sent.Completion','First.Letters','4.Letter.Words','Suffixes', 'Letter.Series','Pedigrees', 'Letter.Group')) .828 .776 .779 .439 .493 .46 .432 .464 .425 .674 .447 .489 .443 .59 .541 .447 .432 .401 .381 .402 .288 .541 .537 .534 .35 .367 .32 .555 .38 .358 .359 .424 .446 .325 .598 .452 model.thur <- specifyModel() F1 -> Sentences, lam11 F1 -> Vocabulary, lam21 F1 -> Sent.Completion, lam31 F2 -> First.Letters, lam42 F2 -> 4.Letter.Words, lam52 F2 -> Suffixes, lam62 F3 -> Letter.Series, lam73 F3 -> 
Pedigrees, lam83 F3 -> Letter.Group, lam93 F4 -> F1, gam1 F4 -> F2, gam2 F4 -> F3, gam3 F1 <-> F1, NA, 1 F2 <-> F2, NA, 1 F3 <-> F3, NA, 1 F4 <-> F4, NA, 1 sem.thur <- sem(model.thur, R.thur, 213) summary(sem.thur) # The model in equation format: model.thur <- specifyEquations() Sentences = lam11*F1 Vocabulary = lam21*F1 Sent.Completion = lam31*F1 First.Letters = lam42*F2 4.Letter.Words = lam52*F2 Suffixes = lam62*F2 Letter.Series = lam73*F3 Pedigrees = lam83*F3 Letter.Group = lam93*F3 F1 = gam1*F4 F2 = gam2*F4 F3 = gam3*F4 V(F1) = 1 V(F2) = 1 V(F3) = 1 V(F4) = 1 #------------------------- Kerchoff/Kenney path analysis --------------------- # An observed-variable recursive SEM from the LISREL manual R.kerch <- readMoments(diag=FALSE, names=c('Intelligence','Siblings', 'FatherEd','FatherOcc','Grades','EducExp','OccupAsp')) -.100 .277 -.152 .250 -.108 .611 .572 -.105 .294 .248 .489 -.213 .446 .410 .597 .335 -.153 .303 .331 .478 .651 model.kerch <- specifyModel() Intelligence -> Grades, gam51 Siblings -> Grades, gam52 FatherEd -> Grades, gam53 FatherOcc -> Grades, gam54 Intelligence -> EducExp, gam61 Siblings -> EducExp, gam62 FatherEd -> EducExp, gam63 FatherOcc -> EducExp, gam64 Grades -> EducExp, beta65 Intelligence -> OccupAsp, gam71 Siblings -> OccupAsp, gam72 FatherEd -> OccupAsp, gam73 FatherOcc -> OccupAsp, gam74 Grades -> OccupAsp, beta75 EducExp -> OccupAsp, beta76 sem.kerch <- sem(model.kerch, R.kerch, 737, fixed.x=c('Intelligence', 'Siblings', 'FatherEd', 'FatherOcc')) summary(sem.kerch) # The model in equation format: model.kerch <- specifyEquations() Grades = gam51*Intelligence + gam52*Siblings + gam53*FatherEd + gam54*FatherOcc EducExp = gam61*Intelligence + gam62*Siblings + gam63*FatherEd + gam64*FatherOcc + beta65*Grades OccupAsp = gam71*Intelligence + gam72*Siblings + gam73*FatherEd + gam74*FatherOcc + beta75*Grades + beta76*EducExp #------------------- McArdle/Epstein latent-growth-curve model ----------------- # This model, from McArdle and Epstein 
(1987, p.118), illustrates the use of a # raw moment matrix to fit a model with an intercept. (The example was suggested # by Mike Stoolmiller.) M.McArdle <- readMoments( names=c('WISC1', 'WISC2', 'WISC3', 'WISC4', 'UNIT')) 365.661 503.175 719.905 675.656 958.479 1303.392 890.680 1265.846 1712.475 2278.257 18.034 25.819 35.255 46.593 1.000 mod.McArdle <- specifyModel() C -> WISC1, NA, 6.07 C -> WISC2, B2, NA C -> WISC3, B3, NA C -> WISC4, B4, NA UNIT -> C, Mc, NA C <-> C, Vc, NA, WISC1 <-> WISC1, Vd, NA WISC2 <-> WISC2, Vd, NA WISC3 <-> WISC3, Vd, NA WISC4 <-> WISC4, Vd, NA sem.McArdle <- sem(mod.McArdle, M.McArdle, 204, fixed.x="UNIT", raw=TRUE) summary(sem.McArdle) # The model in equation format: mod.McArdle <- specifyEquations() WISC1 = 6.07*C WISC2 = B2*C WISC3 = B3*C WISC4 = b4*C C = Mc*UNIT v(C) = Vc v(WISC1) = Vd v(WISC2) = Vd v(WISC3) = Vd v(WISC4) = Vd #------------ Bollen industrialization and democracy example ----------------- # This model, from Bollen (1989, Ch. 8), illustrates the use in sem() of a # case-by-variable data (see ?Bollen) set rather than a covariance or moment matrix model.bollen <- specifyModel() Demo60 -> y1, NA, 1 Demo60 -> y2, lam2, Demo60 -> y3, lam3, Demo60 -> y4, lam4, Demo65 -> y5, NA, 1 Demo65 -> y6, lam2, Demo65 -> y7, lam3, Demo65 -> y8, lam4, Indust -> x1, NA, 1 Indust -> x2, lam6, Indust -> x3, lam7, y1 <-> y5, theta15 y2 <-> y4, theta24 y2 <-> y6, theta26 y3 <-> y7, theta37 y4 <-> y8, theta48 y6 <-> y8, theta68 Indust -> Demo60, gamma11, Indust -> Demo65, gamma21, Demo60 -> Demo65, beta21, Indust <-> Indust, phi sem.bollen <- sem(model.bollen, data=Bollen) summary(sem.bollen) summary(sem.bollen, robust=TRUE) # robust SEs and tests summary(sem.bollen, analytic.se=FALSE) # uses numeric rather than analytic Hessian # GLS rather than ML estimator: sem.bollen.gls <- sem(model.bollen, data=Bollen, objective=objectiveGLS) summary(sem.bollen.gls) # The model in equation format: model.bollen <- specifyEquations() y1 = 1*Demo60 y2 = 
lam2*Demo60 y3 = lam3*Demo60 y4 = lam4*Demo60 y5 = 1*Demo65 y6 = lam2*Demo65 y7 = lam3*Demo65 y8 = lam4*Demo65 x1 = 1*Indust x2 = lam6*Indust x3 = lam7*Indust c(y1, y5) = theta15 c(y2, y4) = theta24 c(y2, y6) = theta26 c(y3, y7) = theta37 c(y4, y8) = theta48 c(y6, y8) = theta68 Demo60 = gamma11*Indust Demo65 = gamma21*Indust + beta21*Demo60 v(Indust) = phi # -------------- A simple CFA model for the Thurstone mental tests data -------------- R.thur <- readMoments(diag=FALSE, names=c('Sentences','Vocabulary', 'Sent.Completion','First.Letters','4.Letter.Words','Suffixes', 'Letter.Series','Pedigrees', 'Letter.Group')) .828 .776 .779 .439 .493 .46 .432 .464 .425 .674 .447 .489 .443 .59 .541 .447 .432 .401 .381 .402 .288 .541 .537 .534 .35 .367 .32 .555 .38 .358 .359 .424 .446 .325 .598 .452 # (1) in CFA format: mod.cfa.thur.c <- cfa(reference.indicators=FALSE) FA: Sentences, Vocabulary, Sent.Completion FB: First.Letters, 4.Letter.Words, Suffixes FC: Letter.Series, Pedigrees, Letter.Group cfa.thur.c <- sem(mod.cfa.thur.c, R.thur, 213) summary(cfa.thur.c) # (2) in equation format: mod.cfa.thur.e <- specifyEquations(covs="F1, F2, F3") Sentences = lam11*F1 Vocabulary = lam21*F1 Sent.Completion = lam31*F1 First.Letters = lam42*F2 4.Letter.Words = lam52*F2 Suffixes = lam62*F2 Letter.Series = lam73*F3 Pedigrees = lam83*F3 Letter.Group = lam93*F3 V(F1) = 1 V(F2) = 1 V(F3) = 1 cfa.thur.e <- sem(mod.cfa.thur.e, R.thur, 213) summary(cfa.thur.e) # (3) in path format: mod.cfa.thur.p <- specifyModel(covs="F1, F2, F3") F1 -> Sentences, lam11 F1 -> Vocabulary, lam21 F1 -> Sent.Completion, lam31 F2 -> First.Letters, lam41 F2 -> 4.Letter.Words, lam52 F2 -> Suffixes, lam62 F3 -> Letter.Series, lam73 F3 -> Pedigrees, lam83 F3 -> Letter.Group, lam93 F1 <-> F1, NA, 1 F2 <-> F2, NA, 1 F3 <-> F3, NA, 1 cfa.thur.p <- sem(mod.cfa.thur.p, R.thur, 213) summary(cfa.thur.p) # ----- a CFA model fit by FIML to the mental-tests dataset with missing data ----- mod.cfa.tests <- cfa(raw=TRUE) verbal: x1, 
x2, x3 math: y1, y2, y3 cfa.tests <- sem(mod.cfa.tests, data=Tests, na.action=na.pass, objective=objectiveFIML, fixed.x="Intercept") summary(cfa.tests) summary(cfa.tests, saturated=TRUE) # takes time to fit saturated model for comparison # --- a multigroup CFA model fit to the Holzinger-Swineford mental-tests data ----- mod.hs <- cfa() spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg) # with cross-group equality constraints: mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE) sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg.eq) anova(sem.mg, sem.mg.eq) # test equality constraints } ## =============================================================================== # The following examples use file input and may be executed via example(): etc <- system.file(package="sem", "etc") # path to data and model files # to get all fit indices (not recommended, but for illustration): opt <- options(fit.indices = c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR", "AIC", "AICc", "BIC", "CAIC")) # ------------- Duncan, Haller and Portes peer-influences model ---------------------- # A nonrecursive SEM with unobserved endogenous variables and fixed exogenous variables (R.DHP <- readMoments(file=file.path(etc, "R-DHP.txt"), diag=FALSE, 
names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"))) (model.dhp <- specifyModel(file=file.path(etc, "model-DHP.txt"))) sem.dhp.1 <- sem(model.dhp, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp')) summary(sem.dhp.1) # -------------------- Wheaton et al. alienation data ---------------------- (S.wh <- readMoments(file=file.path(etc, "S-Wheaton.txt"), names=c('Anomia67','Powerless67','Anomia71', 'Powerless71','Education','SEI'))) # This is the model in the SAS manual for PROC CALIS: A Recursive SEM with # latent endogenous and exogenous variables. # Curiously, both factor loadings for two of the latent variables are fixed. (model.wh.1 <- specifyModel(file=file.path(etc, "model-Wheaton-1.txt"))) sem.wh.1 <- sem(model.wh.1, S.wh, 932) summary(sem.wh.1) # The same model, but treating one loading for each latent variable as free # (and equal to each other). (model.wh.2 <- specifyModel(file=file.path(etc, "model-Wheaton-2.txt"))) sem.wh.2 <- sem(model.wh.2, S.wh, 932) summary(sem.wh.2) # Compare the two models by a likelihood-ratio test: anova(sem.wh.1, sem.wh.2) # ----------------------- Thurstone data --------------------------------------- # Second-order confirmatory factor analysis, from the SAS manual for PROC CALIS (R.thur <- readMoments(file=file.path(etc, "R-Thurstone.txt"), diag=FALSE, names=c('Sentences','Vocabulary', 'Sent.Completion','First.Letters','4.Letter.Words','Suffixes', 'Letter.Series','Pedigrees', 'Letter.Group'))) (model.thur <- specifyModel(file=file.path(etc, "model-Thurstone.txt"))) sem.thur <- sem(model.thur, R.thur, 213) summary(sem.thur) #------------------------- Kerchoff/Kenney path analysis --------------------- # An observed-variable recursive SEM from the LISREL manual (R.kerch <- readMoments(file=file.path(etc, "R-Kerchoff.txt"), diag=FALSE, names=c('Intelligence','Siblings', 'FatherEd','FatherOcc','Grades','EducExp','OccupAsp'))) (model.kerch <- 
specifyModel(file=file.path(etc, "model-Kerchoff.txt"))) sem.kerch <- sem(model.kerch, R.kerch, 737, fixed.x=c('Intelligence', 'Siblings', 'FatherEd', 'FatherOcc')) summary(sem.kerch) #------------------- McArdle/Epstein latent-growth-curve model ----------------- # This model, from McArdle and Epstein (1987, p.118), illustrates the use of a # raw moment matrix to fit a model with an intercept. (The example was suggested # by Mike Stoolmiller.) (M.McArdle <- readMoments(file=file.path(etc, "M-McArdle.txt"), names=c('WISC1', 'WISC2', 'WISC3', 'WISC4', 'UNIT'))) (mod.McArdle <- specifyModel(file=file.path(etc, "model-McArdle.txt"))) sem.McArdle <- sem(mod.McArdle, M.McArdle, 204, fixed.x="UNIT", raw=TRUE) summary(sem.McArdle) #------------ Bollen industrialization and democracy example ----------------- # This model, from Bollen (1989, Ch. 8), illustrates the use in sem() of a # case-by-variable data set (see ?Bollen) rather than a covariance or moment matrix (model.bollen <- specifyModel(file=file.path(etc, "model-Bollen.txt"))) sem.bollen <- sem(model.bollen, data=Bollen) summary(sem.bollen) summary(sem.bollen, robust=TRUE) # robust SEs and tests summary(sem.bollen, analytic.se=FALSE) # uses numeric rather than analytic Hessian # GLS rather than ML estimator: sem.bollen.gls <- sem(model.bollen, data=Bollen, objective=objectiveGLS) summary(sem.bollen.gls) # ----- a CFA model fit by FIML to the mental-tests dataset with missing data ----- (mod.cfa.tests <- cfa(file=file.path(etc, "model-Tests.txt"), raw=TRUE)) cfa.tests <- sem(mod.cfa.tests, data=Tests, na.action=na.pass, optimizer=optimizerNlm, objective=objectiveFIML, fixed.x="Intercept") summary(cfa.tests) #------------ Holzinger and Swineford muiltigroup CFA example ---------------- mod.hs <- cfa(file=file.path(etc, "model-HS.txt")) mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + 
sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg) # with cross-group equality constraints: mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE) sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg.eq) anova(sem.mg, sem.mg.eq) # test equality constraints options(opt) # restore fit.indices option } \keyword{models} sem/src/0000755000175000017500000000000014126411067011761 5ustar nileshnileshsem/src/csem.cpp0000644000175000017500000021407414126174210013420 0ustar nileshnilesh/* * ===================================================================================== * * Filename: csem.cpp * * Description: csem * * Version: 1.0 * Created: 27/12/2011 00:28:29 * Revision: none * Compiler: gcc * * Author: Zhenghua Nie (ZHN), zhenghua.nie@gmail.com * Company: McMaster University * * Copyright (C) 2011 Zhenghua Nie. All Rights Reserved. * This code is published under GNU GENERAL PUBLIC LICENSE. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed WITHOUT ANY WARRANTY. See the * GNU General Public License for more details. 
* * If you do not have a copy of the GNU General Public License, * write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * * * ===================================================================================== */ #include "csem.h" #include "utils.h" using namespace std; // Environment for evaluation and object hunting static SEXP theenv; static SEXP thefun; // call R functions. Currently, we call "print" in R. static double csem_NaN; static const double log2Pi = 1.83787706640934533908193770912476; //log(2*pi) void printSEXP(SEXP sexp, const string msg); void semprintRealVector(const double *x, int n, int index) { SEXP rargs,Rcall,result; // Allocate memory for a vector of reals. // This vector will contain the elements of x, // x is the argument to the R function R_eval_f PROTECT(rargs = allocVector(REALSXP,n)); for (int i=0;i= 0 && ind < length(list)) { elmt = VECTOR_ELT(list, ind); } else error(("The index is not in the range of the list.")); return elmt; } SEXP getListElement(SEXP list, std::string str) { SEXP elmt = R_NilValue, names = getAttrib(list, R_NamesSymbol); int i; for (i = 0; i < length(list); i++) if(str.compare(CHAR(STRING_ELT(names, i))) == 0) { elmt = VECTOR_ELT(list, i); break; } return elmt; } double getVectorElement(SEXP vect, int ind ) { double elmt = csem_NaN; if(ind >= 0 && ind < length(vect)) elmt = REAL(AS_NUMERIC(vect))[ind]; else error(("The index is not in the range of the vector.")); return elmt; } // if ind==-1, we will search the names, and then return the object. 
double getVectorElement(SEXP vect, std::string str ) { SEXP names = getAttrib(vect, R_NamesSymbol); double elmt = csem_NaN; int i; for (i = 0; i < length(vect); i++) if(str.compare(CHAR(STRING_ELT(names, i))) == 0) { elmt = REAL(AS_NUMERIC(vect))[i]; break; } return elmt; } SEXP generateMatrix(double *A, int nrow, int ncol) { SEXP elmt; elmt = allocMatrix(REALSXP, nrow, ncol); for(int i=0; i < nrow*ncol; ++i) REAL(elmt)[i] = A[i]; return(elmt); } /* * We commented this function. This function is for debugging. SEXP showArgs1(SEXP largs) { int i, nargs = LENGTH(largs); Rcomplex cpl; SEXP el, names; PROTECT(names = getAttrib(largs, R_NamesSymbol)); const char *name; for(i = 0; i < nargs; i++) { el = VECTOR_ELT(largs, i); name = isNull(names) ? "" : CHAR(STRING_ELT(names, i)); switch(TYPEOF(el)) { case REALSXP: Rprintf("[%d] '%s' %f\n", i+1, name, REAL(el)[0]); break; case LGLSXP: case INTSXP: Rprintf("[%d] '%s' %d\n", i+1, name, INTEGER(el)[0]); break; case CPLXSXP: cpl = COMPLEX(el)[0]; Rprintf("[%d] '%s' %f + %fi\n", i+1, name, cpl.r, cpl.i); break; case STRSXP: Rprintf("[%d] '%s' %s\n", i+1, name, CHAR(STRING_ELT(el, 0))); break; default: Rprintf("[%d] '%s' R type\n", i+1, name); } } UNPROTECT(1); return(R_NilValue); } */ // void setApplicationOptions(int &hessian, double &fscale, double &steptol, double &stepmax, int &iterlim, int &ndigit, int &print_level, int &check_analyticals, double &gradtol, SEXP opts ) { const int options_integer_num = 5; const string option_names_integer[options_integer_num]={ "hessian", "iterlim", "ndigit", "print.level", "check.analyticals" }; int option_integer[options_integer_num]; // Default value option_integer[0] = 0; //hessian option_integer[1] = 100; //iterlim option_integer[2] = 12; //ndigit option_integer[3] = 0; //print_level option_integer[4] = 1; //check.analytics const int options_numeric_num = 4; const string option_names_numeric[options_numeric_num]={ "fscale", "steptol", "stepmax", "gradtol" }; double 
option_double[options_numeric_num]; // Default Values option_double[0] = 1.0; //fscale option_double[1] = 1.0e-6; //steptol option_double[2] = stepmax; //stepmax, if not given, max(1000*sqrt(sum(x0/typsiz)^2), 1000) option_double[3] = 1.0e-6; //gradtol // extract the sub-lists with options of the different types into separate lists SEXP opts_integer = getListElement(opts, "integer"); SEXP opts_numeric = getListElement(opts, "numeric"); SEXP opts_string = getListElement(opts, "string"); // loop over the integer options and set them SEXP opts_integer_names; PROTECT(opts_integer_names = getAttrib(opts_integer, R_NamesSymbol)); for (int list_cnt=0;list_cnthave_gradient) { for(ind = 0; ind < n; ++ind){ g[ind] = 2.0*(x[ind]-ind-1); } //The interested thing is that we need more function evaluations when Hessian is provided (n_eval:33). //If not provided, n_eval is 18. IS there anything wrong? if(state->have_hessian){ for(ind = 0; ind < n; ++ind){ h[ind*n+ind] = 2.0; } } } return; } void msem_test_objective(int n, const double x[], double *f, double *g, double *h,double *A, double *P, double *C, double *ff, msem_function_info *state) { int ind; *A = csem_NaN; *P = csem_NaN; *C = csem_NaN; *ff = csem_NaN; *f = 0.0; for(ind=0;indhave_gradient) { for(ind = 0; ind < n; ++ind){ g[ind] = 2.0*(x[ind]-ind-1); } //The interested thing is that we need more function evaluations when Hessian is provided (n_eval:33). //If not provided, n_eval is 18. IS there anything wrong? 
if(state->have_hessian){ for(ind = 0; ind < n; ++ind){ h[ind*n+ind] = 2.0; } } } return; } static void MatrixInverse(double* A, const int n) { int *IPIV = new int[n+1]; int LWORK = n*n; double *WORK = new double[LWORK]; int INFO; F77_CALL(dgetrf)(&n, &n, A, &n, IPIV, &INFO FCONE FCONE); if(INFO != 0) { error(( "The matrix is non-invertable.")); } F77_CALL(dgetri)(&n, A, &n, IPIV, WORK, &LWORK, &INFO FCONE FCONE); delete[] IPIV; delete[] WORK; return; } static double MatrixDeterminant(const double *A,const int nrow,const int ncol) { double det; double *tA; //temp matrix int *IPIV; int INFO; if(nrow != ncol) { error(("We cannot comptue the determinant of a non-square matrix.\n")); } IPIV = new int[nrow+1]; tA = new double[nrow*nrow]; Memcpy(tA, A, nrow*ncol); F77_CALL(dgetrf)(&nrow, &ncol, tA, &ncol, IPIV, &INFO FCONE FCONE); if(INFO != 0) { error(("Nonsingular matrix." )); } det = 1; for(int i = 0; i < nrow; i++) { det *= tA[i*nrow+ i]; //diagonal if(IPIV[i] != (i+1)) det *= -1; } delete[] tA; delete[] IPIV; return(det); } static double MatrixTrace(const double *A, int nrow, int ncol) { double trace; if(nrow != ncol) { error(("We fail to comptue a trace of a non-square matrix.\n")); } trace = 0.0; for(int j = 0; j < nrow; j++) { trace += A[j*nrow+j]; } return(trace); } //C=A.B , no transpose. 
static void MatrixMulti(const double *A, int rowA, int colA, const double *B, int rowB, int colB, double *C) { if(SEM_DEBUG) Rprintf("A[%4d-by-%4d]*B[%4d-by-%4d]=C[%4d-by-%4d]\n", rowA, colA, rowB, colB, rowA, colB); if(colA != rowB) error(("The matrices are not conformable.")); /// C := alpha*op( A )*op( B ) + beta*C, // R_ext/Blas.h // F77_NAME(dgemm)(const char *transa, const char *transb, const int *m, // const int *n, const int *k, const double *alpha, // const double *a, const int *lda, // const double *b, const int *ldb, // const double *beta, double *c, const int *ldc); memset(C, 0, rowA*colB*sizeof(double)); char Tran = 'n'; const double alpha = 1.0; const double beta = 0.0; F77_CALL(dgemm)(&Tran, &Tran, &rowA, &colB, &colA, &alpha, A, &rowA, B, &colA, &beta, C, &rowA FCONE FCONE); return; } //C=A.Transpose(B). static void MatrixMultiTransB(const double *A, int rowA, int colA, const double *B, int rowB, int colB, double *C) { if(SEM_DEBUG) Rprintf("A[%4d-by-%4d]*B'[%4d-by-%4d]=C[%4d-by-%4d]\n", rowA, colA, rowB, colB, rowA, rowB); if(colA != colB) error(("The matrices are not conformable.")); /// C := alpha*op( A )*op( B ) + beta*C, // R_ext/Blas.h // F77_NAME(dgemm)(const char *transa, const char *transb, const int *m, // const int *n, const int *k, const double *alpha, // const double *a, const int *lda, // const double *b, const int *ldb, // const double *beta, double *c, const int *ldc); memset(C, 0, rowA*rowB*sizeof(double)); char ATran = 'n'; char BTran = 't'; const double alpha = 1.0; const double beta = 0.0; F77_CALL(dgemm)(&ATran, &BTran, &rowA, &rowB, &colA, &alpha, A, &rowA, B, &rowB, &beta, C, &rowA FCONE FCONE); return; } //C=Transpose(A).Transpose(B) . 
static void MatrixMultiTransAB(const double *A, int rowA, int colA, const double *B, int rowB, int colB, double *C) { if(SEM_DEBUG) Rprintf("A'[%4d-by-%4d]*B'[%4d-by-%4d]=C[%4d-by-%4d]\n", rowA, colA, rowB, colB, colA, rowB); if(colA != colB) error(("The matrices are not conformable.")); /// C := alpha*op( A )*op( B ) + beta*C, // R_ext/Blas.h // F77_NAME(dgemm)(const char *transa, const char *transb, const int *m, // const int *n, const int *k, const double *alpha, // const double *a, const int *lda, // const double *b, const int *ldb, // const double *beta, double *c, const int *ldc); memset(C, 0, colA*rowB*sizeof(double)); char ATran = 't'; char BTran = 't'; const double alpha = 1.0; const double beta = 0.0; F77_CALL(dgemm)(&ATran, &BTran, &colA, &rowB, &rowA, &alpha, A, &rowA, B, &rowB, &beta, C, &rowA FCONE FCONE); return; } //The fowllowing function may be rewritten as similar as ddot.f static void sempdot(const int *n, const double *x, const int *incx, const double *y, const int *incy, double *z) { for(int i=0; i < *n; ++i) *z++ = *x++ * *y++; return; } //column_wise static double *Kronecker(const double *A, const int &rowA, const int &colA, const double *B, const int &rowB, const int &colB) { double *kron = new double[rowA*colA*rowB*colB]; int iA, jA, iB, jB; int rowC = rowA*rowB; for(iA = 0; iA < colA; ++iA) { for(jA = 0; jA < rowA; ++jA) { double a = *A++; // the matrix is column-wise. const double *b = &B[0]; int M = iA * colB; int N = jA * rowB; for(iB = 0; iB < colB; ++iB) { for(jB = 0; jB < rowB; ++jB) { kron[(M+iB)*rowC+N+jB] = a * *b++; // the matrix is column-wise } } } } return kron; } /* #this function will transfer into C code for generating matrix A and P with new parameters. # the index of par should be the same as start, so we set par's names as start's names. 
generate.AP <- function(par, start=start, model=model){ names(par) <- names(start) A <- P <- matrix(0, model$m, model$m) val <- ifelse(model$fixed, model$ram[, 5], par[model$sel.free]) A[model$arrows.1]<-val[model$one.head] P[model$arrows.2t] <- P[model$arrows.2] <- val[!model$one.head] A <- diag(model$m)-A #please note here.We don't need A, we always want I-A. AP <- list(A=A, P=P) AP } * */
//
// generate_AP: C translation of the R prototype above.  Fills the m x m
// RAM matrices A and P from the parameter vector x (free parameters) and
// column 5 of model->ram (fixed values), and writes ImA = I - A, which is
// what the objective functions actually use.
//
// NOTE(review): from here on this chunk appears corrupted by text
// extraction -- spans between '<' and '>' characters were stripped (e.g.
// "for(int i=0;ifixed)" was presumably "for(int i=0;i<length(model->fixed);").
// The code is preserved byte-for-byte; restore it from the package sources
// before compiling.
static void generate_AP(int n, const double x[], double *A, double *P, double *ImA, model_info *model)
{
    int n_val = length(model->fixed);
    int *fixed = new int[n_val];
    int *sel_free = new int[length(model->sel_free)];
    double *ram5 = new double[nrows(model->ram)];
    double *val = new double[n_val];
    Memcpy(fixed, INTEGER(AS_INTEGER(model->fixed)), n_val);
    Memcpy(sel_free, INTEGER(AS_INTEGER(model->sel_free)), length(model->sel_free));
    // column 5 of the RAM matrix holds the fixed parameter values
    Memcpy(ram5, REAL(AS_NUMERIC(model->ram))+4*nrows(model->ram), nrows(model->ram));
    for(int i=0;ifixed); ++i){ val[i] = (fixed[i]==1) ? ram5[i] : x[sel_free[i]-1]; //fortran to C
    }
    int *one_head = new int[length(model->one_head)];
    double *val_one_head = new double[n_val];
    double *val_two_head = new double[n_val];
    Memcpy(one_head, INTEGER(AS_INTEGER(model->one_head)), length(model->one_head));
    int ind_one = 0;
    int ind_two = 0;
    // split parameter values into single-headed (-> A) and double-headed
    // (-> P) arrow values
    for(int i=0; i< n_val; ++i){
        if(one_head[i]==1) {
            val_one_head[ind_one] = val[i];
            ++ind_one;
        }
        else {
            val_two_head[ind_two] = val[i];
            ++ind_two;
        }
    }
    int m = model->m;
    memset(A, 0, m*m*sizeof(double));
    memset(P, 0, m*m*sizeof(double));
    memset(ImA, 0, m*m*sizeof(double)); //A and diag(m)-A
    int nA = length(model->arrows_1)/2;
    int nP = length(model->arrows_2)/2;
    int *tA = new int[max(nA*2, nP*2)];
    Memcpy(tA, INTEGER(AS_INTEGER(model->arrows_1)), nA*2);
    // NOTE(review): the scatter loops that fill A and P (and build I-A),
    // the clean-up delete[]s, the end of generate_AP, and the head of
    // objectiveML (its signature down to "model_info *model = state->model")
    // were all swallowed by the '<...>' stripping in the garbled run below.
    for(int i=0;iarrows_2)), nP*2);
    int *tAt = new int[nP*2];
    Memcpy(tAt, INTEGER(AS_INTEGER(model->arrows_2t)), nP*2);
    for(int i=0;imodel;
    int m = model->m;
    int modeln = model->n;
    int maxmn = (m > modeln ?
// continuation of objectiveML (its head was lost to the '<...>' stripping
// noted above): computes the ML discrepancy
//   F = tr(S %*% Cinv) + log det C - n - log det S
// for the implied moment matrix C = J (I-A)^-1 P t((I-A)^-1) t(J), and,
// when state->have_gradient is set, its analytic gradient.
m : modeln);
double *ImA = new double[m*m];
generate_AP(n, x, A, P, ImA, model);
if(SEM_DEBUG) {
    printMatrix(A, m, m, "Matrix A", 1);
    printMatrix(ImA, m, m, "Matrix (I-A)", 1);
    printMatrix(P, m, m, "Matrix P", 1);
}
double *invA, *invC;
double *C0; //for matrix multiplication.
invC = new double[maxmn*maxmn]; //After the compuation, C is n-by-n.
C0 = new double[maxmn*maxmn]; //After the compuation, C is n-by-n.
memset(C, 0, maxmn*maxmn*sizeof(double));
memset(C0, 0, maxmn*maxmn*sizeof(double));
memset(invC, 0, maxmn*maxmn*sizeof(double));
///Please be careful that R uses column-wise to store matrix.
invA = new double[m*m];
Memcpy(invA, ImA, m*m);
MatrixInverse(invA, m); // invA = (I-A)^-1
if(SEM_DEBUG) printMatrix(invA, m, m, "invA", 1);
if(SEM_DEBUG) {
    MatrixMulti(ImA, m, m, invA, m, m, C0);
    printMatrix(C0, m, m, "A %*% invA", 1);
}
// build C = J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) in stages
MatrixMulti(REAL(model->J), nrows(model->J), ncols(model->J), invA, m, m, C0);
if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv", 1);
MatrixMulti(C0, modeln, m, P, m, m, C);
if(SEM_DEBUG) printMatrix(C, modeln, m, "J %*% I.Ainv %*% P", 1);
MatrixMultiTransB(C, modeln, m, invA, m, m, C0);
if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv %*% P %*% t(I.Ainv)", 1);
MatrixMultiTransB(C0, modeln, m, REAL(model->J), modeln, m, C);
if(SEM_DEBUG) printMatrix(C, modeln, modeln, "J %*% I.Ainv %*% P %*% t(I, Ainv) %*% t(J)", 1);
Memcpy(invC, C, modeln*modeln);
MatrixInverse(invC, modeln);
if(SEM_DEBUG) {
    printMatrix(invC, modeln, modeln, "Cinv", 1);
}
MatrixMulti(REAL(model->S), modeln, modeln, invC, modeln, modeln, C0);
if(SEM_DEBUG) printMatrix(C0, modeln, modeln, "S %*% Cinv", 1);
// the ML fit function itself
*f = MatrixTrace(C0, modeln, modeln)+log(MatrixDeterminant(C, modeln, modeln))-modeln-model->logdetS;
//now we start to calculate the gradient.
if(state->have_gradient) {
    double *grad_P, *grad_A;
    grad_P = new double[maxmn*maxmn]; //After the compuation, grad_P is m-by-m.
    grad_A = new double[maxmn*maxmn]; //After the compuation, grad_A is m-by-m.
    memset(grad_P, 0, maxmn*maxmn*sizeof(double));
    memset(grad_A, 0, maxmn*maxmn*sizeof(double));
    MatrixMultiTransAB(invA, m, m, REAL(model->J), modeln, m, C0);
    if(SEM_DEBUG) printMatrix(C0, m, modeln, "t(I.Ainv) %*% t(J)", 1);
    MatrixMulti(C0, m, modeln, invC, modeln, modeln, grad_P);
    if(SEM_DEBUG) printMatrix(grad_P, m, modeln, "t(I.Ainv) %*% t(J) %*% Cinv", 1);
    Memcpy(grad_A, C, modeln*modeln); //y, we use daxpy, y=ax+y
    Memcpy(C0, REAL(model->S), modeln*modeln); //y, we use daxpy, y=ax+y
    //F77_NAME(daxpy)(const int *n, const double *alpha,
    //                const double *dx, const int *incx,
    //                double *dy, const int *incy);
    double alpha = -1.0;
    int incx = 1;
    int mm = modeln*modeln;
    F77_CALL(daxpy)(&mm,&alpha, C0,&incx, grad_A, &incx FCONE FCONE); //grad_A = -C0 + grad_A ==> grad_A = -S + C
    if(SEM_DEBUG) printMatrix(grad_A, modeln, modeln, "(C-S)", 1);
    MatrixMulti(grad_P, m, modeln, grad_A, modeln, modeln, C0);
    if(SEM_DEBUG) printMatrix(C0, m, modeln, "t(I.Ainv) %*% t(J) %*% Cinv %*% (C-S)", 1);
    MatrixMulti(C0, m, modeln, invC, modeln, modeln, grad_P);
    if(SEM_DEBUG) printMatrix(grad_P, m, modeln, "t(I.Ainv) %*% t(J) %*% Cinv %*% (C-S) %*% Cinv", 1);
    MatrixMulti(grad_P, m, modeln, REAL(model->J), modeln, m, C0);
    if(SEM_DEBUG) printMatrix(C0, m, m, "t(I.Ainv) %*% t(J) %*% Cinv %*% (C-S) %*% Cinv %*% J", 1);
    MatrixMulti(C0, m, m, invA, m, m, grad_A);
    if(SEM_DEBUG) printMatrix(grad_A, m, m, "t(I.Ainv) %*% t(J) %*% Cinv %*% (C-S) %*% Cinv %*% J %*% I.Ainv ", 1);
    mm = m*m;
    // element-wise correction factor for symmetric P entries
    sempdot(&mm, REAL(model->correct), &incx, grad_A, &incx, grad_P);
    if(SEM_DEBUG) printMatrix(grad_P, m, m, "correct * t(I.Ainv) %*% t(J) %*% Cinv %*% (C-S) %*% Cinv %*% J %*% I.Ainv ", 1);
    MatrixMulti(grad_P, m, m, P, m, m, C0);
    if(SEM_DEBUG) printMatrix(C0, m, m, "grad.P %*% P", 1);
    MatrixMultiTransB(C0, m, m,invA, m, m, grad_A );
    if(SEM_DEBUG) printMatrix(grad_A, m, m, "grad.A = grad.P %*% P %*% t(I.Ainv)", 1);
    //The following code will produce gradients based on grad_A and grad_P.
    //This is the implentation of "tapply" in R.
    double *A_grad, *P_grad;
    int nA;
    int nP;
    A_grad = new double[n];
    P_grad = new double[n];
    memset(A_grad, 0, n*sizeof(double));
    memset(P_grad, 0, n*sizeof(double));
    double *grad_Au, *grad_Pu;
    nA = length(model->arrows_1_free)/2;
    nP = length(model->arrows_2_free)/2;
    grad_Au = new double[nA];
    grad_Pu = new double[nP];
    int *tA = new int[max(max(nA*2, nP*2), max(length(model->unique_free_1), length(model->unique_free_2)))];
    Memcpy(tA, INTEGER(AS_INTEGER(model->arrows_1_free)), length(model->arrows_1_free));
    // NOTE(review): the gather/scatter ("tapply") loops below were garbled
    // by the '<...>' stripping during extraction; code preserved verbatim.
    for(int i=0;iarrows_1_seq[i]-1] += grad_Au[i];
    }
    Memcpy(tA, INTEGER(AS_INTEGER(model->arrows_2_free)), nP*2);
    for(int i=0;iarrows_2_seq[i]-1] += grad_Pu[i];
    }
    nA=length(model->unique_free_1);
    Memcpy(tA, INTEGER(AS_INTEGER(model->unique_free_1)), nA);
    for(int i=0;iunique_free_2);
    Memcpy(tA, INTEGER(AS_INTEGER(model->unique_free_2)), nP);
    // NOTE(review): the end of objectiveML and the head of msem_objectiveML
    // (the multi-group ML wrapper) were swallowed in the garbled run below.
    for(int i=0;imodel;
    function_info *state = new function_info; //(function_info *)R_alloc(1, sizeof(function_info));
    state->have_gradient = m_state->have_gradient;
    state->have_hessian = m_state->have_hessian;
    int G = m_model->G;
    int i;
    int indAP=0, indC=0;
    *f = 0.0;
    if(state->have_gradient) memset(g, 0, n*sizeof(double));
    int sumN=0;
    double *grad = new double[n];
    int maxmn = 0;
    int maxmni;
    for(i = 0;i < G; ++i) {
        sumN += INTEGER(AS_INTEGER(m_model->N))[i];
        maxmni = (m_model->gmodel[i].n > m_model->gmodel[i].m ? m_model->gmodel[i].n:m_model->gmodel[i].m);
        // running maximum over all groups, used to size the scratch C0
        maxmn = (maxmni > maxmn ?
// continuation of msem_objectiveML (multi-group ML wrapper): finish the
// running maximum, then evaluate objectiveML per group, pooling the
// weighted discrepancies into *f and (optionally) the gradient into g.
maxmni : maxmn);
}
double *C0 = new double[maxmn*maxmn];
for(i = 0; i < G; ++i) {
    state->model = &m_model->gmodel[i];
    memset(grad, 0, n*sizeof(double));
    // NOTE(review): the size here is in BYTES -- this clears only
    // maxmn*maxmn bytes (one eighth of the buffer); the sibling wrappers
    // msem_objectiveGLS/msem_objectiveFIML use maxmn*maxmn*sizeof(double).
    // TODO confirm and fix.
    memset(C0, 0, maxmn*maxmn);
    objectiveML(n, x, &ff[i], grad, h, &A[indAP], &P[indAP], C0, state);
    Memcpy(&C[indC], C0, state->model->n*state->model->n);
    indAP += state->model->m*state->model->m; //update the index for A, P, C
    indC += state->model->n*state->model->n;
    // group contribution weighted by (N[g] - !raw), as in the R code
    *f += (state->model->N-(1-state->model->raw))*ff[i];
    if(state->have_gradient) {
        double alpha = (state->model->N-(1-state->model->raw))/(sumN-(1.0-state->model->raw)*G);
        int incx = 1;
        //F77_NAME(daxpy)(const int *n, const double *alpha,
        //                const double *dx, const int *incx,
        //                double *dy, const int *incy); y = ax+y
        F77_CALL(daxpy)(&n,&alpha, grad,&incx, g, &incx FCONE FCONE); //grad.all = grad.all+((N[g]-!raw)/(sum(N)-(!raw)*G))*grad
    }
}
*f = *f/(sumN-(1-m_model->raw)*G);
delete[] C0;
delete[] grad;
delete state;
return;
}

// this function will compute GLS.
// objectiveGLS: GLS discrepancy F = 0.5 * tr((invS %*% (S - C))^2) for the
// implied moment matrix C; despite some local names ("grad_P"), no
// gradient is written to g here (matching iagflg = 0 for GLS in csemSolve).
void objectiveGLS(int n, const double x[], double *f, double *g, double *h, double *A, double *P, double *C, function_info *state)
{
    R_CheckUserInterrupt();
    model_info *model = state->model;
    int m = model->m;
    int modeln = model->n;
    int maxmn = (m > modeln ? m : modeln);
    double *ImA = new double[m*m];
    generate_AP(n, x, A, P, ImA, model);
    if(SEM_DEBUG) {
        printMatrix(A, m, m, "Matrix A", 1);
        printMatrix(ImA, m, m, "Matrix (I-A)", 1);
        printMatrix(P, m, m, "Matrix P", 1);
    }
    double *invA, *invC;
    double *C0; //for matrix multiplication.
    invC = new double[maxmn*maxmn]; //After the compuation, C is n-by-n.
    C0 = new double[maxmn*maxmn]; //After the compuation, C is n-by-n.
    memset(C, 0, maxmn*maxmn*sizeof(double));
    memset(C0, 0, maxmn*maxmn*sizeof(double));
    memset(invC, 0, maxmn*maxmn*sizeof(double));
    ///Please be careful that R uses column-wise to store matrix.
    invA = new double[m*m];
    Memcpy(invA, ImA, m*m);
    MatrixInverse(invA, m); // invA = (I-A)^-1
    if(SEM_DEBUG) printMatrix(invA, m, m, "invA", 1);
    if(SEM_DEBUG) {
        MatrixMulti(ImA, m, m, invA, m, m, C0);
        printMatrix(C0, m, m, "A %*% invA", 1);
    }
    // build the implied moment matrix C exactly as in objectiveML
    MatrixMulti(REAL(model->J), nrows(model->J), ncols(model->J), invA, m, m, C0);
    if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv", 1);
    MatrixMulti(C0, modeln, m, P, m, m, C);
    if(SEM_DEBUG) printMatrix(C, modeln, m, "J %*% I.Ainv %*% P", 1);
    MatrixMultiTransB(C, modeln, m, invA, m, m, C0);
    if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv %*% P %*% t(I.Ainv)", 1);
    MatrixMultiTransB(C0, modeln, m, REAL(model->J), modeln, m, C);
    if(SEM_DEBUG) printMatrix(C, modeln, modeln, "J %*% I.Ainv %*% P %*% t(I, Ainv) %*% t(J)", 1);
    double *grad_P;
    grad_P = new double[maxmn*maxmn]; //After the compuation, grad_P is m-by-m.
    double alpha = -1.0;
    int incx = 1;
    int mm = modeln*modeln;
    Memcpy(invC, C, modeln*modeln); //y, we use daxpy, y=ax+y
    Memcpy(C0, REAL(model->S), modeln*modeln); //y, we use daxpy, y=ax+y
    //F77_NAME(daxpy)(const int *n, const double *alpha,
    //                const double *dx, const int *incx,
    //                double *dy, const int *incy);
    F77_CALL(daxpy)(&mm,&alpha, invC,&incx, C0, &incx FCONE FCONE); //C0 = C0 - grad_A ==> grad_A = S - C
    if(SEM_DEBUG) printMatrix(C0, modeln, modeln, "(S-C)", 1);
    Memcpy(grad_P, REAL(AS_NUMERIC(model->invS)), modeln*modeln);
    MatrixMulti(grad_P, modeln, modeln, C0, modeln, modeln, invC); /* SS (grad_A) = invS * (S-C) */
    MatrixMulti(invC, modeln, modeln, invC, modeln, modeln, C0); /* C0 = SS %*% SS */
    *f = 0.5 * MatrixTrace(C0, modeln, modeln);
    delete[] grad_P;
    delete[] invA;
    delete[] C0;
    delete[] invC;
    delete[] ImA;
    return;
}
//
// this function will compute GLS.
void msem_objectiveGLS(int n, const double x[], double *f, double *g, double *h, double *A, double *P, double *C, double *ff, msem_function_info *m_state) { R_CheckUserInterrupt(); msem_model_info *m_model = m_state->model; function_info *state = new function_info; //(function_info *)R_alloc(1, sizeof(function_info)); state->have_gradient = m_state->have_gradient; state->have_hessian = m_state->have_hessian; int G = m_model->G; int i; int indAP=0, indC=0; *f = 0.0; if(state->have_gradient) memset(g, 0, n*sizeof(double)); int sumN=0; double *grad = new double[n]; int maxmn = 0; for(i = 0;i < G; ++i) { sumN += INTEGER(AS_INTEGER(m_model->N))[i]; maxmn = (m_model->gmodel[i].n > m_model->gmodel[i].m ? m_model->gmodel[i].n:m_model->gmodel[i].m); } double *C0 = new double[maxmn*maxmn]; for(i = 0; i < G; ++i) { state->model = &m_model->gmodel[i]; memset(grad, 0, n*sizeof(double)); memset(C0, 0, maxmn*maxmn*sizeof(double)); objectiveGLS(n, x, &ff[i], grad, h, &A[indAP], &P[indAP], C0, state); Memcpy(&C[indC], C0, state->model->n*state->model->n); indAP += state->model->m*state->model->m; //update the index for A, P, C indC += state->model->n*state->model->n; *f += (state->model->N-(1-state->model->raw))*ff[i]; if(state->have_gradient) { double alpha = (state->model->N-(1-state->model->raw))/(sumN-(1.0-state->model->raw)*G); int incx = 1; //F77_NAME(daxpy)(const int *n, const double *alpha, // const double *dx, const int *incx, // double *dy, const int *incy); y = ax+y F77_CALL(daxpy)(&n,&alpha, grad,&incx, g, &incx FCONE FCONE); //grad.all = grad.all+((N[g]-!raw)/(sum(N)-(!raw)*G))*grad } } *f = *f/(sumN-(1-m_model->raw)*G); // Rprintf("Number of Evaluations: %d [%f]\n", m_state->n_eval, *f); delete[] C0; delete[] grad; delete state; return; } // this function will deal with missing data. 
// objectiveFIML: full-information maximum-likelihood discrepancy for raw
// data with missing values.  Builds the implied moment matrix C exactly as
// objectiveML does, then loops over the distinct missing-data patterns,
// adding each pattern's contribution
//   nrow(X) * (log 2*pi + log det C[sel,sel]) + tr(X %*% inv(C[sel,sel]) %*% t(X)),
// and finally divides by N.  When state->have_gradient is set, df/dC is
// accumulated per pattern and chained through dC/dP and dC/dA into the
// parameter gradient.
void objectiveFIML(int n, const double x[], double *f, double *g, double *h, double *A, double *P, double *C, function_info *state)
{
    R_CheckUserInterrupt();
    model_info *model = state->model;
    int m = model->m;
    int modeln = model->n;
    int maxmn = (m > modeln ? m : modeln);
    double *ImA = new double[m*m];
    generate_AP(n, x, A, P, ImA, model);
    if(SEM_DEBUG) {
        printMatrix(A, m, m, "Matrix A", 1);
        printMatrix(ImA, m, m, "Matrix (I-A)", 1);
        printMatrix(P, m, m, "Matrix P", 1);
    }
    double *invA;
    double *C0; //for matrix multiplication.
    double *JAinv;
    C0 = new double[maxmn*maxmn]; //After the compuation, C is n-by-n.
    JAinv = new double[modeln*m]; //for gradient.n-by-m
    memset(C, 0, maxmn*maxmn*sizeof(double));
    memset(C0, 0, maxmn*maxmn*sizeof(double));
    ///Please be careful that R uses column-wise to store matrix.
    invA = new double[m*m];
    Memcpy(invA, ImA, m*m);
    MatrixInverse(invA, m); // invA = (I-A)^-1
    if(SEM_DEBUG) printMatrix(invA, m, m, "invA", 1);
    if(SEM_DEBUG) {
        MatrixMulti(ImA, m, m, invA, m, m, C0);
        printMatrix(C0, m, m, "A %*% invA", 1);
    }
    MatrixMulti(REAL(model->J), nrows(model->J), ncols(model->J), invA, m, m, C0);
    Memcpy(JAinv, C0, modeln*m); // for gradient
    if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv", 1);
    MatrixMulti(C0, modeln, m, P, m, m, C);
    if(SEM_DEBUG) printMatrix(C, modeln, m, "J %*% I.Ainv %*% P", 1);
    MatrixMultiTransB(C, modeln, m, invA, m, m, C0);
    if(SEM_DEBUG) printMatrix(C0, modeln, m, "J %*% I.Ainv %*% P %*% t(I.Ainv)", 1);
    MatrixMultiTransB(C0, modeln, m, REAL(model->J), modeln, m, C);
    if(SEM_DEBUG) printMatrix(C, modeln, modeln, "J %*% I.Ainv %*% P %*% t(I, Ainv) %*% t(J)", 1);
    *f = 0.0;
    int Npatterns = nrows(model->valid_data_patterns); // number of patterns, each row represents one pattern of the missing data.
    int Npattern_number = length(model->pattern_number); // equal to nrows(model->data)
    int *pattern_number = new int[Npattern_number];
    double *dfdC = new double[modeln*modeln]; //for gradient df/dC
    memset(dfdC, 0, modeln*modeln*sizeof(double));
    for(int i = 0; i < Npatterns; ++i) {
        // sel: indicator of the variables observed in pattern i
        int *sel = SubMatrixRow(model->valid_data_patterns, Npatterns, ncols(model->valid_data_patterns), i);
        if(SEM_DEBUG) printMatrix(sel, 1, ncols(model->valid_data_patterns), "sel", 0);
        Memcpy(pattern_number, INTEGER(AS_INTEGER(model->pattern_number)), Npattern_number);
        if(SEM_DEBUG) printMatrix(pattern_number, 1, Npattern_number, "pattern.number", 0);
        // turn pattern_number into an indicator of the data rows belonging
        // to pattern i
        for(int j = 0; j < Npattern_number; ++j) {
            pattern_number[j] = (pattern_number[j]==(i+1) ? 1 : 0);
        }
        if(SEM_DEBUG) printMatrix(pattern_number, 1, Npattern_number, "pattern.number", 0);
        int row_subX, col_subX;
        double *X = SubMatrix(REAL(AS_NUMERIC(model->data)), pattern_number, sel, Npattern_number, ncols(model->data), row_subX, col_subX);
        if(SEM_DEBUG) printMatrix(X, row_subX, col_subX, "X", 1);
        int row_subC, col_subC;
        double *subC = SubMatrix(C, sel, sel, modeln, modeln, row_subC, col_subC); //row_subC should be equal to col_subC;
        if(SEM_DEBUG) printMatrix(subC, row_subC, col_subC, "C[sel, sel]", 1);
        double detC = MatrixDeterminant(subC, row_subC, col_subC);
        *f += row_subX * (log2Pi + log(detC));
        MatrixInverse(subC, row_subC); // now subC is the inverse of C[sel, sel]
        if(SEM_DEBUG) printMatrix(subC, row_subC, col_subC, "Inverse(C[sel, sel])", 1);
        double *CC = new double[row_subX*col_subC];
        double *CCX = new double[max(row_subX*max(col_subX, row_subX), row_subC*max(row_subC, col_subC))];
        MatrixMulti(X, row_subX, col_subX, subC, row_subC, col_subC, CC);
        if(SEM_DEBUG) printMatrix(CC, row_subX, col_subC, "X %*% inv(C[sel, sel])", 1);
        MatrixMultiTransB(CC, row_subX, col_subC, X, row_subX, col_subX, CCX);
        if(SEM_DEBUG) printMatrix(CCX, row_subX, row_subX, "X %*% inv(C[sel, sel]) %*% X^T", 1);
        *f += MatrixTrace(CCX, row_subX, row_subX);
        // gradient
        if(state->have_gradient) {
            int ndfdCi = row_subC*col_subC;
            double *dfdCi = new double[ndfdCi];
            memset(dfdCi, 0, ndfdCi*sizeof(double));
            double *subC_T = new double[ndfdCi]; //transpose (C[sel, sel])^(-T)
            Memcpy(subC_T, subC, ndfdCi);
            MatrixTranspose(subC_T, col_subC, row_subC);
            if(SEM_DEBUG) printMatrix(subC_T, col_subC, row_subC, "(inv(C[sel, sel]))^T", 1);
            int incx = 1;
            //F77_NAME(daxpy)(const int *n, const double *alpha,
            //                const double *dx, const int *incx,
            //                double *dy, const int *incy); y = ax+y
            // NOTE(review): "static_cast(row_subX)" below lost its template
            // argument (presumably static_cast<double>) to the same '<...>'
            // stripping that garbled other parts of this file.
            double alpha = static_cast(row_subX);
            F77_CALL(daxpy)(&ndfdCi,&alpha, subC_T,&incx, dfdCi, &incx FCONE FCONE);
            if(SEM_DEBUG) printMatrix(dfdCi, 1, ndfdCi, "nrow(X)*t(vec(t(inv(C[sel, sel]))))", 1);
            int nX = row_subX*col_subX;
            double *X_T = new double[nX];
            Memcpy(X_T, X, nX);
            MatrixTranspose(X_T, col_subX, row_subX); // X: the column-wise
            if(SEM_DEBUG) printMatrix(X_T, col_subX, row_subX, "X^T", 1);
            double *IsubC = new double[ndfdCi]; //identity matrix
            memset(IsubC, 0, ndfdCi*sizeof(double));
            for(int j = 0; j < row_subC; ++j)
                IsubC[j*col_subC + j] = 1.0; // in fact, row_subC = col_subC.
            double *XkronI = Kronecker(X, row_subX, col_subX, IsubC, row_subC, col_subC);
            if(SEM_DEBUG) printMatrix(XkronI, row_subX*row_subC, col_subX*col_subC, "X %x% diag(nrow(Cinv))", 1);
            MatrixMulti(X_T, 1, nX, XkronI, row_subX*row_subC, col_subX*col_subC, CCX);
            if(SEM_DEBUG) printMatrix(CCX, 1, ndfdCi, "t(vec(t(X))) %*% (X %x% diag(nrow(Cinv)))", 1);
            double *CKronC = Kronecker(subC_T, col_subC, row_subC, subC, row_subC, col_subC );
            if(SEM_DEBUG) printMatrix(CKronC, col_subC*row_subC, row_subC*col_subC, "t(Cinv) %x% Cinv", 1);
            MatrixMulti(CCX, 1, ndfdCi, CKronC, ndfdCi, ndfdCi, IsubC);
            if(SEM_DEBUG) printMatrix(IsubC, 1, ndfdCi, "t(vec(t(X))) %*% (X %x% diag(nrow(Cinv))) %*% t(Cinv) %x% Cinv", 1);
            alpha = -1.0;
            F77_CALL(daxpy)(&ndfdCi,&alpha, IsubC, &incx, dfdCi, &incx FCONE FCONE);
            // if(SEM_DEBUG) printMatrix(dfdCi, 1, ndfdCi, "dfdCi", 1);
            // scatter this pattern's df/dC back into the full n x n matrix
            double *dfdCiExtend = ExtendMatrix(dfdCi, row_subC, col_subC, sel, ncols(model->valid_data_patterns)); //modeln
            if(SEM_DEBUG) printMatrix(dfdCiExtend, modeln, modeln, "dfdCiExtend", 1);
            alpha = 1.0/static_cast(model->N);
            int nn = modeln*modeln;
            F77_CALL(daxpy)(&nn,&alpha, dfdCiExtend, &incx, dfdC, &incx FCONE FCONE);
            // if(SEM_DEBUG) printMatrix(dfdC, 1, nn, "dfdC = dfdC + dfdCi", 1);
            delete[] dfdCi;
            delete[] subC_T;
            delete[] X_T;
            delete[] IsubC;
            delete[] XkronI;
            delete[] CKronC;
            delete[] dfdCiExtend;
        }
        delete[] sel;
        delete[] X;
        delete[] subC;
        delete[] CC;
        delete[] CCX;
    }
    *f /= static_cast(model->N);
    if(state->have_gradient) {
        // chain rule: dC/dP = (J %*% (I-A)^-1) %x% (J %*% (I-A)^-1)
        double *dCdP = Kronecker(JAinv, modeln, m, JAinv, modeln, m);
        if(SEM_DEBUG) printMatrix(dCdP, modeln*modeln, m*m, "dCdP = (J %*% I.Ainv) %x% (J %*% I.Ainv)", 1);
        memset(ImA, 0, m*m*sizeof(double));
        for(int i=0; i < m; ++i)
            ImA[i*m+i] = 1.0; // ImA is reused here as an m x m identity
        double *B = Kronecker(ImA, m, m, REAL(model->J), modeln, m);
        if(SEM_DEBUG) printMatrix(B, modeln*m, m*m, "(diag(nrow(A)) %x% J)", 1);
        double *tinvA = new double[m*m];
        Memcpy(tinvA, invA, m*m);
        MatrixTranspose(tinvA, m, m);
        double *kinvA = Kronecker(tinvA, m, m, invA, m, m);
        if(SEM_DEBUG) printMatrix(kinvA, m*m, m*m, "(t(I.Ainv) %x% I.Ainv)", 1);
        double *dBdA = new double[modeln*m*m*m];
        MatrixMulti(B, m*modeln, m*m, kinvA, m*m, m*m, dBdA);
        if(SEM_DEBUG) printMatrix(dBdA, m*modeln, m*m, "(diag(nrow(A)) %x% J) %*% (t(I.Ainv) %x% I.Ainv)", 1);
        double *Tmn = CommutationMatrix(m, modeln); //column-wise
        if(SEM_DEBUG) printMatrix(Tmn, modeln*m, modeln*m, "Tmn", 1); //zhenghua, check
        double *dCdA1 = new double[modeln*m];
        MatrixMultiTransB(JAinv, modeln, m, P, m, m, dCdA1);
        if(SEM_DEBUG) printMatrix(dCdA1, modeln, m, "(B %*% t(P))", 1);
        double *ImB = new double[modeln*modeln];
        memset(ImB, 0, modeln*modeln*sizeof(double));
        // NOTE(review): a large garbled run follows -- the loops/products
        // that assemble dC/dA, dfdP0 and vcorr were swallowed by the
        // '<...>' stripping; code preserved verbatim.
        for(int i=0; icorrect), m, m);
        if(SEM_DEBUG) printMatrix(vcorr, 1, m*m, "correct", 1);
        double *dfdP = new double[m*m];
        int mm = m*m;
        incx = 1;
        sempdot(&mm, vcorr, &incx, dfdP0, &incx, dfdP);
        if(SEM_DEBUG) printMatrix(dfdP, 1, m*m, "dfdP", 1);
        double *dfdA = new double[m*m];
        MatrixMulti(dfdC, 1, modeln*modeln, dCdA, modeln*modeln, m*m, dfdA);
        if(SEM_DEBUG) printMatrix(dfdA, 1, m*m, "dfdA", 1);
        //The following code will produce gradients based on grad_A and grad_P.
        //This is the implentation of "tapply" in R.
        // "tapply"-style scatter of dfdA/dfdP into the parameter gradient
        // (continuation of objectiveFIML).  NOTE(review): several loops
        // below were garbled by the '<...>' stripping, and the end of
        // objectiveFIML plus the head of objectivelogLik were swallowed
        // near the bottom of this run; code preserved verbatim.
        double *A_grad, *P_grad;
        int nA;
        int nP;
        A_grad = new double[n];
        P_grad = new double[n];
        memset(A_grad, 0, n*sizeof(double));
        memset(P_grad, 0, n*sizeof(double));
        double *grad_Au, *grad_Pu;
        nA = length(model->arrows_1_free)/2;
        nP = length(model->arrows_2_free)/2;
        grad_Au = new double[nA];
        grad_Pu = new double[nP];
        int *tA = new int[max(max(nA*2, nP*2), max(length(model->unique_free_1), length(model->unique_free_2)))];
        Memcpy(tA, INTEGER(AS_INTEGER(model->arrows_1_free)), length(model->arrows_1_free));
        for(int i=0;iarrows_1_seq[i]-1] += grad_Au[i];
        }
        Memcpy(tA, INTEGER(AS_INTEGER(model->arrows_2_free)), nP*2);
        for(int i=0;iarrows_2_seq[i]-1] += grad_Pu[i];
        }
        nA=length(model->unique_free_1);
        Memcpy(tA, INTEGER(AS_INTEGER(model->unique_free_1)), nA);
        for(int i=0;iunique_free_2);
        Memcpy(tA, INTEGER(AS_INTEGER(model->unique_free_2)), nP);
// objectivelogLik (its signature was swallowed in the garbled run below):
// multivariate-normal log-likelihood of the raw data for a saturated
// covariance matrix assembled directly from the parameter vector x using
// the "tri" lower-triangle template; loops over the missing-data patterns
// exactly as objectiveFIML does, but with no gradient computation.
for(int i=0;imodel->semObject;
int ncolData = ncols(semObject->data);
double *C0 = new double[ncolData*ncolData];
memset(C0, 0, ncolData*ncolData*sizeof(double));
int posn_intercept = semObject->posn_intercept - 1; // fortran to C
C0[posn_intercept*n+posn_intercept] = 1.0;
int ncolTri = ncols(semObject->tri);
int nrowTri = nrows(semObject->tri);
int *tri = new int[length(semObject->tri)];
Memcpy(tri, INTEGER(AS_INTEGER(semObject->tri)), length(semObject->tri));
//printMatrix(tri, nrowTri, ncolTri, "tri", 1);
// fill the marked triangle from x and mirror the rest for symmetry
for(int i = 0; i < ncolTri; ++i) {
    for(int j = 0; j < nrowTri; ++j)
        if(tri[i*nrowTri + j])
            C0[i*nrowTri + j] = *x++;
        else
            C0[i*nrowTri + j] = C0[j*nrowTri + i];
}
// C <- C + t(C) - diag(diag(C))
//printMatrix(C0, ncolData, ncolData, "C", 1);
*f = 0.0;
int Npatterns = nrows(semObject->valid_data_patterns); // number of patterns, each row represents one pattern of the missing data.
int Npattern_number = length(semObject->pattern_number); // equal to nrows(model->data)
int *pattern_number = new int[Npattern_number];
// double *dfdC = new double[ncolTri*nrowTri]; //for gradient df/dC
// memset(dfdC, 0, ncolTri*nrowTri*sizeof(double));
for(int i = 0; i < Npatterns; ++i) {
    int *sel = SubMatrixRow(semObject->valid_data_patterns, Npatterns, ncols(semObject->valid_data_patterns), i);
    if(SEM_DEBUG) printMatrix(sel, 1, ncols(semObject->valid_data_patterns), "sel", 0);
    Memcpy(pattern_number, INTEGER(AS_INTEGER(semObject->pattern_number)), Npattern_number);
    if(SEM_DEBUG) printMatrix(pattern_number, 1, Npattern_number, "pattern.number", 0);
    // indicator of the data rows belonging to pattern i
    for(int j = 0; j < Npattern_number; ++j) {
        pattern_number[j] = (pattern_number[j]==(i+1) ? 1 : 0);
    }
    if(SEM_DEBUG) printMatrix(pattern_number, 1, Npattern_number, "pattern.number", 0);
    int row_subX, col_subX;
    double *X = SubMatrix(REAL(AS_NUMERIC(semObject->data)), pattern_number, sel, Npattern_number, ncols(semObject->data), row_subX, col_subX);
    if(SEM_DEBUG) printMatrix(X, row_subX, col_subX, "X", 1);
    int row_subC, col_subC;
    double *subC = SubMatrix(C0, sel, sel, nrowTri, nrowTri, row_subC, col_subC); //row_subC should be equal to col_subC;
    if(SEM_DEBUG) printMatrix(subC, row_subC, col_subC, "C[sel, sel]", 1);
    double detC = MatrixDeterminant(subC, row_subC, col_subC);
    *f += row_subX * (log2Pi + log(detC));
    MatrixInverse(subC, row_subC); // now subC is the inverse of C[sel, sel]
    if(SEM_DEBUG) printMatrix(subC, row_subC, col_subC, "Inverse(C[sel, sel])", 1);
    double *CC = new double[row_subX*col_subC];
    double *CCX = new double[max(row_subX*max(col_subX, row_subX), row_subC*max(row_subC, col_subC))];
    MatrixMulti(X, row_subX, col_subX, subC, row_subC, col_subC, CC);
    if(SEM_DEBUG) printMatrix(CC, row_subX, col_subC, "X %*% inv(C[sel, sel])", 1);
    MatrixMultiTransB(CC, row_subX, col_subC, X, row_subX, col_subX, CCX);
    if(SEM_DEBUG) printMatrix(CCX, row_subX, row_subX, "X %*% inv(C[sel, sel]) %*% X^T", 1);
    *f +=
MatrixTrace(CCX, row_subX, row_subX); delete[] sel; delete[] X; delete[] subC; delete[] CC; delete[] CCX; } delete[] pattern_number; delete[] C0; delete[] tri; return; } // // this function will compute GLS. void msem_objectiveFIML(int n, const double x[], double *f, double *g, double *h, double *A, double *P, double *C, double *ff, msem_function_info *m_state) { R_CheckUserInterrupt(); msem_model_info *m_model = m_state->model; function_info *state = new function_info; //(function_info *)R_alloc(1, sizeof(function_info)); state->have_gradient = m_state->have_gradient; state->have_hessian = m_state->have_hessian; int G = m_model->G; int i; int indAP=0, indC=0; *f = 0.0; if(state->have_gradient) memset(g, 0, n*sizeof(double)); int sumN=0; double *grad = new double[n]; int maxmn = 0; for(i = 0;i < G; ++i) { sumN += INTEGER(AS_INTEGER(m_model->N))[i]; maxmn = (m_model->gmodel[i].n > m_model->gmodel[i].m ? m_model->gmodel[i].n:m_model->gmodel[i].m); } double *C0 = new double[maxmn*maxmn]; for(i = 0; i < G; ++i) { state->model = &m_model->gmodel[i]; memset(grad, 0, n*sizeof(double)); memset(C0, 0, maxmn*maxmn*sizeof(double)); objectiveFIML(n, x, &ff[i], grad, h, &A[indAP], &P[indAP], C0, state); Memcpy(&C[indC], C0, state->model->n*state->model->n); indAP += state->model->m*state->model->m; //update the index for A, P, C indC += state->model->n*state->model->n; *f += (state->model->N-(1-state->model->raw))*ff[i]; if(state->have_gradient) { double alpha = (state->model->N-(1-state->model->raw))/(sumN-(1.0-state->model->raw)*G); int incx = 1; //F77_NAME(daxpy)(const int *n, const double *alpha, // const double *dx, const int *incx, // double *dy, const int *incy); y = ax+y F77_CALL(daxpy)(&n,&alpha, grad,&incx, g, &incx FCONE FCONE); //grad.all = grad.all+((N[g]-!raw)/(sum(N)-(!raw)*G))*grad } } *f = *f/(sumN-(1-m_model->raw)*G); // Rprintf("Number of Evaluations: %d [%f]\n", m_state->n_eval, *f); delete[] C0; delete[] grad; delete state; return; } /* "start" = start, 
"opts" = opts, "S" = model$S, "logdetS" = model$logdetS, "invS" = model$invS, "N" = model$N, "m" = model$m, "n" = model$n, "t" = model$t, "fixed" = model$fixed, "ram" = model$ram, "sel.free" = model$sel.free, "arrows.1" = model$arrows.1, "arrows.1.free" = model$arrows.1.free, "one.head" = model$one.head, "arrows.2t" = model$arrows.2t, "arrows.2" = model$arrows.2, "arrows.2.free" = model$arrows.2.free, "unique.free.1" = model$unique.free.1, "unique.free.2" = model$unique.free.2, "J" = model$J, "correct" = model$correct, "param.names" = model$param.names, "var.names" = model$var.names, "one.free" = model$one.free, "two.free" = model$two.free, "raw" = model$raw) * */ // //column_wise: 1 SEXP csemSolve( SEXP args ) { R_CheckUserInterrupt(); theenv = getListElement(args, "csem.environment"); thefun = getListElement(args, "print.f"); // for print a Real vector csem_NaN = std::numeric_limits::quiet_NaN(); SEXP solution; int num_prot = 1; PROTECT(solution=args); //showArgs1(args); // Define objective functions and their properties. const int num_objs = 5; const string name_objs[num_objs] = { "objectiveML", "objectiveGLS", "objectiveFIML", "objectivelogLik", "test_objective" }; const myfcn_p objectiveFun[num_objs]= { (myfcn_p) objectiveML, //objective, gradient (iagflg[0]=1), no hessian (iahflg[0]=0) (myfcn_p) objectiveGLS, //objective, no gradient (iagflg[1]=0), no hessian (iahflg[1]=0) (myfcn_p) objectiveFIML, //objective, gradient (iagflg[2]=1), no hessian (iahflg[1]=0) (myfcn_p) objectivelogLik, //objective, no gradient (iaglfg[1]=0i), no hessian (iahflg[1]=0) (myfcn_p) test_objective //objective, gradient, hessian }; const int iagflg[num_objs]={1, 0, 1, 0, 1}; //gradients const int iahflg[num_objs]={0, 0, 0, 0, 1}; //hessian int gradient = 1; //0: we don't use gradient to optimize. int obj_ind = 0; //default objective function. 
SEXP st; st= getListElement(args, "objective"); for (int i = 0; i < num_objs; ++i) { if(name_objs[i].compare(CHAR(STRING_ELT(st, 0))) == 0) { obj_ind = i; break; } } if(SEM_DEBUG) printSEXP(st, "Objective Function"); st = getListElement(args, "gradient"); gradient = INTEGER(st)[0]; int optimize; //0: only compute the objective function, gradients and Hessian if it is provided. st = getListElement(args, "opt.flg"); optimize = INTEGER(st)[0]; model_info *model = NULL; model = new model_info; if(obj_ind < 3 ) { // model st = getListElement(args, "logdetS"); model->logdetS = REAL(st)[0]; st = getListElement(args, "N"); model->N = INTEGER(st)[0]; st = getListElement(args, "t"); model->t = INTEGER(st)[0]; st = getListElement(args, "n"); model->n = INTEGER(st)[0]; st = getListElement(args, "m"); model->m = INTEGER(st)[0]; PROTECT(model->ram = getListElement(args, "ram")); PROTECT(model->sel_free = getListElement(args, "sel.free")); PROTECT(model->arrows_1 = getListElement(args, "arrows.1")); PROTECT(model->arrows_1_free = getListElement(args, "arrows.1.free")); PROTECT(model->one_head = getListElement(args, "one.head")); PROTECT(model->arrows_2t = getListElement(args, "arrows.2t")); PROTECT(model->arrows_2 = getListElement(args, "arrows.2")); PROTECT(model->arrows_2_free = getListElement(args, "arrows.2.free")); PROTECT(model->unique_free_1 = getListElement(args, "unique.free.1")); PROTECT(model->unique_free_2 = getListElement(args, "unique.free.2")); PROTECT(model->param_names = getListElement(args, "param.names")); PROTECT(model->var_names = getListElement(args, "var.names")); PROTECT(model->one_free = getListElement(args, "one.free")); PROTECT(model->two_free = getListElement(args, "two.free")); PROTECT(model->S = getListElement(args, "S")); PROTECT(model->invS = getListElement(args, "invS")); PROTECT(model->fixed = getListElement(args, "fixed")); PROTECT(model->J = getListElement(args, "J")); PROTECT(model->correct = getListElement(args, "correct")); num_prot += 19; 
PROTECT(model->data = getListElement(args, "data")); PROTECT(model->pattern_number = getListElement(args, "pattern.number")); PROTECT(model->valid_data_patterns = getListElement(args, "valid.data.patterns")); num_prot += 3; st = getListElement(args, "raw"); model->raw = INTEGER(st)[0]; st = getListElement(args, "arrows.1.seq"); model->arrows_1_seq = (int *)R_alloc(length(st), sizeof(int)); Memcpy(model->arrows_1_seq, INTEGER(AS_INTEGER(st)), length(st)); st = getListElement(args, "arrows.2.seq"); model->arrows_2_seq = (int *)R_alloc(length(st), sizeof(int)); Memcpy(model->arrows_2_seq, INTEGER(AS_INTEGER(st)), length(st)); //Print if debug if(SEM_DEBUG){ printSEXP(model->S, "\nMatrix S"); printSEXP(model->invS, "\nMatrix invS"); printSEXP(model->J, "\nMatrix J"); printSEXP(model->fixed, "\nVector fixed"); printSEXP(model->correct, "\nMatrix correct"); } //initial values x0 and typsize double *x0 = new double[model->t]; double *typsiz = new double[model->t]; //typsiz = (double *)R_alloc(model->t, sizeof(double)); int ind; double sum = 0.0; SEXP sx0 = getListElement(args, "start"); if(LENGTH(sx0) != model->t) error(("The number of variables are not consistent!\n")); SEXP stypsiz = getListElement(args, "typsize"); Memcpy(typsiz, REAL(AS_NUMERIC(stypsiz)), model->t); //x0 = (double *)R_alloc(model->t, sizeof(double)); for(ind=0;ind t;++ind){ R_CheckUserInterrupt(); x0[ind]=REAL(sx0)[ind]; sum += (x0[ind]/typsiz[ind])*(x0[ind]/typsiz[ind]); } //options for optimization double stepmax=1000*sqrt(sum); stepmax = stepmax > 1000.0? 
stepmax: 1000.0; int hessian; double fscale; double steptol; double gradtol; int iterlim; int ndigit; int print_level; int check_analyticals; int msg=0; int msg_print[]={8, 0, 16}; SEXP opts; PROTECT(opts = getListElement(args, "options")); setApplicationOptions(hessian, fscale, steptol, stepmax, iterlim, ndigit, print_level, check_analyticals,gradtol, opts ); UNPROTECT(1); if(SEM_DEBUG) { Rprintf("hessian: [%d]\n", hessian); Rprintf("iterlim: [%d]\n", iterlim); Rprintf("ndigit: [%d]\n", ndigit); Rprintf("print.level: [%d]\n", print_level); Rprintf("check.analyticals: [%d]\n", check_analyticals); Rprintf("fscale: [%f]\n", fscale); Rprintf("steptol: [%f]\n", steptol); Rprintf("stepmax: [%f]\n", stepmax); Rprintf("gradtol: [%f]\n", gradtol); } msg = 1+msg_print[print_level]; if (check_analyticals==0) msg += 2 + 4; solution = csemnlm(x0, model->t, iagflg[obj_ind]&gradient, iahflg[obj_ind], hessian, typsiz, fscale, msg, ndigit, gradtol, stepmax, steptol, iterlim, model, (myfcn_p) objectiveFun[obj_ind], optimize); delete[] x0; delete[] typsiz; } else { //for matrix A, P, C model->m = 1; model->n = 1; sem_object *semObject = new sem_object; st = getListElement(args, "t"); semObject->t = INTEGER(st)[0]; st = getListElement(args, "posn.intercept"); semObject->posn_intercept = INTEGER(st)[0]; PROTECT(semObject->data = getListElement(args, "data")); PROTECT(semObject->pattern_number = getListElement(args, "pattern.number")); PROTECT(semObject->valid_data_patterns = getListElement(args, "valid.data.patterns")); PROTECT(semObject->tri = getListElement(args, "tri")); num_prot += 4; model->semObject = semObject; //printSEXP(semObject->valid_data_patterns, "valid.data.patterns"); //printSEXP(semObject->pattern_number, "pattern.number"); //initial values x0 and typsize double *x0 = new double[semObject->t]; double *typsiz = new double[semObject->t]; int ind; double sum = 0.0; SEXP sx0 = getListElement(args, "start"); if(LENGTH(sx0) != semObject->t) error(("The number of variables 
are not consistent!\n")); SEXP stypsiz = getListElement(args, "typsize"); Memcpy(typsiz, REAL(AS_NUMERIC(stypsiz)), semObject->t); for(ind=0;ind t;++ind){ R_CheckUserInterrupt(); x0[ind]=REAL(sx0)[ind]; sum += (x0[ind]/typsiz[ind])*(x0[ind]/typsiz[ind]); } //options for optimization double stepmax=1000*sqrt(sum); stepmax = stepmax > 1000.0? stepmax: 1000.0; int hessian; double fscale; double steptol; double gradtol; int iterlim; int ndigit; int print_level; int check_analyticals; int msg=0; int msg_print[]={8, 0, 16}; SEXP opts; PROTECT(opts = getListElement(args, "options")); setApplicationOptions(hessian, fscale, steptol, stepmax, iterlim, ndigit, print_level, check_analyticals,gradtol, opts ); UNPROTECT(1); msg = 1+msg_print[print_level]; if (check_analyticals==0) msg += 2 + 4; solution = csemnlm(x0, semObject->t, iagflg[obj_ind]&gradient, iahflg[obj_ind], hessian, typsiz, fscale, msg, ndigit, gradtol, stepmax, steptol, iterlim, model, (myfcn_p) objectiveFun[obj_ind], optimize); delete semObject; delete[] x0; delete[] typsiz; } UNPROTECT(num_prot); delete model; return(solution); } SEXP cmsemSolve( SEXP args ) { R_CheckUserInterrupt(); theenv = getListElement(args, "csem.environment"); thefun = getListElement(args, "print.f"); // for print a Real vector csem_NaN = std::numeric_limits::quiet_NaN(); SEXP solution; int num_prot = 1; PROTECT(solution=args); // showArgs1(args); // Define objective functions and their properties. 
const int num_objs = 4; const string name_objs[num_objs] = { "objectiveML", "objectiveGLS", "objectiveFIML", "test_objective" }; const msem_fcn_p objectiveFun[num_objs]= { (msem_fcn_p) msem_objectiveML, //objective, gradient (iagflg[0]=1), no hessian (iahflg[0]=0) (msem_fcn_p) msem_objectiveGLS, //objective, no gradient (iagflg[1]=0), no hessian (iahflg[1]=0) (msem_fcn_p) msem_objectiveFIML, //objective, no gradient (iagflg[1]=0), no hessian (iahflg[1]=0) (msem_fcn_p) msem_test_objective //objective, gradient, hessian }; const int iagflg[num_objs]={1, 0, 0, 1}; //gradients const int iahflg[num_objs]={0, 0, 0, 1}; //hessian int gradient=1; int obj_ind = 0; //default objective function. SEXP st; st= getListElement(args, "objective"); for (int i = 0; i < num_objs; ++i) { if(name_objs[i].compare(CHAR(STRING_ELT(st, 0))) == 0) { obj_ind = i; break; } } if(SEM_DEBUG) printSEXP(st, "Objective Function"); st = getListElement(args, "gradient"); gradient = INTEGER(st)[0]; int optimize; //0: only compute the objective function, gradients and Hessian if it is provided. 
st = getListElement(args, "opt.flg"); optimize = INTEGER(st)[0]; // model msem_model_info *model; model = new msem_model_info; st = getListElement(args, "G"); model->G = INTEGER(AS_INTEGER(st))[0]; PROTECT(model->logdetS = getListElement(args, "logdetS")); PROTECT(model->N = getListElement(args, "N")); st = getListElement(args, "t"); model->t = INTEGER(st)[0]; PROTECT(model->n = getListElement(args, "n")); PROTECT(model->m = getListElement(args, "m")); PROTECT(model->ram = getListElement(args, "ram")); PROTECT(model->sel_free = getListElement(args, "sel.free")); PROTECT(model->arrows_1 = getListElement(args, "arrows.1")); PROTECT(model->arrows_1_free = getListElement(args, "arrows.1.free")); PROTECT(model->one_head = getListElement(args, "one.head")); PROTECT(model->arrows_2t = getListElement(args, "arrows.2t")); PROTECT(model->arrows_2 = getListElement(args, "arrows.2")); PROTECT(model->arrows_2_free = getListElement(args, "arrows.2.free")); PROTECT(model->unique_free_1 = getListElement(args, "unique.free.1")); PROTECT(model->unique_free_2 = getListElement(args, "unique.free.2")); PROTECT(model->param_names = getListElement(args, "param.names")); PROTECT(model->var_names = getListElement(args, "var.names")); PROTECT(model->one_free = getListElement(args, "one.free")); PROTECT(model->two_free = getListElement(args, "two.free")); PROTECT(model->S = getListElement(args, "S")); PROTECT(model->invS = getListElement(args, "invS")); PROTECT(model->fixed = getListElement(args, "fixed")); PROTECT(model->J = getListElement(args, "J")); PROTECT(model->correct = getListElement(args, "correct")); num_prot += 23; if(obj_ind == 2) { // objectiveFIML PROTECT(model->data = getListElement(args, "data")); PROTECT(model->pattern_number = getListElement(args, "pattern.number")); PROTECT(model->valid_data_patterns = getListElement(args, "valid.data.patterns")); num_prot += 3; } st = getListElement(args, "raw"); model->raw = INTEGER(st)[0]; PROTECT(model->arrows_1_seq = 
getListElement(args, "arrows.1.seq")); PROTECT(model->arrows_2_seq = getListElement(args, "arrows.2.seq")); num_prot += 2; //produce pointer for each group's model. model->gmodel = new model_info[model->G]; for(int i = 0; i < model->G; ++i) { model_info *gmodel = &model->gmodel[i]; PROTECT(gmodel->S=getListElement(model->S, i)); gmodel->logdetS = REAL(AS_NUMERIC(model->logdetS))[i]; gmodel->N = INTEGER(AS_INTEGER(model->N))[i]; gmodel->m = INTEGER(AS_INTEGER(model->m))[i]; gmodel->n = INTEGER(AS_INTEGER(model->n))[i]; PROTECT(gmodel->fixed = getListElement(model->fixed, i)); PROTECT(gmodel->ram = getListElement(model->ram, i)); PROTECT(gmodel->sel_free = getListElement(model->sel_free, i)); gmodel->t = length(model->sel_free); PROTECT(gmodel->arrows_1 = getListElement(model->arrows_1, i)); PROTECT(gmodel->arrows_1_free = getListElement(model->arrows_1_free, i)); PROTECT(gmodel->one_head = getListElement(model->one_head, i)); PROTECT(gmodel->arrows_2t = getListElement(model->arrows_2t, i)); PROTECT(gmodel->arrows_2 = getListElement(model->arrows_2, i)); PROTECT(gmodel->arrows_2_free = getListElement(model->arrows_2_free, i)); PROTECT(gmodel->unique_free_1 = getListElement(model->unique_free_1, i)); PROTECT(gmodel->unique_free_2 = getListElement(model->unique_free_2, i)); PROTECT(gmodel->J = getListElement(model->J, i)); PROTECT(gmodel->correct = getListElement(model->correct, i)); st = getListElement(model->arrows_1_seq, i); gmodel->arrows_1_seq = (int *)R_alloc(length(st), sizeof(int)); Memcpy(gmodel->arrows_1_seq, INTEGER(AS_INTEGER(st)), length(st)); st = getListElement(model->arrows_2_seq, i); gmodel->arrows_2_seq = (int *)R_alloc(length(st), sizeof(int)); Memcpy(gmodel->arrows_2_seq, INTEGER(AS_INTEGER(st)), length(st)); gmodel->raw = model->raw; num_prot += 14; //inverse of S for GLS if(obj_ind == 1) //objectiveGLS { int nrow = nrows(gmodel->S); int ncol = ncols(gmodel->S); double *invS = new double[nrow*ncol]; Memcpy(invS, REAL(AS_NUMERIC(gmodel->S)), 
nrow*ncol); MatrixInverse(invS, nrow); PROTECT(gmodel->invS=generateMatrix(invS, nrow, ncol)); num_prot++; } // for objectiveFIML if(obj_ind == 2) { PROTECT(gmodel->data = getListElement(model->data, i)); PROTECT(gmodel->valid_data_patterns = getListElement(model->valid_data_patterns, i)); PROTECT(gmodel->pattern_number = getListElement(model->pattern_number, i)); num_prot += 3; } } //Print if debug if(SEM_DEBUG){ printSEXP(model->S, "\nMatrix S"); printSEXP(model->invS, "\nMatrix invS"); printSEXP(model->J, "\nMatrix J"); printSEXP(model->fixed, "\nVector fixed"); printSEXP(model->correct, "\nMatrix correct"); } //initial values x0 and typsize double *x0 = new double[model->t]; double *typsiz = new double[model->t]; //typsiz = (double *)R_alloc(model->t, sizeof(double)); int ind; double sum = 0.0; SEXP sx0 = getListElement(args, "start"); if(LENGTH(sx0) != model->t) error(("The number of variables are not consistent!\n")); SEXP stypsiz = getListElement(args, "typsize"); Memcpy(typsiz, REAL(AS_NUMERIC(stypsiz)), model->t); //x0 = (double *)R_alloc(model->t, sizeof(double)); for(ind=0;ind t;++ind){ R_CheckUserInterrupt(); x0[ind]=REAL(sx0)[ind]; sum += (x0[ind]/typsiz[ind])*(x0[ind]/typsiz[ind]); } //options for optimization double stepmax=1000*sqrt(sum); stepmax = stepmax > 1000.0? 
stepmax: 1000.0; int hessian; double fscale; double steptol; double gradtol; int iterlim; int ndigit; int print_level; int check_analyticals; int msg=0; int msg_print[]={8, 0, 16}; SEXP opts; PROTECT(opts = getListElement(args, "options")); setApplicationOptions(hessian, fscale, steptol, stepmax, iterlim, ndigit, print_level, check_analyticals,gradtol, opts ); UNPROTECT(1); if(SEM_DEBUG) { Rprintf("hessian: [%d]\n", hessian); Rprintf("iterlim: [%d]\n", iterlim); Rprintf("ndigit: [%d]\n", ndigit); Rprintf("print.level: [%d]\n", print_level); Rprintf("check.analyticals: [%d]\n", check_analyticals); Rprintf("fscale: [%f]\n", fscale); Rprintf("steptol: [%f]\n", steptol); Rprintf("stepmax: [%f]\n", stepmax); Rprintf("gradtol: [%f]\n", gradtol); } msg = 1+msg_print[print_level]; if (check_analyticals==0) msg += 2 + 4; solution = cmsemnlm(x0, model->t, iagflg[obj_ind]&gradient, iahflg[obj_ind], hessian, typsiz, fscale, msg, ndigit, gradtol, stepmax, steptol, iterlim, model, (msem_fcn_p) objectiveFun[obj_ind], optimize); UNPROTECT(num_prot); delete[] model->gmodel; delete model; delete[] x0; delete[] typsiz; return(solution); } } /* extern "C" */ sem/src/csemnlm.h0000644000175000017500000001567314126174210013600 0ustar nileshnilesh/* * ===================================================================================== * * Filename: csemnlm.h * * Description: csemnlm * * Version: 1.0 * Created: 27/12/2011 04:31:12 * Revision: none * Compiler: gcc * * Author: Zhenghua Nie (ZHN), zhenghua.nie@gmail.com * Company: McMaster University * * Copyright (C) 2011 Zhenghua Nie. All Rights Reserved. * This code is published under GNU GENERAL PUBLIC LICENSE. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed WITHOUT ANY WARRANTY. 
See the * GNU General Public License for more details. * * If you do not have a copy of the GNU General Public License, * write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * * * ===================================================================================== */ #ifndef __CSEMNLM_HPP__ #define __CSEMNLM_HPP__ #define USE_FC_LEN_T #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include /* for DBL_MAX */ #include #include #include /* for Memcpy */ #include #include #ifndef FCONE #define FCONE #endif #ifdef DEBUGSEM #define SEM_DEBUG 1 #else #define SEM_DEBUG 0 #endif // the structure for the solution of sem (object returned by sem) // currently, we onlu use it for calculating logLik for missing data (solved by objectiveFIML) typedef struct sem_solution_Info { SEXP data; SEXP pattern_number; SEXP valid_data_patterns; SEXP tri; int posn_intercept; int t; } sem_object; /* "data" = model$data, "pattern.number" = model$model.number, "valid.data.patterns"= model$valid.data.patterns, "S" = model$S, "logdetS" = model$logdetS, "invS" = model$invS, "N" = as.integer(model$N), "m" = as.integer(model$m), "n" = as.integer(model$n), "t" = as.integer(model$t), "fixed" = model$fixed, "ram" = model$ram, "sel.free" = model$sel.free, "arrows.1" = model$arrows.1, "arrows.1.free" = model$arrows.1.free, "one.head" = model$one.head, "arrows.2t" = model$arrows.2t, "arrows.2" = model$arrows.2, "arrows.2.free" = model$arrows.2.free, "unique.free.1" = model$unique.free.1, "unique.free.2" = model$unique.free.2, "J" = model$J, "correct" = model$correct, "param.names" = model$param.names, "var.names" = model$var.names, "one.free" = model$one.free, "two.free" = model$two.free, "raw" = model$raw) * */ typedef struct model_Info { SEXP data; SEXP pattern_number; SEXP valid_data_patterns; SEXP S; //n-by-n double logdetS; SEXP invS; //n-by-n int N; int m; int n; int t; SEXP fixed; //vector, t+m-n SEXP ram; SEXP sel_free; SEXP 
arrows_1; SEXP arrows_1_free; SEXP one_head; SEXP arrows_2t; SEXP arrows_2; SEXP arrows_2_free; SEXP unique_free_1; SEXP unique_free_2; SEXP J; //n-by-m SEXP correct; ///m-by-m SEXP param_names; SEXP var_names; SEXP one_free; SEXP two_free; int raw; int *arrows_1_seq; int *arrows_2_seq; sem_object *semObject; } model_info; typedef struct msem_model_Info { int G; //number of groups SEXP data; SEXP pattern_number; SEXP valid_data_patterns; SEXP S; //n-by-n SEXP logdetS; SEXP invS; //n-by-n SEXP N; SEXP m; SEXP n; int t; SEXP fixed; //vector, t+m-n SEXP ram; SEXP sel_free; SEXP arrows_1; SEXP arrows_1_free; SEXP one_head; SEXP arrows_2t; SEXP arrows_2; SEXP arrows_2_free; SEXP unique_free_1; SEXP unique_free_2; SEXP J; //n-by-m SEXP correct; ///m-by-m SEXP param_names; SEXP var_names; SEXP one_free; SEXP two_free; int raw; SEXP arrows_1_seq; SEXP arrows_2_seq; model_info *gmodel; //a pointer for each group's model. } msem_model_info; //this will define the protocol of our objective function typedef void (*myfcn_p)(int, const double *, double *, double *, double *, double *, double *, double *, void *); //typedef void (*myfcn_p)(int n, double *x, double *f, double *g, double *h, double *A, double *P, double *C, void *state); typedef void (*msem_fcn_p)(int, const double *, double *, double *, double *, double *, double *, double *, double *, void *); //typedef void (*msem_fcn_p)(int n, double *x, double *f, double *g, double *h, double *A, double *P, double *C, double *ff, void *state); #define FT_SIZE 3 /* default size of table to store computed function values */ typedef struct { double fval; double *x; double *grad; double *hess; double *C; double *A; double *P; } ftable; typedef struct { double fval; double *x; double *grad; double *hess; double *C; double *A; double *P; double *ff; } msem_ftable; typedef struct { int n_eval; /* the number of evaluations of the objective function. 
*/ myfcn_p *myobjfun; int have_gradient; int have_hessian; /* int n; -* length of the parameter (x) vector */ int FT_size; /* size of table to store computed function values */ int FT_last; /* Newest entry in the table */ ftable *Ftable; model_info *model; /*including sem_object*/ } function_info; typedef struct { int n_eval; /* the number of evaluations of the objective function. */ msem_fcn_p *myobjfun; int have_gradient; int have_hessian; /* int n; -* length of the parameter (x) vector */ int FT_size; /* size of table to store computed function values */ int FT_last; /* Newest entry in the table */ msem_ftable *Ftable; msem_model_info *model; int sizeAP; int sizeC; } msem_function_info; #ifdef __cplusplus extern "C" { #endif void fdhess(int n, double *x, double fval, fcn_p fun, void *state, double *h, int nfd, double *step, double *f, int ndigit, double *typx); void optif9(int nr, int n, double *x, fcn_p fcn, fcn_p d1fcn, d2fcn_p d2fcn, void *state, double *typsiz, double fscale, int method, int iexp, int *msg, int ndigit, int itnlim, int iagflg, int iahflg, double dlt, double gradtl, double stepmx, double steptl, double *xpls, double *fpls, double *gpls, int *itrmcd, double *a, double *wrk, int *itncnt); SEXP csemnlm(double *x0, int n, int iagflg, int iahflg, int want_hessian, double *typsize, double fscale, int msg, int ndigit, double gradtl, double stepmx, double steptol, int itnlim, void *model, myfcn_p myobjfun, int optimize); SEXP cmsemnlm(double *x0, int n, int iagflg, int iahflg, int want_hessian, double *typsize, double fscale, int msg, int ndigit, double gradtl, double stepmx, double steptol, int itnlim, msem_model_info *model, msem_fcn_p myobjfun, int optimize); #ifdef __cplusplus } #endif #endif sem/src/csem.h0000644000175000017500000000346711703656246013103 0ustar nileshnilesh/* * ===================================================================================== * * Filename: csem.h * * Description: Header files for csem * * Version: 1.0 * 
Created: Tue 27 Dec 2011 00:36:32 EST * Revision: none * Compiler: gcc * * Author: Zhenghua Nie (ZHN), zhenghua.nie@gmail.com * Company: McMaster University * * Copyright (C) 2011 Zhenghua Nie. All Rights Reserved. * This code is published under GNU GENERAL PUBLIC LICENSE. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed WITHOUT ANY WARRANTY. See the * GNU General Public License for more details. * * If you do not have a copy of the GNU General Public License, * write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * ===================================================================================== */ #ifndef __CSEM_HPP__ #define __CSEM_HPP__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for DBL_MAX */ #include #include #include /* for Memcpy */ #include #include #include #include "csemnlm.h" #endif sem/src/Makevars0000755000175000017500000000055413076762626013501 0ustar nileshnilesh# last modified 2016-08-15 by J. 
Fox OBJECTS = csemnlm.o uncmin.o csem.o init.o #DEBUGING_FLAGS=-DDEBUGSEM # DEBUGSEM gives details # PKG_LIBS= -g -O2 -lm $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) # PKG_FLAGS = -arch x86_64 $(DEBUGING_FLAGS) PKG_LIBS = -lm $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) PKG_CPPFLAGS = $(DEBUGING_FLAGS) sem/src/uncmin.c0000644000175000017500000023271714126174210013426 0ustar nileshnilesh/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1997-2001 Saikat DebRoy and the * R Development Core Team * Copyright (C) 2003-2010 The R Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * http://www.r-project.org/Licenses/ * USA */ /* ../appl/uncmin.f -- translated by f2c (version of 1 June 1993 23:00:00). -- and hand edited by Saikat DebRoy */ /*--- The Dennis + Schnabel Minimizer -- used by R's nlm() ---*/ #include #include #include /* DBL_MAX */ #include #include #include /* ddot, dnrm2, dtrsl, dscal */ #include #include "csemnlm.h" #include "utils.h" /* CC subroutines mvmlt[lsu] should be REPLACED by BLAS ones! * CC * CC--- choldc(nr,n,a,diagmx,tol,addmax) is ``choleski + tolerance'' * CC ------ * CC it should make use of BLAS routines as [linkpack's dpofa!] 
*/ void fdhess(int n, double *x, double fval, fcn_p fun, void *state, double *h, int nfd, double *step, double *f, int ndigit, double *typx) { /* calculates a numerical approximation to the upper triangular * portion of the second derivative matrix (the hessian). * Algorithm A5.6.2 from Dennis and Schnabel (1983), numerical methods * for unconstrained optimization and nonlinear equations, * prentice-hall, 321-322. * programmed by richard h. jones, january 11, 1989 * INPUT to subroutine * n the number of parameters * x vector of parameter values * fval double precision value of function at x * fun a function provided by the user which must be declared as * external in the calling program. its call must * be of the call fun(n,x,state,fval) where fval is the * computed value of the function * state information other than x and n that fun requires. * state is not modified in fdhess (but can be modified by fun). * nfd first dimension of h in the calling program * OUTPUT from subroutine * h an n by n matrix of the approximate hessian * Work space : * step a real array of length n * f a double precision array of length n */ int i, j; double tempi, tempj, fii, eta, fij; eta = pow(10.0, -ndigit/3.0); for (i = 0; i < n; ++i) { step[i] = eta * fmax2(x[i], typx[i]); if (typx[i] < 0.) step[i] = -step[i]; tempi = x[i]; x[i] += step[i]; step[i] = x[i] - tempi; (*fun)(n, x, &f[i], state); x[i] = tempi; } for (i = 0; i < n; ++i) { tempi = x[i]; x[i] += step[i] * 2.; (*fun)(n, x, &fii, state); h[i + i * nfd] = (fval - f[i] + (fii - f[i]))/(step[i] * step[i]); x[i] = tempi + step[i]; for (j = i + 1; j < n; ++j) { tempj = x[j]; x[j] += step[j]; (*fun)(n, x, &fij, state); h[i + j * nfd] = (fval - f[i] + (fij - f[j]))/(step[i] * step[j]); x[j] = tempj; } x[i] = tempi; } } /* fdhess */ static void d1fcn_dum(int n, double *x, double *g, void *state) { /* dummy routine to prevent unsatisfied external diagnostic * when specific analytic gradient function not supplied. 
*/ } static void d2fcn_dum(int nr, int n, double *x, double *h, void *state) { /* dummy routine to prevent unsatisfied external diagnostic * when specific analytic hessian function not supplied. */ } static void mvmltl(int nr, int n, double *a, double *x, double *y) { /* compute y = l x * where l is a lower triangular matrix stored in a * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of problem * a(n,n) --> lower triangular (n*n) matrix * x(n) --> operand vector * y(n) <-- result vector * note * x and y cannot share storage */ int i, j; double sum; for (i = 0; i < n; ++i) { sum = 0.; for (j = 0; j <= i; ++j) sum += a[i + j * nr] * x[j]; y[i] = sum; } } /* mvmltl */ static void mvmltu(int nr, int n, double *a, double *x, double *y) { /* compute y = (L+) x * where L is a lower triangular matrix stored in a * (L-transpose (L+) is taken implicitly) * ARGUMENTS : * nr --> row dimension of matrix * n --> dimension of problem * a(nr,1) --> lower triangular (n*n) matrix * x(n) --> operand vector * y(n) <-- result vector * NOTE : x and y cannot share storage */ int i, length, one = 1; for (i = 0, length = n; i < n; --length, ++i) y[i] = F77_CALL(ddot)(&length, &a[i + i * nr], &one, &x[i], &one FCONE FCONE); } /* mvmltu */ static void mvmlts(int nr, int n, double *a, double *x, double *y) { /* compute y=ax * where "a" is a symmetric (n*n) matrix stored in its lower * triangular part and x,y are n-vectors * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of problem * a(n,n) --> symmetric (n*n) matrix stored in * lower triangular part and diagonal * x(n) --> operand vector * y(n) <-- result vector * NOTE: x and y cannot share storage. 
*/ int i, j; double sum; for (i = 0; i < n; ++i) { sum = 0.; for (j = 0; j <= i; ++j) { sum += a[i + j * nr] * x[j]; } for (j = i+1; j < n; ++j) { sum += a[j + i * nr] * x[j]; } y[i] = sum; } } /* mvmlts */ static void lltslv(int nr, int n, double *a, double *x, double *b) { /* solve ax=b where a has the form l(l-transpose) * but only the lower triangular part, l, is stored. * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of problem * a(n,n) --> matrix of form l(l-transpose). * on return a is unchanged. * x(n) <-- solution vector * b(n) --> right-hand side vector * note * if b is not required by calling program, then * b and x may share the same storage. */ int job = 0, info; if( x != b) Memcpy(x, b, (size_t) n); F77_CALL(dtrsl)(a, &nr, &n, x, &job, &info FCONE FCONE); job = 10; F77_CALL(dtrsl)(a, &nr, &n, x, &job, &info FCONE FCONE); } /* lltslv */ static void choldc(int nr, int n, double *a, double diagmx, double tol, double *addmax) { /* Find the perturbed l(l-transpose) [written ll+] decomposition * of a+d, where d is a non-negative diagonal matrix added to a if * necessary to allow the cholesky decomposition to continue. * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of problem * a(n,n) <--> on entry: matrix for which to find perturbed * cholesky decomposition * on exit: contains l of ll+ decomposition * in lower triangular part and diagonal of "a" * diagmx --> maximum diagonal element of "a" * tol --> tolerance * addmax <-- maximum amount implicitly added to diagonal of "a" * in forming the cholesky decomposition of a+d * internal variables * aminl smallest element allowed on diagonal of l * amnlsq =aminl**2 * offmax maximum off-diagonal element in column of a * description * the normal cholesky decomposition is performed. however, if at any * point the algorithm would attempt to set l(i,i)=sqrt(temp) * with temp < tol*diagmx, then l(i,i) is set to sqrt(tol*diagmx) * instead. 
this is equivalent to adding tol*diagmx-temp to a(i,i) */ double tmp1, tmp2; int i, j, k; double aminl, offmax, amnlsq; double sum; *addmax = 0.0; aminl = sqrt(diagmx * tol); amnlsq = aminl * aminl; /* form row i of l */ for (i = 0; i < n; ++i) { /* find diagonal elements of l */ sum = 0.; for (k = 0; k < i; ++k) sum += a[i + k * nr] * a[i + k * nr]; tmp1 = a[i + i * nr] - sum; if (tmp1 >= amnlsq) { a[i + i * nr] = sqrt(tmp1); } else { /* find maximum off-diagonal element in row */ offmax = 0.; for (j = 0; j < i; ++j) { if(offmax < (tmp2 = fabs(a[i + j * nr]))) offmax = tmp2; } if (offmax <= amnlsq) offmax = amnlsq; /* add to diagonal element to * allow cholesky decomposition to continue */ a[i + i * nr] = sqrt(offmax); if(*addmax < (tmp2 = offmax - tmp1)) *addmax = tmp2; } /* find i,j element of lower triangular matrix */ for (j = 0; j < i; ++j) { sum = 0.; for (k = 0; k < j; ++k) sum += a[i + k * nr] * a[j + k * nr]; a[i + j * nr] = (a[i + j * nr] - sum) / a[j + j * nr]; } } } /* choldc */ static void qraux1(int nr, int n, double *r, int i) { /* Interchange rows i,i+1 of the upper hessenberg matrix r, columns i to n . * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of matrix * r[n*n] <--> upper hessenberg matrix * i --> index of row to interchange (i < n-1) */ double tmp; double *r1, *r2; /* pointer arithmetic : */ r1 = r + i + i * nr; r2 = r1 + 1; while(n-- > i) { tmp = *r1; *r1 = *r2; *r2 = tmp; r1 += nr; r2 += nr; } } /* qraux1 */ static void qraux2(int nr, int n, double *r, int i, double a, double b) { /* Pre-multiply r by the jacobi rotation j(i,i+1,a,b) . 
 * PARAMETERS :

 * nr      --> row dimension of matrix
 * n       --> dimension of matrix
 * r(n,n) <--> upper hessenberg matrix
 * i       --> index of row
 * a       --> scalar
 * b       --> scalar */

    double c, s;
    double y, z, den;
    double *r1, *r2;

    /* Givens rotation (c,s) chosen from the scalars a,b. */
    den = hypot(a,b);
    c = a / den;
    s = b / den;

    /* pointer arithmetic : r1 walks row i, r2 walks row i+1
     * (column-major storage, column stride nr). */
    r1 = r + i + i*nr;
    r2 = r1 + 1;
    while(n-- > i) {
        y = *r1;
        z = *r2;
        *r1 = c * y - s * z;
        *r2 = s * y + c * z;
        r1 += nr;
        r2 += nr;
    }
} /* qraux2 */

static void
qrupdt(int nr, int n, double *a, double *u, double *v)
{
    /* Find an orthogonal (n*n) matrix (q*) and an upper triangular (n*n)
     * matrix (r*) such that (q*)(r*)=r+u(v+)

     * PARAMETERS :

     * nr      --> row dimension of matrix
     * n       --> dimension of problem
     * a(n,n) <--> on input:  contains r
     *             on output: contains (r*)
     * u(n)    --> vector
     * v(n)    --> vector
     */

    int i, j, k;
    double t1, t2;
    int ii;

    /* determine last non-zero in u(.) */
    for(k = n-1; k > 0 && u[k] == 0.0; k--)
        ;

    /* (k-1) jacobi rotations transform
     *     r + u(v+) --> (r*) + (u(1)*e1)(v+)
     * which is upper hessenberg */
    if (k > 0) {
        ii = k;
        while(ii > 0) {
            i = ii - 1;
            if (u[i] == 0.0) {
                qraux1(nr, n, a, i);
                u[i] = u[ii];
            } else {
                qraux2(nr, n, a, i, u[i], -u[ii]);
                u[i] = hypot(u[i], u[ii]);
            }
            ii = i;
        }
    }

    /* r <-- r + (u(1)*e1)(v+) */
    for (j = 0; j < n; ++j)
        a[j * nr] += u[0] * v[j];

    /* (k-1) jacobi rotations transform upper hessenberg r
     * to upper triangular (r*) */
    for (i = 0; i < k; ++i) {
        if (a[i + i * nr] == 0.)
            qraux1(nr, n, a, i);
        else {
            t1 = a[i + i * nr];
            t2 = -a[i + 1 + i * nr];
            qraux2(nr, n, a, i, t1, t2);
        }
    }
} /* qrupdt */

static void
tregup(int nr, int n, double *x, double f, double *g, double *a, fcn_p fcn,
       void *state, double *sc, double *sx, Rboolean nwtake, double stepmx,
       double steptl, double *dlt, int *iretcd, double *xplsp, double *fplsp,
       double *xpls, double *fpls, Rboolean *mxtake, int method, double *udiag)
{
    /* TRust REGion UPdating
     *   ==    ===    ==
     * Decide whether to accept xpls = x+sc as the next iterate and
     * update the trust region radius dlt.

     * Used iff method == 2 or 3

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * x(n)     --> old iterate x[k-1]
     * f        --> function value at old iterate, f(x)
     * g(n)     --> gradient at old iterate, g(x), or approximate
     * a(n,n)   --> cholesky decomposition of hessian in
     *              lower triangular part and diagonal.
     *              hessian or approx in upper triangular part
     * fcn      --> name of subroutine to evaluate function
     * state   <--> information other than x and n that fcn requires.
     *              state is not modified in tregup (but can be
     *              modified by fcn).
     * sc(n)    --> current step
     * sx(n)    --> diagonal scaling matrix for x
     * nwtake   --> boolean, = TRUE if newton step taken
     * stepmx   --> maximum allowable step size
     * steptl   --> relative step size at which successive iterates
     *              considered close enough to terminate algorithm
     * dlt     <--> trust region radius
     * iretcd  <--> return code
     *                =0 xpls accepted as next iterate;
     *                   dlt trust region for next iteration.
     *                =1 xpls unsatisfactory but accepted as next iterate
     *                   because xpls-x < smallest allowable step length.
     *                =2 f(xpls) too large.  continue current iteration
     *                   with new reduced dlt.
     *                =3 f(xpls) sufficiently small, but quadratic model
     *                   predicts f(xpls) sufficiently well to continue
     *                   current iteration with new doubled dlt.
     * xplsp(n)<--> workspace [value needs to be retained between
     *              succesive calls of k-th global step]
     * fplsp   <--> [retain value between successive calls]
     * xpls(n) <--  new iterate x[k]
     * fpls    <--  function value at new iterate, f(xpls)
     * mxtake  <--  boolean flag indicating step of maximum length used
     * ipr      --> device to which to send output
     * method   --> algorithm to use to solve minimization problem
     *                =1 line search
     *                =2 double dogleg
     *                =3 more-hebdon
     * udiag(n) --> diagonal of hessian in a(.,.)
     */

    double dltf;
    double temp1;
    int i, j, one = 1;
    double dltfp, dltmp;
    double rln, slp;

    *mxtake = FALSE;
    /* trial point xpls = x + sc and the actual reduction dltf */
    for (i = 0; i < n; ++i)
        xpls[i] = x[i] + sc[i];
    (*fcn)(n, xpls, fpls, state);
    dltf = *fpls - f;
    /* slp = g . sc : predicted first-order decrease along the step */
    slp = F77_CALL(ddot)(&n, g, &one, sc, &one FCONE FCONE);

    /* next statement added for case of compilers which do not optimize
       evaluation of next "if" statement (in which case fplsp could be
       undefined).
       if (*iretcd == 4) { *fplsp = 0.; } */

    if (*iretcd == 3 && (*fpls >= *fplsp || dltf > slp * 1e-4)) {
        /* reset xpls to xplsp and terminate global step */
        *iretcd = 0;
        for (i = 0; i < n; ++i)
            xpls[i] = xplsp[i];
        *fpls = *fplsp;
        *dlt *= .5;
    }
    else { /* fpls too large */
        if (dltf > slp * 1e-4) {
            rln = 0.;
            for (i = 0; i < n; ++i) {
                /* relative step length in component i */
                temp1 = fabs(sc[i])/fmax2(fabs(xpls[i]), 1./sx[i]);
                if(rln < temp1)
                    rln = temp1;
            }
            if (rln < steptl) {
                /* cannot find satisfactory xpls sufficiently distinct from x */
                *iretcd = 1;
            }
            else {
                /* reduce trust region and continue global step */
                *iretcd = 2;
                dltmp = -slp * *dlt / ((dltf - slp) * 2.);
                if (dltmp < *dlt * .1)
                    *dlt *= .1;
                else
                    *dlt = dltmp;
            }
        }
        else {
            /* fpls sufficiently small : compare the quadratic-model
             * prediction dltfp against the actual reduction dltf */
            dltfp = 0.;
            if (method == 2) {
                /* sc' L L' sc using the cholesky factor in the lower part */
                for (i = 0; i < n; ++i) {
                    temp1 = 0.;
                    for (j = i; j < n; ++j)
                        temp1 += a[j + i * nr] * sc[j];
                    dltfp += temp1 * temp1;
                }
            }
            else { /* method != 2 : hessian is in upper part + udiag */
                for (i = 0; i < n; ++i) {
                    dltfp += udiag[i] * sc[i] * sc[i];
                    temp1 = 0.;
                    for (j = i+1; j < n; ++j)
                        temp1 += a[i + j * nr] * sc[i] * sc[j];
                    dltfp += temp1 * 2.;
                }
            }
            dltfp = slp + dltfp / 2.;
            if (*iretcd != 2 &&
                fabs(dltfp - dltf) <= fabs(dltf) * 0.1 &&
                nwtake &&
                *dlt <= stepmx * .99) {
                /* double trust region and continue global step */
                *iretcd = 3;
                for (i = 0; i < n; ++i)
                    xplsp[i] = xpls[i];
                *fplsp = *fpls;
                temp1 = *dlt * 2.0;
                *dlt = fmin2(temp1, stepmx);
            }
            else {
                /* accept xpls as next iterate.  choose new trust region.
                 */
                *iretcd = 0;
                if (*dlt > stepmx * .99)
                    *mxtake = TRUE;
                if (dltf >= dltfp * .1) {
                    /* decrease trust region for next iteration */
                    *dlt *= .5;
                }
                else {
                    /* check whether to increase trust region for next
                     * iteration */
                    if (dltf <= dltfp * .75) {
                        temp1 = *dlt * 2.0;
                        *dlt = fmin2(temp1, stepmx);
                    }
                }
            }
        }
    }
} /* tregup */

static void
lnsrch(int n, double *x, double f, double *g, double *p, double *xpls,
       double *fpls, fcn_p fcn, void *state, Rboolean *mxtake, int *iretcd,
       double stepmx, double steptl, double *sx)
{
    /* Find a next newton iterate by line search.  (iff  method == 1)

     * PARAMETERS :

     * n       --> dimension of problem
     * x(n)    --> old iterate:   x[k-1]
     * f       --> function value at old iterate, f(x)
     * g(n)    --> gradient at old iterate, g(x), or approximate
     * p(n)    --> non-zero newton step
     * xpls(n)<--  new iterate x[k]
     * fpls   <--  function value at new iterate, f(xpls)
     * fcn     --> name of subroutine to evaluate function
     * state  <--> information other than x and n that fcn requires.
     *             state is not modified in lnsrch (but can be
     *             modified by fcn).
     * iretcd <--  return code
     * mxtake <--  boolean flag indicating step of maximum length used
     * stepmx  --> maximum allowable step size
     * steptl  --> relative step size at which successive iterates
     *             considered close enough to terminate algorithm
     * sx(n)   --> diagonal scaling matrix for x

     * internal variables

     * sln             newton length
     * rln             relative length of newton step
     */

    int i, one = 1;
    Rboolean firstback = TRUE;
    double disc;
    double a3, b;
    double t1, t2, t3, lambda, tlmbda, rmnlmb;
    double scl, rln, sln, slp;
    double temp1;
    double pfpls = 0., plmbda = 0.; /* -Wall */

    /* scaled newton step length sln */
    temp1 = 0.;
    for (i = 0; i < n; ++i)
        temp1 += sx[i] * sx[i] * p[i] * p[i];
    sln = sqrt(temp1);
    if (sln > stepmx) {
        /* newton step longer than maximum allowed : scale it back */
        scl = stepmx / sln;
        F77_CALL(dscal)(&n, &scl, p, &one FCONE FCONE);
        sln = stepmx;
    }
    /* slp = g . p : directional derivative along the step */
    slp = F77_CALL(ddot)(&n, g, &one, p, &one FCONE FCONE);
    rln = 0.;
    for (i = 0; i < n; ++i) {
        temp1 = fabs(p[i])/ fmax2(fabs(x[i]), 1./sx[i]);
        if(rln < temp1)
            rln = temp1;
    }
    /* smallest lambda still producing a step distinct from x */
    rmnlmb = steptl / rln;
    lambda = 1.0;

    /* check if new iterate satisfactory.  generate new lambda if necessary. */

    *mxtake = FALSE;
    *iretcd = 2;
    do {
        for (i = 0; i < n; ++i)
            xpls[i] = x[i] + lambda * p[i];
        (*fcn)(n, xpls, fpls, state);
        if (*fpls <= f + slp * 1e-4 * lambda) {
            /* solution found : sufficient decrease (Armijo) condition */
            *iretcd = 0;
            if (lambda == 1. && sln > stepmx * .99)
                *mxtake = TRUE;
            return;
        }
        /* else : solution not (yet) found */

        /* First find a point with a finite value */

        if (lambda < rmnlmb) {
            /* no satisfactory xpls found sufficiently distinct from x */
            *iretcd = 1;
            return;
        }
        else { /* calculate new lambda */

            /* modifications by BDR 2000/01/05 to cover non-finite values
             * ">=" instead of "==" :  MM 2001/07/24 */
            if (*fpls >= DBL_MAX) {
                lambda *= 0.1;
                firstback = TRUE;
            }
            else {
                if (firstback) { /* first backtrack: quadratic fit */
                    tlmbda = -lambda * slp / ((*fpls - f - slp) * 2.);
                    firstback = FALSE;
                }
                else { /* all subsequent backtracks: cubic fit */
                    t1 = *fpls - f - lambda * slp;
                    t2 = pfpls - f - plmbda * slp;
                    t3 = 1. / (lambda - plmbda);
                    a3 = 3. * t3 * (t1 / (lambda * lambda)
                                    - t2 / (plmbda * plmbda));
                    b = t3 * (t2 * lambda / (plmbda * plmbda)
                              - t1 * plmbda / (lambda * lambda));
                    disc = b * b - a3 * slp;
                    if (disc > b * b)
                        /* only one positive critical point, must be minimum */
                        tlmbda = (-b + ((a3 < 0)? -sqrt(disc): sqrt(disc))) /a3;
                    else
                        /* both critical points positive, first is minimum */
                        tlmbda = (-b + ((a3 < 0)? sqrt(disc): -sqrt(disc))) /a3;

                    if (tlmbda > lambda * .5)
                        tlmbda = lambda * .5;
                }
                plmbda = lambda;
                pfpls = *fpls;
                if (tlmbda < lambda * .1)
                    lambda *= .1;
                else
                    lambda = tlmbda;
            }
        }
    } while(*iretcd > 1);
} /* lnsrch */

static void
dog_1step(int nr, int n, double *g, double *a, double *p, double *sx,
          double rnwtln, double *dlt, Rboolean *nwtake, Rboolean *fstdog,
          double *ssd, double *v, double *cln, double *eta, double *sc,
          double stepmx)
{
    /* Find new step by double dogleg algorithm  (iff method == 2);
     * repeatedly called by dogdrv() only.

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * g(n)     --> gradient at current iterate, g(x)
     * a(n,n)   --> cholesky decomposition of hessian in
     *              lower part and diagonal
     * p(n)     --> newton step
     * sx(n)    --> diagonal scaling matrix for x
     * rnwtln   --> newton step length
     * dlt     <--> trust region radius
     * nwtake  <--> boolean, =.true. if newton step taken
     * fstdog  <--> boolean, =.true.
     if on first leg of dogleg
     * ssd(n) <--> workspace [cauchy step to the minimum of the
     *             quadratic model in the scaled steepest descent
     *             direction] [retain value between successive calls]
     * v(n)   <--> workspace  [retain value between successive calls]
     * cln    <--> cauchy length
     *             [retain value between successive calls]
     * eta          [retain value between successive calls]
     * sc(n)  <--  current step
     * ipr     --> device to which to send output
     * stepmx  --> maximum allowable step size

     * internal variables

     * cln             length of cauchy step
     */

    int i, j, one = 1;
    double alam, bet, alpha, tmp, dot1, dot2;

    /* can we take newton step */

    *nwtake = (rnwtln <= *dlt);
    if (*nwtake) {
        for (i = 0; i < n; ++i)
            sc[i] = p[i];
        *dlt = rnwtln;
        return;
    }

    /* else *nwtake = FALSE :
     * newton step too long -- cauchy step is on double dogleg curve */

    if (*fstdog) {
        /* calculate double dogleg curve (ssd) : done once per outer
         * iteration; ssd, v, cln, eta are retained for later calls */
        *fstdog = FALSE;
        alpha = 0.;
        for (i = 0; i < n; ++i)
            alpha += g[i] * g[i] / (sx[i] * sx[i]);
        bet = 0.;
        for (i = 0; i < n; ++i) {
            tmp = 0.;
            for (j = i; j < n; ++j)
                tmp += a[j + i * nr] * g[j] / (sx[j] * sx[j]);
            bet += tmp * tmp;
        }
        for (i = 0; i < n; ++i)
            ssd[i] = -(alpha / bet) * g[i] / sx[i];
        *cln = alpha * sqrt(alpha) / bet;
        *eta = (.8 * alpha * alpha /
                (-bet * F77_CALL(ddot)(&n, g, &one, p, &one FCONE FCONE))) + .2;
        for (i = 0; i < n; ++i)
            v[i] = *eta * sx[i] * p[i] - ssd[i];

        /* dlt == -1 means the caller supplied no initial trust radius */
        if (*dlt == -1.)
            *dlt = fmin2(*cln, stepmx);
    }

    if (*eta * rnwtln <= *dlt) {
        /* take partial step in newton direction */
        for (i = 0; i < n; ++i)
            sc[i] = *dlt / rnwtln * p[i];
    }
    else if (*cln >= *dlt) {
        /* take step in steepest descent direction */
        for (i = 0; i < n; ++i)
            sc[i] = *dlt / *cln * ssd[i] / sx[i];
    }
    else {
        /* calculate convex combination of ssd and eta*p
         * which has scaled length dlt */
        dot1 = F77_CALL(ddot)(&n, v, &one, ssd, &one FCONE FCONE);
        dot2 = F77_CALL(ddot)(&n, v, &one, v, &one FCONE FCONE);
        alam = (-dot1 + sqrt(dot1 * dot1 - dot2 * (*cln * *cln - *dlt * *dlt)))
            / dot2;
        for (i = 0; i < n; ++i)
            sc[i] = (ssd[i] + alam * v[i]) / sx[i];
    }
} /* dog_1step */

static void
dogdrv(int nr, int n, double *x, double f, double *g, double *a, double *p,
       double *xpls, double *fpls, fcn_p fcn, void *state, double *sx,
       double stepmx, double steptl, double *dlt, int *iretcd,
       Rboolean *mxtake, double *sc, double *wrk1, double *wrk2,
       double *wrk3, int *itncnt)
{
    /* Find a next newton iterate (xpls) by the double dogleg method
     * (iff method == 2 ).

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * x(n)     --> old iterate x[k-1]
     * f        --> function value at old iterate, f(x)
     * g(n)     --> gradient at old iterate, g(x), or approximate
     * a(n,n)   --> cholesky decomposition of hessian
     *              in lower triangular part and diagonal
     * p(n)     --> newton step
     * xpls(n) <--  new iterate x[k]
     * fpls    <--  function value at new iterate, f(xpls)
     * fcn      --> name of subroutine to evaluate function
     * state   <--> information other than x and n that fcn requires.
     *              state is not modified in dogdrv (but can be
     *              modified by fcn).
     * sx(n)    --> diagonal scaling matrix for x
     * stepmx   --> maximum allowable step size
     * steptl   --> relative step size at which successive iterates
     *              considered close enough to terminate algorithm
     * dlt     <--> trust region radius
     *              [retain value between successive calls]
     * iretcd  <--  return code
     *                =0  satisfactory xpls found
     *                =1  failed to find satisfactory xpls sufficiently
     *                    distinct from x
     * mxtake  <--  boolean flag indicating step of maximum length used
     * sc(n)    --> workspace [current step]
     * wrk1(n)  --> workspace (and place holding argument to tregup)
     * wrk2(n)  --> workspace
     * wrk3(n)  --> workspace
     * ipr      --> device to which to send output
     */

    Rboolean fstdog, nwtake;
    int i;
    double fplsp, rnwtln, eta = 0.0, cln = 0.0, tmp; /* -Wall */

    /* scaled newton step length */
    tmp = 0.;
    for (i = 0; i < n; ++i)
        tmp += sx[i] * sx[i] * p[i] * p[i];
    rnwtln = sqrt(tmp);

    *iretcd = 4;
    fstdog = TRUE;

    do {
        /* find new step by double dogleg algorithm */

        dog_1step(nr, n, g, a, p, sx, rnwtln, dlt,
                  &nwtake, &fstdog, wrk1, wrk2, &cln, &eta, sc, stepmx);

        /* check new point and update trust region */

        tregup(nr, n, x, f, g, a, (fcn_p)fcn, state, sc, sx, nwtake, stepmx,
               steptl, dlt, iretcd, wrk3, &fplsp, xpls, fpls, mxtake, 2, wrk1);
    } while(*iretcd > 1);
} /* dogdrv */

static void
hook_1step(int nr, int n, double *g, double *a, double *udiag, double *p,
           double *sx, double rnwtln, double *dlt, double *amu, double dltp,
           double *phi, double *phip0, Rboolean *fstime, double *sc,
           Rboolean *nwtake, double *wrk0, double epsm)
{
    /* Find new step by more-hebdon algorithm  (iff method == 3);
     * repeatedly called by hookdrv() only.

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * g(n)     --> gradient at current iterate, g(x)
     * a(n,n)   --> cholesky decomposition of hessian in
     *              lower triangular part and diagonal.
     *              hessian or approx in upper triangular part
     * udiag(n) --> diagonal of hessian in a(.,.)
* p(n) --> newton step * sx(n) --> diagonal scaling matrix for n * rnwtln --> newton step length * dlt <--> trust region radius * amu <--> [retain value between successive calls] * dltp --> trust region radius at last exit from this routine * phi <--> [retain value between successive calls] * phip0 <--> [retain value between successive calls] * fstime <--> boolean. =.true. if first entry to this routine * during k-th iteration * sc(n) <-- current step * nwtake <-- boolean, =.true. if newton step taken * wrk0(n) --> workspace * epsm --> machine epsilon */ int one = 1, job = 0, info; int i, j; double phip; double amulo, amuup; double addmax, stepln; double temp1; const double hi = 1.5, alo = 0.75; /* hi and alo are constants used in this routine. */ /* change here if other values are to be substituted. */ /* shall we take newton step ? */ *nwtake = (rnwtln <= hi * *dlt); if (*nwtake) { /* take newton step */ for (i = 0; i < n; ++i) sc[i] = p[i]; *dlt = fmin2(*dlt, rnwtln); *amu = 0.; return; } /* else *nwtake = FALSE : newton step not taken */ if (*amu > 0.) 
*amu -= (*phi + dltp) * (dltp - *dlt + *phi) / (*dlt * *phip0); *phi = rnwtln - *dlt; if (*fstime) { for (i = 0; i < n; ++i) wrk0[i] = sx[i] * sx[i] * p[i]; /* solve l*y = (sx**2)*p */ F77_CALL(dtrsl)(a, &nr, &n, wrk0, &job, &info FCONE FCONE); /* Computing 2nd power */ temp1 = F77_CALL(dnrm2)(&n, wrk0, &one FCONE FCONE); *phip0 = -(temp1 * temp1) / rnwtln; *fstime = FALSE; } phip = *phip0; amulo = -(*phi) / phip; amuup = 0.; for (i = 0; i < n; ++i) amuup += g[i] * g[i] / (sx[i] * sx[i]); amuup = sqrt(amuup) / *dlt; while (1) { /* test value of amu; generate next amu if necessary */ if (*amu < amulo || *amu > amuup) { *amu = fmax2(sqrt(amulo * amuup), amuup * .001); } /* copy (h,udiag) to l */ /* where h <-- h+amu*(sx**2) [do not actually change (h,udiag)] */ for (i = 0; i < n; ++i) { a[i + i * nr] = udiag[i] + *amu * sx[i] * sx[i]; for (j = 0; j < i; ++i) a[i + j * nr] = a[j + i * nr]; } /* factor h=l(l+) */ temp1 = sqrt(epsm); choldc(nr, n, a, 0.0, temp1, &addmax); /* solve h*p = l(l+)*sc = -g */ for (i = 0; i < n; ++i) wrk0[i] = -g[i]; lltslv(nr, n, a, sc, wrk0); /* reset h. note since udiag has not been destroyed we need do */ /* nothing here. h is in the upper part and in udiag, still intact */ stepln = 0.; for (i = 0; i < n; ++i) stepln += sx[i] * sx[i] * sc[i] * sc[i]; stepln = sqrt(stepln); *phi = stepln - *dlt; for (i = 0; i < n; ++i) wrk0[i] = sx[i] * sx[i] * sc[i]; F77_CALL(dtrsl)(a, &nr, &n, wrk0, &job, &info FCONE FCONE); temp1 = F77_CALL(dnrm2)(&n, wrk0, &one FCONE FCONE); phip = -(temp1 * temp1) / stepln; if ((alo * *dlt <= stepln && stepln <= hi * *dlt) || (amuup - amulo > 0.)) { /* sc is acceptable hookstep */ break; } else { /* sc not acceptable hookstep. select new amu */ temp1 = (*amu - *phi)/phip; amulo = fmax2(amulo, temp1); if (*phi < 0.) 
amuup = fmin2(amuup,*amu); *amu -= stepln * *phi / (*dlt * phip); } } } /* hook_1step */ static void hookdrv(int nr, int n, double *x, double f, double *g, double *a, double *udiag, double *p, double *xpls, double *fpls, fcn_p fcn, void *state, double *sx, double stepmx, double steptl, double *dlt, int *iretcd, Rboolean *mxtake, double *amu, double *dltp, double *phi, double *phip0, double *sc, double *xplsp, double *wrk0, double epsm, int itncnt) { /* Find a next newton iterate (xpls) by the more-hebdon method. * (iff method == 3) * PARAMETERS : * nr --> row dimension of matrix * n --> dimension of problem * x(n) --> old iterate x[k-1] * f --> function value at old iterate, f(x) * g(n) --> gradient at old iterate, g(x), or approximate * a(n,n) --> cholesky decomposition of hessian in lower * triangular part and diagonal. * hessian in upper triangular part and udiag. * udiag(n) --> diagonal of hessian in a(.,.) * p(n) --> newton step * xpls(n) <-- new iterate x[k] * fpls <-- function value at new iterate, f(xpls) * fcn --> name of subroutine to evaluate function * state <--> information other than x and n that fcn requires. * state is not modified in hookdrv (but can be * modified by fcn). 
     * sx(n)    --> diagonal scaling matrix for x
     * stepmx   --> maximum allowable step size
     * steptl   --> relative step size at which successive iterates
     *              considered close enough to terminate algorithm
     * dlt     <--> trust region radius
     * iretcd  <--  return code
     *                =0 satisfactory xpls found
     *                =1 failed to find satisfactory xpls sufficiently
     *                   distinct from x
     * mxtake  <--  boolean flag indicating step of maximum length used
     * amu     <--> [retain value between successive calls]
     * dltp    <--> [retain value between successive calls]
     * phi     <--> [retain value between successive calls]
     * phip0   <--> [retain value between successive calls]
     * sc(n)    --> workspace
     * xplsp(n) --> workspace
     * wrk0(n)  --> workspace
     * epsm     --> machine epsilon
     * itncnt   --> iteration count
     * ipr      --> device to which to send output
     */

    Rboolean fstime, nwtake;
    int i, j;
    double bet, alpha, fplsp, rnwtln, tmp;

    /* scaled newton step length */
    tmp = 0.;
    for (i = 0; i < n; ++i)
        tmp += sx[i] * sx[i] * p[i] * p[i];
    rnwtln = sqrt(tmp);

    if (itncnt == 1) {
        *amu = 0.;

        /* if first iteration and trust region not provided by user,
         * compute initial trust region. */

        if (*dlt == -1.) {
            /* Cauchy step length: alpha^(3/2) / bet */
            alpha = 0.;
            for (i = 0; i < n; ++i)
                alpha += g[i] * g[i] / (sx[i] * sx[i]);
            bet = 0.;
            for (i = 0; i < n; ++i) {
                tmp = 0.;
                for (j = i; j < n; ++j)
                    tmp += a[j + i * nr] * g[j] / (sx[j] * sx[j]);
                bet += tmp * tmp;
            }
            *dlt = alpha * sqrt(alpha) / bet;
            if(*dlt > stepmx)
                *dlt = stepmx;
        }
    }
    *iretcd = 4;
    fstime = TRUE;

    do {
        /* find new step by more-hebdon algorithm */

        hook_1step(nr, n, g, a, udiag, p, sx, rnwtln, dlt, amu,
                   *dltp, phi, phip0, &fstime, sc, &nwtake, wrk0, epsm);
        *dltp = *dlt;

        /* check new point and update trust region */

        tregup(nr, n, x, f, g, a, (fcn_p)fcn, state, sc, sx, nwtake, stepmx,
               steptl, dlt, iretcd, xplsp, &fplsp, xpls, fpls, mxtake, 3,
               udiag);
    } while(*iretcd > 1);
} /* hookdrv */

static void
secunf(int nr, int n, double *x, double *g, double *a, double *udiag,
       double *xpls, double *gpls, double epsm, int itncnt, double rnf,
       int iagflg, Rboolean *noupdt, double *s, double *y, double *t)
{
    /* Update hessian by the bfgs unfactored method
     * (only when  method == 3)

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * x(n)     --> old iterate, x[k-1]
     * g(n)     --> gradient or approximate at old iterate
     * a(n,n)  <--> on entry: approximate hessian at old iterate
     *              in upper triangular part (and udiag)
     *              on exit:  updated approx hessian at new iterate
     *              in lower triangular part and diagonal
     *              [lower triangular part of symmetric matrix]
     * udiag    --> on entry: diagonal of hessian
     * xpls(n)  --> new iterate, x[k]
     * gpls(n)  --> gradient or approximate at new iterate
     * epsm     --> machine epsilon
     * itncnt   --> iteration count
     * rnf      --> relative noise in optimization function fcn
     * iagflg   --> =1 if analytic gradient supplied, =0 otherwise
     * noupdt  <--> boolean: no update yet
     *              [retain value between successive calls]
     * s(n)     --> workspace
     * y(n)     --> workspace
     * t(n)     --> workspace
     */

    double ynrm2, snorm2;
    int i, j, one = 1;
    Rboolean skpupd;
    double gam, tol, den1, den2;

    /* copy hessian in upper triangular part and udiag to
     * lower triangular part and diagonal */

    for (i = 0; i < n; ++i) {
        a[i + i * nr] = udiag[i];
        for (j = 0; j < i; ++j)
            a[i + j * nr] = a[j + i * nr];
    }
    *noupdt = (itncnt == 1);

    /* secant pair: s = step, y = gradient change */
    for (i = 0; i < n; ++i) {
        s[i] = xpls[i] - x[i];
        y[i] = gpls[i] - g[i];
    }
    den1 = F77_CALL(ddot)(&n, s, &one, y, &one FCONE FCONE);
    snorm2 = F77_CALL(dnrm2)(&n, s, &one FCONE FCONE);
    ynrm2 = F77_CALL(dnrm2)(&n, y, &one FCONE FCONE);

    /* skip update if curvature s.y is too small (not sufficiently positive) */
    if (den1 < sqrt(epsm) * snorm2 * ynrm2)
        return;

    mvmlts(nr, n, a, s, t);
    den2 = F77_CALL(ddot)(&n, s, &one, t, &one FCONE FCONE);
    if (*noupdt) {
        /* h <-- [(s+)y/(s+)hs]h : initial scaling of the hessian */
        gam = den1 / den2;
        den2 *= gam;
        for (j = 0; j < n; ++j) {
            t[j] *= gam;
            for (i = j; i < n; ++i)
                a[i + j * nr] *= gam;
        }
        *noupdt = FALSE;
    }
    skpupd = TRUE;

    /* check update condition on row i */

    for (i = 0; i < n; ++i) {
        tol = rnf * fmax2(fabs(g[i]), fabs(gpls[i]));
        if (iagflg == 0)
            tol /= sqrt(rnf);
        if (fabs(y[i] - t[i]) >= tol) {
            skpupd = FALSE;
            break;
        }
    }
    if (skpupd)
        return;

    /* bfgs update */

    for (j = 0; j < n; ++j) {
        for (i = j; i < n; ++i)
            a[i + j * nr] += y[i] * y[j] / den1 - t[i] * t[j] / den2;
    }
} /* secunf */

static void
secfac(int nr, int n, double *x, double *g, double *a, double *xpls,
       double *gpls, double epsm, int itncnt, double rnf, int iagflg,
       Rboolean *noupdt, double *s, double *y, double *u, double *w)
{
    /* Update hessian by the bfgs factored method
     * (only when  method == 1 or 2)

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * x(n)     --> old iterate, x[k-1]
     * g(n)     --> gradient or approximate at old iterate
     * a(n,n)  <--> on entry: cholesky decomposition of hessian in
     *              lower part and diagonal.
     *              on exit:  updated cholesky decomposition of hessian
     *              in lower triangular part and diagonal
     * xpls(n)  --> new iterate, x[k]
     * gpls(n)  --> gradient or approximate at new iterate
     * epsm     --> machine epsilon
     * itncnt   --> iteration count
     * rnf      --> relative noise in optimization function fcn
     * iagflg   --> =1 if analytic gradient supplied, =0 itherwise
     * noupdt  <--> boolean: no update yet
     *              [retain value between successive calls]
     * s(n)     --> workspace
     * y(n)     --> workspace
     * u(n)     --> workspace
     * w(n)     --> workspace
     */

    double ynrm2;
    int i, j, one = 1;
    Rboolean skpupd;
    double snorm2, reltol;
    double alp, den1, den2;

    *noupdt = (itncnt == 1);

    /* secant pair: s = step, y = gradient change */
    for (i = 0; i < n; ++i) {
        s[i] = xpls[i] - x[i];
        y[i] = gpls[i] - g[i];
    }
    den1 = F77_CALL(ddot)(&n, s, &one, y, &one FCONE FCONE);
    snorm2 = F77_CALL(dnrm2)(&n, s, &one FCONE FCONE);
    ynrm2 = F77_CALL(dnrm2)(&n, y, &one FCONE FCONE);

    /* skip update if curvature s.y is too small */
    if (den1 < sqrt(epsm) * snorm2 * ynrm2)
        return;

    mvmltu(nr, n, a, s, u);
    den2 = F77_CALL(ddot)(&n, u, &one, u, &one FCONE FCONE);

    /* l <-- sqrt(den1/den2)*l */

    alp = sqrt(den1 / den2);
    if (*noupdt) {
        /* initial scaling of the cholesky factor */
        for (j = 0; j < n; ++j) {
            u[j] = alp * u[j];
            for (i = j; i < n; ++i) {
                a[i + j * nr] *= alp;
            }
        }
        *noupdt = FALSE;
        den2 = den1;
        alp = 1.;
    }

    /* w = l(l+)s = hs */

    mvmltl(nr, n, a, u, w);
    if (iagflg == 0)
        reltol = sqrt(rnf);
    else
        reltol = rnf;
    skpupd = TRUE;
    for (i = 0; i < n; ++i) {
        skpupd = (fabs(y[i] - w[i]) <
                  reltol * fmax2(fabs(g[i]), fabs(gpls[i])));
        if(!skpupd)
            break;
    }
    if(skpupd)
        return;

    /* w = y-alp*l(l+)s */

    for (i = 0; i < n; ++i)
        w[i] = y[i] - alp * w[i];

    /* alp=1/sqrt(den1*den2) */

    alp /= den1;

    /* u=(l+)/sqrt(den1*den2) = (l+)s/sqrt((y+)s * (s+)l(l+)s) */

    for (i = 0; i < n; ++i)
        u[i] *= alp;

    /* copy l into upper triangular part.  zero l.
     */
    for (i = 1; i < n; ++i) {
        for (j = 0; j < i; ++j) {
            a[j + i * nr] = a[i + j * nr];
            a[i + j * nr] = 0.;
        }
    }

    /* find q, (l+) such that q(l+) = (l+) + u(w+) */

    qrupdt(nr, n, a, u, w);

    /* upper triangular part and diagonal of a[] now contain updated
     * cholesky decomposition of hessian.
     * copy back to lower triangular part. */

    for (i = 1; i < n; ++i)
        for (j = 0; j < i; ++j)
            a[i + j * nr] = a[j + i * nr];
} /* secfac */

static void
chlhsn(int nr, int n, double *a, double epsm, double *sx, double *udiag)
{
    /* find the l(l-transpose) [written ll+] decomposition of the perturbed
     * model hessian matrix a+mu*i(where mu\0 and i is the identity matrix)
     * which is safely positive definite.  if a is safely positive definite
     * upon entry, then mu=0.

     * PARAMETERS :

     * nr        --> row dimension of matrix
     * n         --> dimension of problem
     * a(n,n)   <--> on entry; "a" is model hessian (only lower
     *               triangular part and diagonal stored)
     *               on exit:  a contains l of ll+ decomposition of
     *               perturbed model hessian in lower triangular
     *               part and diagonal and contains hessian in upper
     *               triangular part and udiag
     * epsm      --> machine epsilon
     * sx(n)     --> diagonal scaling matrix for x
     * udiag(n) <--  on exit: contains diagonal of hessian

     * INTERNAL VARIABLES

     * tol              tolerance
     * diagmn           minimum element on diagonal of a
     * diagmx           maximum element on diagonal of a
     * offmax           maximum off-diagonal element of a
     * offrow           sum of off-diagonal elements in a row of a
     * evmin            minimum eigenvalue of a
     * evmax            maximum eigenvalue of a

     * DESCRIPTION

     * 1. if "a" has any negative diagonal elements, then choose mu>0
     * such that the diagonal of a:=a+mu*i is all positive
     * with the ratio of its smallest to largest element on the
     * order of sqrt(epsm).

     * 2. "a" undergoes a perturbed cholesky decomposition which
     * results in an ll+ decomposition of a+d, where d is a
     * non-negative diagonal matrix which is implicitly added to
     * "a" during the decomposition if "a" is not positive definite.
     * "a" is retained and not changed during this process by
     * copying l into the upper triangular part of "a" and the
     * diagonal into udiag.  then the cholesky decomposition routine
     * is called.  on return, addmax contains maximum element of d.

     * 3. if addmax=0, "a" was positive definite going into step 2
     * and return is made to calling program.  otherwise,
     * the minimum number sdd which must be added to the
     * diagonal of a to make it safely strictly diagonally dominant
     * is calculated.  since a+addmax*i and a+sdd*i are safely
     * positive definite, choose mu=fmin2(addmax,sdd) and decompose
     * a+mu*i to obtain l.
     */

    int i, j;
    double evmin, evmax;
    double addmax, diagmn, diagmx, offmax, offrow, posmax;
    double sdd, amu, tol, tmp;

    /* scale hessian */
    /* pre- and post- multiply "a" by inv(sx) */

    for (j = 0; j < n; ++j)
        for (i = j; i < n; ++i)
            a[i + j * nr] /= sx[i] * sx[j];

    /* step1
     * -----
     * note:  if a different tolerance is desired throughout this
     * algorithm, change tolerance here: */

    tol = sqrt(epsm);

    diagmx = a[0];
    diagmn = a[0];
    if (n > 1) {
        for (i = 1; i < n; ++i) {
            tmp = a[i + i * nr];
            if(diagmn > tmp)
                diagmn = tmp;
            if(diagmx < tmp)
                diagmx = tmp;
        }
    }
    posmax = fmax2(diagmx, 0.0);

    if (diagmn <= posmax * tol) {
        amu = tol * (posmax - diagmn) - diagmn;
        if (amu == 0.) {
            /* find largest off-diagonal element of a */
            offmax = 0.;
            for (i = 1; i < n; ++i) {
                for (j = 0; j < i; ++j)
                    if (offmax < (tmp = fabs(a[i + j * nr])))
                        offmax = tmp;
            }
            if (offmax == 0.)
                amu = 1.;
            else
                amu = offmax * (tol + 1.);
        }
        /* a=a + mu*i */
        for (i = 0; i < n; ++i)
            a[i + i * nr] += amu;
        diagmx += amu;
    }

    /* copy lower triangular part of "a" to upper triangular part */
    /* and diagonal of "a" to udiag */

    for (i = 0; i < n; ++i) {
        udiag[i] = a[i + i * nr];
        for (j = 0; j < i; ++j)
            a[j + i * nr] = a[i + j * nr];
    }
    choldc(nr, n, a, diagmx, tol, &addmax);

    /* step3

     * if addmax=0, "a" was positive definite going into step 2,
     * the ll+ decomposition has been done, and we return.
     * otherwise, addmax>0.  perturb "a" so that it is safely
     * diagonally dominant and find ll+ decomposition */

    if (addmax > 0.0) {
        /* restore original "a" (lower triangular part and diagonal) */

        for (i = 0; i < n; ++i) {
            a[i + i * nr] = udiag[i];
            for (j = 0; j < i; ++j)
                a[i + j * nr] = a[j + i * nr];
        }

        /* find sdd such that a+sdd*i is safely positive definite */
        /* note:  evmin<0 since a is not positive definite; */

        evmin = 0.;
        evmax = a[0];
        for (i = 0; i < n; ++i) {
            /* Gershgorin bounds on the eigenvalues */
            offrow = 0.;
            for (j = 0; j < i; ++j)
                offrow += fabs(a[i + j * nr]);
            for (j = i+1; j < n; ++j)
                offrow += fabs(a[j + i * nr]);
            tmp = a[i + i * nr] - offrow;
            if(evmin > tmp)
                evmin = tmp;
            tmp = a[i + i * nr] + offrow;
            if(evmax < tmp)
                evmax = tmp;
        }
        sdd = tol * (evmax - evmin) - evmin;

        /* perturb "a" and decompose again */

        amu = fmin2(sdd, addmax);
        for (i = 0; i < n; ++i) {
            a[i + i * nr] += amu;
            udiag[i] = a[i + i * nr];
        }

        /* "a" now guaranteed safely positive definite */

        choldc(nr, n, a, 0.0, tol, &addmax);
    }
    /* unscale hessian and cholesky decomposition matrix */

    for (j = 0; j < n; ++j) {
        for (i = j; i < n; ++i)
            a[i + j * nr] *= sx[i];
        for (i = 0; i < j; ++i)
            a[i + j * nr] *= sx[i] * sx[j];
        udiag[j] *= sx[j] * sx[j];
    }
} /* chlhsn */

static void
hsnint(int nr, int n, double *a, double *sx, int method)
{
    /* Provide initial hessian when using secant updates .
     * PARAMETERS :

     * nr       --> row dimension of matrix
     * n        --> dimension of problem
     * a(n,n)  <--  initial hessian (lower triangular matrix)
     * sx(n)    --> diagonal scaling matrix for x
     * method   --> algorithm to use to solve minimization problem
     *                =1,2 factored secant method used
     *                =3   unfactored secant method used
     */

    int i, j;

    for (i = 0; i < n; ++i) {
        /* method 3 stores H itself; methods 1,2 store its cholesky factor */
        if (method == 3)
            a[i + i * nr] = sx[i] * sx[i];
        else
            a[i + i * nr] = sx[i];
        for (j = 0; j < i; ++j)
            a[i + j * nr] = 0.;
    }
} /* hsnint */

static void
fstofd(int nr, int m, int n, double *xpls, fcn_p fcn, void *state,
       const double *fpls, double *a, double *sx, double rnoise,
       double *fhat, int icase)
{
    /* find first order forward finite difference approximation "a" to the
     * first derivative of the function defined by the subprogram "fname"
     * evaluated at the new iterate "xpls".

     * for optimization use this routine to estimate:

     * 1) the first derivative (gradient) of the optimization function "fcn
     *    analytic user routine has been supplied;

     * 2) the second derivative (hessian) of the optimization function
     *    if no analytic user routine has been supplied for the hessian but
     *    one has been supplied for the gradient ("fcn") and if the
     *    optimization function is inexpensive to evaluate

     * note

     * _m=1 (optimization) algorithm estimates the gradient of the function
     *      (fcn).   fcn(x) # f: r(n)-->r(1)

     * _m=n (systems) algorithm estimates the jacobian of the function
     *      fcn(x) # f: r(n)-->r(n).

     * _m=n (optimization) algorithm estimates the hessian of the optimizatio
     *      function, where the hessian is the first derivative of "fcn"

     * PARAMETERS :

     * nr       --> row dimension of matrix
     * m        --> number of rows in a
     * n        --> number of columns in a; dimension of problem
     * xpls(n)  --> new iterate:  x[k]
     * fcn      --> name of subroutine to evaluate function
     * state   <--> information other than x and n that fcn requires.
     *              state is not modified in fstofd (but can be
     *              modified by fcn).
     * fpls(m)  --> _m=1 (optimization) function value at new iterate:
     *              fcn(xpls)
     *              _m=n (optimization) value of first derivative
     *              (gradient) given by user function fcn
     *              _m=n (systems)  function value of associated
     *              minimization function
     * a(nr,n) <--  finite difference approximation (see note).  only
     *              lower triangular matrix and diagonal are returned
     * sx(n)    --> diagonal scaling matrix for x
     * rnoise   --> relative noise in fcn [f(x)]
     * fhat(m)  --> workspace
     * icase    --> =1 optimization (gradient)
     *              =2 systems
     *              =3 optimization (hessian)

     * internal variables

     * stepsz - stepsize in the j-th variable direction
     */

    int i, j;
    double xtmpj, stepsz, temp1, temp2;

    /* find j-th column of a
       each column is derivative of f(fcn) with respect to xpls(j) */

    for (j = 0; j < n; ++j) {
        temp1 = fabs(xpls[j]);
        temp2 = 1.0/sx[j];
        stepsz = sqrt(rnoise) * fmax2(temp1, temp2);
        xtmpj = xpls[j];
        /* perturb xpls[j], evaluate, then restore it */
        xpls[j] = xtmpj + stepsz;
        (*fcn)(n, xpls, fhat, state);
        xpls[j] = xtmpj;
        for (i = 0; i < m; ++i)
            a[i + j * nr] = (fhat[i] - fpls[i]) / stepsz;
    }
    if (icase == 3 && n > 1) {
        /* if computing hessian, a must be symmetric : average the halves */
        for (i = 1; i < m; ++i)
            for (j = 0; j < i; ++j)
                a[i + j * nr] = (a[i + j * nr] + a[j + i * nr]) / 2.0;
    }
} /* fstofd */

static void
fstocd(int n, double *x, fcn_p fcn, void *state, double *sx,
       double rnoise, double *g)
{
    /* Find central difference approximation g to the first derivative
     * (gradient) of the function defined by fcn at the point x.

     * PARAMETERS :

     * n       --> dimension of problem
     * x       --> point at which gradient is to be approximated.
     * fcn     --> name of subroutine to evaluate function.
     * state  <--> information other than x and n that fcn requires.
     *             state is not modified in fstocd (but can be
     *             modified by fcn).
     * sx      --> diagonal scaling matrix for x.
     * rnoise  --> relative noise in fcn [f(x)].
     * g      <--  central difference approximation to gradient.
     */

    int i;
    double stepi, fplus, fminus, xtempi, temp1, temp2;

    /* find i th  stepsize, evaluate two neighbors in direction of i th */
    /* unit vector, and evaluate i th  component of gradient. */

    for (i = 0; i < n; ++i) {
        xtempi = x[i];
        temp1 = fabs(xtempi);
        temp2 = 1.0/sx[i];
        /* cube-root of the noise level : optimal for central differences */
        stepi = pow(rnoise, 1.0/3.0) * fmax2(temp1, temp2);
        x[i] = xtempi + stepi;
        (*fcn)(n, x, &fplus, state);
        x[i] = xtempi - stepi;
        (*fcn)(n, x, &fminus, state);
        x[i] = xtempi;
        g[i] = (fplus - fminus) / (stepi * 2.);
    }
} /* fstocd */

static void
sndofd(int nr, int n, double *xpls, fcn_p fcn, void *state, double fpls,
       double *a, double *sx, double rnoise, double *stepsz, double *anbr)
{
    /* Find second order forward finite difference approximation "a"
     * to the second derivative (hessian) of the function defined by the subp
     * "fcn" evaluated at the new iterate "xpls" .

     * For optimization use this routine to estimate

     * 1) the second derivative (hessian) of the optimization function
     *    if no analytical user function has been supplied for either
     *    the gradient or the hessian and if the optimization function
     *    "fcn" is inexpensive to evaluate.

     * PARAMETERS :

     * nr        --> row dimension of matrix
     * n         --> dimension of problem
     * xpls(n)   --> new iterate:   x[k]
     * fcn       --> name of subroutine to evaluate function
     * state    <--> information other than x and n that fcn requires.
     *               state is not modified in sndofd (but can be
     *               modified by fcn).
     * fpls      --> function value at new iterate, f(xpls)
     * a(n,n)   <--  finite difference approximation to hessian
     *               only lower triangular matrix and diagonal
     *               are returned
     * sx(n)     --> diagonal scaling matrix for x
     * rnoise    --> relative noise in fname [f(x)]
     * stepsz(n) --> workspace (stepsize in i-th component direction)
     * anbr(n)   --> workspace (neighbor in i-th direction)
     */

    double fhat;
    int i, j;
    double xtmpi, xtmpj;

    /* find i-th stepsize and evaluate neighbor in direction of i-th
     * unit vector.
 */
    for (i = 0; i < n; ++i) {
        xtmpi = xpls[i];
        stepsz[i] = pow(rnoise, 1.0/3.0) * fmax2(fabs(xtmpi), 1./sx[i]);
        xpls[i] = xtmpi + stepsz[i];
        (*fcn)(n, xpls, &anbr[i], state);
        xpls[i] = xtmpi; /* restore */
    }

    /* calculate row i of a */
    for (i = 0; i < n; ++i) {
        xtmpi = xpls[i];
        xpls[i] = xtmpi + stepsz[i] * 2.;
        (*fcn)(n, xpls, &fhat, state);
        /* second-order forward difference for the diagonal element */
        a[i + i * nr] = ((fpls - anbr[i]) + (fhat - anbr[i])) / (stepsz[i] * stepsz[i]);

        /* calculate sub-diagonal elements of column */
        if(i == 0) {
            xpls[i] = xtmpi;
            continue;
        }
        xpls[i] = xtmpi + stepsz[i];
        for (j = 0; j < i; ++j) {
            xtmpj = xpls[j];
            xpls[j] = xtmpj + stepsz[j];
            (*fcn)(n, xpls, &fhat, state);
            a[i + j*nr] = ((fpls - anbr[i]) + (fhat - anbr[j])) / (stepsz[i]*stepsz[j]);
            xpls[j] = xtmpj;
        }
        xpls[i] = xtmpi;
    }
} /* sndofd */

static void grdchk(int n, double *x, fcn_p fcn, void *state, double f,
                   double *g, double *typsiz, double *sx, double fscale,
                   double rnf, double analtl, double *wrk1, int *msg)
{
/* Check analytic gradient against estimated gradient
 *
 * PARAMETERS :
 *
 *	n	 --> dimension of problem
 *	x(n)	 --> estimate to a root of fcn
 *	fcn	 --> name of subroutine to evaluate optimization function
 *		     must be declared external in calling routine
 *		     fcn:  r(n) --> r(1)
 *	state	<--> information other than x and n that fcn requires.
 *		     state is not modified in grdchk (but can be
 *		     modified by fcn).
 *	f	 --> function value:  fcn(x)
 *	g(n)	 --> gradient:  g(x)
 *	typsiz(n) --> typical size for each component of x
 *	sx(n)	 --> diagonal scaling matrix:  sx(i)=1./typsiz(i)
 *	fscale	 --> estimate of scale of objective function fcn
 *	rnf	 --> relative noise in optimization function fcn
 *	analtl	 --> tolerance for comparison of estimated and
 *		     analytical gradients
 *	wrk1(n)	 --> workspace
 *	msg	<--  message or error code
 *		     on output: =-21, probable coding error of gradient
 */
    int i;
    double gs, wrk;

    /* compute first order finite difference gradient and compare to
       analytic gradient.
 */
    fstofd(1, 1, n, x, (fcn_p)fcn, state, &f, wrk1, sx, rnf, &wrk, 1);
    for (i = 0; i < n; ++i) {
        gs = fmax2(fabs(f), fscale) / fmax2(fabs(x[i]), typsiz[i]);
        if (fabs(g[i] - wrk1[i]) > fmax2(fabs(g[i]), gs) * analtl) {
            /* analytic and finite-difference gradients disagree beyond
               tolerance: flag probable coding error in user gradient */
            *msg = -21;
            return;
        }
    }
} /* grdchk */

static void heschk(int nr, int n, double *x, fcn_p fcn, fcn_p d1fcn,
                   d2fcn_p d2fcn, void *state, double f, double *g,
                   double *a, double *typsiz, double *sx, double rnf,
                   double analtl, int iagflg, double *udiag, double *wrk1,
                   double *wrk2, int *msg)
{
/* Check analytic hessian against estimated hessian
 *  (this may be done only if the user supplied analytic hessian
 *   d2fcn fills only the lower triangular part and diagonal of a)
 *
 * PARAMETERS :
 *
 *	nr	 --> row dimension of matrix
 *	n	 --> dimension of problem
 *	x(n)	 --> estimate to a root of fcn
 *	fcn	 --> name of subroutine to evaluate optimization function
 *		     must be declared external in calling routine
 *		     fcn:  r(n) --> r(1)
 *	d1fcn	 --> name of subroutine to evaluate gradient of fcn.
 *		     must be declared external in calling routine
 *	d2fcn	 --> name of subroutine to evaluate hessian of fcn.
 *		     must be declared external in calling routine
 *	state	<--> information other than x and n that fcn,
 *		     d1fcn and d2fcn requires.
 *		     state is not modified in heschk (but can be
 *		     modified by fcn, d1fcn or d2fcn).
 * [NOTE(review): continuation of heschk()'s parameter documentation]
 *	f	 --> function value:  fcn(x)
 *	g(n)	<--  gradient:	g(x)
 *	a(n,n)	<--  on exit:  hessian in lower triangular part and diag
 *	typsiz(n) --> typical size for each component of x
 *	sx(n)	 --> diagonal scaling matrix:  sx(i)=1./typsiz(i)
 *	rnf	 --> relative noise in optimization function fcn
 *	analtl	 --> tolerance for comparison of estimated and
 *		     analytical gradients
 *	iagflg	 --> =1 if analytic gradient supplied
 *	udiag(n) --> workspace
 *	wrk1(n)	 --> workspace
 *	wrk2(n)	 --> workspace
 *	msg	<--> message or error code
 *		     on input : if = 1xx do not compare anal + est hess
 *		     on output: = -22, probable coding error of hessian
 */
    int i, j;
    double hs, temp1, temp2;

    /* compute finite difference approximation a to the hessian. */
    if (iagflg)
        fstofd(nr, n, n, x, (fcn_p)d1fcn, state, g, a, sx, rnf, wrk1, 3);
    else
        sndofd(nr, n, x, (fcn_p)fcn, state, f, a, sx, rnf, wrk1, wrk2);

    /* copy lower triangular part of "a" to upper triangular part
       and diagonal of "a" to udiag */
    for (j = 0; j < n; ++j) {
        udiag[j] = a[j + j * nr];
        for (i = j+1; i < n; ++i)
            a[j + i * nr] = a[i + j * nr];
    }

    /* compute analytic hessian and compare to finite difference
       approximation.
 */
    (*d2fcn)(nr, n, x, a, state);
    for (j = 0; j < n; ++j) {
        hs = fmax2(fabs(g[j]), 1.0) / fmax2(fabs(x[j]), typsiz[j]);
        /* compare the analytic diagonal against the finite-difference one */
        if (fabs(a[j + j * nr] - udiag[j]) > fmax2(fabs(udiag[j]), hs) * analtl) {
            *msg = -22;
            return;
        }
        /* compare analytic lower triangle against the finite-difference
           copy stored in the upper triangle above */
        for (i = j+1; i < n; ++i) {
            temp1 = a[i + j * nr];
            temp2 = fabs(temp1 - a[j + i * nr]);
            temp1 = fabs(temp1);
            if (temp2 > fmax2(temp1, hs) * analtl) {
                *msg = -22;
                return;
            }
        }
    }
} /* heschk */

static int opt_stop(int n, double *xpls, double fpls, double *gpls,
                    double *x, int itncnt, int *icscmx, double gradtl,
                    double steptl, double *sx, double fscale, int itnlim,
                    int iretcd, Rboolean mxtake, int *msg)
{
/* Unconstrained minimization stopping criteria :
 *
 * Find whether the algorithm should terminate, due to any
 * of the following:
 *	1) problem solved within user tolerance
 *	2) convergence within user tolerance
 *	3) iteration limit reached
 *	4) divergence or too restrictive maximum step (stepmx) suspected
 *
 * ARGUMENTS :
 *
 *	n	 --> dimension of problem
 *	xpls(n)	 --> new iterate x[k]
 *	fpls	 --> function value at new iterate f(xpls)
 *	gpls(n)	 --> gradient at new iterate, g(xpls), or approximate
 *	x(n)	 --> old iterate x[k-1]
 *	itncnt	 --> current iteration k
 *	icscmx	<--> number consecutive steps >= stepmx
 *		     [retain value between successive calls]
 *	gradtl	 --> tolerance at which relative gradient considered close
 *		     enough to zero to terminate algorithm
 *	steptl	 --> relative step size at which successive iterates
 *		     considered close enough to terminate algorithm
 *	sx(n)	 --> diagonal scaling matrix for x
 *	fscale	 --> estimate of scale of objective function
 *	itnlim	 --> maximum number of allowable iterations
 *	iretcd	 --> return code
 *	mxtake	 --> boolean flag indicating step of maximum length used
 *	msg	 --> if msg includes a term 8, suppress output
 *
 * VALUE :
 *	`itrmcd' : termination code
 */
    int i, jtrmcd;
    double d, relgrd, relstp, rgx, rsx;

    /* last global step failed to locate a point lower than x */
    if (iretcd == 1)
        return 3;

    /* else : */

    /* find direction in which
       relative gradient maximum. */
    /* check whether within tolerance */
    d = fmax2(fabs(fpls), fscale);
    rgx = 0.;
    for (i = 0; i < n; ++i) {
        relgrd = fabs(gpls[i]) * fmax2(fabs(xpls[i]), 1./sx[i]) / d;
        if(rgx < relgrd) rgx = relgrd;
    }
    jtrmcd = 1;
    if (rgx > gradtl) {
        if (itncnt == 0)
            return 0;

        /* find direction in which relative stepsize maximum */
        /* check whether within tolerance. */
        rsx = 0.;
        for (i = 0; i < n; ++i) {
            relstp = fabs(xpls[i] - x[i]) / fmax2(fabs(xpls[i]), 1./sx[i]);
            if(rsx < relstp) rsx = relstp;
        }
        jtrmcd = 2;
        if (rsx > steptl) {
            /* check iteration limit */
            jtrmcd = 4;
            if (itncnt < itnlim) {
                /* check number of consecutive steps \ stepmx */
                if (!mxtake) {
                    *icscmx = 0;
                    return 0;
                } else {
                    ++(*icscmx);
                    /* five consecutive max-length steps => suspect divergence */
                    if (*icscmx < 5)
                        return 0;
                    jtrmcd = 5;
                }
            }
        }
    }
    return jtrmcd;
} /* opt_stop */

static void optchk(int n, double *x, double *typsiz, double *sx,
                   double *fscale, double gradtl, int *itnlim, int *ndigit,
                   double epsm, double *dlt, int *method, int *iexp,
                   int *iagflg, int *iahflg, double *stepmx, int *msg)
{
/* Check input for reasonableness.
 * Return *msg in {-1,-2,..,-7} if something is wrong
 *
 * PARAMETERS :
 *
 *	n	 --> dimension of problem
 *	x(n)	 --> on entry, estimate to root of fcn
 *	typsiz(n) <--> typical size of each component of x
 *	sx(n)	<--  diagonal scaling matrix for x
 *	fscale	<--> estimate of scale of objective function fcn
 *	gradtl	 --> tolerance at which gradient considered close
 *		     enough to zero to terminate algorithm
 *	itnlim	<--> maximum number of allowable iterations
 *	ndigit	<--> number of good digits in optimization function fcn
 *	epsm	 --> machine epsilon
 *	dlt	<--> trust region radius
 *	method	<--> algorithm indicator
 *	iexp	<--> expense flag
 *	iagflg	<--> =1 if analytic gradient supplied
 *	iahflg	<--> =1 if analytic hessian supplied
 *	stepmx	<--> maximum step size
 *	msg	<--> message and error code
 *	ipr	 --> device to which to send output
 */
    int i;
    double stpsiz;

    /* check that parameters only take on acceptable values.
       if not, set them to default values. */
    if (*method < 1 || *method > 3) *method = 1;
    if (*iagflg != 1) *iagflg = 0;
    if (*iahflg != 1) *iahflg = 0;
    if (*iexp != 0) *iexp = 1;
    if (*msg / 2 % 2 == 1 && *iagflg == 0) {
        *msg = -6; return;/* 830 write(ipr,906) msg,iagflg */
    }
    if (*msg / 4 % 2 == 1 && *iahflg == 0) {
        *msg = -7; return;/* 835 write(ipr,907) msg,iahflg */
    }

    /* check dimension of problem */
    if (n <= 0) {
        *msg = -1; return;/* 805 write(ipr,901) n */
    }
    if (n == 1 && *msg % 2 == 0) {
        *msg = -2; return;/* 810 write(ipr,902) */
    }

    /* compute scale matrix */
    for (i = 0; i < n; ++i) {
        if (typsiz[i] == 0.)
            typsiz[i] = 1.;
        else if (typsiz[i] < 0.)
            typsiz[i] = -typsiz[i];
        sx[i] = 1. / typsiz[i];
    }

    /* compute default maximum step size if not provided */
    if (*stepmx <= 0.) {
        stpsiz = 0.;
        for (i = 0; i < n; ++i)
            stpsiz += x[i] * x[i] * sx[i] * sx[i];
        *stepmx = 1000. * fmax2(sqrt(stpsiz), 1);
    }

    /* check function scale */
    if (*fscale == 0.)
        *fscale = 1.;
    else if (*fscale < 0.)
        *fscale = -(*fscale);

    /* check gradient tolerance */
    if (gradtl < 0.) {
        *msg = -3; return;/* 815 write(ipr,903) gradtl */
    }

    /* check iteration limit */
    if (*itnlim <= 0) {
        *msg = -4; return;/* 820 write(ipr,904) itnlim */
    }

    /* check number of digits of accuracy in function fcn */
    if (*ndigit == 0) {
        *msg = -5; return;/* 825 write(ipr,905) ndigit */
    }
    else if (*ndigit < 0) /* use default, = 15 for IEEE double */
        *ndigit = (int) (-log10(epsm));

    /* check trust region radius */
    if (*dlt <= 0.) {
        *dlt = -1.;
    } else if (*dlt > *stepmx) {
        *dlt = *stepmx;
    }
    return;
} /* optchk */

static void prt_result(int nr, int n, const double x[], double f,
                       const double g[], const double *a, const double p[],
                       int itncnt, int iflg)
{
/*
 * PURPOSE
 *
 * Print information on current iteration.
 *
 * PARAMETERS
 *
 *	nr	 --> row dimension of matrix
 *	n	 --> dimension of problem
 *	x(n)	 --> iterate x[k]
 *	f	 --> function value at x[k]
 *	g(n)	 --> gradient at x[k]
 *	a(n,n)	 --> hessian at x[k]
 *	p(n)	 --> step taken
 *	itncnt	 --> iteration number k
 *	iflg	 --> flag controlling info to print
 */
    /* Print iteration number */
    Rprintf("iteration = %d\n", itncnt);

    /* Print step */
    if (iflg != 0) {
        Rprintf("Step:\n");
        semprintRealVector((double *)p, n, 1);
    }

    /* Print current iterate */
    Rprintf("Parameter:\n");
    semprintRealVector((double *)x, n, 1);

    /* Print function value */
    Rprintf("Function Value\n");
    semprintRealVector((double *)&f, 1, 1);

    /* Print gradient */
    Rprintf("Gradient:\n");
    semprintRealVector((double *)g, n, 1);

#ifdef NEVER
    /* Print Hessian */
    /* We don't do this because the printRealMatrix code takes a SEXP
       rather than a double*.  We could do something ugly like use fixed
       e format but that would be UGLY! */
    if (iflg != 0) {
    }
#endif

    Rprintf("\n");
} /* prt_result */

static void optdrv_end(int nr, int n, double *xpls, double *x, double *gpls,
                       double *g, double *fpls, double f, double *a,
                       double *p, int itncnt, int itrmcd, int *msg,
                       void (*print_result)(int, int, const double *, double,
                                            const double *, const double *,
                                            const double *, int, int))
{
/* Common epilogue for optdrv(): fix up the outputs for the "previous
 * iterate was the solution" case, optionally print the result, and
 * clear the message code. */
    int i;

    /* termination : reset xpls,fpls,gpls, if previous iterate solution */
    if (itrmcd == 3) {
        *fpls = f;
        for (i = 0; i < n; ++i) {
            xpls[i] = x[i];
            gpls[i] = g[i];
        }
    }
    if (*msg / 8 % 2 == 0)
        (*print_result)(nr, n, xpls, *fpls, gpls, a, p, itncnt, 0);
    *msg = 0;
} /* optdrv_end */

static void optdrv(int nr, int n, double *x, fcn_p fcn, fcn_p d1fcn,
                   d2fcn_p d2fcn, void *state, double *typsiz, double fscale,
                   int method, int iexp, int *msg, int ndigit, int itnlim,
                   int iagflg, int iahflg, double dlt, double gradtl,
                   double stepmx, double steptl, double *xpls, double *fpls,
                   double *gpls, int *itrmcd, double *a, double *udiag,
                   double *g, double *p, double *sx, double *wrk0,
                   double *wrk1, double *wrk2, double *wrk3, int *itncnt)
{
/* Driver for non-linear optimization problem -- called by optif0() & optif9()
 *
 * PARAMETERS :
 *
 *	nr	 --> row dimension of matrix
 *	n	 --> dimension of problem
 *	x(n)	 --> on entry: estimate to a root of fcn
 *	fcn	 --> name of subroutine to evaluate optimization function
 *		     must be declared external in calling routine
 *		     fcn: R^n --> R
 *	d1fcn	 --> (optional) name of subroutine to evaluate gradient
 *		     of fcn.  must be declared external in calling routine
 *	d2fcn	 --> (optional) name of subroutine to evaluate
 *		     hessian of of fcn.  must be declared external
 *		     in calling routine
 *	state	<--> information other than x and n that fcn,
 *		     d1fcn and d2fcn requires.
 *		     state is not modified in optdrv (but can be
 *		     modified by fcn, d1fcn or d2fcn).
 *	typsiz(n) --> typical size for each component of x
 *	fscale	 --> estimate of scale of objective function
 *	method	 --> algorithm to use to solve minimization problem
 *		     =1 line search
 *		     =2 double dogleg
 *		     =3 more-hebdon
 *	iexp	 --> =1 if optimization function fcn is expensive to
 *		     evaluate, =0 otherwise.  if set then hessian will
 *		     be evaluated by secant update instead of
 *		     analytically or by finite differences
 *	msg	<--> on input:	( > 0) message to inhibit certain
 *		     automatic checks; see do_nlm() in ../main/optimize.c
 *		     on output: ( < 0) error code; =0 no error
 *	ndigit	 --> number of good digits in optimization function fcn
 *	itnlim	 --> maximum number of allowable iterations
 *	iagflg	 --> =1 if analytic gradient supplied
 *	iahflg	 --> =1 if analytic hessian supplied
 *	dlt	 --> trust region radius
 *	gradtl	 --> tolerance at which gradient considered close
 *		     enough to zero to terminate algorithm
 *	stepmx	 --> maximum allowable step size
 *	steptl	 --> relative step size at which successive iterates
 *		     considered close enough to terminate algorithm
 *	xpls(n)	<--> on exit:  xpls is local minimum
 *	fpls	<--> on exit:  function value at solution, xpls
 *	gpls(n)	<--> on exit:  gradient at solution xpls
 *	itrmcd	<--  termination code
 *	a(n,n)	 --> workspace for hessian (or estimate)
 *		     and its cholesky decomposition
 *	udiag(n) --> workspace [for diagonal of hessian]
 *	g(n)	 --> workspace (for gradient at current iterate)
 *	p(n)	 --> workspace for step
 *	sx(n)	 --> workspace (for diagonal scaling matrix)
 *	wrk0(n)	 --> workspace
 *	wrk1(n)	 --> workspace
 *	wrk2(n)	 --> workspace
 *	wrk3(n)	 --> workspace
 *	itncnt	     current iteration, k  {{was `internal'}}
 *
 * internal variables
 *
 *	analtl	     tolerance for comparison of estimated and
 *		     analytical gradients and hessians
 *	epsm	     machine epsilon
 *	f	     function value: fcn(x)
 *	rnf	     relative noise in optimization function fcn.
* noise=10.**(-ndigit) */ Rboolean mxtake = FALSE, noupdt; int i, iretcd, icscmx; double dltp = 0., epsm, phip0 = 0., f, analtl; double dlpsav = 0., phisav = 0., dltsav = 0.;/* -Wall */ double amusav = 0., phpsav = 0.; /* -Wall */ double phi = 0., amu = 0., rnf, wrk; *itncnt = 0; epsm = DBL_EPSILON; optchk(n, x, typsiz, sx, &fscale, gradtl, &itnlim, &ndigit, epsm, &dlt, &method, &iexp, &iagflg, &iahflg, &stepmx, msg); if (*msg < 0) return; for (i = 0; i < n; ++i) p[i] = 0.; rnf = pow(10., -ndigit); rnf = fmax2(rnf, epsm); analtl = sqrt(rnf); analtl = fmax2(0.1, analtl); /* evaluate fcn(x) */ (*fcn)(n, x, &f, state); /* evaluate analytic or finite difference gradient and check analytic gradient, if requested. */ if (!iagflg) { fstofd(1, 1, n, x, (fcn_p)fcn, state, &f, g, sx, rnf, &wrk, 1); } else { /* analytic gradient */ (*d1fcn)(n, x, g, state); if (*msg / 2 % 2 == 0) { grdchk(n, x, (fcn_p)fcn, state, f, g, typsiz, sx, fscale, rnf, analtl, wrk1, msg); if (*msg < 0) return; } } iretcd = -1; *itrmcd = opt_stop(n, x, f, g, wrk1, *itncnt, &icscmx, gradtl, steptl, sx, fscale, itnlim, iretcd, /* mxtake = */FALSE, msg); if (*itrmcd != 0) { optdrv_end(nr, n, xpls, x, gpls, g, fpls, f, a, p, *itncnt, 3, msg, prt_result); return; } if (iexp) { /* if optimization function expensive to evaluate (iexp=1), then * hessian will be obtained by secant updates. get initial hessian.*/ hsnint(nr, n, a, sx, method); } else { /* evaluate analytic or finite difference hessian and check analytic * hessian if requested (only if user-supplied analytic hessian * routine d2fcn fills only lower triangular part and diagonal of a). 
*/ if (!iahflg) { /* no analytic hessian */ if (iagflg) /* anal.gradient */ fstofd(nr, n, n, x, (fcn_p)d1fcn, state, g, a, sx, rnf, wrk1,3); else sndofd(nr, n, x, (fcn_p)fcn, state, f, a, sx, rnf, wrk1, wrk2); } else { /* analytic hessian */ if (*msg / 4 % 2 == 1) { (*d2fcn)(nr, n, x, a, state); } else { heschk(nr, n, x, (fcn_p)fcn, (fcn_p)d1fcn, (d2fcn_p)d2fcn, state, f, g, a, typsiz, sx, rnf, analtl, iagflg, udiag, wrk1, wrk2, msg); /* heschk evaluates d2fcn and checks it against the finite * difference hessian which it calculates by calling fstofd * (if iagflg == 1) or sndofd (otherwise). */ if (*msg < 0) return; } } } if (*msg / 8 % 2 == 0) prt_result(nr, n, x, f, g, a, p, *itncnt, 1); /* THE Iterations : */ while(1) { ++(*itncnt); /* find perturbed local model hessian and its LL+ decomposition * ( skip this step if line search or dogstep techniques being used * with secant updates (i.e. method == 1 or 2). * cholesky decomposition L already obtained from secfac.) */ if (iexp && method != 3) { goto L105; } L103: chlhsn(nr, n, a, epsm, sx, udiag); L105: /* solve for newton step: ap=-g */ for (i = 0; i < n; ++i) wrk1[i] = - g[i]; lltslv(nr, n, a, p, wrk1); /* decide whether to accept newton step xpls=x + p */ /* or to choose xpls by a global strategy. */ if (iagflg == 0 && method != 1) { dltsav = dlt; if (method != 2) {/* i.e. 
method = 3 */ amusav = amu; dlpsav = dltp; phisav = phi; phpsav = phip0; } } switch(method) { case 1: lnsrch(n, x, f, g, p, xpls, fpls, (fcn_p)fcn, state, &mxtake, &iretcd, stepmx, steptl, sx); break; case 2: dogdrv(nr, n, x, f, g, a, p, xpls, fpls, (fcn_p)fcn, state, sx, stepmx, steptl, &dlt, &iretcd, &mxtake, wrk0, wrk1, wrk2, wrk3, itncnt); break; case 3: hookdrv(nr, n, x, f, g, a, udiag, p, xpls, fpls, (fcn_p)fcn, state, sx, stepmx, steptl, &dlt, &iretcd, &mxtake, &amu, &dltp, &phi, &phip0, wrk0, wrk1 , wrk2, epsm, *itncnt); break; } /* if could not find satisfactory step and forward difference */ /* gradient was used, retry using central difference gradient. */ if (iretcd == 1 && iagflg == 0) { iagflg = -1; /* set iagflg for central differences */ fstocd(n, x, (fcn_p)fcn, state, sx, rnf, g); if (method == 1) goto L105; dlt = dltsav; if (method == 2) goto L105; /* else : method == 3 */ amu = amusav; dltp = dlpsav; phi = phisav; phip0 = phpsav; goto L103; } /* calculate step for output */ for (i = 0; i < n; ++i) p[i] = xpls[i] - x[i]; /* calculate gradient at xpls */ switch(iagflg) { case -1: /* central difference gradient */ fstocd(n, xpls, (fcn_p)fcn, state, sx, rnf, gpls); break; case 0: /* forward difference gradient */ fstofd(1, 1, n, xpls, (fcn_p)fcn, state, fpls, gpls, sx, rnf, &wrk, 1); break; default: /* analytic gradient */ (*d1fcn)(n, xpls, gpls, state); } /* check whether stopping criteria satisfied */ *itrmcd = opt_stop(n, xpls, *fpls, gpls, x, *itncnt, &icscmx, gradtl, steptl, sx, fscale, itnlim, iretcd, mxtake, msg); if(*itrmcd != 0) break; /* evaluate hessian at xpls */ if (iexp) { /* expensive obj.fun. 
 */
            if (method == 3)
                secunf(nr, n, x, g, a, udiag, xpls, gpls, epsm, *itncnt,
                       rnf, iagflg, &noupdt, wrk1, wrk2, wrk3);
            else
                secfac(nr, n, x, g, a, xpls, gpls, epsm, *itncnt, rnf,
                       iagflg, &noupdt, wrk0, wrk1, wrk2, wrk3);
        }
        else { /* iexp == 0 */
            if (!iahflg) {
                if (iagflg)
                    fstofd(nr, n, n, xpls, (fcn_p)d1fcn, state, gpls, a, sx,
                           rnf, wrk1, 3);
                else /* (iagflg != 1) */
                    sndofd(nr, n, xpls, (fcn_p)fcn, state, *fpls, a, sx,
                           rnf, wrk1, wrk2);
            }
            else /* analytic hessian */
                (*d2fcn)(nr, n, xpls, a, state);
        }
        if (*msg / 16 % 2 == 1)
            prt_result(nr, n, xpls, *fpls, gpls, a, p, *itncnt, 1);

        /* x <-- xpls  and	g <-- gpls  and	 f <-- fpls */
        f = *fpls;
        for (i = 0; i < n; ++i) {
            x[i] = xpls[i];
            g[i] = gpls[i];
        }
    } /* END while(1) */

    optdrv_end(nr, n, xpls, x, gpls, g, fpls, f, a, p, *itncnt, *itrmcd,
               msg, prt_result);
} /* optdrv */

static void dfault(int n, double *x, double *typsiz, double *fscale,
                   int *method, int *iexp, int *msg, int *ndigit,
                   int *itnlim, int *iagflg, int *iahflg, double *dlt,
                   double *gradtl, double *stepmx, double *steptl)
{
/* Set default values for each input variable to minimization algorithm
 * for optif0() only.
 *
 * PARAMETERS :
 *
 * INPUT:
 *	n	  dimension of problem
 *	x(n)	  initial guess to solution (to compute max step size)
 * OUTPUT:
 *	typsiz(n) typical size for each component of x
 *	fscale	  estimate of scale of minimization function
 *	method	  algorithm to use to solve minimization problem
 *	iexp	  =0 if minimization function not expensive to evaluate
 *	msg	  message to inhibit certain automatic checks + output
 *	ndigit	  number of good digits in minimization function
 *	itnlim	  maximum number of allowable iterations
 *	iagflg	  =0 if analytic gradient not supplied
 *	iahflg	  =0 if analytic hessian not supplied
 *	dlt	  trust region radius
 *	gradtl	  tolerance at which gradient considered close enough
 *		  to zero to terminate algorithm
 *	stepmx	  value of zero to trip default maximum in optchk
 *	steptl	  tolerance at which successive iterates considered
 *		  close enough to terminate algorithm
 */
    double epsm;
    int i;

    /* set typical size of x and minimization function */
    for (i = 0; i < n; ++i)
        typsiz[i] = 1.;
    *fscale = 1.;

    /* set tolerances */
    epsm = DBL_EPSILON; /* for IEEE : = 2^-52 ~= 2.22 e-16 */
    *gradtl = pow(epsm, 1./3.); /* for IEEE : = 2^(-52/3) ~= 6.055 e-6 */
    *steptl = sqrt(epsm); /* for IEEE : = 2^-26 ~= 1.490 e-8 */
    *stepmx = 0.;/* -> compute default in optchk() */
    *dlt = -1.;/* (not needed for method 1) */

    /* set flags */
    *method = 1;
    *iexp = 1;
    *msg = 0;
    *ndigit = -1;/* -> compute default = floor(-log10(EPS)) in optchk() */
    *itnlim = 150;
    *iagflg = 0;/* no gradient */
    *iahflg = 0;/* no hessian */
} /* dfault() */

void optif0(int nr, int n, double *x, fcn_p fcn, void *state, double *xpls,
            double *fpls, double *gpls, int *itrmcd, double *a, double *wrk)
{
/* Provide simplest interface to minimization package.
 * User has no control over options.
 *
 * PARAMETERS :
 *
 *	nr	 --> row dimension of matrix
 *	n	 --> dimension of problem
 *	x(n)	 --> initial estimate of minimum
 *	fcn	 --> name of routine to evaluate minimization function.
 *		     must be declared external in calling routine.
 *	state	<--> information other than x and n that fcn requires
 *		     state is not modified in optif0 (but can be
 *		     modified by fcn).
 *	xpls(n)	<--  local minimum
 *	fpls	<--  function value at local minimum xpls
 *	gpls(n)	<--  gradient at local minimum xpls
 *	itrmcd	<--  termination code
 *	a(n,n)	 --> workspace
 *	wrk(n,9) --> workspace
 */
    int iexp, iagflg, iahflg;
    int ndigit, method, itnlim, itncnt;
    double fscale, gradtl, steptl, stepmx, dlt;
    int msg;

    /* Function Body */
    dfault(n, x, &wrk[nr], &fscale, &method, &iexp, &msg, &ndigit,
           &itnlim, &iagflg, &iahflg, &dlt, &gradtl, &stepmx, &steptl);
    /* d1fcn_dum / d2fcn_dum are dummies: dfault() sets iagflg=iahflg=0,
       so finite differences are used and the dummies are never called */
    optdrv(nr, n, x, (fcn_p)fcn, (fcn_p)d1fcn_dum, (d2fcn_p)d2fcn_dum,
           state, &wrk[nr * 3], fscale, method, iexp, &msg, ndigit, itnlim,
           iagflg, iahflg, dlt, gradtl, stepmx, steptl, xpls, fpls, gpls,
           itrmcd, a, wrk, &wrk[nr], &wrk[nr * 2], &wrk[nr * 4],
           &wrk[nr * 5], &wrk[nr * 6], &wrk[nr * 7], &wrk[nr * 8], &itncnt);
} /* optif0 */

/* ---- this one is called from ../main/optimize.c : --------------- */
void optif9(int nr, int n, double *x, fcn_p fcn, fcn_p d1fcn, d2fcn_p d2fcn,
            void *state, double *typsiz, double fscale, int method,
            int iexp, int *msg, int ndigit, int itnlim, int iagflg,
            int iahflg, double dlt, double gradtl, double stepmx,
            double steptl, double *xpls, double *fpls, double *gpls,
            int *itrmcd, double *a, double *wrk, int *itncnt)
{
/* provide complete interface to minimization package.
 * user has full control over options.
 *
 * PARAMETERS :
 *
 *	nr	 --> row dimension of matrix
 *	n	 --> dimension of problem
 *	x(n)	 --> on entry: estimate to a root of fcn
 *	fcn	 --> name of subroutine to evaluate optimization function
 *		     must be declared external in calling routine
 *		     fcn:  r(n) --> r(1)
 *	d1fcn	 --> (optional) name of subroutine to evaluate gradient
 *		     of fcn.  must be declared external in calling routine
 *	d2fcn	 --> (optional) name of subroutine to evaluate hessian of
 *		     of fcn.  must be declared external in calling routine
 *	state	<--> information other than x and n that fcn,
 *		     d1fcn and d2fcn requires.
 *		     state is not modified in optif9 (but can be
 *		     modified by fcn, d1fcn or d2fcn).
 *	typsiz(n) --> typical size for each component of x
 *	fscale	 --> estimate of scale of objective function
 *	method	 --> algorithm to use to solve minimization problem
 *		     =1 line search
 *		     =2 double dogleg
 *		     =3 more-hebdon
 *	iexp	 --> =1 if optimization function fcn is expensive to
 *		     evaluate, =0 otherwise.  if set then hessian will
 *		     be evaluated by secant update instead of
 *		     analytically or by finite differences
 *	msg	<--> on input:	( > 0) to inhibit certain automatic checks
 *		     on output: ( < 0) error code; =0 no error
 *	ndigit	 --> number of good digits in optimization function fcn
 *	itnlim	 --> maximum number of allowable iterations
 *	iagflg	 --> =1 if analytic gradient supplied
 *	iahflg	 --> =1 if analytic hessian supplied
 *	dlt	 --> trust region radius
 *	gradtl	 --> tolerance at which gradient considered close
 *		     enough to zero to terminate algorithm
 *	stepmx	 --> maximum allowable step size
 *	steptl	 --> relative step size at which successive iterates
 *		     considered close enough to terminate algorithm
 *	xpls(n)	<--> on exit:  xpls is local minimum
 *	fpls	<--> on exit:  function value at solution, xpls
 *	gpls(n)	<--> on exit:  gradient at solution xpls
 *	itrmcd	<--  termination code (in 0..5 ; 0 is "perfect");
 *		     see optcode() in ../main/optimize.c for meaning
 *	a(n,n)	 --> workspace for hessian (or estimate)
 *		     and its cholesky decomposition
 *	wrk(n,8) --> workspace
 *	itncnt	<--> iteration count
 */
    /* slice the wrk(n,8) workspace into the eight n-vectors optdrv() needs */
    optdrv(nr, n, x, (fcn_p)fcn, (fcn_p)d1fcn, (d2fcn_p)d2fcn, state,
           typsiz, fscale, method, iexp, msg, ndigit, itnlim, iagflg,
           iahflg, dlt, gradtl, stepmx, steptl, xpls, fpls, gpls, itrmcd,
           a, wrk, wrk + nr, wrk + nr * 2, wrk + nr * 3, wrk + nr * 4,
           wrk + nr * 5, wrk + nr * 6, wrk + nr * 7, itncnt);
} /* optif9 */
sem/src/utils.h0000644000175000017500000000262412101154620013264 0ustar
nileshnilesh/* * ===================================================================================== * * Filename: utils.h * * Description: utlities functions. * * Version: 1.0 * Created: 27/01/2013 01:50:08 * Revision: none * Compiler: gcc * * Author: Zhenghua Nie (ZHN), zhenghua.nie@gmail.com * Company: McMaster University * * Copyright (C) 2013 Zhenghua Nie. All Rights Reserved. * This code is published under GNU GENERAL PUBLIC LICENSE. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed WITHOUT ANY WARRANTY. See the * GNU General Public License for more details. * * If you do not have a copy of the GNU General Public License, * write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * * * ===================================================================================== */ #ifndef __CSEMUTILITY_H__ #define __CSEMUTILITY_H__ #ifdef __cplusplus extern "C" { #endif void semprintRealVector(const double *x, int n, int index); #ifdef __cplusplus } #endif #endif sem/src/csemnlm.c0000644000175000017500000010016613664571423013600 0ustar nileshnilesh/* ******************** * Please note that this function "csenlm" is modified from the R core program main/optimize.c * by Zhenghua Nie. * The major goal of this modification is that we want to call "nlm" in C/C++ when * the objetive function and gradients, or hessians are computed in C/C++. This is for * speed-up of the package "sem". In future, if we find a more efficient solver of * non-linear optimization problems, we may replace the solver. 
 * ********************/

/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
 *  Copyright (C) 1998--2011  The R Development Core Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  http://www.r-project.org/Licenses/
 */

#include "csemnlm.h"

/* General Nonlinear Optimization */

/* NaN test that avoids ISNAN: NaN is the only value unequal to itself */
static int csem_isnan(double x)
{
    return(x != x);
}

/* Initialize the storage in the table of computed function values */
static void FT_init(int n, int FT_size, function_info *state)
{
    int i, j;
    int have_gradient, have_hessian;
    ftable *Ftable;
    int modeln = state->model->n;
    int m = state->model->m;

    have_gradient = state->have_gradient;
    have_hessian = state->have_hessian;

    /* R_alloc memory is reclaimed by R at the end of the .Call/.External,
       so no explicit free is needed anywhere below */
    Ftable = (ftable *)R_alloc(FT_size, sizeof(ftable));

    for (i = 0; i < FT_size; i++) {
        Ftable[i].x = (double *)R_alloc(n, sizeof(double));
        Ftable[i].A = (double *)R_alloc(m*m, sizeof(double)); //A: m-by-m
        Ftable[i].P = (double *)R_alloc(m*m, sizeof(double)); //P: m-by-m
        Ftable[i].C = (double *)R_alloc(modeln*modeln, sizeof(double)); //After the computation, C is modeln-by-modeln.
        /* initialize to unlikely parameter values */
        for (j = 0; j < n; j++) {
            Ftable[i].x[j] = DBL_MAX;
        }
        if (have_gradient) {
            Ftable[i].grad = (double *)R_alloc(n, sizeof(double));
            if (have_hessian) {
                Ftable[i].hess = (double *)R_alloc(n * n, sizeof(double));
            }
        }
    }

    state->Ftable = Ftable;
    state->FT_size = FT_size;
    state->FT_last = -1;
}

/* Multi-group variant of FT_init: per-group dimensions come from the
   model as R integer vectors, and A/P/C are concatenations of the
   per-group matrices */
static void msem_FT_init(int n, int FT_size, msem_function_info *state)
{
    int i, j;
    int have_gradient, have_hessian;
    msem_ftable *Ftable;
    int G = state->model->G; //number of groups
    int *modeln = (int *)R_alloc(G, sizeof(int));
    int *m = (int *)R_alloc(G, sizeof(int));
    Memcpy(modeln, INTEGER(AS_INTEGER(state->model->n)), G);
    Memcpy(m, INTEGER(AS_INTEGER(state->model->m)), G);

    /* total sizes of the stacked per-group m[g]*m[g] and n[g]*n[g] blocks */
    int totalm=0, totaln=0;
    for(i = 0 ; i < G; ++i) {
        totalm += m[i]*m[i];
        totaln += modeln[i]*modeln[i];
    }
    state->sizeC = totaln;
    state->sizeAP = totalm;

    have_gradient = state->have_gradient;
    have_hessian = state->have_hessian;

    Ftable = (msem_ftable *)R_alloc(FT_size, sizeof(msem_ftable));

    for (i = 0; i < FT_size; i++) {
        Ftable[i].x = (double *)R_alloc(n, sizeof(double));
        Ftable[i].A = (double *)R_alloc(totalm, sizeof(double)); //A: m-by-m
        Ftable[i].P = (double *)R_alloc(totalm, sizeof(double)); //P: m-by-m
        Ftable[i].C = (double *)R_alloc(totaln, sizeof(double)); //After the computation, C is modeln-by-modeln.
Ftable[i].ff = (double *)R_alloc(G, sizeof(double)); /* initialize to unlikely parameter values */ for (j = 0; j < n; j++) { Ftable[i].x[j] = DBL_MAX; } if (have_gradient) { Ftable[i].grad = (double *)R_alloc(n, sizeof(double)); if (have_hessian) { Ftable[i].hess = (double *)R_alloc(n * n, sizeof(double)); } } } state->Ftable = Ftable; state->FT_size = FT_size; state->FT_last = -1; return; } /* Store an entry in the table of computed function values */ static void FT_store(int n, const double f, const double *x, const double *grad, const double *hess, const double *A, const double *P, const double *C, function_info *state) { int ind; ind = (++(state->FT_last)) % (state->FT_size); state->Ftable[ind].fval = f; Memcpy(state->Ftable[ind].x, x, n); Memcpy(state->Ftable[ind].C, C, state->model->n*state->model->n); Memcpy(state->Ftable[ind].A, A, state->model->m*state->model->m); Memcpy(state->Ftable[ind].P, P, state->model->m*state->model->m); if (grad) { Memcpy(state->Ftable[ind].grad, grad, n); if (hess) { Memcpy(state->Ftable[ind].hess, hess, n * n); } } } static void msem_FT_store(int n, const double f, const double *x, const double *grad, const double *hess, const double *A, const double *P, const double *C, const double *ff, msem_function_info *state) { int ind; ind = (++(state->FT_last)) % (state->FT_size); state->Ftable[ind].fval = f; Memcpy(state->Ftable[ind].x, x, n); Memcpy(state->Ftable[ind].C, C, state->sizeC); Memcpy(state->Ftable[ind].A, A, state->sizeAP); Memcpy(state->Ftable[ind].P, P, state->sizeAP); Memcpy(state->Ftable[ind].ff, ff, state->model->G); if (grad) { Memcpy(state->Ftable[ind].grad, grad, n); if (hess) { Memcpy(state->Ftable[ind].hess, hess, n * n); } } } /* Check for stored values in the table of computed function values. 
   Returns the index in the table or -1 for failure */
/* Single-group cache lookup: scan the circular table from the most
   recently stored entry backwards, comparing the full parameter vector
   with exact (bitwise) equality. */
static int FT_lookup(int n, const double *x, function_info *state)
{
    double *ftx;
    int i, j, ind, matched;
    int FT_size, FT_last;
    ftable *Ftable;

    FT_last = state->FT_last;
    FT_size = state->FT_size;
    Ftable = state->Ftable;
    for (i = 0; i < FT_size; i++) {
	ind = (FT_last - i) % FT_size;
	/* why can't they define modulus correctly */
	if (ind < 0) ind += FT_size;	/* C's % can be negative for negative operands */
	ftx = Ftable[ind].x;
	if (ftx) {
	    matched = 1;
	    for (j = 0; j < n; j++) {
		if (x[j] != ftx[j]) {
		    matched = 0;
		    break;
		}
	    }
	    if (matched) return ind;
	}
    }
    return -1;
}

/* Multi-group version of FT_lookup(); identical search over msem_ftable. */
static int msem_FT_lookup(int n, const double *x, msem_function_info *state)
{
    double *ftx;
    int i, j, ind, matched;
    int FT_size, FT_last;
    msem_ftable *Ftable;

    FT_last = state->FT_last;
    FT_size = state->FT_size;
    Ftable = state->Ftable;
    for (i = 0; i < FT_size; i++) {
	ind = (FT_last - i) % FT_size;
	/* why can't they define modulus correctly */
	if (ind < 0) ind += FT_size;
	ftx = Ftable[ind].x;
	if (ftx) {
	    matched = 1;
	    for (j = 0; j < n; j++) {
		if (x[j] != ftx[j]) {
		    matched = 0;
		    break;
		}
	    }
	    if (matched) return ind;
	}
    }
    return -1;
}

/* This how the optimizer sees them */
/* Single-group objective wrapper passed (as fcn_p) to optif9/fdhess:
   serves cached values when available, otherwise validates x, allocates
   scratch gradient/Hessian/A/P/C buffers and calls the user objective. */
static void fcn(int n, const double x[], double *f, function_info *state)
{
    ftable *Ftable;
    double *g=NULL;
    double *h=NULL;
    double *C=NULL;
    double *A=NULL;
    double *P=NULL;
    int i;

    Ftable = state->Ftable;
    if ((i = FT_lookup(n, x, state)) >= 0) {	/* cache hit: reuse stored value */
	*f = Ftable[i].fval;
	return;
    }
    for (i = 0; i < n; i++) {
	if (!R_FINITE(x[i])) error(("non-finite value supplied by 'nlm'"));
    }
    if(state->have_gradient) {
	g = (double *)R_alloc(n, sizeof(double));
	memset(g, 0, n*sizeof(double));
	if(state->have_hessian) {
	    h = (double *)R_alloc(n*n, sizeof(double));
	    memset(h, 0, n*n*sizeof(double));
	}
    }
    int m = state->model->m;
    int modeln = state->model->n;
    int maxmn = (m > modeln ? m: modeln);	/* C is used as m-by-m scratch and n-by-n result */
    C = (double *)R_alloc(maxmn*maxmn, sizeof(double));	//After the compuation, C is n-by-n.
A = (double *)R_alloc(m*m, sizeof(double)); P = (double *)R_alloc(m*m, sizeof(double)); myfcn_p myobjfun = (myfcn_p)state->myobjfun; (myobjfun)(n, x, f, g, h, A, P, C, state); ++state->n_eval; //number of the evaluations. if((*f != *f) || !R_FINITE(*f)) { warning(("NA//Inf replaced by maximum positive value")); *f = DBL_MAX; } FT_store(n, *f, x, g, h, A, P, C, state); return; } static void msem_fcn(int n, const double x[], double *f, msem_function_info *state) { msem_ftable *Ftable; double *g=NULL; double *h=NULL; double *C=NULL; double *A=NULL; double *P=NULL; double *ff=NULL; int i; Ftable = state->Ftable; if ((i = msem_FT_lookup(n, x, state)) >= 0) { *f = Ftable[i].fval; return; } for (i = 0; i < n; i++) { if (!R_FINITE(x[i])) error(("non-finite value supplied by 'nlm'")); } if(state->have_gradient) { g = (double *)R_alloc(n, sizeof(double)); memset(g, 0, n*sizeof(double)); if(state->have_hessian) { h = (double *)R_alloc(n*n, sizeof(double)); memset(h, 0, n*n*sizeof(double)); } } C = (double *)R_alloc(state->sizeC, sizeof(double)); //After the compuation, C is n-by-n. I need to check the size. A = (double *)R_alloc(state->sizeAP, sizeof(double)); P = (double *)R_alloc(state->sizeAP, sizeof(double)); ff = (double *)R_alloc(state->model->G, sizeof(double)); msem_fcn_p myobjfun = (msem_fcn_p)state->myobjfun; (myobjfun)(n, x, f, g, h, A, P, C,ff, state); ++state->n_eval; //number of the evaluations. 
if((*f != *f) || !R_FINITE(*f)) { warning(("NA/Inf replaced by maximum positive value")); *f = DBL_MAX; } msem_FT_store(n, *f, x, g, h, A, P, C, ff, state); return; } /* gradient */ static void Cd1fcn(int n, const double x[], double *g, function_info *state) { int ind; if ((ind = FT_lookup(n, x, state)) < 0) { /* shouldn't happen */ fcn(n, x, g, state); if ((ind = FT_lookup(n, x, state)) < 0) { error(("function value caching for optimization is seriously confused")); } } Memcpy(g, state->Ftable[ind].grad, n); return; } static void msem_Cd1fcn(int n, const double x[], double *g, msem_function_info *state) { int ind; if ((ind = msem_FT_lookup(n, x, state)) < 0) { /* shouldn't happen */ msem_fcn(n, x, g, state); if ((ind = msem_FT_lookup(n, x, state)) < 0) { error(("function value caching for optimization is seriously confused")); } } Memcpy(g, state->Ftable[ind].grad, n); return; } /* hessian */ static void Cd2fcn(int nr, int n, const double x[], double *h, function_info *state) { int j, ind; if ((ind = FT_lookup(n, x, state)) < 0) { /* shouldn't happen */ fcn(n, x, h, state); if ((ind = FT_lookup(n, x, state)) < 0) { error(("function value caching for optimization is seriously confused")); } } for (j = 0; j < n; j++) { /* fill in lower triangle only */ Memcpy( h + j*(n + 1), state->Ftable[ind].hess + j*(n + 1), n - j); } return; } static void msem_Cd2fcn(int nr, int n, const double x[], double *h, function_info *state) { int j, ind; if ((ind = FT_lookup(n, x, state)) < 0) { /* shouldn't happen */ fcn(n, x, h, state); if ((ind = FT_lookup(n, x, state)) < 0) { error(("function value caching for optimization is seriously confused")); } } for (j = 0; j < n; j++) { /* fill in lower triangle only */ Memcpy( h + j*(n + 1), state->Ftable[ind].hess + j*(n + 1), n - j); } return; } /* A, P, C */ static void returnAPCfcn(int n, const double x[], double *A, double *P, double *C, function_info *state) { int ind; if ((ind = FT_lookup(n, x, state)) < 0) { /* shouldn't happen */ 
	fcn(n, x, C, state);	/* C doubles as scratch for the objective value here */
	if ((ind = FT_lookup(n, x, state)) < 0) {
	    error(("function value caching for optimization is seriously confused"));
	}
    }
    const int modeln = state->model->n;
    const int m = state->model->m;
    Memcpy(A, state->Ftable[ind].A, m*m);
    Memcpy(P, state->Ftable[ind].P, m*m);
    Memcpy(C, state->Ftable[ind].C, modeln*modeln);
    return;
}

/* Multi-group version of returnAPCfcn(): also returns the G per-group
   objective values in ff.  Sizes sizeAP/sizeC are the concatenated
   per-group block lengths set up by msem_FT_init(). */
static void msem_returnAPCfcn(int n, const double x[], double *A, double *P, double *C, double *ff, msem_function_info *state)
{
    int ind;

    if ((ind = msem_FT_lookup(n, x, state)) < 0) {	/* shouldn't happen */
	msem_fcn(n, x, C, state);
	if ((ind = msem_FT_lookup(n, x, state)) < 0) {
	    error(("function value caching for optimization is seriously confused"));
	}
    }
    Memcpy(A, state->Ftable[ind].A, state->sizeAP);
    Memcpy(P, state->Ftable[ind].P, state->sizeAP);
    Memcpy(C, state->Ftable[ind].C, state->sizeC);
    Memcpy(ff, state->Ftable[ind].ff, state->model->G);
    return;
}

/* Fatal errors - we don't deliver an answer */
/* Map a negative optif9/nlm status to an R error (does not return). */
static void opterror(int nerr)
{
    switch(nerr) {
    case -1: error(("non-positive number of parameters in nlm"));
    case -2: error(("nlm is inefficient for 1-d problems"));
    case -3: error(("invalid gradient tolerance in nlm"));
    case -4: error(("invalid iteration limit in nlm"));
    case -5: error(("minimization function has no good digits in nlm"));
    case -6: error(("no analytic gradient to check in nlm!"));
    case -7: error(("no analytic Hessian to check in nlm!"));
    case -21: error(("probable coding error in analytic gradient"));
    case -22: error(("probable coding error in analytic Hessian"));
    default: error(("*** unknown error message (msg = %d) in nlm()\n*** should not happen!"), nerr);
    }
}

/* Warnings - we return a value, but print a warning */
/* Print a human-readable interpretation of the optif9 termination code. */
static void optcode(int code)
{
    switch(code) {
    case 1:
	Rprintf(("Relative gradient close to zero.\n"));
	Rprintf(("Current iterate is probably solution.\n"));
	break;
    case 2:
	Rprintf(("Successive iterates within tolerance.\n"));
	Rprintf(("Current iterate is probably solution.\n"));
	break;
    case 3:
Rprintf(("Last global step failed to locate a point lower than x.\n")); Rprintf(("Either x is an approximate local minimum of the function,\n\ the function is too non-linear for this algorithm,\n\ or steptol is too large.\n")); break; case 4: Rprintf(("Iteration limit exceeded. Algorithm failed.\n")); break; case 5: Rprintf(("Maximum step size exceeded 5 consecutive times.\n\ Either the function is unbounded below,\n\ becomes asymptotic to a finite value\n\ from above in some direction,\n"\ "or stepmx is too small.\n")); break; } Rprintf("\n"); } /* NOTE: The actual Dennis-Schnabel algorithm `optif9' is in ../appl/uncmin.c */ /* `x0' : inital parameter value */ /* `want_hessian' : H. required? */ /* `typsize' : typical size of parameter elements */ /* `fscale' : expected function size */ /* `msg' (bit pattern) */ /* `iterlim' (def. 100) */ //iagflg = 0; /* No analytic gradient */ // iahflg = 0; /* No analytic hessian */ // n = 0; //x = fixparam(x0, &n); //typsiz = fixparam(typsize, &n); SEXP csemnlm(double *x0, int n, int iagflg, int iahflg, int want_hessian, double *typsiz, double fscale, int msg, int ndigit, double gradtl, double stepmx, double steptol, int itnlim, void *model_input, myfcn_p myobjfun, int optimize) { SEXP value, names; model_info *model = (model_info *)model_input; if(SEM_DEBUG) Rprintf("Optimize: [%d]\n", optimize); double *x,*xpls, *gpls, fpls, *a, *wrk, dlt; int code, i, j, k, method, iexp, omsg, itncnt; x = (double *)R_alloc(n, sizeof(double)); Memcpy(x, x0, n); //initial function_info, this will be transfered into nlm. 
function_info *state; state = (function_info *) R_alloc(1, sizeof(function_info)); state->model = model; state->have_gradient = iagflg; state->have_hessian = iahflg; state->n_eval = 0; state->myobjfun = (myfcn_p *) myobjfun; /* .Internal( * nlm(function(x) f(x, ...), p, hessian, typsize, fscale, * msg, ndigit, gradtol, stepmax, steptol, iterlim) */ omsg = msg ; if (((msg/4) % 2) && !iahflg) { /* skip check of analytic Hessian */ msg -= 4; } if (((msg/2) % 2) && !iagflg) { /* skip check of analytic gradient */ msg -= 2; } FT_init(n, FT_SIZE, state); method = 1; /* Line Search */ iexp = iahflg ? 0 : 1; /* Function calls are expensive */ dlt = 1.0; xpls = (double*)R_alloc(n, sizeof(double)); gpls = (double*)R_alloc(n, sizeof(double)); a = (double*)R_alloc(n*n, sizeof(double)); wrk = (double*)R_alloc(8*n, sizeof(double)); //we will not optimize, only return the objective value, gradients and Hessian. if(optimize !=1 ) { int m = state->model->m; int modeln = state->model->n; int maxmn = (m > modeln ? m: modeln); double *matrixA = (double *)R_alloc(m*m, sizeof(double)); //After the compuation, C is n-by-n. double *P = (double *)R_alloc(m*m, sizeof(double)); //After the compuation, C is n-by-n. double *C = (double *)R_alloc(maxmn*maxmn, sizeof(double)); //After the compuation, C is n-by-n. memset(gpls, 0, n*sizeof(double)); memset(a, 0, n*n*sizeof(double)); (*myobjfun)(n, x0, &fpls, gpls, a , matrixA, P, C, state); int num_objs=2; //x0, objective, //A, P, C if(!csem_isnan(*matrixA)) ++num_objs; if(!csem_isnan(*P)) ++num_objs; if(!csem_isnan(*C)) ++num_objs; if(iagflg) { ++num_objs; if(!iahflg && want_hessian) {//we need to compute Hessian if it is not provided.) 
fdhess(n, x0, fpls, (fcn_p)fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz ); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } } else if(want_hessian) { fdhess(n, x0, fpls, (fcn_p)fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz ); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } if(want_hessian) ++num_objs; PROTECT(value = allocVector(VECSXP, num_objs)); PROTECT(names = allocVector(STRSXP, num_objs)); k = 0; SET_STRING_ELT(names, k, mkChar("minimum")); SET_VECTOR_ELT(value, k, ScalarReal(fpls)); k++; SET_STRING_ELT(names, k, mkChar("estimate")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = x0[i]; k++; if(iagflg) { SET_STRING_ELT(names, k, mkChar("gradient")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = gpls[i]; k++; } if(want_hessian){ SET_STRING_ELT(names, k, mkChar("hessian")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, n, n)); for (i = 0; i < n * n; i++) REAL(VECTOR_ELT(value, k))[i] = a[i]; k++; } /* A */ if(!csem_isnan(*matrixA)) { SET_STRING_ELT(names, k, mkChar("A")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, m, m)); for (i = 0; i < m * m; i++) REAL(VECTOR_ELT(value, k))[i] = matrixA[i]; k++; } /* P */ if(!csem_isnan(*P)) { SET_STRING_ELT(names, k, mkChar("P")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, m, m)); for (i = 0; i < m * m; i++) REAL(VECTOR_ELT(value, k))[i] = P[i]; k++; } /* C */ if(!csem_isnan(*C)) { SET_STRING_ELT(names, k, mkChar("C")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, modeln, modeln)); for (i = 0; i < modeln * modeln; i++) REAL(VECTOR_ELT(value, k))[i] = C[i]; k++; } setAttrib(value, R_NamesSymbol, names); UNPROTECT(2); } else { /* * Dennis + Schnabel Minimizer * * SUBROUTINE OPTIF9(NR,N,X,FCN,D1FCN,D2FCN,TYPSIZ,FSCALE, * + METHOD,IEXP,MSG,NDIGIT,ITNLIM,IAGFLG,IAHFLG,IPR, * + DLT,GRADTL,STEPMX,STEPTOL, * + 
XPLS,FPLS,GPLS,ITRMCD,A,WRK) * * * Note: I have figured out what msg does. * It is actually a sum of bit flags as follows * 1 = don't check/warn for 1-d problems * 2 = don't check analytic gradients * 4 = don't check analytic hessians * 8 = don't print start and end info * 16 = print at every iteration * Using msg=9 is absolutely minimal * I think we always check gradients and hessians */ optif9(n, n, x, (fcn_p) fcn, (fcn_p) Cd1fcn, (d2fcn_p) Cd2fcn, state, typsiz, fscale, method, iexp, &msg, ndigit, itnlim, iagflg, iahflg, dlt, gradtl, stepmx, steptol, xpls, &fpls, gpls, &code, a, wrk, &itncnt); if(SEM_DEBUG) { if(state->have_hessian) Rprintf("Hessian is provided.\n"); else Rprintf("Hessian is not provided.\n"); Rprintf("The number of function evaluations: [%d]\n", state->n_eval); } if (msg < 0) opterror(msg); if (code != 0 && (omsg&8) == 0) optcode(code); int num_objs = 5; if (want_hessian) { ++num_objs; fdhess(n, xpls, fpls, (fcn_p) fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } //A, P, C int modeln = state->model->n; int m = state->model->m; double *matrixA = (double *)R_alloc(m*m, sizeof(double)); //After the compuation, C is n-by-n. double *P = (double *)R_alloc(m*m, sizeof(double)); //After the compuation, C is n-by-n. double *C = (double *)R_alloc(modeln*modeln, sizeof(double)); //After the compuation, C is n-by-n. 
returnAPCfcn(n, xpls, matrixA, P, C, state); if(!csem_isnan(*matrixA)) ++num_objs; if(!csem_isnan(*P)) ++num_objs; if(!csem_isnan(*C)) ++num_objs; PROTECT(value = allocVector(VECSXP, num_objs)); PROTECT(names = allocVector(STRSXP, num_objs)); k = 0; SET_STRING_ELT(names, k, mkChar("minimum")); SET_VECTOR_ELT(value, k, ScalarReal(fpls)); k++; SET_STRING_ELT(names, k, mkChar("estimate")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = xpls[i]; k++; SET_STRING_ELT(names, k, mkChar("gradient")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = gpls[i]; k++; if (want_hessian) { SET_STRING_ELT(names, k, mkChar("hessian")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, n, n)); for (i = 0; i < n * n; i++) REAL(VECTOR_ELT(value, k))[i] = a[i]; k++; } SET_STRING_ELT(names, k, mkChar("code")); SET_VECTOR_ELT(value, k, allocVector(INTSXP, 1)); INTEGER(VECTOR_ELT(value, k))[0] = code; k++; SET_STRING_ELT(names, k, mkChar("iterations")); SET_VECTOR_ELT(value, k, allocVector(INTSXP, 1)); INTEGER(VECTOR_ELT(value, k))[0] = itncnt; k++; /* A */ if(!csem_isnan(*matrixA)) { SET_STRING_ELT(names, k, mkChar("A")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, m, m)); for (i = 0; i < m * m; i++) REAL(VECTOR_ELT(value, k))[i] = matrixA[i]; k++; } /* P */ if(!csem_isnan(*P)) { SET_STRING_ELT(names, k, mkChar("P")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, m, m)); for (i = 0; i < m * m; i++) REAL(VECTOR_ELT(value, k))[i] = P[i]; k++; } /* C */ if(!csem_isnan(*C)) { SET_STRING_ELT(names, k, mkChar("C")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, modeln, modeln)); for (i = 0; i < modeln * modeln; i++) REAL(VECTOR_ELT(value, k))[i] = C[i]; k++; } setAttrib(value, R_NamesSymbol, names); UNPROTECT(2); } return value; } SEXP cmsemnlm(double *x0, int n, int iagflg, int iahflg, int want_hessian, double *typsiz, double fscale, int msg, int ndigit, double gradtl, double 
stepmx, double steptol, int itnlim, msem_model_info *model, msem_fcn_p myobjfun, int optimize) { SEXP value, names; if(SEM_DEBUG) Rprintf("Optimize: [%d]\n", optimize); double *x,*xpls, *gpls, fpls, *a, *wrk, dlt; int code, i, j, k, method, iexp, omsg, itncnt; x = (double *)R_alloc(n, sizeof(double)); Memcpy(x, x0, n); //initial function_info, this will be transfered into nlm. msem_function_info *state; state = (msem_function_info *) R_alloc(1, sizeof(msem_function_info)); state->model = model; state->have_gradient = iagflg; state->have_hessian = iahflg; state->n_eval = 0; state->myobjfun = (msem_fcn_p *) myobjfun; /* .Internal( * nlm(function(x) f(x, ...), p, hessian, typsize, fscale, * msg, ndigit, gradtol, stepmax, steptol, iterlim) */ omsg = msg ; if (((msg/4) % 2) && !iahflg) { /* skip check of analytic Hessian */ msg -= 4; } if (((msg/2) % 2) && !iagflg) { /* skip check of analytic gradient */ msg -= 2; } msem_FT_init(n, 2, state); //FT_SIZE method = 1; /* Line Search */ iexp = iahflg ? 0 : 1; /* Function calls are expensive */ dlt = 1.0; xpls = (double*)R_alloc(n, sizeof(double)); gpls = (double*)R_alloc(n, sizeof(double)); a = (double*)R_alloc(n*n, sizeof(double)); wrk = (double*)R_alloc(8*n, sizeof(double)); //we will not optimize, only return the objective value, gradients and Hessian. if(optimize !=1 ) { int sizeAP = state->sizeAP; int sizeC = state->sizeC; int maxmn = (sizeAP > sizeC ? sizeAP: sizeC); double *matrixA = (double *)R_alloc(sizeAP, sizeof(double)); //After the compuation, C is n-by-n. double *P = (double *)R_alloc(sizeAP, sizeof(double)); //After the compuation, C is n-by-n. double *C = (double *)R_alloc(maxmn, sizeof(double)); //After the compuation, C is n-by-n. 
double *ff = (double *)R_alloc(state->model->G, sizeof(double)); memset(gpls, 0, n*sizeof(double)); memset(a, 0, n*n*sizeof(double)); (*myobjfun)(n, x0, &fpls, gpls, a , matrixA, P, C, ff, state); int num_objs=3; //x0, objective, *ff //A, P, C if(!csem_isnan(*matrixA)) ++num_objs; if(!csem_isnan(*P)) ++num_objs; if(!csem_isnan(*C)) ++num_objs; if(iagflg) { ++num_objs; if(!iahflg && want_hessian) {//we need to compute Hessian if it is not provided.) fdhess(n, x0, fpls, (fcn_p)msem_fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz ); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } } else if(want_hessian) { fdhess(n, x0, fpls, (fcn_p)msem_fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz ); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } if(want_hessian) ++num_objs; PROTECT(value = allocVector(VECSXP, num_objs)); PROTECT(names = allocVector(STRSXP, num_objs)); k = 0; SET_STRING_ELT(names, k, mkChar("minimum")); SET_VECTOR_ELT(value, k, ScalarReal(fpls)); k++; SET_STRING_ELT(names, k, mkChar("estimate")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = x0[i]; k++; if(iagflg) { SET_STRING_ELT(names, k, mkChar("gradient")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = gpls[i]; k++; } if(want_hessian){ SET_STRING_ELT(names, k, mkChar("hessian")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, n, n)); for (i = 0; i < n * n; i++) REAL(VECTOR_ELT(value, k))[i] = a[i]; k++; } /* A */ if(!csem_isnan(*matrixA)) { SET_STRING_ELT(names, k, mkChar("A")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeAP, 1)); for (i = 0; i < sizeAP; i++) REAL(VECTOR_ELT(value, k))[i] = matrixA[i]; k++; } /* P */ if(!csem_isnan(*P)) { SET_STRING_ELT(names, k, mkChar("P")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeAP, 1)); for (i = 0; i < sizeAP; i++) REAL(VECTOR_ELT(value, k))[i] = P[i]; k++; } /* C */ 
if(!csem_isnan(*C)) { SET_STRING_ELT(names, k, mkChar("C")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeC, 1)); for (i = 0; i < sizeC; i++) REAL(VECTOR_ELT(value, k))[i] = C[i]; k++; } SET_STRING_ELT(names, k, mkChar("f")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, 1, state->model->G)); for (i = 0; i < state->model->G; i++) REAL(VECTOR_ELT(value, k))[i] = ff[i]; k++; setAttrib(value, R_NamesSymbol, names); UNPROTECT(2); } else { /* * Dennis + Schnabel Minimizer * * SUBROUTINE OPTIF9(NR,N,X,FCN,D1FCN,D2FCN,TYPSIZ,FSCALE, * + METHOD,IEXP,MSG,NDIGIT,ITNLIM,IAGFLG,IAHFLG,IPR, * + DLT,GRADTL,STEPMX,STEPTOL, * + XPLS,FPLS,GPLS,ITRMCD,A,WRK) * * * Note: I have figured out what msg does. * It is actually a sum of bit flags as follows * 1 = don't check/warn for 1-d problems * 2 = don't check analytic gradients * 4 = don't check analytic hessians * 8 = don't print start and end info * 16 = print at every iteration * Using msg=9 is absolutely minimal * I think we always check gradients and hessians */ optif9(n, n, x, (fcn_p) msem_fcn, (fcn_p) msem_Cd1fcn, (d2fcn_p) msem_Cd2fcn, state, typsiz, fscale, method, iexp, &msg, ndigit, itnlim, iagflg, iahflg, dlt, gradtl, stepmx, steptol, xpls, &fpls, gpls, &code, a, wrk, &itncnt); if(SEM_DEBUG) { if(state->have_hessian) Rprintf("Hessian is provided.\n"); else Rprintf("Hessian is not provided.\n"); Rprintf("The number of function evaluations: [%d]\n", state->n_eval); } if (msg < 0) opterror(msg); if (code != 0 && (omsg&8) == 0) optcode(code); int num_objs = 6; // ff if (want_hessian) { ++num_objs; fdhess(n, xpls, fpls, (fcn_p) msem_fcn, state, a, n, &wrk[0], &wrk[n], ndigit, typsiz); for (i = 0; i < n; i++) for (j = 0; j < i; j++) a[i + j * n] = a[j + i * n]; } //A, P, C int sizeAP = state->sizeAP; int sizeC = state->sizeC; double *matrixA = (double *)R_alloc(sizeAP, sizeof(double)); //After the compuation, C is n-by-n. double *P = (double *)R_alloc(sizeAP, sizeof(double)); //After the compuation, C is n-by-n. 
double *C = (double *)R_alloc(sizeC, sizeof(double)); //After the compuation, C is n-by-n. double *ff = (double *)R_alloc(state->model->G, sizeof(double)); msem_returnAPCfcn(n, xpls, matrixA, P, C,ff, state); if(!csem_isnan(*matrixA)) ++num_objs; if(!csem_isnan(*P)) ++num_objs; if(!csem_isnan(*C)) ++num_objs; PROTECT(value = allocVector(VECSXP, num_objs)); PROTECT(names = allocVector(STRSXP, num_objs)); k = 0; SET_STRING_ELT(names, k, mkChar("minimum")); SET_VECTOR_ELT(value, k, ScalarReal(fpls)); k++; SET_STRING_ELT(names, k, mkChar("estimate")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = xpls[i]; k++; SET_STRING_ELT(names, k, mkChar("gradient")); SET_VECTOR_ELT(value, k, allocVector(REALSXP, n)); for (i = 0; i < n; i++) REAL(VECTOR_ELT(value, k))[i] = gpls[i]; k++; if (want_hessian) { SET_STRING_ELT(names, k, mkChar("hessian")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, n, n)); for (i = 0; i < n * n; i++) REAL(VECTOR_ELT(value, k))[i] = a[i]; k++; } SET_STRING_ELT(names, k, mkChar("code")); SET_VECTOR_ELT(value, k, allocVector(INTSXP, 1)); INTEGER(VECTOR_ELT(value, k))[0] = code; k++; SET_STRING_ELT(names, k, mkChar("iterations")); SET_VECTOR_ELT(value, k, allocVector(INTSXP, 1)); INTEGER(VECTOR_ELT(value, k))[0] = itncnt; k++; /* A */ if(!csem_isnan(*matrixA)) { SET_STRING_ELT(names, k, mkChar("A")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeAP, 1)); for (i = 0; i < sizeAP; i++) REAL(VECTOR_ELT(value, k))[i] = matrixA[i]; k++; } /* P */ if(!csem_isnan(*P)) { SET_STRING_ELT(names, k, mkChar("P")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeAP, 1)); for (i = 0; i < sizeAP; i++) REAL(VECTOR_ELT(value, k))[i] = P[i]; k++; } /* C */ if(!csem_isnan(*C)) { SET_STRING_ELT(names, k, mkChar("C")); SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, sizeC, 1)); for (i = 0; i < sizeC; i++) REAL(VECTOR_ELT(value, k))[i] = C[i]; k++; } /* ff */ SET_STRING_ELT(names, k, mkChar("f")); 
SET_VECTOR_ELT(value, k, allocMatrix(REALSXP, 1, state->model->G)); for (i = 0; i < state->model->G; i++) REAL(VECTOR_ELT(value, k))[i] = ff[i]; k++; setAttrib(value, R_NamesSymbol, names); UNPROTECT(2); } return value; } sem/src/init.c0000644000175000017500000000105013076762626013101 0ustar nileshnilesh#include #include #include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP cmsemSolve(SEXP); extern SEXP csemSolve(SEXP); static const R_CallMethodDef CallEntries[] = { {"cmsemSolve", (DL_FUNC) &cmsemSolve, 1}, {"csemSolve", (DL_FUNC) &csemSolve, 1}, {NULL, NULL, 0} }; void R_init_sem(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } sem/R/0000755000175000017500000000000014126411067011373 5ustar nileshnileshsem/R/csem.R0000644000175000017500000004061712101154620012443 0ustar nileshnilesh# File: csem.R # Author: Zhenghua Nie # Date: Mon 26 Dec 2011 23:54:22 EST # # # # Copyright (C) 2011 Zhenghua Nie. All Rights Reserved. # This code is published under GNU GENERAL PUBLIC LICENSE. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed WITHOUT ANY WARRANTY. See the # GNU General Public License for more details. # # If you do not have a copy of the GNU General Public License, # write to the Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # The following function is a wrapper to compute the objective function and its gradient. # If hessian=TRUE, csem will return Hessian computed by the numerical method, but # is flexible to return Hessian computed by the analytical solution. 
# Evaluate (without optimizing) the compiled objective for a single-group
# model at the parameter vector `par`.  Returns a list with the objective
# value `f`, the parameters, the model matrices A, P, C, and (optionally)
# the gradient and numeric Hessian.
CompiledObjective <- function(par, model.description, gradient=TRUE, hessian=FALSE,
	objective=c("objectiveML", "objectiveGLS", "objectiveFIML", "objectivelogLik"), ...)
{
	if(missing(objective)) objective <- "objectiveML"
	objective <- match.arg(objective)

	# opt.flag=0: only compute the function/derivatives, do not optimize
	res <- csem(model=model.description, start=par, objective=objective, opt.flag=0, gradient=gradient,
			opts=list("hessian"=hessian, "check.analyticals"=FALSE), ...)

	ret <- list();
	ret$f <- res$minimum
	ret$parameters <- res$estimate
	ret$C <- res$C
	ret$A <- res$A
	ret$P <- res$P
	ret$gradient <- res$gradient
	ret$hessian <- res$hessian
	return(ret)
}

# Multi-group analogue of CompiledObjective(): evaluates the compiled
# objective via cmsem() and unpacks the concatenated per-group A, P and C
# vectors back into G lists of m_g-by-m_g / n_g-by-n_g matrices, plus the
# per-group objective values `ff`.
msemCompiledObjective <- function(par, model.description, gradient=TRUE, hessian=FALSE,
	objective=c("objectiveML", "objectiveGLS", "objectiveFIML"), ...)
{
	if(missing(objective)) objective <- "objectiveML"
	objective <- match.arg(objective)

	res <- cmsem(model=model.description, start=par, objective=objective, opt.flag=0, gradient=gradient,
			opts=list("hessian"=hessian, "check.analyticals"=FALSE), ...)

	AA <- PP <- CC <- vector(model.description$G, mode="list")
	indAP <- 1  # running offset into the concatenated A/P vectors
	indC <- 1   # running offset into the concatenated C vector
	ff <- numeric(model.description$G)
	for(g in 1:model.description$G) {
		m <- model.description$m[g]
		n <- model.description$n[g]
		AA[[g]] <- matrix(res$A[indAP:(indAP+m*m-1)], m, m);
		PP[[g]] <- matrix(res$P[indAP:(indAP+m*m-1)], m, m);
		indAP <- indAP + m*m;
		CC[[g]] <- matrix(res$C[indC:(indC+n*n-1)], n, n);
		indC <- indC + n*n;
		ff[g] <- as.numeric(res$f[g])
	}

	ret <- list();
	ret$f <- res$minimum
	ret$parameters <- res$estimate
	ret$C <- CC
	ret$A <- AA
	ret$P <- PP
	ret$ff <- ff
	ret$gradient <- res$gradient
	ret$hessian <- res$hessian
	return(ret)
}

# The wrapper function for solving optimization problems. Please note that the objective function is written in C/C++, we need to know the name.
CompiledSolve <- function(model.description, start,
	objective=c("objectiveML", "objectiveGLS", "objectiveFIML", "objectivelogLik"),
	gradient=TRUE, typsize=rep(1.0, length(start)), debug=FALSE, maxiter=100,...)
{
	if(missing(objective)) objective <- "objectiveML"
	objective <- match.arg(objective)

	# same stepmax heuristic as R's nlm(): scaled norm of the start values
	stepmax=max(1000.0 * sqrt(sum((start/typsize)^2)), 1000.0)
	# opt.flag=1: actually run the compiled optimizer
	res <- csem(model=model.description, start, opt.flag=1, typsize=typsize,objective=objective, gradient=gradient,
			opts=list("iterlim"=maxiter, "print.level"=if(debug) 2 else 0, "hessian"=TRUE, "check.analyticals"=FALSE, "stepmax"=stepmax), ...)
	return(res)
}

# The wrapper function for solving optimization problems. Please note that the objective function is written in C/C++, we need to know the name.
# Multi-group analogue of CompiledSolve(); the per-group matrices in the
# result are unpacked from the concatenated vectors after optimization.
msemCompiledSolve <- function(model.description, start,
	objective=c("objectiveML", "objectiveGLS", "objectiveFIML"),
	gradient=TRUE, typsize=rep(1.0, length(start)), debug=FALSE, maxiter=100,gradtol=1e-6, ...)
{
	if(missing(objective)) objective <- "objectiveML"
	objective <- match.arg(objective)

	stepmax=max(1000.0 * sqrt(sum((start/typsize)^2)), 1000.0)
	res <- cmsem(model=model.description, start, opt.flag=1, typsize=typsize,objective=objective, gradient=gradient,
			opts=list("iterlim"=maxiter, "print.level"=if(debug) 2 else 0,"gradtol"=gradtol, "hessian"=TRUE, "check.analyticals"=FALSE, "stepmax"=stepmax), ...)
#reoraginize the matrix A, P, C AA <- PP <- CC <- vector(model.description$G, mode="list") indAP <- 1 indC <- 1 ff <- numeric(model.description$G) for(g in 1:model.description$G) { m <- as.integer(model.description$m[g]) n <- as.integer(model.description$n[g]) AA[[g]] <- matrix(as.numeric(res$A)[indAP:(indAP+m*m-1)], m, m); PP[[g]] <- matrix(as.numeric(res$P)[indAP:(indAP+m*m-1)], m, m); indAP <- indAP + m*m; CC[[g]] <- matrix(as.numeric(res$C)[indC:(indC+n*n-1)], n, n); indC <- indC + n*n; ff[g] <- as.numeric(res$f[g]) } ret <- list(); ret$minimum <- res$minimum ret$estimate <- res$estimate ret$gradient <- res$gradient ret$hessian <- res$hessian ret$code <- res$code ret$iterations <- res$iterations ret$C <- CC ret$A <- AA ret$P <- PP ret$ff <- ff return(ret) } print.f <- function(input) { print(input); # call R function "print" } #optimze:0 we only compute the objective function, gradients or hessian and return them. # csem <- function(model=NULL, start=NULL,opt.flag=1, typsize=rep(1, model$t), objective=c("objectiveML", "objectiveGLS", "objectiveFIML", "objectivelogLik", "test_objective"), gradient=TRUE, opts=list("hessian"=1, "fscale"=1, "gradtol"=1e-6, "steptol"=1e-6, "stepmax"=max(1000 * sqrt(sum((start/typsize)^2)), 1000), "iterlim"=100, "ndigit"=12,"print.level"=0, "check.analyticals"=1), csem.environment = new.env(), ...){ environment(print.f) <- csem.environment; ## Write wrappers around user-defined functions to pass additional ## arguments print.f.wrapper <- function(x){ print.f(x,...) } if(missing(model)) stop("Must provide the model.") if(missing(objective)) objective <- "objectiveML" objective <- match.arg(objective) if(missing(typsize) || is.null(typsize)) typsize <- rep(1, model$t) if(missing(start)) start <- rep(0.10, model$t) if(length(opts$print.level)==0) print.level <- 0 else print.level <- as.integer(opts$print.level) if(print.level < 0 || print.level > 2) stop("'print.level' must be in {0, 1, 2}") ## the following is for generating gradient. 
if(objective != "objectivelogLik") { arrows.1.seq <- model$ram[model$ram[, 1]==1 & model$ram[, 4]!=0, 4] arrows.2.seq <- model$ram[model$ram[, 1]==2 & model$ram[, 4]!=0, 4] } # this function is modfied from ipoptr developed by Jelmer Ypma (http://www.ucl.ac.uk/~uctpjyy/ipoptr.html). # Please reference the license of ipoptr. get.option.types <- function(opts) { # define types of nlm options, we should add all options here. nlm.option.types <- list( "fscale"="numeric", "gradtol"="numeric", "steptol"="numeric", "stepmax"="numeric", "hessian"="integer", "iterlim"="integer", "ndigit"="integer", "print.level"="integer", "check.analyticals"="integer" ) # initialize list with options sorted by type converted.opts <- list( "integer"=list(), "string"=list(), "numeric"=list() ) is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol # check if we have at least 1 element in the list, otherwise the # loop runs from 1 to down 0 and we get errors if ( length( opts ) > 0 ) { # loop over all options and give them the correct type for ( i in 1:length( opts ) ) { tmp.type <- nlm.option.types[[match( names(opts)[i], names(nlm.option.types) )]] if ( is.null( tmp.type ) ) { # determine type if ( is.character(opts[[i]]) ) { tmp.type <- "string" } else if ( is.wholenumber(opts[[i]]) ) { tmp.type <- "integer" } else { tmp.type <- "numeric" } cat( paste( "Warning: ", names(opts)[i], " is not a recognized option, we try to pass it to nlm as ", tmp.type, "\n" ) ) } if ( tmp.type=="string" ) { converted.opts$string[[ names(opts)[i] ]] <- as.character(opts[[i]]) } else if ( tmp.type=="integer" ) { converted.opts$integer[[ names(opts)[i] ]] <- as.integer(opts[[i]]) } else if ( tmp.type=="numeric" ) { converted.opts$numeric[[ names(opts)[i] ]] <- as.numeric(opts[[i]]) } else { stop(paste("Type of option ", names(opts)[i], " not recognized")) } } } return ( converted.opts ) } if(objective != "objectivelogLik") { ret <- list( "objective" = objective, "gradient" = 
as.integer(gradient), "opt.flg" = as.integer(opt.flag), "start" = start, "options" = get.option.types(opts), "data" = model$data, "pattern.number" = model$pattern.number, "valid.data.patterns" = model$valid.data.patterns, "S" = model$S, "logdetS" = as.numeric(model$logdetS), "invS" = model$invS, "N" = as.integer(model$N), "m" = as.integer(model$m), "n" = as.integer(model$n), "t" = as.integer(model$t), "fixed" = model$fixed, "ram" = model$ram, "sel.free" = model$sel.free, "arrows.1" = model$arrows.1, "arrows.1.free" = model$arrows.1.free, "one.head" = model$one.head, "arrows.2t" = model$arrows.2t, "arrows.2" = model$arrows.2, "arrows.2.free" = model$arrows.2.free, "unique.free.1" = model$unique.free.1, "unique.free.2" = model$unique.free.2, "J" = model$J, "correct" = model$correct, "param.names" = model$param.names, "var.names" = model$var.names, "one.free" = model$one.free, "two.free" = model$two.free, "raw" = as.integer(model$raw), "arrows.1.seq" = arrows.1.seq, "arrows.2.seq" = arrows.2.seq, "typsize" = typsize, "print.f" = print.f.wrapper, "csem.environment"=csem.environment) attr(ret, "class") <- "csem" } else { ret <- list( "objective" = objective, "gradient" = as.integer(gradient), "opt.flg" = as.integer(opt.flag), "start" = start, "t" = length(start), "options" = get.option.types(opts), "data" = model$data, "pattern.number" = model$pattern.number, "valid.data.patterns" = model$valid.data.patterns, "tri" = model$tri, "posn.intercept" = model$posn.intercept, "typsize" = typsize, "print.f" = print.f.wrapper, "csem.environment"=csem.environment ) } # add the current call to the list # ret$call <- match.call() solution <- .Call("csemSolve", ret) # ret$environment <- NULL # ret$solution <- solution ret <- solution #this is for simplifing the interface. 
# add solution variables to object #ret$status <- solution$status return(ret) } cmsem <- function(model=NULL, start=NULL,opt.flag=1, typsize=rep(1, model$t), objective=c("objectiveML", "objectiveGLS", "objectiveFIML", "test_objective"), gradient=TRUE, opts=list("hessian"=1, "fscale"=1, "gradtol"=1e-6, "steptol"=1e-6, "stepmax"=max(1000 * sqrt(sum((start/typsize)^2)), 1000), "iterlim"=100, "ndigit"=12,"print.level"=0, "check.analyticals"=1), csem.environment = new.env(), ...){ environment(print.f) <- csem.environment; ## Write wrappers around user-defined functions to pass additional ## arguments print.f.wrapper <- function(x){ print.f(x,...) } if(missing(model)) stop("Must provide the model.") if(missing(objective)) objective <- "objectiveML" objective <- match.arg(objective) if(missing(typsize) || is.null(typsize)) typsize <- rep(1, model$t) if(missing(start)) start <- rep(0.10, model$t) if(length(opts$print.level)==0) print.level <- 0 else print.level <- as.integer(opts$print.level) if(print.level < 0 || print.level > 2) stop("'print.level' must be in {0, 1, 2}") ## the following is for generating gradient. G <- model$G arrows.1.seq <- arrows.2.seq <- vector(G, mode="list") for(g in 1:G) { arrows.1.seq[[g]] <- model$ram[[g]][model$ram[[g]][, 1]==1 & model$ram[[g]][, 4]!=0, 4] arrows.2.seq[[g]] <- model$ram[[g]][model$ram[[g]][, 1]==2 & model$ram[[g]][, 4]!=0, 4] } get.option.types <- function(opts) { # define types of nlm options, we should add all options here. 
nlm.option.types <- list( "fscale"="numeric", "gradtol"="numeric", "steptol"="numeric", "stepmax"="numeric", "hessian"="integer", "iterlim"="integer", "ndigit"="integer", "print.level"="integer", "check.analyticals"="integer" ) # initialize list with options sorted by type converted.opts <- list( "integer"=list(), "string"=list(), "numeric"=list() ) is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol # check if we have at least 1 element in the list, otherwise the # loop runs from 1 to down 0 and we get errors if ( length( opts ) > 0 ) { # loop over all options and give them the correct type for ( i in 1:length( opts ) ) { tmp.type <- nlm.option.types[[match( names(opts)[i], names(nlm.option.types) )]] if ( is.null( tmp.type ) ) { # determine type if ( is.character(opts[[i]]) ) { tmp.type <- "string" } else if ( is.wholenumber(opts[[i]]) ) { tmp.type <- "integer" } else { tmp.type <- "numeric" } cat( paste( "Warning: ", names(opts)[i], " is not a recognized option, we try to pass it to nlm as ", tmp.type, "\n" ) ) } if ( tmp.type=="string" ) { converted.opts$string[[ names(opts)[i] ]] <- as.character(opts[[i]]) } else if ( tmp.type=="integer" ) { converted.opts$integer[[ names(opts)[i] ]] <- as.integer(opts[[i]]) } else if ( tmp.type=="numeric" ) { converted.opts$numeric[[ names(opts)[i] ]] <- as.numeric(opts[[i]]) } else { stop(paste("Type of option ", names(opts)[i], " not recognized")) } } } return ( converted.opts ) } ret <- list( "objective" = objective, "gradient" = as.integer(gradient), "opt.flg" = as.integer(opt.flag), "start" = start, "options" = get.option.types(opts), "G" = as.integer(model$G), "data" = model$data, "pattern.number" = model$pattern.number, "valid.data.patterns" = model$valid.data.patterns, "S" = model$S, "logdetS" = model$logdetS, "invS" = model$invS, "N" = model$N, "m" = model$m, "n" = model$n, "t" = as.integer(model$t), "fixed" = model$fixed, "ram" = model$ram, "sel.free" = model$sel.free, "arrows.1" = 
model$arrows.1, "arrows.1.free" = model$arrows.1.free, "one.head" = model$one.head, "arrows.2t" = model$arrows.2t, "arrows.2" = model$arrows.2, "arrows.2.free" = model$arrows.2.free, "unique.free.1" = model$unique.free.1, "unique.free.2" = model$unique.free.2, "J" = model$J, "correct" = model$correct, "param.names" = model$param.names, "var.names" = model$var.names, "one.free" = model$one.free, "two.free" = model$two.free, "raw" = as.integer(model$raw), "arrows.1.seq" = arrows.1.seq, "arrows.2.seq" = arrows.2.seq, "typsize" = typsize, "print.f" = print.f.wrapper, "csem.environment"=csem.environment) attr(ret, "class") <- "cmsem" # add the current call to the list # ret$call <- match.call() solution <- .Call("cmsemSolve", ret) # ret$environment <- NULL # ret$solution <- solution ret <- solution #this is for simplifing the interface. # add solution variables to object #ret$status <- solution$status return(ret) } sem/R/cfa.R0000644000175000017500000001344012525463420012252 0ustar nileshnilesh# last modified: 2015-05-14 by J. 
## sem/R/cfa.R -- last modified: 2015-05-14 by J. Fox
## (Restored from a flattened tar extraction: statement line breaks reinstated,
## embedded tar member headers and split author-comment fragments removed.)

## Generate a confirmatory-factor-analysis model (a "semmod" specification) from a
## compact description: one line per factor, "Factor: x1, x2, ...", plus optional
## "var: ..." lines naming equality-constrained variances.  Returns the result of
## specifyModel() on the generated RAM paths.
cfa <- function(file="", text, covs=paste(factors, collapse=","), reference.indicators=TRUE, raw=FALSE, subscript=c("name", "number"), ...){
    Lines <- if (!missing(text)) scan(text=text, what="", sep=";", strip.white=TRUE, comment.char="#") else scan(file=file, what="", sep=";", strip.white=TRUE, comment.char="#")
    # join continuation lines: a trailing comma means the factor spec continues
    lines <- character(0)
    current.line <- ""
    for (line in Lines){
        if (current.line != "") line <- paste(current.line, line)
        if (length(grep(",$", line)) > 0){
            current.line <- line
            next
        }
        current.line <- ""
        lines <- c(lines, line)
    }
    subscript <- match.arg(subscript)
    # split off "var:"/"Var:" lines (equality-constrained variances)
    variance.lines <- grepl("^[Vv]ar.*:", lines)
    variances <- lines[variance.lines]
    lines <- lines[!variance.lines]
    nfactor <- length(lines)
    factors <- rep("", nfactor)
    all.obs.vars <- ram <- character(0)
    equality.sets <- list()
    for (i in 1:nfactor){
        par.number <- 0
        Line <- line <- lines[[i]]
        line <- gsub(" ", "", line)
        line <- strsplit(line, ":")[[1]]
        if (length(line) == 1){
            # no factor name given: invent "Factor.i"
            factors[i] <- paste("Factor.", i, sep="")
            variables <- strsplit(line, ",")[[1]]
            all.obs.vars <- c(all.obs.vars, variables)
        } else if (length(line) == 2){
            factors[i] <- line[1]
            variables <- strsplit(line[2], ",")[[1]]
            all.obs.vars <- c(all.obs.vars, unlist(strsplit(variables, "=")))
        } else stop("Parse error in ", Line)
        if (reference.indicators){
            # first indicator (or "=" equality set) gets a fixed loading of 1
            if (!grepl("=", variables[1])){
                ram <- c(ram, paste(factors[i], " -> ", variables[1], ", NA, 1", sep=""))
            } else{
                vars <- strsplit(variables[1], "=")[[1]]
                equality.sets[[length(equality.sets) + 1]] <- vars
                for (var in vars){
                    ram <- c(ram, paste(factors[i], " -> ", var, ", NA, 1", sep=""))
                }
            }
            variables <- variables[-1]
        }
        for (variable in variables){
            # "x(start)" supplies a start value for the loading
            if (length(grep("\\(", variable)) > 0){
                if (length(grep("\\)", variable)) == 0) stop ("Parse error in ", Line)
                variable <- sub("\\)", "", variable)
                var.start <- strsplit(variable, "\\(")[[1]]
                if (length(var.start) != 2) stop("Parse error in ", Line)
                variable <- var.start[1]
                start <- var.start[2]
                if (not.number(start)) stop ("Bad start value ", start, " in ", Line)
            } else start <- "NA"
            if (!grepl("=", variable)){
                par.number <- par.number + 1
                par.name <- if (subscript == "name") variable else as.character(par.number)
                ram <- c(ram, paste(factors[i], " -> ", variable, ", lam[", par.name, ":", factors[i], "], ", start, sep=""))
            } else {
                # "x1=x2" constrains loadings equal via a shared parameter name
                vars <- strsplit(variable, "=")[[1]]
                equality.sets[[length(equality.sets) + 1]] <- vars
                par.number <- par.number + 1
                lam <- if (subscript == "name") paste(vars, collapse=".") else as.character(par.number)
                for (var in vars){
                    ram <- c(ram, paste(factors[i], " -> ", var, ", lam[", lam, ":", factors[i], "], ", start, sep=""))
                }
            }
        }
    }
    # factor variances: free (reference-indicator scaling) or fixed to 1
    ram <- if (reference.indicators) {
        c(ram, sapply(factors, function(factor) paste(factor, " <-> ", factor, ", ", paste("V[", factor, "]", sep="") , ", NA", sep="")))
    } else{
        c(ram, sapply(factors, function(factor) paste(factor, " <-> ", factor, ", NA, 1", sep="")))
    }
    if (raw){
        # raw-moment analysis: add intercept paths for every observed variable
        all.obs.vars <- unique(all.obs.vars)
        if (length(equality.sets) == 0){
            int <- if (subscript == "name") all.obs.vars else as.character(seq(1, length(all.obs.vars)))
            names(int) <- all.obs.vars
            ram <- c(ram, sapply(all.obs.vars, function(var) paste("Intercept -> ", var, ", intercept(", int[var], "), NA", sep="")))
        } else{
            par.number <- 0
            for (set in equality.sets){
                par.number <- par.number + 1
                int <- if (subscript == "name") paste(set, collapse=".") else as.character(par.number)
                ram <- c(ram, sapply(set, function(var) paste("Intercept -> ", var, ", intercept(", int, "), NA", sep="")))
                all.obs.vars <- setdiff(all.obs.vars, set)
            }
            if (length(all.obs.vars) > 0) {
                int <- if (subscript == "name") all.obs.vars else as.character(seq(par.number + 1, par.number + length(all.obs.vars)))
                names(int) <- all.obs.vars
                ram <- c(ram, sapply(all.obs.vars, function(var) paste("Intercept -> ", var, ", intercept(", int[var], "), NA", sep="")))
            }
        }
        message('NOTE: specify fixed.x="Intercept" in call to sem')
    }
    if (length(variances) > 0){
        var.number <- 0
        variances <- sub("^[Vv]ar.*:", "", variances)
        variances <- gsub(" ", "", variances)
        variances <- strsplit(variances, ",")
        for (vars in variances){
            var <- strsplit(vars, "=")
            # FIX: was seq(..., var.number + length(var) + 1), which generated one
            # extra, unused subscript; the used values are unchanged.
            sub <- if (subscript == "name") sapply(var, function(x) paste(x, collapse=".")) else as.character(seq(var.number + 1, var.number + length(var)))
            var.number <- var.number + length(var)
            for (i in 1:length(var)){
                ram <- c(ram, sapply(var[i], function(x) paste(x, " <-> ", x, ", V[", sub[i], "]", sep="")))
            }
        }
    }
    specifyModel(text=ram, covs=covs, ..., quiet=TRUE)
}

## sem/R/sem.R -- last modified 2019-11-15 by J. Fox

## Generic: character model specifications are promoted to class "semmod".
sem <- function(model, ...){
    if (is.character(model)) class(model) <- "semmod"
    UseMethod("sem", model)
}

## Fit a structural-equation model from a "semmod" specification: parses the path
## model into a numeric RAM matrix, optionally computes S from raw data, then
## dispatches to sem.default() for estimation.
sem.semmod <- function(model, S, N, data, raw=identical(na.action, na.pass), obs.variables=rownames(S), fixed.x=NULL, formula= ~ ., na.action=na.omit, robust=!missing(data), debug=FALSE, optimizer=optimizerSem, objective=objectiveML, ...){
    # parse "a -> b", "b <- a", or "a <-> b" into endpoints and direction
    # (1 = single-headed, 2 = double-headed; -1 flags a reversed single arrow)
    parse.path <- function(path) {
        path.1 <- gsub("-", "", gsub(" ","", path))
        direction <- if (regexpr("<>", path.1) > 0) 2 else if (regexpr("<", path.1) > 0) -1 else if (regexpr(">", path.1) > 0) 1 else stop(paste("ill-formed path:", path))
        path.1 <- strsplit(path.1, "[<>]")[[1]]
        list(first=path.1[1], second=path.1[length(path.1)], direction=direction)
    }
    any.NA <- FALSE
    unique.patterns <- valid.pattern <- valid <- pattern.number <- valid.data.patterns <- NULL
    if (missing(S)){
        # compute the moment matrix from the data
        if (missing(data)) stop("S and data cannot both be missing")
        N.all <- nrow(data)
        data <- model.frame(formula, data=data, na.action=na.action)
        data <- model.matrix(formula, data=data)
        colnames(data)[colnames(data) == "(Intercept)"] <- "Intercept"
        S <- if (raw) rawMoments(data, na.rm=TRUE) else {
            data <- data[, colnames(data) != "Intercept"]
            cov(data, use="complete.obs")
        }
        N <- nrow(data)
        # FIX: was warning(N - N.all, ...), which reported a negative count
        if (N < N.all) warning(N.all - N, " observations removed due to missingness")
        if (identical(na.action, na.pass) && any(is.na(data))){
            any.NA <- TRUE
            valid <- !is.na(data)
            colnames(valid) <- colnames(data)
        } else {
            valid <- matrix(TRUE, nrow(data), ncol(data))
            colnames(valid) <- colnames(data)
        }
    }
    if ((!is.matrix(model)) | ncol(model) != 3) stop ("model argument must be a 3-column matrix")
    startvalues <- as.numeric(model[,3])
    par.names <- model[,2]
    n.paths <- length(par.names)
    heads <- from <- to <- rep(0, n.paths)
    for (p in 1:n.paths){
        path <- parse.path(model[p,1])
        heads[p] <- abs(path$direction)
        to[p] <- path$second
        from[p] <- path$first
        if (path$direction == -1) {
            to[p] <- path$first
            from[p] <- path$second
        }
    }
    ram <- matrix(0, p, 5)
    all.vars <- unique(c(to, from))
    latent.vars <- setdiff(all.vars, obs.variables)
    not.used <- setdiff(obs.variables, all.vars)
    if (length(not.used) > 0){
        rownames(S) <- colnames(S) <- obs.variables
        obs.variables <- setdiff(obs.variables, not.used)
        S <- S[obs.variables, obs.variables]
        warning("The following observed variables are in the input covariance or raw-moment matrix ", "but do not appear in the model:\n", paste(not.used, collapse=", "), "\n")
    }
    vars <- c(obs.variables, latent.vars)
    pars <- na.omit(unique(par.names))
    # encode variables and parameters as indices into vars/pars
    ram[,1] <- heads
    ram[,2] <- apply(outer(vars, to, "=="), 2, which)
    ram[,3] <- apply(outer(vars, from, "=="), 2, which)
    par.nos <- apply(outer(pars, par.names, "=="), 2, which)
    if (length(par.nos) > 0) ram[,4] <- unlist(lapply(par.nos, function(x) if (length(x) == 0) 0 else x))
    ram[,5]<- startvalues
    colnames(ram) <- c("heads", "to", "from", "parameter", "start")
    if (!is.null(fixed.x)) fixed.x <- apply(outer(vars, fixed.x, "=="), 2, which)
    n <- length(obs.variables)
    m <- length(all.vars)
    t <- length(pars)
    if (debug) {
        cat("\n observed variables:\n")
        print(paste(paste(1:n,":", sep=""), obs.variables, sep=""))
        cat("\n")
        if (m > n){
            cat("\n latent variables:\n")
            print(paste(paste((n+1):m,":", sep=""), latent.vars, sep=""))
            cat("\n")
        }
        cat("\n parameters:\n")
        print(paste(paste(1:t,":", sep=""), pars, sep=""))
        cat("\n\n RAM:\n")
        print(ram)
    }
    if (missing(data)) data <- NULL else {
        data <- data[, obs.variables]
        if (!is.null(valid)) {
            # index the distinct missing-data patterns for FIML
            valid <- valid[, obs.variables]
            valid.pattern <- apply(valid, 1, function(row) paste(row, collapse="."))
            unique.patterns <- unique(valid.pattern)
            pattern.number <- apply(outer(valid.pattern, unique.patterns, `==`), 1, which)
            valid.data.patterns <- t(sapply(strsplit(unique.patterns, "\\."), as.logical))
        }
    }
    if (identical(objective, objectiveFIML2) || identical(objective, objectiveFIML)){
        # FIML: use a preliminary ML fit on complete cases for start values
        message("NOTE: start values computed from preliminary ML fit")
        opt <- options(warn=-1)
        on.exit(options(opt)) # in case of error
        prelim.fit <- sem(ram, S=S, N=N, raw=raw, data=na.omit(data), valid=valid, param.names=pars, var.names=vars, fixed.x=fixed.x, semmod=model, robust=robust, debug=debug, ...)
        if (!prelim.fit$convergence) message("NOTE: preliminary ML fit may not have converged")
        options(opt)
        message("NOTE: preliminary iterations, ", prelim.fit$iterations)
        message("NOTE: iterations reported for final fit are post preliminary fit")
        coeff <- coef(prelim.fit)
        rownames(ram) <- rownames(prelim.fit$ram)[1:nrow(ram)]
        ram[names(coeff), 5] <- coeff
    }
    # class name derived from the objective function's name, e.g. "objectiveML"
    cls <- gsub("\\.", "", deparse(substitute(objective)))
    cls <- gsub("2", "", cls)
    result <- sem(ram, S=S, N=N, raw=raw, data=data, pattern.number=pattern.number, valid.data.patterns=valid.data.patterns, param.names=pars, var.names=vars, fixed.x=fixed.x, semmod=model, robust=robust, debug=debug, optimizer=optimizer, objective=objective, cls=cls, ...)
    class(result) <- c(cls, "sem")
    result
}

## Workhorse: fit a SEM given a numeric 5-column RAM matrix and moment matrix S.
## Validates S, appends fixed-x covariances, derives the bookkeeping vectors used
## by the objective functions, and runs the optimizer.
sem.default <- function(model, S, N, raw=FALSE, data=NULL, start.fn=startvalues, pattern.number=NULL, valid.data.patterns=NULL, use.means=TRUE, param.names, var.names, fixed.x=NULL, robust=!is.null(data), semmod=NULL, debug=FALSE, analytic.gradient=!identical(objective, objectiveFIML), warn=FALSE, maxiter=1000, par.size=c("ones", "startvalues"), start.tol=1E-6, optimizer=optimizerSem, objective=objectiveML, cls, ...){
    ord <- function(x) 1 + apply(outer(unique(x), x, "<"), 2, sum)
    is.triangular <- function(X) {
        is.matrix(X) && (nrow(X) == ncol(X)) && (all(0 == X[upper.tri(X)])) || (all(0 == X[lower.tri(X)]))
    }
    ram <- model
    S <- unclass(S) # in case S is a rawmoment object
    # a triangular S is symmetrized; then sanity-check it
    if (nrow(S) > 1 && is.triangular(S)) S <- S + t(S) - diag(diag(S))
    if (!isSymmetric(S)) stop("S must be a square triangular or symmetric matrix")
    if (qr(S)$rank < ncol(S)) warning("S is numerically singular: expect problems")
    if (any(eigen(S, symmetric=TRUE, only.values=TRUE)$values <= 0)) warning("S is not positive-definite: expect problems")
    if ((!is.matrix(ram)) | ncol(ram) != 5 | (!is.numeric(ram))) stop ("ram argument must be a 5-column numeric matrix")
    par.size <- if (missing(par.size)) {
        # heuristic: use start values for scaling when variances differ wildly
        range <- range(diag(S))
        if (range[2]/range[1] > 100) "startvalues" else "ones"
    } else match.arg(par.size)
    n <- nrow(S)
    observed <- 1:n
    n.fix <- length(fixed.x)
    if (!is.null(fixed.x)){
        # fixed exogenous variables: add their (co)variances as fixed parameters
        for (i in 1:n.fix){
            for (j in 1:i){
                ram <- rbind(ram, c(2, fixed.x[i], fixed.x[j], 0, S[fixed.x[i], fixed.x[j]]))
            }
        }
    }
    m <- max(ram[,c(2,3)])
    missing.variances <- setdiff(1:m, ram[,2][ram[,2] == ram[,3]])
    if (length(missing.variances) > 0) warning(paste("The following variables have no variance or error-variance parameter (double-headed arrow):\n", paste(var.names[missing.variances], collapse=", "), "\nThe model is almost surely misspecified; check also for missing covariances.\n"))
    t <- max(ram[,4])
    df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2
    if (df < 0) stop(paste("The model has negative degrees of freedom =", df))
    J <- matrix(0, n, m)
    correct <- matrix(2, m, m)
    diag(correct) <- 1
    J[cbind(1:n, observed)]<-1
    # first occurrence of each free parameter in the RAM matrix
    par.posn <- sapply(1:t, function(i) which(ram[,4] == i)[1])
    colnames(ram)<-c("heads", "to", "from", "parameter", "start value")
    rownames(ram)<-rep("",nrow(ram))
    if (length(param.names) > 0) rownames(ram)[par.posn]<-param.names
    fixed <- ram[,4] == 0
    sel.free <- ram[,4]
    sel.free[fixed] <- 1
    one.head <- ram[,1] == 1
    one.free <- which( (!fixed) & one.head )
    two.free <- which( (!fixed) & (!one.head) )
    arrows.1 <- ram[one.head, c(2,3), drop=FALSE]
    arrows.2 <- ram[!one.head, c(2,3), drop=FALSE]
    arrows.2t <- ram[!one.head, c(3,2), drop=FALSE]
    arrows.1.free <- ram[one.free,c(2,3), drop=FALSE]
    arrows.2.free <- ram[two.free,c(2,3), drop=FALSE]
    sel.free.1 <- sel.free[one.free]
    sel.free.2 <- sel.free[two.free]
    unique.free.1 <- unique(sel.free.1)
    unique.free.2 <- unique(sel.free.2)
    rownames(S) <- colnames(S) <- var.names[observed]
    result <- list(var.names=var.names, ram=ram, S=S, J=J, n.fix=n.fix, n=n, N=N, m=m, t=t, raw=raw, data=data, semmod=semmod, optimizer=optimizer, objective=objective,
        # remaining values to be supplied after optimization
        coeff=NULL, vcov=NULL, par.posn=NULL, convergence=NULL, iterations=NULL, criterion=NULL, C=NULL, A=NULL, P=NULL, adj.obj=NULL, robust.vcov=NULL)
    if (length(param.names)== 0){
        warning("there are no free parameters in the model")
    } else {
        if (!is.null(data) && raw && use.means){
            # start NA intercepts at the observed-variable means
            to <- ram[, 2]
            from <- ram[, 3]
            rows <- (from == which(var.names == "Intercept")) & (ram[, 1] == 1) & (ram[, 4] != 0) & (to <= n) & is.na(ram[, 5])
            ram[rows, 5] <- colMeans(data, na.rm=TRUE)[var.names[to[rows]]]
        }
        start <- if (any(is.na(ram[,5][par.posn]))) start.fn(S, ram, debug=debug, tol=start.tol) else ram[,5][par.posn]
        typsize <- if (par.size == "startvalues") abs(start) else rep(1,t)
        model.description <- list(data=data, pattern.number=pattern.number, valid.data.patterns=valid.data.patterns, S=S, logdetS=log(det(S)), invS=solve(S), N=N, m=m, n=n, t=t, fixed=fixed, ram=ram, sel.free=sel.free, arrows.1=arrows.1, arrows.1.free=arrows.1.free, one.head=one.head, arrows.2t=arrows.2t, arrows.2=arrows.2, arrows.2.free=arrows.2.free, unique.free.1=unique.free.1, unique.free.2=unique.free.2, J=J, correct=correct, param.names=param.names, var.names=var.names, observed=observed, raw=raw)
        res <- optimizer(start=start, objective=objective, gradient=analytic.gradient, maxiter=maxiter, debug=debug, par.size=par.size, model.description=model.description, warn=warn, ...)
        ram[par.posn, 5] <- start
        par.code <- paste(var.names[ram[,2]], c("<---", "<-->")[ram[,1]], var.names[ram[,3]])
        result$coeff <- res$par
        result$vcov <- res$vcov
        result$par.posn <- par.posn
        result$convergence <- res$convergence
        result$iterations <- res$iterations
        result$criterion <- res$criterion
        result$C <- res$C
        result$A <- res$A
        result$P <- res$P
        if (!is.na(result$iterations)) if(result$iterations >= maxiter) warning("maximum iterations exceeded")
    }
    if (missing(cls)){
        cls <- gsub("\\.", "", deparse(substitute(objective)))
        cls <- gsub("2", "", cls)
    }
    # if(cls == "objectiveCompiledGLS")
    #     cls <- c(cls, "objectiveGLS")
    # else if(cls == "objectiveCompiledML")
    #     cls <- c(cls, "objectiveML")
    class(result) <- c(cls, "sem")
    if (robust && !is.null(data) && inherits(result, "objectiveML")){
        result$adj.obj <- sbchisq(result, na.omit(data))
        result$robust.vcov <- robustVcov(result, adj.obj=result$adj.obj)
    }
    result
}

## Coefficient covariance matrix for "sem" objects.  With analytic=TRUE (ML fits,
## t <= 500) the matrix is computed from the analytic Hessian of the ML criterion;
## otherwise the numeric vcov stored at fit time is returned.
vcov.sem <- function(object, robust=FALSE, analytic=inherits(object, "objectiveML") && object$t <= 500, ...)
{
    if (robust) return(object$robust.vcov)
    if (!analytic) return(object$vcov)
    if (!inherits(object, "objectiveML")) stop("analytic coefficient covariance matrix unavailable")
    # analytic Hessian of the ML fit function with respect to the free parameters
    hessian <- function(model){
        # accumulate <- function(A, B, C, D, d) {
        #     res <- matrix(0, d^2, d^2)
        #     B[1:d, 1:d] %x% A[1:d, 1:d] + matrix(rep(rep(t(C[1:d, 1:d]), 1, each=d), d), d^2, d^2, byrow=TRUE) * matrix(rep(rep((D[1:d, 1:d]), 1, each=d), d), d^2, d^2)
        # }
        A <- model$A
        P <- model$P
        S <- model$S
        C <- model$C
        J <- model$J
        m <- model$m
        t <- model$t
        I.Ainv <- solve(diag(m) - A)
        Cinv <- solve(C)
        AA <- t(I.Ainv) %*% t(J)
        BB <- J %*% I.Ainv %*% P %*% t(I.Ainv)
        CC <- t(I.Ainv) %*% t(J)
        DD <- J %*% I.Ainv
        # second derivatives with respect to (B, B), (P, P), and (B, P)
        dF.dBdB <- accumulate(AA %*% Cinv %*% t(AA), t(BB) %*% Cinv %*% BB, AA %*% Cinv %*% BB, t(BB) %*% Cinv %*% t(AA), m)
        dF.dPdP <- accumulate(CC %*% Cinv %*% t(CC), t(DD) %*% Cinv %*% DD, CC %*% Cinv %*% DD, t(DD) %*% Cinv %*% t(CC), m)
        dF.dBdP <- accumulate(AA %*% Cinv %*% t(CC), t(BB) %*% Cinv %*% DD, AA %*% Cinv %*% DD, t(BB) %*% Cinv %*% t(CC), m)
        ram <- model$ram
        fixed <- ram[, 4] == 0
        sel.free <- ram[, 4]
        sel.free[fixed] <- 0
        one.head <- ram[, 1] == 1
        one.free <- which( (!fixed) & one.head )
        two.free <- which( (!fixed) & (!one.head) )
        two.free.cov <- which((!fixed) & (!one.head) & (ram[, 2] != ram[, 3]))
        arrows.1 <- ram[one.head, c(2, 3), drop=FALSE]
        arrows.2 <- ram[!one.head, c(2, 3), drop=FALSE]
        arrows.2t <- ram[!one.head, c(3, 2), drop=FALSE]
        arrows.1.free <- ram[one.free, c(2, 3), drop=FALSE]
        arrows.2.free <- ram[two.free, c(2, 3), drop=FALSE]
        sel.free.1 <- sel.free[one.free]
        sel.free.2 <- sel.free[two.free]
        unique.free.1 <- unique(sel.free.1)
        unique.free.2 <- unique(sel.free.2)
        posn.matrix <- matrix(1:(m^2), m, m)
        posn.free <- c(posn.matrix[arrows.1.free], (m^2) + posn.matrix[arrows.2.free])
        DBB <- dF.dBdB[posn.matrix[arrows.1.free], posn.matrix[arrows.1.free], drop=FALSE]
        DPP <- dF.dPdP[posn.matrix[arrows.2.free], posn.matrix[arrows.2.free], drop=FALSE]
        DBP <- dF.dBdP[posn.matrix[arrows.1.free], posn.matrix[arrows.2.free], drop=FALSE]
        # browser()
        hessian <- rbind( cbind(DBB, DBP), cbind(t(DBP), DPP))
        n1 <- length(one.free)
        n2 <- length(two.free)
        # scale factors: sqrt(2) for single arrows and covariances, sqrt(2)/2 for variances
        nn <- rep(c(sqrt(2), sqrt(2)/2), c(n1, n2))
        nn[c(one.free, two.free) %in% two.free.cov] <- sqrt(2)
        # browser()
        hessian <- hessian * outer(nn, nn)
        # collapse rows/columns for equality-constrained parameters
        pars <- ram[, 4][!fixed]
        Z <- outer(1:t, pars, function(x, y) as.numeric(x == y))
        hessian <- Z %*% hessian %*% t(Z)
        par.names <- c(names(one.free), names(two.free))
        par.names <- par.names[par.names != ""]
        rownames(hessian) <- colnames(hessian) <- par.names
        nms <- names(coef(object))
        hessian[nms, nms]
    }
    h <- hessian(object)
    t <- object$t
    N <- object$N
    raw <- object$raw
    param.names <- rownames(h)
    vcov <- matrix(NA, t, t)
    qr.hess <- try(qr(h), silent=TRUE)
    if (inherits(qr.hess, "try-error")){
        warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n")
    } else if (qr.hess$rank < t){
        warning(" singular Hessian: model is probably underidentified.\n")
        which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)]
        attr(vcov, "aliased") <- param.names[which.aliased]
    } else {
        vcov <- (2/(N - (!raw))) * solve(h)
        if (any(diag(vcov) < 0)) {
            attr(vcov, "aliased") <- param.names[diag(vcov) < 0]
            warning("Negative parameter variances.\nModel may be underidentified.\n")
        }
    }
    vcov
}

## Coefficients of a fitted "sem" object; standardized=TRUE returns the
## standardized coefficients computed by stdCoef().
coef.sem <- function(object, standardized=FALSE, ...){
    if (!standardized) return(object$coeff)
    sc <- stdCoef(object, ...)
    names <- as.character(sc[, 1])
    which <- names != " "
    coef <- sc[which, 2]
    names(coef) <- names[which]
    coef
}

# the following auxiliary function is for computing Hessians
accumulate <- function(A, B, C, D, d) {
    B[1:d, 1:d] %x% A[1:d, 1:d] + matrix(rep(rep(t(C[1:d, 1:d]), 1, each=d), d), d^2, d^2, byrow=TRUE) * matrix(rep(rep((D[1:d, 1:d]), 1, each=d), d), d^2, d^2)
}
## [tar member header for sem/R/update.R removed here]
## sem/R/update.R -- last modified 2014-10-27 by J. Fox
## (Restored from a flattened tar extraction: statement line breaks reinstated,
## embedded tar member headers and split author-comment fragments removed.)

## Generic: combine several "semmod" model specifications into one.
combineModels <- function (...){
    UseMethod("combineModels")
}
combineModels.semmod <- function(..., warn=TRUE){
    model <- rbind(...)
    # NOTE(review): the value of removeRedundantPaths() is discarded and the raw
    # rbind result returned -- presumably the cleaned model was intended; confirm.
    removeRedundantPaths(model, warn=warn)
    model
}

## Update a "semmod" specification from modification directives read from `text`
## or `file`: one directive per line, "delete,", "add,", "replace,", "fix,", or
## "free," followed by its arguments.  Returns the updated specification with
## redundant paths removed.
update.semmod <- function (object, file = "", text, ...) {
    # canonicalize arrow spelling, e.g. "a --> b" / "a ->b" to "a->b"
    regularizePaths <- function(p) sub(" *-*> *", "->", sub(" *<-* *", "<-", p))
    # drop rows of old.model whose path/variable/coefficient column matches delete.text
    delete.model.element <- function(delete.text, old.model, type = "path") {
        type <- match.arg(type, c("path", "variable", "coefficient"))
        if (type == "path") delete.text <- regularizePaths(delete.text)
        col.index <- c(path = 1, variable = 1, coefficient = 2)[type]
        # NOTE(review): strip.white is used here as a function; it is not in base R --
        # verify it is defined elsewhere in the package.
        delete.text <- strip.white(delete.text)
        old.model <- old.model[-grep(delete.text, strip.white(old.model[, col.index])),]
        class(old.model) <- "semmod"
        return(old.model)
    }
    object[, 1] <- regularizePaths(object[, 1])
    modmat <- if (!missing(text)) scan(text = text, what = list(change = "", var1 = "", var2 = "", var3 = "", var4 = ""), sep = ",", strip.white = TRUE, comment.char = "#", fill = TRUE) else scan(file = file, what = list(change = "", var1 = "", var2 = "", var3 = "", var4 = ""), sep = ",", strip.white = TRUE, comment.char = "#", fill = TRUE)
    modmat <- cbind(modmat$change, modmat$var1, modmat$var2, modmat$var3)
    if ("delete" %in% modmat[, 1]) {
        deletemat <- modmat[which(modmat[, 1] == "delete"), 2:3, drop=FALSE]
        deletemat[which(deletemat[, 2] == ""), 2] <- "path"
        for (i in 1:nrow(deletemat)) object <- delete.model.element(deletemat[i, 1], object, deletemat[i, 2])
    }
    if ("add" %in% modmat[, 1]) {
        addmat <- modmat[which(modmat[, 1] == "add"), 2:4, drop=FALSE]
        addmat[addmat[, 3] == "", 3] <- NA
        addmat[addmat[, 2] == "", 2] <- NA
        class(addmat) <- "semmod"
        object <- combineModels(object, addmat, warn=FALSE)
    }
    if ("replace" %in% modmat[, 1]) {
        submat <- modmat[which(modmat[, 1] == "replace"), 2:3, drop=FALSE]
        for (i in 1:nrow(submat)){
            object[, 1:2] <- mapply(function(x, y) gsub(x, y, object[, 1:2]), submat[i, 1], submat[i, 2])
        }
    }
    if ("fix" %in% modmat[, 1]){
        fixmat <- modmat[which(modmat[, 1] == "fix"), 2:3, drop=FALSE]
        fixmat[, 1] <- regularizePaths(fixmat[, 1])
        for (i in 1:nrow(fixmat)){
            object[which(object[, 1] == fixmat[i, 1]), 2:3] <- c(NA, fixmat[i, 2])
        }
    }
    if ("free" %in% modmat[, 1]){
        freemat <- modmat[which(modmat[, 1] == "free"), 2:4, drop=FALSE]
        freemat[, 1] <- regularizePaths(freemat[, 1])
        for (i in 1:nrow(freemat)){
            object[which(object[, 1] == freemat[i, 1]), 2:3] <- freemat[i, 2:3]
        }
    }
    removeRedundantPaths(object)
}

# the next function is adapted from code contributed by Michael Friendly
edit.semmod <- function(name, ...) {
    name <- unclass(name)
    colnames(name) <- c("Path", "Parameter", "StartValue")
    name <- edit(name)
    class(name) <- "semmod"
    name
}

## sem/R/chisqNull.R

## Chi-square statistic for the null (independence) model; NULL for raw-moment fits.
chisqNull <- function(object){
    UseMethod("chisqNull")
}
chisqNull.objectiveML <- function(object){
    chisq <- if (!object$raw) {
        S <- object$S
        CC <- diag(diag(S))  # null model: diagonal covariance matrix
        (object$N - 1) * (sum(diag(S %*% solve(CC))) + log(det(CC)) - log(det(S)) - object$n)
    } else NULL
    chisq
}
chisqNull.objectiveGLS <- function(object){
    chisq <- if (!object$raw) {
        S <- object$S
        CC <- diag(diag(S))
        SS <- solve(S) %*% (S - CC)
        (object$N - 1)*0.5*sum(diag(SS %*% SS))
    } else NULL
    chisq
}

## sem/R/tsls.R
# Two-Stage Least Squares
# John Fox
# last modified 2019-11-15 by J. Fox

tsls <- function (y, ...) {
    UseMethod("tsls")
}

## 2SLS workhorse: response y, model matrix X, instrument matrix Z, optional
## case weights w (NULL means unweighted), and coefficient names.  Returns a list
## with coefficients, their covariance matrix V, residual SD s, residuals, etc.
tsls.default <- function (y, X, Z, w, names = NULL, ...)
{
    if (is.null(w)) w <- 1
    if (any(w < 0 | is.na(w))) stop("missing or negative weights not allowed")
    n <- length(y)
    p <- ncol(X)
    sqrt.w <- sqrt(w)
    # b = [X'WZ (Z'WZ)^-1 Z'WX]^-1 X'WZ (Z'WZ)^-1 Z'Wy
    invZtZ <- solve(crossprod(Z*sqrt.w))
    XtZ <- crossprod(X*w, Z)
    V <- chol2inv(chol(XtZ %*% invZtZ %*% t(XtZ)))
    b <- V %*% XtZ %*% invZtZ %*% crossprod(Z*w, y)
    residuals <- (y - X %*% b)*sqrt.w
    s2 <- sum(residuals^2)/(n - p)
    V <- s2 * V
    result <- list()
    result$n <- n
    result$p <- p
    b <- as.vector(b)
    names(b) <- names
    result$coefficients <- b
    rownames(V) <- colnames(V) <- names
    result$V <- V
    result$s <- sqrt(s2)
    result$residuals <- as.vector(residuals)
    result$response <- y
    result$model.matrix <- X
    result$instruments <- Z
    result$weights <- w
    result
}

## Formula interface: builds X from `formula`, Z from the one-sided `instruments`
## formula, applies weights/subset/na.action, and delegates to tsls.default.
tsls.formula <- function (formula, instruments, data, subset, weights, na.action, contrasts = NULL, ...) {
    if (missing(na.action)) na.action <- options()$na.action
    m <- match.call(expand.dots = FALSE)
    if (is.matrix(eval(m$data, sys.frame(sys.parent())))) m$data <- as.data.frame(data)
    response.name <- deparse(formula[[2]])
    # combined formula so one model.frame covers regressors and instruments
    form <- as.formula(paste(paste(response.name, collapse = ""), "~", paste(deparse(formula[[3]]), collapse = ""), "+", paste(deparse(instruments[[2]]), collapse = "")))
    m$formula <- form
    m$instruments <- m$contrasts <- NULL
    m[[1]] <- as.name("model.frame")
    mf <- eval(m, sys.frame(sys.parent()))
    na.act <- attr(mf, "na.action")
    w <- as.vector(model.weights(mf))
    wt.var <- if(!is.null(w)) deparse(substitute(weights)) else NULL
    Z <- model.matrix(instruments, data = mf, contrasts)
    y <- mf[, response.name]
    X <- model.matrix(formula, data = mf, contrasts)
    result <- tsls(y, X, Z, w, colnames(X))
    result$response.name <- response.name
    result$formula <- formula
    result$instruments <- instruments
    result$wt.var <- wt.var
    if (!is.null(na.act)) result$na.action <- na.act
    class(result) <- "tsls"
    result
}

print.tsls <- function (x, ...)
{
    cat("\nModel Formula: ")
    print(x$formula)
    cat("\nInstruments: ")
    print(x$instruments)
    if (!is.null(x$wt.var)){
        cat("\nWeights: ", x$wt.var, "\n")
    }
    cat("\nCoefficients:\n")
    print(x$coefficients)
    cat("\n")
    invisible(x)
}

## Coefficient table (estimates, SEs, t values, p values) for a tsls fit.
summary.tsls <- function (object, digits=getOption("digits"), ...) {
    save.digits <- options(digits = digits)
    on.exit(options(save.digits))
    df <- object$n - object$p
    std.errors <- sqrt(diag(object$V))
    b <- object$coefficients
    t <- b/std.errors
    p <- 2 * (1 - pt(abs(t), df))
    table <- cbind(b, std.errors, t, p)
    rownames(table) <- names(b)
    colnames(table) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
    result <- list(formula=object$formula, instruments=object$instruments, wt.var=object$wt.var, residuals=summary(residuals(object)), coefficients=table, digits=digits, s=object$s, df=df)
    class(result) <- "summary.tsls"
    result
}

print.summary.tsls <- function(x, ...){
    cat("\n 2SLS Estimates\n")
    cat("\nModel Formula: ")
    print(x$formula)
    cat("\nInstruments: ")
    print(x$instruments)
    if (!is.null(x$wt.var)){
        cat("\nWeights: ", x$wt.var, "\n")
    }
    cat("\nResiduals:\n")
    print(x$residuals)
    cat("\n")
    printCoefmat(x$coefficients, digits=x$digits)
    cat(paste("\nResidual standard error:", round(x$s, x$digits), "on", x$df, "degrees of freedom\n\n"))
    invisible(x)
}

residuals.tsls <- function(object, ...){
    res <- object$residuals
    if (is.null(object$na.action)) res else naresid(object$na.action, res)
}

coef.tsls <- function(object, ...){
    object$coefficients
}

fitted.tsls <- function(object, ...){
    yhat <- as.vector(object$model.matrix %*% object$coefficients)
    if (is.null(object$na.action)) yhat else napredict(object$na.action, yhat)
}

vcov.tsls <- function(object, ...)
object$V anova.tsls <- function(object, model.2, s2, dfe, ...){ if(!inherits(model.2, "tsls")) stop('requires two models of class tsls') s2.1 <- object$s^2 n.1 <- object$n p.1 <- object$p dfe.1 <- n.1 - p.1 s2.2 <- model.2$s^2 n.2 <- model.2$n p.2 <- model.2$p dfe.2 <- n.2 - p.2 SS.1 <- s2.1 * dfe.1 SS.2 <- s2.2 * dfe.2 SS <- abs(SS.1 - SS.2) Df <- abs(dfe.2 - dfe.1) if (missing(s2)){ s2 <- if (dfe.1 > dfe.2) s2.2 else s2.1 f <- (SS/Df) / s2 RSS <- c(SS.1, SS.2) Res.Df <- c(dfe.1, dfe.2) SS <- c(NA, SS) P <- c(NA, 1 - pf(f, Df, min(dfe.1, dfe.2))) Df <- c(NA, Df) f <- c(NA, f) rows <- c("Model 1", "Model 2") } else{ f <- (SS/Df) / s2 RSS <- c(SS.1, SS.2, s2*dfe) Res.Df <- c(dfe.1, dfe.2, dfe) SS <- c(NA, SS, NA) P <- c(NA, 1 - pf(f, Df, dfe), NA) Df <- c(NA, Df, NA) f <- c(NA, f, NA) rows <- c("Model 1", "Model 2", "Error") } table <- data.frame(Res.Df, RSS, Df, SS, f, P) head.1 <- paste("Model 1: ",format(object$formula), " Instruments:", format(object$instruments)) head.2 <- paste("Model 2: ",format(model.2$formula), " Instruments:", format(model.2$instruments)) names(table) <- c("Res.Df", "RSS", "Df", "Sum of Sq", "F", "Pr(>F)") row.names(table) <- rows structure(table, heading = c("Analysis of Variance", "", head.1, head.2, ""), class = c("anova", "data.frame")) } sem/R/objectiveFIML.R0000644000175000017500000000575512070203746014153 0ustar nileshnilesh# last modified 2012-01-06 by J. Fox # Modified for Compiled code in C/C++ by Zhenghua Nie. 
objectiveFIML <- function (gradient = TRUE, hessian=FALSE) { result <- list(objective = function(par, model.description) { with(model.description, { res <- CompiledObjective(par=par, model.description=model.description, objective="objectiveFIML", gradient=gradient, hessian=hessian) f <- res$f C <- res$C A <- res$A P <- res$P grad <- NULL if(gradient) grad <- res$gradient hess <- NULL if(hessian) hess <- res$hessian attributes(f) <- list(C = C, A = A, P = P, gradient=grad, hessian=hess) f } ) } ) class(result) <- "semObjective" result } #objectivelogLik <- function(gradient=FALSE, hessian=FALSE) #{ # result<-list(objective = function(par, object){ # with(object, { # res <- CompiledObjective(par=par, model.description=object, objective="objectivelogLik", gradient=gradient, hessian=hessian) # f <- res$f # grad <- NULL # if(gradient) # grad <- res$gradient # hess <- NULL # if(hessian) # hess <- res$hessian # attributes(f) <- list(gradient=grad, hessian=hess) # f # } # ) # } # ) # result #} logLik.objectiveFIML <- function(object, saturated=FALSE, intercept="Intercept", iterlim=1000, ...){ logLikSaturated <- function(object, iterlim, ...){ #objective <- function(par) #{ # res <- CompiledObjective(par=par, model.description=object, objective="objectivelogLik", gradient=FALSE, hessian=FALSE) # f <- res$f # f #} data <- object$data valid <- !is.na(data) valid.pattern <- apply(valid, 1, function(row) paste(row, collapse=".")) unique.patterns <- unique(valid.pattern) pattern.number <- apply(outer(valid.pattern, unique.patterns, `==`), 1, which) valid.data.patterns <- t(sapply(strsplit(unique.patterns, "\\."), as.logical)) n.pat <- nrow(valid.data.patterns) log.2pi <- log(2*pi) n <- ncol(data) N <- nrow(data) C <- object$C tri <- lower.tri(C, diag=TRUE) posn.intercept <- which(rownames(C) == intercept) tri[posn.intercept, posn.intercept] <- FALSE start <- C[tri] opt <- options(warn=-1) on.exit(options(opt)) object$posn.intercept <- posn.intercept object$intercept <- intercept 
object$pattern.number <- pattern.number object$valid.data.patterns <- valid.data.patterns object$tri <- tri #res <- nlm(objective, start, iterlim=iterlim) res <- CompiledSolve(model.description=object, start=start, objective="objectivelogLik", maxiter=iterlim) logL <- - res$minimum/2 C <- matrix(0, n, n) C[tri] <- res$estimate C <- C + t(C) - diag(diag(C)) C[posn.intercept, posn.intercept] <- 1 list(logL=logL, C=C, code=res$code) } if (saturated) { res <- logLikSaturated(object, iterlim=iterlim) if (res$code > 3) warning("nlm return code = ", res$code) logL <- res$logL attr(logL, "C") <- res$C return(logL) } else return(- object$criterion*object$N/2) } sem/R/modIndices.R0000644000175000017500000001640012535651426013604 0ustar nileshnilesh# last modified 2015-06-09 by J. Fox mod.indices <- function(...){ .Deprecated("modIndices", package="sem") modIndices(...) } modIndices <- function(model, ...){ UseMethod("modIndices") } # incorporates corrections by Michael Culbertson: modIndices.objectiveML <- function(model, duplicated, deviance=NULL, ...){ Duplicated <- function(x){ X <- outer(x, x, FUN="==") diag(X) <- NA apply(X, 2, any, na.rm=TRUE) } if (is.null(deviance)) deviance <- deviance(model) A <- model$A P <- model$P S <- model$S C <- model$C J <- model$J m <- model$m t <- model$t NM <- model$N - (!model$raw) vars <- model$var.names I.Ainv <- solve(diag(m) - A) Cinv <- solve(C) AA <- t(I.Ainv) %*% t(J) BB <- J %*% I.Ainv %*% P %*% t(I.Ainv) correct <- matrix(2, m, m) diag(correct) <- 1 grad.P <- correct * AA %*% Cinv %*% (C - S) %*% Cinv %*% t(AA) grad.A <- correct * AA %*% Cinv %*% (C - S) %*% Cinv %*% BB grad <- c(grad.A, grad.P) * NM dF.dBdB <- accumulate(AA %*% Cinv %*% t(AA), t(BB) %*% Cinv %*% BB, AA %*% Cinv %*% BB, t(BB) %*% Cinv %*% t(AA), m) dF.dPdP <- accumulate(AA %*% Cinv %*% t(AA), AA %*% Cinv %*% t(AA), AA %*% Cinv %*% t(AA), AA %*% Cinv %*% t(AA), m) dF.dBdP <- accumulate(AA %*% Cinv %*% t(AA), t(BB) %*% Cinv %*% t(AA), AA %*% Cinv %*% t(AA), t(BB) 
%*% Cinv %*% t(AA), m) correct.BB <- correct.PP <- correct.BP <- matrix(1, m^2, m^2) d0 <- as.vector(diag(m) ==0 ) d1 <- as.vector(diag(m) == 1) correct.BB[d0, d0] <- 2 correct.PP[d1, d1] <- 0.5 correct.PP[d0, d0] <- 2 correct.BP[d0, d0] <- 2 Hessian <- NM*rbind(cbind(dF.dBdB * correct.BB, dF.dBdP * correct.BP), cbind(t(dF.dBdP * correct.BP), dF.dPdP * correct.PP)) ram <- model$ram fixed <- ram[,4] == 0 one.head <- ram[,1] == 1 one.free <- which( (!fixed) & one.head ) two.free <- which( (!fixed) & (!one.head) ) arrows.1.free <- ram[one.free,c(2,3)] arrows.2.free <- ram[two.free,c(2,3)] duplicated <- if (missing(duplicated)) (Duplicated(ram[, 4]) & (!fixed)) else (duplicated & (!fixed)) one.free.duplicated <- which( (!fixed) & one.head & duplicated) two.free.duplicated <- which( (!fixed) & (!one.head) & duplicated) arrows.1.free.duplicated <- ram[one.free.duplicated, c(2,3)] arrows.2.free.duplicated <- ram[two.free.duplicated, c(2,3)] posn.matrix <- matrix(1:(m^2), m, m) posn.free <- c(posn.matrix[arrows.1.free], (m^2) + posn.matrix[arrows.2.free]) posn.free.duplicated <- c(posn.matrix[arrows.1.free.duplicated], (m^2) + posn.matrix[arrows.2.free.duplicated]) hessian <- Hessian[posn.free, posn.free] par.no <- ram[ram[, 4] > 0, 4] pars <- ram[, 4][!fixed] Z <- outer(1:t, pars, function(x, y) as.numeric(x == y)) hessian <- Z %*% hessian %*% t(Z) E.inv <- solve(hessian) par.change <- mod.indices <- rep(0, 2*(m^2)) n.posn.free.1 <- length(posn.free) + 1 for (i in 1:(2*(m^2))) { if ((! 
i %in% posn.free.duplicated) && (i %in% posn.free || qr(as.matrix(Hessian[c(posn.free, i), c(posn.free, i)]))$rank < n.posn.free.1)){ par.change[i] <- mod.indices[i] <- NA } else { k <- Hessian[i, i] d <- Hessian[i, posn.free] d <- sapply(1:t, function(i) sum(d[which(par.no==i)])) par.change[i] <- -grad[i]/ (k - d %*% E.inv %*% d) mod.indices[i] <- -0.5 * grad[i] * par.change[i] } } par.change[mod.indices > 1.1*deviance] <- NA mod.indices[mod.indices > 1.1*deviance] <- NA mod.A <- matrix(mod.indices[1:(m^2)], m, m) mod.P <- matrix(mod.indices[-(1:(m^2))], m, m) par.A <- matrix(par.change[1:(m^2)], m, m) par.P <- matrix(par.change[-(1:(m^2))], m, m) diag(mod.A) <- diag(par.A) <- NA rownames(mod.A) <- colnames(mod.A) <- vars rownames(mod.P) <- colnames(mod.P) <- vars rownames(par.A) <- colnames(par.A) <- vars rownames(par.P) <- colnames(par.P) <- vars result <- list(mod.A=mod.A, mod.P=mod.P, par.A=par.A, par.P=par.P) class(result) <- "modIndices" result } summary.modIndices <- function(object, round=2, print.matrices=c('both', 'par.change', 'mod.indices'), ...) 
{ print.matrices <- match.arg(print.matrices) if (print.matrices != "mod.indices"){ cat("\n Parameter change: A matrix (regression coefficients, row <- column)\n") print(object$par.A) } if (print.matrices != "par.change"){ cat("\n Modification indices: A matrix (regression coefficients, row <- column)\n") print(round(object$mod.A, round)) } if (print.matrices != "mod.indices"){ cat("\n Parameter change: P matrix (variances/covariances)\n") print(object$par.P) } if (print.matrices != "par.change"){ cat("\n Modification indices: P matrix (variances/covariances)\n") print(round(object$mod.P, round)) } invisible(NULL) } print.modIndices <- function(x, n.largest=5, ...){ cat("\n", n.largest, "largest modification indices, A matrix (regression coefficients):\n") mod.A <- as.vector(x$mod.A) names <- rownames(x$mod.A) names(mod.A) <- outer(names, names, paste, sep="<-") print(rev(sort(mod.A))[1:n.largest]) cat("\n ", n.largest, "largest modification indices, P matrix (variances/covariances):\n") mod.P <- as.vector(x$mod.P) names(mod.P) <- outer(names, names, paste, sep="<->") print(rev(sort(mod.P[lower.tri(x$mod.P, diag=TRUE)]))[1:n.largest]) } modIndices.msemObjectiveML <- function(model, ...){ deviance <- deviance(model) G <- length(model$groups) t <- model$t N <- model$N raw <- model$raw wts <- (N - !raw)/(sum(N) - !raw*G) hessian <- matrix(0, t, t) rownames(hessian) <- colnames(hessian) <- model$param.names result <- vector(G, mode="list") names(result) <- paste(model$group, model$groups, sep=": ") all.pars <- unlist(lapply(model$ram, function(ram) ram[, 4])) unique.pars <- unique(all.pars) pars.count <- table(all.pars) duplicated.pars <- as.numeric(names(pars.count)[pars.count > 1]) for (g in 1:G){ ram <- model$ram[[g]] parameters <- ram[, 4] duplicated <- parameters %in% duplicated.pars unique.pars <- unique(parameters[parameters != 0]) par.posn <- sapply(unique.pars, function(x) which(x == parameters)[1]) unique.posn <- which(parameters %in% unique.pars) 
rownames(ram)[unique.posn] <- unique(model$param.names[ram[, 4]]) ram[unique.posn, 4] <- unlist(apply(outer(unique.pars, parameters, "=="), 2, which)) mod.g <- list(var.names=model$var.names[[g]], ram=ram,J=model$J[[g]], n.fix=model$n.fix, n=model$n[[g]], N=model$N[g], m=model$m[[g]], t=length(unique.pars), coeff=model$coeff[parameters], criterion=model$group.criteria[g], S=model$S[[g]], raw=model$raw, C=model$C[[g]], A=model$A[[g]], P=model$P[[g]]) class(mod.g) <- c("objectiveML", "sem") result[[g]] <- modIndices(mod.g, duplicated=duplicated, deviance=deviance, ...) } class(result) <- "msemModIndices" result } print.msemModIndices <- function(x, ...){ G <- length(x) groups <- names(x) for (g in 1:G) { cat("\n\n", groups[g], "\n") print(x[[g]], ...) } } summary.msemModIndices <- function(object, ...){ G <- length(object) groups <- names(object) for (g in 1:G) { cat("\n\n", groups[g], "\n") print(summary(object[[g]], ...)) } }sem/R/ram.R0000644000175000017500000000131212026676605012302 0ustar nileshnilesh# last modified 2012-09-17 by J. Fox ram <- function(object, digits=getOption("digits"), startvalues=FALSE){ var.names <- rownames(object$A) ram <- object$ram if (!startvalues) colnames(ram) <- c(colnames(ram)[1:4], "estimate") par <- object$coeff par.names <- rep(" ", nrow(ram)) t <- object$t for (i in 1:t) { which.par <- ram[,4] == i if (!startvalues) ram[which.par, 5] <- par[i] par.names[which.par] <- names(par)[i] } par.code <- paste(var.names[ram[,2]], c('<---', '<-->')[ram[,1]], var.names[ram[,3]]) ram <- data.frame(ram, arrow = par.code) print(ram, rowlab=par.names, digits=digits) } sem/R/robust.R0000644000175000017500000001012013563624470013036 0ustar nileshnilesh# these functions originally by Jarrett Byrnes # last modified 2019-11-15 by J. 
Fox robustVcov <- function(sem.obj, adj.obj, data.obj, use.fit=FALSE, use.ginv=FALSE){ if (missing(adj.obj) && missing(data.obj)) stop("Need a data or sbchisq object") if (missing(adj.obj)) { adj.obj <- sbchisq(sem.obj, data.obj, use.fit = use.fit) } ncases <- sem.obj$N hes <- semHessian(adj.obj$w_mat, adj.obj$p_deriv_mat) info_m <- try(solve(hes), silent = TRUE) if (inherits(info_m, "try-error") && isTRUE(use.ginv)) { info_m <- ginv(hes) ginvFlag <- TRUE } acov <- info_m %*% (t(adj.obj$p_deriv_mat) %*% adj.obj$w_mat %*% adj.obj$w_adf %*% adj.obj$w_mat %*% adj.obj$p_deriv_mat) %*% info_m acov <- acov/(ncases - 1) rownames(acov) <- colnames(acov) <- colnames(adj.obj$p_deriv_mat) acov } sbchisq <- function(sem.obj, sem.data, adj=1e-04, use.fit=FALSE, use.ginv=FALSE){ props <- semProps(sem.obj) chisq <- props$chisq df <- props$df w_adf <- adfWmat(sem.data) w_mat <- mlWmat(sem.obj, use.fit = use.fit) p_deriv_mat <- deltaMatrix(sem.obj) ginvFlag <- FALSE invMat <- try(solve(t(p_deriv_mat) %*% w_mat %*% p_deriv_mat), silent = TRUE) if (inherits(invMat, "try-error") && isTRUE(use.ginv)) { invMat <- ginv(t(p_deriv_mat) %*% w_mat %*% p_deriv_mat) ginvFlag <- TRUE } res_u <- w_mat - (w_mat %*% p_deriv_mat %*% invMat %*% t(p_deriv_mat) %*% w_mat) ug <- res_u %*% w_adf scale_stat <- sum(diag(ug))/df chisq.scaled <- chisq/scale_stat p.old <- pchisq(chisq, df, lower.tail=FALSE) p <- pchisq(chisq.scaled, df, lower.tail=FALSE) ret <- list(chisq = chisq, t = sem.obj$t, df = df, p.old = p.old, c = scale_stat, chisq.scaled = chisq.scaled, p = p, w_mat = w_mat, p_deriv_mat = p_deriv_mat, w_adf = w_adf, res_u = res_u, N = length(sem.data[, 1]), ginvFlag = ginvFlag) class(ret) <- "adjchisq" return(ret) } adfWmat <- function(rawdata) { names <- colnames(rawdata) n <- nrow(rawdata) n.col <- ncol(rawdata) nc.star <- n.col * (n.col + 1)/2 nc2 <- n.col^2 z <- scale(rawdata, center=TRUE, scale=FALSE) sc <- vector(nc.star, mode="list") outnames <- vector(nc.star, mode="character") ind <- 
combn(n.col + 1, 2) ind[2, ] <- ind[2, ] - 1 for (q in 1:nc.star) { i <- ind[1, q] j <- ind[2, q] outnames[q] <- paste(names[i], names[j], sep="_") sc[[q]] <- z[, i] * z[, j] } names(sc) <- outnames adf_mat <- var(data.frame(sc)) * (n - 1)/n return(adf_mat) } mlWmat <- function(sem.obj, use.fit=FALSE) { p <- nrow(sem.obj$C) if (use.fit) { An <- sem.obj$C } else { An <- sem.obj$S } Dp <- Ktrans(p) An.inv <- solve(An) w_mat <- 0.5 * t(Dp) %*% kronecker(An.inv, An.inv) %*% Dp rownames(w_mat) <- vech(matrixNames(sem.obj$C)) colnames(w_mat) <- rownames(w_mat) return(w_mat) } deltaMatrix <- function(sem.object, adj=1e-04) { p.star <- sem.object$n * (sem.object$n + 1)/2 pars <- names(sem.object$coeff) nparms <- length(pars) delta.mat <- matrix(0, nparms, p.star) rownames(delta.mat) <- pars vars <- sem.object$var.names J <- sem.object$J m <- sem.object$m for (j in 1:nparms) { A <- sem.object$A P <- sem.object$P i <- which(rownames(sem.object$ram) == pars[j]) from <- sem.object$ram[i, 2] to <- sem.object$ram[i, 3] path_type <- sem.object$ram[i, 1][1] if (path_type == 1){ AA <- A[cbind(from, to)][1] adjust <- abs(AA) * adj A[cbind(from, to)] <- AA + adjust } else { PP <- P[cbind(to, from)][1] adjust <- PP * adj P[cbind(from, to)] <- P[cbind(to, from)] <- PP + adjust } I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) delta.mat[j, ] <- (vech(C) - vech(sem.object$C))/adjust } t(delta.mat) } semProps <- function(object){ N <- object$N n <- object$n t <- object$t n.fix <- object$n.fix list(N=N, n=n, t=t, n.fix=n.fix, df=n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2, chisq=object$criterion*(N - (!object$raw))) } Ktrans <- function(num.vars){ duplication.matrix(num.vars) } matrixNames <- function(mat, sep="_"){ outer(rownames(mat), colnames(mat), function(x, y) paste(x, y, sep=sep)) } semHessian <- function(w.mat, delta.mat){ ret <- t(delta.mat) %*% w.mat %*% delta.mat return(ret) } sem/R/optimizerNlminb.R0000644000175000017500000000246711701252753014712 0ustar 
nileshnilesh# last modified 2011-07-30 optimizerNlminb <- function(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...){ with(model.description, { obj <- objective(gradient=gradient) objective <- obj$objective grad <- if (gradient) obj$gradient else NULL if (!warn) save.warn <- options(warn=-1) res <- nlminb(start, objective, gradient=grad, model.description=model.description, control=list(trace=if(debug) 1 else 0, iter.max=maxiter, ...)) if (!warn) options(save.warn) result <- list() result$convergence <- res$convergence == 0 result$iterations <- res$iterations par <- res$par names(par) <- param.names result$par <- par if (!result$convergence) warning(paste('Optimization may not have converged; nlminb return code = ', res$convergence, '. Consult ?nlminb.\n', sep="")) result$criterion <- res$objective obj <- objective(par, model.description) C <- attr(obj, "C") rownames(C) <- colnames(C) <- var.names[observed] result$C <- C A <- attr(obj, "A") rownames(A) <- colnames(A) <- var.names result$A <- A P <- attr(obj, "P") rownames(P) <- colnames(P) <- var.names result$P <- P class(result) <- "semResult" result } ) } sem/R/specifyModel.R0000644000175000017500000002446012525463420014150 0ustar nileshnilesh# last modified 2015-05-14 by J. Fox specify.model <- function(...){ .Deprecated("specifyModel", package="sem") specifyModel(...) 
} specifyModel <- function(file="", text, exog.variances=FALSE, endog.variances=TRUE, covs, suffix="", quiet=FALSE){ add.variances <- function () { if (!quiet) message("NOTE: it is generally simpler to use specifyEquations() or cfa()\n", " see ?specifyEquations") variables <- need.variance() nvars <- length(variables) if (nvars == 0) return(model) message("NOTE: adding ", nvars, " variances to the model") paths <- character(nvars) par.names <- character(nvars) for (i in 1:nvars) { paths[i] <- paste(variables[i], "<->", variables[i]) par.names[i] <- paste("V[", variables[i], "]", sep = "") } model.2 <- cbind(c(model[, 1], paths), c(model[, 2], par.names), c(model[, 3], rep(NA, length(paths)))) class(model.2) <- "semmod" model.2 } need.variance <- function () { all.vars <- classifyVariables(model) exo.vars <- all.vars$exogenous end.vars <- all.vars$endogenous variables <- logical(0) for (paths in model[, 1]) { vars <- strip.white(paths) vars <- sub("-*>", "->", sub("<-*", "<-", vars)) vars <- sub("<->|<-", "->", vars) vars <- strsplit(vars, "->")[[1]] if (vars[1] != vars[2]) { for (a.variable in vars) { if (is.na(variables[a.variable])) variables[a.variable] <- TRUE } } else { variables[vars[1]] <- FALSE } } if (!exog.variances && length(exo.vars) > 0) variables[exo.vars] <- FALSE if (!endog.variances && length(end.vars) > 0) variables[end.vars] <- FALSE names(variables)[variables] } model <- if (!missing(text)) scan(text=text, what=list(path="", par="", start=1, dump=""), sep=",", strip.white=TRUE, comment.char="#", fill=TRUE, quiet=quiet) else scan(file=file, what=list(path="", par="", start=1, dump=""), sep=",", strip.white=TRUE, comment.char="#", fill=TRUE, quiet=quiet) # dump permits comma at line end model$path <- gsub("\\t", " ", model$path) model$par[model$par == ""] <- NA model <- cbind(model$path, model$par, model$start) if (!(missing(covs))){ for (cov in covs){ vars <- strsplit(cov, "[ ,]+")[[1]] nvar <- length(vars) for (i in 1:nvar){ for (j in i:nvar){ 
row <- c(paste(vars[i], "<->", vars[j]), if (i == j) paste("V[", vars[i], "]", sep="") else paste("C[", vars[i], ",", vars[j], "]", sep=""), NA) if (row[2] %in% model[,2]) next model <- rbind(model, row) } } } } model <- removeRedundantPaths(model, warn=FALSE) result <- add.variances() which.pars <- !is.na(result[, 2]) result[which.pars, 2] <- paste(result[which.pars, 2], suffix, sep="") result } print.semmod <- function(x, ...){ path <- x[,1] parameter <- x[,2] parameter[is.na(parameter)] <- "" startvalue <- as.numeric(x[,3]) startvalue[is.na(startvalue)] <- " " if (all(startvalue == " ")) print(data.frame(Path=path, Parameter=parameter), right=FALSE) else print(data.frame(Path=path, Parameter=parameter, StartValue=startvalue), right=FALSE) invisible(x) } classifyVariables <- function(model) { variables <- logical(0) for (paths in model[, 1]) { vars <- strip.white(paths) vars <- sub("-*>", "->", sub("<-*", "<-", vars)) if (grepl("<->", vars)){ vars <- strsplit(vars, "<->")[[1]] if (is.na(variables[vars[1]])) variables[vars[1]] <- FALSE if (is.na(variables[vars[2]])) variables[vars[2]] <- FALSE } else if (grepl("->", vars)){ vars <- strsplit(vars, "->")[[1]] if (is.na(variables[vars[1]])) variables[vars[1]] <- FALSE variables[vars[2]] <- TRUE } else if (grepl("<-", vars)){ vars <- strsplit(vars, "<-")[[1]] if (is.na(variables[vars[2]])) variables[vars[2]] <- FALSE variables[vars[1]] <- TRUE } else stop("incorrectly specified model") } list(endogenous=names(variables[variables]), exogenous=names(variables[!variables])) } strip.white <- function(x) gsub(' ', '', x) removeRedundantPaths <- function(model, warn=TRUE){ paths <- model[, 1] paths <- strip.white(paths) paths <- sub("-*>", "->", sub("<-*", "<-", paths)) start <- regexpr("<->|<-|->", paths) end <- start + attr(start, "match.length") - 1 arrows <- substr(paths, start, end) vars <- matrix(unlist(strsplit(paths, "<->|<-|->")), ncol=2, byrow=TRUE) for (i in 1:length(arrows)){ if (arrows[i] == "<-"){ arrows[i] <- 
"->" vars[i, ] <- vars[i, 2:1] } } vars <- cbind(vars, arrows) dupl.paths <- duplicated(vars) if (warn && any(dupl.paths)){ warning("the following duplicated paths were removed: ", paste(model[dupl.paths, 1], collapse=", ")) } model <- model[!dupl.paths, , drop=FALSE] class(model) <- "semmod" model } specifyEquations <- function(file="", text, ...){ par.start <- function(coef, eq){ if (length(grep("\\(", coef)) == 0){ return(c(coef, "NA")) } par.start <- strsplit(coef, "\\(")[[1]] if (length(par.start) != 2) stop("Parse error in equation: ", eq, '\n Start values must be given in the form "parameter(value)".') par <- par.start[[1]] start <- par.start[[2]] if (length(grep("\\)$", start)) == 0) stop("Parse error in equation: ", eq, "\n Unbalanced parentheses.") start <- sub("\\)", "", start) return(c(par, start)) } parseEquation <- function(eqn){ eq <- eqn eqn <- gsub("\\s*", "", eqn) eqn <- strsplit(eqn, "=")[[1]] if (length(eqn) != 2) stop("Parse error in equation: ", eq, "\n An equation must have a left- and right-hand side separated by =.") lhs <- eqn[1] rhs <- eqn[2] if (length(grep("^[cC]\\(", lhs)) > 0){ if (length(grep("\\)$", lhs)) == 0) stop("Parse error in equation: ", eq, "\n Unbalanced parentheses.") lhs <- sub("[cC]\\(", "", lhs) lhs <- sub("\\)", "", lhs) variables <- strsplit(lhs, ",")[[1]] if (length(variables) != 2) stop("Parse error in equation: ", eq, "\n A covariance must be in the form C(var1, var2) = cov12") if (not.number(rhs)){ par.start <- par.start(rhs, eq) if (not.number(par.start[2]) && (par.start[2] != "NA")) stop("Parse error in equation: ", eq, "\n Start values must be numeric constants.") ram <- paste(variables[1], " <-> ", variables[2], ", ", par.start[1], ", ", par.start[2], sep="") } else{ ram <- paste(variables[1], " <-> ", variables[2], ", NA, ", rhs, sep="") } } else if (length(grep("^[vV]\\(", lhs)) > 0){ lhs <- sub("[vV]\\(", "", lhs) if (length(grep("\\)$", lhs)) == 0) stop("Parse error in equation: ", eq, "\n Unbalanced 
parentheses.") lhs <- sub("\\)", "", lhs) if (not.number(rhs)){ par.start <- par.start(rhs, eq) if (not.number(par.start[2]) && (par.start[2] != "NA")) stop("Parse error in equation: ", eq, "\n Start values must be numeric constants.") ram <- paste(lhs, " <-> ", lhs, ", ", par.start[1], ", ", par.start[2], sep="") } else{ ram <- paste(lhs, " <-> ", lhs, ", NA, ", rhs, sep="") } } else{ terms <- strsplit(rhs, "\\+")[[1]] terms <- strsplit(terms, "\\*") ram <- character(length(terms)) for (term in 1:length(terms)){ trm <- terms[[term]] if (length(trm) != 2) stop("Parse error in equation: ", eq, '\n The term "', trm, '" is malformed.', '\n Each term on the right-hand side of a structural equation must be of the form "parameter*variable".') coef <- trm[1] if (not.number(coef)){ par.start <- par.start(coef, eq) if (not.number(par.start[2]) && (par.start[2] != "NA")) stop("Parse error in equation: ", eq, "\n Start values must be numeric constants.") ram[term] <- paste(trm[2], " -> ", lhs, ", ", par.start[1], ", ", par.start[2], sep="") } else{ ram[term] <- paste(trm[2], " -> ", lhs, ", NA, ", coef, sep="") } } } ram } equations <- if (!missing(text)) scan(text=text, what="", sep=";", strip.white=TRUE, comment.char="#") else scan(file=file, what="", sep=";", strip.white=TRUE, comment.char="#") equations2 <- character(0) eqn <- 0 skip <- FALSE for(equation in equations){ eqn <- eqn + 1 if (skip){ skip <- FALSE next } if (substring(equation, 1, 1) == "+"){ equations2[length(equations2)] <- paste(equations2[length(equations2)], equation) } else if (substring(equation, nchar(equation)) == "+"){ equations2 <- c(equations2, paste(equation, equations[eqn + 1])) skip <- TRUE } else equations2 <- c(equations2, equation) } ram <- unlist(lapply(equations2, parseEquation)) specifyModel(text=ram, ..., quiet=TRUE) } # the following function (not exported) checks whether a text string can be converted into a number not.number <- function(constant){ save <- options(warn = -1) 
on.exit(save) is.na(as.numeric(constant)) } sem/R/objectiveML2.R0000644000175000017500000000360511715565210014010 0ustar nileshnilesh# last modified 2012-01-11 by J. Fox objectiveML2 <- function(gradient=TRUE){ result <- list( objective = function(par, model.description){ with(model.description, { A <- P <- matrix(0, m, m) val <- ifelse (fixed, ram[,5], par[sel.free]) A[arrows.1] <- val[one.head] P[arrows.2t] <- P[arrows.2] <- val[!one.head] I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) Cinv <- solve(C) f <- sum(diag(S %*% Cinv)) + log(det(C)) - n - logdetS grad <- NULL if (gradient){ grad.P <- correct * t(I.Ainv) %*% t(J) %*% Cinv %*% (C - S) %*% Cinv %*% J %*% I.Ainv grad.A <- grad.P %*% P %*% t(I.Ainv) grad <- rep(0, t) grad[sort(unique.free.1)] <- tapply(grad.A[arrows.1.free],ram[ram[,1]==1 & ram[,4]!=0, 4], sum) grad[sort(unique.free.2)] <- tapply(grad.P[arrows.2.free],ram[ram[,1]==2 & ram[,4]!=0, 4], sum) } attributes(f) <- list(C=C, A=A, P=P, gradient=grad) f } ) } ) if (gradient) result$gradient <- function(par, model.description){ with(model.description, { A <- P <- matrix(0, m, m) val <- ifelse (fixed, ram[,5], par[sel.free]) A[arrows.1] <- val[one.head] P[arrows.2t] <- P[arrows.2] <- val[!one.head] I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) Cinv <- solve(C) grad.P <- correct * t(I.Ainv) %*% t(J) %*% Cinv %*% (C - S) %*% Cinv %*% J %*% I.Ainv grad.A <- grad.P %*% P %*% t(I.Ainv) grad <- rep(0, t) grad[sort(unique.free.1)] <- tapply(grad.A[arrows.1.free],ram[ram[,1]==1 & ram[,4]!=0, 4], sum) grad[sort(unique.free.2)] <- tapply(grad.P[arrows.2.free],ram[ram[,1]==2 & ram[,4]!=0, 4], sum) attributes(grad) <- list(C=C, A=A, P=P, gradient=grad) grad } ) } class(result) <- "semObjective" result } sem/R/optimizerSem.R0000644000175000017500000000621413563624470014220 0ustar nileshnilesh# last modified 2019-11-15 by J. Fox # Modified for Compiled Objective and nlm in C/C++ by Zhenghua Nie. 
optimizerSem <- function(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...){ with(model.description, { obj <- objective(gradient=gradient)$objective typsize <- if (par.size == 'startvalues') abs(start) else rep(1, t) #we move typesize in csem.cpp. # objectiveCompiled <- "objectiveML" # if(identical(objective, objectiveCompiledML) || identical(objective, objectiveML)) # objectiveCompiled <- "objectiveML" # if(identical(objective, objectiveCompiledGLS) || identical(objective, objectiveGLS)) # objectiveCompiled <- "objectiveGLS" # objectiveCompiled <- deparse(substitute(objective)) # if (!objectiveCompiled %in% c("objectiveML", "objectiveGLS")) stop("optimizerSem requires the objectiveML or objectiveGLS objective function") # objectiveCompiled <- "objectiveML" if(identical(objective, objectiveML)) objectiveCompiled <- "objectiveML" else if (identical(objective, objectiveGLS)) objectiveCompiled <- "objectiveGLS" else if (identical(objective, objectiveFIML)) objectiveCompiled <- "objectiveFIML" else stop("optimizerSem requires the objectiveML or objectiveGLS or objectiveFIML objective function") if (!warn) save.warn <- options(warn=-1) res <- CompiledSolve(model.description=model.description, start=start, objective=objectiveCompiled, gradient=gradient, typsize=typsize, debug=debug, maxiter=maxiter) if (!warn) options(save.warn) result <- list() result$convergence <- res$code <= 2 result$iterations <- res$iterations par <- res$estimate names(par) <- param.names result$par <- par if (!result$convergence) warning(paste('Optimization may not have converged; nlm return code = ', res$code, '. 
Consult ?nlm.\n', sep="")) vcov <- matrix(NA, t, t) qr.hess <- try(qr(res$hessian), silent=TRUE) if (inherits(qr.hess, "try-error")){ warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n") } else if (qr.hess$rank < t){ warning(' singular Hessian: model is probably underidentified.\n') which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)] result$aliased <- param.names[which.aliased] } else { vcov <- (2/(N - (!raw))) * solve(res$hessian) if (any(diag(vcov) < 0)) { result$aliased <- param.names[diag(vcov) < 0] warning("Negative parameter variances.\nModel may be underidentified.\n") } } colnames(vcov) <- rownames(vcov) <- param.names result$vcov <- vcov result$criterion <- res$minimum # c(result$obj) - n - log(det(S)) # if(identical(objective, objectiveML) || identical(objective, objectiveGLS)){ C <- res$C A <- res$A P <- res$P # } else { # obj <- obj(par, model.description) # C <- attr(obj, "C") # A <- attr(obj, "A") # P <- attr(obj, "P") # } rownames(C) <- colnames(C) <- var.names[observed] result$C <- C rownames(A) <- colnames(A) <- var.names result$A <- A rownames(P) <- colnames(P) <- var.names result$P <- P class(result) <- "semResult" result } ) } sem/R/pathDiagram.R0000644000175000017500000004153612701034653013747 0ustar nileshnilesh# with contributions by Adam Kramer and Michael Friendly (originally by J. Fox) # last modified 2015-06-09 by J. 
Fox globalVariables("dot") Greek <- read.table(system.file("etc/GreekLetters.txt", package="sem"), as.is=TRUE) math <- function(text, html.only=FALSE, hat=FALSE){ if (length(text) > 1) { result <- sapply(text, math, html=html.only, hat=hat) names(result) <- names(text) return(result) } # subscripts <- c("₀", "₁", "₂", "₃", "₄", "₅", "₆", # "₇", "₈", "₉") subscripts <- c("₀", "₁", "₂", "₃", "₄", "₅", "₆", "₇", "₈", "₉") superscripts <- c("⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹") names(subscripts) <- names(superscripts) <- 0:9 hat <- if (hat) "̂" else "" text <- gsub(" ", "", text) symbol <- regexpr("^[a-zA-Z]+", text) if (symbol != 1) stop("text does not start with an alphabetic symbol name") symbol <- if (html.only) { paste0("&", substring(text, 1, attr(symbol, "match.length")), ";") } else{ s <- substring(text, 1, attr(symbol, "match.length")) s <- Greek[s, "decimal"] if (is.na(s)) stop(s, " is not a Greek letter") s } subscript <- regexpr("_\\{", text) subscript <- if (subscript >= 1){ subscript <- substring(text, subscript + 2) endbrace <- regexpr("\\}", subscript) if (endbrace < 1) stop("unmatched closing brace in ", text) substring(subscript, 1, endbrace - 1) } else "" if (subscript != ""){ subscript <- unlist(strsplit(subscript, split="")) subscript <- subscripts[subscript] if (any(is.na(subscript))) stop ("invalid non-numeral subscript") subscript <- paste(subscript, collapse="") } superscript <- regexpr("\\^\\{", text) superscript <- if (superscript >= 1){ superscript <- substring(text, superscript + 2) endbrace <- regexpr("\\}", superscript) if (endbrace < 1) stop("unmatched closing brace in ", text) substring(superscript, 1, endbrace - 1) } else "" if (superscript != ""){ superscript <- unlist(strsplit(superscript, split="")) superscript <- superscripts[superscript] if (any(is.na(superscript))) stop ("invalid non-numeral superscript") superscript <- paste(superscript, collapse="") } paste0(symbol, hat, subscript, superscript) } path.diagram <- 
function(...) { .Deprecated("pathDiagram", package = "sem") pathDiagram(...) } pathDiagram <- function (model, ...) { UseMethod("pathDiagram") } pathDiagram.semmod <- function(model, obs.variables, ...) { parse.path <- function(path) { path.1 <- gsub("-", "", gsub(" ","", path)) direction <- if (regexpr("<>", path.1) > 0) 2 else if (regexpr("<", path.1) > 0) - 1 else if (regexpr(">", path.1) > 0) 1 else stop(paste("ill-formed path:", path)) path.1 <- strsplit(path.1, "[<>]")[[1]] list(first = path.1[1], second = path.1[length(path.1)], direction = direction) } if ((!is.matrix(model)) | ncol(model) != 3) stop ("model argument must be a 3-column matrix") startvalues <- as.numeric(model[,3]) par.names <- model[,2] n.paths <- length(par.names) heads <- from <- to <- rep(0, n.paths) for (p in 1:n.paths) { path <- parse.path(model[p,1]) heads[p] <- abs(path$direction) to[p] <- path$second from[p] <- path$first if (path$direction == -1) { to[p] <- path$first from[p] <- path$second } } ram <- matrix(0, p, 5) all.vars <- unique(c(to, from)) latent.vars <- setdiff(all.vars, obs.variables) vars <- c(obs.variables, latent.vars) pars <- na.omit(unique(par.names)) ram[,1] <- heads ram[,2] <- apply(outer(vars, to, "=="), 2, which) ram[,3] <- apply(outer(vars, from, "=="), 2, which) par.nos <- apply(outer(pars, par.names, "=="), 2, which) if (length(par.nos) > 0) ram[,4] <- unlist(lapply(par.nos, function(x) if (length(x) == 0) 0 else x)) ram[,5] <- startvalues colnames(ram) <- c("heads", "to", "from", "parameter", "start value") pars <- unique(na.omit(par.names)) coeff <- rep(0, length(pars)) names(coeff) <- pars fake.sem <- list( ram = ram, n = length(obs.variables), var.names = vars, coeff = coeff, semmod = model ) class(fake.sem) <- "sem" pathDiagram(fake.sem, ...) 
} pathDiagram.sem <- function (model, file = "pathDiagram", style = c("ram", "traditional"), output.type = c("html", "graphics", "dot"), graphics.fmt = "pdf", dot.options = NULL, size = c(8, 8), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 10), digits = 2, rank.direction = c("LR", "TB"), min.rank = NULL, max.rank = NULL, same.rank = NULL, variables = model$var.names, var.labels, parameters, par.labels, ignore.double = TRUE, ignore.self = FALSE, error.nodes = TRUE, edge.labels = c("names", "values", "both"), edge.colors = c("black", "black"), edge.weight = c("fixed", "proportional"), node.colors = c("transparent", "transparent", "transparent"), standardize = FALSE, ...) { Dot <- function(..., semicolon = TRUE, newline = TRUE) { cat(file = handle, paste0(..., if (semicolon) ";" else "", if (newline) "\n" else "")) } style <- match.arg(style) output.type <- match.arg(output.type) edge.labels <- match.arg(edge.labels) edge.weight <- match.arg(edge.weight) rank.direction <- match.arg(rank.direction) if (output.type == "html") { handle <- textConnection("dot", "w") } else { dot.file <- paste0(file, ".dot") handle <- file(dot.file, "w") if (output.type == "graphics") graph.file <- paste0(file, ".", graphics.fmt) } on.exit(close(handle)) Dot("digraph \"", deparse(substitute(model)), "\" {", semicolon = FALSE) Dot(" rankdir=", rank.direction) Dot(" size=\"", size[1], ",", size[2], "\"") Dot( " node [fontname=\"", node.font[1], "\" fontsize=", node.font[2], " fillcolor=\"", node.colors[1], "\" shape=box style=filled]" ) Dot(" edge [fontname=\"", edge.font[1], "\" fontsize=", edge.font[2], "]") Dot(" center=1") if (!is.null(min.rank)) { min.rank <- paste0("\"", min.rank, "\"") min.rank <- gsub(",", "\" \"", gsub(" ", "", min.rank)) Dot(" {rank=min ", min.rank, "}", semicolon = FALSE) } if (!is.null(max.rank)) { max.rank <- paste0("\"", max.rank, "\"") max.rank <- gsub(",", "\" \"", gsub(" ", "", max.rank)) Dot(" {rank=max ", max.rank, "}", semicolon = FALSE) } 
if (!is.null(same.rank)) { for (s in 1:length(same.rank)) { same <- paste0("\"", same.rank[s], "\"") same <- gsub(",", "\" \"", gsub(" ", "", same)) Dot(" {rank=same ", same, "}", semicolon = FALSE) } } latent <- variables[-(1:model$n)] for (lat in latent) { Dot(" \"", lat, "\" [shape=ellipse]", semicolon = FALSE) } endogenous <- classifyVariables(model$semmod)$endogenous endogenous <- variables[apply(outer(endogenous, model$var.names, "=="), 1, which)] if (style == "traditional") { variables <- c(variables, paste0(endogenous, ".error")) error.color <- if (length(node.colors) < 3) node.colors[1] else node.colors[3] } for (endog in endogenous) { Dot(" \"", endog, "\" [fillcolor=\"", node.colors[2], "\"]", semicolon = FALSE) if (style == "traditional") { if (error.nodes) Dot( " \"", endog, ".error\" [shape=ellipse] [fillcolor=\"", error.color, "\"]", semicolon = FALSE ) else Dot( " \"", endog, ".error\" [shape=ellipse width=0 height=0 fixedsize=true label=\"\"] [fillcolor=\"", error.color, "\"]", semicolon = FALSE ) } } ram <- model$ram if (missing(parameters)) { par.names <- names(coef(model)) rownames(ram)[ram[, "parameter"] != 0] <- par.names[ram[, "parameter"]] rownames(ram)[ram[, "parameter"] == 0] <- ram[ram[, "parameter"] == 0, "start value"] parameters <- rownames(ram) } if (standardize) ram[, 5] <- stdCoef(model)[, 2] else ram[names(model$coeff), 5] <- model$coeff coefs <- ram[, 5] # handle equality constraints, if any na.coefs <- is.na(coefs) if (any(na.coefs)) { for (coef in which(na.coefs)) { ram[coef, 5] <- (ram[ram[coef, 4] == ram[, 4], 5])[1] # paste in the estimate } } values <- round(ram[, 5], digits) heads <- ram[, 1] to <- ram[, 2] from <- ram[, 3] if (!missing(par.labels)) { check <- names(par.labels) %in% parameters if (any(!check)) { msg <- if (sum(!check) > 1) paste( "The following parameters do not appear in the model:", paste(names(par.labels)[!check], collapse = ", ") ) else paste("The following parameter does not appear in the model:", 
names(par.labels)[!check]) warning(msg) par.labels <- par.labels[check] } names(parameters) <- parameters parameters[names(par.labels)] <- par.labels } labels <- if (edge.labels == "names") parameters else if (edge.labels == "values") values else paste(parameters, values, sep = "=") colors <- ifelse(values > 0, edge.colors[1], edge.colors[2]) direction <- ifelse((heads == 2), " dir=both", "") lineweight <- rep(1, nrow(ram)) if (edge.weight == "proportional") { lineweight <- abs(values) / mean(values) if (!standardize) warning("proportional edge weights requested for an unstandardized model") } if (style == "ram") { for (par in 1:nrow(ram)) { if ((!ignore.double) || (heads[par] == 1)) { if (ignore.self && to[par] == from[par]) next Dot( " \"", variables[from[par]], "\" -> \"", variables[to[par]], "\" [label=\"", labels[par], "\"", direction[par], " color=", colors[par], " penwidth=", round(lineweight[par] + 0.001, 3), "]" ) } } } else for (par in 1:nrow(ram)) { # style == "traditional" if (heads[par] == 1) { Dot( " \"", variables[from[par]], "\" -> \"", variables[to[par]], "\" [label=\"", labels[par], "\"", direction[par], " color=", colors[par], " penwidth=", round(lineweight[par] + 0.001, 3), "]" ) } else if (variables[to[par]] %in% endogenous) { if (variables[to[par]] == variables[from[par]]) { # convert self-arrow to residual path lab <- labels[par] val <- round(sqrt(values[par]), digits = digits) lab <- if (edge.labels == "names") paste0("sqrt(", lab, ")") else if (edge.labels == "values") val else paste0("sqrt(", parameters[par], ")=", val) Dot( " \"", variables[to[par]], ".error\" -> \"", variables[to[par]], "\" [color=", edge.colors[1], " label=\"", lab, "\" penwidth=", round(sqrt(lineweight[par]) + 0.001, 3)," ]" ) } else{ # convert endogenous covariance to error covariance Dot( " \"", variables[to[par]], ".error\" -> \"", variables[from[par]], ".error\" [dir=both label=\"", labels[par], "\" color=", colors[par], " penwidth=", round(lineweight[par] + 0.001, 
3), "]" ) } } else if (!ignore.double && (variables[to[par]] != variables[from[par]])) { Dot( " \"", variables[from[par]], "\" -> \"", variables[to[par]], "\" [label=\"", labels[par], "\"", direction[par], " color=", colors[par], " penwidth=", round(lineweight[par] + 0.001, 3), "]" ) } } if (!missing(var.labels)) { check <- names(var.labels) %in% variables if (any(!check)) { msg <- if (sum(!check) > 1) paste( "The following variables do not appear in the model:", paste(names(var.labels)[!check], collapse = ", ") ) else paste("The following variable does not appear in the model:", names(var.labels)[!check]) warning(msg) var.labels <- var.labels[check] } Dot(" // variable labels: ", semicolon = FALSE) lines <- paste0(' "', names(var.labels), '" [label="', var.labels, '"];\n') Dot(paste(lines, collapse = ""), semicolon = FALSE, newline = FALSE) } Dot("}", semicolon = FALSE) if (output.type == "graphics") { cmd <- paste0("dot -T", graphics.fmt, " -o ", graph.file, " -Gcharset=latin1 ", dot.options, " ", dot.file) cat("Running ", cmd, "\n") result <- try(system(cmd)) } if (output.type == "html" && requireNamespace("DiagrammeR")) { print(DiagrammeR::DiagrammeR(textConnection(dot), type = "grViz")) } result <- if (output.type == "html") dot else readLines(dot.file) invisible(result) } sem/R/readMoments.R0000644000175000017500000000130512423534616013777 0ustar nileshnilesh# last modified 2014-10-27 by J. Fox read.moments <- function(...){ .Deprecated("readMoments", package="sem") readMoments(...) 
} readMoments <- function(file="", text, diag=TRUE, names=as.character(paste("X", 1:n, sep=""))){ elements <- if (!missing(text)) scan(text=text) else scan(file=file) m <- length(elements) d <- if (diag) 1 else -1 n <- floor((sqrt(1 + 8*m) - d)/2) if (m != n*(n + d)/2) stop("wrong number of elements (cannot make square matrix)") if (length(names) != n) stop("wrong number of variable names") X <- diag(n) X[upper.tri(X, diag=diag)] <- elements rownames(X) <- colnames(X) <- names t(X) } sem/R/multigroup.R0000644000175000017500000012214413563624470013741 0ustar nileshnilesh### multigroup SEMs # last modified J. Fox 2013-06-14 ## model definition multigroupModel <- function(..., groups=names(models), allEqual=FALSE){ models <- list(...) if (length (models) == 1){ if (missing(groups)) stop("group names are missing, yet only 1 model is defined") ngroups <- length(groups) model <- models[[1]] models <- vector(ngroups, mode="list") for (g in 1:ngroups) models[[g]] <- model names(models) <- groups if (!allEqual){ for (g in 1:ngroups){ model <- models[[g]] models[[g]][!is.na(model[, 2]), 2] <- paste(model[!is.na(model[, 2]), 2], ".", groups[g], sep="") } } } else { if (is.null(names(models)) && missing(groups)) groups <- paste("Group.", 1:length(models), sep="") names(models) <- groups } class(models) <- "semmodList" models } print.semmodList <- function(x, ...){ cat("\nMultigroup Structural Equation Model\n") groups <- names(x) for (group in groups){ cat("\n Group: ", group, "\n\n") print(x[[group]]) } invisible(x) } ## sem() method for semmodList objects sem.semmodList <- function(model, S, N, data, raw=FALSE, fixed.x=NULL, robust=!missing(data), formula, group="Group", debug=FALSE, ...){ data.out <- NULL if (missing(S)){ if (missing(data)) stop("S and data cannot both be missing") data.df <- inherits(data, "data.frame") if (data.df && missing(group)) stop("S and group cannot both be missing") if (data.df){ if (!is.factor(data[, group])) stop("Groups variable, ", group, ", 
is not a factor") levels <- levels(data[, group]) if (missing(formula)) formula <- as.formula(paste("~ . -", group)) } else { if (!all(sapply(data, function(d) inherits(d, "data.frame")))) stop("data must be a data frame or list of data frames") levels <- names(data) if (is.null(levels)) levels <- paste("Group", seq(along=data), sep=".") if (missing(formula)) formula <- ~ . } G <- length(levels) if (is.list(formula) && length(formula) != G) stop("number of formulas, ", length(formula), ", not equal to number of groups, ", G, sep="") if (is.list(formula)){ if (!all(names(model) == names(formula))) warning("names of groups (", paste(names(model), collapse=", "), ") is not the same as names of formulas in formula argument (", paste(names(formula), collapse=", "), ")") } S <- vector(G, mode="list") names(S) <- levels N <- numeric(G) data.out <- vector(G, mode="list") for (g in 1:G){ data.group <- if (data.df) subset(data, subset = data[, group] == levels[g]) else data[[g]] N.all <- nrow(data.group) form <- if (is.list(formula)) formula[[g]] else formula data.group <- model.matrix(form, data=data.group) colnames(data.group)[colnames(data.group) == "(Intercept)"] <- "Intercept" N[g] <- nrow(data.group) if (N[g] < N.all) warning(N.all - N[g]," observations removed due to missingness in group ", levels[g]) S[[g]] <- if (raw) rawMoments(data.group) else{ data.group <- data.group[, colnames(data.group) != "Intercept"] cov(data.group) } data.out[[g]] <- data.group } } else G <- length(S) if (length(model) != G) stop("number of group models, ", length(model), ", not equal to number of moment/data matrices, ", G, sep="") pars <- unique(na.omit(unlist(lapply(model, function(mod) mod[, 2])))) vars <- rams <- vector(length(model), mode="list") all.par.names <- character(0) all.pars <- numeric(0) for (i in 1:G){ obs.variables <- colnames(S[[i]]) mod <- model[[i]] if ((!is.matrix(mod)) | ncol(mod) != 3) stop("model argument must be a 3-column matrix") startvalues <- as.numeric(mod[, 
3]) par.names <- mod[, 2] n.paths <- length(par.names) heads <- from <- to <- rep(0, n.paths) for (p in 1:n.paths){ path <- parse.path(mod[p, 1]) heads[p] <- abs(path$direction) to[p] <- path$second from[p] <- path$first if (path$direction == -1) { to[p] <- path$first from[p] <- path$second } } ram <- matrix(0, n.paths, 5) all.vars <- unique(c(to, from)) latent.vars <- setdiff(all.vars, obs.variables) not.used <- setdiff(obs.variables, all.vars) if (length(not.used) > 0){ rownames(S[[i]]) <- colnames(S[[i]]) <- obs.variables obs.variables <- setdiff(obs.variables, not.used) S[[i]] <- S[[i]][obs.variables, obs.variables] data.out[[i]] <- data.out[[i]][, obs.variables] warning("The following observed variables are in the input covariance or raw-moment matrix for group ", i, " but do not appear in the model:\n", paste(not.used, collapse=", "), "\n") } vars[[i]] <- c(obs.variables, latent.vars) ram[,1] <- heads ram[,2] <- apply(outer(vars[[i]], to, "=="), 2, which) ram[,3] <- apply(outer(vars[[i]], from, "=="), 2, which) par.nos <- apply(outer(pars, par.names, "=="), 2, which) if (length(par.nos) > 0) ram[,4] <- unlist(lapply(par.nos, function(x) if (length(x) == 0) 0 else x)) ram[,5]<- startvalues colnames(ram) <- c("heads", "to", "from", "parameter", "start value") rams[[i]] <- ram all.pars <- c(all.pars, par.nos) all.par.names <- c(all.par.names, par.names) } all.pars <- unique(unlist(all.pars)) all.par.names <- unique(na.omit(all.par.names)) class(rams) <- "msemmod" result <- sem(rams, S, N, group=group, groups=names(model), raw=raw, fixed.x=fixed.x, param.names=all.par.names[all.pars], var.names=vars, debug=debug, ...) 
result$semmodList <- model result$data <- if(missing(data)) NULL else data.out if (robust && !missing(data) && inherits(result, "msemObjectiveML")){ res <- robustVcovMsem(result) result$robust.vcov <- res$vcov result$chisq.scaled <- res$chisq.scaled result$adj.objects <- res$adj.objects } result } parse.path <- function(path) { path.1 <- gsub("-", "", gsub(" ","", path)) direction <- if (regexpr("<>", path.1) > 0) 2 else if (regexpr("<", path.1) > 0) -1 else if (regexpr(">", path.1) > 0) 1 else stop(paste("ill-formed path:", path)) path.1 <- strsplit(path.1, "[<>]")[[1]] list(first=path.1[1], second=path.1[length(path.1)], direction=direction) } ## sem() method for msemmod objects sem.msemmod <- function(model, S, N, start.fn=startvalues, group="Group", groups=names(model), raw=FALSE, fixed.x, param.names, var.names, debug=FALSE, analytic.gradient=TRUE, warn=FALSE, maxiter=5000, par.size = c("ones", "startvalues"), start.tol = 1e-06, start=c("initial.fit", "startvalues"), initial.maxiter=1000, optimizer = optimizerMsem, objective = msemObjectiveML, ...){ par.size <- match.arg(par.size) start <- match.arg(start) G <- length(groups) if (length(model) != G || length(N) != G) stop("inconsistent number of groups in model (", length(model), "), S (", G, "), and N (", length(N), ") arguments") if (is.null(names(S))) names(S) <- groups if (is.null(names(N))) names(N) <- groups if (is.null(names(model))) names(model) <- groups if (!all(groups == names(model))) warning("names of groups (", paste(groups, collapse=", "), ") is not the same as names of models in model argument (", paste(names(model), collapse=", "), ")") if (!all(groups == names(S))) warning("names of groups (", paste(groups, collapse=", "), ") is not the same as names of moment matrices in S argument (", paste(names(S), collapse=", "), ")") if (!all(groups == names(N))) warning("names of groups (", paste(groups, collapse=", "), ") is not the same as names of sample sizes in N argument (", paste(names(N), 
collapse=", "), ")") if (length(fixed.x) == 1) fixed.x <- lapply(1:G, function(g) fixed.x) n.fix <- 0 if (!is.null(fixed.x)){ n.fix <- numeric(G) for (g in 1:G){ fx <- fixed.x[[g]] n.fix[g] <- length(fx) if (n.fix[g] == 0) next fx <- which(rownames(S[[g]]) %in% fx) mod <- model[[g]] for (i in 1:n.fix[g]){ for (j in 1:i){ mod <- rbind(mod, c(2, fx[i], fx[j], 0, S[[g]][fx[i], fx[j]])) } } model[[g]] <- mod } } t <- max(sapply(model, function(r) max(r[, 4]))) if(missing(param.names)) param.names <- paste("Parameter", 1:t, sep=".") if (missing(var.names)) var.names <- lapply(model, function(mod) paste("Variable", 1:max(mod[, c(2,3)]), sep=".")) n <- sapply(S, nrow) m <- sapply(model, function(r) max(r[, c(2, 3)])) logdetS <- sapply(S, function(s) log(det(unclass(s)))) sel.free.2 <- sel.free.1 <- arrows.2.free <- arrows.1.free <- arrows.2t <- arrows.2 <- arrows.1 <- two.free <- one.free <- one.head <- sel.free <- fixed <- par.posn <- correct <- J <- vector(mode="list", length=G) initial.iterations <- if (start == "initial.fit") numeric(G) else NULL for (g in 1:G){ mod <- model[[g]] # tt <- sum(mod[, 4] != 0) # mod[mod[, 4] != 0, 4] <- 1:tt initial.pars <- mod[, 4] unique.pars <- unique(initial.pars) unique.pars <- unique.pars[unique.pars != 0] tt <- length(unique.pars) if (tt > 0) for (i in 1:tt) mod[initial.pars == unique.pars[i], 4] <- i startvals <- if (start == "initial.fit"){ prelim.fit <- sem(mod, S[[g]], N=N[[g]], raw=raw, param.names=if(tt > 0) as.character(1:tt) else character(0), var.names=as.character(1:m[[g]]), maxiter=initial.maxiter) initial.iterations[g] <- prelim.fit$iterations coef(prelim.fit) } else start.fn(S[[g]], mod) model[[g]][mod[, 4] != 0, 5] <- ifelse(is.na(mod[mod[, 4] != 0, 5]), startvals, mod[mod[, 4] != 0, 5]) J[[g]] <- matrix(0, n[g], m[g]) correct[[g]] <- matrix(2, m[g], m[g]) diag(correct[[g]]) <- 1 observed <- 1:n[g] J[[g]][cbind(observed, observed)] <- 1 par.posn[[g]] <- sapply(1:t, function(i) which(model[[g]][,4] == i)[1]) 
colnames(model[[g]]) <- c("heads", "to", "from", "parameter", "start value") rownames(model[[g]]) <- rep("", nrow(model[[g]])) fixed[[g]] <- model[[g]][, 4] == 0 sel.free[[g]] <- model[[g]][, 4] sel.free[[g]][fixed[[g]]] <- 1 one.head[[g]] <- model[[g]][, 1] == 1 one.free[[g]] <- which( (!fixed[[g]]) & one.head[[g]] ) two.free[[g]] <- which( (!fixed[[g]]) & (!one.head[[g]]) ) arrows.1[[g]] <- model[[g]][one.head[[g]], c(2, 3), drop=FALSE] arrows.2[[g]] <- model[[g]][!one.head[[g]], c(2, 3), drop=FALSE] arrows.2t[[g]] <- model[[g]][!one.head[[g]], c(3 ,2), drop=FALSE] arrows.1.free[[g]] <- model[[g]][one.free[[g]], c(2, 3), drop=FALSE] arrows.2.free[[g]] <- model[[g]][two.free[[g]], c(2, 3), drop=FALSE] sel.free.1[[g]] <- sel.free[[g]][one.free[[g]]] sel.free.2[[g]] <- sel.free[[g]][two.free[[g]]] } unique.free.1 <- lapply(sel.free.1, unique) unique.free.2 <- lapply(sel.free.2, unique) startvals <- numeric(t) for (j in 1:t) startvals[j] <- mean(unlist(sapply(model, function(r) r[r[, 4] == j, 5])), na.rm=TRUE) model.description <- list(G=G, m=m, n=n, t=t, fixed=fixed, ram=model, sel.free=sel.free, arrows.1=arrows.1, one.head=one.head, arrows.2=arrows.2, arrows.2t=arrows.2t, J=J, S=S, logdetS=logdetS, N=N, raw=raw, correct=correct, unique.free.1=unique.free.1, unique.free.2=unique.free.2, arrows.1.free=arrows.1.free, arrows.2.free=arrows.2.free, param.names=param.names, var.names=var.names) result <- optimizer(start=startvals, objective=objective, gradient=analytic.gradient, maxiter=maxiter, debug=debug, par.size=par.size, model.description=model.description, warn=warn, ...) 
if (!is.na(result$iterations)) if(result$iterations >= maxiter) warning("maximum iterations exceeded") result <- c(result, list(ram=model, param.names=param.names, var.names=var.names, group=group, groups=groups, S=S, N=N, J=J, n=n, m=m, t=t, raw=raw, optimizer=optimizer, objective=objective, fixed.x=fixed.x, n.fix=n.fix, initial.iterations=initial.iterations)) cls <- gsub("\\.", "", deparse(substitute(objective))) cls <- gsub("2", "", cls) class(result) <- c(cls, "msem") result } ## ML objective function for multigroup SEMs msemObjectiveML2 <- function(gradient=TRUE){ result <- list( objective = function(par, model.description){ with(model.description, { f <- numeric(G) AA <- PP <- CC <- vector(G, mode="list") grad.all <- if (gradient) rep(0, t) else NULL for (g in 1:G){ A <- P <- matrix(0, m[g], m[g]) val <- ifelse (fixed[[g]], ram[[g]][, 5], par[sel.free[[g]]]) A[arrows.1[[g]]] <- val[one.head[[g]]] P[arrows.2t[[g]]] <- P[arrows.2[[g]]] <- val[!one.head[[g]]] I.Ainv <- solve(diag(m[g]) - A) C <- J[[g]] %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J[[g]]) Cinv <- solve(C) f[g] <- sum(diag(S[[g]] %*% Cinv)) + log(det(C)) - n[[g]] - logdetS[g] CC[[g]] <- C AA[[g]] <- A PP[[g]] <- P if (gradient){ grad.P <- correct[[g]] * t(I.Ainv) %*% t(J[[g]]) %*% Cinv %*% (C - S[[g]]) %*% Cinv %*% J[[g]] %*% I.Ainv grad.A <- grad.P %*% P %*% t(I.Ainv) grad <- rep(0, t) grad[sort(unique.free.1[[g]])] <- tapply(grad.A[arrows.1.free[[g]]], ram[[g]][ram[[g]][,1]==1 & ram[[g]][,4]!=0, 4], sum) grad[sort(unique.free.2[[g]])] <- tapply(grad.P[arrows.2.free[[g]]], ram[[g]][ram[[g]][,1]==2 & ram[[g]][,4]!=0, 4], sum) grad.all <- grad.all + ((N[g] - (!raw))/(sum(N) - (!raw)*G))*grad } } ff <- f f <- sum((N - (!raw))*f)/(sum(N) - (!raw)*G) attributes(f) <- list(gradient=grad.all, A=AA, P=PP, C=CC, f=ff) f }) } ) if (gradient) result$gradient <- function(par, model.description){ with(model.description, { grad.total <- rep(0, t) for (g in 1:G){ A <- P <- matrix(0, m[g], m[g]) val <- ifelse 
(fixed[[g]], ram[[g]][,5], par[sel.free[[g]]]) A[arrows.1[[g]]] <- val[one.head[[g]]] P[arrows.2t[[g]]] <- P[arrows.2[[g]]] <- val[!one.head[[g]]] I.Ainv <- solve(diag(m[g]) - A) C <- J[[g]] %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J[[g]]) Cinv <- solve(C) grad.P <- correct[[g]] * t(I.Ainv) %*% t(J[[g]]) %*% Cinv %*% (C - S[[g]]) %*% Cinv %*% J[[g]] %*% I.Ainv grad.A <- grad.P %*% P %*% t(I.Ainv) grad <- rep(0, t) grad[sort(unique.free.1[[g]])] <- tapply(grad.A[arrows.1.free[[g]]], ram[[g]][ram[[g]][,1]==1 & ram[[g]][,4]!=0, 4], sum) grad[sort(unique.free.2[[g]])] <- tapply(grad.P[arrows.2.free[[g]]], ram[[g]][ram[[g]][,1]==2 & ram[[g]][,4]!=0, 4], sum) grad.total <- grad.total + ((N[g] - (!raw))/(sum(N) - (!raw)*G))*grad } grad.total }) } class(result) <- "msemObjective" result } msemObjectiveML <- function(gradient=TRUE){ result <- list( objective = function(par, model.description){ with(model.description, { res <- msemCompiledObjective(par=par, model.description=model.description, objective="objectiveML") AA <- PP <- CC <- vector(G, mode="list") for(g in 1:model.description$G) { AA[[g]] <- res$A[[g]] PP[[g]] <- res$P[[g]] CC[[g]] <- res$C[[g]] } f <- res$f attributes(f) <- list(gradient=res$gradient, A=AA, P=PP, C=CC, f=res$ff) f }) } ) if (gradient) result$gradient <- function(par, model.description){ with(model.description, { res <- msemCompiledObjective(par=par, model.description=model.description, objective="objectiveML") res$gradient }) } class(result) <- "msemObjective" result } msemObjectiveGLS <- function(gradient=FALSE){ result <- list( objective = function(par, model.description){ with(model.description, { res <- msemCompiledObjective(par=par, model.description=model.description, objective="objectiveGLS") AA <- PP <- CC <- vector(G, mode="list") for(g in 1:model.description$G) { AA[[g]] <- res$A[[g]] PP[[g]] <- res$P[[g]] CC[[g]] <- res$C[[g]] } f <- res$f attributes(f) <- list(A=AA, P=PP, C=CC, f=res$ff) f }) } ) class(result) <- "msemObjective" result } 
msemObjectiveFIML <- function(gradient=FALSE){ result <- list( objective = function(par, model.description){ with(model.description, { res <- msemCompiledObjective(par=par, model.description=model.description, objective="objectiveFIML") AA <- PP <- CC <- vector(G, mode="list") for(g in 1:model.description$G) { AA[[g]] <- res$A[[g]] PP[[g]] <- res$P[[g]] CC[[g]] <- res$C[[g]] } f <- res$f attributes(f) <- list(A=AA, P=PP, C=CC, f=res$ff) f }) } ) class(result) <- "msemObjective" result } ## nlm()-based optimizer for multigroup SEMs optimizerMsem <- function(start, objective=msemObjectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn=FALSE, ...){ with(model.description, { obj <- objective(gradient=gradient)$objective typsize <- if (par.size == 'startvalues') abs(start) else rep(1, t) if(identical(objective, msemObjectiveML)) objectiveCompiled <- "objectiveML" else if (identical(objective, msemObjectiveGLS)) { objectiveCompiled <- "objectiveGLS" gradient <- FALSE } else if (identical(objective, msemObjectiveFIML)) { objectiveCompiled <- "objectiveFIML" gradient <- FALSE } else stop("optimizerMsem requires the msemObjectiveML, msemObjectiveGLS or msemObjectiveFIML objective function") if (!warn) save.warn <- options(warn=-1) res <- msemCompiledSolve(model.description=model.description, start=start, objective=objectiveCompiled, typsize=typsize, debug=debug, maxiter=maxiter) if (!warn) options(save.warn) result <- list(covergence=NULL, iterations=NULL, coeff=NULL, vcov=NULL, criterion=NULL, C=NULL, A=NULL, P=NULL) result$convergence <- res$code <= 2 result$iterations <- res$iterations par <- res$estimate names(par) <- param.names result$coeff <- par if (!result$convergence) warning(paste('Optimization may not have converged; nlm return code = ', res$code, '. 
Consult ?nlm.\n', sep="")) vcov <- matrix(NA, t, t) qr.hess <- try(qr(res$hessian), silent=TRUE) if (inherits(qr.hess, "try-error")){ warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n") } else if (qr.hess$rank < t){ warning(' singular Hessian: model is probably underidentified.\n') which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)] result$aliased <- param.names[which.aliased] } else { df <- sum(N) - (!raw)*G vcov <- (2/df) * solve(res$hessian) if (any(diag(vcov) < 0)) { result$aliased <- param.names[diag(vcov) < 0] warning("Negative parameter variances.\nModel may be underidentified.\n") } } colnames(vcov) <- rownames(vcov) <- param.names result$vcov <- vcov result$criterion <- res$minimum C <- res$C A <- res$A P <- res$P for (g in 1:G){ rownames(C[[g]]) <- colnames(C[[g]]) <-rownames(S[[g]]) rownames(A[[g]]) <- colnames(A[[g]]) <- var.names[[g]] rownames(P[[g]]) <- colnames(P[[g]]) <- var.names[[g]] } result$C <- C result$A <- A result$P <- P result$group.criteria <- res$ff class(result) <- "msemResult" result }) } msemOptimizerNlm <- function(start, objective=msemObjectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn=FALSE, ...){ with(model.description, { obj <- objective(gradient=gradient)$objective typsize <- if (par.size == 'startvalues') abs(start) else rep(1, t) if (!warn) save.warn <- options(warn=-1) res <- nlm(obj, start, iterlim=maxiter, print.level=if(debug) 2 else 0, typsize=typsize, hessian=TRUE, model.description=model.description, ...) if (!warn) options(save.warn) result <- list(covergence=NULL, iterations=NULL, coeff=NULL, vcov=NULL, criterion=NULL, C=NULL, A=NULL, P=NULL) result$convergence <- res$code <= 2 result$iterations <- res$iterations par <- res$estimate names(par) <- param.names result$coeff <- par if (!result$convergence) warning(paste('Optimization may not have converged; nlm return code = ', res$code, '. 
Consult ?nlm.\n', sep="")) vcov <- matrix(NA, t, t) qr.hess <- try(qr(res$hessian), silent=TRUE) if (inherits(qr.hess, "try-error")){ warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n") } else if (qr.hess$rank < t){ warning(' singular Hessian: model is probably underidentified.\n') which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)] result$aliased <- param.names[which.aliased] } else { df <- sum(N) - (!raw)*G vcov <- (2/df) * solve(res$hessian) if (any(diag(vcov) < 0)) { result$aliased <- param.names[diag(vcov) < 0] warning("Negative parameter variances.\nModel may be underidentified.\n") } } colnames(vcov) <- rownames(vcov) <- param.names result$vcov <- vcov result$criterion <- res$minimum obj <- obj(par, model.description) C <- attr(obj, "C") A <- attr(obj, "A") P <- attr(obj, "P") for (g in 1:G){ rownames(C[[g]]) <- colnames(C[[g]]) <-rownames(S[[g]]) rownames(A[[g]]) <- colnames(A[[g]]) <- var.names[[g]] rownames(P[[g]]) <- colnames(P[[g]]) <- var.names[[g]] } result$C <- C result$A <- A result$P <- P result$group.criteria <- attr(obj, "f") class(result) <- "msemResult" result }) } # methods for msem and msemObjectiveML objects print.msemObjectiveML <- function(x, ...){ n <- x$n n.fix <- x$n.fix df <- sum(n*(n + 1)/2) - x$t - sum(n.fix*(n.fix + 1)/2) chisq <- sum(x$N - !x$raw)*x$criterion cat("\n Model Chisquare =", chisq, " Df =", df, "\n\n") print(x$coeff) invisible(x) } print.msemObjectiveGLS <- function(x, ...) print.msemObjectiveML(x, ...) print.msemObjectiveFIML <- function(x, ...) print.msemObjectiveML(x, ...) 
# summary method for multigroup ML structural-equation-model fits.
# Builds a single-group summary object for each group (dispatching to the
# matching single-group summary method) and, on the following lines,
# assembles overall fit statistics pooled across groups.
summary.msemObjectiveML <- function(object, digits=getOption("digits"), conf.level=.90, robust=FALSE,
    analytic.se=object$t <= 500,
    fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR", "AIC", "AICc", "BIC"), ...){
    # resolve which fit indices to report: explicit argument > option > c("AIC", "BIC")
    fit.indices <- if (is.null(fit.indices)) "" else {
        if (missing(fit.indices)){
            if (is.null(opt <- getOption("fit.indices"))) c("AIC", "BIC") else opt
        }
        else match.arg(fit.indices, several.ok=TRUE)
    }
    # analytic standard errors are only implemented for the ML objective
    if(inherits(object, "msemObjectiveGLS")) analytic.se <- FALSE
    else if(inherits(object, "msemObjectiveFIML")) analytic.se <- FALSE
    groups <- object$groups
    G <- length(groups)
    par <- object$coeff
    # NOTE(review): vcov.msem declares its argument as "analytic", not
    # "analytic.se"; as written this value is absorbed by ... and the
    # default used -- confirm intended behavior before changing.
    vcov <- vcov(object, robust=robust, analytic.se=analytic.se)
    n <- object$n
    m <- object$m
    S <- object$S
    C <- object$C
    A <- object$A
    P <- object$P
    N <- object$N
    J <- object$J
    n.fix <- object$n.fix
    if (length(n.fix) == 1) n.fix <- rep(n.fix, G)
    group.criteria <- object$group.criteria
    var.names <- object$var.names
    param.names <- object$param.names
    semmod <- object$semmodList
    ram <- object$ram
    # FIX: was list(G, model="list"), which creates the 2-element list
    # list(G, "list") rather than a length-G list, corrupting the
    # per-group assignments below; vector(G, mode="list") is the idiom
    # used elsewhere in this file.
    group.summaries <- vector(G, mode="list")
    for (g in 1:G){
        # parameters appearing in group g, in ram-matrix order
        par.names <- param.names[ram[[g]][, 4]]
        par.gr <- par[par.names]
        # fabricate a single-group fit object so the single-group summary
        # methods can be reused unchanged
        group <- list(coeff=par.gr, vcov=vcov[par.names, par.names], n.fix=n.fix[g], n=n[[g]], m=m[[g]],
            S=S[[g]], C=C[[g]], N=N[[g]], t=length(par.gr), raw=object$raw, var.names=var.names[[g]],
            ram=ram[[g]], J=J[[g]], A=A[[g]], P=P[[g]], criterion=group.criteria[g],
            par.posn=ram[[g]][, 4] != 0, iterations=object$iterations, semmod=semmod[[g]],
            adj.obj=object$adj.objects[[g]], robust.vcov=object$robust.vcov[par.names, par.names])
        class(group) <- if(inherits(object, "msemObjectiveGLS")) c("objectiveGLS", "sem")
            else if(inherits(object, "msemObjectiveFIML")) c("objectiveFIML", "sem")
            else c("objectiveML", "sem")
        group.summaries[[g]] <- if(inherits(object, "msemObjectiveGLS")) summary(group, digits=digits, conf.level=conf.level, robust=FALSE, fit.indices=fit.indices, ...)
else if(inherits(object, "msemObjectiveFIML")) summary(group, digits=digits, conf.level=conf.level, robust=FALSE, fit.indices=fit.indices, ...) else summary(group, digits=digits, conf.level=conf.level, robust=robust, analytic.se=FALSE, fit.indices=fit.indices, ...) group.summaries[[g]]$iterations <- NA } df <- sum(n*(n + 1)/2) - object$t - sum(n.fix*(n.fix + 1)/2) chisq <- sum(N - !object$raw)*object$criterion wt <- (N - object$raw)/(sum(N - object$raw)) SRMR <-if (!object$raw && "SRMR" %in% fit.indices) sum(wt*sapply(group.summaries, function(s) s$SRMR)) else NA GFI <- if (!object$raw && "GFI" %in% fit.indices) sum(wt*sapply(group.summaries, function(s) s$GFI)) else NA chisqNull <- if(!object$raw) sum(sapply(group.summaries, function(s) s$chisqNull)) else NA dfNull <- sum(n*(n - 1)/2) if (!object$raw && df > 0){ AGFI <- if ("AGFI" %in% fit.indices) 1 - (sum(n*(n * 1))/(2*df))*(1 - GFI) else NA NFI <- if ("NFI" %in% fit.indices) (chisqNull - chisq)/chisqNull else NA NNFI <- if ("NNFI" %in% fit.indices) (chisqNull/dfNull - chisq/df)/(chisqNull/dfNull -1) else NA L1 <- max(chisq - df, 0) L0 <- max(L1, chisqNull - dfNull) CFI <- if ("CFI" %in% fit.indices) 1 - L1/L0 else NA RNI <- if ("RNI" %in% fit.indices) 1 - (chisq - df)/(chisqNull - dfNull) else NA IFI <- if ("IFI" %in% fit.indices) (chisqNull - chisq)/(chisqNull - df) else NA if ("RMSEA" %in% fit.indices){ RMSEA <- sqrt(G*max(object$criterion/df - 1/(sum(N - 1)), 0)) tail <- (1 - conf.level)/2 max <- sum(N) while (max > 1) { res <- optimize(function(lam) (tail - pchisq(chisq, df, ncp = lam))^2, interval = c(0, max)) if (is.na(res$objective) || res$objective < 0) { max <- 0 warning("cannot find upper bound of RMSEA") break } if (sqrt(res$objective) < tail/100) break max <- max/2 } lam.U <- if (max <= 1) NA else res$minimum max <- max(max, 1) while (max > 1) { res <- optimize(function(lam) (1 - tail - pchisq(chisq, df, ncp = lam))^2, interval = c(0, max)) if (sqrt(res$objective) < tail/100) break max <- max/2 if 
(is.na(res$objective) || res$objective < 0) { max <- 0 warning("cannot find lower bound of RMSEA") break } } lam.L <- if (max <= 1) NA else res$minimum RMSEA.U <- sqrt(G*lam.U/(sum(N - 1) * df)) RMSEA.L <- sqrt(G*lam.L/(sum(N - 1) * df)) } else RMSEA.U <- RMSEA.L <- RMSEA <- NA } else RMSEA.U <- RMSEA.L <- RMSEA <- NFI <- NNFI <-IFI <- RNI <- CFI <- AGFI <- NA if (robust){ chisq.adjusted <- sum(sapply(group.summaries, function(x) x$chisq.adjusted)) if (!object$raw && df > 0){ chisqNull.adjusted <- sum(sapply(group.summaries, function(x) x$chisqNull.adjusted)) NFI.adjusted <- if ("NFI" %in% fit.indices) (chisqNull.adjusted - chisq.adjusted)/chisqNull.adjusted else NA NNFI.adjusted <- if ("NNFI" %in% fit.indices) (chisqNull.adjusted/dfNull - chisq.adjusted/df)/(chisqNull.adjusted/dfNull - 1) else NA L1 <- max(chisq.adjusted - df, 0) L0 <- max(L1, chisqNull.adjusted - dfNull) CFI.adjusted <- if ("CFI" %in% fit.indices) 1 - L1/L0 else NA RNI.adjusted <- if ("RNI" %in% fit.indices) 1 - (chisq.adjusted - df)/(chisqNull.adjusted - dfNull) else NA IFI.adjusted <- if ("IFI" %in% fit.indices) (chisqNull.adjusted - chisq.adjusted)/(chisqNull.adjusted - df) else NA } } if (object$raw) cat("\nModel fit to raw moment matrix.\n") if (robust && !is.null(object$robust.vcov)){ cat("\nSatorra-Bentler Corrected Fit Statistics:\n") cat("\n Corrected Model Chisquare = ", chisq.adjusted, " Df = ", df, "Pr(>Chisq) =", if (df > 0) pchisq(chisq.adjusted, df, lower.tail=FALSE) else NA) if (!object$raw) { cat("\n Corrected Chisquare (null model) = ", chisqNull.adjusted, " Df = ", dfNull) } if (df > 0 && !object$raw){ if (!is.na(NFI.adjusted)) cat("\n Corrected Bentler-Bonett NFI = ", NFI.adjusted) if (!is.na(NNFI.adjusted)) cat("\n Corrected Tucker-Lewis NNFI = ", NNFI.adjusted) if (!is.na(CFI.adjusted)) cat("\n Corrected Bentler CFI = ", CFI.adjusted) if (!is.na(RNI.adjusted)) cat("\n Corrected Bentler RNI = ", RNI.adjusted) if (!is.na(IFI.adjusted)) cat("\n Corrected Bollen IFI = ", 
IFI.adjusted) } cat("\n\nUncorrected Fit Statistics:\n") } if (inherits(object, "msemObjectiveGLS") || inherits(object, "msemObjectiveFIML")) { AIC <- AICc <- BIC <- NA } else { AIC <- if ("AIC" %in% fit.indices) AIC(object) else NA AICc <- if ("AICc" %in% fit.indices) AICc(object) else NA BIC <- if ("BIC" %in% fit.indices) BIC(object) else NA } cat("\n Model Chisquare =", chisq, " Df =", df, " Pr(>Chisq) =", pchisq(chisq, df, lower.tail=FALSE)) if (!is.na(chisqNull)) cat("\n Chisquare (null model) =", chisqNull, " Df =", dfNull) if (!is.na(GFI)) cat("\n Goodness-of-fit index =", GFI) if (!is.na(AGFI)) cat("\n Adjusted goodness-of-fit index =", AGFI) if (!is.na(RMSEA)) cat("\n RMSEA index = ", RMSEA, " ", 100*conf.level, "% CI: (", RMSEA.L, ", ", RMSEA.U, ")", sep="") if (!is.na(NFI)) cat("\n Bentler-Bonett NFI =", NFI) if (!is.na(NNFI)) cat("\n Tucker-Lewis NNFI =", NNFI) if (!is.na(CFI)) cat("\n Bentler CFI =", CFI) if (!is.na(RNI)) cat("\n Bentler RNI = ", RNI) if (!is.na(IFI)) cat("\n Bollen IFI = ", IFI) if (!is.na(SRMR)) cat("\n SRMR =", SRMR) if (!is.na(AIC)) cat("\n AIC =", AIC) if (!is.na(AICc)) cat("\n AICc =", AICc) if (!is.na(BIC)) cat("\n BIC =", BIC) cat("\n\n") if (is.null(object$initial.iterations)) cat("Iterations:", object$iterations, "\n\n") else cat("Iterations: initial fits,", object$initial.iterations, " final fit,", object$iterations, "\n\n") for (g in 1:G){ cat("\n ", object$group, ": ", groups[g], "\n", sep="") print(group.summaries[[g]]) } invisible(object) } summary.msemObjectiveGLS <- function(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR"), ...){ fit.indices <- if (missing(fit.indices)){ getOption("fit.indices") } else match.arg(fit.indices, several.ok=TRUE) summary.msemObjectiveML(object, digits=digits, conf.level=conf.level, robust=FALSE, fit.indices=fit.indices, ...) 
invisible(object) } summary.msemObjectiveFIML <- function(object, digits=getOption("digits"), conf.level=.90, ...){ summary.msemObjectiveML(object, digits=digits, conf.level=conf.level, robust=FALSE, ...) invisible(object) } deviance.msemObjectiveML <- function(object, ...) object$criterion * sum(object$N - (!object$raw)) AIC.msemObjectiveML <- function(object, ..., k) { deviance(object) + 2*object$t } AICc.msemObjectiveML <- function(object, ...) { deviance(object) + 2*object$t*(object$t + 1)/(sum(object$N) - object$t - 1) } BIC.msemObjectiveML <- function(object, ...) { # deviance(object) + object$t*log(sum(object$N)) deviance(object) - df.residual(object)*log(sum(object$N)) } residuals.msem <- function(object, ...){ result <- mapply(function(S, C) S - C, S=object$S, C=object$C, SIMPLIFY=FALSE) result <- lapply(result, function(res) {attr(res, "N") <- NULL; res}) result } coef.msem <- function(object, ...) object$coeff df.residual.msem <- function (object, ...){ n <- object$n n.fix <- object$n.fix sum(n*(n + 1)/2) - object$t - sum(n.fix*(n.fix + 1)/2) } anova.msemObjectiveML <- function(object, model.2, ...) 
{ dev.1 <- deviance(object) df.1 <- df.residual(object) dev.2 <- deviance(model.2) df.2 <- df.residual(model.2) name.1 <- deparse(substitute(object)) name.2 <- deparse(substitute(model.2)) df <- abs(df.1 - df.2) if (df == 0) stop("the models have the same Df") if (all(object$N != model.2$N)) stop("the models are fit to different numbers of observations") if (any(sapply(object$S, nrow) != sapply(model.2$S, nrow)) || !all.equal(object$S, model.2$S)) stop("the models are fit to different moment matrices") chisq <- abs(dev.1 - dev.2) table <- data.frame(c(df.1, df.2), c(dev.1, dev.2), c(NA, df), c(NA, chisq), c(NA, pchisq(chisq, df, lower.tail = FALSE))) names(table) <- c("Model Df", "Model Chisq", "Df", "LR Chisq", "Pr(>Chisq)") rownames(table) <- c(name.1, name.2) structure(table, heading = c("LR Test for Difference Between Models", ""), class = c("anova", "data.frame")) } logLik.msemObjectiveML <- function(object, ...) -0.5*deviance(object) effects.msem <- function(object, ...){ eff <- function(A, m, semmod){ I <- diag(m) endog <- classifyVariables(semmod)$endogenous AA <- -A diag(AA) <- 1 Total <- solve(AA) - I Indirect <- Total - A result <- list(Total = Total[endog, ], Direct = A[endog, ], Indirect = Indirect[endog, ]) class(result) <- "semeffects" result } G <- length(object$groups) A <- object$A m <- object$m semmod <- object$semmodList result <- vector(G, mode="list") for (g in 1:G){ result[[g]] <- eff(A[[g]], m[g], semmod[[g]]) } names(result) <- object$groups class(result) <- "semeffectsList" result } print.semeffectsList <- function (x, digits = getOption("digits"), ...) 
{ groups <- names(x) for (group in groups){ cat("\n\n Group: ", group, "\n") print(x[[group]], digits=digits) } invisible(x) } standardizedCoefficients.msem <- function (object, ...){ groups <- object$groups G <- length(groups) param.names <- object$param.names ram <- object$ram A <- object$A P <- object$P par <- coef(object) for (g in 1:G){ par.names <- param.names[ram[[g]][, 4]] par.gr <- par[par.names] t <- length(par.gr) par.posn <- ram[[g]][, 4] != 0 ram[[g]][par.posn, 4] <- 1:t group <- list(coeff=par.gr, t=t, ram=ram[[g]], A=A[[g]], P=P[[g]], par.posn=par.posn, param.names=par.names) class(group) <- "sem" cat("\n\n Group: ", groups[g], "\n") print(standardizedCoefficients(group, ...)) } } standardizedResiduals.msem <- function (object, ...) { res <- residuals(object) S <- object$S for (g in 1:length(S)){ s <- diag(S[[g]]) res[[g]] <- res[[g]]/sqrt(outer(s, s)) } res } normalizedResiduals.msemObjectiveML <- function (object, ...) { res <- residuals(object) N <- object$N - (!object$raw) C <- object$C for (g in 1:length(res)){ c <- diag(C[[g]]) res[[g]] <- res[[g]]/sqrt((outer(c, c) + C[[g]]^2)/N[g]) } res } fscores.msem <- function (model, data = model$data, center = TRUE, scale = FALSE, ...) 
{ m <- model$m P <- model$P A <- model$A var.names <- model$var.names C <- model$C group <- model$group groups <- model$groups G <- length(groups) scores <- B <- vector(G, mode = "list") names(scores) <- names(B) <- groups for (g in 1:G) { observed <- var.names[[g]] %in% rownames(C[[g]]) if (all(observed)) { warning("there are no latent variables in group ", groups[g]) } IAinv <- solve(diag(m[g]) - A[[g]]) Sigma <- IAinv %*% P[[g]] %*% t(IAinv) B[[g]] <- solve(Sigma[observed, observed]) %*% Sigma[observed, !observed] rownames(B[[g]]) <- var.names[[g]][observed] colnames(B[[g]]) <- var.names[[g]][!observed] if (!is.null(data)) { X <- data[[g]][, var.names[[g]][observed]] if (center || scale) X <- scale(X, center = center, scale = scale) scores[[g]] <- X %*% B[[g]] } } if (is.null(data)) return(B) else return(scores) } vcov.msem <- function (object, robust=FALSE, analytic = inherits(object, "msemObjectiveML") && object$t <= 500, ...) { if(robust){ if (is.null(object$robust.vcov)) stop("robust coefficient covariance matrix not available") return(object$robust.vcov) } if (!analytic) return(object$vcov) if (!inherits(object, "msemObjectiveML")) stop("analytic coefficient covariance matrix unavailable") hessian <- function(model) { A <- model$A P <- model$P S <- model$S C <- model$C J <- model$J m <- model$m N <- model$N rams <- model$ram groups <- model$groups G <- length(groups) nms <- names(coef(model)) raw <- model$raw Hessian <- matrix(0, nrow=length(nms), ncol=length(nms)) rownames(Hessian) <- colnames(Hessian) <- nms wts <- (N - !raw)/(sum(N) - G*!raw) for (g in 1:G){ I.Ainv <- solve(diag(m[g]) - A[[g]]) Cinv <- solve(C[[g]]) AA <- t(I.Ainv) %*% t(J[[g]]) BB <- J[[g]] %*% I.Ainv %*% P[[g]] %*% t(I.Ainv) CC <- t(I.Ainv) %*% t(J[[g]]) DD <- J[[g]] %*% I.Ainv dF.dBdB <- accumulate(AA %*% Cinv %*% t(AA), t(BB) %*% Cinv %*% BB, AA %*% Cinv %*% BB, t(BB) %*% Cinv %*% t(AA), m[g]) dF.dPdP <- accumulate(CC %*% Cinv %*% t(CC), t(DD) %*% Cinv %*% DD, CC %*% Cinv %*% DD, 
t(DD) %*% Cinv %*% t(CC), m[g]) dF.dBdP <- accumulate(AA %*% Cinv %*% t(CC), t(BB) %*% Cinv %*% DD, AA %*% Cinv %*% DD, t(BB) %*% Cinv %*% t(CC), m[g]) ram <- rams[[g]] fixed <- ram[, 4] == 0 sel.free <- ram[, 4] sel.free[fixed] <- 0 one.head <- ram[, 1] == 1 one.free <- which((!fixed) & one.head) two.free <- which((!fixed) & (!one.head)) two.free.cov <- which((!fixed) & (!one.head) & (ram[, 2] != ram[, 3])) arrows.1 <- ram[one.head, c(2, 3), drop = FALSE] arrows.2 <- ram[!one.head, c(2, 3), drop = FALSE] arrows.2t <- ram[!one.head, c(3, 2), drop = FALSE] arrows.1.free <- ram[one.free, c(2, 3), drop = FALSE] arrows.2.free <- ram[two.free, c(2, 3), drop = FALSE] sel.free.1 <- sel.free[one.free] sel.free.2 <- sel.free[two.free] unique.free.1 <- unique(sel.free.1) unique.free.2 <- unique(sel.free.2) posn.matrix <- matrix(1:(m[g]^2), m[g], m[g]) posn.free <- c(posn.matrix[arrows.1.free], (m[g]^2) + posn.matrix[arrows.2.free]) DBB <- dF.dBdB[posn.matrix[arrows.1.free], posn.matrix[arrows.1.free], drop = FALSE] DPP <- dF.dPdP[posn.matrix[arrows.2.free], posn.matrix[arrows.2.free], drop = FALSE] DBP <- dF.dBdP[posn.matrix[arrows.1.free], posn.matrix[arrows.2.free], drop = FALSE] hessian <- rbind(cbind(DBB, DBP), cbind(t(DBP), DPP)) n1 <- length(one.free) n2 <- length(two.free) nn <- rep(c(sqrt(2), sqrt(2)/2), c(n1, n2)) nn[c(one.free, two.free) %in% two.free.cov] <- sqrt(2) hessian <- hessian * outer(nn, nn) pars <- ram[, 4][!fixed] all.pars <- ram[, 4] t <- length(pars) Z <- outer(sort(unique(pars)), pars, function(x, y) as.numeric(x == y)) hessian <- Z %*% hessian %*% t(Z) par.names <- c(nms[all.pars[one.free]], nms[all.pars[two.free]]) rownames(hessian) <- colnames(hessian) <- par.names Hessian[par.names, par.names] <- Hessian[par.names, par.names] + wts[g]*hessian } Hessian } h <- hessian(object) t <- object$t N <- sum(object$N) raw <- object$raw G <- length(object$groups) param.names <- rownames(h) vcov <- matrix(NA, t, t) qr.hess <- try(qr(h), silent = TRUE) if 
(inherits(qr.hess, "try-error")){
        warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n")
    } else if (qr.hess$rank < t) {
        # rank-deficient Hessian: report the aliased (unidentified) parameters
        warning(" singular Hessian: model is probably underidentified.\n")
        which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)]
        attr(vcov, "aliased") <- param.names[which.aliased]
    } else {
        vcov <- (2/(N - G*(!raw))) * solve(h)
        if (any(diag(vcov) < 0)) {
            attr(vcov, "aliased") <- param.names[diag(vcov) < 0]
            warning("Negative parameter variances.\nModel may be underidentified.\n")
        }
    }
    vcov
}

# Satorra-Bentler robust coefficient covariance matrix for a multigroup model:
# accumulates the group-wise robust Hessians (weighted by group sample size)
# and the scaled chi-square across groups.
robustVcovMsem <- function(model){
    G <- length(model$groups)
    t <- model$t
    N <- model$N
    raw <- model$raw
    wts <- (N - !raw)/(sum(N) - !raw*G)
    hessian <- matrix(0, t, t)
    rownames(hessian) <- colnames(hessian) <- model$param.names
    chisq <- 0
    adj.objects <- vector(G, mode="list")
    for (g in 1:G){
        ram <- model$ram[[g]]
        parameters <- ram[, 4]
        unique.pars <- unique(parameters[parameters != 0])
        par.posn <- sapply(unique.pars, function(x) which(x == parameters)[1])
        unique.posn <- which(parameters %in% unique.pars)
        # relabel group-g parameters so they index consecutively within the group
        rownames(ram)[unique.posn] <- unique(model$param.names[ram[, 4]])
        ram[unique.posn, 4] <- unlist(apply(outer(unique.pars, parameters, "=="), 2, which))
        # fabricate a single-group model object for sbchisq()/robustVcov()
        mod.g <- list(var.names=model$var.names[[g]], ram=ram, J=model$J[[g]], n.fix=model$n.fix,
            n=model$n[[g]], N=model$N[g], m=model$m[[g]], t=length(unique.pars),
            coeff=model$coeff[parameters], criterion=model$group.criteria[g], S=model$S[[g]],
            raw=model$raw, C=model$C[[g]], A=model$A[[g]], P=model$P[[g]])
        adj.objects[[g]] <- sbchisq(sem.obj=mod.g, sem.data=model$data[[g]])
        hess <- solve((N[g] - 1)*robustVcov(mod.g, adj.obj=adj.objects[[g]]))
        pars <- rownames(hess)
        hessian[pars, pars] <- hessian[pars, pars] + wts[g]*hess
        # FIX: was adj.objects$chisq.scaled -- adj.objects is a plain list
        # with no such element (always NULL); the g-th group's scaled
        # chi-square is what must be accumulated here.
        chisq <- chisq + adj.objects[[g]]$chisq.scaled
    }
    return(list(vcov=solve(hessian)/(sum(N) - !raw*G), chisq.scaled=chisq, adj.objects=adj.objects))
}
sem/R/summary.methods.R0000644000175000017500000002416212254716715014672 0ustar nileshnilesh# last modified 2013-12-19 by J.
Fox summary.objectiveML <- function(object, digits=getOption("digits"), conf.level=.90, robust=FALSE, analytic.se=object$t <= 500, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR", "AIC", "AICc", "BIC", "CAIC"), ...) { fit.indices <- if (is.null(fit.indices)) "" else { if (missing(fit.indices)){ if (is.null(opt <- getOption("fit.indices"))) c("AIC", "BIC") else opt } else match.arg(fit.indices, several.ok=TRUE) } vcov <- vcov(object, robust=robust, analytic=analytic.se) if (any(is.na(vcov))) stop("coefficient covariances cannot be computed") norm.res <- normalizedResiduals(object) se <- sqrt(diag(vcov)) z <- object$coeff/se n.fix <- object$n.fix n <- object$n t <- object$t S <- object$S C <- object$C N <- object$N df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 dfNull <- n*(n - 1)/2 invC <- solve(C) CSC <- invC %*% (S - C) CSC <- CSC %*% CSC CS <- invC %*% S CS <- CS %*% CS chisqNull <- chisqNull(object) chisq <- object$criterion * (N - (!object$raw)) GFI <- if (!"GFI" %in% fit.indices) NA else if (!object$raw) 1 - sum(diag(CSC))/sum(diag(CS)) else NA Rsq <- if (!object$raw) Rsq(object) else NA if ((!object$raw) && df > 0){ AGFI <- if (!"AGFI" %in% fit.indices) NA else 1 - (n*(n + 1)/(2*df))*(1 - GFI) NFI <- if (!"NFI" %in% fit.indices) NA else (chisqNull - chisq)/chisqNull NNFI <- if (!"NNFI" %in% fit.indices) NA else (chisqNull/dfNull - chisq/df)/(chisqNull/dfNull - 1) L1 <- max(chisq - df, 0) L0 <- max(L1, chisqNull - dfNull) CFI <- if (!"CFI" %in% fit.indices) NA else 1 - L1/L0 RNI <- if (!"RNI" %in% fit.indices) NA else 1 - (chisq - df)/(chisqNull - dfNull) IFI <- if (!"IFI" %in% fit.indices) NA else (chisqNull - chisq)/(chisqNull - df) if (!"RMSEA" %in% fit.indices) { RMSEA <- NA } else { RMSEA <- sqrt(max(object$criterion/df - 1/(N - (!object$raw)), 0)) tail <- (1 - conf.level)/2 max <- N while (max > 1){ res <- optimize(function(lam) (tail - pchisq(chisq, df, ncp=lam))^2, interval=c(0, max)) if (is.na(res$objective) || 
res$objective < 0){ max <- 0 warning("cannot find upper bound of RMSEA") break } if (sqrt(res$objective) < tail/100) break max <- max/2 } lam.U <- if (max <= 1) NA else res$minimum max <- max(max, 1) while (max > 1){ res <- optimize(function(lam) (1 - tail - pchisq(chisq, df, ncp=lam))^2, interval=c(0, max)) if (sqrt(res$objective) < tail/100) break max <- max/2 if (is.na(res$objective) || res$objective < 0){ max <- 0 warning("cannot find lower bound of RMSEA") break } } lam.L <- if (max <= 1) NA else res$minimum RMSEA.U <- sqrt(lam.U/((N - (!object$raw))*df)) RMSEA.L <- sqrt(lam.L/((N - (!object$raw))*df)) } } else RMSEA.U <- RMSEA.L <- RMSEA <- NFI <- NNFI <- CFI <- AGFI <- RNI <- IFI <- NA if (!is.na(RMSEA)) RMSEA <- c(RMSEA, RMSEA.L, RMSEA.U, conf.level) if (!is.null(object$coeff)){ var.names <- rownames(object$A) ram <- object$ram[object$par.posn, , drop=FALSE] par.code <- paste(var.names[ram[,2]], c('<---', '<-->')[ram[,1]], var.names[ram[,3]]) coeff <- data.frame(object$coeff, se, z, 2*pnorm(abs(z), lower.tail=FALSE), par.code) names(coeff) <- c("Estimate", "Std Error", "z value", "Pr(>|z|)", " ") row.names(coeff) <- names(object$coeff) } else coeff <- NULL AIC <- if (!"AIC" %in% fit.indices) NULL else AIC(object) AICc <- if (!"AICc" %in% fit.indices) NULL else AICc(object) BIC <- if (!"BIC" %in% fit.indices) NULL else BIC(object) CAIC <- if (!"CAIC" %in% fit.indices) NULL else CAIC(object) SRMR <- if (!"SRMR" %in% fit.indices) NA else sqrt(sum(standardizedResiduals(object)^2 * upper.tri(diag(n), diag=TRUE))/(n*(n + 1)/2)) if (robust) { chisq.adjusted <- object$adj.obj$chisq.scaled chisqNull.adjusted <- chisqNull/object$adj.obj$c NFI.adjusted <- if (!"NFI" %in% fit.indices) NULL else (chisqNull.adjusted - chisq)/chisqNull.adjusted NNFI.adjusted <- if (!"NNFI" %in% fit.indices) NULL else (chisqNull.adjusted/dfNull - chisq.adjusted/df)/(chisqNull.adjusted/dfNull - 1) L1 <- max(chisq.adjusted - df, 0) L0 <- max(L1, chisqNull.adjusted - dfNull) CFI.adjusted <- 
if (!"CFI" %in% fit.indices) NULL else 1 - L1/L0 RNI.adjusted <- if (!"RNI" %in% fit.indices) NULL else 1 - (chisq.adjusted - df)/(chisqNull.adjusted - dfNull) IFI.adjusted <- if (!"IFI" %in% fit.indices) NULL else (chisqNull.adjusted - chisq.adjusted)/(chisqNull.adjusted - df) } else{ chisq.adjusted <- chisqNull.adjusted <- NFI.adjusted <- NNFI.adjusted <- CFI.adjusted <- RNI.adjusted <- IFI.adjusted <- NULL } ans <- list(chisq=chisq, df=df, chisqNull=chisqNull, dfNull=dfNull, GFI=GFI, AGFI=AGFI, RMSEA=RMSEA, NFI=NFI, NNFI=NNFI, CFI=CFI, RNI=RNI, IFI=IFI, BIC=BIC, SRMR=SRMR, AIC=AIC, AICc=AICc, CAIC=CAIC, Rsq=Rsq, chisq.adjusted=chisq.adjusted, chisqNull.adjusted=chisqNull.adjusted, NFI.adjusted=NFI.adjusted, NNFI.adjusted=NNFI.adjusted, CFI.adjusted=CFI.adjusted, RNI.adjusted=RNI.adjusted, IFI.adjusted=IFI.adjusted, norm.res=norm.res, coeff=coeff, digits=digits, iterations=object$iterations, aliased=object$aliased, raw=object$raw, robust=robust, robust.vcov=object$robust.vcov, adj.obj=object$adj.obj) class(ans) <- "summary.objectiveML" ans } print.summary.objectiveML <- function(x, digits=getOption("digits"), ...){ old.digits <- options(digits=digits) on.exit(options(old.digits)) if (x$raw) cat("\nModel fit to raw moment matrix.\n") if (x$robust && !is.null(x$robust.vcov)){ cat("\nSatorra-Bentler Corrected Fit Statistics:\n") cat("\n Corrected Model Chisquare = ", x$chisq.adjusted, " Df = ", x$df, "Pr(>Chisq) =", if (x$df > 0) pchisq(x$chisq.adjusted, x$df, lower.tail=FALSE) else NA) if (!x$raw) { cat("\n Corrected Chisquare (null model) = ", x$chisqNull.adjusted, " Df = ", x$dfNull) } if (x$df > 0 && !x$raw){ if (!is.null(x$NFI.adjusted)) cat("\n Corrected Bentler-Bonett NFI = ", x$NFI.adjusted) if (!is.null(x$NNFI.adjusted)) cat("\n Corrected Tucker-Lewis NNFI = ", x$NNFI.adjusted) if (!is.null(x$CFI.adjusted)) cat("\n Corrected Bentler CFI = ", x$CFI.adjusted) if (!is.null(x$RNI.adjusted)) cat("\n Corrected Bentler RNI = ", x$RNI.adjusted) if 
(!is.null(x$IFI.adjusted)) cat("\n Corrected Bollen IFI = ", x$IFI.adjusted)
        }
        cat("\n\nUncorrected Fit Statistics:\n")
        # replace the reported standard errors with the robust
        # (Satorra-Bentler corrected) ones and recompute z / p values
        x$coeff[,2] <- sqrt(diag(x$robust.vcov))
        x$coeff[,3] <- x$coeff[,1]/x$coeff[,2]
        x$coeff[,4] <- 2*pnorm(abs(x$coeff[,3]), lower.tail=FALSE)
        colnames(x$coeff)[2] <- "Corrected SE"
    }
    if (!is.null(x$chisq)) cat("\n Model Chisquare = ", x$chisq, " Df = ", x$df, "Pr(>Chisq) =",
        if (x$df > 0) pchisq(x$chisq, x$df, lower.tail=FALSE) else NA)
    else if (!is.null(x$logLik)) cat("\n Model log-likelihood = ", x$logLik, " Df = ", x$df, "\n")
    if (!x$raw) {
        # FIX: the guard tested x$chisqNULL (wrong case); the summary list
        # element is chisqNull, so the test was always FALSE and the
        # null-model chi-square line was never printed.
        if ((!is.null(x$chisqNull)) && (!is.na(x$chisqNull))) cat("\n Chisquare (null model) = ", x$chisqNull, " Df = ", x$dfNull)
        if (!is.na(x$GFI)) cat("\n Goodness-of-fit index = ", x$GFI)
    }
    if (x$df > 0 && !x$raw){
        if (!is.na(x$AGFI)) cat("\n Adjusted goodness-of-fit index = ", x$AGFI)
        if (length(x$RMSEA) > 1 || !is.na(x$RMSEA)) cat("\n RMSEA index = ", x$RMSEA[1], " ", 100*x$RMSEA[4], "% CI: (", x$RMSEA[2], ", ", x$RMSEA[3],")", sep="")
        if (!is.na(x$NFI)) cat("\n Bentler-Bonett NFI = ", x$NFI)
        if (!is.na(x$NNFI)) cat("\n Tucker-Lewis NNFI = ", x$NNFI)
        if (!is.na(x$CFI)) cat("\n Bentler CFI = ", x$CFI)
        if (!is.na(x$RNI)) cat("\n Bentler RNI = ", x$RNI)
        if (!is.na(x$IFI)) cat("\n Bollen IFI = ", x$IFI)
        if (!is.na(x$SRMR)) cat("\n SRMR = ", x$SRMR)
    }
    if (!is.null(x$AIC) && !is.na(x$AIC)) cat("\n AIC = ", x$AIC)
    if (!is.null(x$AICc) && !is.na(x$AICc)) cat("\n AICc = ", x$AICc)
    if (!is.null(x$BIC) && !is.na(x$BIC)) cat("\n BIC = ", x$BIC)
    if (!is.null(x$CAIC) && !is.na(x$CAIC)) cat("\n CAIC = ", x$CAIC)
    if (length(x$norm.res) > 1 || !is.na(x$norm.res)){
        cat("\n\n Normalized Residuals\n")
        print(summary(as.vector(x$norm.res)))
    }
    if (!is.na(x$Rsq[1])){
        cat("\n R-square for Endogenous Variables\n")
        print(round(x$Rsq, 4))
    }
    if (!is.null(x$coeff)){
        if (x$robust && !is.null(x$robust.vcov)) cat("\n Parameter Estimates (with Robust Standard Errors)\n")
        else cat("\n Parameter Estimates\n")
        print(x$coeff, right=FALSE,
digits=digits) if (!is.na(x$iterations)) cat("\n Iterations = ", x$iterations, "\n") if (!is.null(x$aliased)) cat("\n Aliased parameters:", x$aliased, "\n") } invisible(x) } summary.objectiveGLS <- function(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("GFI", "AGFI", "RMSEA", "NFI", "NNFI", "CFI", "RNI", "IFI", "SRMR"), ...){ fit.indices <- if (missing(fit.indices)){ getOption("fit.indices") } else if (fit.indices[1] != "") match.arg(fit.indices, several.ok=TRUE) summary <- summary.objectiveML(object, digits=digits, conf.level=conf.level, analytic.se=FALSE, fit.indices=fit.indices, ...) S <- object$S Sinv <- solve(S) C <- object$C SinvSmC <- Sinv %*% (S - C) SinvS <- Sinv %*% S n <- object$n if ("GFI" %in% fit.indices) summary$GFI <- 1 - sum(diag(SinvSmC %*% SinvSmC))/sum(diag(SinvS %*% SinvS)) if ("AGFI" %in% fit.indices) summary$AGFI <- 1 - (n*(n + 1)/(2*summary$df))*(1 - summary$GFI) summary } deviance.objectiveML <- function(object, ...) object$criterion * (object$N - (!object$raw)) df.residual.sem <- function(object, ...) { n.fix <- object$n.fix n <- object$n t <- object$t n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 } Rsq <- function(model){ A <- model$A P <- model$P IAinv <- solve(diag(nrow(A)) - A) C <- IAinv %*% P %*% t(IAinv) R2 <- 1 - diag(P)/diag(C) R2 <- R2[classifyVariables(model$semmod)$endogenous] R2 } sem/R/residuals.R0000644000175000017500000000212011701252753013505 0ustar nileshnilesh# last modified 2011-11-04 by J. Fox residuals.sem <- function(object, ...){ object$S - object$C } standardized.residuals <- function(...){ .Deprecated("standardizedResiduals", package="sem") standardizedResiduals(...) } standardizedResiduals <- function(object, ...){ UseMethod("standardizedResiduals") } standardizedResiduals.sem <- function(object, ...){ res <- residuals(object) s <- diag(object$S) res/sqrt(outer(s, s)) } normalized.residuals <- function(...){ .Deprecated("normalizedResiduals", package="sem") normalizedResiduals(...) 
} normalizedResiduals <- function(object, ...){ UseMethod("normalizedResiduals") } normalizedResiduals.objectiveML <- function(object, ...){ res <- residuals(object) N <- object$N - (!object$raw) C <- object$C c <- diag(C) res/sqrt((outer(c,c) + C^2)/N) } normalizedResiduals.objectiveGLS <- function(object, ...){ res <- residuals(object) N <- object$N - (!object$raw) S <- object$S s <- diag(S) res/sqrt((outer(s,s) + S^2)/N) } sem/R/rawMoments.R0000644000175000017500000000334212005517445013655 0ustar nileshnilesh# last modified 2012-07-29 by J. Fox raw.moments <- function(...){ .Deprecated("rawMoments", package="sem") rawMoments(...) } rawMoments <- function(object, ...) UseMethod("rawMoments") rawMoments.formula <- function(formula, data, subset, na.action, contrasts = NULL, ...) { if (missing(na.action)) na.action <- options()$na.action m <- match.call(expand.dots = FALSE) if (is.matrix(eval(m$data, sys.frame(sys.parent())))) m$data <- as.data.frame(data) m$instruments <- m$contrasts <- NULL m[[1]] <- as.name("model.frame") mf <- eval(m, sys.frame(sys.parent())) response <- attr(attr(mf, "terms"), "response") if (response) stop("formula cannot have a response") na.act <- attr(mf, "na.action") X <- model.matrix(formula, data = mf, contrasts) nms <- colnames(X) if ("(Intercept)" %in% nms) colnames(X)[nms == "(Intercept)"] <- "Intercept" rawMoments(X) } rawMoments.default <- function(object, na.rm=FALSE, ...){ object <- as.matrix(object) if (na.rm) object <- na.omit(object) N <- nrow(object) result <- crossprod(object, object)/N attr(result, "N") <- N class(result) <- "rawmoments" result } print.rawmoments <- function(x, ...){ xx <- unclass(x) attr(xx, "N") <- NULL cat("\nRaw Moments\n") print(xx, ...) 
cat("\nN = ", attr(x, "N"), "\n") invisible(x) } cov2raw <- function(cov, mean, N, sd){ if (all(1 == diag(cov)) && !missing(sd)) cov <- cov * outer(sd, sd) raw <- ((N - 1)*cov + N*outer(mean, mean))/N colnames(raw) <- rownames(raw) <- rownames(cov) raw <- rbind(c(1, mean), cbind(mean, raw)) if (!("Intercept" %in% rownames(raw))) colnames(raw)[1] <- rownames(raw)[1] <- "Intercept" attr(raw, "N") <- N class(raw) <- "rawmoments" raw } sem/R/miSem.R0000644000175000017500000001535112535651426012604 0ustar nileshnilesh# last modified 2015-06-09 by J. Fox # some changes by Benjamin K Goodrich 2015-01-20 miSem <- function(model, ...){ UseMethod("miSem") } miSem.semmod <- function(model, ..., data, formula = ~., raw=FALSE, fixed.x=NULL, objective=objectiveML, n.imp=5, n.chains=n.imp, n.iter=30, seed=sample(1e6, 1), mi.args=list(), show.progress=TRUE){ cls <- gsub("\\.", "", deparse(substitute(objective))) cls <- gsub("2", "", cls) cls <- c(cls, "sem") warn <- options(warn=-1) on.exit(options(warn)) initial.fit <- sem(model, ..., data=data, formula=formula, raw=raw, fixed.x=fixed.x, objective = if (raw) objectiveFIML else objective) options(warn) class(initial.fit) <- if (raw) c("objectiveFIML", "sem") else cls coefficients <- coefficients(initial.fit) coef.names <- names(coefficients) var.names <- initial.fit$var.names ram <- initial.fit$ram ram[coef.names, "start value"] <- coefficients N <- nrow(data) if (!is.null(fixed.x)) fixed.x <- apply(outer(var.names, fixed.x, "=="), 2, which) mi.args$n.chains <- n.chains mi.args$n.iter <- n.iter mi.args$seed <- seed mi.args$y <- data if (show.progress) cat("\n Beginning", n.imp, "imputations\n") mi.data <- do.call("mi", mi.args) if (show.progress) cat("\n Imputations complete\n") # has.tcltk <- require("tcltk") # if (has.tcltk) pb <- tkProgressBar("Fitting", "Imputation no.: ", 0, n.imp) if (show.progress) { cat("\n Fitting model to imputations:\n") pb <- txtProgressBar(min=0, max=n.imp, style=3) } fits <- complete(mi.data, m = 
n.imp, include_missing = FALSE) for (i in seq_along(fits)) { # if (has.tcltk) setTkProgressBar(pb, i, label=sprintf("Imputation no.: %d", i)) if (show.progress) setTxtProgressBar(pb, i) data.i <- model.frame(formula, data=fits[[i]]) data.i <- model.matrix(formula, data=fits[[i]]) colnames(data.i)[colnames(data.i) == "(Intercept)"] <- "Intercept" S <- if (raw) rawMoments(data.i) else { data.i <- data.i[, colnames(data.i) != "Intercept"] cov(data.i) } fit <- sem(ram, S=S, N=N, data=data.i, raw=raw, param.names=coef.names, var.names=var.names, fixed.x=fixed.x, optimizer=initial.fit$optimizer, objective=objective, ...) class(fit) <- cls fits[[i]] <- fit } # if (has.tcltk) close(pb) if (show.progress) close(pb) result <- list(initial.fit=initial.fit, mi.fits=fits, imputations=mi.data, seed=seed, mi.data=mi.data) class(result) <- "miSem" result } miSem.semmodList <- function(model, ..., data, formula = ~., group, raw=FALSE, fixed.x=NULL, objective=msemObjectiveML, n.imp=5, n.chains=n.imp, n.iter=30, seed=sample(1e6, 1), mi.args=list(), show.progress=TRUE){ if (missing(formula)) formula <- as.formula(paste("~ . 
-", group)) warn <- options(warn=-1) on.exit(options(warn)) initial.fit <- sem(model, ..., data=na.omit(data), formula=formula, group=group, raw=raw, fixed.x=fixed.x, objective = objective) options(warn) coefficients <- coefficients(initial.fit) coef.names <- names(coefficients) var.names <- initial.fit$var.names ram <- initial.fit$ram groups <- initial.fit$groups group <- initial.fit$group G <- length(groups) for (g in 1:G){ pars <- ram[[g]][, "parameter"] free <- pars != 0 ram[[g]][free, "start value"] <- coefficients[pars[free]] } mi.args$n.chains <- n.chains mi.args$n.iter <- n.iter mi.args$seed <- seed mi.args$y <- data if (show.progress) cat("\n Beginning", n.imp, "imputations\n") mi.data <- do.call("mi", mi.args) if (show.progress) cat("\n Imputations complete\n") fits <- complete(mi.data, m = n.imp, include_missing = FALSE) # has.tcltk <- require("tcltk") # if (has.tcltk) pb <- tkProgressBar("Fitting", "Imputation no.: ", 0, n.imp) if (show.progress) { cat("\n Fitting model to imputations:\n") pb <- txtProgressBar(min=0, max=n.imp, style=3) } for (i in 1:n.imp){ # if (has.tcltk) setTkProgressBar(pb, i, label=sprintf("Imputation no.: %d", i)) if (show.progress) setTxtProgressBar(pb, i) data.i <- fits[[i]] group.i <- data.i[, group] data.i <- model.frame(formula, data=data.i) data.i <- model.matrix(formula, data=data.i) colnames(data.i)[colnames(data.i) == "(Intercept)"] <- "Intercept" S <- data.out <- vector(G, mode="list") N <- numeric(G) for (g in 1:G){ data.g <- data.i[group.i == groups[g], ] data.out[[g]] <- data.g N[g] <- nrow(data.g) S[[g]] <- if (raw) rawMoments(data.g) else { data.g <- data.g[, colnames(data.g) != "Intercept"] cov(data.g) } } fit <- sem(ram, S=S, N=N, group=group, groups=groups, raw=raw, data=data.out, fixed.x=initial.fit$fixed.x, param.names=coef.names, var.names=var.names, optimizer=initial.fit$optimizer, objective=objective, ...) 
fits[[i]] <- fit } # if (has.tcltk) close(pb) if (show.progress) close(pb) result <- list(initial.fit=initial.fit, mi.fits=fits, imputations=mi.data, seed=seed, mi.data=mi.data) class(result) <- "miSem" result } print.miSem <- function(x, ...){ coefs <- sapply(x$mi.fits, coef) vars <- sapply(x$mi.fits, function(x) diag(vcov(x))) table <- matrix(0, NROW(coefs), 4) table[, 1] <- rowMeans(coefs) ses <- sqrt(rowMeans(vars) + apply(coefs, 1, var) * (1 + 1/NCOL(coefs))) table[, 2] <- ses table[, 3] <- table[, 1]/table[, 2] table[, 4] <- 2*pnorm(abs(table[, 3]), lower.tail=FALSE) rownames(table) <- rownames(coefs) cat("\nCoefficients:\n") colnames(table) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)") printCoefmat(table, ...) invisible(x) } summary.miSem <- function(object, digits=max(3, getOption("digits") - 2), ...){ coefs <- lapply(object$mi.fits, coef) table <- do.call("cbind", coefs) rownames(table) <- names(coefs[[1]]) table <- cbind(table, rowMeans(table), coef(object$initial.fit)) colnames(table) <- c(paste("Imputation", 1:length(coefs)), "Averaged", "Initial Fit") result <- list(object=object, mi.results=table, digits=digits) class(result) <- "summary.miSem" result } print.summary.miSem <- function(x, ...){ cat("\nCoefficients by imputation:\n") print(x$mi.results, digits=x$digits, ...) print(x$object, digits=x$digits, ...) invisible(x) } sem/R/objectiveGLS.R0000644000175000017500000000113012042646337014036 0ustar nileshnilesh# last modified 2012-01-06 by J. Fox # Modified for Compiled code in C/C++ by Zhenghua Nie. 
objectiveGLS <- function (gradient = FALSE) { result <- list(objective = function(par, model.description) { with(model.description, { res <- CompiledObjective(par=par, model.description=model.description, objective="objectiveGLS", gradient=gradient) f <- res$f C <- res$C A <- res$A P <- res$P attributes(f) <- list(C = C, A = A, P = P) f } ) } ) class(result) <- "semObjective" result } sem/R/standardizedCoefficients.R0000644000175000017500000000371312026676605016530 0ustar nileshnilesh# modified 24 May 2009 by Adam Kramer (original by J. Fox) # last modified 2011-08-10 by J. Fox standardized.coefficients <- function(...){ .Deprecated("standardizedCoefficients", package="sem") standardizedCoefficients(...) } standardizedCoefficients <- function(object, ...){ UseMethod("standardizedCoefficients") } standardizedCoefficients.sem <- function (object, digits = getOption("digits"), oneheaded = TRUE, twoheaded = TRUE, ...) { if (!oneheaded && !twoheaded) { stop("No coefficients requested.") } old.digits <- options(digits = digits) on.exit(options(old.digits)) P <- object$P A <- object$A t <- object$t par <- object$coeff par.posn <- object$par.posn IAinv <- solve(diag(nrow(A)) - A) C <- IAinv %*% P %*% t(IAinv) ram <- object$ram par.names <- rep(" ", nrow(ram)) for (i in 1:t) { which.par <- ram[, 4] == i ram[which.par, 5] <- par[i] par.names[which.par] <- names(par)[i] } coeff <- ram[, 5] if (oneheaded) { one.head <- ram[, 1] == 1 coeff[one.head] <- coeff[one.head] * sqrt(diag(C[ram[one.head, 3], ram[one.head, 3], drop = FALSE])/diag(C[ram[one.head, 2], ram[one.head, 2], drop = FALSE])) } if (twoheaded) { two.head <- ram[, 1] == 2 coeff[two.head] <- coeff[two.head]/sqrt(diag(C[ram[two.head, 3], ram[two.head, 3], drop = FALSE]) * diag(C[ram[two.head, 2], ram[two.head, 2], drop = FALSE])) } var.names <- rownames(A) par.code <- paste(var.names[ram[, 2]], c("<---", "<-->")[ram[, 1]], var.names[ram[, 3]]) coeff <- data.frame(par.names, coeff, par.code) colnames(coeff) <- c(" ", 
"Std. Estimate", " ") if (oneheaded && twoheaded) { coeff } else if (oneheaded) { coeff[one.head, ] } else { coeff[two.head, ] } } std.coef <- function(...){ .Deprecated("stdCoef", package="sem") standardizedCoefficients(...) } stdCoef <- function (...){ standardizedCoefficients(...) } sem/R/bootSem.R0000644000175000017500000001601412535651426013137 0ustar nileshnilesh# bootstrapped standard errors and confidence intervals for sem # last modified 2015-06-09 by J. Fox boot.sem <- function(...) { .Deprecated("bootSem", package="sem") bootSem(...) } bootSem <- function (model, ...){ UseMethod("bootSem") } bootSem.sem <- function(model, R=100, Cov=cov, data=model$data, max.failures=10, show.progress=TRUE, ...){ refit <- function(){ indices <- sample(N, N, replace=TRUE) S <- Cov(data[indices,]) refitted.model <- sem(ram, S, N, param.names=coef.names, var.names=var.names, optimizer=model$optimizer, objective=model$objective, ...) refitted.model$coeff } # if (!require("boot")) stop("package boot not available") # has.tcltk <- require("tcltk") # pb <- tkProgressBar("Bootstrap Sampling", "Bootstrap sample: ", 0, R) if (show.progress){ cat("\n", R, "bootstrap replications\n") pb <- txtProgressBar(min=0, max=R, style=3) } # the following 2 lines borrowed from boot in package boot if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) warn <- options(warn=-2) on.exit(options(warn)) # insure restore even in event of error nErrors <- 0 if (is.null(data)) stop("the model object doesn't contain a data matrix") N <- nrow(data) coefficients <- model$coeff coef.names <- names(coefficients) var.names <- model$var.names ram <- model$ram ram[coef.names, "start value"] <- coefficients coefs <- matrix(numeric(0), R, length(coefficients)) colnames(coefs) <- coef.names for (b in 1:R){ # setTkProgressBar(pb, b, label=sprintf("Bootstrap sample: %d", b)) if (show.progress) setTxtProgressBar(pb, b) for (try 
in 1:(max.failures + 1)){ if (try > max.failures) stop("more than ", max.failures, " consecutive convergence failures") res <- try(refit(), silent=TRUE) if (inherits(res, "try-error")) nErrors <- nErrors + 1 else { coefs[b,] <- res break() } } } options(warn) if (nErrors > 0) warning("there were", nErrors, "apparent convergence failures;\nthese are discarded from the", R, "bootstrap replications returned") res <- list(t0=coefficients, t=coefs, R=R, data=data, seed=seed, statistic=refit, sim="ordinary", stype="i", call=match.call(), strata=rep(1, N), weights=rep(1/N, N)) res$call[[1]] <- as.name("bootSem") if (show.progress) close(pb) class(res) <- c("bootsem", "boot") res } bootSem.msem <- function(model, R=100, Cov=cov, data=model$data, max.failures=10, show.progress=TRUE, ...){ refit <- function(){ for (g in 1:G){ indices <- sample(N[g], N[g], replace=TRUE) S[[g]] <- Cov(data[[g]][indices, ]) } refitted.model <- sem(ram, S, N, param.names=coef.names, var.names=var.names, optimizer=model$optimizer, objective=model$objective, fixed.x=model$fixed.x, ...) 
refitted.model$coeff } # if (!require("boot")) stop("package boot not available") # has.tcltk <- require("tcltk") # pb <- tkProgressBar("Bootstrap Sampling", "Bootstrap sample: ", 0, R) if (show.progress){ cat("\n", R, "bootstrap replications\n") pb <- txtProgressBar(min=0, max=R, style=3) } # the following 2 lines borrowed from boot in package boot if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) warn <- options(warn=-2) on.exit(options(warn)) # insure restore even in event of error nErrors <- 0 if (is.null(data)) stop("the model object doesn't contain data matrices") N <- sapply(data, nrow) coefficients <- model$coeff coef.names <- names(coefficients) var.names <- model$var.names ram <- model$ram groups <- model$groups group <- model$group G <- length(groups) S <- vector(G, mode="list") for (g in 1:G){ pars <- ram[[g]][, "parameter"] free <- pars != 0 ram[[g]][free, "start value"] <- coefficients[pars[free]] } coefs <- matrix(numeric(0), R, length(coefficients)) colnames(coefs) <- coef.names for (b in 1:R){ # setTkProgressBar(pb, b, label=sprintf("Bootstrap sample: %d", b)) if (show.progress) setTxtProgressBar(pb, b) for (try in 1:(max.failures + 1)){ if (try > max.failures) stop("more than ", max.failures, " consecutive convergence failures") res <- try(refit(), silent=TRUE) if (inherits(res, "try-error")) nErrors <- nErrors + 1 else { coefs[b,] <- res break() } } } options(warn) if (nErrors > 0) warning("there were", nErrors, "apparent convergence failures;\nthese are discarded from the", R, "bootstrap replications returned") res <- list(t0=coefficients, t=coefs, R=R, data=data, seed=seed, statistic=refit, sim="ordinary", stype="i", call=match.call(), strata=rep(1:G, N), weights=rep(1/N, N)) res$call[[1]] <- as.name("bootSem") if (show.progress) close(pb) class(res) <- c("bootsem", "boot") res } print.bootsem <- function(x, digits=getOption("digits"), ...){ t <- x$t t0 
<- x$t0 result <- data.frame("Estimate"=t0, "Bias"=colMeans(t) - t0, "Std.Error"=apply(t, 2, sd)) rownames(result) <- names(t0) cat("Call: ") dput(x$call) cat("\n") print(result, digits=digits) invisible(x) } summary.bootsem <- function(object, type=c("perc", "bca", "norm", "basic", "none"), level=0.95, ...){ type <- match.arg(type) t <- object$t t0 <- object$t0 result <- data.frame("Estimate"=t0, "Bias"=colMeans(t) - t0, "Std.Error"=apply(t, 2, sd)) if (type != "none"){ p <- length(t0) lower <- upper <- rep(0, p) low <- if (type == "norm") 2 else 4 up <- if (type == "norm") 3 else 5 for (i in 1:p){ ci <- as.vector(boot.ci(object, type=type, index=i, conf=level)[[type, exact=FALSE]]) lower[i] <- ci[low] upper[i] <- ci[up] } result$Lower <- lower result$Upper <- upper } rownames(result) <- names(t0) result <- list(table=result, call=object$call, level=level, type=type) class(result) <- "summary.bootsem" result } print.summary.bootsem <- function(x, digits=getOption("digits"), ...){ cat("Call: ") dput(x$call) cat("\n") if (x$type != "none") { cat(paste("Lower and upper limits are for the", 100*x$level, "percent", x$type, "confidence interval\n\n")) } print(x$table, digits=digits) invisible(return(x)) } sem/R/optimizerNlm.R0000644000175000017500000000372513563624470014226 0ustar nileshnilesh# last modified 2019-11-15 by J. Fox optimizerNlm <- function(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, ...){ with(model.description, { obj <- objective(gradient=gradient)$objective typsize <- if (par.size == 'startvalues') abs(start) else rep(1, t) if (!warn) save.warn <- options(warn=-1) res <- nlm(obj, start, iterlim=maxiter, print.level=if(debug) 2 else 0, typsize=typsize, hessian=TRUE, model.description, ...) 
if (!warn) options(save.warn) result <- list() result$convergence <- res$code <= 2 result$iterations <- res$iterations par <- res$estimate names(par) <- param.names result$par <- par if (!result$convergence) warning(paste('Optimization may not have converged; nlm return code = ', res$code, '. Consult ?nlm.\n', sep="")) vcov <- matrix(NA, t, t) qr.hess <- try(qr(res$hessian), silent=TRUE) if (inherits(qr.hess, "try-error")){ warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n") } else if (qr.hess$rank < t){ warning(' singular Hessian: model is probably underidentified.\n') which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)] result$aliased <- param.names[which.aliased] } else { vcov <- (2/(N - (!raw))) * solve(res$hessian) if (any(diag(vcov) < 0)) { result$aliased <- param.names[diag(vcov) < 0] warning("Negative parameter variances.\nModel may be underidentified.\n") } } colnames(vcov) <- rownames(vcov) <- param.names result$vcov <- vcov result$criterion <- res$minimum # c(result$obj) - n - log(det(S)) obj <- obj(par, model.description) C <- attr(obj, "C") rownames(C) <- colnames(C) <- var.names[observed] result$C <- C A <- attr(obj, "A") rownames(A) <- colnames(A) <- var.names result$A <- A P <- attr(obj, "P") rownames(P) <- colnames(P) <- var.names result$P <- P class(result) <- "semResult" result } ) }sem/R/objectiveML.R0000644000175000017500000000276212042646337013735 0ustar nileshnilesh# last modified 2012-01-06 by J. Fox # Modified for Compiled code in C/C++ by Zhenghua Nie. 
objectiveML <- function(gradient=TRUE, hessian=FALSE){ result <- list( objective = function(par, model.description){ with(model.description, { res <- CompiledObjective(par=par, model.description=model.description, objective="objectiveML",gradient=gradient, hessian=hessian) f <- res$f C <- res$C A <- res$A P <- res$P grad <- NULL if(gradient) grad <- res$gradient hess <- NULL if(hessian) hess <- res$hessian attributes(f) <- list(C=C, A=A, P=P, gradient=grad, hessian=hess) f } ) } ) if (gradient) result$gradient <- function(par, model.description){ with(model.description, { res <- CompiledObjective(par=par, model.description=model.description, objective="objectiveML",hessian=hessian) A <- res$A P <- res$P C <- res$C grad <- res$gradient attributes(grad) <- list(C=C, A=A, P=P, gradient=grad) grad } ) } if (hessian) result$hessian <- function(par, model.description){ with(model.description, { res <- CompiledObjective(par=par, model.description=model.description, objective="objectiveML",hessian=hessian) A <- res$A P <- res$P C <- res$C hess <- res$hessian attributes(grad) <- list(C=C, A=A, P=P, hessian=hess) grad } ) } class(result) <- "semObjective" result } sem/R/optimizerOptim.R0000644000175000017500000000433213563624470014563 0ustar nileshnilesh# last modified 2019-11-15 by J. 
Fox optimizerOptim <- function(start, objective=objectiveML, gradient=TRUE, maxiter, debug, par.size, model.description, warn, method="CG", ...){ with(model.description, { obj <- objective(gradient=gradient) grad <- if (gradient) obj$gradient else NULL obj <- obj$objective typsize <- if (par.size == 'startvalues') abs(start) else rep(1, t) if (!warn) save.warn <- options(warn=-1) res <- optim(start, obj, gr=grad, hessian=TRUE, control=list(maxit=maxiter, trace=if(debug) 6 else 0, parscale=typsize), method=method, ..., model.description) if (!warn) options(save.warn) result <- list() result$convergence <- res$convergence == 0 result$iterations <- if (res$convergence == 1) maxiter else NA par <- res$par names(par) <- param.names result$par <- par if (!result$convergence) warning(paste('Optimization may not have converged; optim return code = ', res$convergence, '. Consult ?optim.\n', sep="")) vcov <- matrix(NA, t, t) qr.hess <- try(qr(res$hessian), silent=TRUE) if (inherits(qr.hess, "try-error")){ warning("Could not compute QR decomposition of Hessian.\nOptimization probably did not converge.\n") } else if (qr.hess$rank < t){ warning(' singular Hessian: model is probably underidentified.\n') which.aliased <- qr.hess$pivot[-(1:qr.hess$rank)] result$aliased <- param.names[which.aliased] } else { vcov <- (2/(N - (!raw))) * solve(res$hessian) if (any(diag(vcov) < 0)) { result$aliased <- param.names[diag(vcov) < 0] warning("Negative parameter variances.\nModel may be underidentified.\n") } } colnames(vcov) <- rownames(vcov) <- param.names result$vcov <- vcov result$criterion <- res$value if (!raw) { CC <- diag(diag(S)) result$chisqNull <- (N - 1) * (sum(diag(S %*% solve(CC))) + log(det(CC)) - log(det(S)) - n) } obj <- obj(par, model.description) C <- attr(obj, "C") rownames(C) <- colnames(C) <- var.names[observed] result$C <- C A <- attr(obj, "A") rownames(A) <- colnames(A) <- var.names result$A <- A P <- attr(obj, "P") rownames(P) <- colnames(P) <- var.names result$P <- 
P class(result) <- "semResult" result } ) }sem/R/objectiveGLS2.R0000644000175000017500000000132511701663125014121 0ustar nileshnilesh# last modified 2012-01-06 by J. Fox objectiveGLS2 <- function (gradient = FALSE) { result <- list(objective = function(par, model.description) { with(model.description, { A <- P <- matrix(0, m, m) val <- ifelse(fixed, ram[, 5], par[sel.free]) A[arrows.1] <- val[one.head] P[arrows.2t] <- P[arrows.2] <- val[!one.head] I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) # Cinv <- solve(C) SS <- invS %*% (S - C) f <- 0.5 * sum(diag(SS %*% SS)) attributes(f) <- list(C = C, A = A, P = P) f }) }) class(result) <- "semObjective" result }sem/R/fscores.R0000644000175000017500000000133111701252753013161 0ustar nileshnilesh# last modified 2011-08-06 by J. Fox fscores <- function(model, ...){ UseMethod("fscores") } fscores.sem <- function(model, data=model$data, center=TRUE, scale=FALSE, ...){ m <- model$m P <- model$P A <- model$A var.names <- model$var.names observed <- var.names %in% rownames(model$C) if (all(observed)) stop("there are no latent variables") IAinv <- solve(diag(m) - A) Sigma <- IAinv %*% P %*% t(IAinv) B <- solve(Sigma[observed, observed]) %*% Sigma[observed, !observed] rownames(B) <- var.names[observed] colnames(B) <- var.names[!observed] if (is.null(data)) return(B) X <- as.matrix(data[,var.names[observed]]) if (center || scale) X <- scale(X, center=center, scale=scale) X %*% B }sem/R/from-matrixcalc.R0000644000175000017500000000474214126352057014620 0ustar nileshnilesh# The following functions were originally in the matrixcalc package # written by Frederick Novomestky # and distributed under the GPL license version 2 or higher # last modified 2021-06-02 duplication.matrix <- function (n = 1) { return(D.matrix(n)) } D.matrix <- function (n) { if (missing(n)) stop("argument n is missing") if (!is.numeric(n)) stop("argument n is not numeric") if (n != trunc(n)) stop("argument n is not an integer") if (n 
< 2) stop("argument n is less than 2") p <- n * (n + 1)/2 nsq <- n * n Dt <- matrix(0, nrow = p, ncol = nsq) T <- T.matrices(n) u <- u.vectors(n) k <- u$k I <- u$I for (j in 1:n) { for (i in j:n) { Dt <- Dt + I[, k[i, j]] %*% t(vec(T[[i]][[j]])) } } return(t(Dt)) } vech <- function (x) { if (!is.square.matrix(x)) stop("argument x is not a square numeric matrix") return(t(t(x[!upper.tri(x)]))) } is.square.matrix <- function (x) { if (!is.matrix(x)) stop("argument x is not a matrix") return(nrow(x) == ncol(x)) } T.matrices <- function (n) { if (missing(n)) stop("argument n is missing") if (!is.numeric(n)) stop("argument n is not numeric") if (n != trunc(n)) stop("argument n is not an integer") if (n < 2) stop("argument n is less than 2") E <- E.matrices(n) T <- list() for (i in 1:n) { T[[i]] <- list() for (j in 1:n) { if (i == j) { T[[i]][[j]] <- E[[i]][[j]] } else { T[[i]][[j]] <- E[[i]][[j]] + E[[j]][[i]] } } } return(T) } u.vectors <- function (n) { if (n != trunc(n)) stop("argument n is not an integer") if (n < 2) stop("argument n is less than 2") p <- n * (n + 1)/2 I <- diag(rep(1, p)) k <- matrix(0, nrow = n, ncol = n) for (j in 1:n) { for (i in j:n) { k[i, j] <- (j - 1) * n + i - 0.5 * j * (j - 1) } } return(list(k = k, I = I)) } vec <- function (x) { if (!is.matrix(x)) { stop("argument x is not a matrix") } if (!is.numeric(x)) { stop("argument x is not a numeric matrix") } return(t(t(as.vector(x)))) } E.matrices <- function (n) { if (missing(n)) stop("argument n is missing") if (!is.numeric(n)) stop("argument n is not numeric") if (n != trunc(n)) stop("argument n is not an integer") if (n < 2) stop("argument n is less than 2") I <- diag(rep(1, n)) E <- list() for (i in 1:n) { E[[i]] <- list() for (j in 1:n) { E[[i]][[j]] <- I[i, ] %o% I[j, ] } } return(E) } sem/R/objectiveFIML2.R0000644000175000017500000004537312070203746014235 0ustar nileshnilesh# last modified 2012-10-25 by J. 
Fox ## this is the straightforward approach summing over observations: # objectiveFIML2 <- function(gradient=FALSE){ # result <- list( # objective = function(par, model.description){ # with(model.description, { # A <- P <- matrix(0, m, m) # val <- ifelse (fixed, ram[,5], par[sel.free]) # A[arrows.1] <- val[one.head] # P[arrows.2t] <- P[arrows.2] <- val[!one.head] # I.Ainv <- solve(diag(m) - A) # C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) # f <- 0 # log.2pi <- log(2*pi) # for (i in 1:nrow(data)){ # sel <- valid[i, ] # x <- data[i, sel] # CC <- C[sel, sel] # f <- f + log(det(CC)) + as.vector(x %*% solve(CC) %*% x) + log.2pi*(sum(sel)) # } # f <- f/N # attributes(f) <- list(C=C, A=A, P=P) # f # }) # } # ) # class(result) <- "semObjective" # result # } ## this avoids duplication by using the svd for the inverse and determinant but is actually slower in R # objectiveFIML2 <- function(gradient=FALSE){ # result <- list( # objective = function(par, model.description){ # with(model.description, { # A <- P <- matrix(0, m, m) # val <- ifelse (fixed, ram[,5], par[sel.free]) # A[arrows.1] <- val[one.head] # P[arrows.2t] <- P[arrows.2] <- val[!one.head] # I.Ainv <- solve(diag(m) - A) # C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) # f <- 0 # log.2pi <- log(2*pi) # for (i in 1:nrow(data)){ # sel <- valid[i, ] # x <- data[i, sel] # CC <- C[sel, sel] # svdCC <-svd(CC) # f <- f + log(prod(svdCC$d)) + as.vector(x %*% svdCC$v %*% diag(1/svdCC$d) %*% t(svdCC$u) %*% x) + log.2pi*(sum(sel)) # } # f <- f/N # attributes(f) <- list(C=C, A=A, P=P) # f # }) # } # ) # class(result) <- "semObjective" # result # } ## this caches results for distinct valid-data patterns but still sums over observations: # objectiveFIML2 <- function(gradient=FALSE){ # result <- list( # objective = function(par, model.description){ # with(model.description, { # A <- P <- matrix(0, m, m) # val <- ifelse (fixed, ram[,5], par[sel.free]) # A[arrows.1] <- val[one.head] # P[arrows.2t] <- P[arrows.2] <- 
val[!one.head] # I.Ainv <- solve(diag(m) - A) # C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) # f <- 0 # log.2pi <- log(2*pi) # CC <- vector(length(unique.patterns), mode="list") # names(CC) <- unique.patterns # log.det.plus <- CC # for (i in 1:nrow(data)){ # sel <- valid[i, ] # if (is.null(log.det.plus[[valid.pattern[i]]])){ # log.det.plus[[valid.pattern[i]]] <- log(det(C[sel, sel])) + log.2pi*(sum(sel)) # CC[[valid.pattern[i]]] <- solve(C[sel, sel]) # } # x <- data[i, sel] # f <- f + as.vector(x %*% CC[[valid.pattern[i]]] %*% x) + log.det.plus[[valid.pattern[i]]] # } # f <- f/N # attributes(f) <- list(C=C, A=A, P=P) # f # }) # } # ) # class(result) <- "semObjective" # result # } ## this sums over distinct valid data patterns rather than observations: objectiveFIML2 <- function(gradient=TRUE, hessian=FALSE){ ## the following two functions are commented since right now we require the package matriccalc when loading sem. # commutation <- function( m, n ) # { # this function return a permutation matrix T_(m, n) which # have the following definition: # T_(m, n)vec(A) = vec(A^T) # where A is a m-by-n matrix. # T_(m, n) is an orthogonal matrix. T_(m, n)=T(n, m)^T # Magnus, J. R. and H. Neudecker (1979). The commutation matrix: some properties and applica- tions, The Annals of Statistics, 7(2), 381-394. # a square mn-by-mn matrix partioned into mn submatrices of n-by-m such that the i, jth matrix has a 1 in its j, ith position, others are zeros. 
# p <- m * n # C <- matrix( 0, nrow=p, ncol=p ) # r <- 0 # for ( i in 1:m ) { # c <- i # the ith row submatrix # for ( j in 1:n ) { # the ith row, jth column submatrix # r <- r + 1 # C[r,c] <- 1 # the (j, i) enttry # c <- c + m # for the next submatrix # } # } # return( C ) # } # vec <- function( x ) # { ## a matrix (m, n) to a vec matrix (1*(m*n)) # return( t( t( as.vector( x ) ) ) ) # } extendMatrix <- function(selA, sel) { nsel <- length(sel); A <- matrix(data=0, nrow=nsel, ncol=nsel); B <- matrix(data=0, nrow=nsel, ncol=ncol(selA)); iselA <- 1 #extend row for(i in (1:nsel)) { if(sel[i]) { B[i, ] = selA[iselA, ]; iselA <- iselA + 1; } } #extend column iselA <- 1; for(i in (1:nsel)) { if(sel[i]) { A[, i]=B[, iselA]; iselA <- iselA + 1; } } A } result <- list( objective = function(par, model.description){ with(model.description, { A <- P <- matrix(0, m, m) val <- ifelse (fixed, ram[,5], par[sel.free]) A[arrows.1] <- val[one.head] P[arrows.2t] <- P[arrows.2] <- val[!one.head] I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) f <- 0 grad <- NULL dfdC <- matrix(data=0.0, nrow=nrow(C), ncol=ncol(C)) log.2pi <- log(2*pi) n.pat <- nrow(valid.data.patterns) for (i in 1:n.pat){ sel <- valid.data.patterns[i, ] X <- data[pattern.number == i, sel, drop=FALSE] Ci <- C[sel, sel] Cinv <- solve(Ci) # note: in the matrix product X %*% solve(C[sel, sel]) %*% t(X)) # only the trace is required and so it isn't necessary to form the whole product # f <- f + sum(diag(X %*% Cinv %*% t(X))) + nrow(X)*(log.2pi + log(det(Ci))) for (j in 1:nrow(X)){ f <- f + as.vector(X[j, ] %*% Cinv %*% X[j, ]) } f <- f + nrow(X)*(log.2pi + log(det(Ci))) #gradient if(gradient) { dfdCi <- nrow(X)*t(vec(t(Cinv))) - t(vec(t(X))) %*% (X %x% diag(nrow(Cinv))) %*% (t(Cinv) %x% Cinv) dfdCiExtend <- extendMatrix(matrix(dfdCi, nrow(Cinv), ncol(Cinv)), sel); dfdC <- dfdC + dfdCiExtend; } } f <- f/N if(gradient) { dfdC <- t(vec(dfdC/N)) #dfdC is a 1*(n^2) matrix. 
dCdP <- (J %*% I.Ainv) %x% (J %*% I.Ainv) B <- J %*% I.Ainv dBdA <- (diag(m) %x% J) %*% (t(I.Ainv) %x% I.Ainv) Tmn <- commutation.matrix(nrow(B),ncol(B)) dCdA <- (((B %*% t(P)) %x% diag(nrow(B)) )+(diag(nrow(B)) %x% B) %*% Tmn %*% (P %x% diag(nrow(B)))) %*% dBdA dfdP <- t(vec(correct)) * (dfdC %*% dCdP) dfdA <- dfdC %*% dCdA grad <- rep(0,t) myarrows.1.free <- rep(0,nrow(arrows.1.free)) myarrows.2.free <- rep(0,nrow(arrows.2.free)) for(i in 1:nrow(arrows.1.free)) myarrows.1.free[i] <- (arrows.1.free[i,][2]-1)*m+arrows.1.free[i,][1] for(i in 1:nrow(arrows.2.free)) myarrows.2.free[i] <- (arrows.2.free[i,][2]-1)*m+arrows.2.free[i,][1] grad[sort(unique.free.1)] <- tapply(t(dfdA)[myarrows.1.free],ram[ram[,1]==1 & ram[,4]!=0, 4], sum) grad[sort(unique.free.2)] <- tapply(t(dfdP)[myarrows.2.free],ram[ram[,1]==2 & ram[,4]!=0, 4], sum) } attributes(f) <- list(C=C, A=A, P=P, gradient=grad) f }) } ) if (gradient) result$gradient <- function(par, model.description){ with(model.description, { A <- P <- matrix(0, m, m) val <- ifelse (fixed, ram[,5], par[sel.free]) A[arrows.1] <- val[one.head] P[arrows.2t] <- P[arrows.2] <- val[!one.head] I.Ainv <- solve(diag(m) - A) C <- J %*% I.Ainv %*% P %*% t(I.Ainv) %*% t(J) dfdC <- matrix(data=0.0, nrow=nrow(C), ncol=ncol(C)) n.pat <- nrow(valid.data.patterns) for (i in 1:n.pat){ sel <- valid.data.patterns[i, ] X <- data[pattern.number == i, sel, drop=FALSE] Ci <- C[sel, sel] Cinv <- solve(Ci) dfdCi <- nrow(X)*t(vec(t(Cinv))) - t(vec(t(X))) %*% (X %x% diag(nrow(Cinv))) %*% (t(Cinv) %x% Cinv) dfdCiExtend <- extendMatrix(matrix(dfdCi, nrow(Cinv), ncol(Cinv)), sel); dfdC <- dfdC + dfdCiExtend; } dfdC <- t(vec(dfdC/N)) #dfdC is a 1*(n^2) matrix. 
dCdP <- (J %*% I.Ainv) %x% (J %*% I.Ainv) B <- J %*% I.Ainv dBdA <- (diag(nrow(A)) %x% J) %*% (t(I.Ainv) %x% I.Ainv) Tmn <- commutation.matrix(nrow(B),ncol(B)) dCdA <- (((B %*% t(P)) %x% diag(nrow(B)) )+(diag(nrow(B)) %x% B) %*% Tmn %*% (P %x% diag(nrow(B)))) %*% dBdA dfdP <- dfdC %*% dCdP dfdA <- dfdC %*% dCdA grad <- rep(0,t) myarrows.1.free <- rep(0,nrow(arrows.1.free)) myarrows.2.free <- rep(0,nrow(arrows.2.free)) for(i in 1:nrow(arrows.1.free)) myarrows.1.free[i] <- (arrows.1.free[i,][2]-1)*9+arrows.1.free[i,][1] for(i in 1:nrow(arrows.2.free)) myarrows.2.free[i] <- (arrows.2.free[i,][2]-1)*9+arrows.2.free[i,][1] grad[sort(unique.free.1)] <- tapply(t(dfdA)[myarrows.1.free],ram[ram[,1]==1 & ram[,4]!=0, 4], sum) grad[sort(unique.free.2)] <- tapply(t(dfdP)[myarrows.2.free],ram[ram[,1]==2 & ram[,4]!=0, 4], sum) attributes(grad) <- list(C=C, A=A, P=P, gradient=grad) grad } ) } class(result) <- "semObjective" result } #logLik2.objectiveFIML <- function(object, saturated=FALSE, intercept="Intercept", iterlim=1000, ...){ # logLikSaturated <- function(object, iterlim, ...){ # objective <- function(par){ # C <- matrix(0, n, n) # C[posn.intercept, posn.intercept] <- 1 # C[tri] <- par # C <- C + t(C) - diag(diag(C)) # f <- 0 # for (i in 1:n.pat){ # sel <- valid.data.patterns[i, ] # X <- data[pattern.number == i, sel, drop=FALSE] # f <- f + sum(diag(X %*% solve(C[sel, sel]) %*% t(X))) + nrow(X)*(log.2pi + log(det(C[sel, sel]))) # } # f # } # data <- object$data # valid <- !is.na(data) # valid.pattern <- apply(valid, 1, function(row) paste(row, collapse=".")) # unique.patterns <- unique(valid.pattern) # pattern.number <- apply(outer(valid.pattern, unique.patterns, `==`), 1, which) # valid.data.patterns <- t(sapply(strsplit(unique.patterns, "\\."), as.logical)) # n.pat <- nrow(valid.data.patterns) # log.2pi <- log(2*pi) # n <- ncol(data) # N <- nrow(data) # C <- object$C # tri <- lower.tri(C, diag=TRUE) # posn.intercept <- which(rownames(C) == intercept) # 
tri[posn.intercept, posn.intercept] <- FALSE # start <- C[tri] # opt <- options(warn=-1) # on.exit(options(opt)) # res <- nlm(objective, start, iterlim=iterlim) # logL <- - res$minimum/2 # C <- matrix(0, n, n) # C[tri] <- res$estimate # C <- C + t(C) - diag(diag(C)) # C[posn.intercept, posn.intercept] <- 1 # list(logL=logL, C=C, code=res$code) # } # if (saturated) { # res <- logLikSaturated(object, iterlim=iterlim) # if (res$code > 3) warning("nlm return code = ", res$code) # logL <- res$logL # attr(logL, "C") <- res$C # return(logL) # } # else return(- object$criterion*object$N/2) #} residuals.objectiveFIML <- function(object, S, ...){ if (missing(S)) S <- attr(logLik(object, saturated=TRUE), "C") S - object$C } normalizedResiduals.objectiveFIML <- function(object, S, ...){ if (missing(S)) S <- attr(logLik(object, saturated=TRUE), "C") res <- residuals(object, S=S) N <- object$N - (!object$raw) C <- object$C c <- diag(C) res/sqrt((outer(c, c) + C^2)/N) } standardizedResiduals.objectiveFIML <- function(object, S, ...){ if (missing(S)) S <- attr(logLik(object, saturated=TRUE), "C") res <- residuals(object, S=S) s <- diag(S) res/sqrt(outer(s, s)) } deviance.objectiveFIML <- function(object, saturated.logLik, ...){ if (missing(saturated.logLik)) saturated.logLik <- logLik(object, saturated=TRUE) 2*(as.vector(saturated.logLik) - logLik(object)) } AIC.objectiveFIML <- function(object, saturated.logLik, ..., k) { if (missing(saturated.logLik)) saturated.logLik <- logLik(object, saturated=TRUE) deviance(object, saturated.logLik) + 2*object$t } AICc.objectiveFIML <- function(object, saturated.logLik, ...) { if (missing(saturated.logLik)) saturated.logLik <- logLik(object, saturated=TRUE) deviance(object, saturated.logLik) + 2*object$t*(object$t + 1)/(object$N - object$t - 1) } CAIC.objectiveFIML <- function(object, saturated.logLik, ...) 
{ props <- semProps(object) if (missing(saturated.logLik)) saturated.logLik <- logLik(object, saturated=TRUE) deviance(object, saturated.logLik) - props$df*(1 + log(object$N)) } BIC.objectiveFIML <- function(object, saturated.logLik, ...) { n <- object$n n.fix <- object$n.fix N <- object$N t <- object$t df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 if (missing(saturated.logLik)) saturated.logLik <- logLik(object, saturated=TRUE) # deviance(object, saturated.logLik) + object$t*log(object$N) deviance(object, saturated.logLik) - df*log(N) } summary.objectiveFIML <- function(object, digits=getOption("digits"), conf.level=.90, fit.indices=c("AIC", "AICc", "BIC", "CAIC"), saturated=FALSE, intercept="Intercept", saturated.logLik, ...) { fit.indices <- if (missing(fit.indices)){ if (is.null(opt <- getOption("fit.indices"))) c("AIC", "BIC") else opt } else match.arg(fit.indices, several.ok=TRUE) vcov <- vcov(object, robust=FALSE, analytic=FALSE) if (any(is.na(vcov))) stop("coefficient covariances cannot be computed") if (missing(saturated.logLik)) saturated.logLik <- if (saturated) logLik(object, saturated=TRUE, intercept=intercept) else NULL S <- attr(saturated.logLik, "C") norm.res <- if (saturated) normalizedResiduals(object, S) else NA se <- sqrt(diag(vcov)) z <- object$coeff/se n.fix <- object$n.fix n <- object$n t <- object$t C <- object$C N <- object$N df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 if (saturated){ invC <- solve(C) CSC <- invC %*% (S - C) CSC <- CSC %*% CSC CS <- invC %*% S CS <- CS %*% CS chisq <- 2*(saturated.logLik - logLik(object)) logLik <- NULL } else { chisq <- NULL logLik <- logLik(object) } Rsq <- RMSEA.U <- RMSEA.L <- RMSEA <- NFI <- NNFI <- CFI <- AGFI <- SRMR <- NA RMSEA <- c(RMSEA, RMSEA.L, RMSEA.U, conf.level) if (!is.null(object$coeff)){ var.names <- rownames(object$A) ram <- object$ram[object$par.posn, , drop=FALSE] par.code <- paste(var.names[ram[,2]], c('<---', '<-->')[ram[,1]], var.names[ram[,3]]) coeff <- data.frame(object$coeff, se, z, 
2*pnorm(abs(z), lower.tail=FALSE), par.code) names(coeff) <- c("Estimate", "Std Error", "z value", "Pr(>|z|)", " ") row.names(coeff) <- names(object$coeff) } else coeff <- NULL if(saturated){ AIC <- if ("AIC" %in% fit.indices) AIC(object, saturated.logLik=saturated.logLik) else NA AICc <- if ("AICc" %in% fit.indices) AICc(object, saturated.logLik=saturated.logLik) else NA BIC <- if ("BIC" %in% fit.indices) BIC(object, saturated.logLik=saturated.logLik) else NA CAIC <- if ("CAIC" %in% fit.indices) CAIC(object, saturated.logLik=saturated.logLik) else NA # SRMR <- if ("SRMR" %in% fit.indices && !object$raw) sqrt(sum(standardizedResiduals(object, S=S)^2 * # upper.tri(diag(n), diag=TRUE))/(n*(n + 1)/2)) else NA } else AIC <- AICc <- BIC <- CAIC <- NA # if (robust) { # chisq.adjusted <- object$adj.obj$chisq.scaled # chisqNull.adjusted <- chisqNull/object$adj.obj$c # NFI.adjusted <- (chisqNull.adjusted - chisq)/chisqNull.adjusted # NNFI.adjusted <- (chisqNull.adjusted/dfNull - chisq.adjusted/df)/(chisqNull.adjusted/dfNull - 1) # L1 <- max(chisq.adjusted - df, 0) # L0 <- max(L1, chisqNull.adjusted - dfNull) # CFI.adjusted <- 1 - L1/L0 # } # else{ chisq.adjusted <- chisqNull.adjusted <- NFI.adjusted <- NNFI.adjusted <- CFI.adjusted <- NULL # } ans <- list(chisq=chisq, logLik=logLik, df=df, chisqNull=chisqNull, dfNull=NA, GFI=NULL, AGFI=AGFI, RMSEA=RMSEA, NFI=NFI, NNFI=NNFI, CFI=CFI, BIC=BIC, SRMR=SRMR, AIC=AIC, AICc=AICc, CAIC=CAIC, Rsq=Rsq, chisq.adjusted=chisq.adjusted, chisqNull.adjusted=chisqNull.adjusted, NFI.adjusted=NFI.adjusted, NNFI.adjusted=NNFI.adjusted, CFI.adjusted=CFI.adjusted, norm.res=norm.res, coeff=coeff, digits=digits, iterations=object$iterations, aliased=object$aliased, raw=object$raw, robust=FALSE, robust.vcov=NULL, adj.obj=NULL) class(ans) <- "summary.objectiveML" ans } print.objectiveFIML <- function(x, saturated=FALSE, ...) 
{ n <- x$n t <- x$t n.fix <- x$n.fix df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 if (saturated){ cat("\n Model Chisquare = ", 2*(logLik(x, saturated=TRUE) - logLik(x)), " Df = ", df, "\n\n") } else{ cat("\n Model log-likelihood = ", logLik(x), " Df = ", df, "\n\n") } if (!is.null(x$coef)){ print(x$coeff) if (!is.na(x$iterations)) cat("\n Iterations = ", x$iterations, "\n") if (!is.null(x$aliased)) cat("\n Aliased parameters:", x$aliased, "\n") } invisible(x) } sem/R/print.methods.R0000644000175000017500000000112511701252753014314 0ustar nileshnilesh# last modified 2011-08-04 by J. Fox print.objectiveML <- function(x, ...) { n <- x$n t <- x$t n.fix <- x$n.fix df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 cat("\n Model Chisquare = ", x$criterion * (x$N - (!x$raw)), " Df = ", df, "\n\n") if (!is.null(x$coef)){ print(x$coeff) if (!is.na(x$iterations)) cat("\n Iterations = ", x$iterations, "\n") if (!is.null(x$aliased)) cat("\n Aliased parameters:", x$aliased, "\n") } invisible(x) } print.objectiveGLS <- function(x, ...) print.objectiveML(x, ...) sem/R/startvalues.R0000644000175000017500000001432612100340307014065 0ustar nileshnilesh# last modified 2013-01-24 by J. 
Fox startvalues2 <- function(S, ram, debug=FALSE, tol=1E-6){ ram <- ram[!(ram[, "parameter"] == 0 & ram[, "start value"] == 0), , drop=FALSE] # eliminate fixed parameters set to 0 n <- nrow(S) observed <- 1:n m <- max(ram[,c(2,3)]) t <- max(ram[,4]) s <- sqrt(diag(S)) R <- S/outer(s,s) latent<-(1:m)[-observed] par.posn <- sapply(1:t, function(i) which(ram[,4] == i)[1]) one.head <- ram[,1] == 1 start <- (ram[,5])[par.posn] A.pat <-matrix(FALSE, m, m) A.pat[ram[one.head, c(2,3), drop=FALSE]] <- TRUE P.pat <- C <- matrix(0, m, m) P.pat[ram[!one.head, c(2,3), drop=FALSE]] <- P.pat[ram[!one.head, c(3,2), drop=FALSE]] <- 1 C[observed, observed] <- R for (l in latent) { indicators <- A.pat[observed, l] for (j in observed){ C[j, l] <- C[l, j] <- if (!any(indicators)) runif(1, .3, .5) else { numerator <- sum(R[j, observed[indicators]]) denominator <- sqrt(sum(R[observed[indicators], observed[indicators]])) numerator/denominator } } } for (l in latent){ for (k in latent){ C[l, k] <- C[k,l] <- if (l==k) 1 else { indicators.l <- A.pat[observed, l] indicators.k <- A.pat[observed, k] if ((!any(indicators.l)) | (!any(indicators.k))) runif(1, .3, .5) else { numerator <- sum(R[observed[indicators.l], observed[indicators.k]]) denominator <- sqrt( sum(R[observed[indicators.l], observed[indicators.l]]) * sum(R[observed[indicators.k], observed[indicators.k]])) numerator/denominator} } } } A <- matrix(0, m, m) for (j in 1:m){ ind <- A.pat[j,] if (!any(ind)) next A[j, ind] <- solve(C[ind, ind]) %*% C[ind, j] } A[observed,] <- A[observed,]*matrix(s, n, m) A[,observed] <- A[,observed]/matrix(s, m, n, byrow=TRUE) C[observed,] <- C[observed,]*matrix(s, n, m) C[,observed] <- C[,observed]*matrix(s, m, n, byrow=TRUE) P <- (diag(m) - A) %*% C %*% t(diag(m) - A) P <- P.pat * P for (par in 1:t){ if (!is.na(start[par])) next posn <- par.posn[par] if (ram[posn, 1] == 1) start[par] <- A[ram[posn, 2], ram[posn, 3]] else start[par] <- P[ram[posn, 2], ram[posn, 3]] if (abs(start[par]) < tol) start[par] 
<- tol } if (debug){ cat('\nStart values:\n') print(start) cat('\n') } start } startvalues <- function(S, ram, debug=FALSE, tol=1E-6){ ram <- ram[!(ram[, "parameter"] == 0 & ram[, "start value"] == 0), , drop=FALSE] # eliminate fixed parameters set to 0 n <- nrow(S) observed <- 1:n m <- max(ram[,c(2,3)]) t <- max(ram[,4]) s <- sqrt(diag(S)) R <- S/outer(s,s) latent<-(1:m)[-observed] par.posn <- sapply(1:t, function(i) which(ram[,4] == i)[1]) one.head <- ram[,1] == 1 start <- (ram[,5])[par.posn] A.pat <-matrix(FALSE, m, m) A.pat[ram[one.head, c(2,3), drop=FALSE]] <- TRUE P.pat <- C <- matrix(0, m, m) P.pat[ram[!one.head, c(2,3), drop=FALSE]] <- P.pat[ram[!one.head, c(3,2), drop=FALSE]] <- 1 C[observed, observed] <- R ref.indicators <- numeric(m) for (l in latent) { indicators <- A.pat[observed, l] if (any(indicators)){ ref <- which.max(rowMeans(abs(R[observed[indicators], observed[indicators], drop=FALSE]))) ref.indicators[l] <- observed[indicators][ref] } for (j in observed){ C[j, l] <- C[l, j] <- if (!any(indicators)) runif(1, .3, .5) else { numerator <- mean(abs(R[j, observed[indicators]])) denominator <- sqrt(mean(abs(R[observed[indicators], observed[indicators]]))) c <- numerator/denominator sign(R[j, ref.indicators[l]])*abs(c) } } } for (l in latent){ indicators.l <- A.pat[observed, l] for (k in latent){ C[l, k] <- C[k, l] <- if (l==k) 1 else { # if (l == k){ # if (!any(indicators.l)) runif(1, .3, .5) # else mean(abs(R[indicators.l, ref.indicators[l]])) # } # else { indicators.k <- A.pat[observed, k] if ((!any(indicators.l)) | (!any(indicators.k))) runif(1, .3, .5) else { numerator <- mean(abs(R[observed[indicators.l], observed[indicators.k]])) denominator <- sqrt( mean(abs(R[observed[indicators.l], observed[indicators.l]])) * mean(abs(R[observed[indicators.k], observed[indicators.k]]))) c <- numerator/denominator sign(R[ref.indicators[l], ref.indicators[k]])*abs(c) } } } } A <- matrix(0, m, m) for (j in 1:m){ ind <- A.pat[j,] if (!any(ind)) next A[j, ind] <- 
solve(C[ind, ind]) %*% C[ind, j] } A[observed,] <- A[observed,]*matrix(s, n, m) A[,observed] <- A[,observed]/matrix(s, m, n, byrow=TRUE) C[observed,] <- C[observed,]*matrix(s, n, m) C[,observed] <- C[,observed]*matrix(s, m, n, byrow=TRUE) P <- (diag(m) - A) %*% C %*% t(diag(m) - A) P <- P.pat * P diag(P) <- abs(diag(P)) for (par in 1:t){ if (!is.na(start[par])) next posn <- par.posn[par] if (ram[posn, 1] == 1) start[par] <- A[ram[posn, 2], ram[posn, 3]] else start[par] <- P[ram[posn, 2], ram[posn, 3]] if (abs(start[par]) < tol) start[par] <- tol } if (debug){ cat('\nStart values:\n') print(start) cat('\n') } start } sem/R/effects.R0000644000175000017500000000163511701252753013143 0ustar nileshnilesh# added 2011-11-04 by J. Fox effects.sem <- function(object, ...) { A <- object$A m <- object$m I <- diag(m) endog <- classifyVariables(object$semmod)$endogenous AA <- - A diag(AA) <- 1 Total <- solve(AA) - I Indirect <- Total - A result <- list(Total=Total[endog, ], Direct=A[endog, ], Indirect=Indirect[endog, ]) class(result) <- "semeffects" result } print.semeffects <- function(x, digits=getOption("digits"), ...){ cat("\n Total Effects (column on row)\n") Total <- x$Total Direct <- x$Direct Indirect <- x$Indirect select <- !(apply(Total, 2, function(x) all( x == 0)) & apply(Direct, 2, function(x) all( x == 0)) & apply(Indirect, 2, function(x) all( x == 0))) print(Total[, select], digits=digits) cat("\n Direct Effects\n") print(Direct[, select], digits=digits) cat("\n Indirect Effects\n") print(Indirect[, select], digits=digits) invisible(x) } sem/R/icmethods.R0000644000175000017500000000343312032666120013474 0ustar nileshnilesh# these functions originally by Jarrett Byrnes # methods to generate # various information criteria from # sem or adjchisq objects # as well as generate and AIC table # last modified 2012-10-02 by J. Fox logLik.objectiveML <- function(object, ...){ -0.5*deviance(object) } # generics AICc <- function(object, ...) 
UseMethod("AICc") CAIC <- function(object, ...) UseMethod ("CAIC") # methods for sem objects AIC.objectiveML <- function(object, ..., k) { deviance(object) + 2*object$t } # small sample second order corrected aic AICc.objectiveML <- function(object, ...) { deviance(object) + 2*object$t*(object$t + 1)/(object$N - object$t - 1) } # Consistent Akaike Information Criterion CAIC.objectiveML <- function(object, ...) { props <- semProps(object) props$chisq - props$df*(1 + log(object$N)) } BIC.objectiveML <- function(object, ...) { n <- object$n n.fix <- object$n.fix N <- object$N t <- object$t df <- n*(n + 1)/2 - t - n.fix*(n.fix + 1)/2 # deviance(object) + object$t*log(object$N) deviance(object) - df*log(N) } # the following are not exported and are just place-keepers for the summary method BIC.objectiveGLS <- function(object, ...) NULL AIC.objectiveGLS <- function(object, ...) NULL AICc.objectiveGLS <- function(object, ...) NULL CAIC.objectiveGLS <- function(object, ...) NULL # weights aicW <- function(a.list, func=AICc){ aiclist <- sapply(a.list, function(x) eval(func(x)), simplify=TRUE) delta.i <- aiclist - min(aiclist) aicw <- exp(-0.5*delta.i)/sum(exp(-0.5*delta.i)) return.matrix <- matrix(c(aiclist,delta.i, aicw), ncol=3) colnames(return.matrix) <- c("IC", "delta.i", "weight") rownames(return.matrix) <- 1:length(return.matrix[,1]) return(return.matrix) } sem/R/anova.methods.R0000644000175000017500000000556312006313212014261 0ustar nileshnilesh# last modified 2012-08-01 by J. 
Fox anova.objectiveML <- function(object, model.2, robust=FALSE, ...){ anovaAdjchisq <- function(adjobj0, adjobj1){ # this subfunction oringally by Jarrett Byrnes #from http://www.statmodel.com/chidiff.shtml # Satorra-bentler adjusted chi sq #switching to get order right sbs.nested <- adjobj0 sbs.full <- adjobj1 t0 <- sbs.nested$chisq tr0 <- sbs.nested$chisq.scaled t1 <- sbs.full$chisq tr1 <- sbs.full$chisq.scaled c0 <- sbs.nested$c c1 <- sbs.full$c d0 <- sbs.nested$df d1 <- sbs.full$df cd <- (d0 * c0 - d1*c1)/(d0 - d1) trd <- abs((t0 - t1)/cd) df <- abs(d0 - d1) table <- data.frame(c(d0, d1), c(tr0, tr1), c(NA, df), c(NA, trd), c(NA, pchisq(trd, df, lower.tail=FALSE))) return(table) } dev.1 <- deviance(object) df.1 <- df.residual(object) dev.2 <- deviance(model.2) df.2 <- df.residual(model.2) name.1 <- deparse(substitute(object)) name.2 <- deparse(substitute(model.2)) df <- abs(df.1 - df.2) if (df == 0) stop("the models have the same Df") if (object$N != model.2$N) stop("the models are fit to different numbers of observations") if ((nrow(object$S) != nrow(model.2$S)) || !all.equal(object$S, model.2$S)) stop("the models are fit to different moment matrices") if(!robust){ chisq <- abs(dev.1 - dev.2) table <- data.frame(c(df.1, df.2), c(dev.1, dev.2), c(NA, df), c(NA, chisq), c(NA, pchisq(chisq, df, lower.tail=FALSE))) } else{ cat("Adjusted Using Satorra-Bentler Correction\n"); table <- anovaAdjchisq(object$adj.obj, model.2$adj.obj) } names(table) <- c("Model Df", "Model Chisq", "Df", "LR Chisq", "Pr(>Chisq)") rownames(table) <- c(name.1, name.2) structure(table, heading = c("LR Test for Difference Between Models", ""), class = c("anova", "data.frame")) } anova.objectiveFIML <- function(object, model.2, ...){ logLik.1 <- logLik(object) df.1 <- df.residual(object) logLik.2 <- logLik(model.2) df.2 <- df.residual(model.2) name.1 <- deparse(substitute(object)) name.2 <- deparse(substitute(model.2)) df <- abs(df.1 - df.2) if (df == 0) stop("the models have the same Df") 
if (object$N != model.2$N) stop("the models are fit to different numbers of observations") if ((nrow(object$S) != nrow(model.2$S)) || !all.equal(object$S, model.2$S)) stop("the models are fit to different data sets") chisq <- 2*(abs(logLik.1 - logLik.2)) table <- data.frame(c(df.1, df.2), c(logLik.1, logLik.2), c(NA, df), c(NA, chisq), c(NA, pchisq(chisq, df, lower.tail=FALSE))) names(table) <- c("Model Df", "Model Log-Likelihood", "Df", "LR Chisq", "Pr(>Chisq)") rownames(table) <- c(name.1, name.2) structure(table, heading = c("LR Test for Difference Between Models", ""), class = c("anova", "data.frame")) } sem/NEWS0000644000175000017500000001047114126410440011666 0ustar nileshnileshChanges to Version 3.1-13 o Eliminate dependency on matrixcalc package. o Revise Fortran BLAS/LAPACK function calls in C/C++ code as suggested by Brian Ripley. Changes to Version 3.1-12 o Fix to C++ code to correct PROTECT/UNPROTECT imbalance (reported by CRAN). Changes to Version 3.1-11 o Replaced HS.data data set formerly in the MBESS package (following problem reported by Brian Ripley for CRAN). o Small fixes to docs. Changes to Version 3.1-10 o Change instances of class(x) == "y" to inherits(x, "y"), and of x == TRUE to isTRUE(x). Changes to Version 3.1-9 o Changes to C++ code (requested by CRAN). Changes to Version 3.1-8 o Fixed compilation flags (at request of CRAN). o Small correction to DESCRIPTION file. Changes to Version 3.1-7 o Imports from standard packages to conform to new CRAN policy. o Moved DiagrammeR to Suggests: . Changes to Version 3.1-6 o Added text argument to specifyModel(), specifyEquations(), cfa(), readMoments(), and update.semmod(). o Modifications to miSem() etc. to work with version 0.9-99 of the mi package (contributed by Benjamin K Goodrich). o Major upgrades to pathDiagram() and cfa() (implementing suggestions of Michael Friendly). o Get rid of calls to require(). 
Changes to Version 3.1-5 o Small corrections to C++ code by Zhenghua Nie to prevent warnings from valgrind. Changes to Version 3.1-4 o Fixed multigroup start values (which failed under some circumstances, reported by Timo von Oertzen). o specifyModel() no longer fails when the model has just one input line. o specifyEquations() allows structural equations to be split over more than one line. o Fixed bug in computing robust statistics in multigroup models when not all variables in the input data set are in the model (reported by Thomas Niemand). o summary.tsls() now returns an object, printed by print.summary.tsls() (after question by Steve Koch). o Fixed bug suppressing computation of R^2s in a just-identified model (after report by Paula N. Fergnani). o Added message to specifyModel() suggesting use of specifyEquations() or cfa(). o Small fixes. Changes to Version 3.1-3 o No substantive changes. Changes to Version 3.1-2 o specifyModel() now allows tabs in path specifications. o Small fixes for compatibility with R 3.0.0. Changes to Version 3.1-1 o Fixed bug in startvalues() when a latent variable has only 1 indicator. o Added custom c++ function to print debug iteration history. Changes to Version 3.1-0 o Added FIML objective function for handling missing data. (Preliminary implementation for single-group models only.) o Added miSem() for multiple imputation. o Added bootSem.msem() method for multi-group models; bootSem() now generic. o Redefinition of BIC (differs by constant from previous version, suggested by Ken Bollen). o Default reference.indicators=TRUE in cfa() (suggestion of Ken Bollen). o Added fit.indices argument to summary() methods, along with fit.indices global option. AIC and BIC provided by default; other fit indices (including addition of RNI and IFI, suggested by Ken Bollen) available optionally. o New version of startvalues(); old version now named startvalues2(). o New pathDiagram.semmod() method. o Small fixes and changes. 
Changes to Version 3.0-0 o Compiled code for optimization. o Added multi-group models. o Modification indices for equality-constrained parameters. o weights argument added to tsls(). o raw argument added to cfa(). Changes to Version 2.1-2 o Suppress R^2s when raw moments used (they were not computed correctly). o Enhancements to update.semmod() and new edit.semmod() (suggestions of Michael Friendly). Changes to Version 2.1-1 o Added cfa() for compact specification of confirmatory factor analysis models. Changes to Version 2.1-0 o Added equation-style model specification, via specifyEquations() o Fixed bug in classifyVariables() when <- (rather than ->) used in the model. o Fixed bug in vcov.sem() when the model has just one structural parameter or just one variance/covariance parameter. Changes to Version 2.0-0 o Completely reworked version of the sem package. See CHANGES file for changes to older versions. sem/inst/0000755000175000017500000000000014126411066012146 5ustar nileshnileshsem/inst/etc/0000755000175000017500000000000014126411067012722 5ustar nileshnileshsem/inst/etc/R-Kerchoff.txt0000644000175000017500000000042111701252753015407 0ustar nileshnilesh -.100 .277 -.152 .250 -.108 .611 .572 -.105 .294 .248 .489 -.213 .446 .410 .597 .335 -.153 .303 .331 .478 .651 sem/inst/etc/model-McArdle.txt0000644000175000017500000000046011701252753016071 0ustar nileshnilesh# McArdle/Epstein latent-growth-curve model C -> WISC1, NA, 6.07 C -> WISC2, B2, NA C -> WISC3, B3, NA C -> WISC4, B4, NA UNIT -> C, Mc, NA C <-> C, Vc, NA, WISC1 <-> WISC1, Vd, NA WISC2 <-> WISC2, Vd, NA WISC3 <-> WISC3, Vd, NA WISC4 <-> WISC4, Vd, NA sem/inst/etc/R-DHP.txt0000644000175000017500000000055311701252753014301 0ustar nileshnilesh .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 
.2087 sem/inst/etc/model-DHP.txt0000644000175000017500000000074311701252753015201 0ustar nileshnilesh# Duncan, Haller, and Portes peer-influences model RParAsp -> RGenAsp, gam11 RIQ -> RGenAsp, gam12 RSES -> RGenAsp, gam13 FSES -> RGenAsp, gam14 RSES -> FGenAsp, gam23 FSES -> FGenAsp, gam24 FIQ -> FGenAsp, gam25 FParAsp -> FGenAsp, gam26 FGenAsp -> RGenAsp, beta12 RGenAsp -> FGenAsp, beta21 RGenAsp -> ROccAsp, NA, 1 RGenAsp -> REdAsp, lam21 FGenAsp -> FOccAsp, NA, 1 FGenAsp -> FEdAsp, lam42 RGenAsp <-> FGenAsp, ps12 sem/inst/etc/R-Thurstone.txt0000644000175000017500000000072711701252753015664 0ustar nileshnilesh .828 .776 .779 .439 .493 .46 .432 .464 .425 .674 .447 .489 .443 .59 .541 .447 .432 .401 .381 .402 .288 .541 .537 .534 .35 .367 .32 .555 .38 .358 .359 .424 .446 .325 .598 .452 sem/inst/etc/M-McArdle.txt0000644000175000017500000000041511701252753015165 0ustar nileshnilesh 365.661 503.175 719.905 675.656 958.479 1303.392 890.680 1265.846 1712.475 2278.257 18.034 25.819 35.255 46.593 1.000 sem/inst/etc/model-Wheaton-1.txt0000644000175000017500000000202611701252753016325 0ustar nileshnilesh# Wheaton et al. 
alienation model 1 Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, NA, 0.833 Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, NA, 0.833 SES -> Education, NA, 1 SES -> SEI, lamb, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem/inst/etc/GreekLetters.txt0000644000175000017500000000246112525463420016067 0ustar nileshnilesh "decimal" "hex" "Alpha" "Α" "Α" "Beta" "Β" "Β" "Gamma" "Γ" "Γ" "Delta" "Δ" "Δ" "Epsilon" "Ε" "Ε" "Zeta" "Ζ" "Ζ" "Eta" "Η" "Η" "Theta" "Θ" "Θ" "Iota" "Ι" "Ι" "Kappa" "Κ" "Κ" "Lambda" "Λ" "Λ" "Mu" "Μ" "Μ" "Nu" "Ν" "Ν" "Xi" "Ξ" "Ξ" "Omicron" "Ο" "Ο" "Pi" "Π" "Π" "Rho" "Ρ" "Ρ" "Sigma" "Σ" "Σ" "Tau" "Τ" "Τ" "Upsilon" "Υ" "Υ" "Phi" "Φ" "Φ" "Chi" "Χ" "Χ" "Psi" "Ψ" "Ψ" "Omega" "Ω" "Ω" "alpha" "α" "α" "beta" "β" "β" "gamma" "γ" "γ" "delta" "δ" "δ" "epsilon" "ε" "ε" "zeta" "ζ" "ζ" "eta" "η" "η" "theta" "θ" "θ" "iota" "ι" "ι" "kappa" "κ" "κ" "lambda" "λ" "λ" "mu" "μ" "μ" "nu" "ν" "ν" "xi" "ξ" "ξ" "omicron" "ο" "ο" "pi" "π" "π" "rho" "ρ" "ρ" "sigma" "σ" "σ" "tau" "τ" "τ" "upsilon" "υ" "υ" "phi" "φ" "φ" "chi" "χ" "χ" "psi" "ψ" "ψ" "omega" "ω" "ω" sem/inst/etc/R-Blau-Duncan.txt0000644000175000017500000000010111701252753015744 0ustar nileshnilesh1 .516 1 .453 .438 1 .332 .417 .538 1 .322 .405 .596 .541 1 sem/inst/etc/S-Wheaton.txt0000644000175000017500000000045411701252753015274 0ustar nileshnilesh 11.834 6.947 9.364 6.819 5.091 12.532 4.783 5.028 7.495 9.986 -3.839 -3.889 -3.841 -3.625 9.610 -21.899 -18.831 -21.748 -18.775 35.522 450.288 sem/inst/etc/model-Tests.txt0000644000175000017500000000004612005517445015664 0ustar 
nileshnileshverbal: x1, x2, x3 math: y1, y2, y3 sem/inst/etc/model-Thurstone.txt0000644000175000017500000000152011701252753016553 0ustar nileshnilesh# Second-order CFA model for Thurstone tests data F1 -> Sentences, lam11 F1 -> Vocabulary, lam21 F1 -> Sent.Completion, lam31 F2 -> First.Letters, lam42 F2 -> 4.Letter.Words, lam52 F2 -> Suffixes, lam62 F3 -> Letter.Series, lam73 F3 -> Pedigrees, lam83 F3 -> Letter.Group, lam93 F4 -> F1, gam1 F4 -> F2, gam2 F4 -> F3, gam3 F1 <-> F1, NA, 1 F2 <-> F2, NA, 1 F3 <-> F3, NA, 1 F4 <-> F4, NA, 1 sem/inst/etc/model-Kerchoff.txt0000644000175000017500000000122111701252753016305 0ustar nileshnilesh# Kerchoff/Kenney path-analysis model Intelligence -> Grades, gam51 Siblings -> Grades, gam52 FatherEd -> Grades, gam53 FatherOcc -> Grades, gam54 Intelligence -> EducExp, gam61 Siblings -> EducExp, gam62 FatherEd -> EducExp, gam63 FatherOcc -> EducExp, gam64 Grades -> EducExp, beta65 Intelligence -> OccupAsp, gam71 Siblings -> OccupAsp, gam72 FatherEd -> OccupAsp, gam73 FatherOcc -> OccupAsp, gam74 Grades -> OccupAsp, beta75 EducExp -> OccupAsp, beta76 sem/inst/etc/model-Bollen.txt0000644000175000017500000000103211701252753015771 0ustar nileshnilesh# Bollen industrialization and democracy model Demo60 -> y1, NA, 1 Demo60 -> y2, lam2, Demo60 -> y3, lam3, Demo60 -> y4, lam4, Demo65 -> y5, NA, 1 Demo65 -> y6, lam2, Demo65 -> y7, lam3, Demo65 -> y8, lam4, Indust -> x1, NA, 1 Indust -> x2, lam6, Indust -> x3, lam7, y1 <-> y5, theta15 y2 <-> y4, theta24 y2 <-> y6, theta26 y3 <-> y7, theta37 y4 <-> y8, theta48 y6 <-> y8, theta68 Indust -> Demo60, gamma11, Indust -> Demo65, gamma21, Demo60 -> Demo65, beta21, Indust <-> Indust, phi sem/inst/etc/model-Blau-Duncan.txt0000644000175000017500000000025011701252753016650 0ustar nileshnilesh# Blau and Duncan basic stratification model y3 <- x1, gam31 y3 <- x2, gam32 y4 <- x2, gam42 y4 <- y3, beta43 y5 <- x2, gam52 y5 <- y3, beta53 y5 <- y4, beta54 
sem/inst/etc/model-Wheaton-2.txt0000644000175000017500000000207411701252753016331 0ustar nileshnilesh# Wheaton et al. alienation model 2 Alienation67 -> Anomia67, NA, 1 Alienation67 -> Powerless67, lamby, NA Alienation71 -> Anomia71, NA, 1 Alienation71 -> Powerless71, lamby, NA SES -> Education, NA, 1 SES -> SEI, lambx, NA SES -> Alienation67, gam1, NA Alienation67 -> Alienation71, beta, NA SES -> Alienation71, gam2, NA Anomia67 <-> Anomia67, the1, NA Anomia71 <-> Anomia71, the1, NA Powerless67 <-> Powerless67, the2, NA Powerless71 <-> Powerless71, the2, NA Education <-> Education, the3, NA SEI <-> SEI, the4, NA Anomia67 <-> Anomia71, the5, NA Powerless67 <-> Powerless71, the5, NA Alienation67 <-> Alienation67, psi1, NA Alienation71 <-> Alienation71, psi2, NA SES <-> SES, phi, NA sem/inst/etc/model-HS.txt0000644000175000017500000000031311702141310015054 0ustar nileshnilesh spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet sem/inst/CHANGES0000644000175000017500000001722111701252753013146 0ustar nileshnileshChanges to Version 0.7-0 o Fixed problem with analytic gradient, which caused convergence problems for some models. o Simplified optimization. o Added modification indices (preliminary version). o Some changes to start values. o Added RMSEA to fit indices. o Added Klein dataset. Changes to Version 0.7-1 o Replaced F with FALSE as logical, F with f as variable. Changes to Version 0.7-2 o Fixed tsls examples to run even if a global variable named F exists. Changes to Version 0.7-3 o Fixed bug in start-values routine that affected unstandardized models. o Added code to try to detect under-identified models. Changes to Version 0.7-5 o Made arguments to method functions consistent with generics (mostly adding ...). Changes to Version 0.7-6 o Check for negative degrees of freedom. 
Changes to Version 0.7-7 o Fixed bug in sem.default and startvalues that prevented the model from being fit when there are all equality constraints affect precisely the same numbers of parameters. o Fixed bug in tsls.formula that could cause the wrong variable to be identified as the response (causing the fit to fail). o Added code to detect more instances of nonconvergence. o Other small changes to code and documentation. Changes to Version 0.7-8 o Fixed bug in tsls.formula that could prevent model formula from being evaluated (producing an error). Changes to Version 0.7-9 o Fixed bug that caused a model with only one endogenous variable to fail. o Improved computation of RMSEA confidence limits Changes to Version 0.8-0 o Put in a warning for (unfixed) observed or latent variable without either variance or error-variance term specified (i.e., double-headed arrow). As far as I know, this will always produce an error. Changes to Version 0.8-1 o Fixed a bug in tsls.formula that caused the function to fail when the printed representation of the instruments formula extended over more than one line. Changes to Version 0.8-2 o Fixed a bug in startvalues() and sem.default() that caused the variables (observed + unobserved) in the model to be counted incorrectly in some (unusual) circumstances. Changes to Version 0.9-0 o Added function specify.model() to simplify the process of sem model specification. o Added boot.sem() to compute bootstrapped standard-error estimates and confidence intervals for sem parameters. o sem package now has a namespace. o Added digits argument to path.diagram(), as suggested by William Revelle. o New argument raw to sem() to accommodate raw moment matrices (for models with intercepts, after a suggestion by Mike Stoolmiller). Added generic function raw.moments() with formula and default methods to compute raw-moment matrices. o Small changes. 
Changes to Version 0.9-1 o Compute and report the chisquare for a null model in which the observed variables are uncorrelated. Added associated additional fit indices (suggested by Barbara Bredner). Changes to Version 0.9-2 o Added sem methods for deviance and df.residual generics. o Small changes. Changes to Version 0.9-3 o Changed definition to BIC to conform to recommendation in Raftery (1995), from his previous suggestion in Raftery (1993). Thanks for Ken Bollen for pointing this out. o Small documentation changes. o Fixed a bug that could cause summary.sem() to fail for a just-identified model (reported by Ingo Feinerer). Changes to Version 0.9-4 o Fixed version synchronization error. o Small changes. Changes to Version 0.9-5 o Corrected null model chisquare and, consequently, fit statistics that depend on it. o Added package stats to dependencies, and changed coefficients.tsls() to coef.tsls(), so sem compiles without a warning under R 2.4.0. o Brought sample output in ?sem up to date. o Fixed another version synchronization error. Changes to Version 0.9-6 o The input observed-variable covariance or moment matrix may now contain variables that are not used in the model, in which case a warning is printed (in response to a problem reported by Jarrett Byrnes). o Stray escape character removed from print.summary.sem to avoid warning in r-devel. Changes to Version 0.9-7 o Small fixes to docs. o Fixed standardized.coefficients() so that it works even when there is only one structural parameter in the model (fixing a bug reported by Jarrett Byrnes). o Added read.moments() to facilitate input of covariance, correlation, and raw-moment matrices; cov2raw() to compute raw-moments matrix from a covariance or correlation matrix; and anova.sem() to compare nested models (all suggested by Michael Friendly). o Added SRMR fit index (requested by Mathieu d'Acremont). Changes to Version 0.9-8 o sem() will now fit a model with no free parameters (as suggested by Ken Bollen). 
Changes to Version 0.9-9 o Fixed bug in summary.sem() that sometimes prevented computation of confidence limits for RMSEA (problem reported by Frank Lawrence). o Small fixes to docs. Changes to Version 0.9-10 o Fixed bug in anova.tsls(), which was computing the error variance from the smaller rather than the larger model (pointed out by Achim Zeleis). Changes to Version 0.9-11 o Replaced obsolete \non_function{} markup in Rd files (reported by Kurt Hornik). Changes to Version 0.9-12 o Fixed bug in path.diagram() that prevented customized parameter labels (reported by Christopher Marcum). o Small fixes/changes. Changes to Version 0.9-13 o Added vcov.sem() and coef.sem() methods (suggested by Achim Zeileis). o If sem() can't compute the covariance matrix of the coefficients, then summary.sem() now prints an informative error message rather than failing cryptically. Changes to Version 0.9-14 o Changed test for symmetry of input covariance matrix so that it uses isSymmetric() (suggested by Brian Lai). o Added a test for positive-definiteness of the input covariance matrix. o Intercept error in summary.sem() when bounds of RMSEA can't be computed (following problem reported by Andrew J. Wawrzyniak). o Fixed error in summary.bootsem() produced by elimination in R of partial matching of list elements (problem reported by Sergio A. Estay). Changes to Version 0.9-15 o Small fixes to mod.indices.Rd. Changes to Version 0.9-16 o tsls() now uses Cholesky decomposition to speed up computations with many variables (slightly). o Added fscores() function to compute factor scores, after serveral requests. Changes to Verstion 0.9-17 o standardized.coefficients() now standardizes both path coefficients and variances/covariances (thanks to code provided by Adam Kramer). o path.digram() now has an option for displaying standardized coefficients (again thanks to Adam Kramer). 
Changes to Version 0.9-18 o path.diagram() can now run dot to create graphics output (thanks to a contribution from Michael Friendly). o Fix to URL in DESCRIPTION (broken link reported by Jonathan Henkelman). o Fix to startvalues() to avoid an error when the user unnecessarily but not incorrectly sets fixed variance parameters to 0 (problem reported by Rob Cribbie). Changes to Version 0.9-19 o Fix to cross-reference in sem.Rd. Changes to Version 0.9-20 o Fixed a bug that occurred when there is only one observed variable (reported by Anna Simonetto). sem/cleanup0000755000175000017500000000035314126411127012545 0ustar nileshnilesh#! /bin/sh /bin/rm -rf ./chm /bin/rm -rf ./config.* /bin/rm -rf src/*.so src/*/*.so src/*.o src/*/*.o src/*.d src/*/*.d src/*.dll src/*/*.dll src/*.a src/*/*.a src/*.rc src/*/*.rc src/Makedeps src/*/Makedeps /bin/rm -rf *~ */*~ */*/*~ sem/data/0000755000175000017500000000000014126411127012100 5ustar nileshnileshsem/data/Bollen.rda0000644000175000017500000000544214126411127014010 0ustar nileshnileshY XG$IrIH(Z,GET֪EX G8 Z)hD(< ֲH\`"Q3%of1o~̃,w^9`04, &Sei4,j;1LjpT%b/}7C|)'$ /4pQdQ8 Ԟh`N#ϏpDjtt㱌FQScH*go`J/OvEc5?zt|65Jetr&TH(#B9јDANd5:j:J@UQ~L25?__d!*#Zy|R#ٯZ}t⭾S>(\ .'Jٵ$⃨4 ?hp/Rd̨yxecT) 򡁑6fx59q#TMjEmIG'GwoP5{=LqUO>dTA.[(uA2?B@OϣwBb#Q4/5}ޙ3?S?WdE#M)c蓔\GJ)\dI=hhdqR 7قCosdޗU{.ށ}"&5C(Zs.׾^Lirjv.bg'YOӮOSW[zD~;9xˠ>?}ou{qEgP7bWI_[tON zEgiwȲ+qב\+DݧArXz?eLi3Ilzߍҟ6[oZiW<,&ԣ_`p8|>9+^۲c|z''BUլ:x}au.ȱkRţNlw07?7yǾU 1ۦzG2u/(+cmiuoӽ C•ěfIsi9]>,.RΑ~0jRݥ`3Jۧ [D!K\@,qZ5{~F3>7IѮvv_1b Xw6+ĺٚ9lzx\s4Ax51vS֗Ir ~<A(v0Nq'w- _w<"ڪ"w?(# [K)w RAp,8d>B1Ou4W 9y)eAkм ܌Ɍ׌_ՠ:݋|\VXMgɥ% {ͽla倥v eDZFtVo)ffNn EZU>wP Y?(NT/kS?Љ;3^iAVA0YF/i wi"WP]+,lƋa~8uA8=yvmڒbB#k63y0"+>%>vF+&iPxa:c{.~uIh[٘i ƛ:|0* ~U^w,6 v@tӪ;9OEʕ~ ;!K#7ղz6}o)״voVÃ:f@qa=*ުR+K@rM*ޱo]N:.%H|*Fԣ:Հa׌chMゼkEE؁Tę >ZqKwi6fH09ez {.ޗv'vV'5.1pvCW۾ι41ZV_<.oi98X|ImxUo5VR̵_NUeov3mKX7p#|gsxZꪁB-!-sݚ@~O5 !-+wP>[ۀ%tKXRfˬ;У+OǸ!92 Ö%٣^E"\Ua1x&R*Hh+WWƟ}rBu<sem/data/CNES.rda0000644000175000017500000001010314126411127013313 0ustar 
nileshnileshBZh91AY&SYdTDRH>@/!}4`t,ʙiV6CUR5R@4dh%?oR**zP @SThSUS~ڙU@ BoMLOmPh4 Om:j-\u#+rݶ$#NV[8FTکF%dfTԮ.nAsysb.-LPе.j%./G㻏p:C$H2 D[ H $mnTBGHveAݝٚeL8R6sRJUUUmv UZUVTu@T*h۝ VUU J*YTmmeUNwAUJUP6gm  MUJd@ 6VUUP$ BP%VUUUVUFJmUmJ][ JQ*JmI**kh@UU T YUUUU BFmJmUU@Z@6Ѳ5JejP VUR& "P UUڙUBP V%eSw>N!ZPl l] UJUZ%UiUUjvP TUJ*UTd%&UX UTU@ UPUim)mIUZUeUV@ Y_W꟟ jI{yp(ϻ^N` I$z.ZI%فawT>>SoK*jvURUPJU*Hm=kf^WmFfUԭ9{w[mMe{w)dIkovw<*۶$W9mVs2[[ih;Ax~?>>0C4| #@P ;^`l`9=pl9<H  sem/data/Kmenta.txt.gz0000644000175000017500000000047714126411127014507 0ustar nileshnilesh5R9n1W0`\FR'L`6iY}}}}<{o۴ -mirIenxh ûavHp6H]^5QZKUh3GXKwԕ%.ޯLUC^{b&' l6&ě['&T-&^Ym w!{X[rgG9qcmTނ5"C=g[{[ szt~ZDsrm {hOkɶzv)L_|sem/data/Tests.rda0000644000175000017500000000053014126411127013670 0ustar nileshnileshBZh91AY&SYQwP8@/@@$ MG6ɠ44mU=SLC@ILM12h 2h#00daQS4mͣT61_u.(K&rJ SR -kTJ2 %!&-lL,Rш E,M[OYs RWUf> x#_RyYH %Q) AQS\jE)dBEdRBH !"H"-"aAKlk"EcR2ܡALҾ.߷]BCyEPsem/data/HS.data.rda0000644000175000017500000001656414126411127014026 0ustar nileshnilesh7zXZi"6!X'6])ThnRʠ3$,~KA/pXLxe?<6mMF~He Pk:&Oѥkw}DCX>dWw<UҤS1ONzY&}\,] &:&hYiQ`q<ۖ\۠pW Q!+<< Ǥ?p $\X+qrQclU D c`N8Uڱy(^kL!:İ\5l:^~I?Xǝw0Al!ge鵕7QVGUA^/Ma[տuw|K]ORfgWeVlDeQ, |%,"vĶ*P8|n83'v`[A:k-!Ջܘbs'<Q؜C"˙YnMA>{Ґ y^x e"7zo7@!9N Dyz9D7;5Q~2O=P?b e21;4~,JzJ: ԛ1K TkEER6`)+DǸ{|9G 8VydVd9V{PZH:&: m kBT-BːqS'ls2D`i"OEֱ̮26֗lf_*y=yy5s֘I _nK9+ҽĸ[;⿹ !w@ᱤ:˪;Fg²6@Dpv yauDd6ӝն?D 8q3>݋8X>R9Sj(+n'r+^l*jWSmTTnjw 6.4TW^*Z宖KRrR::)bS4U:&G<IFO h2dx_+6c `)uBl^:z;7g]fG5cxĖZey t~ l_f$ۊ oz¯xk23 f0x"*>3"ޮ29^*@ylyK:˙1ai}NXDIӯzߞcE'o }vJ)T $ Yg<;BA =龭׆Nܤ^7 `p)8G0M ww;'c.8ٮ3G YĐ=xDW]C5`;]5m?MP,g,G҉<0B2kqM a b t&AŷSX7h%J3`Jb&D²mΈՇD[R?1׏ Q֦yW L.W^jG:dI<:6J4_J'Ҭc6%&=(rLw&LTe҈`ur3-^p[m֎wc$aJ, Uj"2a'(џޑJg(Zh|޴C1Pdyzw.ӜTCDPKx_vC}N UQԺ|͋E=3 v>IF}o oǍ#18Dkl'ɘpxdN瘊X/S"fwm߬TҰ4&TBb)8Zi#Ea96# ٩p؇5}E;o:r\/}^Q.8t\G\Ak*>@qy`/REq'OK^ȭ h@;ܤ31=]M Tv"EiOLmP0Ox@i#CI>Z2qgJdD?z١nj%KܤA :dV >N0pUDN*g`0oɧq80dME0l榨4Pz`|YE3\Ɋpݼ&Ua l0$ic.Ԙm9r~u+;Ijڌe2Bo^ 0XZ^lg _~]fI,,7 XO#i 9)&PuIYʢ7 Mrݪ|Vrn"dj}~&l*<0 
#pVWf]x\ې0l[Jm2c1fZ>oTY"{&]gGa9T跇!n<::P:}yf $45YgPe r/ҝb)0~rb;JiX 9> R c}lj\,]t¼J%b#i^EL4p=?zk!+#)y?M'#6j:wl a'`KT12Zr E{+6J-'Zoܓ>!`Hh {Z|Wi,qy)n'+uYip'<K(?g=84)(8Wzf= czAG&Yz7 Lv}Z KIy+͌VN4%/5=I_ˡ5,Z8ĎNX:SҌ? $;E HrA?E;eX7s9:i,V{(| )s')(w.0dQ9C,3K}N*m}rZ[ u?A>r`[Pz"*`ONo6!WFwЂV6% +5!hK?75lI2KoPڠ)K:?뭍CaHwnwcʭqNw&*$[qnEvnH-4z]E^mh6K,;;IgA̘[ !NڢrǘGćO.7w~c,xBAˉGQMdeC\|P~whDpؘf<D]I􇂥aPLUsܪiHk̦Ss͋;ڮq V j&-BjÑ-J09pA! G) ne +r'~_5 {KI|_TLSZK ] +t*zq ?w|Q]JWKM(paw3@5 ё~+j8= y`6dK'Xl !3ۅd}mTTK3fAhy*Ph[*"' X8wfa(g[",Dq"N^ݶMɬ]'Sw-'snA sѹA䟋#j˦X3dɫڮ n@ lz6c9{g\{{ :64vrDmG$1|rBYq9`O]U| P\Wc)Z(|8[s4>Y+"eSSLlp^ "|YsSN9 > w~WZUl_f[/Ӟ9М Ȃ,/2<юOȌY@g(Q&ʙwp͉)1"_OCĭD]6~'Yόx$[̡TSq>Ӛz>*bq1d2pjkKḌapNc/P!N:Pp\U l;Խ.0Uj÷扻@͋vNZ:5VmGB$G;eôes>W5'|_\(E KBsprx* bT|Xރk/lJt /i1zYư4YQyn&ޮi8POKd )bM%֖ `G3^R!X<^oRĕ*]ڷS!}@m^KGxn`ŵ`4<'lѝjbO턒$pH(G22Ӆќee]`+Ddqn שt.VW lG3`Pv5;axV^7$ןpZr@a{vkmAu" 'B ;QR96~DWp2{@f 33Y8e o1m %hb{N?/ˠ`Pwiԉx|bӾopO&ܳMv`p+e?P+aG%C&Fb h)pvXW Q~;^Nʂ59I*#uJXXA Yʆ^X^[)5zSk !8K, bC@&z #vo_ KC UV»zm@]2+h̢->-NiK}=Jvц7G$vvUdž/2c P̌|_(,T0Ú]{^g|UECYII᠀.m ^S[R\V pӝ+#f0sNmewv~;% RPLpE3FDG]ot*˿{0 іQ@kA@j7RX cWZ\_GZ,77Nc6tgX†Sڡ@mUZ-Į*9gw6b ߿gadi(wJLz)G Х)$|1`A)yݭ8XڀZ̹@R` +?bE\H$og 3!GP#ʃ[t~Sk'Ph卪IR O2G b}>/QQʖ݊IMwlM Os+~3~*?b6OeB:3MLe vyG2"]`(\~Rc=-av 4fl<@@sr B^G //h"qW΅7Xk(807g'Äŋf}综 2'ؠLv`9/KP7m V҃wcZz8 /4zdii70pe-vtW:">0 YZsem/data/Klein.txt.bz20000644000175000017500000000074714126411127014407 0ustar nileshnileshBZh91AY&SY_b<߀`@D"P@I Xh)ѧAjzd S HAHFLm+$Ez0q]1L_Wf7wFrhpa=COY M jΊ,S5QOs9 mU+h 2y,:w拂vOh֞Qn/2b j:潚W{Jh6"N' &&5ܸXeWrʻPRl(nZI/!Fxv[1"T:Rb;q a~~b[Bl/g٨aYM@1/АD'L}IjX 4"|aOqt=m67 Cv>%7G@^A& B1I0j dJ""ɰ98+% c+ 6i0(}@ֵki~.p!sem/NAMESPACE0000644000175000017500000001001214126352057012406 0ustar nileshnilesh# last modified 2020-06-02 by J. 
Fox useDynLib(sem) useDynLib(sem, csemSolve) import(stats) # importFrom(matrixcalc, duplication.matrix, vech) importFrom(MASS, ginv) importFrom(boot, boot.ci) importFrom(mi, mi, complete) #importFrom(DiagrammeR, DiagrammeR) importFrom(utils, combn, edit, setTxtProgressBar, txtProgressBar) export(ram, standardizedResiduals, normalizedResiduals, readMoments, startvalues, startvalues2, standardizedCoefficients, stdCoef, tsls, sem, modIndices, pathDiagram, specifyModel, specifyEquations, cfa, math, bootSem, rawMoments, cov2raw, fscores, AICc, CAIC, classifyVariables, combineModels, removeRedundantPaths, optimizerNlm, optimizerOptim, optimizerNlminb, objectiveML2, objectiveGLS2, objectiveFIML2, optimizerSem, objectiveML, objectiveGLS, objectiveFIML, multigroupModel, msemObjectiveML, msemObjectiveGLS, msemOptimizerNlm, # msemObjectiveFIML, msemObjectiveML2, optimizerMsem, miSem, # deprecated: standardized.residuals, normalized.residuals, read.moments, standardized.coefficients, std.coef, mod.indices, path.diagram, specify.model, boot.sem, raw.moments ) S3method(effects, sem) S3method(logLik, objectiveML) S3method(logLik, objectiveFIML) S3method(miSem, semmod) S3method(miSem, semmodList) S3method(bootSem, sem) S3method(bootSem, msem) S3method(residuals, sem) S3method(residuals, objectiveFIML) S3method(standardizedCoefficients, sem) S3method(standardizedResiduals, sem) S3method(standardizedResiduals, objectiveFIML) S3method(normalizedResiduals, objectiveML) S3method(normalizedResiduals, objectiveGLS) S3method(normalizedResiduals, objectiveFIML) S3method(tsls, default) S3method(tsls, formula) S3method(print, tsls) S3method(summary, tsls) S3method(print, summary.tsls) S3method(residuals, tsls) S3method(coef, tsls) S3method(coef, sem) S3method(fitted, tsls) S3method(anova, tsls) S3method(anova, objectiveML) S3method(anova, objectiveFIML) S3method(vcov, tsls) S3method(vcov, sem) S3method(sem, semmod) S3method(sem, default) S3method(summary, objectiveML) S3method(summary, 
objectiveGLS) S3method(summary, objectiveFIML) S3method(print, summary.objectiveML) S3method(summary, modIndices) S3method(summary, msemModIndices) S3method(print, modIndices) S3method(print, msemModIndices) S3method(modIndices, objectiveML) S3method(modIndices, msemObjectiveML) S3method(pathDiagram, sem) S3method(pathDiagram, semmod) S3method(print, semeffects) S3method(print, semmod) S3method(print, bootsem) S3method(print, objectiveML) S3method(print, objectiveGLS) S3method(print, objectiveFIML) S3method(summary, bootsem) S3method(print, summary.bootsem) S3method(print, miSem) S3method(summary, miSem) S3method(print, summary.miSem) S3method(rawMoments, formula) S3method(rawMoments, default) S3method(print, rawmoments) S3method(deviance, objectiveML) S3method(deviance, objectiveFIML) S3method(df.residual, sem) S3method(coef, sem) S3method(vcov, sem) S3method(fscores, sem) S3method(AIC, objectiveML) S3method(AIC, objectiveFIML) S3method(AICc, objectiveML) S3method(AICc, objectiveFIML) S3method(CAIC, objectiveML) S3method(CAIC, objectiveFIML) S3method(BIC, objectiveML) S3method(BIC, objectiveFIML) S3method(combineModels, semmod) S3method(update, semmod) S3method(edit, semmod) S3method(print, semmodList) S3method(sem, semmodList) S3method(sem, msemmod) S3method(print, msemObjectiveML) S3method(summary, msemObjectiveML) S3method(print, msemObjectiveGLS) S3method(summary, msemObjectiveGLS) S3method(print, msemObjectiveFIML) S3method(summary, msemObjectiveFIML) S3method(deviance, msemObjectiveML) S3method(AIC, msemObjectiveML) S3method(AICc, msemObjectiveML) S3method(BIC, msemObjectiveML) S3method(residuals, msem) S3method(coef, msem) S3method(vcov, msem) S3method(df.residual, msem) S3method(anova, msemObjectiveML) S3method(logLik, msemObjectiveML) S3method(effects, msem) S3method(print, semeffectsList) S3method(standardizedCoefficients, msem) S3method(standardizedResiduals, msem) S3method(normalizedResiduals, msemObjectiveML) S3method(fscores, msem) S3method(vcov, 
msem)