flexmix/ 0000755 0001762 0000144 00000000000 14760252622 011734 5 ustar ligges users flexmix/MD5 0000644 0001762 0000144 00000017043 14760252622 012251 0 ustar ligges users 18729aaf42b73f8ccbd8f9c26ded5bd6 *DESCRIPTION
c3c7eb6308b8b029dc5ebedd55ed8e6e *NAMESPACE
88a1e6330ac4ebbddc726522a46dfc34 *R/FLXMCdist1.R
0964dce38a7f3b491c9eef5ecedbcd68 *R/allClasses.R
ebb73a41e988babd784db3f85a6b966e *R/allGenerics.R
b03125231df0214b1395c7ef7f9bf25d *R/boot.R
889f52aae8df7342a12c941702964959 *R/concomitant.R
ffe49b2c72bb7df7681bfd5f450468de *R/condlogit.R
3495256b72a55590ac6bd775f3782028 *R/examples.R
3ebb141d9c261a101e884f66f81a9120 *R/factanal.R
c4cbbe2b096216ad85ac881bb13713a1 *R/flexmix.R
ac7d510575121174e050def18e1d8f68 *R/flexmixFix.R
3eba12e52c7117b1dc31709f780bace7 *R/flxdist.R
e46cc7864775903c177d0fbebf62562f *R/flxmcmvpois.R
5c565a257540fde3a2c29cde0181421b *R/flxmcsparse.R
94cecec609f15f14b8eed8002bb5b792 *R/glmFix.R
d49134e08be219e48ebaa680b76e647e *R/glmnet.R
c5c5f8a325c65778e15792e456459ff2 *R/group.R
f929b19e094c157d3556617cafd46a31 *R/infocrit.R
296476baa3f3e730cc906d1d1663c0a0 *R/initFlexmix.R
d509a6bcd55ec42162950011f0e3fedb *R/kldiv.R
2ad7b23c815889ec724736b3b5b0f017 *R/lattice.R
49e5b8eef79f6f8884e4e0323329b78f *R/lmer.R
5e9c1a083238f5e9310c257036db61cc *R/lmm.R
223499c1d0b23482e2f62049b4749e25 *R/lmmc.R
5a7ccfa40918a1075c31b79b9b713c64 *R/mgcv.R
2d40a708cbab9a061aa4f2ab7e8c978a *R/models.R
e248e582afbd8bebd1dd48d4292d5b73 *R/multcomp.R
a1543ac30b4c4faca4ebdd23e5fca5f3 *R/multinom.R
7a1d8c2b82999b1bd1e0419b00117498 *R/plot-FLXboot.R
621a422936c28bdebd8fbbe1d9fe7970 *R/plot-flexmix.R
4a0e5ec24c4b8b048afd7473bbbf0cb4 *R/plot-refit.R
2f3409e7007c2e61fc5ee28aaef440d5 *R/plot.R
ce8c2d2018c456648e26145ea1cbc038 *R/rFLXmodel.R
2dce928976d164087b04d9edba0771eb *R/refit.R
89f17e9ffcdd456af71b16580a64ab7c *R/relabel.R
942a774a6195a4cf5da06bfefc18adb6 *R/rflexmix.R
aee09f6209b75aaf4f4aaea863fb9352 *R/robust.R
6c4c0f1663db73d9c2a8fd48ab78eb2e *R/stepFlexmix.R
06a4c9417ad23fc25ba52d1be38dee42 *R/utils.R
90d0c43f34243c3adbb022664bd0ddf5 *R/z.R
16c7cb035f39de0d66d086ecdb22fbe5 *R/ziglm.R
edb1dac66e4cc84ef6813fe4767c8400 *build/vignette.rds
52f6bcc1bca4a592e3351b49431d9065 *data/BregFix.RData
47aa26e91f34698eeb226da6a4c04e35 *data/Mehta.RData
90b92021bfed03931e116746bd331d76 *data/NPreg.RData
33ab45d7bc0754237ecd01ce0254a604 *data/Nclus.RData
cce71227ec0d604bcb2c78fcb6249384 *data/NregFix.RData
b30604eae02b77396b87d0defb8d5c98 *data/betablocker.RData
45576be271f3423ba91c02ebfaca00b9 *data/bioChemists.RData
43669f632f40def80fbb61ca25c657fd *data/candy.RData
a8bd5b28da6a6a50d872c0ff711d70f5 *data/dmft.RData
6556322b93e01104ed3485ee02993d6d *data/fabricfault.RData
ca4de9fc56b9223d9612d44d3df1420e *data/patent.RData
f53cbfa94cbb22c8ba6a8c5d54895e88 *data/salmonellaTA98.txt.gz
aadfe3a142210960be9cfaa393a420ab *data/seizure.RData
309d62c6fbd66165dda517923e184109 *data/tribolium.RData
c444a25f85d0973c69e2063fa5349b7d *data/trypanosome.RData
84ee26fbac9ffa0d64a1ed89ad17f751 *data/whiskey.RData
165509cd86d080d103cd33fb52bef045 *inst/CITATION
bf41d9988531311a7ab94b51ca7abc60 *inst/NEWS.Rd
66abb9674dc55bc9dda69ea64ec37fcb *inst/doc/bootstrapping.R
e2057aa3aa3ee107d93fbe11a6f356d6 *inst/doc/bootstrapping.Rnw
4e0501e5f3494ae0046349f43538af71 *inst/doc/bootstrapping.pdf
6c97ed30becc0159537caec0aa6f7835 *inst/doc/flexmix-intro.R
c5a3e4b887e0caf2b8e1383071e699fd *inst/doc/flexmix-intro.Rnw
86cc4b985390ec32ae3d711a7a691534 *inst/doc/flexmix-intro.pdf
be0a42d594f1a80d36f502d9f5504e0f *inst/doc/mixture-regressions.R
c6745ee38eba2a694d6df5d497adb7b9 *inst/doc/mixture-regressions.Rnw
4ee2a7790a11e9c273c87448548a2ec6 *inst/doc/mixture-regressions.pdf
6dca73aa5a96343db67a43c9835bb4bf *inst/doc/myConcomitant.R
fb9ae81acbfcada421bbd89681834714 *inst/doc/mymclust.R
76f412c774347259ff904027864b759c *inst/doc/regression-examples.R
94b27d5378111289be0df2afaf4bdfab *inst/doc/regression-examples.Rnw
f2d8005a6a7bcdf293f249ea23150efa *inst/doc/regression-examples.pdf
76a28867c7670eb60951e3b1d2d77d33 *inst/doc/ziglm.R
1d39a7829ea8ce6aa54c11a0a06780eb *man/AIC-methods.Rd
00c18dd702febe081d951aa73ebb95d0 *man/BIC-methods.Rd
cad198606d9e736399f5c5fc540786a1 *man/BregFix.Rd
f66f6ead6d7d0350c6a722f53193094e *man/EIC.Rd
85d7f2bae2833443d209050759a2e525 *man/ExLinear.Rd
7ed1926717bdc73c8e6b117c5075eb0e *man/ExNPreg.Rd
cf4a61f3fbf44d92894525cbff4ef003 *man/ExNclus.Rd
78e72f66ed96403287e0eb63e33f4ab1 *man/FLXMCdist1.Rd
4c28bd39db43ab9080cbfb2b2d495054 *man/FLXMCfactanal.Rd
692b3facd60fa4125aff97003555a621 *man/FLXMCmvcombi.Rd
ba41986c3ff634c81b8f5b44955efe92 *man/FLXMCmvpois.Rd
421d04a7a1c7c967877e1ed3882965f1 *man/FLXMRcondlogit.Rd
4628f562cd45f409dd16ea2cc7d3eca4 *man/FLXMRglmnet.Rd
ec245ed6ad4ca2e7d3efee4256b0bdca *man/FLXMRlmer.Rd
9cad098f9190aacb6862d10a2118ffda *man/FLXMRlmmc.Rd
1f2a4972f51646f42278445d3ee41933 *man/FLXMRmgcv.Rd
d5fb9e73fdba6d6586e186d93a61a385 *man/FLXMRmultinom.Rd
9a268e3bbf0b5c8ee42fcfaacbdd793e *man/FLXMRrobglm.Rd
b2d3a3fdb9370376403175e23e8acea3 *man/FLXMRziglm.Rd
1be70d781f7affbf5b51663f75fa87e0 *man/FLXP-class.Rd
e5014d435eb288059e6daa8b9163d8e7 *man/FLXbclust.Rd
c87d4220e578ac71869feed74a3d7c47 *man/FLXcomponent-class.Rd
1348b9444773021d770f7463579961ff *man/FLXconcomitant.Rd
eff1590dbfbd4e3f1783fe61b20037d6 *man/FLXcontrol-class.Rd
f8a1b4148b65b193d78ca1159e888a0d *man/FLXdist-class.Rd
28a563506d19c0836af092f0c3f15775 *man/FLXdist.Rd
47b0ea1622e710755d45534208ad95ea *man/FLXfit.Rd
35115ac86c9cb6a8aa266a755f26cf16 *man/FLXglm.Rd
8403da6e3cb9d44b66a22e98ef944422 *man/FLXglmFix.Rd
24a39ce85331b9ddbb500d499932fee1 *man/FLXmclust.Rd
8a86639f3d09af06293225779bedfd05 *man/FLXmodel-class.Rd
afbfbeb61e91eda57fbaa568a39b38e2 *man/FLXnested-class.Rd
0768bed00f7a8554c33329522dfaa169 *man/ICL.Rd
028fc57ce85d724790cc7da759a24dd7 *man/KLdiv.Rd
f6736b5d249d5d684f1464180b8a3b59 *man/Lapply-methods.Rd
edab8de219b6630db7a71d22366f70d3 *man/Mehta.Rd
8f55051592bec6c88e23c7d81f57d38b *man/NregFix.Rd
fb40ad5bde9b66ddc38d3572ff7429cb *man/betablocker.Rd
d67b3b5b0eed42a593c0268b9e6aab21 *man/bioChemists.Rd
fdb60dd3f31d2281e7fa21e38a3c8b76 *man/boot.Rd
b4db12be29b542f9bea083d3bd0c980e *man/candy.Rd
5a69cda63c913ef33f104c6d5adb3729 *man/dmft.Rd
78db5196994b9a665e4c17fb6653c93f *man/fabricfault.Rd
cd0b0ac588e0bd163a930ddbd0d8ae4f *man/fitted.Rd
b53ba6892c3f8d98ea7ee048c854a47d *man/flexmix-class.Rd
022f7bae58eaccd9203737c47cf2473d *man/flexmix-internal.Rd
57ab6985b4c34528329c50c5bd67b30c *man/flexmix.Rd
06e1f559377487d225abdb6faf160481 *man/flxglht.Rd
83812639242cedc8bf5bc226f2747c88 *man/group.Rd
a187b88ae5b3121c29579aceef270951 *man/logLik-methods.Rd
2ac267bede3099e4807c4b3cc4ef2d86 *man/patent.Rd
3aae6929d9c9dbe26b4be487330edada *man/plot-methods.Rd
b3773bddc048e6fbe106eed18469702d *man/plotEll.Rd
2195e1c45f80a83a7b3baf8d449e5b05 *man/posterior.Rd
f8ae3162f10350e983e30a5926fa6025 *man/refit.Rd
d627472edfd9f5b2c3ec8e0f56296ae4 *man/relabel.Rd
cf67fee780ab0ea895b55f07989aa76c *man/rflexmix.Rd
2aa3384e40b6d464a252378e17f08b1c *man/salmonellaTA98.Rd
4d165b442083a5ba599fa8e1c5e5f28a *man/seizure.Rd
4d457ddbe589788191b3ee5021e0ac6d *man/stepFlexmix.Rd
168fda1db5cdaf8e9d0a5410c145e825 *man/tribolium.Rd
b5fad3c3973f6658af8b3663f20f4ac3 *man/trypanosome.Rd
ab4b6ba1cdfc93f1639c53cc091ed513 *man/whiskey.Rd
e2057aa3aa3ee107d93fbe11a6f356d6 *vignettes/bootstrapping.Rnw
c5a3e4b887e0caf2b8e1383071e699fd *vignettes/flexmix-intro.Rnw
b7d9b60485cfd57d1bfc13e98ff25f48 *vignettes/flexmix.bib
0a928bdb28d680f31098902b157e9204 *vignettes/flexmix.png
c6745ee38eba2a694d6df5d497adb7b9 *vignettes/mixture-regressions.Rnw
5a4252ff70a91d81feef9be2ff79c5bf *vignettes/mixture.bib
6dca73aa5a96343db67a43c9835bb4bf *vignettes/myConcomitant.R
fb9ae81acbfcada421bbd89681834714 *vignettes/mymclust.R
94b27d5378111289be0df2afaf4bdfab *vignettes/regression-examples.Rnw
76a28867c7670eb60951e3b1d2d77d33 *vignettes/ziglm.R
flexmix/R/ 0000755 0001762 0000144 00000000000 14757625373 012152 5 ustar ligges users flexmix/R/z.R 0000644 0001762 0000144 00000000631 14404637304 012530 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: z.R 5079 2016-01-31 12:21:12Z gruen $
#
###**********************************************************
## Backward compatibility
## component model driver
FLXglm <- FLXMRglm
FLXglmFix <- FLXMRglmfix
FLXmclust <- FLXMCmvnorm
FLXbclust <- FLXMCmvbinary
## concomitant model driver
FLXmultinom <- FLXPmultinom
FLXconstant <- FLXPconstant
flexmix/R/plot.R 0000644 0001762 0000144 00000006565 14404637304 013251 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: plot.R 5079 2016-01-31 12:21:12Z gruen $
#
###**********************************************************
plotEll <- function(object, data, which=1:2,
model = 1,
project=NULL, points=TRUE,
eqscale=TRUE, col=NULL,
number = TRUE, cex=1.5, numcol="black",
pch=NULL, ...)
{
if(is.null(col)) col <- rep(FullColors, length.out = object@k)
if (!is.list(data)) {
response <- data
data <- list()
data[[deparse(object@model[[model]]@fullformula[[2]])]] <- response
}
else {
mf <- model.frame(object@model[[model]]@fullformula, data=data, na.action = NULL)
response <- as.matrix(model.response(mf))
response <- object@model[[model]]@preproc.y(response)
}
clustering <- clusters(object, newdata = data)
if(!is.null(project))
response <- predict(project, response)
type=ifelse(points, "p", "n")
if(is.null(pch)){
pch <- (clustering %% 10)
pch[pch==0] <- 10
}
else if(length(pch)!=nrow(response)){
pch <- rep(pch, length.out = object@k)
pch <- pch[clustering]
}
if(eqscale)
plot(response[,which], asp = 1, col=col[clustering],
pch=pch, type=type, ...)
else
plot(response[,which], col=col[clustering],
pch=pch, type=type, ...)
for(k in seq_along(object@components)){
p = parameters(object, k, model, simplify=FALSE)
if(!is.null(project)){
p <- projCentCov(project, p)
}
lines(ellipse::ellipse(p$cov[which,which],
centre=p$center[which], level=0.5),
col=col[k], lwd=2)
lines(ellipse::ellipse(p$cov[which,which],
centre=p$center[which], level=0.95),
col=col[k], lty=2)
}
## und nochmal fuer die zentren und nummern (damit die immer oben sind)
for(k in seq_along(object@components)){
p = parameters(object, k, model, simplify=FALSE)
if(!is.null(project)){
p <- projCentCov(project, p)
}
if(number){
rad <- ceiling(log10(object@k)) + 1.5
points(p$center[which[1]],
p$center[which[2]],
col=col[k], pch=21, cex=rad*cex, lwd=cex,
bg="white")
text(p$center[which[1]],
p$center[which[2]], k, cex=cex, col=numcol)
}
else{
points(p$center[which[1]],
p$center[which[2]],
pch=16, cex=cex, col=col[k])
}
}
}
projCentCov <- function(object, p) UseMethod("projCentCov")
projCentCov.default <- function(object, p)
stop(paste("Cannot handle projection objects of class",
sQuote(class(object))))
projCentCov.prcomp <- function(object, p)
{
cent <- matrix(p$center, ncol=length(p$center))
cent <- scale(cent, object$center, object$scale) %*% object$rotation
cov <- p$cov
if(length(object$scale)>1)
cov <- cov/outer(object$scale, object$scale, "*")
cov <- t(object$rotation) %*% cov %*% object$rotation
list(center=cent, cov=cov)
}
flexmix/R/condlogit.R 0000644 0001762 0000144 00000005532 14404637304 014246 0 ustar ligges users setClass("FLXMRcondlogit",
representation(strata="ANY",
strata_formula="ANY"),
contains = "FLXMRglm")
FLXMRcondlogit <- function(formula=.~., strata) {
z <- new("FLXMRcondlogit", weighted=TRUE, formula=formula, strata_formula=strata,
family="multinomial", name=paste("FLXMRcondlogit"))
z@defineComponent <- function(para) {
predict <- function(x, ...)
tcrossprod(x, t(para$coef))
logLik <- function(x, y, strata) {
llh_all <- vector("numeric", length = length(y))
eta <- predict(x)
llh_all[as.logical(y)] <- eta[as.logical(y)]
((tapply(llh_all, strata, sum) - tapply(exp(eta), strata, function(z) log(sum(z))))/tabulate(strata))[strata]
}
new("FLXcomponent",
parameters=list(coef=para$coef),
logLik=logLik, predict=predict,
df=para$df)
}
z@fit <- function(x, y, w, strata){
index <- w > 0
fit <- survival::coxph.fit(x[index,,drop=FALSE], survival::Surv(1-y, y)[index], strata[index], weights=w[index], control = survival::coxph.control(),
method = "exact", rownames = seq_len(nrow(y))[index])
coef <- coef(fit)
df <- length(coef)
z@defineComponent(list(coef = coef, df = df))
}
z
}
setMethod("FLXgetModelmatrix", signature(model="FLXMRcondlogit"),
function(model, data, formula, lhs=TRUE, ...)
{
formula <- RemoveGrouping(formula)
if(is.null(model@formula))
model@formula = formula
model@fullformula = update(terms(formula, data=data), model@formula)
## Ensure that an intercept is included
model@fullformula <- update(model@fullformula, ~ . + 1)
if (lhs) {
mf <- model.frame(model@fullformula, data=data, na.action = NULL)
model@x <- model.matrix(attr(mf, "terms"), data=mf)
response <- as.matrix(model.response(mf))
model@y <- model@preproc.y(response)
}
else {
mt1 <- terms(model@fullformula, data=data)
mf <- model.frame(delete.response(mt1), data=data, na.action = NULL)
mt <- attr(mf, "terms")
model@x <- model.matrix(mt, data=mf)
}
strata <- update(model@strata_formula, ~ . + 0)
mf <- model.frame(strata, data=data, na.action=NULL)
model@strata <- as.integer(model.matrix(attr(mf, "terms"), data=mf))
## Omit the intercept for identifiability
model@x <- model@x[,attr(model@x, "assign") != 0, drop=FALSE]
model@x <- model@preproc.x(model@x)
model
})
setMethod("FLXmstep", signature(model = "FLXMRcondlogit"), function(model, weights, ...) {
apply(weights, 2, function(w) model@fit(model@x, model@y, w, model@strata))
})
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRcondlogit"), function(model, components, ...) {
sapply(components, function(x) x@logLik(model@x, model@y, model@strata))
})
setMethod("existGradient", signature(object = "FLXMRcondlogit"),
function(object) FALSE)
flexmix/R/lmm.R 0000644 0001762 0000144 00000024132 14404637304 013046 0 ustar ligges users setClass("FLXcomponentlmm",
representation(random="list"),
contains = "FLXcomponent")
setClass("FLXMRlmm",
representation(family = "character",
random = "formula",
group = "factor",
z = "matrix",
which = "ANY"),
contains = "FLXMR")
setClass("FLXMRlmmfix",
contains = "FLXMRlmm")
setMethod("allweighted", signature(model = "FLXMRlmm", control = "ANY", weights = "ANY"), function(model, control, weights) {
if (!control@classify %in% c("auto", "weighted"))
stop("Model class only supports weighted ML estimation.")
model@weighted
})
FLXMRlmm <- function(formula = . ~ ., random, lm.fit = c("lm.wfit", "smooth.spline"),
varFix = c(Random = FALSE, Residual = FALSE), ...)
{
family <- "gaussian"
lm.fit <- match.arg(lm.fit)
if (length(varFix) != 2 || is.null(names(varFix)) || any(is.na(pmatch(names(varFix), c("Random", "Residual")))))
stop("varFix has to be a named vector of length two")
else names(varFix) <- c("Random", "Residual")[pmatch(names(varFix), c("Random", "Residual"))]
random <- if (length(random) == 3) random else formula(paste(".", paste(deparse(random), collapse = "")))
object <- new("FLXMRlmm", formula = formula, random = random,
weighted = TRUE, family = family, name = "FLXMRlmm:gaussian")
if (any(varFix)) object <- new("FLXMRlmmfix", object)
object@preproc.y <- function(x){
if (ncol(x) > 1)
stop(paste("y must be univariate"))
x
}
if (lm.fit == "smooth.spline") {
object@preproc.x <- function(x){
if (ncol(x) > 1)
stop(paste("x must be univariate"))
x
}
}
add <- function(x) Reduce("+", x)
lmm.wfit <- function(x, y, w, z, which, random) {
effect <- lapply(seq_along(which), function(i) z[[which[i]]] %*% random$beta[[i]])
W <- rep(w, sapply(x, nrow))
X <- do.call("rbind", x)
Y <- do.call("rbind", y)
Effect <- do.call("rbind", effect)
fit <- get(lm.fit)(X, Y - Effect, W, ...)
XSigmaX <- sapply(seq_along(z), function(i) sum(diag(crossprod(z[[i]]) %*% random$Sigma[[i]])))
wSum <- tapply(w, which, sum)
sigma2 <- (sum(W*residuals(fit)^2) + sum(wSum*XSigmaX))/sum(W)
wSigma <- add(lapply(seq_along(z), function(i) wSum[i]*random$Sigma[[i]]))
bb <- add(lapply(seq_along(which), function(i) tcrossprod(random$beta[[i]])*w[i]))
psi <- (wSigma + bb)/sum(w)
list(coefficients = if (is(fit, "smooth.spline")) fit$fit else coef(fit),
sigma2 = list(Random = psi,
Residual = sigma2),
df = if (is(fit, "smooth.spline")) fit$df else ncol(x[[1]]))
}
object@defineComponent <- function(para) {
predict <- function(x, ...) {
if (is(para$coef, "smooth.spline.fit")) lapply(x, function(X) getS3method("predict", "smooth.spline.fit")(para$coef, X)$y)
else lapply(x, function(X) X %*% para$coef)
}
logLik <- function(x, y, z, which, group, ...) {
V <- lapply(z, function(Z) tcrossprod(tcrossprod(Z, para$sigma2$Random), Z) + diag(nrow(Z)) * para$sigma2$Residual)
mu <- predict(x, ...)
llh <- sapply(seq_along(x), function(i)
mvtnorm::dmvnorm(t(y[[i]]), mean = mu[[i]], sigma = V[[which[i]]], log=TRUE)/nrow(V[[which[i]]]))
as.vector(ungroupPriors(matrix(llh), group, !duplicated(group)))
}
new("FLXcomponentlmm",
parameters = list(coef = para$coef, sigma2 = para$sigma2),
random = list(),
logLik = logLik, predict = predict,
df = para$df)
}
determineRandom <- function(mu, y, z, which, sigma2) {
Sigma <- lapply(z, function(Z)
solve(crossprod(Z) / sigma2$Residual + solve(sigma2$Random)))
Sigma_tilde <- lapply(seq_along(z), function(i) (tcrossprod(Sigma[[i]], z[[i]])/sigma2$Residual))
beta <- lapply(seq_along(which), function(i) Sigma_tilde[[which[i]]] %*% (y[[i]] - mu[[i]]))
list(beta = beta, Sigma = Sigma)
}
object@fit <- if (any(varFix)) {
function(x, y, w, z, which, random) {
fit <- lapply(seq_len(ncol(w)), function(k) lmm.wfit(x, y, w[,k], z, which, random[[k]]))
if (varFix["Random"]) {
prior_w <- apply(w, 2, weighted.mean, w = sapply(x, length))
Random <- add(lapply(seq_along(fit), function(i) fit[[i]]$sigma2$Random * prior_w[i]))
for (i in seq_along(fit)) fit[[i]]$sigma2$Random <- Random
}
if (varFix["Residual"]) {
prior <- colMeans(w)
Residual <- sum(sapply(fit, function(x) x$sigma2$Residual) * prior)
for (i in seq_along(fit)) fit[[i]]$sigma2$Residual <- Residual
}
n <- nrow(fit[[1]]$sigma2$Random)
lapply(fit, function(Z) {
comp <- object@defineComponent(list(coef = coef(Z),
sigma2 = Z$sigma2,
df = Z$df + n*(n+1)/(2*ifelse(varFix["Random"], ncol(w), 1)) + ifelse(varFix["Residual"], 1/ncol(w), 1)))
comp@random <- determineRandom(comp@predict(x), y, z, which, comp@parameters$sigma2)
comp
})
}
} else {
function(x, y, w, z, which, random){
fit <- lmm.wfit(x, y, w, z, which, random)
n <- nrow(fit$sigma2$Random)
comp <- object@defineComponent(
list(coef = coef(fit),
df = fit$df + n*(n+1)/2 + 1,
sigma2 = fit$sigma2))
comp@random <- determineRandom(comp@predict(x), y, z, which, comp@parameters$sigma2)
comp
}
}
object
}
setMethod("FLXmstep", signature(model = "FLXMRlmm"),
function(model, weights, components)
{
weights <- weights[!duplicated(model@group),,drop=FALSE]
if (!is(components[[1]], "FLXcomponentlmm")) {
random <- list(beta = lapply(model@which, function(i) rep(0, ncol(model@z[[i]]))),
Sigma = lapply(model@z, function(x) diag(ncol(x))))
return(sapply(seq_len(ncol(weights)),
function(k) model@fit(model@x, model@y, weights[,k], model@z, model@which, random)))
}else {
return(sapply(seq_len(ncol(weights)),
function(k) model@fit(model@x, model@y, weights[,k], model@z, model@which,
components[[k]]@random)))
}
})
setMethod("FLXmstep", signature(model = "FLXMRlmmfix"),
function(model, weights, components)
{
weights <- weights[!duplicated(model@group),,drop=FALSE]
if (!is(components[[1]], "FLXcomponentlmm")) {
random <- rep(list(list(beta = lapply(model@which, function(i) rep(0, ncol(model@z[[i]]))),
Sigma = lapply(model@z, function(x) diag(ncol(x))))), ncol(weights))
return(model@fit(model@x, model@y, weights, model@z, model@which, random))
}else
return(model@fit(model@x, model@y, weights, model@z, model@which, lapply(components, function(x) x@random)))
})
setMethod("FLXgetModelmatrix", signature(model="FLXMRlmm"),
function(model, data, formula, lhs=TRUE, ...)
{
formula_nogrouping <- RemoveGrouping(formula)
if (identical(paste(deparse(formula_nogrouping), collapse = ""), paste(deparse(formula), collapse = ""))) stop("please specify a grouping variable")
model <- callNextMethod(model, data, formula, lhs)
model@fullformula <- update(model@fullformula,
paste(".~. |", .FLXgetGroupingVar(formula)))
mt1 <- terms(model@random, data=data)
mf <- model.frame(delete.response(mt1), data=data, na.action = NULL)
model@z <- model.matrix(attr(mf, "terms"), data)
model@group <- grouping <- .FLXgetGrouping(formula, data)$group
rownames(model@z) <- rownames(model@x) <- rownames(model@y) <- NULL
model@x <- matrix(lapply(unique(grouping), function(g) model@x[grouping == g, , drop = FALSE]), ncol = 1)
if (lhs) model@y <- matrix(lapply(unique(grouping), function(g) model@y[grouping == g, , drop = FALSE]), ncol = 1)
z <- lapply(unique(grouping), function(g) model@z[grouping == g, , drop = FALSE])
z1 <- unique(z)
if (length(z) == length(z1)) {
model@which <- seq_along(z)
} else {
model@which <- sapply(z, function(y) which(sapply(z1, function(x) isTRUE(all.equal(x, y)))))
}
model@z <- matrix(z1, ncol = 1)
model
})
setMethod("FLXgetObs", "FLXMRlmm", function(model) sum(sapply(model@x, nrow)))
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRlmm"), function(model, components, ...) {
sapply(components, function(x) x@logLik(model@x, model@y, model@z, model@which, model@group))
})
setMethod("predict", signature(object="FLXMRlmm"), function(object, newdata, components, ...)
{
object <- FLXgetModelmatrix(object, newdata, formula = object@fullformula, lhs = FALSE)
lapply(components, function(comp) unlist(comp@predict(object@x, ...)))
})
setMethod("rFLXM", signature(model="FLXMRlmm", components="list"),
function(model, components, class, group, ...) {
class <- class[!duplicated(group)]
y <- NULL
for (l in seq_along(components)) {
yl <- as.matrix(rFLXM(model, components[[l]], ...))
if (is.null(y)) y <- matrix(NA, nrow = length(class), ncol = ncol(yl))
y[class == l,] <- yl[class==l,,drop=FALSE]
y <- matrix(y, ncol = ncol(yl))
}
y
})
setMethod("rFLXM", signature(model = "FLXMRlmm", components = "FLXcomponent"),
function(model, components, ...) {
sigma2 <- components@parameters$sigma2
V <- lapply(model@z, function(Z) tcrossprod(tcrossprod(Z, sigma2$Random), Z) + diag(nrow(Z)) * sigma2$Residual)
mu <- components@predict(model@x)
matrix(lapply(seq_along(model@x), function(i)
t(mvtnorm::rmvnorm(1, mean = mu[[i]], sigma = V[[model@which[i]]]))), ncol = 1)
})
setMethod("FLXgetNewModelmatrix", "FLXMRlmm", function(object, model, indices, groups) {
object@y <- model@y[indices,,drop=FALSE]
object@x <- model@x[indices,,drop=FALSE]
object@which <- model@which[indices]
if (length(unique(object@which)) < length(object@z)) {
object@z <- model@z[sort(unique(object@which)),,drop=FALSE]
object@which <- match(object@which, sort(unique(object@which)))
}
object
})
flexmix/R/mgcv.R 0000644 0001762 0000144 00000013731 14404637304 013220 0 ustar ligges users setOldClass("gam.prefit")
setClassUnion("listOrgam.prefit", c("list", "gam.prefit"))
setClass("FLXMRmgcv",
representation(G = "listOrgam.prefit",
control = "list"),
contains="FLXMRglm")
FLXMRmgcv <- function(formula = .~., family = c("gaussian", "binomial", "poisson"),
offset = NULL, control = NULL, optimizer = c("outer", "newton"),
in.out = NULL, eps = .Machine$double.eps, ...)
{
if (is.null(control)) control <- mgcv::gam.control()
family <- match.arg(family)
am <- if (family == "gaussian" && get(family)()$link == "identity") TRUE else FALSE
z <- new("FLXMRmgcv", FLXMRglm(formula = formula, family = family, offset = offset),
name=paste("FLXMRmgcv", family, sep=":"), control = control)
scale <- if (family %in% c("binomial", "poisson")) 1 else -1
gam_fit <- function(G, w) {
G$family <- get(family)()
G$am <- am
G$w <- w
G$conv.tol <- control$mgcv.tol
G$max.half <- control$mgcv.half
zero_weights <- any(w < eps)
if (zero_weights) {
ok <- w >= eps
w <- w[ok]
G$X <- G$X[ok,,drop=FALSE]
if (is.matrix(G$y)) G$y <- G$y[ok,,drop=FALSE] else G$y <- G$y[ok]
G$mf <- G$mf[ok,,drop=FALSE]
G$w <- G$w[ok]
G$offset <- G$offset[ok]
if (G$n.paraPen > 0) {
OMIT <- which(colSums(abs(G$X)) == 0)
if (length(OMIT) > 0) {
Ncol <- ncol(G$X)
Assign <- unique(G$assign[OMIT])
G$assign <- G$assign[-OMIT]
G$nsdf <- G$nsdf - length(OMIT)
G$X <- G$X[,-OMIT,drop=FALSE]
G$mf$Grouping <- G$mf$Grouping[,-which(colSums(abs(G$mf$Grouping))==0),drop=FALSE]
if (length(G$off) > 1) G$off[2] <- G$off[2] - length(OMIT)
for (i in seq_along(G$smooth)) {
G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - length(OMIT)
G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - length(OMIT)
}
G$S[[1]] <- G$S[[1]][-c(OMIT-sum(G$assign != Assign)),
-c(OMIT-sum(G$assign != Assign))]
}
}
}
z <- mgcv::gam(G = G, method = "ML", optimizer = optimizer, control = control, scale = scale,
in.out = in.out, ...)
if (zero_weights) {
residuals <- z$residuals
z$residuals <- rep(0, length(ok))
z$residuals[ok] <- residuals
if (G$n.paraPen > 0 && length(OMIT) > 0) {
coefficients <- z$coefficients
z$coefficients <- rep(0, Ncol)
z$coefficients[-OMIT] <- coefficients
}
}
z
}
if (family=="gaussian"){
z@fit <- function(x, y, w, G){
gam.fit <- gam_fit(G, w)
z@defineComponent(list(coef = gam.fit$coefficients, df = sum(gam.fit$edf)+1,
sigma = sqrt(sum(w * gam.fit$residuals^2 /
mean(w))/ (nrow(x)-sum(gam.fit$edf)))))
}
}
else if(family %in% c("binomial", "poisson")){
z@fit <- function(x, y, w, G){
gam.fit <- gam_fit(G, w)
z@defineComponent(
list(coef = gam.fit$coefficients, df = sum(gam.fit$edf)))
}
}
else stop(paste("Unknown family", family))
z
}
setMethod("FLXmstep", signature(model = "FLXMRmgcv"), function(model, weights, ...)
{
apply(weights, 2, function(w) model@fit(model@x, model@y, w, model@G))
})
setMethod("FLXgetModelmatrix", signature(model="FLXMRmgcv"), function(model, data, formula, lhs=TRUE,
paraPen = list(), ...)
{
formula <- RemoveGrouping(formula)
if (length(grep("\\|", deparse(model@formula)))) stop("no grouping variable allowed in the model")
if(is.null(model@formula))
model@formula <- formula
model@fullformula <- update(terms(formula, data=data), model@formula)
gp <- mgcv::interpret.gam(model@fullformula)
if (lhs) {
model@terms <- terms(gp$fake.formula, data = data)
mf <- model.frame(model@terms, data=data, na.action = NULL, drop.unused.levels = TRUE)
response <- as.matrix(model.response(mf, "numeric"))
model@y <- model@preproc.y(response)
}
else {
model@terms <- terms(gp$fake.formula, data = data)
mf <- model.frame(delete.response(model@terms), data=data, na.action = NULL, drop.unused.levels = TRUE)
}
model@G <- mgcv::gam(model@fullformula, data = data, fit = FALSE)
model@x <- model@G$X
model@contrasts <- attr(model@x, "contrasts")
model@x <- model@preproc.x(model@x)
model@xlevels <- .getXlevels(delete.response(model@terms), mf)
model
})
setMethod("predict", signature(object="FLXMRmgcv"), function(object, newdata, components, ...)
{
predict_gam <- function (object, newdata, ...) {
nn <- names(newdata)
mn <- colnames(object$model)
for (i in 1:length(newdata)) if (nn[i] %in% mn && is.factor(object$model[, nn[i]])) {
newdata[[i]] <- factor(newdata[[i]], levels = levels(object$model[, nn[i]]))
}
if (length(newdata) == 1)
newdata[[2]] <- newdata[[1]]
n.smooth <- length(object$smooth)
Terms <- delete.response(object$pterms)
X <- matrix(0, nrow(newdata), length(object$coefficients))
Xoff <- matrix(0, nrow(newdata), n.smooth)
mf <- model.frame(Terms, newdata, xlev = object$xlevels)
if (!is.null(cl <- attr(object$pterms, "dataClasses")))
.checkMFClasses(cl, mf)
Xp <- model.matrix(Terms, mf, contrasts = object$contrasts)
if (object$nsdf)
X[, 1:object$nsdf] <- Xp
if (n.smooth)
for (k in 1:n.smooth) {
Xfrag <- mgcv::PredictMat(object$smooth[[k]], newdata)
X[, object$smooth[[k]]$first.para:object$smooth[[k]]$last.para] <- Xfrag
Xfrag.off <- attr(Xfrag, "offset")
if (!is.null(Xfrag.off)) {
Xoff[, k] <- Xfrag.off
}
}
X
}
object@G$model <- object@G$mf
z <- list()
for(k in seq_along(components)) {
object@G$coefficients <- components[[k]]@parameters$coef
X <- predict_gam(object@G, newdata)
z[[k]] <- components[[k]]@predict(X, ...)
}
z
})
flexmix/R/initFlexmix.R 0000644 0001762 0000144 00000003556 14404637304 014570 0 ustar ligges users setClass("initMethod",
representation(step1 = "FLXcontrol",
step2 = "FLXcontrol"))
initMethod <- function(name = c("tol.em", "cem.em", "sem.em"),
step1 = list(tolerance = 10^-2), step2 = list(), control = list(), nrep = 3L) {
name <- match.arg(name)
z <- new("initMethod",
step1 = as(c(step1, control), "FLXcontrol"),
step2 = as(c(step2, control), "FLXcontrol"))
z@step1@nrep <- as.integer(nrep)
z@step2@nrep <- 1L
z@step1@classify <- switch(name,
cem.em = "CEM",
sem.em = "SEM",
tol.em = "weighted")
z
}
initFlexmix <- function(..., k, init = list(), control = list(), nrep = 3L, verbose = TRUE, drop = TRUE, unique = FALSE)
{
MYCALL <- match.call()
if (missing(k)) stop("'k' is missing.")
if (!missing(control) & is(init, "initMethod")) warning("'control' argument ignored.")
init <- do.call("initMethod", c(init, list(control = control, nrep = nrep)))
MYCALL1 <- lapply(k, function(K) {
MYCALL[["k"]] <- as.numeric(K)
MYCALL
})
names(MYCALL1) <- paste(k)
STEP1 <- stepFlexmix(..., k = k, verbose = verbose, drop = FALSE, unique = FALSE,
nrep = init@step1@nrep, control = init@step1)
models <- lapply(k, function(K) {
if (length(k) > 1 && verbose) cat("* ")
new("flexmix",
flexmix(..., control = init@step2,
cluster = posterior(getModel(STEP1, paste(K)))),
k0 = as.integer(K), call = MYCALL1[[paste(K)]])
})
if (length(k) > 1 && verbose) cat("\n")
names(models) <- paste(k)
if (drop & length(models) == 1) {
return(models[[1]])
} else {
z <- new("stepFlexmix", models = models, k = as.integer(k), logLiks = STEP1@logLiks, nrep = STEP1@nrep, call = MYCALL)
if (unique)
z <- unique(z)
return(z)
}
}
flexmix/R/factanal.R 0000644 0001762 0000144 00000003452 14404637304 014034 0 ustar ligges users setClass("FLXMCfactanal",
contains = "FLXMC")
###**********************************************************
FLXMCfactanal <- function(formula=.~., factors = 1, ...)
{
z <- new("FLXMCfactanal", weighted=TRUE, formula=formula,
dist = "mvnorm", name="mixtures of factor analyzers")
z@fit <- function(x, y, w, ...){
cov.weighted <- cov.wt(y, wt = w)[c("center","cov")]
cov <- cov.weighted$cov; center <- cov.weighted$center
fa <- factanal(covmat = cov, factors = factors, ...)
Sigma <- fa$loadings %*% t(fa$loadings) + diag(fa$uniquenesses)
df <- (factors + 2) * ncol(y)
predict <- function(x)
matrix(center, nrow=nrow(x), ncol=length(center),
byrow=TRUE)
logLik <- function(x, y){
sds <- sqrt(diag(cov))
mvtnorm::dmvnorm(y, mean = center, sigma = Sigma * (sds %o% sds), log = TRUE)
}
new("FLXcomponent", parameters=list(mu = center,
variance = diag(cov),
loadings = fa$loadings,
uniquenesses = fa$uniquenesses),
df=df, logLik=logLik, predict=predict)
}
z
}
###**********************************************************
setMethod("rFLXM", signature(model = "FLXMCfactanal", components = "FLXcomponent"),
function(model, components, class, ...) {
FUN <- paste("r", model@dist, sep = "")
Sigma <- components@parameters$loadings %*% t(components@parameters$loadings) + diag(components@parameters$uniquenesses)
sds <- sqrt(components@parameters$variance)
args <- list(n = nrow(model@x), mean = components@parameters$mu,
sigma = Sigma * (sds %o% sds))
return(do.call(FUN, args))
})
flexmix/R/robust.R 0000644 0001762 0000144 00000004217 14404637304 013601 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: robust.R 5079 2016-01-31 12:21:12Z gruen $
#
###*********************************************************
setClass("FLXMRrobglm",
representation(bgw="logical"),
prototype(bgw=FALSE),
contains = "FLXMRglm")
FLXMRrobglm <- function(formula = . ~ .,
family=c("gaussian", "poisson"),
bgw=FALSE, ...)
{
family <- match.arg(family)
new("FLXMRrobglm", FLXMRglm(formula, family, ...),
name = paste("FLXMRrobglm", family, sep=":"),
bgw = bgw)
}
## Model-matrix setup: delegate to the FLXMRglm method and require an
## intercept term -- the noise component (see FLXmstep) puts all its
## information into the intercept coefficient.
setMethod("FLXgetModelmatrix", signature(model="FLXMRrobglm"),
function(model, data, formula, lhs=TRUE, ...)
{
model <- callNextMethod(model, data, formula, lhs)
if (attr(terms(model@fullformula), "intercept")==0)
stop("please include an intercept")
new("FLXMRrobglm", model)
})
## If the first component (the noise component) is dropped, degrade to
## a plain FLXMRglm so all remaining components are fitted by the
## standard GLM M-step.
setMethod("FLXremoveComponent", signature(model = "FLXMRrobglm"),
function(model, nok, ...)
{
if (1 %in% nok) model <- as(model, "FLXMRglm")
model
})
## M-step for the robust driver.  Component 1 is the background/noise
## component with fixed form (df = 0): intercept only, all other
## coefficients zero.  The remaining components are fitted by the
## ordinary FLXMRglm M-step on the remaining weight columns.
setMethod("FLXmstep", signature(model = "FLXMRrobglm"),
function(model, weights, ...)
{
## with bgw, use the posterior weights of component 1; otherwise
## estimate the noise component from all observations equally
if(model@bgw){
w <- weights[,1]
}
else{
w <- rep(1, nrow(weights))
}
if(model@family=="gaussian")
{
## noise component: weighted mean and sd of the response
cwt <- cov.wt(model@y, w)
coef <- c(cwt$center, rep(0, ncol(model@x)-1))
names(coef) <- colnames(model@x)
comp.1 <- model@defineComponent(list(coef = coef, df = 0, offset = NULL,
sigma=sqrt(cwt$cov),
family = model@family))
}
else if(model@family=="poisson")
{
cwt <- cov.wt(model@y, w)
## inflated rate (3x the weighted mean, on the log-link scale)
coef <- c(log(3*cwt$center), rep(0, ncol(model@x)-1))
names(coef) <- colnames(model@x)
comp.1 <- model@defineComponent(list(coef = coef, df = 0, offset = NULL,
family = model@family))
}
else{
stop("Other families not implemented yet!")
}
## other components: drop weight column 1 and delegate to FLXMRglm
c(list(comp.1), FLXmstep(as(model, "FLXMRglm"),
weights[, -1, drop=FALSE], ...))
})
flexmix/R/glmnet.R 0000644 0001762 0000144 00000007241 14404637304 013551 0 ustar ligges users #' @title flexmix model driver for adaptive lasso (elastic-net) with GLMs
#' @author F. Mortier (fmortier@cirad.fr) and N. Picard (nicolas.picard@cirad.fr)
#' @param formula A symbolic description of the model to be fit.
#' The general form is y~x|g where y is the response, x the set of predictors and g an
#' optional grouping factor for repeated measurements.
#' @param family a description of the error distribution and link function to be used in the model.
#' "gaussian", "poisson" and "binomial" are allowed.
#' @param adaptive boolean indicating if algorithm should perform adaptive lasso or not
#' @param select boolean vector indicating which covariates will be included in the selection process.
#' Others will be included in the model.
#' @details Some care is needed to ensure convergence of the
#' algorithm, which is computationally more challenging than a standard EM.
#' In the proposed method, not only are cluster allocations identified
#' and component parameters estimated as commonly done in mixture models,
#' but there is also variable selection via penalized regression using
#' $k$-fold cross-validation to choose the penalty parameter.
#' For the algorithm to converge, it is necessary that the same cross-validation
#' partitioning be used across the EM iterations, i.e.,
#' the subsamples for cross-validation must be defined at the beginning.
#' This is accomplished using the \code{foldid} option
#' as an additional parameter to be passed to \code{\link{cv.glmnet}} (see \link{glmnet} package documentation).
## FLXMRglmnet: mixture-of-GLMs driver with per-component variable
## selection via (adaptive) lasso using glmnet::cv.glmnet.
## NOTE(review): the `offset` argument is accepted but never used in
## the fitting code below -- confirm whether it should be forwarded to
## FLXMRglm()/cv.glmnet().
FLXMRglmnet <-
function(formula = .~., family = c("gaussian", "binomial", "poisson"), adaptive = TRUE, select = TRUE, offset = NULL, ...) {
family <- match.arg(family)
z <- FLXMRglm(formula = formula, family = family)
## the penalized fit runs on x[, -1]; an explicit intercept column is
## therefore required (it stays unpenalized)
z@preproc.x <- function(x) {
if (!isTRUE(all.equal(x[, 1], rep(1, nrow(x)), check.attributes = FALSE)))
stop("The model needs to include an intercept in the first column.")
x
}
z@fit <- function(x, y, w) {
if (all(!select)) {
## no covariate enters the selection: plain weighted GLM/LS fit
coef <- if (family == "gaussian")
lm.wfit(x, y, w = w)$coef
else if (family == "binomial")
glm.fit(x, y, family = binomial(), weights = w)$coef
else if (family == "poisson")
glm.fit(x, y, family=poisson(), weights = w)$coef
} else {
if (adaptive) {
## adaptive lasso: penalty factors inversely proportional to the
## unpenalized coefficient estimates (intercept excluded)
coef <- if (family == "gaussian")
lm.wfit(x, y, w = w)$coef[-1]
else if(family == "binomial")
glm.fit(x, y, family = binomial(), weights = w)$coef[-1]
else if (family == "poisson")
glm.fit(x, y, family = poisson(), weights = w)$coef[-1]
penalty <- mean(w) / abs(coef)
} else
penalty <- rep(1, ncol(x) - 1)
## covariates excluded from selection get zero penalty (always kept)
if (any(!select)){
select <- which(!select)
penalty[select] <- 0
}
## `...` forwards e.g. `foldid` so the CV partition stays fixed
## across EM iterations
m <- glmnet::cv.glmnet(x[, -1, drop = FALSE], y, family = family, weights = w,
penalty.factor = penalty, ...)
coef <- as.vector(coef(m, s = "lambda.min"))
}
## degrees of freedom = number of non-zero coefficients
df <- sum(coef != 0)
## gaussian components also estimate sigma (one extra df below)
sigma <- if (family == "gaussian") sqrt(sum(w * (y - x %*% coef)^2/mean(w))/(nrow(x) - df)) else NULL
z@defineComponent(
list(coef = coef, sigma = sigma, df = df + ifelse(family == "gaussian", 1, 0)))
}
z
}
flexmix/R/flxmcmvpois.R 0000644 0001762 0000144 00000001537 14404637304 014634 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: flxmcmvpois.R 5079 2016-01-31 12:21:12Z gruen $
#
## Model driver for clustering multivariate count data: each component
## has independent Poisson margins with rate vector `lambda`.
FLXMCmvpois <- function(formula=.~.)
{
  obj <- new("FLXMC", weighted=TRUE, formula=formula,
             dist="mvpois", name="model-based Poisson clustering")
  ## responses must be integer counts
  obj@preproc.y <- function(x) {
    storage.mode(x) <- "integer"
    x
  }
  ## build a component object from an estimated rate vector
  obj@defineComponent <- function(para) {
    ## independent-Poisson log-likelihood per observation row
    logLik <- function(x, y) {
      colSums(dpois(t(y), para$lambda, log=TRUE))
    }
    ## constant prediction: the rate vector for every observation
    predict <- function(x, ...) {
      matrix(para$lambda, nrow = nrow(x), ncol=length(para$lambda),
             byrow=TRUE)
    }
    new("FLXcomponent", parameters=list(lambda=para$lambda), df=para$df,
        logLik=logLik, predict=predict)
  }
  ## weighted ML estimate: lambda_j = sum_i w_i y_ij / sum_i w_i
  obj@fit <- function(x, y, w, ...) {
    obj@defineComponent(list(lambda = colSums(w*y)/sum(w), df = ncol(y)))
  }
  obj
}
flexmix/R/models.R 0000644 0001762 0000144 00000026412 14404637304 013547 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: models.R 5079 2016-01-31 12:21:12Z gruen $
#
## FLXMRglm: the standard flexmix driver for mixtures of generalized
## linear models ("gaussian", "binomial", "poisson", "Gamma").
## `offset` is captured by the closures below; predict() can override
## it at prediction time via an `offset` element passed through `...`.
FLXMRglm <- function(formula=.~.,
family=c("gaussian", "binomial", "poisson", "Gamma"),
offset=NULL)
{
family <- match.arg(family)
## refit helper: full glm.fit with posterior weights, patched so the
## degrees of freedom reflect the weighted number of observations
glmrefit <- function(x, y, w) {
fit <- c(glm.fit(x, y, weights=w, offset=offset,
family=get(family, mode="function")()),
list(call = sys.call(), offset = offset,
control = eval(formals(glm.fit)$control),
method = "weighted.glm.fit"))
fit$df.null <- sum(w) + fit$df.null - fit$df.residual - fit$rank
fit$df.residual <- sum(w) - fit$rank
fit$x <- x
fit
}
z <- new("FLXMRglm", weighted=TRUE, formula=formula,
name=paste("FLXMRglm", family, sep=":"), offset = offset,
family=family, refit=glmrefit)
## all four families model a univariate response (binomial overrides
## this check below with its own two-column requirement)
z@preproc.y <- function(x){
if (ncol(x) > 1)
stop(paste("for the", family, "family y must be univariate"))
x
}
if(family=="gaussian"){
z@defineComponent <- function(para) {
predict <- function(x, ...) {
dotarg = list(...)
## an offset supplied at prediction time shadows the stored one
if("offset" %in% names(dotarg)) offset <- dotarg$offset
p <- x %*% para$coef
if (!is.null(offset)) p <- p + offset
p
}
logLik <- function(x, y, ...)
dnorm(y, mean=predict(x, ...), sd=para$sigma, log=TRUE)
new("FLXcomponent",
parameters=list(coef=para$coef, sigma=para$sigma),
logLik=logLik, predict=predict,
df=para$df)
}
## weighted least squares; sigma from weighted residuals, df adds 1
## for the sigma parameter
z@fit <- function(x, y, w, component){
fit <- lm.wfit(x, y, w=w, offset=offset)
z@defineComponent(para = list(coef = coef(fit), df = ncol(x)+1,
sigma = sqrt(sum(fit$weights * fit$residuals^2 /
mean(fit$weights))/ (nrow(x)-fit$rank))))
}
}
else if(family=="binomial"){
## binomial responses are (successes, failures) count pairs
z@preproc.y <- function(x){
if (ncol(x) != 2)
stop("for the binomial family, y must be a 2 column matrix\n",
"where col 1 is no. successes and col 2 is no. failures")
if (any(x < 0))
stop("negative values are not allowed for the binomial family")
x
}
z@defineComponent <- function(para) {
predict <- function(x, ...) {
dotarg = list(...)
if("offset" %in% names(dotarg)) offset <- dotarg$offset
p <- x %*% para$coef
if (!is.null(offset)) p <- p + offset
## return probabilities on the response scale
get(family, mode = "function")()$linkinv(p)
}
logLik <- function(x, y, ...)
dbinom(y[,1], size=rowSums(y), prob=predict(x, ...), log=TRUE)
new("FLXcomponent",
parameters=list(coef=para$coef),
logLik=logLik, predict=predict,
df=para$df)
}
## warm-start glm.fit from the previous component coefficients
z@fit <- function(x, y, w, component){
fit <- glm.fit(x, y, weights=w, family=binomial(), offset=offset, start=component$coef)
z@defineComponent(para = list(coef = coef(fit), df = ncol(x)))
}
}
else if(family=="poisson"){
z@defineComponent <- function(para) {
predict <- function(x, ...) {
dotarg = list(...)
if("offset" %in% names(dotarg)) offset <- dotarg$offset
p <- x %*% para$coef
if (!is.null(offset)) p <- p + offset
get(family, mode = "function")()$linkinv(p)
}
logLik <- function(x, y, ...)
dpois(y, lambda=predict(x, ...), log=TRUE)
new("FLXcomponent",
parameters=list(coef=para$coef),
logLik=logLik, predict=predict,
df=para$df)
}
z@fit <- function(x, y, w, component){
fit <- glm.fit(x, y, weights=w, family=poisson(), offset=offset, start=component$coef)
z@defineComponent(para = list(coef = coef(fit), df = ncol(x)))
}
}
else if(family=="Gamma"){
z@defineComponent <- function(para) {
predict <- function(x, ...) {
dotarg = list(...)
if("offset" %in% names(dotarg)) offset <- dotarg$offset
p <- x %*% para$coef
if (!is.null(offset)) p <- p + offset
get(family, mode = "function")()$linkinv(p)
}
logLik <- function(x, y, ...)
dgamma(y, shape = para$shape, scale=predict(x, ...)/para$shape, log=TRUE)
new("FLXcomponent",
parameters = list(coef = para$coef, shape = para$shape),
predict = predict, logLik = logLik,
df = para$df)
}
## shape estimated from the deviance; df adds 1 for the shape
z@fit <- function(x, y, w, component){
fit <- glm.fit(x, y, weights=w, family=Gamma(), offset=offset, start=component$coef)
z@defineComponent(para = list(coef = coef(fit), df = ncol(x)+1,
shape = sum(fit$prior.weights)/fit$deviance))
}
}
else stop(paste("Unknown family", family))
z
}
###**********************************************************
## FLXMCmvnorm: driver for multivariate Gaussian model-based
## clustering; `diagonal = TRUE` restricts components to diagonal
## covariance matrices.
FLXMCmvnorm <- function(formula=.~., diagonal=TRUE)
{
z <- new("FLXMC", weighted=TRUE, formula=formula,
dist = "mvnorm", name="model-based Gaussian clustering")
z@defineComponent <- function(para) {
logLik <- function(x, y)
mvtnorm::dmvnorm(y, mean=para$center, sigma=para$cov, log=TRUE)
## constant prediction: the component mean for every observation
predict <- function(x, ...)
matrix(para$center, nrow=nrow(x), ncol=length(para$center),
byrow=TRUE)
new("FLXcomponent", parameters=list(center = para$center, cov = para$cov),
df=para$df, logLik=logLik, predict=predict)
}
## weighted mean and covariance via cov.wt()
z@fit <- function(x, y, w, ...){
para <- cov.wt(y, wt=w)[c("center","cov")]
## full covariance: p means + p(p+1)/2 covariance entries
para$df <- (3*ncol(y) + ncol(y)^2)/2
if(diagonal){
para$cov <- diag(diag(para$cov))
## diagonal case: p means + p variances
para$df <- 2*ncol(y)
}
z@defineComponent(para)
}
z
}
## FLXMCnorm1: univariate Gaussian clustering.  `dist` is deliberately
## "mvnorm" (as in FLXMCmvnorm), presumably so helpers that build names
## as r<dist>/d<dist> reuse the multivariate routines -- confirm against
## the rFLXM methods.
FLXMCnorm1 <- function(formula=.~.)
{
z <- new("FLXMC", weighted=TRUE, formula=formula,
dist = "mvnorm", name="model-based univariate Gaussian clustering")
z@defineComponent <- function(para) {
logLik <- function(x, y)
dnorm(y, mean=para$center, sd=sqrt(para$cov), log=TRUE)
## constant prediction: the component mean
predict <- function(x, ...)
matrix(para$center, nrow=nrow(x), ncol=1,
byrow=TRUE)
new("FLXcomponent",
parameters=list(mean = as.vector(para$center), sd = as.vector(sqrt(para$cov))),
df=para$df, logLik=logLik, predict=predict)
}
## weighted mean/variance; df = 2 (mean and standard deviation)
z@fit <- function(x, y, w, ...){
para <- cov.wt(as.matrix(y), wt=w)[c("center","cov")]
z@defineComponent(c(para, list(df = 2)))
}
z
}
###**********************************************************
## Dispatcher for multivariate binary clustering: either the plain
## independent-Bernoulli driver or the variant whose likelihood is
## adjusted for truncation of the all-zero response vector.
FLXMCmvbinary <- function(formula=.~., truncated = FALSE) {
  if (truncated) {
    MCmvbinary_truncated(formula)
  } else {
    MCmvbinary(formula)
  }
}
## MCmvbinary: clustering of multivariate binary data with independent
## Bernoulli margins per component.
MCmvbinary <- function(formula=.~.)
{
z <- new("FLXMC", weighted=TRUE, formula=formula,
dist = "mvbinary", name="model-based binary clustering")
## make sure that y is binary
z@preproc.y <- function(x){
storage.mode(x) <- "logical"
storage.mode(x) <- "integer"
x
}
z@defineComponent <- function(para) {
## constant prediction: the vector of success probabilities
predict <- function(x, ...){
matrix(para$center, nrow=nrow(x), ncol=length(para$center),
byrow=TRUE)
}
## independent-Bernoulli log-likelihood per observation row
logLik <- function(x, y){
p <- matrix(para$center, nrow=nrow(x), ncol=length(para$center),
byrow=TRUE)
rowSums(log(y*p+(1-y)*(1-p)))
}
new("FLXcomponent", parameters=list(center=para$center), df=para$df,
logLik=logLik, predict=predict)
}
## weighted ML estimate of the success probabilities
z@fit <- function(x, y, w, ...)
z@defineComponent(list(center = colSums(w*y)/sum(w), df = ncol(y)))
z
}
###**********************************************************
## Fixed-point (EM-type) estimation of Bernoulli success probabilities
## for multivariate binary data truncated at the all-zero vector.
## `freq` holds the observed weighted marginal frequencies; `zero_mass`
## is the imputed relative mass of the unobserved all-zero cell, updated
## until the (expected) log-likelihood stabilizes.
binary_truncated <- function(y, w, maxit = 200, epsilon = .Machine$double.eps) {
  freq <- colSums(y * w) / sum(w)
  zero_mass <- 0
  loglik_prev <- -Inf
  for (iter in seq_len(maxit)) {
    p <- freq / (1 + zero_mass)
    pos <- freq > 0
    neg <- (1 - freq + zero_mass) > 0
    loglik <- sum((freq * log(p))[pos]) +
      sum(((1 - freq + zero_mass) * log(1 - p))[neg])
    ## relative-change convergence test (0.1 guards against loglik == 0)
    if (abs(loglik - loglik_prev) / (abs(loglik) + 0.1) < epsilon) break
    loglik_prev <- loglik
    ## probability of the all-zero vector under independence
    all_zero_prob <- prod(1 - p)
    zero_mass <- all_zero_prob / (1 - all_zero_prob)
  }
  p
}
## MCmvbinary_truncated: binary clustering where the all-zero response
## vector is unobservable; the likelihood is conditioned on at least
## one success via the normalizing constant 1 - prod(1 - p).
MCmvbinary_truncated <- function(formula=.~.)
{
z <- MCmvbinary(formula=formula)
z@defineComponent <- function(para) {
predict <- function(x, ...) {
matrix(para$center, nrow = nrow(x), ncol = length(para$center),
byrow = TRUE)
}
logLik <- function(x, y) {
p <- matrix(para$center, nrow = nrow(x), ncol = length(para$center),
byrow = TRUE)
## subtract log P(not all zero) to account for the truncation
rowSums(log(y * p + (1 - y) * (1 - p))) - log(1 - prod(1-para$center))
}
new("FLXcomponent", parameters = list(center = para$center), df = para$df,
logLik = logLik, predict = predict)
}
## probabilities estimated by the truncated fixed-point helper
z@fit <- function(x, y, w, ...){
z@defineComponent(list(center = binary_truncated(y, w), df = ncol(y)))
}
z
}
###**********************************************************
## Class for mixed binary/Gaussian clustering; slot `binary` flags
## which response columns are binary (filled in by the
## FLXgetModelmatrix method below).
setClass("FLXMCmvcombi",
representation(binary = "vector"),
contains = "FLXMC")
## FLXMCmvcombi: clustering of mixed responses -- binary columns get
## independent Bernoulli margins, the remaining columns independent
## Gaussians (diagonal covariance).
FLXMCmvcombi <- function(formula=.~.)
{
z <- new("FLXMCmvcombi", weighted=TRUE, formula=formula,
dist = "mvcombi",
name="model-based binary-Gaussian clustering")
z@defineComponent <- function(para) {
## constant prediction: the component center for every observation
predict <- function(x, ...){
matrix(para$center, nrow=nrow(x), ncol=length(para$center),
byrow=TRUE)
}
logLik <- function(x, y){
## Bernoulli part over the binary columns (if any)
if(any(para$binary)){
p <- matrix(para$center[para$binary], nrow=nrow(x),
ncol=sum(para$binary), byrow=TRUE)
z <- rowSums(log(y[,para$binary,drop=FALSE]*p +
(1-y[,para$binary,drop=FALSE])*(1-p)))
} else z <- rep(0, nrow(x))
## Gaussian part over the remaining columns (if any); a single
## column uses dnorm, several use an independent mvnorm
if(!all(para$binary)){
if(sum(!para$binary)==1)
z <- z + dnorm(y[,!para$binary],
mean=para$center[!para$binary], sd=sqrt(para$var),
log=TRUE)
else
z <- z + mvtnorm::dmvnorm(y[,!para$binary,drop=FALSE],
mean=para$center[!para$binary], sigma=diag(para$var),
log=TRUE)
}
z
}
new("FLXcomponent", parameters=list(center=para$center, var=para$var), df=para$df,
logLik=logLik, predict=predict)
}
## weighted means for all columns, weighted variances for the
## non-binary ones; df = p means + (#gaussian) variances
z@fit <- function(x, y, w, binary, ...){
para <- cov.wt(y, wt=w)[c("center","cov")]
para <- list(center = para$center, var = diag(para$cov)[!binary],
df = ncol(y) + sum(!binary),
binary = binary)
z@defineComponent(para)
}
z
}
## After the standard model-matrix setup, detect which response columns
## contain only 0/1 values and record that in @binary.
setMethod("FLXgetModelmatrix", signature(model="FLXMCmvcombi"),
function(model, data, formula, lhs=TRUE, ...)
{
model <- callNextMethod(model, data, formula, lhs)
model@binary <- apply(model@y, 2, function(z) all(unique(z) %in% c(0,1)))
model
})
## M-step: fit one component per posterior-weight column, forwarding
## the binary-column indicator to the fit function.
setMethod("FLXmstep", signature(model = "FLXMCmvcombi"),
function(model, weights, components)
{
return(sapply(seq_len(ncol(weights)),
function(k) model@fit(model@x, model@y, weights[,k], model@binary)))
})
flexmix/R/boot.R 0000644 0001762 0000144 00000034013 14404637304 013223 0 ustar ligges users setGeneric("boot", function(object, ...) standardGeneric("boot"))
## Generic for the bootstrap likelihood ratio test on the number of components.
setGeneric("LR_test", function(object, ...) standardGeneric("LR_test"))
## Container for bootstrap results of a flexmix fit: the originating
## call and object, per-replicate component parameters, concomitant
## coefficients and priors, plus matrices (replicate x k) of
## log-likelihoods, retained component counts and convergence flags;
## optionally the refitted models and weights.
setClass("FLXboot",
representation(call="call",
object="flexmix",
parameters="list",
concomitant="list",
priors="list",
logLik="matrix",
k="matrix",
converged="matrix",
models="list",
weights="list"))
## Compact display of a FLXboot object: only the originating call.
setMethod("show", "FLXboot",
function(object) {
cat("\nCall:", deparse(object@call,0.75*getOption("width")),
sep="\n")
})
## Compress duplicated observations into unique rows plus integer case
## weights.  Only applied when the object has no weights yet.
## NOTE(review): `object@model` is indexed as a list below, so
## `is(object@model, "FLXMC")` presumably relies on a list-like class
## extending "FLXMC" -- confirm against the "flexmix" class definition.
generate_weights <- function(object) {
  ## FIX: use scalar, short-circuiting `&&` in the `if` condition
  ## (the original used elementwise `&`)
  if (is.null(object@weights) && is(object@model, "FLXMC")) {
    ## paste all responses row-wise into a key identifying identical rows
    X <- do.call("cbind", lapply(object@model, function(z) z@y))
    x <- apply(X, 1, paste, collapse = "")
    x <- as.integer(factor(x, unique(x)))
    ## each unique row's multiplicity becomes its case weight
    object@weights <- as.vector(table(x))
    indices_unique <- !duplicated(x)
    for (i in seq_along(object@model)) {
      object@model[[i]]@x <- object@model[[i]]@x[indices_unique,,drop=FALSE]
      object@model[[i]]@y <- object@model[[i]]@y[indices_unique,,drop=FALSE]
    }
    object@concomitant@x <- object@concomitant@x[indices_unique,,drop=FALSE]
  }
  object
}
## Generic for rebuilding a model's x/y matrices from resampled indices.
setGeneric("FLXgetNewModelmatrix", function(object, ...) standardGeneric("FLXgetNewModelmatrix"))
## Resample the model matrices: with grouping, `indices` selects whole
## groups and all member observations are taken; otherwise `indices`
## selects individual observations directly.
setMethod("FLXgetNewModelmatrix", "FLXM", function(object, model, indices, groups) {
if (length(groups$group) > 0) {
## expand sampled group labels to the row indices of their members
obs_groups <- lapply(groups$group[groups$groupfirst][indices],
function(x) which(x == groups$group))
indices_grouped <- unlist(obs_groups)
} else {
indices_grouped <- indices
}
object@y <- model@y[indices_grouped,,drop=FALSE]
object@x <- model@x[indices_grouped,,drop=FALSE]
object
})
## Resampling for FLXMRglmfix models: x/y are stored stacked with one
## segment of rows per component, so the resampled rows must be
## replicated for every component segment and @segment rebuilt.
setMethod("FLXgetNewModelmatrix", "FLXMRglmfix", function(object, model, indices, groups) {
if (length(groups$group) > 0) {
obs_groups <- lapply(groups$group[groups$groupfirst][indices],
function(x) which(x == groups$group))
indices_grouped <- unlist(obs_groups)
} else {
indices_grouped <- indices
}
## replicate the resampled responses once per component
object@y <- do.call("rbind", rep(list(model@y[indices_grouped,,drop=FALSE]), sum(model@nestedformula@k)))
## resample each component's segment of the design matrix
object@x <- do.call("rbind", lapply(seq_len(sum(model@nestedformula@k)), function(K)
model@x[model@segment[,K],,drop=FALSE][indices_grouped,,drop=FALSE]))
N <- nrow(object@x)/sum(model@nestedformula@k)
object@segment <- matrix(FALSE, ncol = sum(model@nestedformula@k), nrow = nrow(object@x))
for (m in seq_len(sum(model@nestedformula@k))) object@segment[(m - 1) * N + seq_len(N), m] <- TRUE
object
})
## Bootstrap a fitted flexmix model.
## R: number of bootstrap replicates (replicate 1 uses the original
##    data); sim: resampling scheme ("ordinary"/"empirical" resample
##    observations or groups, "parametric" simulates from the fit);
## initialize_solution: start EM for the fitted k from the original
##    solution; keep_weights / keep_groups: retain case weights and
##    grouping; k: vector of component numbers to fit per replicate;
## model: also store the refitted models and weights.
## Returns a "FLXboot" object.
boot_flexmix <- function(object, R, sim = c("ordinary", "empirical", "parametric"), initialize_solution = FALSE,
keep_weights = FALSE, keep_groups = TRUE, verbose = 0, control, k, model = FALSE, ...) {
sim <- match.arg(sim)
if (missing(R)) stop("R needs to be specified")
if (!missing(control)) object@control <- do.call("new", c(list(Class = "FLXcontrol", object@control), control))
if (missing(k)) k <- object@k
m <- length(object@model)
## expand case weights back into individual observations for resampling
has_weights <- !keep_weights & !is.null(object@weights)
if (has_weights) object <- undo_weights(object)
if (!keep_groups & length(object@group)) {
object@concomitant@x <- object@concomitant@x[as.integer(object@group),,drop = FALSE]
object@group <- factor()
}
groups <- list()
groups$group <- object@group
groups$groupfirst <- if (length(groups$group) > 0) groupFirst(groups$group)
else rep(TRUE, FLXgetObs(object@model[[1]]))
concomitant <- parameters <- priors <- models <- weights <- vector("list", R)
logLik <- ks <- converged <- matrix(nrow=R, ncol = length(k), dimnames = list(BS = seq_len(R), k = k))
for (iter in seq_len(R)) {
new <- object
newgroups <- groups
if(verbose && !(iter%%verbose)) cat("* ")
## replicate 1 refits the original data; later replicates resample
if (iter > 1) {
if (sim == "parametric") {
## simulate new responses from the fitted mixture
y <- rflexmix(object, ...)$y
for (i in seq_len(m))
new@model[[i]]@y <- matrix(as.vector(t(y[[i]])),
nrow = nrow(new@model[[i]]@x),
ncol = ncol(y[[i]]), byrow = TRUE)
} else {
## nonparametric: resample observations (or whole groups)
n <- sum(groups$groupfirst)
indices <- sample(seq_len(n), n, replace = TRUE)
if (length(groups$group) > 0) {
obs_groups <- lapply(groups$group[groups$groupfirst][indices],
function(x) which(x == groups$group))
newgroups$group <- factor(rep(seq_along(obs_groups), sapply(obs_groups, length)))
newgroups$groupfirst <- !duplicated(newgroups$group)
}
for (i in seq_len(m)) {
new@model[[i]] <- FLXgetNewModelmatrix(new@model[[i]], object@model[[i]],
indices, groups)
}
new@concomitant@x <- new@concomitant@x[indices,,drop=FALSE]
}
}
## re-compress duplicated observations into case weights if possible
if (has_weights & !length(groups$group) > 0) {
new <- generate_weights(new)
newgroups$groupfirst <- rep(TRUE, FLXgetObs(new@model[[1]]))
}
parameters[[iter]] <- concomitant[[iter]] <- priors[[iter]] <- list()
## a single EM run suffices for the fitted k when starting from the
## original solution
NREP <- rep(object@control@nrep, length(k))
if (initialize_solution & object@k %in% k) NREP[k == object@k] <- 1L
for (K in seq_along(k)) {
fit <- new("flexmix", logLik = -Inf)
for (nrep in seq_len(NREP[K])) {
if (k[K] != object@k | !initialize_solution) {
postunscaled <- initPosteriors(k[K], NULL, FLXgetObs(new@model[[1]]), newgroups)
} else {
## initialize posteriors from the original solution's components
postunscaled <- matrix(0, nrow = FLXgetObs(new@model[[1]]), ncol = k[K])
for (i in seq_len(m))
postunscaled <- postunscaled + FLXdeterminePostunscaled(new@model[[i]], lapply(new@components, function(x) x[[i]]))
if(length(newgroups$group)>0)
postunscaled <- groupPosteriors(postunscaled, newgroups$group)
prior <- evalPrior(new@prior, new@concomitant)
postunscaled <- if (is(prior, "matrix")) postunscaled + log(prior)
else sweep(postunscaled, 2, log(prior), "+")
postunscaled <- exp(postunscaled - log_row_sums(postunscaled))
}
x <- try(FLXfit(new@model, new@concomitant, new@control, postunscaled, newgroups, weights = new@weights))
## keep the best of the nrep EM runs
if (!is(x, "try-error")) {
if(logLik(x) > logLik(fit))
fit <- x
}
}
if (is.finite(logLik(fit))) {
parameters[[iter]][paste(k[K])] <- list(parameters(fit, simplify = FALSE, drop = FALSE))
concomitant[[iter]][paste(k[K])] <- list(parameters(fit, which = "concomitant"))
priors[[iter]][[paste(k[K])]] <- prior(fit)
logLik[iter, paste(k[K])] <- logLik(fit)
ks[iter, paste(k[K])] <- fit@k
converged[iter, paste(k[K])] <- fit@converged
if (model) {
models[[iter]] <- fit@model
weights[[iter]] <- fit@weights
}
} else {
parameters[[iter]][[paste(k[K])]] <- concomitant[[iter]][[paste(k[K])]] <- priors[[iter]][[paste(k[K])]] <- NULL
}
}
}
if(verbose) cat("\n")
new("FLXboot", call = sys.call(-1), object = object, parameters = parameters,
concomitant = concomitant, priors = priors, logLik = logLik, k = ks,
converged = converged, models = models, weights = weights)
}
## Register boot_flexmix as the boot() method for flexmix objects.
setMethod("boot", signature(object="flexmix"), boot_flexmix)
## Bootstrap likelihood-ratio test for the number of mixture
## components: for alternative = "greater" the null of k components is
## tested against k + 1, for "less" against k - 1.  Parametric
## bootstrap replicates are fitted for both numbers of components and
## the observed LRT statistic (replicate 1 = original data) is compared
## to the bootstrap distribution.  Returns an "htest" object with the
## bootstrap replicates attached as `bootstrap.results`.
setMethod("LR_test",
signature(object="flexmix"),
function(object, R, alternative = c("greater", "less"), control, ...) {
  alternative <- match.arg(alternative)
  if (missing(control)) control <- object@control
  ## FIX: scalar, short-circuiting `&&` (the original used elementwise `&`)
  if (object@k == 1 && alternative == "less")
    stop(paste("alternative", alternative, "only possible for a mixture\n",
               "with at least two components"))
  ## the two component numbers to compare, smaller first
  k <- object@k + switch(alternative, greater = 0:1, less = 0:-1)
  names(k) <- k
  boot <- boot(object, R, sim = "parametric", k = k,
               initialize_solution = TRUE, control = control, ...)
  ## keep only replicates where both fits retained the requested k
  ok <- apply(boot@k, 1, identical, k)
  lrts <- 2 * apply(boot@logLik[ok, order(k)], 1, diff)
  ## replicate 1 corresponds to the original data
  STATISTIC <- lrts[1]
  names(STATISTIC) <- "LRTS"
  PARAMETER <- length(lrts)
  names(PARAMETER) <- "BS"
  ## FIX: include the computed statistic in the returned htest object
  ## (the original computed STATISTIC but never returned it)
  RETURN <- list(statistic = STATISTIC,
                 parameter = PARAMETER,
                 p.value = sum(lrts[1] <= lrts)/length(lrts),
                 alternative = alternative,
                 null.value = object@k,
                 method = "Bootstrap likelihood ratio test",
                 data.name = deparse(substitute(object)),
                 bootstrap.results = boot)
  class(RETURN) <- "htest"
  RETURN
})
## Collect per-replicate parameter estimates into one matrix: one row
## per replicate, columns are the flattened component coefficients
## followed by the concomitant coefficients.  Replicates where the fit
## lost components (object@k[i] < k) are padded with NA; the
## defineComponent slot may be either an expression (evaluated in the
## parameter list) or a function (called on it).
setMethod("parameters", "FLXboot", function(object, k, ...) {
if (missing(k)) k <- object@object@k
Coefs <- lapply(seq_along(object@parameters), function(i)
if (is.na(object@k[i])) NULL
else do.call("cbind", c(lapply(seq_len(object@k[i]), function(j)
unlist(sapply(seq_along(object@object@model), function(m)
FLXgetParameters(as(object@object@model[[m]], "FLXMR"),
if (is(object@object@model[[m]]@defineComponent, "expression"))
list(eval(object@object@model[[m]]@defineComponent,
c(object@parameters[[i]][[paste(k)]][[m]][[j]],
list(df = object@object@components[[j]][[m]]@df))))
else {
list(object@object@model[[m]]@defineComponent(
c(object@parameters[[i]][[paste(k)]][[m]][[j]],
list(df = object@object@components[[j]][[m]]@df))))
})))),
as.list(rep(NA, k - object@k[i])))))
Coefs <- t(do.call("cbind", Coefs))
## drop the component prefix from the parameter names
colnames(Coefs) <- gsub("Comp.1_", "", colnames(Coefs))
Prior <- t(do.call("cbind", lapply(object@concomitant,
function(x) do.call("cbind", c(list(x[[paste(k)]]),
as.list(rep(NA, k - ifelse(length(x), ncol(x[[paste(k)]]), k))))))))
cbind(Coefs, Prior)
})
## Cluster assignments on new data for every bootstrap replicate:
## rebuild a flexmix object per replicate from the stored priors and
## parameters (reconstructing components via defineComponent, which may
## be an expression or a function) and delegate to clusters().
setMethod("clusters", signature(object = "FLXboot", newdata = "listOrdata.frame"), function(object, newdata, k, ...) {
if (missing(k)) k <- object@object@k
lapply(seq_len(length(object@priors)), function(i) {
new <- object@object
new@prior <- object@priors[[i]][[paste(k)]]
new@k <- length(new@prior)
new@components <- rep(list(vector("list", length(object@object@model))), length(new@prior))
for (m in seq_along(new@model)) {
## expose the model slots defineComponent may rely on lexically
variables <- c("x", "y", "offset", "family")
variables <- variables[variables %in% slotNames(new@model[[m]])]
for (var in variables) assign(var, slot(new@model[[m]], var))
for (K in seq_len(object@k[i])) {
new@components[[K]][[m]] <-
if (is(object@object@model[[m]]@defineComponent, "expression"))
eval(object@object@model[[m]]@defineComponent,
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[K]][[m]]@df)))
else
object@object@model[[m]]@defineComponent(
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[K]][[m]]@df)))
}
}
clusters(new, newdata = newdata)})
})
## Posterior probabilities on new data for every bootstrap replicate.
## NOTE(review): the replicate-reconstruction code is identical to the
## clusters() method above except for the final call -- a shared helper
## would remove the duplication.
setMethod("posterior", signature(object = "FLXboot", newdata = "listOrdata.frame"), function(object, newdata, k, ...) {
if (missing(k)) k <- object@object@k
lapply(seq_len(length(object@priors)), function(i) {
new <- object@object
new@prior <- object@priors[[i]][[paste(k)]]
new@k <- length(new@prior)
new@components <- rep(list(vector("list", length(object@object@model))), length(new@prior))
for (m in seq_along(new@model)) {
## expose the model slots defineComponent may rely on lexically
variables <- c("x", "y", "offset", "family")
variables <- variables[variables %in% slotNames(new@model[[m]])]
for (var in variables) assign(var, slot(new@model[[m]], var))
for (K in seq_len(object@k[i])) {
new@components[[K]][[m]] <-
if (is(object@object@model[[m]]@defineComponent, "expression"))
eval(object@object@model[[m]]@defineComponent,
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[K]][[m]]@df)))
else
object@object@model[[m]]@defineComponent(
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[K]][[m]]@df)))
}
}
posterior(new, newdata = newdata)})
})
## Component predictions on new data for every bootstrap replicate;
## like clusters()/posterior() above, but the number of components is
## taken from the replicate's k matrix and the df of component 1 is
## reused for all reconstructed components.
setMethod("predict", signature(object = "FLXboot"), function(object, newdata, k, ...) {
if (missing(k)) k <- object@object@k
lapply(seq_len(length(object@priors)), function(i) {
new <- object@object
new@components <- vector("list", object@k[i, paste(k)])
new@components <- lapply(new@components, function(x) vector("list", length(new@model)))
for (m in seq_along(new@model)) {
## expose the model slots defineComponent may rely on lexically
variables <- c("x", "y", "offset", "family")
variables <- variables[variables %in% slotNames(new@model[[m]])]
for (var in variables) assign(var, slot(new@model[[m]], var))
for (K in seq_len(object@k[i, paste(k)])) {
new@components[[K]][[m]] <-
if (is(object@object@model[[m]]@defineComponent, "expression"))
eval(object@object@model[[m]]@defineComponent,
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[1]][[m]]@df)))
else
object@object@model[[m]]@defineComponent(
c(object@parameters[[i]][[paste(k)]][[m]][[K]],
list(df = object@object@components[[1]][[m]]@df)))
}
}
predict(new, newdata = newdata, ...)})
})
flexmix/R/glmFix.R 0000644 0001762 0000144 00000016226 14404637304 013514 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: glmFix.R 5156 2019-02-12 08:11:16Z gruen $
#
## FLXMRglmfix: GLM mixture driver with coefficients that are fixed
## across components (`fixed`), nested across component groups
## (`nested`), or have a variance structure fixed across components
## (`varFix`, gaussian only).  All components are estimated in ONE
## weighted GLM fit on the stacked (per-component) design matrix built
## by the FLXgetModelmatrix method for "FLXMRfix" below; the incidence
## matrix then distributes coefficients and df back to the components.
FLXMRglmfix <- function(formula=.~., fixed=~0, varFix = FALSE, nested = NULL,
family=c("gaussian", "binomial", "poisson", "Gamma"),
offset=NULL)
{
family <- match.arg(family)
nested <- as(nested, "FLXnested")
if (length(fixed) == 3) stop("no left hand side allowed for fixed")
z <- new("FLXMRglmfix", FLXMRglm(formula, family, offset),
fixed=fixed, name=paste("FLXMRglmfix", family, sep=":"), nestedformula=nested,
variance = varFix)
if(family=="gaussian"){
z@fit <- function(x, y, w, incidence, variance, ...){
fit <- lm.wfit(x, y, w=w, offset=offset)
k <- nrow(incidence)
n <- nrow(x)/k
## one sigma per variance group: pooled over the rows of the
## components sharing that variance
sigma <- vector(length=k)
cumVar <- cumsum(c(0, variance))
for (i in seq_along(variance)) {
ind <- cumVar[i]*n + seq_len(n*variance[i])
sigma[cumVar[i] + seq_len(variance[i])] <- sqrt(sum(fit$weights[ind] * fit$residuals[ind]^2 /
mean(fit$weights[ind]))/ (length(ind) - sum(incidence[i,])))
}
fit <- fit[c("coefficients")]
coefs <- coef(fit)
names(coefs) <- colnames(incidence)
## shared coefficients contribute fractional df to each component
df <- rowSums(incidence/rep(colSums(incidence), each = nrow(incidence))) + rep(1/variance, variance)
lapply(seq_len(k),
function(K) z@defineComponent(
list(coef = coefs[as.logical(incidence[K, ])],
sigma = sigma[K],
df = df[K])))
}
}
else if(family=="binomial"){
z@fit <- function(x, y, w, incidence, ...){
fit <- glm.fit(x, y, weights=w, family=binomial(), offset=offset)
fit <- fit[c("coefficients","family")]
k <- nrow(incidence)
coefs <- coef(fit)
names(coefs) <- colnames(incidence)
df <- rowSums(incidence/rep(colSums(incidence), each = nrow(incidence)))
lapply(seq_len(k),
function(K) z@defineComponent(
list(coef = coefs[as.logical(incidence[K, ])],
df = df[K])))
}
}
else if(family=="poisson"){
z@fit <- function(x, y, w, incidence, ...){
fit <- glm.fit(x, y, weights=w, family=poisson(), offset=offset)
fit <- fit[c("coefficients","family")]
k <- nrow(incidence)
coefs <- coef(fit)
names(coefs) <- colnames(incidence)
df <- rowSums(incidence/rep(colSums(incidence), each = nrow(incidence)))
lapply(seq_len(k),
function(K) z@defineComponent(
list(coef = coefs[as.logical(incidence[K, ])],
df = df[K])))
}
}
else if(family=="Gamma"){
z@fit <- function(x, y, w, incidence, ...){
fit <- glm.fit(x, y, weights=w, family=Gamma(), offset=offset)
## common shape parameter estimated from the overall deviance
shape <- sum(fit$prior.weights)/fit$deviance
fit <- fit[c("coefficients","family")]
k <- nrow(incidence)
coefs <- coef(fit)
names(coefs) <- colnames(incidence)
df <- rowSums(incidence/rep(colSums(incidence), each = nrow(incidence)))
lapply(seq_len(k),
function(K) z@defineComponent(
list(coef = coefs[as.logical(incidence[K, ])],
df = df[K],
shape = shape)))
}
}
else stop(paste("Unknown family", family))
z
}
###**********************************************************
## Refit each component separately with its posterior weights on its
## own segment/design columns; warns because this ignores the joint
## fixed/nested coefficient structure of the original fit.
setMethod("refit_mstep", signature(object="FLXMRglmfix", newdata="missing"),
function(object, newdata, weights, ...)
{
warning("Separate regression models are fitted using posterior weights.")
lapply(seq_len(ncol(weights)), function(k) {
x <- object@x[object@segment[, k],
as.logical(object@design[k,]), drop = FALSE]
colnames(x) <- colnames(object@design)[as.logical(object@design[k,])]
y <- object@y[object@segment[, k],, drop = FALSE]
fit <- object@refit(x, y, weights[,k], ...)
## augment the glm.fit result so it behaves like a glm/lm object
fit <- c(fit,
list(formula = object@fullformula,
terms = object@terms,
contrasts = object@contrasts,
xlevels = object@xlevels))
class(fit) <- c("glm", "lm")
fit
})
})
###**********************************************************
## Fitted values per component for a FLXMRglmfix model: the stacked
## design matrix holds one N-row segment per component, and each
## component predicts from its own segment restricted to its design
## columns.
setMethod("fitted", signature(object="FLXMRglmfix"),
function(object, components, ...)
{
N <- nrow(object@x)/length(components)
lapply(seq_along(components), function(n) {
rows <- (n-1)*N + seq_len(N)
cols <- as.logical(object@design[n,])
list(components[[n]]@predict(object@x[rows, cols, drop=FALSE]))
})
})
###**********************************************************
## Predictions on new data: rebuild the stacked model matrix (no
## response needed) and let each component predict from its segment
## restricted to its design columns.
setMethod("predict", signature(object="FLXMRglmfix"),
function(object, newdata, components, ...)
{
model <- FLXgetModelmatrix(object, newdata, object@fullformula, lhs=FALSE)
k <- sum(object@nestedformula@k)
N <- nrow(model@x)/k
z <- list()
for (m in seq_len(k)) {
z[[m]] <- components[[m]]@predict(model@x[model@segment[,m], as.logical(model@design[m,]), drop=FALSE], ...)
}
z
})
###**********************************************************
## Build the stacked model matrix for fixed/nested-coefficient models:
## fixed-effect columns are replicated for all components, nested
## columns replicated within their component group, and component-
## specific columns placed block-diagonally; the response is replicated
## once per component and @segment records each component's rows.
setMethod("FLXgetModelmatrix", signature(model="FLXMRfix"), function(model, data, formula, lhs=TRUE, ...)
{
formula <- RemoveGrouping(formula)
if (length(grep("\\|", deparse(model@formula)))) stop("no grouping variable allowed in the model")
if(is.null(model@formula))
model@formula <- formula
model@fullformula <- update.formula(formula, model@formula)
k <- model@nestedformula
mm.all <- modelMatrix(model@fullformula, model@fixed, k@formula, data, lhs, model@xlevels)
model@design <- modelDesign(mm.all, k)
## map nested columns to their component group (2 x ncol matrix:
## row 1 = column index in the design, row 2 = group index)
desNested <- if (sum(sapply(mm.all$nested, ncol))) {
rbind(ncol(mm.all$fixed) + seq_len(sum(sapply(mm.all$nested, ncol))),
unlist(lapply(seq_along(mm.all$nested), function(i) rep(i, ncol(mm.all$nested[[i]])))))
}else matrix(ncol=0, nrow=2)
## kronecker products replicate the blocks across the k components
model@x <- cbind(kronecker(rep(1, sum(k@k)), mm.all$fixed),
do.call("cbind", lapply(unique(desNested[2,]), function(i) {
kronecker(model@design[,desNested[1, desNested[2, ] == i][1]],
mm.all$nested[[i]])})),
kronecker(diag(sum(k@k)), mm.all$random))
N <- nrow(model@x)/sum(k@k)
model@segment <- matrix(FALSE, ncol = sum(k@k), nrow = nrow(model@x))
for (m in seq_len(sum(k@k))) model@segment[(m - 1) * N + seq_len(N), m] <- TRUE
if (lhs) {
y <- mm.all$response
rownames(y) <- NULL
response <- as.matrix(apply(y, 2, rep, sum(k@k)))
model@y <- model@preproc.y(response)
}
model@x <- model@preproc.x(model@x)
model@xlevels <- mm.all$xlevels
model
})
flexmix/R/FLXMCdist1.R 0000644 0001762 0000144 00000021231 14404637304 014074 0 ustar ligges users ## Note that the implementation of the weighted ML estimation is
## influenced and inspired by the function fitdistr from package MASS
## and function fitdist from package fitdistrplus.
## Dispatch to the univariate driver "FLXMC<dist>" (e.g. dist = "lnorm"
## resolves to FLXMClnorm); `...` is forwarded to the driver.
FLXMCdist1 <- function(formula=.~., dist, ...) {
  driver_name <- paste0("FLXMC", dist)
  if (!exists(driver_name)) {
    stop("This distribution has not been implemented yet.")
  }
  driver <- get(driver_name)
  driver(formula, ...)
}
## Shared response preprocessor for the univariate positive-valued
## drivers (log-normal, inverse Gaussian, gamma, exponential, Weibull):
## the response must be a single non-negative column.
## (The historic "prepoc" spelling is kept because the drivers
## reference this name.)
## FIX: the univariate error message previously claimed the check was
## for "the inverse gaussian family", although this helper is shared by
## several families; the message is now family-neutral.
prepoc.y.pos.1 <- function(x) {
  if (ncol(x) > 1)
    stop("y must be univariate")
  if (any(x < 0))
    stop("values must be >= 0")
  x
}
## FLXMClnorm: driver for univariate log-normal clustering with
## closed-form weighted ML estimates on the log scale.
FLXMClnorm <- function(formula=.~., ...)
{
  z <- new("FLXMC", weighted=TRUE, formula=formula,
           dist = "lnorm", name="model-based log-normal clustering")
  z@preproc.y <- prepoc.y.pos.1
  z@defineComponent <- function(para) {
    ## constant prediction: the component meanlog for every observation
    predict <- function(x, ...)
      matrix(para$meanlog, nrow = nrow(x), ncol = 1, byrow = TRUE)
    ## FIX: logLik previously had signature function(x, y) while its
    ## body used `...`; that `...` resolved lexically to the `...` of
    ## the FLXMClnorm() constructor, silently forwarding constructor
    ## arguments into predict().  Accept `...` locally instead, matching
    ## the other FLXMC drivers in this file.
    logLik <- function(x, y, ...)
      dlnorm(y, meanlog = predict(x, ...), sdlog = para$sdlog, log = TRUE)
    new("FLXcomponent", parameters = list(meanlog = para$meanlog, sdlog = para$sdlog),
        predict = predict, logLik = logLik, df = para$df)
  }
  ## weighted ML estimates of meanlog and sdlog on the log scale
  z@fit <- function(x, y, w, ...) {
    logy <- log(y); meanw <- mean(w)
    meanlog <- mean(w * logy) / meanw
    sdlog <- sqrt(mean(w * (logy - meanlog)^2) / meanw)
    z@defineComponent(list(meanlog = meanlog, sdlog = sdlog, df = 2))
  }
  z
}
## FLXMCinvGauss: inverse Gaussian clustering with closed-form weighted
## ML estimates (nu = weighted mean; lambda from the weighted mean of
## the reciprocal deviations); densities via SuppDists::dinvGauss.
FLXMCinvGauss <- function(formula=.~., ...)
{
z <- new("FLXMC", weighted=TRUE, formula=formula,
name = "model-based inverse Gaussian clustering",
dist = "invGauss")
z@preproc.y <- prepoc.y.pos.1
z@defineComponent <- function(para) {
## constant prediction: the component mean nu
predict <- function(x, ...)
matrix(para$nu, nrow = nrow(x), ncol = length(para$nu),
byrow = TRUE)
logLik <- function(x, y, ...)
SuppDists::dinvGauss(y, nu = predict(x, ...), lambda = para$lambda, log = TRUE)
new("FLXcomponent", parameters = list(nu = para$nu, lambda = para$lambda),
predict = predict, logLik = logLik, df = para$df)
}
## closed-form weighted ML estimates; df = 2 (nu and lambda)
z@fit <- function(x, y, w, ...){
nu <- mean(w * y) / mean(w)
lambda <- mean(w) / mean(w * (1 / y - 1 / nu))
z@defineComponent(list(nu = nu, lambda = lambda, df = 2))
}
z
}
## FLXMCgamma: gamma clustering.  There is no closed-form weighted ML
## estimator, so shape/rate are found numerically with optim(),
## initialized by the method of moments on the first call and by the
## previous component parameters afterwards.  `warn` temporarily sets
## the warning level during optimization.
FLXMCgamma <- function(formula=.~., method = "Nelder-Mead", warn = -1, ...)
{
  z <- new("FLXMC", weighted=TRUE, formula=formula,
           name = "model-based gamma clustering",
           dist = "gamma")
  z@preproc.y <- prepoc.y.pos.1
  z@defineComponent <- function(para) {
    ## constant prediction: the component shape parameter
    predict <- function(x, ...)
      matrix(para$shape, nrow = nrow(x), ncol = length(para$shape),
             byrow = TRUE)
    logLik <- function(x, y, ...)
      dgamma(y, shape = predict(x, ...), rate = para$rate, log = TRUE)
    new("FLXcomponent", parameters = list(shape = para$shape, rate = para$rate),
        predict = predict, logLik = logLik, df = para$df)
  }
  z@fit <- function(x, y, w, component){
    if (!length(component)) {
      ## method-of-moments starting values from the weighted sample
      sw <- sum(w)
      mean <- sum(y * w) / sw
      var <- (sum(y^2 * w) / sw - mean^2) * sw / (sw - 1)
      start <- c(mean^2/var, mean/var)
    } else start <- unname(unlist(component))
    control <- list(parscale = c(1, start[2]))
    ## negative weighted log-likelihood to minimize
    f <- function(parms) -sum(dgamma(y, shape = parms[1], rate = parms[2], log = TRUE) * w)
    oop <- options(warn = warn)
    ## FIX: restore the saved options on exit.  The original
    ## `on.exit(oop)` merely evaluated the saved list without calling
    ## options(), so the `warn` setting leaked permanently after the
    ## first fit.
    on.exit(options(oop))
    parms <- optim(start, f, method = method, control = control)$par
    z@defineComponent(list(shape = parms[1], rate = parms[2], df = 2))
  }
  z
}
## Model driver for finite mixtures of exponential distributions.  The
## weighted ML estimate of the rate has a closed form, so the M-step
## needs no numerical optimization.
FLXMCexp <- function(formula=.~., ...)
{
  z <- new("FLXMC", weighted = TRUE, formula = formula,
           name = "model-based exponential clustering",
           dist = "exp")
  z@preproc.y <- prepoc.y.pos.1
  ## Build an FLXcomponent from a parameter list 'pars' (rate, df).
  z@defineComponent <- function(pars) {
    ## Covariate-free prediction: every row repeats the rate parameter.
    predict <- function(x, ...) {
      matrix(pars$rate, nrow = nrow(x), ncol = length(pars$rate), byrow = TRUE)
    }
    logLik <- function(x, y, ...) {
      dexp(y, rate = predict(x, ...), log = TRUE)
    }
    new("FLXcomponent", parameters = list(rate = pars$rate),
        predict = predict, logLik = logLik, df = pars$df)
  }
  ## Weighted M-step: rate is the reciprocal of the weighted mean of y.
  z@fit <- function(x, y, w, component) {
    rate_hat <- mean(w) / mean(w * y)
    z@defineComponent(list(rate = rate_hat, df = 1))
  }
  z
}
## Model driver for finite mixtures of Weibull distributions.  Shape and
## scale are estimated per component by weighted ML via optim();
## 'method' is passed on to optim() and 'warn' sets options(warn = )
## for the duration of the optimization.
FLXMCweibull <- function(formula=.~., method = "Nelder-Mead", warn = -1, ...)
{
  z <- new("FLXMC", weighted=TRUE, formula=formula,
           name = "model-based Weibull clustering",
           dist = "weibull")
  z@preproc.y <- prepoc.y.pos.1
  ## Build an FLXcomponent from a parameter list 'para' (shape, scale, df).
  z@defineComponent <- function(para) {
    ## Covariate-free prediction: each row repeats the shape parameter.
    predict <- function(x, ...)
      matrix(para$shape, nrow = nrow(x), ncol = length(para$shape),
             byrow = TRUE)
    logLik <- function(x, y, ...)
      dweibull(y, shape = predict(x, ...), scale = para$scale, log = TRUE)
    new("FLXcomponent", parameters = list(shape = para$shape, scale = para$scale),
        predict = predict, logLik = logLik, df = para$df)
  }
  ## Weighted M-step: numeric ML, warm-started from previous parameters.
  z@fit <- function(x, y, w, component){
    if (!length(component)) {
      ## Start values from the weighted moments of log(y); the rounded
      ## constants 1.2 and 0.572 come from the Weibull moments of log(y)
      ## (pi/sqrt(6) and Euler's constant, respectively).
      ly <- log(y)
      sw <- sum(w)
      mean <- sum(ly * w) / sw
      var <- (sum(ly^2 * w) / sw - mean^2) * sw / (sw - 1)
      shape <- 1.2/sqrt(var)
      scale <- exp(mean + 0.572/shape)
      start <- c(shape, scale)
    } else start <- unname(unlist(component))
    ## Weighted negative log-likelihood in (shape, scale).
    f <- function(parms) -sum(dweibull(y, shape = parms[1], scale = parms[2], log = TRUE) * w)
    oop <- options(warn = warn)
    ## BUG FIX: was on.exit(oop), which merely evaluates the saved list and
    ## never restores the warning level; options(oop) actually restores it.
    on.exit(options(oop))
    parms <- optim(start, f, method = method)$par
    z@defineComponent(list(shape = parms[1], scale = parms[2], df = 2))
  }
  z
}
## Model driver for finite mixtures of Burr distributions (density from
## package actuar).  shape1 has a closed-form profile ML estimate given
## (shape2, scale), so only those two are optimized numerically.
FLXMCburr <- function(formula=.~., start = NULL, method = "Nelder-Mead", warn = -1, ...)
{
  z <- new("FLXMC", weighted=TRUE, formula=formula,
           name = "model-based Burr clustering",
           dist = "burr")
  z@preproc.y <- prepoc.y.pos.1
  ## Build an FLXcomponent from a parameter list (shape1, shape2, scale, df).
  z@defineComponent <- function(para) {
    ## Covariate-free prediction: each row repeats shape1.
    predict <- function(x, ...)
      matrix(para$shape1, nrow = nrow(x), ncol = length(para$shape1),
             byrow = TRUE)
    logLik <- function(x, y, ...)
      actuar::dburr(y, shape1 = predict(x, ...), shape2 = para$shape2, scale = para$scale, log = TRUE)
    new("FLXcomponent", parameters = list(shape1 = para$shape1, shape2 = para$shape2, scale = para$scale),
        predict = predict, logLik = logLik, df = para$df)
  }
  ## Weighted M-step, warm-started from the previous (shape2, scale).
  z@fit <- function(x, y, w, component){
    if (!length(component)) {
      if (is.null(start)) start <- c(1, 1)
    } else start <- unname(unlist(component[2:3]))
    ## Profile ML estimate of shape1 given parms = (shape2, scale);
    ## factored out so the objective and the final fit agree exactly.
    shape1_hat <- function(parms) sum(w) / sum(w * log(1 + (y/parms[2])^parms[1]))
    ## Weighted negative profile log-likelihood in (shape2, scale).
    f <- function(parms) {
      -sum(actuar::dburr(y, shape1 = shape1_hat(parms), shape2 = parms[1], scale = parms[2], log = TRUE) * w)
    }
    oop <- options(warn = warn)
    ## BUG FIX: was on.exit(oop), which merely evaluates the saved list and
    ## never restores the warning level; options(oop) actually restores it.
    on.exit(options(oop))
    parms <- optim(start, f, method = method)$par
    z@defineComponent(list(shape1 = shape1_hat(parms), shape2 = parms[1], scale = parms[2], df = 3))
  }
  z
}
## Model driver for finite mixtures of inverse Burr distributions (density
## from package actuar).  shape1 has a closed-form profile ML estimate
## given (shape2, scale).  The new 'method' argument (appended after the
## existing ones for backward compatibility, matching FLXMCburr) is passed
## on to optim(); the previous behavior ("Nelder-Mead") is the default.
FLXMCinvburr <- function(formula=.~., start = NULL, warn = -1, method = "Nelder-Mead", ...)
{
  z <- new("FLXMC", weighted=TRUE, formula=formula,
           name = "model-based Inverse Burr clustering",
           dist = "invburr")
  z@preproc.y <- prepoc.y.pos.1
  ## Build an FLXcomponent from a parameter list (shape1, shape2, scale, df).
  z@defineComponent <- function(para) {
    ## Covariate-free prediction: each row repeats shape1.
    predict <- function(x, ...)
      matrix(para$shape1, nrow = nrow(x), ncol = length(para$shape1),
             byrow = TRUE)
    logLik <- function(x, y, ...)
      actuar::dinvburr(y, shape1 = predict(x, ...), shape2 = para$shape2, scale = para$scale, log = TRUE)
    new("FLXcomponent", parameters = list(shape1 = para$shape1, shape2 = para$shape2, scale = para$scale),
        predict = predict, logLik = logLik, df = para$df)
  }
  ## Weighted M-step, warm-started from the previous (shape2, scale).
  z@fit <- function(x, y, w, component){
    if (!length(component)) {
      if (is.null(start)) start <- c(1, 1)
    } else start <- unname(unlist(component[2:3]))
    ## Profile ML estimate of shape1 given parms = (shape2, scale);
    ## factored out so the objective and the final fit agree exactly.
    shape1_hat <- function(parms) sum(w) / sum(w * log(1 + (parms[2]/y)^parms[1]))
    ## Weighted negative profile log-likelihood in (shape2, scale).
    f <- function(parms) {
      -sum(actuar::dinvburr(y, shape1 = shape1_hat(parms), shape2 = parms[1], scale = parms[2], log = TRUE) * w)
    }
    oop <- options(warn = warn)
    ## BUG FIX: was on.exit(oop), which merely evaluates the saved list and
    ## never restores the warning level; options(oop) actually restores it.
    on.exit(options(oop))
    parms <- optim(start, f, method = method)$par
    z@defineComponent(list(shape1 = shape1_hat(parms), shape2 = parms[1], scale = parms[2], df = 3))
  }
  z
}
flexmix/R/refit.R 0000644 0001762 0000144 00000052705 14404637304 013401 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: refit.R 5079 2016-01-31 12:21:12Z gruen $
#
###*********************************************************
## Collect all estimated parameters of a mixture into one named vector:
## per-model component parameters prefixed with "model.<m>_", followed by
## the concomitant model coefficients.
setMethod("FLXgetParameters", signature(object="FLXdist"),
function(object, model) {
## By default, collect the parameters of all component models.
if (missing(model)) model <- seq_along(object@model)
coefficients <- unlist(lapply(model, function(m) {
Model <- unlist(FLXgetParameters(object@model[[m]], lapply(object@components, "[[", m)))
names(Model) <- paste("model.", m, "_", names(Model), sep = "")
Model
}))
c(coefficients, FLXgetParameters(object@concomitant))
})
## Default method: flatten each component's parameter list into a
## (named) numeric vector.
setMethod("FLXgetParameters", signature(object="FLXM"),
          function(object, components, ...) {
            extract_pars <- function(comp) unlist(slot(comp, "parameters"))
            lapply(components, extract_pars)
          })
## For multivariate normal components only the free covariance entries
## are returned: the diagonal if the covariance matrix is diagonal,
## otherwise its lower triangle (including the diagonal).  All other
## distributions fall back to flattening the parameter list.
setMethod("FLXgetParameters", signature(object="FLXMC"),
function(object, components, ...) {
if (object@dist == "mvnorm") {
return(lapply(components, function(x) {
pars <- x@parameters
if (identical(pars$cov, diag(diag(pars$cov)))) return(c(pars$center, diag(pars$cov)))
else return(c(pars$center, pars$cov[lower.tri(pars$cov, diag = TRUE)]))
}))
} else return(lapply(components, function(x) unlist(slot(x, "parameters"))))
})
## GLM components: return one vector of *unique* parameters, using the
## design matrix from FLXgetDesign() to map component parameters onto
## shared positions.  For gaussian models sigma is log-transformed so
## downstream optimization is unconstrained.
setMethod("FLXgetParameters", signature(object="FLXMRglm"),
function(object, components, ...) {
parms <- lapply(components, function(x) unlist(slot(x, "parameters")))
Design <- FLXgetDesign(object, components)
if (object@family == "gaussian") {
parms <- lapply(parms, function(x) {
x["sigma"] <- log(x["sigma"])
x
})
colnames(Design) <- gsub("sigma$", "log(sigma)", colnames(Design))
}
## Write each component's parameters into the slots its design row marks.
parms_unique <- vector(length = ncol(Design))
names(parms_unique) <- colnames(Design)
for (k in seq_along(parms))
parms_unique[as.logical(Design[k,])] <- parms[[k]]
parms_unique
})
## Concomitant model (constant priors): return the log-odds relative to
## the first component; NULL for a one-component model (no free parameters).
setMethod("FLXgetParameters", signature(object="FLXP"),
function(object, ...) {
if (length(object@coef) == 1) return(NULL)
alpha <- log(object@coef[-1]) - log(object@coef[1])
names(alpha) <- paste("concomitant", paste("Comp", seq_along(object@coef)[-1], "alpha", sep = "."), sep = "_")
return(alpha)
})
## Multinomial concomitant model: return the coefficients of all
## components except the first (reference) one as a named vector.
setMethod("FLXgetParameters", signature(object="FLXPmultinom"),
function(object, ...) {
coefficients <- object@coef[,-1,drop=FALSE]
if (ncol(coefficients) > 0) {
Names <- paste("Comp", rep(seq_len(ncol(coefficients)+1)[-1], each = nrow(coefficients)),
rownames(coefficients), sep = ".")
coefficients <- as.vector(coefficients)
names(coefficients) <- paste("concomitant", Names, sep = "_")
return(coefficients)
}else return(NULL)
})
## Estimate the variance-covariance matrix of the parameter estimates by
## (re-)maximizing the log-likelihood with optim(method = "BFGS") from the
## fitted values and inverting the negative numerical Hessian.
setMethod("VarianceCovariance", signature(object="flexmix"),
function(object, model = TRUE, gradient, optim_control = list(), ...) {
if (object@control@classify != "weighted") stop("Only for weighted ML estimation possible.")
if (length(FLXgetParameters(object)) != object@df) stop("not implemented yet for restricted parameters.")
## Use the analytical gradient where available (NULL makes optim use
## finite differences).
if (missing(gradient)) gradient <- FLXgradlogLikfun(object)
## fnscale = -1 turns optim's minimization into maximization.
optim_control$fnscale <- -1
fit <- optim(fn = FLXlogLikfun(object), par = FLXgetParameters(object), gr = gradient,
hessian = TRUE, method = "BFGS", control = optim_control, ...)
list(coef = fit$par, vcov = -solve(as.matrix(fit$hessian)))
})
## Component-wise unscaled log-likelihood contributions: sum the
## per-model log-densities over all models (observations x components),
## aggregating over grouped observations if a grouping is present.
setMethod("logLikfun_comp", signature(object="flexmix"),
function(object) {
postunscaled <- matrix(0, nrow = FLXgetObs(object@model[[1]]), ncol = object@k)
for (m in seq_along(object@model))
postunscaled <- postunscaled + FLXdeterminePostunscaled(object@model[[m]], lapply(object@components, "[[", m))
if(length(object@group)>0)
postunscaled <- groupPosteriors(postunscaled, object@group)
postunscaled
})
## Return the mixture log-likelihood as a function of the parameter
## vector (as produced by FLXgetParameters), for use with optim().
setMethod("FLXlogLikfun", signature(object="flexmix"),
function(object, ...) function(parms) {
object <- FLXreplaceParameters(object, parms)
## Only the first observation of each group contributes to the likelihood.
groupfirst <- if (length(object@group) > 1) groupFirst(object@group) else rep(TRUE, FLXgetObs(object@model[[1]]))
logpostunscaled <- logLikfun_comp(object) +
log(getPriors(object@concomitant, object@group, groupfirst))
## log-sum-exp over components, optionally weighted by case weights.
if (is.null(object@weights)) sum(log_row_sums(logpostunscaled[groupfirst,,drop=FALSE]))
else sum(log_row_sums(logpostunscaled[groupfirst,,drop=FALSE])*object@weights[groupfirst])
})
## Row-normalized prior probabilities per observation from the concomitant
## model, expanded back to the ungrouped observations.
setMethod("getPriors", signature(object="FLXP"),
function(object, group, groupfirst) {
priors <- matrix(apply(object@coef, 2, function(x) object@x %*% x),
nrow = nrow(object@x))
ungroupPriors(priors/rowSums(priors), group, groupfirst)
})
## Multinomial variant: coefficients enter through the exp() link before
## row-normalization (softmax).
setMethod("getPriors", signature(object="FLXPmultinom"),
function(object, group, groupfirst) {
priors <- matrix(apply(object@coef, 2, function(x) exp(object@x %*% x)),
nrow = nrow(object@x))
ungroupPriors(priors/rowSums(priors), group, groupfirst)
})
## Inverse of FLXgetParameters(FLXdist): distribute a parameter vector
## (named with the "model.<m>_" / "concomitant_" prefixes) back into the
## component and concomitant objects of the mixture.
setMethod("FLXreplaceParameters", signature(object="FLXdist"),
function(object, parms) {
comp_names <- names(object@components)
components <- list()
for (m in seq_along(object@model)) {
## Select this model's parameters by their name prefix.
indices <- grep(paste("^model.", m, sep = ""), names(parms))
components[[m]] <- FLXreplaceParameters(object@model[[m]], lapply(object@components, "[[", m), parms[indices])
}
## Transpose from per-model lists back to per-component lists.
object@components <- lapply(seq_along(object@components), function(k) lapply(components, "[[", k))
names(object@components) <- comp_names
if (object@k > 1) {
indices <- grep("^concomitant_", names(parms))
object@concomitant <- FLXreplaceParameters(object@concomitant, parms[indices])
}
object
})
## Default method: rebuild every component with new parameter values.
## The parameter vector is split per component via the design matrix and
## consumed in the order of the stored parameter list, preserving each
## entry's attributes (dim, names).
setMethod("FLXreplaceParameters", signature(object="FLXM"),
function(object, components, parms) {
Design <- FLXgetDesign(object, components)
lapply(seq_along(components), function(k) {
Parameters <- list()
parms_k <- parms[as.logical(Design[k,])]
## Peel off one parameter entry at a time; order must match exactly.
for (i in seq_along(components[[k]]@parameters)) {
Parameters[[i]] <- parms_k[seq_along(components[[k]]@parameters[[i]])]
attributes(Parameters[[i]]) <- attributes(components[[k]]@parameters[[i]])
parms_k <- parms_k[-seq_along(components[[k]]@parameters[[i]])]
}
names(Parameters) <- names(components[[k]]@parameters)
Parameters$df <- components[[k]]@df
## Make the model's data visible to defineComponent (it may capture
## x and y from the calling environment).
variables <- c("x", "y")
for (var in variables)
assign(var, slot(object, var))
if (is(object@defineComponent, "expression"))
eval(object@defineComponent, Parameters)
else
object@defineComponent(Parameters)
})
})
## mvnorm components store the *full* covariance matrix, while the unique
## parameter vector carries only the free entries (diagonal or lower
## triangle).  Expand/reorder parms into the full per-component layout
## before delegating to the default method.
setMethod("FLXreplaceParameters", signature(object="FLXMC"),
function(object, components, parms) {
Design <- FLXgetDesign(object, components)
if (object@dist == "mvnorm") {
## Dimension p solves p^2 + p = parameters per component (center plus
## full covariance matrix) since Design is built from the full layout.
p <- sqrt(1/4+ncol(Design)/nrow(Design)) - 1/2
diagonal <- get("diagonal", environment(object@fit))
if (diagonal) {
cov <- diag(seq_len(p))
## Expand the p variances of each component into a p x p diagonal matrix.
parms_comp <- as.vector(sapply(seq_len(nrow(Design)), function(i)
c(parms[(i-1) * 2 * p + seq_len(p)], as.vector(diag(diag(parms[(i-1) * 2 * p + p + seq_len(p)]))))))
parms <- c(parms_comp, parms[(nrow(Design) * 2 * p + 1):length(parms)])
} else {
## Index matrix mapping the lower-triangle storage onto the full,
## symmetric covariance matrix.
cov <- matrix(NA, nrow = p, ncol = p)
cov[lower.tri(cov, diag = TRUE)] <- seq_len(sum(lower.tri(cov, diag = TRUE)))
cov[upper.tri(cov)] <- t(cov)[upper.tri(cov)]
parms <- parms[c(as.vector(sapply(seq_len(nrow(Design)), function(i) (i-1)*(max(cov)+p) + c(seq_len(p), as.vector(cov) + p))),
(nrow(Design) * (max(cov) + p)+1):length(parms))]
}
}
callNextMethod(object = object, components = components, parms = parms)
})
## GLM components: like the default method, but sigma is stored on the
## log scale in the parameter vector (see FLXgetParameters) and must be
## exponentiated back, and defineComponent may capture offset and family.
setMethod("FLXreplaceParameters", signature(object="FLXMRglm"),
function(object, components, parms) {
Design <- FLXgetDesign(object, components)
lapply(seq_along(components), function(k) {
Parameters <- list()
parms_k <- parms[as.logical(Design[k,])]
## Peel off one parameter entry at a time; order must match exactly.
for (i in seq_along(components[[k]]@parameters)) {
Parameters[[i]] <- parms_k[seq_along(components[[k]]@parameters[[i]])]
attributes(Parameters[[i]]) <- attributes(components[[k]]@parameters[[i]])
parms_k <- parms_k[-seq_along(components[[k]]@parameters[[i]])]
}
names(Parameters) <- names(components[[k]]@parameters)
if (object@family == "gaussian") {
## Undo the log transform applied in FLXgetParameters.
Parameters[["sigma"]] <- exp(Parameters[["sigma"]])
}
Parameters$df <- components[[k]]@df
## Make the model's data and family visible to defineComponent.
variables <- c("x", "y", "offset", "family")
for (var in variables) {
assign(var, slot(object, var))
}
if (is(object@defineComponent, "expression"))
eval(object@defineComponent, Parameters)
else
object@defineComponent(Parameters)
})
})
## Inverse of FLXgetParameters(FLXP): the stored coefficients are the
## softmax of (0, parms), with the attributes (dim, names) of the old
## coefficients carried over.
setMethod("FLXreplaceParameters", signature(object="FLXP"),
          function(object, parms) {
            unnormalized <- exp(c(0, parms))
            probs <- unnormalized/sum(unnormalized)
            attributes(probs) <- attributes(object@coef)
            object@coef <- probs
            object
          })
## Inverse of FLXgetParameters(FLXPmultinom): prepend the zero column of
## the reference component and restore the coefficient matrix attributes.
setMethod("FLXreplaceParameters", signature(object="FLXPmultinom"),
function(object, parms) {
parms <- cbind(0, matrix(parms, nrow = nrow(object@coef)))
attributes(parms) <- attributes(object@coef)
object@coef <- parms
object
})
## Return the gradient of the mixture log-likelihood as a function of the
## parameter vector, or NULL if any involved model lacks an analytical
## gradient (optim then falls back to numerical differentiation).
setMethod("FLXgradlogLikfun", signature(object="flexmix"),
function(object, ...) {
existFunction <- all(sapply(object@model, existGradient))
if (object@k > 1) existFunction <- c(existFunction,
existGradient(object@concomitant))
if (any(!existFunction)) return(NULL)
function(parms) {
object <- FLXreplaceParameters(object, parms)
groupfirst <- if (length(object@group) > 1) groupFirst(object@group) else rep(TRUE, FLXgetObs(object@model[[1]]))
logLik_comp <- logLikfun_comp(object)
Priors <- getPriors(object@concomitant, object@group, groupfirst)
Priors_Lik_comp <- logLik_comp + log(Priors)
## Posterior component probabilities, computed on the log scale.
weights <- exp(Priors_Lik_comp - log_row_sums(Priors_Lik_comp))
if (object@k > 1) {
ConcomitantScores <- FLXgradlogLikfun(object@concomitant, Priors[groupfirst,,drop=FALSE],
weights[groupfirst,,drop=FALSE])
if (!is.null(object@weights))
ConcomitantScores <- lapply(ConcomitantScores, "*", object@weights[groupfirst])
}
else ConcomitantScores <- list()
ModelScores <- lapply(seq_along(object@model), function(m)
FLXgradlogLikfun(object@model[[m]],
lapply(object@components, "[[", m), weights))
## Aggregate scores over grouped observations, apply case weights, and
## sum over observations to get the gradient vector.
ModelScores <- lapply(ModelScores, lapply, groupPosteriors, object@group)
if (!is.null(object@weights))
ModelScores <- lapply(ModelScores, lapply, "*", object@weights)
colSums(cbind(do.call("cbind", lapply(ModelScores, function(x) do.call("cbind", x)))[groupfirst,,drop=FALSE],
do.call("cbind", ConcomitantScores)))
}
})
## Default: no analytical gradient available for a generic model driver.
setMethod("existGradient", signature(object = "FLXM"),
function(object) FALSE)
## Analytical gradients are implemented for all GLM families except Gamma.
setMethod("existGradient", signature(object = "FLXMRglm"),
          function(object) {
            object@family != "Gamma"
          })
## No analytical gradient for GLMs with fixed/nested parameter restrictions.
setMethod("existGradient", signature(object = "FLXMRglmfix"),
function(object) FALSE)
## Concomitant models always provide an analytical gradient.
setMethod("existGradient", signature(object = "FLXP"),
function(object) TRUE)
## Per-component score contributions for GLM components: weighted working
## residuals times the design matrix; gaussian models get an extra column
## for the log(sigma) parameter.
setMethod("FLXgradlogLikfun", signature(object="FLXMRglm"),
function(object, components, weights, ...) {
lapply(seq_along(components), function(k) {
## Residual: for binomial, y is a (success, failure) matrix.
res <- if (object@family == "binomial") as.vector(object@y[,1] - rowSums(object@y)*components[[k]]@predict(object@x))
else as.vector(object@y - components[[k]]@predict(object@x))
Scores <- weights[,k] * res * object@x
if (object@family == "gaussian") {
## Scale scores by 1/sigma^2 and append the score for log(sigma).
Scores <- cbind(Scores/components[[k]]@parameters$sigma^2,
weights[,k] * (-1 + res^2/components[[k]]@parameters$sigma^2))
}
Scores
})
})
## Score contributions for the concomitant model: (posterior - prior) per
## non-reference component, multiplied column-wise into the design matrix.
setMethod("FLXgradlogLikfun", signature(object="FLXP"),
function(object, fitted, weights, ...) {
Pi <- lapply(seq_len(ncol(fitted))[-1], function(i) - fitted[,i] + weights[,i])
lapply(Pi, function(p) apply(object@x, 2, "*", p))
})
## Refit a fitted mixture to obtain inferential results.
## method = "optim": re-maximize the likelihood to get coefficients and a
## variance-covariance matrix (standard errors).
## method = "mstep": rerun the weighted M-step per component and return
## the component-wise fits.
setMethod("refit", signature(object = "flexmix"),
function(object, newdata, method = c("optim", "mstep"), ...) {
method <- match.arg(method)
if (method == "optim") {
VarCov <- VarianceCovariance(object, ...)
z <- new("FLXRoptim",
call=sys.call(-1), k = object@k,
coef = VarCov$coef, vcov = VarCov$vcov)
z@components <- lapply(seq_along(object@model), function(m) {
## Select this model's coefficients by their "model.<m>" name prefix.
begin_name <- paste("^model", m, sep = ".")
indices <- grep(begin_name, names(z@coef))
refit_optim(object@model[[m]], components = lapply(object@components, "[[", m), coef = z@coef[indices], se = sqrt(diag(z@vcov)[indices]))
})
z@concomitant <- if (object@k > 1) {
indices <- grep("^concomitant_", names(z@coef))
refit_optim(object@concomitant, coef = z@coef[indices], se = sqrt(diag(z@vcov)[indices]))
} else NULL
} else {
z <- new("FLXRmstep",
call=sys.call(-1), k = object@k)
z@components <- lapply(object@model, function(x) {
x <- refit_mstep(x, weights=object@posterior$scaled)
names(x) <- paste("Comp", seq_len(object@k), sep=".")
x
})
z@concomitant <- if (object@k > 1) refit_mstep(object@concomitant, posterior = object@posterior$scaled,
group = object@group, w = object@weights) else NULL
}
z
})
## Build per-component coefficient tables (estimate, std. error, z value,
## p value) from the coefficient and standard-error vectors returned by
## the "optim" refit.
setMethod("refit_optim", signature(object = "FLXM"),
function(object, components, coef, se) {
Design <- FLXgetDesign(object, components)
x <- lapply(seq_len(nrow(Design)), function(k) {
rval <- cbind(Estimate = coef[as.logical(Design[k,])],
"Std. Error" = se[as.logical(Design[k,])])
## Report only the first parameter entry (e.g. the coefficients).
pars <- components[[k]]@parameters[[1]]
rval <- rval[seq_along(pars),,drop=FALSE]
rownames(rval) <- names(pars)
zval <- rval[,1]/rval[,2]
## Two-sided Wald test p-values.
new("Coefmat", cbind(rval, "z value" = zval, "Pr(>|z|)" = 2 * pnorm(abs(zval), lower.tail = FALSE)))
})
names(x) <- paste("Comp", seq_along(x), sep = ".")
x
})
## mvnorm components: expand/reorder the unique-parameter coef and se
## vectors into the full per-component layout (center + full covariance
## matrix) before delegating to the default method; mirrors the index
## arithmetic in FLXreplaceParameters(FLXMC).
setMethod("refit_optim", signature(object = "FLXMC"),
function(object, components, coef, se) {
Design <- FLXgetDesign(object, components)
if (object@dist == "mvnorm") {
p <- length(grep("Comp.1_center", colnames(Design), fixed = TRUE))
diagonal <- get("diagonal", environment(object@fit))
if (diagonal) {
cov <- diag(seq_len(p))
## Expand the p variances per component into a p x p diagonal matrix.
coef_comp <- as.vector(sapply(seq_len(nrow(Design)), function(i)
c(coef[(i-1) * 2 * p + seq_len(p)],
as.vector(diag(diag(coef[(i-1) * 2 * p + p + seq_len(p)]))))))
coef <- c(coef_comp, coef[(nrow(Design) * 2 * p + 1):length(coef)])
se_comp <- as.vector(sapply(seq_len(nrow(Design)), function(i)
c(se[(i-1) * 2 * p + seq_len(p)],
as.vector(diag(diag(se[(i-1) * 2 * p + p + seq_len(p)]))))))
se <- c(se_comp, se[(nrow(Design) * 2 * p + 1):length(se)])
} else {
## Index matrix mapping lower-triangle storage onto the full,
## symmetric covariance matrix.
cov <- matrix(NA, nrow = p, ncol = p)
cov[lower.tri(cov, diag = TRUE)] <- seq_len(sum(lower.tri(cov, diag = TRUE)))
cov[upper.tri(cov)] <- t(cov)[upper.tri(cov)]
coef <- coef[c(as.vector(sapply(seq_len(nrow(Design)),
function(i) (i-1)*(max(cov)+p) + c(seq_len(p), as.vector(cov) + p))),
(nrow(Design) * (max(cov) + p)+1):length(coef))]
se <- se[c(as.vector(sapply(seq_len(nrow(Design)),
function(i) (i-1)*(max(cov)+p) + c(seq_len(p), as.vector(cov) + p))),
(nrow(Design) * (max(cov) + p)+1):length(se))]
}
}
callNextMethod(object = object, components = components, coef = coef, se = se)
})
## Coefficient tables for the concomitant model, one per non-reference
## component (component 1 is the reference with coefficients fixed at 0).
setMethod("refit_optim", signature(object = "FLXP"),
function(object, coef, se) {
x <- lapply(seq_len(ncol(object@coef))[-1], function(k) {
indices <- grep(paste("Comp", k, sep = "."), names(coef))
rval <- cbind(Estimate = coef[indices],
"Std. Error" = se[indices])
rval <- rval[seq_len(nrow(object@coef)),,drop=FALSE]
rownames(rval) <- rownames(object@coef)
zval <- rval[,1]/rval[,2]
## Two-sided Wald test p-values.
new("Coefmat", cbind(rval, "z value" = zval, "Pr(>|z|)" = 2 * pnorm(abs(zval), lower.tail = FALSE)))
})
names(x) <- paste("Comp", 1 + seq_along(x), sep = ".")
x
})
## Indicator matrix (components x unique parameters) mapping each
## component onto its slots in the stacked parameter vector.  Default:
## block-diagonal, i.e. no parameters are shared between components.
setMethod("FLXgetDesign", signature(object = "FLXM"),
function(object, components, ...) {
parms <- lapply(components, function(x) unlist(slot(x, "parameters")))
nr_parms <- sapply(parms, length)
cumSum <- cumsum(c(0, nr_parms))
Design <- t(sapply(seq_len(length(cumSum)-1), function(i) rep(c(0, 1, 0), c(cumSum[i], nr_parms[i], max(cumSum) - cumSum[i] - nr_parms[i]))))
## Column names follow the "Comp.<k>_<parameter>" convention.
colnames(Design) <- paste(rep(paste("Comp", seq_len(nrow(Design)), sep = "."), nr_parms),
unlist(lapply(parms, names)), sep = "_")
Design
})
## Restricted GLMs: use the stored design (which may share coefficients
## across components); for gaussian models append indicator columns for
## the sigma parameters, which may also be shared in groups given by
## object@variance.
setMethod("FLXgetDesign", signature(object = "FLXMRglmfix"),
function(object, components, ...) {
if (length(components) == 1) return(callNextMethod(object, components, ...))
Design <- object@design
if (object@family == "gaussian") {
cumSum <- cumsum(c(0, object@variance))
variance <- matrix(sapply(seq_len(length(cumSum)-1), function(i)
rep(c(0, 1, 0), c(cumSum[i], object@variance[i], length(components) - cumSum[i] - object@variance[i]))),
nrow = length(components))
## Name each sigma column after the first component that uses it.
colnames(variance) <- paste("Comp", apply(variance, 2, function(x) which(x == 1)[1]), "sigma", sep= ".")
Design <- cbind(Design, variance)
}
Design
})
###*********************************************************
## Rerun the weighted M-step once per component on the stored data and
## return the resulting parameter lists.
setMethod("refit_mstep", signature(object="FLXM"),
          function(object, newdata, weights, ...)
{
  fit_component <- function(k) {
    object@fit(object@x, object@y, weights[, k], ...)@parameters
  }
  lapply(seq_len(ncol(weights)), fit_component)
})
## GLM components: rerun the weighted refit per component and dress the
## result up as a "glm"/"lm" object so standard methods (summary etc.)
## can be applied.
setMethod("refit_mstep", signature(object="FLXMRglm"),
function(object, newdata, weights, ...)
{
lapply(seq_len(ncol(weights)), function(k) {
fit <- object@refit(object@x,
object@y,
weights[,k], ...)
## Attach the pieces a glm object is expected to carry.
fit <- c(fit,
list(formula = object@fullformula,
terms = object@terms,
contrasts = object@contrasts,
xlevels = object@xlevels))
class(fit) <- c("glm", "lm")
fit
})
})
###**********************************************************
## Fitted values of a mixture: per component (default) or averaged over
## components with the prior weights (aggregate = TRUE).  With drop = TRUE
## a list of single-column matrices is collapsed to a plain matrix.
setMethod("fitted", signature(object="flexmix"),
          function(object, drop=TRUE, aggregate = FALSE, ...)
{
  ## Per-model fitted values, one list entry per model, each a list with
  ## one matrix per component.
  x <- list()
  for (m in seq_along(object@model)) {
    comp <- lapply(object@components, "[[", m)
    x[[m]] <- fitted(object@model[[m]], comp, ...)
  }
  ## Collapse a list of single-column matrices when drop is requested.
  ## (vapply replaces the fragile all(lapply(...) == 1) list coercion.)
  drop_single <- function(z) {
    if (drop && all(vapply(z, ncol, integer(1L)) == 1L)) sapply(z, unlist) else z
  }
  if (aggregate) {
    ## Weighted average over components using the concomitant priors.
    group <- group(object)
    prior_weights <- determinePrior(object@prior, object@concomitant, group)[as.integer(group),]
    z <- lapply(x, function(z) matrix(rowSums(do.call("cbind", z) * prior_weights),
                                      nrow = nrow(z[[1]])))
  } else {
    ## Rearrange from per-model to per-component lists.
    z <- list()
    for (k in seq_len(object@k)) {
      z[[k]] <- do.call("cbind", lapply(x, "[[", k))
    }
    names(z) <- paste("Comp", seq_len(object@k), sep=".")
  }
  drop_single(z)
})
## Evaluate every component's predict function on the stored regressors.
setMethod("fitted", signature(object="FLXM"),
          function(object, components, ...) {
            lapply(components, function(comp) comp@predict(object@x))
          })
## Predictions on new data: rebuild the model matrix (no response needed,
## lhs = FALSE) and evaluate every component's predict function on it.
setMethod("predict", signature(object="FLXM"), function(object, newdata, components, ...)
{
  object <- FLXgetModelmatrix(object, newdata, formula = object@fullformula, lhs = FALSE)
  lapply(components, function(comp) comp@predict(object@x, ...))
})
###**********************************************************
## Apply FUN to (a subset of) the refitted components of one model of an
## m-step refit; 'component' selects components (TRUE = all).
setMethod("Lapply", signature(object="FLXRmstep"), function(object, FUN, model = 1, component = TRUE, ...) {
X <- object@components[[model]]
lapply(X[component], FUN, ...)
})
###*********************************************************
## Rerun the weighted M-step on new data, using the posteriors evaluated
## on that data as weights; returns an FLXR object with the refitted
## component and concomitant models.
setMethod("refit_mstep", signature(object="flexmix", newdata="listOrdata.frame"),
function(object, newdata, ...)
{
z <- new("FLXR",
call=sys.call(-1), k = object@k)
z@components <- lapply(object@model, function(x) {
x <- refit_mstep(x, newdata = newdata,
weights=posterior(object, newdata = newdata))
names(x) <- paste("Comp", seq_len(object@k), sep=".")
x
})
z@concomitant <- if (object@k > 1) refit_mstep(object@concomitant, newdata, object@posterior$scaled, object@group, w = object@weights)
else NULL
z
})
## GLM components on new data: fit one weighted glm per component.
## The weights column is also written into newdata so glm()'s standard
## non-standard evaluation can find it when the model is refitted.
setMethod("refit_mstep", signature(object="FLXMRglm", newdata="listOrdata.frame"),
function(object, newdata, weights, ...)
{
w <- weights
lapply(seq_len(ncol(w)), function(k) {
newdata$weights <- weights <- w[,k]
weighted.glm(formula = object@fullformula, data = newdata,
family = object@family, weights = weights, ...)
})
})
## Fit a glm with prior weights and adjust the degrees of freedom so they
## are based on the sum of the weights rather than the number of rows
## (the weights are posterior probabilities, not replication counts).
## The call is built explicitly so the weights argument is evaluated here
## rather than looked up by glm()'s non-standard evaluation.
weighted.glm <- function(weights, ...) {
fit <- eval(as.call(c(as.symbol("glm"), c(list(...), list(weights = weights, x = TRUE)))))
fit$df.null <- sum(weights) + fit$df.null - fit$df.residual - fit$rank
fit$df.residual <- sum(weights) - fit$rank
fit$method <- "weighted.glm.fit"
fit
}
## Low-level weighted GLM fit via glm.fit(), with the degrees of freedom
## adjusted to the sum of the weights (posterior probabilities, not
## replication counts).  'family' may be a family object, a generator
## function, or the family's name as a string.
weighted.glm.fit <- function(x, y, weights, offset = NULL, family = "gaussian", ...) {
  if (!is.function(family) && !is(family, "family"))
    family <- get(family, mode = "function", envir = parent.frame())
  ## BUG FIX: glm.fit() requires an evaluated family *object*; passing the
  ## generator function (the result of get("gaussian") or the default
  ## argument resolved by name) made every call error out.  Mirror glm()'s
  ## own handling and call the generator.
  if (is.function(family)) family <- family()
  fit <- c(glm.fit(x, y, weights = weights, offset=offset,
                   family=family),
           list(call = sys.call(), offset = offset,
                control = eval(formals(glm.fit)$control),
                method = "weighted.glm.fit"))
  ## Base the degrees of freedom on sum(weights) instead of nrow(x).
  fit$df.null <- sum(weights) + fit$df.null - fit$df.residual - fit$rank
  fit$df.residual <- sum(weights) - fit$rank
  fit$x <- x
  fit
}
flexmix/R/kldiv.R 0000644 0001762 0000144 00000010065 14404637304 013372 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: kldiv.R 5079 2016-01-31 12:21:12Z gruen $
#
## Pairwise Kullback-Leibler divergences between the columns of a matrix
## of (unnormalized) density values.  Entries below eps are clamped to
## eps and columns normalized to sum to one.  With overlap = TRUE a pair
## is left NA unless at least one row exceeds eps in both columns.
setMethod("KLdiv", "matrix",
function(object, eps=10^-4, overlap=TRUE,...)
{
if(!is.numeric(object))
stop("object must be a numeric matrix\n")
z <- matrix(NA, nrow=ncol(object), ncol=ncol(object))
colnames(z) <- rownames(z) <- colnames(object)
## Clamp small values to eps to avoid log(0).
w <- object < eps
if (any(w)) object[w] <- eps
object <- sweep(object, 2, colSums(object) , "/")
for(k in seq_len(ncol(object)-1)){
for(l in 2:ncol(object)){
## Rows where both columns exceed eps (i.e. were not clamped).
ok <- (object[, k] > eps) & (object[, l] > eps)
if (!overlap | any(ok)) {
z[k,l] <- sum(object[,k] *
(log(object[,k]) - log(object[,l])))
z[l,k] <- sum(object[,l] *
(log(object[,l]) - log(object[,k])))
}
}
}
## KL divergence of a distribution with itself is zero by definition.
diag(z)<-0
z
})
## KL divergences between the components of a fitted mixture: either from
## the posterior matrix ("discrete") or by summing the closed-form
## divergences of the component distributions over all models
## ("continuous", the default).
setMethod("KLdiv", "flexmix",
function(object, method = c("continuous", "discrete"), ...) {
method <- match.arg(method)
if (method == "discrete") z <- KLdiv(object@posterior$scaled, ...)
else {
z <- matrix(0, object@k, object@k)
for (i in seq_along(object@model)) {
comp <- lapply(object@components, "[[", i)
z <- z + KLdiv(object@model[[i]], comp)
}
}
z
})
## Closed-form pairwise KL divergences between GLM components, summed
## over observations; one formula per supported family.
setMethod("KLdiv", "FLXMRglm",
function(object, components, ...) {
z <- matrix(NA, length(components), length(components))
mu <- lapply(components, function(x) x@predict(object@x))
if (object@family == "gaussian") {
## KL between normals with means mu and standard deviations sigma.
sigma <- lapply(components, function(x) x@parameters$sigma)
for (k in seq_len(ncol(z)-1)) {
for (l in seq_len(ncol(z))[-1]) {
z[k,l] <- sum(log(sigma[[l]]) - log(sigma[[k]]) + 1/2 * (-1 + ((sigma[[k]]^2 + (mu[[k]] - mu[[l]])^2))/sigma[[l]]^2))
z[l,k] <- sum(log(sigma[[k]]) - log(sigma[[l]]) + 1/2 * (-1 + ((sigma[[l]]^2 + (mu[[l]] - mu[[k]])^2))/sigma[[k]]^2))
}
}
}
else if (object@family == "binomial") {
## KL between Bernoulli distributions with success probabilities mu.
for (k in seq_len(ncol(z)-1)) {
for (l in seq_len(ncol(z))[-1]) {
z[k,l] <- sum(mu[[k]] * log(mu[[k]]/mu[[l]]) + (1-mu[[k]]) * log((1-mu[[k]])/(1-mu[[l]])))
z[l,k] <- sum(mu[[l]] * log(mu[[l]]/mu[[k]]) + (1-mu[[l]]) * log((1-mu[[l]])/(1-mu[[k]])))
}
}
}
else if (object@family == "poisson") {
## KL between Poisson distributions with means mu.
for (k in seq_len(ncol(z)-1)) {
for (l in seq_len(ncol(z))[-1]) {
z[k,l] <- sum(mu[[k]] * log(mu[[k]]/mu[[l]]) + mu[[l]] - mu[[k]])
z[l,k] <- sum(mu[[l]] * log(mu[[l]]/mu[[k]]) + mu[[k]] - mu[[l]])
}
}
}
else if (object@family == "gamma") {
## KL between gamma distributions parameterized by mean mu and shape.
shape <- lapply(components, function(x) x@parameters$shape)
for (k in seq_len(ncol(z)-1)) {
for (l in seq_len(ncol(z))[-1]) {
X <- mu[[k]]*shape[[l]]/mu[[l]]/shape[[k]]
z[k,l] <- sum(log(gamma(shape[[l]])/gamma(shape[[k]])) + shape[[l]] * log(X) - shape[[k]] * (1 - 1/X) +
(shape[[k]] - shape[[l]])*digamma(shape[[k]]))
z[l,k] <- sum(log(gamma(shape[[k]])/gamma(shape[[l]])) - shape[[k]] * log(X) - shape[[l]] * (1 - X) +
(shape[[l]] - shape[[k]])*digamma(shape[[l]]))
}
}
}
else stop(paste("Unknown family", object@family))
diag(z) <- 0
z
})
## Closed-form pairwise KL divergences between multivariate normal
## components (the only distribution supported here).
setMethod("KLdiv", "FLXMC",
function(object, components, ...) {
z <- matrix(NA, length(components), length(components))
if (object@dist == "mvnorm") {
center <- lapply(components, function(x) x@parameters$center)
cov <- lapply(components, function(x) x@parameters$cov)
for (k in seq_len(ncol(z)-1)) {
for (l in seq_len(ncol(z))[-1]) {
## Standard KL formula for two multivariate normals.
z[k,l] <- 1/2 * (log(det(cov[[l]])) - log(det(cov[[k]])) - length(center[[k]]) +
sum(diag(solve(cov[[l]]) %*% (cov[[k]] + tcrossprod(center[[k]] - center[[l]])))))
z[l,k] <- 1/2 * (log(det(cov[[k]])) - log(det(cov[[l]])) - length(center[[l]]) +
sum(diag(solve(cov[[k]]) %*% (cov[[l]] + tcrossprod(center[[l]] - center[[k]])))))
}
}
}
else stop(paste("Unknown distribution", object@dist))
diag(z) <- 0
z
})
###**********************************************************
flexmix/R/infocrit.R 0000644 0001762 0000144 00000002776 14404637304 014110 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: infocrit.R 5079 2016-01-31 12:21:12Z gruen $
#
## Effective number of observations: the sum of the case weights if
## present, otherwise the number of rows of the posterior matrix.
setMethod("nobs", signature(object="flexmix"),
          function(object, ...) {
            if (is.null(object@weights)) {
              nrow(object@posterior$scaled)
            } else {
              sum(object@weights)
            }
          })
## Log-likelihood of the fitted mixture as a "logLik" object; with
## newdata the likelihood is evaluated on the new observations.
setMethod("logLik", signature(object="flexmix"),
function(object, newdata, ...){
if (missing(newdata)) {
z <- object@logLik
attr(z, "df") <- object@df
attr(z, "nobs") <- nobs(object)
class(z) <- "logLik"
} else {
## Sum over components of the unscaled posteriors gives the density.
z <- sum(log(rowSums(posterior(object, newdata = newdata, unscaled = TRUE))))
attr(z, "df") <- object@df
attr(z, "nobs") <- nrow(newdata)
class(z) <- "logLik"
}
z
})
## Integrated Completed Likelihood criterion: BIC computed from the
## classification (complete-data) log-likelihood.
setMethod("ICL", signature(object="flexmix"),
function(object, ...){
-2 * clogLik(object) + object@df * log(nobs(object))
})
## Complete-data log-likelihood: for each (first-of-group) observation,
## the unscaled posterior of its assigned cluster, picked out via linear
## indexing into the posterior matrix.
setMethod("clogLik", signature(object="flexmix"),
function(object, ...){
first <- if (length(object@group)) groupFirst(object@group) else TRUE
post <- object@posterior$unscaled[first,,drop=FALSE]
n <- nrow(post)
sum(log(post[seq_len(n) + (clusters(object)[first] - 1)*n]))
})
## Entropy-based information criterion: 1 minus the normalized entropy of
## the scaled posteriors (values near 1 indicate a clear partition).
setMethod("EIC", signature(object="flexmix"),
function(object, ...) {
first <- if (length(object@group)) groupFirst(object@group) else TRUE
post <- object@posterior$scaled[first,,drop=FALSE]
n <- nrow(post)
lpost <- log(post)
## Replace -Inf from zero posteriors by a large negative constant.
if (any(is.infinite(lpost))) lpost[is.infinite(lpost)] <- -10^3
1 + sum(post * lpost)/(n * log(object@k))
})
flexmix/R/allClasses.R 0000644 0001762 0000144 00000022771 14404637304 014356 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: allClasses.R 5185 2020-06-23 13:24:06Z gruen $
#
## Control parameters of the EM algorithm: maximum number of iterations,
## minimum component prior (components falling below are removed),
## convergence tolerance, verbosity, classification mode and the number
## of random restarts.
setClass("FLXcontrol",
representation(iter.max="numeric",
minprior="numeric",
tolerance="numeric",
verbose="numeric",
classify="character",
nrep="numeric"),
prototype(iter.max=200,
minprior=0.05,
tolerance=10e-7,
verbose=0,
classify="auto",
nrep=1),
validity=function(object) {
(object@iter.max > 0)
})
## Coerce a plain list of control parameters to "FLXcontrol", validating
## the 'classify' entry against the allowed values.
setAs("list", "FLXcontrol",
      function(from, to){
        ctrl <- list2object(from, to)
        ctrl@classify <- match.arg(ctrl@classify,
                                   c("auto", "weighted", "hard", "random", "SEM", "CEM"))
        ctrl
      })
## NULL coerces to the default control object.
setAs("NULL", "FLXcontrol",
function(from, to){
new(to)
})
###**********************************************************
## Union so model slots can hold either a quoted expression or a function.
setClassUnion("expressionOrfunction", c("expression", "function"))
## Virtual base class for model drivers: the weighted fit function, the
## component constructor, formula information, the model matrix/response
## and preprocessing hooks for both.
setClass("FLXM",
representation(fit="function",
defineComponent="expressionOrfunction",
weighted="logical",
name="character",
formula="formula",
fullformula="formula",
x="matrix",
y="ANY",
terms="ANY",
xlevels="ANY",
contrasts="ANY",
preproc.x="function",
preproc.y="function",
"VIRTUAL"),
prototype(formula=.~.,
fullformula=.~.,
preproc.x = function(x) x,
preproc.y = function(x) x))
## model-based clustering
setClass("FLXMC",
representation(y="matrix",
dist="character"),
contains = "FLXM")
## regression
setClass("FLXMR",
representation(y="matrix",
offset="ANY"),
contains = "FLXM")
## Print a short description of a model driver: name, formula, weighting
## capability, and summaries of the regressors/response if present.
setMethod("show", "FLXM",
function(object){
cat("FlexMix model of type", object@name,"\n\nformula: ")
print(object@formula)
cat("Weighted likelihood possible:", object@weighted,"\n\n")
if(!is.null(object@x) && nrow(object@x)>0){
cat("Regressors:\n")
print(summary(object@x))
}
if(!is.null(object@y) && nrow(object@y)>0){
cat("Response:\n")
print(summary(object@y))
}
cat("\n")
})
## One fitted mixture component: its degrees of freedom, log-likelihood
## and prediction functions, and the parameter estimates.
setClass("FLXcomponent",
representation(df="numeric",
logLik="function",
parameters="list",
predict="function"))
## Print the component's parameter list; empty components print nothing.
setMethod("show", "FLXcomponent",
          function(object){
            pars <- object@parameters
            if (length(pars) > 0) print(pars)
          })
###**********************************************************
## Concomitant variable model: fit/refit functions, design matrix, the
## coefficient matrix and a function giving the model's degrees of freedom.
setClass("FLXP",
representation(name="character",
formula="formula",
x="matrix",
fit="function",
refit="function",
coef="matrix",
df="function"),
prototype(formula=~1, df = function(x, k, ...) (k-1)*ncol(x)))
## If no separate refit function is supplied, reuse the fit function.
setMethod("initialize", signature(.Object="FLXP"), function(.Object, ...) {
.Object <- callNextMethod(.Object=.Object, ...)
if (is.null(formals(.Object@refit))) .Object@refit <- .Object@fit
.Object
})
## Multinomial-logit concomitant model (same slots, different methods).
setClass("FLXPmultinom",
contains="FLXP")
## Print a short description of a concomitant model: name, formula and a
## summary of the regressors if present.
setMethod("show", "FLXP",
function(object){
cat("FlexMix concomitant model of type", object@name,"\n\nformula: ")
print(object@formula)
if(!is.null(object@x) && nrow(object@x)>0){
cat("\nRegressors:\n")
print(summary(object@x))
}
cat("\n")
})
###**********************************************************
## Unfitted mixture distribution: k components with prior weights,
## per-component parameter objects, and a concomitant model.
## Validity: the prior vector must have exactly k entries.
setClass("FLXdist",
         representation(model="list",
                        prior="numeric",
                        components="list",
                        concomitant="FLXP",
                        formula="formula",
                        call="call",
                        k="integer"),
         validity=function(object) {
           (object@k == length(object@prior))
         },
         prototype(formula=.~.))

## Fitted mixture model returned by flexmix(): adds EM-state slots
## (posteriors, weights, iterations, cluster assignment, log-likelihood,
## degrees of freedom, control, grouping, cluster sizes, convergence
## flag, and k0 = initially requested number of components).
setClass("flexmix",
         representation(posterior="ANY",
                        weights="ANY",
                        iter="numeric",
                        cluster="integer",
                        logLik="numeric",
                        df="numeric",
                        control="FLXcontrol",
                        group="factor",
                        size="integer",
                        converged="logical",
                        k0="integer"),
         prototype(group=(factor(integer(0))),
                   formula=.~.),
         contains="FLXdist")
## Console display of a fitted mixture: call, cluster sizes, and
## convergence status of the EM algorithm.
setMethod("show", "flexmix",
function(object){
  cat("\nCall:", deparse(object@call,0.75*getOption("width")),
      sep="\n")
  cat("\nCluster sizes:\n")
  print(object@size)
  cat("\n")
  ## Prefix "no " when EM did not converge.
  if(!object@converged) cat("no ")
  cat("convergence after", object@iter, "iterations\n")
})
###**********************************************************
## Summary container for a fitted mixture: call, information criteria,
## log-likelihood and the component table (comptab).
setClass("summary.flexmix",
         representation(call="call",
                        AIC="numeric",
                        BIC="numeric",
                        logLik="logLik",
                        comptab="ANY"))

setMethod("show", "summary.flexmix",
function(object){
  cat("\nCall:", deparse(object@call,0.75*getOption("width")),
      sep="\n")
  cat("\n")
  print(object@comptab, digits=3)
  cat("\n")
  print(object@logLik)
  cat("AIC:", object@AIC, " BIC:", object@BIC, "\n")
  cat("\n")
})
###**********************************************************
## GLM regression driver: adds the family name and a refit function.
setClass("FLXMRglm",
         representation(family="character",
                        refit="function"),
         contains="FLXMR")

## Virtual base class for refit() results: per-component refitted
## models plus the refitted concomitant model.
setClass("FLXR",
         representation(k="integer",
                        components = "list",
                        concomitant = "ANY",
                        call="call",
                        "VIRTUAL"))

## Refit via full-likelihood optimization: joint coefficient vector and
## covariance matrix.
setClass("FLXRoptim",
         representation(coef="vector",
                        vcov="matrix"),
         contains="FLXR")

## Refit via separate weighted M-steps.
setClass("FLXRmstep",
         contains="FLXR")
setMethod("show", signature(object = "FLXR"),
function(object) {
  cat("\nCall:", deparse(object@call,0.75*getOption("width")),
      sep="\n")
  cat("\nNumber of components:", object@k, "\n\n")
})

## Show the refitted coefficients of either one component model
## (which = "model", selected by 'model') or the concomitant model.
setMethod("summary", signature(object = "FLXRoptim"),
function(object, model = 1, which = c("model", "concomitant"), ...) {
  which <- match.arg(which)
  z <- if (which == "model") object@components[[model]] else object@concomitant
  show(z)
  invisible(object)
})
## Summarize an M-step refit.  For which = "model", print a coefficient
## matrix per component.  For which = "concomitant", build Wald-type
## coefficient tables from the multinom fit (component 1 is the
## baseline, so tables start at component 2).
setMethod("summary", signature(object = "FLXRmstep"),
function(object, model = 1, which = c("model", "concomitant"), ...) {
  which <- match.arg(which)
  if (which == "model") {
    z <- object@components[[model]]
    if (!is.null(z)) lapply(seq_along(z), function(k) {
      cat(paste("$", names(z)[k], "\n", sep = ""))
      printCoefmat(coef(summary(z[[k]])))
      cat("\n")
    })
  } else {
    z <- object@concomitant
    fitted.summary <- summary(z)
    ## Number of components = rows of the multinom coefficient matrix
    ## plus the baseline.
    k <- nrow(coef(fitted.summary)) + 1
    coefs <- lapply(2:k, function(n) {
      coef.p <- fitted.summary$coefficients[n - 1, , drop = FALSE]
      s.err <- fitted.summary$standard.errors[n - 1, ,
                                              drop = FALSE]
      ## Wald z statistics and two-sided normal p-values.
      tvalue <- coef.p/s.err
      pvalue <- 2 * pnorm(-abs(tvalue))
      coef.table <- t(rbind(coef.p, s.err, tvalue, pvalue))
      dimnames(coef.table) <- list(colnames(coef.p), c("Estimate",
                                                       "Std. Error", "z value", "Pr(>|z|)"))
      new("Coefmat", coef.table)
    })
    names(coefs) <- paste("Comp", 2:k, sep = ".")
    print(coefs)
  }
  invisible(object)
})
## Thin matrix wrapper so coefficient tables print via printCoefmat
## (with significance stars as per the user's option).
setClass("Coefmat",
         contains = "matrix")
setMethod("show", signature(object="Coefmat"), function(object) {
  printCoefmat(object, signif.stars = getOption("show.signif.stars"))
})
###**********************************************************
## Nested model structure: one formula per group of components, with @k
## giving the number of components in each group.  Validity requires
## the two slots to have equal length.
setClass("FLXnested",
         representation(formula = "list",
                        k = "numeric"),
         validity = function(object) {
           length(object@formula) == length(object@k)
         })
## A plain numeric vector of component counts becomes an FLXnested
## object with an empty (~0) nesting formula for every entry.
setAs("numeric", "FLXnested",
function(from, to) {
  empty_formulas <- rep(list(~0), length(from))
  new("FLXnested", formula = empty_formulas, k = from)
})
## A named list is mapped slot-by-slot onto an FLXnested object via the
## package helper list2object().
setAs("list", "FLXnested",
function(from, to) list2object(from, to))
## NULL coerces to a default (empty) FLXnested object.
setAs("NULL", "FLXnested", function(from, to) new(to))
setMethod("initialize", "FLXnested", function(.Object, formula = list(), k = numeric(0), ...) {
  ## Convenience: a single formula is recycled for all entries of k.
  if (is(formula, "formula")) formula <- rep(list(formula), length(k))
  .Object <- callNextMethod(.Object, formula = formula, k = k, ...)
  .Object
})
###**********************************************************
## Regression driver supporting fixed and nested-varying effects:
## @design and @segment describe the expanded design, @fixed holds the
## formula of the effects shared by all components.
setClass("FLXMRfix",
         representation(design = "matrix",
                        nestedformula = "FLXnested",
                        fixed = "formula",
                        segment = "matrix",
                        variance = "vector"),
         contains="FLXMR")

## GLM variant of the fixed/nested-effects driver.
setClass("FLXMRglmfix",
         contains=c("FLXMRfix", "FLXMRglm"))
###**********************************************************
## Union class used where either a list or a data.frame is accepted.
setClassUnion("listOrdata.frame", c("list", "data.frame"))
###**********************************************************
flexmix/R/relabel.R 0000644 0001762 0000144 00000005554 14404637304 013676 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id$
#
## Internal generic: permute the components of a (fitted) mixture by the
## permutation vector 'perm'.
setGeneric("dorelabel", function(object, perm, ...) standardGeneric("dorelabel"))

setMethod("dorelabel", signature(object="flexmix", perm="vector"), function(object, perm, ...) {
  ## Let the FLXdist method permute prior/components/concomitant first.
  object <- callNextMethod(object, perm)
  object@posterior$scaled <- object@posterior$scaled[,perm,drop=FALSE]
  object@posterior$unscaled <- object@posterior$unscaled[,perm,drop=FALSE]
  ## order(perm) is the inverse permutation: old label k maps to its
  ## position in the new ordering.
  object@cluster <- order(perm)[object@cluster]
  object@size <- object@size[perm]
  names(object@size) <- seq_along(perm)
  object
})
## Permute the components of an (unfitted) mixture distribution: check
## that 'perm' is a valid permutation, reorder priors and components,
## renumber the component names, and relabel the concomitant model.
setMethod("dorelabel", signature(object="FLXdist", perm="vector"), function(object, perm, ...) {
  n_comp <- object@k
  if (length(perm) != n_comp)
    stop("length of order argument does not match number of components")
  if (any(sort(perm) != seq_len(n_comp)))
    stop("order argument not specified correctly")
  object@prior <- object@prior[perm]
  object@components <- object@components[perm]
  ## Rewrite the digits in each component name ("Comp.1", ...) so they
  ## again run 1..k in the new order.
  names(object@components) <- sapply(seq_along(object@components),
                                     function(i) gsub("[0-9]+", i, names(object@components)[i]))
  object@concomitant <- dorelabel(object@concomitant, perm, ...)
  object
})
## Permute the coefficient columns of a concomitant model and renumber
## the column names accordingly.
setMethod("dorelabel", signature(object="FLXP", perm="vector"), function(object, perm, ...) {
  coefs <- object@coef[, perm, drop = FALSE]
  colnames(coefs) <- sapply(seq_len(ncol(coefs)),
                            function(i) gsub("[0-9]+", i, colnames(coefs)[i]))
  object@coef <- coefs
  object
})
## Multinomial-logit variant: after permuting the columns, subtract the
## (new) first column from every row so component 1 is again the
## all-zero baseline, then renumber the column names.
setMethod("dorelabel", signature(object="FLXPmultinom", perm="vector"), function(object, perm, ...) {
  coefs <- object@coef[, perm, drop = FALSE]
  coefs <- sweep(coefs, 1, coefs[, 1], "-")
  colnames(coefs) <- sapply(seq_len(ncol(coefs)),
                            function(i) gsub("[0-9]+", i, colnames(coefs)[i]))
  object@coef <- coefs
  object
})
## Relabel components by sorting on the prior weights or on a single
## model/concomitant parameter selected via the regular expression in
## 'which'.
setMethod("relabel", signature(object="FLXdist", by="character"),
function(object, by, which=NULL, ...)
{
  by <- match.arg(by, c("prior", "model", "concomitant"))
  if(by=="prior"){
    perm <- order(prior(object), ...)
  }
  else if(by %in% c("model", "concomitant")) {
    pars <- parameters(object, which = by)
    ## 'which' must match exactly one parameter row to sort by.
    index <- grep(which, rownames(pars))
    if (length(index) != 1)
      stop("no suitable ordering variable given in 'which'")
    perm <- order(pars[index,], ...)
  }
  object <- dorelabel(object, perm=perm)
  object
})
## Default relabeling (no 'by' given): sort components by their prior
## weights.
setMethod("relabel", signature(object="FLXdist", by="missing"),
function(object, by, ...) relabel(object, by = "prior", ...))
## Relabel by an explicit integer permutation; validated before use.
setMethod("relabel", signature(object="FLXdist", by="integer"),
function(object, by, ...)
{
  if (!all(sort(by) == seq_len(object@k)))
    stop("if integer, ", sQuote("by"),
         " must be a permutation of the numbers 1 to ", object@k)
  dorelabel(object, by)
})
flexmix/R/plot-refit.R 0000644 0001762 0000144 00000011250 14404637304 014343 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: plot-refit.R 5079 2016-01-31 12:21:12Z gruen $
#
## Lattice prepanel function for coefficient plots: computes axis limits
## for barcharts of estimates, optionally widening each value by +/- its
## entry in 'groups' (confidence half-widths) so the limits cover whole
## intervals.  Returns the usual prepanel list (xlim/ylim/..., dx, dy).
prepanel.default.coef <- function (x, y, subscripts, groups=NULL, horizontal = TRUE, nlevels, origin = NULL,
                                   ...)
{
  ## No usable (non-NA) data: return empty limits.
  if (!any(!is.na(x) & !is.na(y)))
    return(list(xlim = c(NA, NA), ylim = c(NA, NA), dx = 1, dy = 1))
  if (horizontal) {
    ## Categorical axis is y; coerce to factor with 'nlevels' levels.
    if (!is.factor(y)) {
      if (missing(nlevels)) nlevels <- length(unique(y))
      y <- factor(y, levels = seq_len(nlevels))
    }
    if (!is.null(groups)) {
      if (!is.numeric(x)) stop("x must be numeric")
      ## Duplicate each value shifted by -/+ its half-width.
      x <- rep(x, each = 2) + rep(groups[subscripts], each = 2) * c(-1, 1)
    }
    list(xlim = if (is.numeric(x)) range(x, origin, finite = TRUE) else levels(x),
         ylim = levels(y),
         yat = sort(unique(as.numeric(y))),
         dx = 1, dy = 1)
  } else {
    ## Mirror image: categorical axis is x, numeric axis is y.
    if (!is.factor(x)) {
      if (missing(nlevels)) nlevels <- length(unique(x))
      x <- factor(x, levels = seq_len(nlevels))
    }
    if (!is.null(groups)) {
      if (!is.numeric(y)) stop("y must be numeric")
      y <- rep(as.numeric(y), each = 2) + rep(groups[subscripts], each = 2) * c(-1, 1)
    }
    list(xlim = levels(x),
         xat = sort(unique(as.numeric(x))),
         ylim = if (is.numeric(y)) range(y, origin, finite = TRUE) else levels(y),
         dx = 1, dy = 1)
  }
}
## Lattice panel function for coefficient plots: draws a barchart of the
## estimates and, when 'groups' (confidence half-widths) is supplied,
## a line segment of +/- groups around each bar.  Non-significant
## coefficients (per 'significant') get the secondary border/fill color.
panel.coef <- function(x, y, subscripts, groups, significant = NULL, horizontal = TRUE,
                       lwd = 2, col, col.line = c("black", "grey"), ...)
{
  col.sig <- rep(col.line[1], length(x))
  if (!is.null(significant)) {
    if (missing(col)) col <- c("grey", "white")
    col.fill <- rep(col[1], length(x))
    ## Secondary colors mark the non-significant coefficients.
    col.sig[!significant[subscripts]] <- col.line[2]
    col.fill[!significant[subscripts]] <- col[2]
  }
  else if (missing(col)) col.fill <- "grey" else col.fill <- col
  panel.barchart(x, y, border = col.sig, col = col.fill, horizontal = horizontal, ...)
  if (!missing(groups)) {
    if (horizontal) {
      ## Two columns: value - halfwidth, value + halfwidth.
      z <- x + rep(c(-1,1), each = length(x)) * matrix(rep(groups[subscripts], 2), ncol = 2)
      for (i in seq_along(x)) {
        panel.xyplot(z[i,], rep(y[i], 2), type = "l", col = col.sig[i], lwd = lwd)
      }
    }
    else {
      z <- y + rep(c(-1,1), each = length(y)) * matrix(rep(groups[subscripts], 2), ncol = 2)
      for (i in seq_along(y)) {
        panel.xyplot(rep(x[i], 2), z[i,], type = "l", col = col.sig[i], lwd = lwd)
      }
    }
  }
}
## Assemble a plotting data frame from a named list of per-component
## coefficient matrices (columns: estimate, std. error, statistic,
## p-value).  'SD' holds the half-width of the (1 - alpha) normal
## confidence interval; 'Significance' flags p-values <= alpha.
getCoefs <- function(x, alpha = 0.05, components, ...) {
  ## Strip the "Comp." prefix so names are plain component indices.
  names(x) <- sapply(names(x), function(nm) strsplit(nm, "Comp.")[[1]][2])
  x <- x[names(x) %in% components]
  ci_halfwidth <- qnorm(1 - alpha/2)
  rows <- lapply(names(x), function(nm) {
    coefs <- x[[nm]]
    data.frame(Value = coefs[, 1],
               SD = coefs[, 2] * ci_halfwidth,
               Variable = rownames(coefs),
               Component = nm,
               Significance = coefs[, 4] <= alpha)
  })
  do.call("rbind", rows)
}
## Trellis plot of refitted coefficients with confidence intervals.
## One panel per component (bycluster = TRUE) or per variable
## (bycluster = FALSE); 'ci' toggles interval segments, 'significance'
## the grey/white highlighting of non-significant coefficients.
setMethod("plot", signature(x="FLXRoptim", y="missing"),
function(x, y, model = 1, which = c("model", "concomitant"),
         bycluster=TRUE, alpha=0.05, components, labels=NULL,
         significance = FALSE, xlab = NULL, ylab = NULL,
         ci = TRUE, scales = list(), as.table = TRUE, horizontal = TRUE, ...)
{
  which <- match.arg(which)
  if (missing(components)) components <- seq_len(x@k)
  plot.data <- if (which == "model") getCoefs(x@components[[model]], alpha, components) else getCoefs(x@concomitant, alpha, components)
  if (!is.null(labels)) plot.data$Variable <- factor(plot.data$Variable, labels = labels)
  plot.data$Component <- with(plot.data, factor(Component, sort(unique(Component)), labels = paste("Comp.", sort(unique(Component)))))
  if (bycluster) {
    formula <- if (horizontal) Variable ~ Value | Component else Value ~ Variable | Component
    ## Reverse the variable order so the first variable is plotted on top.
    plot.data$Variable <- with(plot.data, factor(Variable, levels = rev(unique(Variable))))
  }
  else {
    formula <- if (horizontal) Component ~ Value | Variable else Value ~ Component | Variable
    plot.data$Component <- with(plot.data, factor(Component, levels = rev(levels(Component))))
  }
  groups <- if (ci) plot.data$SD else NULL
  significant <- if (significance) plot.data$Significance else NULL
  xyplot(formula, data = plot.data, xlab = xlab, ylab = ylab, origin = 0, horizontal = horizontal,
         scales = scales, as.table = as.table, significant = significant,
         groups = groups, prepanel = function(...) prepanel.default.coef(...),
         panel = function(x, y, subscripts, groups, ...)
           panel.coef(x, y, subscripts, groups, ...), ...)
})
flexmix/R/lmmc.R 0000644 0001762 0000144 00000065226 14404661473 013226 0 ustar ligges users setClass("FLXMRlmc",
representation(family = "character",
group = "factor",
censored = "formula",
C = "matrix"),
contains = "FLXMR")
setClass("FLXMRlmcfix",
contains = "FLXMRlmc")
setClass("FLXMRlmmc",
representation(random = "formula",
z = "matrix",
which = "ANY"),
contains = "FLXMRlmc")
setClass("FLXMRlmmcfix",
contains = "FLXMRlmmc")
## Censored (mixed) linear models support only weighted ML; refuse the
## hard/random classification EM variants.
setMethod("allweighted", signature(model = "FLXMRlmc", control = "ANY", weights = "ANY"), function(model, control, weights) {
  if (!control@classify %in% c("auto", "weighted"))
    stop("Model class only supports weighted ML estimation.")
  model@weighted
})
## M-step update of the residual variance for the censored linear mixed
## model: weighted squared residuals plus corrections from the censored
## observations (censored$Sigma, censored$psi) and from the random
## effects (random$Gamma).
## Note: the argument 'which' shadows base::which only as a data object;
## R's function lookup still finds base::which for the calls below.
update_Residual <- function(fit, w, z, C, which, random, censored) {
  index <- lapply(C, function(x) x == 1)  # censored entries per group
  ## Expand group weights to one weight per observation row.
  W <- rep(w, sapply(which, function(x) nrow(z[[x]])))
  ZGammaZ <- sapply(seq_along(which), function(i) sum(diag(crossprod(z[[which[i]]]) %*% random$Gamma[[i]])))
  WHICH <- which(sapply(C, sum) > 0)      # groups containing censored values
  Residual <- if (length(WHICH) > 0)
    sum(sapply(WHICH, function(i)
      w[i] * sum(diag(censored$Sigma[[i]]) - 2 * z[[which[i]]][index[[i]],,drop=FALSE] * censored$psi[[i]]))) else 0
  (sum(W * residuals(fit)^2) + Residual + sum(w * ZGammaZ))/sum(W)
}
## E-step for the censored linear model (no random effects): for each
## group, compute the conditional mean and variance of the censored part
## of the response given the uncensored part under the current fit
## (fit$coef, fit$sigma2), then take moments of that distribution
## truncated at the recorded censoring values.
update_latent <- function(x, y, C, fit) {
  AnyMissing <- which(sapply(C, sum) > 0)  # groups with censored entries
  index <- lapply(C, function(x) x == 1)
  Sig <- lapply(seq_along(x), function(i) fit$sigma2 * diag(nrow = nrow(x[[i]])))
  SIGMA <- rep(list(matrix(nrow = 0, ncol = 0)), length(x))
  if (length(AnyMissing) > 0) {
    SIGMA[AnyMissing] <- lapply(AnyMissing, function(i) {
      S <- Sig[[i]]
      SIG <- S[index[[i]], index[[i]]]
      ## Condition on the uncensored entries (Schur complement).
      if (sum(!index[[i]]) > 0) SIG <-
        SIG - S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*% S[!index[[i]],index[[i]]]
      SIG
    })
  }
  Sigma <- MU <- rep(list(vector("numeric", length = 0)), length(x))
  if (length(AnyMissing) > 0) {
    MU[AnyMissing] <- lapply(AnyMissing, function(i) {
      S <- Sig[[i]]
      Mu <- x[[i]][index[[i]],,drop=FALSE] %*% fit$coef
      if (sum(!index[[i]]) > 0) Mu <- Mu + S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*%
        (y[[i]][!index[[i]]] - x[[i]][!index[[i]],,drop=FALSE] %*% fit$coef)
      Mu
    })
  }
  ## Truncated-normal moments; NULL for groups without censored entries.
  moments <- lapply(seq_along(x), function(i) {
    if (sum(index[[i]]) > 0) moments_truncated(MU[[i]], SIGMA[[i]], y[[i]][C[[i]] == 1])
  })
  Sigma <- lapply(moments, "[[", "variance")
  censored <- list(mu = lapply(moments, "[[", "mean"),
                   Sigma = Sigma)
  list(censored = censored)
}
## E-step for the censored linear *mixed* model: as update_latent(), but
## the marginal covariance includes the random effects (z Psi z' +
## Residual*I).  Additionally returns the conditional moments of the
## random effects (random$beta, random$Gamma) and the cross term psi
## needed by update_Residual().
update_latent_random <- function(x, y, z, C, which, fit) {
  index <- lapply(C, function(x) x == 1)
  AnyMissing <- which(sapply(C, sum) > 0)
  Residual <- fit$sigma2$Residual
  Psi <- fit$sigma2$Random
  ## Conditional covariance of the random effects per unique design.
  EVbeta <- lapply(seq_along(z), function(i) solve(1/Residual * crossprod(z[[i]]) + solve(Psi)))
  ## Marginal response covariance per unique design.
  Sig <- lapply(seq_along(z), function(i) z[[i]] %*% Psi %*% t(z[[i]]) + Residual * diag(nrow = nrow(z[[i]])))
  SIGMA <- rep(list(matrix(nrow = 0, ncol = 0)), length(x))
  if (length(AnyMissing) > 0) {
    SIGMA[AnyMissing] <- lapply(AnyMissing, function(i) {
      S <- Sig[[which[i]]]
      SIG <- S[index[[i]], index[[i]]]
      ## Condition on the uncensored entries (Schur complement).
      if (sum(!index[[i]]) > 0) SIG <-
        SIG - S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*% S[!index[[i]],index[[i]]]
      SIG
    })
  }
  Sigma <- MU <- rep(list(vector("numeric", length = 0)), length(x))
  if (length(AnyMissing) > 0) {
    MU[AnyMissing] <- lapply(AnyMissing, function(i) {
      S <- Sig[[which[i]]]
      Mu <- x[[i]][index[[i]],,drop=FALSE] %*% fit$coef
      if (sum(!index[[i]]) > 0) {
        Mu <- Mu + S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*%
          (y[[i]][!index[[i]]] - x[[i]][!index[[i]],,drop=FALSE] %*% fit$coef)
      }
      Mu
    })
  }
  moments <- lapply(seq_along(x), function(i) {
    if (sum(index[[i]]) > 0) moments_truncated(MU[[i]], SIGMA[[i]], y[[i]][C[[i]] == 1])
  })
  Sigma <- lapply(moments, "[[", "variance")
  censored <- list(mu = lapply(moments, "[[", "mean"),
                   Sigma = Sigma,
                   psi = lapply(seq_along(x), function(i) {
                     if (sum(index[[i]]) > 0) return(diag(Sigma[[i]] %*%
                       z[[which[i]]][index[[i]],,drop=FALSE] %*% EVbeta[[which[i]]])/Residual)
                     else return(vector("numeric", length = 0))
                   }))
  ## Responses with censored entries replaced by their conditional means.
  ybar <- lapply(seq_along(y), function(i) {
    Y <- y[[i]]
    Y[index[[i]]] <- censored$mu[[i]]
    Y
  })
  random <- list(beta = lapply(seq_along(x), function(i)
                   EVbeta[[which[i]]] %*% t(z[[which[i]]]) %*% (ybar[[i]] - x[[i]] %*% fit$coef)/Residual),
                 Gamma = lapply(seq_along(x), function(i) {
                   if (sum(index[[i]]) > 0) {
                     return(EVbeta[[which[i]]] + (EVbeta[[which[i]]] %*%
                       (t(z[[which[i]]][index[[i]],,drop=FALSE]) %*% censored$Sigma[[i]] %*%
                        z[[which[i]]][index[[i]],,drop=FALSE]) %*%
                       t(EVbeta[[which[i]]]))/Residual^2)
                   } else return(EVbeta[[which[i]]])
                 }))
  list(random = random,
       censored = censored)
}
## Mean and variance of a multivariate normal N(mu, Sigma) truncated
## from above at T (componentwise).  Works on the standardized scale
## (correlation matrix R) and transforms back at the end; uses
## mvtnorm::pmvnorm/dmvnorm for the orthant probabilities.  If the
## result is numerically degenerate (non-finite, mean above the bound,
## or non-PSD variance), a crude fallback is returned.
moments_truncated <- function(mu, Sigma, T, ...) {
  Sigma <- as.matrix(Sigma)
  mu <- as.vector(mu)
  T <- as.vector(T)
  S <- 1/sqrt(diag(Sigma))     # inverse standard deviations
  T1 <- S * (T - mu)           # standardized truncation points
  if (length(mu) == 1) {
    ## Univariate case: closed-form truncated-normal moments.
    alpha <- pnorm(T1)
    dT1 <- dnorm(T1)
    Ex <- - dT1 / alpha
    Ex2 <- 1 - T1 * dT1 / alpha
  } else {
    R <- S * Sigma * rep(S, each = ncol(Sigma))  # correlation matrix
    diag(R) <- 1L
    alpha <- mvtnorm::pmvnorm(upper = T1, sigma = R, ...)
    rq <- lapply(seq_along(T1), function(q) (R - tcrossprod(R[,q])))
    R2 <- R^2
    Vq <- 1 - R2
    Sq <- sqrt(Vq)
    ## Partial correlations/thresholds after conditioning on entry q.
    Rq <- lapply(seq_along(T1), function(q) rq[[q]]/(tcrossprod(Sq[,q])))
    Tq <- lapply(seq_along(T1), function(q) (T1 - R[,q] * T1[q])/Sq[,q])
    Phiq <- if (length(mu) == 1) 1 else
      sapply(seq_along(Rq), function(q) mvtnorm::pmvnorm(upper = Tq[[q]][-q], sigma = Rq[[q]][-q,-q], ...))
    phi_Phiq <- dnorm(T1) * Phiq
    Ex <- - (R %*% phi_Phiq)/alpha
    ## Second-moment correction matrix (symmetric).
    T2_entries <- lapply(seq_along(T1),
                         function(j) sapply(lapply(seq_along(T1)[seq_len(j)], function(i) R[,i] * T1 * phi_Phiq), function(z) sum(z * R[j,])))
    T2 <- diag(length(T1))
    T2[upper.tri(T2, diag = TRUE)] <- unlist(T2_entries)
    T2[lower.tri(T2)] <- t(T2)[lower.tri(T2)]
    ## Bivariate densities at pairs of truncation points.
    phiqr <- lapply(seq_along(T1), function(q)
      sapply(seq_along(T1), function(r) {
        if (r == q) return(0) else return(mvtnorm::dmvnorm(T1[c(q, r)],
          mean = rep(0, length.out = length(c(q,r))), sigma = R[c(q,r), c(q,r)]))}))
    if (length(mu) == 2) {
      Ex2 <- R - T2 / alpha +
        Reduce("+", lapply(seq_along(Tq), function(q)
          tcrossprod(R[q,],
                     rowSums(sapply(seq_along(Tq)[-q], function(r)
                       phiqr[[q]][r] * (R[,r] - R[q,r] * R[q,])))))) / alpha
    } else {
      ## Dimension > 2: additional term with doubly-conditioned
      ## probabilities; symmetrized via (T3 + t(T3))/2.
      betaq <- lapply(seq_along(T1), function(q) sweep(rq[[q]], 2, Vq[,q], "/"))
      Rqr <- lapply(seq_along(T1), function(q)
        lapply(seq_along(T1), function(r) if (r == q) return(0) else
          return((Rq[[q]][-c(q,r),-c(q,r)] - tcrossprod(Rq[[q]][-c(q,r),r]))/tcrossprod(sqrt(1 - Rq[[q]][-c(q,r),r]^2)))))
      Tqr <- lapply(seq_along(T1), function(q) {
        lapply(seq_along(T1), function(r) if (r == q) return(0) else
          return((T1[-c(q,r)] - betaq[[r]][-c(r,q),q] * T1[q] - betaq[[q]][-c(r,q),r] * T1[r])/
                 (Sq[-c(r,q),q] * sqrt(1 - Rq[[q]][-c(r,q),r]^2))))})
      T3 <- Reduce("+", lapply(seq_along(Tq), function(q)
        tcrossprod(R[q,],
                   rowSums(sapply(seq_along(Tq)[-q], function(r)
                     phiqr[[q]][r] * (R[,r] - R[q,r] * R[q,]) *
                       mvtnorm::pmvnorm(upper = Tqr[[q]][[r]], sigma = Rqr[[q]][[r]], ...)))))) / alpha
      Ex2 <- R - T2 / alpha + 1/2 * (T3 + t(T3))
    }
  }
  ## Transform back from the standardized scale.
  moments <- list(mean = 1/S * Ex + mu,
                  variance = diag(1/S, nrow = length(T)) %*% (Ex2 - tcrossprod(Ex)) %*% diag(1/S, nrow = length(T)))
  ## Numerical safeguard: fall back to a simple shifted mean with the
  ## untruncated covariance when the computed moments are unusable.
  if (!all(is.finite(unlist(moments))) || any(moments$mean > T) || any(eigen(moments$variance)$values < 0)) {
    moments <- list(mean = T - abs(diag(Sigma)),
                    variance = Sigma)
  }
  moments
}
## Driver for finite mixtures of linear (mixed) models with censored
## Gaussian responses.
##
## @param formula  fixed-effects model formula (grouping is added via
##   "| group" and handled in FLXgetModelmatrix)
## @param random   random-effects formula; if missing, a model without
##   random effects (class "FLXMRlmc"/"FLXMRlmcfix") is returned
## @param censored formula indicating the censoring variable(s); a
##   one-sided formula gets "." prepended as response
## @param varFix   without random effects: logical scalar -- pool the
##   residual variance across components; with random effects: named
##   logical vector c(Random = , Residual = )
## @param eps      observations with posterior weight <= eps are dropped
##   from the weighted fits
## @return an object of class FLXMRlmc/FLXMRlmcfix/FLXMRlmmc/FLXMRlmmcfix
FLXMRlmmc <- function(formula = . ~ ., random, censored, varFix, eps = 10^-6, ...)
{
  family <- "gaussian"
  ## Allow one-sided censoring formulas.
  censored <- if (length(censored) == 3) censored else formula(paste(".", paste(deparse(censored), collapse = "")))
  if (missing(random)) {
    ## ---- Censored linear model WITHOUT random effects -------------
    if (missing(varFix)) varFix <- FALSE
    else if ((length(varFix) > 1) || (is.na(as.logical(varFix)))) stop("varFix has to be a logical vector of length one")
    object <- new("FLXMRlmc", formula = formula, censored = censored,
                  weighted = TRUE, family = family, name = "FLXMRlmc:gaussian")
    if (varFix) object <- new("FLXMRlmcfix", object)
    ## Weighted LS on data with censored responses replaced by their
    ## conditional means; sigma2 adds the conditional variances of the
    ## censored values.
    lmc.wfit <- function(x, y, w, C, censored) {
      W <- rep(w, sapply(x, nrow))
      X <- do.call("rbind", x)
      AnyMissing <- which(sapply(C, sum) > 0)
      ybar <- lapply(seq_along(y), function(i) {
        Y <- y[[i]]
        Y[C[[i]] == 1] <- censored$mu[[i]]
        Y
      })
      Y <- do.call("rbind", ybar)
      fit <- lm.wfit(X, Y, W, ...)
      fit$sigma2 <- if (length(AnyMissing) > 0) (sum(W * residuals(fit)^2) +
        sum(sapply(AnyMissing, function(i)
          w[i] * sum(diag(censored$Sigma[[i]])))))/sum(W) else sum(W * residuals(fit)^2)/sum(W)
      fit$df <- ncol(X)
      fit
    }
    object@defineComponent <- function(para) {
      predict <- function(x, ...)
        lapply(x, function(X) X %*% para$coef)
      ## Per-group log-likelihood: normal density for the uncensored
      ## part, conditional normal probability for the censored part;
      ## scaled by the group size and expanded back to observations.
      logLik <- function(x, y, C, group, censored, ...) {
        AnyMissing <- which(sapply(C, sum) > 0)
        index <- lapply(C, function(x) x == 1)
        V <- lapply(x, function(X) diag(nrow(X)) * para$sigma2)
        mu <- predict(x, ...)
        SIGMA <- rep(list(matrix(nrow = 0, ncol = 0)), length(x))
        if (length(AnyMissing) > 0) {
          SIGMA[AnyMissing] <- lapply(AnyMissing, function(i) {
            S <- V[[i]]
            SIG <- S[index[[i]], index[[i]]]
            if (sum(!index[[i]]) > 0) SIG <-
              SIG - S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*% S[!index[[i]],index[[i]]]
            SIG
          })
        }
        MU <- rep(list(vector("numeric", length = 0)), length(x))
        if (length(AnyMissing) > 0) {
          MU[AnyMissing] <- lapply(AnyMissing, function(i) {
            S <- V[[i]]
            Mu <- mu[[i]][index[[i]]]
            if (sum(!index[[i]]) > 0) Mu <- Mu + S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*%
              (y[[i]][!index[[i]]] - mu[[i]][!index[[i]]])
            Mu
          })
        }
        llh <- sapply(seq_along(x), function(i) {
          LLH <- 0
          if (sum(index[[i]]) > 0) LLH <- log(mvtnorm::pmvnorm(upper = y[[i]][index[[i]]], mean = as.vector(MU[[i]]),
                                                               sigma = SIGMA[[i]]))
          if (sum(!index[[i]]) > 0) LLH <- LLH + mvtnorm::dmvnorm(t(y[[i]][!index[[i]]]), mean = mu[[i]][!index[[i]]],
                                                                  sigma = V[[i]][!index[[i]], !index[[i]], drop = FALSE], log=TRUE)
          LLH/nrow(V[[i]])
        })
        as.vector(ungroupPriors(matrix(llh), group, !duplicated(group)))
      }
      new("FLXcomponent", parameters = list(coef = para$coef, sigma2 = para$sigma2,
                                            censored = para$censored), logLik = logLik, predict = predict,
          df = para$df)
    }
    object@fit <- if (varFix) {
      ## All components fitted jointly so the residual variance can be
      ## pooled (prior-weighted average).
      function(x, y, w, C, fit) {
        any_removed <- any(w <= eps)
        if (any_removed) {
          ok <- apply(w, 2, function(x) x > eps)
          W <- lapply(seq_len(ncol(ok)), function(i) w[ok[,i],i])
          X <- lapply(seq_len(ncol(ok)), function(i) x[ok[,i],,drop = FALSE])
          y <- lapply(seq_len(ncol(ok)), function(i) y[ok[,i]])
          C <- lapply(seq_len(ncol(ok)), function(i) C[ok[,i]])
        } else {
          X <- rep(list(x), ncol(w))
          y <- rep(list(y), ncol(w))
          C <- rep(list(C), ncol(w))
          W <- lapply(seq_len(ncol(w)), function(i) w[,i])
        }
        ## First EM iteration has no coefficients yet: initialize the
        ## latent-data moments from the observed data.
        if ("coef" %in% names(fit[[1]]))
          fit <- lapply(seq_len(ncol(w)), function(k) update_latent(X[[k]], y[[k]], C[[k]], fit[[k]]))
        else {
          fit <- lapply(seq_len(ncol(w)), function(k)
            list(censored = list(mu = lapply(seq_along(y[[k]]), function(i) y[[k]][[i]][C[[k]][[i]] == 1]),
                                 Sigma = lapply(C[[k]], function(x) diag(1, nrow = sum(x)) * var(unlist(y[[k]]))))))
        }
        fit <- lapply(seq_len(ncol(w)), function(k) c(lmc.wfit(X[[k]], y[[k]], W[[k]], C[[k]], fit[[k]]$censored),
                                                      censored = list(fit[[k]]$censored)))
        ## Pool the residual variance across components.
        sigma2 <- sum(sapply(fit, function(x) x$sigma2) * colMeans(w))
        for (k in seq_len(ncol(w))) fit[[k]]$sigma2 <- sigma2
        lapply(fit, function(Z) object@defineComponent(list(coef = coef(Z),
                                                            df = Z$df + 1/ncol(w),
                                                            sigma2 = Z$sigma2,
                                                            censored = Z$censored)))
      }
    } else {
      function(x, y, w, C, fit){
        any_removed <- any(w <= eps)
        if (any_removed) {
          ok <- w > eps
          w <- w[ok]
          x <- x[ok,,drop = FALSE]
          y <- y[ok]
          C <- C[ok]
        }
        if ("coef" %in% names(fit)) {
          fit <- update_latent(x, y, C, fit)
        } else {
          fit$censored <- list(mu = lapply(seq_along(y), function(i) y[[i]][C[[i]] == 1]),
                               Sigma = lapply(C, function(x) diag(1, nrow = sum(x)) * var(unlist(y))))
        }
        fit <- c(lmc.wfit(x, y, w, C, fit$censored),
                 censored = list(fit$censored))
        object@defineComponent(
          list(coef = coef(fit),
               df = fit$df + 1,
               sigma2 = fit$sigma2,
               censored = fit$censored))
      }
    }
  } else {
    ## ---- Censored linear model WITH random effects ----------------
    if (missing(varFix)) varFix <- c(Random = FALSE, Residual = FALSE)
    else if (length(varFix) != 2 || is.null(names(varFix)) || any(is.na(pmatch(names(varFix), c("Random", "Residual")))))
      stop("varFix has to be a named vector of length two")
    else names(varFix) <- c("Random", "Residual")[pmatch(names(varFix), c("Random", "Residual"))]
    random <- if (length(random) == 3) random else formula(paste(".", paste(deparse(random), collapse = "")))
    object <- new("FLXMRlmmc", formula = formula, random = random, censored = censored,
                  weighted = TRUE, family = family, name = "FLXMRlmmc:gaussian")
    if (any(varFix)) object <- new("FLXMRlmmcfix", object)
    add <- function(x) Reduce("+", x)
    ## Weighted LS on the response corrected for the random-effect
    ## predictions; also accumulates the random-effect covariance.
    lmmc.wfit <- function(x, y, w, z, C, which, random, censored) {
      effect <- lapply(seq_along(which), function(i) z[[which[i]]] %*% random$beta[[i]])
      Effect <- do.call("rbind", effect)
      W <- rep(w, sapply(x, nrow))
      X <- do.call("rbind", x)
      ybar <- lapply(seq_along(y), function(i) {
        Y <- y[[i]]
        Y[C[[i]] == 1] <- censored$mu[[i]]
        Y
      })
      Y <- do.call("rbind", ybar)
      fit <- lm.wfit(X, Y - Effect, W, ...)
      wGamma <- add(lapply(seq_along(which), function(i) w[i] * random$Gamma[[i]]))
      bb <- add(lapply(seq_along(which), function(i) tcrossprod(random$beta[[i]]) * w[i]))
      fit$sigma2 <- list(Random = (wGamma + bb)/sum(w))
      fit$df <- ncol(X)
      fit
    }
    object@defineComponent <- function(para) {
      predict <- function(x, ...)
        lapply(x, function(X) X %*% para$coef)
      ## As in the no-random-effects case, but with marginal covariance
      ## z Psi z' + Residual*I per unique random-effects design.
      logLik <- function(x, y, z, C, which, group, censored, ...) {
        AnyMissing <- which(sapply(C, sum) > 0)
        index <- lapply(C, function(x) x == 1)
        V <- lapply(z, function(Z) tcrossprod(tcrossprod(Z, para$sigma2$Random), Z) + diag(nrow(Z)) * para$sigma2$Residual)
        mu <- predict(x, ...)
        SIGMA <- rep(list(matrix(nrow = 0, ncol = 0)), length(x))
        if (length(AnyMissing) > 0) {
          SIGMA[AnyMissing] <- lapply(AnyMissing, function(i) {
            S <- V[[which[i]]]
            SIG <- S[index[[i]], index[[i]]]
            if (sum(!index[[i]]) > 0) SIG <-
              SIG - S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*% S[!index[[i]],index[[i]]]
            SIG
          })
        }
        MU <- rep(list(vector("numeric", length = 0)), length(x))
        if (length(AnyMissing) > 0) {
          MU[AnyMissing] <- lapply(AnyMissing, function(i) {
            S <- V[[which[i]]]
            Mu <- mu[[i]][index[[i]]]
            if (sum(!index[[i]]) > 0) Mu <- Mu + S[index[[i]],!index[[i]]] %*% solve(S[!index[[i]],!index[[i]]]) %*%
              (y[[i]][!index[[i]]] - mu[[i]][!index[[i]]])
            Mu
          })
        }
        llh <- sapply(seq_along(x), function(i) {
          LLH <- 0
          if (sum(index[[i]]) > 0) LLH <- log(mvtnorm::pmvnorm(upper = y[[i]][index[[i]]], mean = as.vector(MU[[i]]),
                                                               sigma = SIGMA[[i]]))
          if (sum(!index[[i]]) > 0) LLH <- LLH + mvtnorm::dmvnorm(t(y[[i]][!index[[i]]]), mean = mu[[i]][!index[[i]]],
                                                                  sigma = V[[which[i]]][!index[[i]], !index[[i]], drop = FALSE], log=TRUE)
          LLH/nrow(V[[which[i]]])
        })
        as.vector(ungroupPriors(matrix(llh), group, !duplicated(group)))
      }
      new("FLXcomponent", parameters = list(coef = para$coef, sigma2 = para$sigma2,
                                            censored = para$censored, random = para$random), logLik = logLik, predict = predict,
          df = para$df)
    }
    object@fit <- if (any(varFix)) {
      function(x, y, w, z, C, which, fit) {
        any_removed <- any(w <= eps)
        if (any_removed) {
          ok <- apply(w, 2, function(x) x > eps)
          W <- lapply(seq_len(ncol(ok)), function(i) w[ok[,i],i])
          X <- lapply(seq_len(ncol(ok)), function(i) x[ok[,i],,drop = FALSE])
          y <- lapply(seq_len(ncol(ok)), function(i) y[ok[,i]])
          C <- lapply(seq_len(ncol(ok)), function(i) C[ok[,i]])
          which <- lapply(seq_len(ncol(ok)), function(i) which[ok[,i]])
        } else {
          X <- rep(list(x), ncol(w))
          y <- rep(list(y), ncol(w))
          C <- rep(list(C), ncol(w))
          which <- rep(list(which), ncol(w))
          W <- lapply(seq_len(ncol(w)), function(i) w[,i])
        }
        ## Initialize latent-data/random-effect moments on the first
        ## iteration (no coefficients yet).
        if ("coef" %in% names(fit[[1]]))
          fit <- lapply(seq_len(ncol(w)), function(k) update_latent_random(X[[k]], y[[k]], z, C[[k]], which[[k]],
                                                                           fit[[k]]))
        else {
          fit <- lapply(seq_len(ncol(w)), function(k)
            list(random = list(beta = lapply(seq_along(W[[k]]), function(i) rep(0, ncol(z[[which[[k]][i]]]))),
                               Gamma = lapply(seq_along(W[[k]]), function(i) diag(ncol(z[[which[[k]][i]]])))),
                 censored = list(mu = lapply(seq_along(y[[k]]), function(i) y[[k]][[i]][C[[k]][[i]] == 1]),
                                 Sigma = lapply(C[[k]], function(x) diag(1, nrow = sum(x)) * var(unlist(y[[k]]))),
                                 psi = lapply(C[[k]], function(x) rep(0, sum(x))))))
        }
        fit <- lapply(seq_len(ncol(w)), function(k) c(lmmc.wfit(X[[k]], y[[k]], W[[k]], z, C[[k]],
                                                                which[[k]], fit[[k]]$random, fit[[k]]$censored),
                                                      random = list(fit[[k]]$random),
                                                      censored = list(fit[[k]]$censored)))
        if (varFix["Random"]) {
          ## NOTE(review): groups are weighted by length(x[[i]]) (= all
          ## design-matrix entries, rows * cols); nrow may have been
          ## intended -- confirm against the reference.
          prior_w <- apply(w, 2, weighted.mean, w = sapply(x, length))
          Psi <- add(lapply(seq_len(ncol(w)), function(k) fit[[k]]$sigma2$Random * prior_w[k]))
          for (k in seq_len(ncol(w))) fit[[k]]$sigma2$Random <- Psi
        }
        for (k in seq_len(ncol(w)))
          fit[[k]]$sigma2$Residual <- update_Residual(fit[[k]], W[[k]], z, C[[k]], which[[k]],
                                                      fit[[k]]$random, fit[[k]]$censored)
        if (varFix["Residual"]) {
          prior <- colMeans(w)
          ## BUGFIX: previously read fit[[k]]$sigma2$Residual with the
          ## stale loop index k (== ncol(w) after the preceding loop),
          ## so the pooled value equalled the LAST component's residual
          ## variance.  Pool across all components weighted by the
          ## priors, mirroring the sigma2 pooling in the varFix branch
          ## of the no-random-effects case above.
          Residual <- sum(sapply(fit, function(x) x$sigma2$Residual) * prior)
          for (k in seq_len(ncol(w))) fit[[k]]$sigma2$Residual <- Residual
        }
        n <- nrow(fit[[1]]$sigma2$Random)
        ## df: coefficients + random-effect covariance parameters
        ## (shared when pooled) + residual variance (shared when pooled).
        lapply(fit, function(Z) object@defineComponent(
          list(coef = coef(Z),
               df = Z$df + n*(n+1)/(2*ifelse(varFix["Random"], ncol(w), 1)) +
                 ifelse(varFix["Residual"], 1/ncol(w), 1),
               sigma2 = Z$sigma2,
               random = Z$random,
               censored = Z$censored)))
      }
    } else {
      function(x, y, w, z, C, which, fit){
        any_removed <- any(w <= eps)
        if (any_removed) {
          ok <- w > eps
          w <- w[ok]
          x <- x[ok,,drop = FALSE]
          y <- y[ok]
          C <- C[ok]
          which <- which[ok]
        }
        if ("coef" %in% names(fit)) fit <- update_latent_random(x, y, z, C, which, fit)
        else {
          fit <- list(random = list(beta = lapply(which, function(i) rep(0, ncol(z[[i]]))),
                                    Gamma = lapply(which, function(i) diag(ncol(z[[i]])))),
                      censored = list(mu = lapply(seq_along(y), function(i) y[[i]][C[[i]] == 1]),
                                      Sigma = lapply(C, function(x) diag(1, nrow = sum(x)) * var(unlist(y))),
                                      psi = lapply(C, function(x) rep(0, sum(x)))))
        }
        fit <- c(lmmc.wfit(x, y, w, z, C, which, fit$random, fit$censored),
                 random = list(fit$random), censored = list(fit$censored))
        fit$sigma2$Residual <- update_Residual(fit, w, z, C, which, fit$random, fit$censored)
        n <- nrow(fit$sigma2$Random)
        object@defineComponent(
          list(coef = coef(fit),
               df = fit$df + n*(n+1)/2 + 1,
               sigma2 = fit$sigma2,
               random = fit$random,
               censored = fit$censored))
      }
    }
  }
  object
}
## M-step dispatch: weights are per observation, so reduce to one row
## per group before calling the driver's fit function.  The "fix"
## variants fit all components jointly (variance pooling), the others
## fit one component per posterior column.
setMethod("FLXmstep", signature(model = "FLXMRlmc"),
function(model, weights, components)
{
  weights <- weights[!duplicated(model@group),,drop=FALSE]
  return(sapply(1:ncol(weights), function(k) model@fit(model@x, model@y, weights[,k], model@C,
                                                       components[[k]]@parameters)))
})

setMethod("FLXmstep", signature(model = "FLXMRlmcfix"),
function(model, weights, components)
{
  weights <- weights[!duplicated(model@group),,drop=FALSE]
  return(model@fit(model@x, model@y, weights, model@C,
                   lapply(components, function(x) x@parameters)))
})

setMethod("FLXmstep", signature(model = "FLXMRlmmc"),
function(model, weights, components)
{
  weights <- weights[!duplicated(model@group),,drop=FALSE]
  return(sapply(1:ncol(weights), function(k) model@fit(model@x, model@y, weights[,k], model@z, model@C, model@which,
                                                       components[[k]]@parameters)))
})

setMethod("FLXmstep", signature(model = "FLXMRlmmcfix"),
function(model, weights, components)
{
  weights <- weights[!duplicated(model@group),,drop=FALSE]
  return(model@fit(model@x, model@y, weights, model@z, model@C, model@which,
                   lapply(components, function(x) x@parameters)))
})
## Build the model matrices for the censored LM driver: requires a
## grouping variable ("| group" in the formula), builds the censoring
## indicator C, and stores x, y, C as one-column list matrices holding
## one matrix per group.
setMethod("FLXgetModelmatrix", signature(model="FLXMRlmc"),
function(model, data, formula, lhs=TRUE, ...)
{
  formula_nogrouping <- RemoveGrouping(formula)
  ## Identical formulas mean no "| group" part was present.
  if (formula_nogrouping == formula) stop("please specify a grouping variable")
  model <- callNextMethod(model, data, formula, lhs)
  model@fullformula <- update(model@fullformula,
                              paste(".~. |", .FLXgetGroupingVar(formula)))
  ## Censoring indicator matrix from the 'censored' formula.
  mt2 <- terms(model@censored, data=data)
  mf2 <- model.frame(delete.response(mt2), data=data, na.action = NULL)
  model@C <- model.matrix(attr(mf2, "terms"), data)
  model@group <- grouping <- .FLXgetGrouping(formula, data)$group
  model@x <- matrix(lapply(unique(grouping), function(g) model@x[grouping == g, , drop = FALSE]), ncol = 1)
  if (lhs) model@y <- matrix(lapply(unique(grouping), function(g) model@y[grouping == g, , drop = FALSE]), ncol = 1)
  model@C <- matrix(lapply(unique(grouping), function(g) model@C[grouping == g, , drop = FALSE]), ncol = 1)
  model
})

## Mixed-model variant: additionally builds the random-effects design z
## per group, keeps only the unique designs, and records in @which the
## index of each group's design.
setMethod("FLXgetModelmatrix", signature(model="FLXMRlmmc"),
function(model, data, formula, lhs=TRUE, ...)
{
  model <- callNextMethod(model, data, formula, lhs)
  mt1 <- terms(model@random, data=data)
  mf1 <- model.frame(delete.response(mt1), data=data, na.action = NULL)
  model@z <- model.matrix(attr(mf1, "terms"), data)
  rownames(model@z) <- NULL
  grouping <- .FLXgetGrouping(formula, data)$group
  z <- matrix(lapply(unique(grouping), function(g) model@z[grouping == g, , drop = FALSE]), ncol = 1)
  ## Deduplicate designs; @which maps each group to its unique design.
  model@z <- unique(z)
  model@which <- match(z, model@z)
  model
})
## Total observation count: model@x holds one design matrix per group, so
## sum the per-group row counts.  vapply() instead of sapply() pins the
## per-element result to a length-one integer.
setMethod("FLXgetObs", "FLXMRlmc", function(model) sum(vapply(model@x, nrow, integer(1))))
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRlmc"), function(model, components, ...) {
  ## Per-component unscaled log-posterior contributions for the censored
  ## linear model driver.
  sapply(components, function(comp)
    comp@logLik(model@x, model@y, model@C, model@group, comp@parameters$censored))
})
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRlmmc"), function(model, components, ...) {
  ## Same as the FLXMRlmc method, additionally passing the random-effects
  ## design z and the group-to-pattern map.
  sapply(components, function(comp)
    comp@logLik(model@x, model@y, model@z, model@C, model@which, model@group, comp@parameters$censored))
})
setMethod("predict", signature(object="FLXMRlmc"), function(object, newdata, components, ...)
{
  ## Rebuild the model matrices from newdata (no response needed) and
  ## evaluate each component's prediction function.
  object <- FLXgetModelmatrix(object, newdata, formula = object@fullformula, lhs = FALSE)
  lapply(components, function(component) unlist(component@predict(object@x, ...)))
})
## Random draws are not available for the censored linear model driver.
setMethod("rFLXM", signature(model = "FLXMRlmc", components = "FLXcomponent"),
          function(model, components, ...) {
  stop("This model driver is not implemented yet.")
})
flexmix/R/concomitant.R 0000644 0001762 0000144 00000006474 14404637304 014610 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: concomitant.R 5079 2016-01-31 12:21:12Z gruen $
#
FLXPmultinom <- function(formula=~1) {
  ## Concomitant variable driver: the component priors follow a
  ## multinomial logit model in the covariates given by 'formula'.
  z <- new("FLXPmultinom", name="FLXPmultinom", formula=formula)
  ## Fit via nnet: a softmax network without hidden layer (size = 0,
  ## skip = TRUE) is multinomial logit.  The mask fixes the first
  ## component's coefficients at zero for identifiability.
  multinom.fit <- function(x, y, w, ...) {
    r <- ncol(x)
    p <- ncol(y)
    if (p < 2) stop("Multinom requires at least two components.")
    mask <- c(rep(0, r + 1), rep(c(0, rep(1, r)), p - 1))
    nnet.default(x, y, w, mask = mask, size = 0,
                 skip = TRUE, softmax = TRUE, censored = FALSE,
                 rang = 0, trace=FALSE,...)
  }
  z@fit <- function(x, y, w, ...) multinom.fit(x, y, w, ...)$fitted.values
  ## refit(): refit with nnet::multinom to obtain the Hessian (for
  ## standard errors), then relabel coefficients by covariate name.
  z@refit <- function(x, y, w, ...) {
    if (missing(w) || is.null(w)) w <- rep(1, nrow(y))
    rownames(y) <- rownames(x) <- NULL
    fit <- multinom(y ~ 0 + x, weights = w, data = list(y = y, x = x), Hess = TRUE, trace = FALSE)
    fit$coefnames <- colnames(x)
    fit$vcoefnames <- fit$coefnames[seq_along(fit$coefnames)]
    dimnames(fit$Hessian) <- lapply(dim(fit$Hessian) / ncol(x), function(i) paste(rep(seq_len(i) + 1, each = ncol(x)), colnames(x), sep = ":"))
    fit
  }
  z
}
FLXPconstant <- function() {
  ## Concomitant driver with constant (covariate-free) component priors:
  ## the fitted prior is the (weighted) column mean of the posteriors.
  fit_prior <- function(x, y, w, ...) {
    prior <- if (missing(w) || is.null(w)) colMeans(y)
             else colMeans(w * y) / mean(w)
    matrix(prior, ncol = ncol(y),
           dimnames = list("prior", seq_len(ncol(y))))
  }
  new("FLXP", name = "FLXPconstant", formula = ~1, fit = fit_prior)
}
###**********************************************************
setMethod("FLXgetModelmatrix", signature(model="FLXP"),
          function(model, data, groups, lhs, ...)
{
  ## Build the design matrix of the concomitant model.  Covariates must
  ## be constant within each group; only one row per group is kept.
  mt <- terms(model@formula, data=data)
  mf <- model.frame(delete.response(mt), data=data, na.action = NULL)
  X <- model.matrix(mt, data=mf)
  if (nrow(X)){
    if (!checkGroup(X, groups$group))
      stop("model variables have to be constant for grouping variable")
    model@x <- X[groups$groupfirst,,drop=FALSE]
  }
  else{
    ## Empty model frame (e.g. data passed as a list): fall back to an
    ## intercept-only design with one row per group.
    model@x <- matrix(1, nrow=sum(groups$groupfirst))
  }
  model
})
checkGroup <- function(x, group) {
  ## TRUE iff every column of x is constant within each level of group.
  ## Replaces the original grow-an-accumulator loop
  ## (check <- all(c(check, ...))) with a vectorized vapply() + all().
  all(vapply(levels(group), function(g) {
    rows <- group == g
    if (!any(rows)) return(TRUE)  # empty level contributes nothing
    all(apply(x[rows, , drop = FALSE], 2,
              function(z) length(unique(z)) == 1L))
  }, logical(1)))
}
###**********************************************************
## Refitting the concomitant model on the final posteriors: nothing to do
## for the generic constant driver; for the multinomial driver refit on
## one posterior row per group.
setMethod("refit_mstep", signature(object="FLXP", newdata="missing"),
          function(object, newdata, posterior, group, ...) NULL)
setMethod("refit_mstep", signature(object="FLXPmultinom", newdata="missing"),
          function(object, newdata, posterior, group, ...) {
  groupfirst <- if (length(group)) groupFirst(group) else rep(TRUE, nrow(posterior))
  object@refit(object@x, posterior[groupfirst,,drop=FALSE], ...)
})
###**********************************************************
## Store the fitted concomitant coefficients on the driver object.  For
## the multinomial driver the first component is the zero reference
## category prepended as column "1".
setMethod("FLXfillConcomitant", signature(concomitant="FLXP"), function(concomitant, posterior, weights) {
  concomitant@coef <- concomitant@refit(concomitant@x, posterior, weights)
  concomitant
})
setMethod("FLXfillConcomitant", signature(concomitant="FLXPmultinom"), function(concomitant, posterior, weights) {
  concomitant@coef <- cbind("1" = 0, t(coef(concomitant@refit(concomitant@x, posterior, weights))))
  concomitant
})
###**********************************************************
flexmix/R/group.R 0000644 0001762 0000144 00000000615 14404637304 013415 0 ustar ligges users setMethod("group", signature(object="flexmix"), function(object) {
group <- object@group
if (!length(group)) group <- group(object@model[[1]])
group
})
## Default grouping of a model driver: each observation is its own group.
## For FLXMRglmfix the design matrix is stacked once per component, so
## the row count is divided by the total number of components first.
setMethod("group", signature(object="FLXM"), function(object) {
  factor(seq_len(nrow(object@x)))
})
setMethod("group", signature(object="FLXMRglmfix"), function(object) {
  factor(seq_len(nrow(object@x)/sum(object@nestedformula@k)))
})
flexmix/R/flexmix.R 0000644 0001762 0000144 00000052244 14404637304 013742 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: flexmix.R 5184 2020-06-20 18:27:29Z gruen $
#
log_row_sums <- function(m) {
  ## Numerically stable log(rowSums(exp(m))): subtract each row's maximum
  ## before exponentiating (log-sum-exp trick).
  row_max <- m[cbind(seq_len(nrow(m)), max.col(m, "first"))]
  row_max + log(rowSums(exp(m - row_max)))
}
## The following two methods only fill in and rearrange the model argument
setMethod("flexmix",
          signature(formula = "formula", model="missing"),
          function(formula, data=list(), k=NULL, cluster=NULL,
                   model=NULL, concomitant=NULL, control=NULL, weights=NULL)
{
  ## No model driver given: default to a gaussian GLM driver and
  ## dispatch to the list method, preserving the original call.
  mycall = match.call()
  z <- flexmix(formula=formula, data=data, k=k, cluster=cluster,
               model=list(FLXMRglm()), concomitant=concomitant,
               control=control, weights = weights)
  z@call <- mycall
  z
})
setMethod("flexmix",
          signature(formula = "formula", model="FLXM"),
          function(formula, data=list(), k=NULL, cluster=NULL,
                   model=NULL, concomitant=NULL, control=NULL, weights=NULL)
{
  ## Single model driver: wrap it in a list and dispatch to the list
  ## method, preserving the original call.
  mycall = match.call()
  z <- flexmix(formula=formula, data=data, k=k, cluster=cluster,
               model=list(model), concomitant=concomitant,
               control=control, weights=weights)
  z@call <- mycall
  z
})
## This is the real thing
setMethod("flexmix",
          signature(formula = "formula", model="list"),
          function(formula, data=list(), k=NULL, cluster=NULL,
                   model=NULL, concomitant=NULL, control=NULL, weights=NULL)
{
  ## Main entry point: validate k, set up grouping, model matrices,
  ## weights and the initial posteriors, then run EM via FLXfit().
  mycall = match.call()
  control = as(control, "FLXcontrol")
  if (!is(concomitant, "FLXP")) concomitant <- FLXPconstant()
  groups <- .FLXgetGrouping(formula, data)
  model <- lapply(model, FLXcheckComponent, k, cluster)
  ## All drivers must agree on the number of components.
  k <- unique(unlist(sapply(model, FLXgetK, k)))
  if (length(k) > 1) stop("number of clusters not specified correctly")
  model <- lapply(model, FLXgetModelmatrix, data, formula)
  groups$groupfirst <-
    if (length(groups$group)) groupFirst(groups$group)
    else rep(TRUE, FLXgetObs(model[[1]]))
  if (is(weights, "formula")) {
    weights <- model.frame(weights, data = data, na.action = NULL)[,1]
  }
  ## check if the weights are integer
  ## if non-integer weights are wanted modifications e.g.
  ## for classify != weighted and
  ## plot,flexmix,missing-method are needed
  if (!is.null(weights) & !identical(weights, as.integer(weights)))
    stop("only integer weights allowed")
  ## if weights and grouping is specified the weights within each
  ## group need to be the same
  if (!is.null(weights) & length(groups$group)>0) {
    unequal <- tapply(weights, groups$group, function(x) length(unique(x)) > 1)
    if (any(unequal)) stop("identical weights within groups needed")
  }
  postunscaled <- initPosteriors(k, cluster, FLXgetObs(model[[1]]), groups)
  ## With a single component a concomitant model is meaningless.
  if (ncol(postunscaled) == 1L)
    concomitant <- FLXPconstant()
  concomitant <- FLXgetModelmatrix(concomitant, data = data,
                                   groups = groups)
  z <- FLXfit(model=model, concomitant=concomitant, control=control,
              postunscaled=postunscaled, groups=groups, weights = weights)
  z@formula = formula
  z@call = mycall
  z@k0 = as.integer(k)
  z
})
###**********************************************************
## Driver defaults: pass k through unchanged, one observation per row of
## x, and no special handling when checking or removing components.
setMethod("FLXgetK", signature(model = "FLXM"), function(model, k, ...) k)
setMethod("FLXgetObs", signature(model = "FLXM"), function(model) nrow(model@x))
setMethod("FLXcheckComponent", signature(model = "FLXM"), function(model, ...) model)
setMethod("FLXremoveComponent", signature(model = "FLXM"), function(model, ...) model)
setMethod("FLXmstep", signature(model = "FLXM"), function(model, weights, components, ...) {
  ## Default M-step: fit each component with its posterior-weight column.
  ## Drivers whose fit() accepts a "component" argument additionally get
  ## the previous parameters passed in.
  wants_component <- "component" %in% names(formals(model@fit))
  one_fit <- function(k) {
    if (wants_component)
      model@fit(model@x, model@y, weights[, k], component = components[[k]]@parameters)
    else
      model@fit(model@x, model@y, weights[, k])
  }
  sapply(seq_len(ncol(weights)), one_fit)
})
setMethod("FLXdeterminePostunscaled", signature(model = "FLXM"), function(model, components, ...) {
  ## N x k matrix of per-component log-likelihood contributions.
  ll <- sapply(components, function(comp) comp@logLik(model@x, model@y))
  matrix(ll, nrow = nrow(model@y))
})
###**********************************************************
setMethod("FLXfit", signature(model="list"),
          function(model, concomitant, control, postunscaled=NULL, groups, weights)
{
  ## Run the EM algorithm.  postunscaled holds the (initial) unscaled
  ## posteriors, one column per component; groups carries the grouping
  ## factor and a one-row-per-group indicator; weights are optional
  ## integer case weights.
  ### initialize
  k <- ncol(postunscaled)
  N <- nrow(postunscaled)
  control <- allweighted(model, control, weights)
  if(control@verbose>0)
    cat("Classification:", control@classify, "\n")
  if (control@classify %in% c("SEM", "random")) iter.rm <- 0
  group <- groups$group
  groupfirst <- groups$groupfirst
  if(length(group)>0) postunscaled <- groupPosteriors(postunscaled, group)
  logpostunscaled <- log(postunscaled)
  postscaled <- exp(logpostunscaled - log_row_sums(logpostunscaled))
  llh <- -Inf
  if (control@classify %in% c("SEM", "random")) llh.max <- -Inf
  converged <- FALSE
  components <- rep(list(rep(list(new("FLXcomponent")), k)), length(model))
  ### EM
  for(iter in seq_len(control@iter.max)) {
    ### M-Step
    postscaled = .FLXgetOK(postscaled, control, weights)
    ## Update the priors via the concomitant model (one row per group,
    ## undoing the case weights first when present).
    prior <- if (is.null(weights))
      ungroupPriors(concomitant@fit(concomitant@x, postscaled[groupfirst,,drop=FALSE]),
                    group, groupfirst)
    else ungroupPriors(concomitant@fit(concomitant@x, (postscaled/weights)[groupfirst & weights > 0,,drop=FALSE], weights[groupfirst & weights > 0]),
                       group, groupfirst)
    # Check min.prior
    nok <- if (nrow(prior) == 1) which(prior < control@minprior) else {
      if (is.null(weights)) which(colMeans(prior[groupfirst,,drop=FALSE]) < control@minprior)
      else which(colSums(prior[groupfirst,] * weights[groupfirst])/sum(weights[groupfirst]) < control@minprior)
    }
    if(length(nok)) {
      ## Drop components whose prior fell below minprior, renormalise
      ## priors and posteriors, and shrink all bookkeeping structures.
      if(control@verbose>0)
        cat("*** Removing", length(nok), "component(s) ***\n")
      prior <- prior[,-nok,drop=FALSE]
      prior <- prior/rowSums(prior)
      postscaled <- postscaled[,-nok,drop=FALSE]
      ## Observations with no mass left fall back to the prior.
      postscaled[rowSums(postscaled) == 0,] <- if (nrow(prior) > 1) prior[rowSums(postscaled) == 0,]
      else prior[rep(1, sum(rowSums(postscaled) == 0)),]
      postscaled <- postscaled/rowSums(postscaled)
      if (!is.null(weights)) postscaled <- postscaled * weights
      k <- ncol(prior)
      if (k == 0) stop("all components removed")
      if (control@classify=="random") {
        llh.max <- -Inf
        iter.rm <- iter
      }
      model <- lapply(model, FLXremoveComponent, nok)
      components <- lapply(components, "[", -nok)
    }
    ## Re-estimate the component parameters of every model driver.
    components <- lapply(seq_along(model), function(i) FLXmstep(model[[i]], postscaled, components[[i]]))
    postunscaled <- matrix(0, nrow = N, ncol = k)
    for (n in seq_along(model))
      postunscaled <- postunscaled + FLXdeterminePostunscaled(model[[n]], components[[n]])
    if(length(group)>0)
      postunscaled <- groupPosteriors(postunscaled, group)
    ### E-Step
    ## Code changed thanks to Nicolas Picard
    ## to avoid problems with small likelihoods
    postunscaled <- if (nrow(prior) > 1) postunscaled + log(prior)
    else sweep(postunscaled, 2, log(prior), "+")
    logpostunscaled <- postunscaled
    postunscaled <- exp(postunscaled)
    postscaled <- exp(logpostunscaled - log_row_sums(logpostunscaled))
    ## If an observation has extremely small unscaled posteriors in all
    ## components, exp() underflows to zero, postscaled becomes NaN and
    ## log(rowSums(postunscaled)) is -Inf.  Fall back to the prior.
    ## (translated from the original German comment)
    if (any(is.nan(postscaled))) {
      index <- which(as.logical(rowSums(is.nan(postscaled))))
      postscaled[index,] <- if(nrow(prior)==1) rep(prior, each = length(index)) else prior[index,]
      postunscaled[index,] <- .Machine$double.xmin
    }
    ### check convergence
    llh.old <- llh
    llh <- if (is.null(weights)) sum(log_row_sums(logpostunscaled[groupfirst,,drop=FALSE]))
    else sum(log_row_sums(logpostunscaled[groupfirst,,drop=FALSE])*weights[groupfirst])
    if(is.na(llh) | is.infinite(llh))
      stop(paste(formatC(iter, width=4),
                 "Log-likelihood:", llh))
    ## Relative log-likelihood improvement below tolerance => converged.
    if (abs(llh-llh.old)/(abs(llh)+0.1) < control@tolerance){
      if(control@verbose>0){
        printIter(iter, llh)
        cat("converged\n")
      }
      converged <- TRUE
      break
    }
    ## Stochastic EM: keep the best solution seen so far.
    if (control@classify=="random") {
      if (llh.max < llh) {
        components.max <- components
        prior.max <- prior
        postscaled.max <- postscaled
        postunscaled.max <- postunscaled
        llh.max <- llh
      }
    }
    if(control@verbose && (iter%%control@verbose==0))
      printIter(iter, llh)
  }
  ### Construct return object
  if (control@classify=="random") {
    components <- components.max
    prior <- prior.max
    postscaled <- postscaled.max
    postunscaled <- postunscaled.max
    llh <- llh.max
    iter <- control@iter.max - iter.rm
  }
  ## Re-nest: one list entry per component, each holding all drivers.
  components <- lapply(seq_len(k), function(i) lapply(components, function(x) x[[i]]))
  names(components) <- paste("Comp", seq_len(k), sep=".")
  cluster <- max.col(postscaled)
  size <- if (is.null(weights)) tabulate(cluster, nbins=k) else tabulate(rep(cluster, weights), nbins=k)
  names(size) <- seq_len(k)
  concomitant <- FLXfillConcomitant(concomitant, postscaled[groupfirst,,drop=FALSE], weights[groupfirst])
  df <- concomitant@df(concomitant@x, k) + sum(sapply(components, sapply, slot, "df"))
  control@nrep <- 1
  prior <- if (is.null(weights)) colMeans(postscaled[groupfirst,,drop=FALSE])
  else colSums(postscaled[groupfirst,,drop=FALSE] * weights[groupfirst])/sum(weights[groupfirst])
  retval <- new("flexmix", model=model, prior=prior,
                posterior=list(scaled=postscaled,
                               unscaled=postunscaled),
                weights = weights,
                iter=iter, cluster=cluster, size = size,
                logLik=llh, components=components,
                concomitant=concomitant,
                control=control, df=df, group=group, k=as(k, "integer"),
                converged=converged)
  retval
})
###**********************************************************
## Turn scaled posteriors into M-step weights according to the
## classification mode: "weighted" uses the posteriors as-is,
## "CEM"/"hard" assigns each observation to its most likely component,
## "SEM"/"random" draws the assignment from the posterior distribution.
## With case weights the hard/stochastic assignments are replicated
## according to the weights.
.FLXgetOK = function(p, control, weights){
  n = ncol(p)
  N = seq_len(n)
  if (is.null(weights)) {
    if (control@classify == "weighted")
      return(p)
    else {
      z = matrix(FALSE, nrow = nrow(p), ncol = n)
      if(control@classify %in% c("CEM", "hard"))
        m = max.col(p)
      else if(control@classify %in% c("SEM", "random"))
        m = apply(p, 1, function(x) sample(N, size = 1, prob = x))
      else stop("Unknown classification method")
      z[cbind(seq_len(nrow(p)), m)] = TRUE
    }
  }else {
    if(control@classify=="weighted")
      z <- p * weights
    else{
      z = matrix(FALSE, nrow=nrow(p), ncol=n)
      if(control@classify %in% c("CEM", "hard")) {
        m = max.col(p)
        z[cbind(seq_len(nrow(p)), m)] = TRUE
        z <- z * weights
      }
      else if(control@classify %in% c("SEM", "random"))
        ## Draw weights[i] assignments per observation; table() counts
        ## them per component.
        z = t(sapply(seq_len(nrow(p)), function(i) table(factor(sample(N, size=weights[i], prob=p[i,], replace=TRUE), N))))
      else stop("Unknown classification method")
    }
  }
  z
}
###**********************************************************
RemoveGrouping <- function(formula) {
  ## Strip a trailing "| group" term (possibly parenthesised) from the
  ## right-hand side of a formula; return the formula unchanged when no
  ## grouping is present.
  out <- formula
  pos <- length(formula)
  rhs <- formula[[pos]]
  if (length(rhs) > 1) {
    head_op <- deparse(rhs[[1]])
    if (head_op == "|") {
      out[[pos]] <- rhs[[2]]
    } else if (head_op == "(") {
      inner <- rhs[[2]]
      if (length(inner) == 3 && inner[[1]] == "|")
        out[[pos]] <- inner[[2]]
    }
  }
  out
}
.FLXgetGroupingVar <- function(x)
{
  ## Descend into the last element of the (formula) call until a single
  ## symbol remains: for y ~ x | g this yields the grouping symbol g.
  repeat {
    n <- length(x)
    if (n <= 1) break
    x <- x[[n]]
  }
  x
}
.FLXgetGrouping <- function(formula, data)
{
  ## Split a formula into its grouping factor (empty when no "|" term is
  ## present) and the formula with the grouping removed.
  formula_nogroup <- RemoveGrouping(formula)
  group <- if (identical(formula_nogroup, formula)) factor(integer(0))
           else factor(eval(.FLXgetGroupingVar(formula), data))
  list(group = group, formula = formula_nogroup)
}
setMethod("FLXgetModelmatrix", signature(model="FLXM"),
          function(model, data, formula, lhs=TRUE, ...)
{
  ## Construct model@x (and model@y when lhs = TRUE) from the formula
  ## and data; cache terms, contrasts and factor levels so the same
  ## coding can be reused for prediction on new data.
  formula <- RemoveGrouping(formula)
  if (length(grep("\\|", deparse(model@formula)))) stop("no grouping variable allowed in the model")
  if(is.null(model@formula))
    model@formula = formula
  ## model@fullformula = update.formula(formula, model@formula)
  ## NOTE(review): is this the right way when a dot appears in both
  ## formulas?  (translated from the original German comment)
  model@fullformula = update(terms(formula, data=data), model@formula)
  if (lhs) {
    mf <- if (is.null(model@terms)) model.frame(model@fullformula, data=data, na.action = NULL)
    else model.frame(model@terms, data=data, na.action = NULL, xlev = model@xlevels)
    model@terms <- attr(mf, "terms")
    response <- as.matrix(model.response(mf))
    model@y <- model@preproc.y(response)
  }
  else {
    ## Prediction path: no response available, drop it from the terms.
    mt1 <- if (is.null(model@terms)) terms(model@fullformula, data=data) else model@terms
    mf <- model.frame(delete.response(mt1), data=data, na.action = NULL, xlev = model@xlevels)
    model@terms<- attr(mf, "terms")
    ## NOTE(review): why was this here?  (translated)
    ## attr(mt, "intercept") <- attr(mt1, "intercept")
  }
  X <- model.matrix(model@terms, data=mf)
  model@contrasts <- attr(X, "contrasts")
  model@x <- model@preproc.x(X)
  model@xlevels <- .getXlevels(model@terms, mf)
  model
})
## groupfirst: for grouped observation we need to be able to use
## the posterior of each group, but for computational simplicity
## post(un)scaled has N rows (with mutiple identical rows for each
## group). postscaled[groupfirst,] extracts posteriors of each
## group ordered as the appear in the data set.
groupFirst <- function(x) {
  ## TRUE at the first occurrence of each group label.
  !duplicated(x)
}
## if we have a group variable, set the posterior to the product
## of all density values for that group (=sum in logarithm)
groupPosteriors <- function(x, group)
{
  ## Replace each row of x by its group's column-wise sum (the product of
  ## density values in log space), keeping one row per observation.
  if (length(group) > 0) {
    idx <- as.integer(group)
    group_sums <- matrix(unname(apply(x, 2, tapply, idx, sum)), ncol = ncol(x))
    x <- group_sums[idx, , drop = FALSE]
  }
  x
}
ungroupPriors <- function(x, group, groupfirst) {
  ## Expand one-row-per-group priors back to one row per observation.
  ## x's rows are ordered as the groups first appear; sort them into
  ## level order before indexing by the integer-coded group.
  if (length(group) == 0) group <- seq_along(groupfirst)
  n_groups <- length(group[groupfirst])
  if (nrow(x) >= n_groups) {
    x <- x[order(as.integer(group[groupfirst])), , drop = FALSE]
    x <- x[as.integer(group), , drop = FALSE]
  }
  x
}
setMethod("allweighted", signature(model = "list", control = "ANY", weights = "ANY"), function(model, control, weights) {
  ## Resolve the classification mode: weighted ML is only possible when
  ## every model driver supports it; otherwise fall back to hard
  ## classification (and reject case weights entirely).
  supports_weighted <- all(sapply(model, function(x) allweighted(x, control, weights)))
  if (supports_weighted) {
    if (control@classify == "auto")
      control@classify <- "weighted"
    return(control)
  }
  if (control@classify == "auto") {
    control@classify <- "hard"
  } else if (control@classify == "weighted") {
    warning("only hard classification supported for the modeldrivers")
    control@classify <- "hard"
  }
  if (!is.null(weights))
    stop("it is not possible to specify weights for models without weighted ML estimation")
  control
})
## A single driver supports weighted ML estimation iff its slot says so.
setMethod("allweighted", signature(model = "FLXM", control = "ANY", weights = "ANY"), function(model, control, weights) {
  model@weighted
})
initPosteriors <- function(k, cluster, N, groups) {
  ## Build the N x k matrix of initial unscaled posteriors from either a
  ## posterior matrix, an assignment vector, or a random start with k
  ## components (assigned per group, then expanded per observation).
  if (is(cluster, "matrix")) {
    if (!is.null(k) && k != ncol(cluster))
      stop("specified k does not match the number of columns of cluster")
    return(cluster)
  }
  if (is.null(cluster)) {
    if (is.null(k))
      stop("either k or cluster must be specified")
    cluster <- ungroupPriors(as.matrix(sample(seq_len(k), size = sum(groups$groupfirst), replace=TRUE)),
                             groups$group, groups$groupfirst)
  } else {
    cluster <- as(cluster, "integer")
    if (!is.null(k) && k != max(cluster))
      stop("specified k does not match the values in cluster")
    k <- max(cluster)
  }
  ## Soft start: 0.9 for the assigned component, 0.1 elsewhere.
  postunscaled <- matrix(0.1, nrow = N, ncol = k)
  for (comp in seq_len(k))
    postunscaled[cluster == comp, comp] <- 0.9
  postunscaled
}
###**********************************************************
setMethod("predict", signature(object="FLXdist"),
          function(object, newdata=list(), aggregate=FALSE, ...){
  ## Component-wise predictions from every model driver; with
  ## aggregate = TRUE predictions are averaged using the component
  ## priors instead of being returned per component.
  if (missing(newdata)) return(fitted(object, aggregate=aggregate, drop=FALSE))
  x = list()
  for(m in seq_along(object@model)) {
    comp <- lapply(object@components, "[[", m)
    x[[m]] <- predict(object@model[[m]], newdata, comp, ...)
  }
  if (aggregate) {
    prior_weights <- prior(object, newdata)
    z <- lapply(x, function(z) matrix(rowSums(do.call("cbind", z) * prior_weights), nrow = nrow(z[[1]])))
  }
  else {
    ## Regroup: one list entry per component, columns are the drivers.
    z <- list()
    for (k in seq_len(object@k)) {
      z[[k]] <- do.call("cbind", lapply(x, "[[", k))
    }
    names(z) <- paste("Comp", seq_len(object@k), sep=".")
  }
  z
})
###**********************************************************
setMethod("parameters", signature(object="FLXdist"),
          function(object, component=NULL, model=NULL, which = c("model", "concomitant"),
                   simplify=TRUE, drop=TRUE)
{
  ## Extract component (or concomitant) parameters.  simplify collapses
  ## parameter lists into vectors/matrices; drop removes the list
  ## nesting when a single component or model is selected.
  which <- match.arg(which)
  if (is.null(component)) component <- seq_len(object@k)
  if (is.null(model)) model <- seq_along(object@model)
  if (which == "model") {
    if (simplify) {
      parameters <- sapply(model, function(m) sapply(object@components[component], function(x) unlist(x[[m]]@parameters), simplify=TRUE),
                           simplify = FALSE)
    }
    else {
      parameters <- sapply(model, function(m) sapply(object@components[component], function(x) x[[m]]@parameters, simplify=FALSE),
                           simplify = FALSE)
    }
    if (drop) {
      if (length(component) == 1 && !simplify) parameters <- lapply(parameters, "[[", 1)
      if (length(model) == 1) parameters <- parameters[[1]]
    }
  } else {
    ## Concomitant coefficients: one column per selected component.
    parameters <- object@concomitant@coef[,component,drop=FALSE]
  }
  parameters
})
setMethod("prior", signature(object="FLXdist"),
          function(object, newdata, ...) {
  ## Component priors.  With newdata they are recomputed from the
  ## concomitant model, yielding one row per observation.
  if (missing(newdata))
    prior <- object@prior
  else {
    groups <- .FLXgetGrouping(object@formula, newdata)
    ## Number of observations: data.frame rows, or the minimum length
    ## over the entries of a list.
    nobs <- if (is(newdata, "data.frame")) nrow(newdata)
    else min(sapply(newdata, function(x) {
      if (is.null(nrow(x))) length(x) else nrow(x)
    }))
    group <- if (length(groups$group)) groups$group else factor(seq_len(nobs))
    object@concomitant <- FLXgetModelmatrix(object@concomitant, data = newdata,
                                            groups = list(group=group,
                                                          groupfirst = groupFirst(group)))
    prior <- determinePrior(object@prior, object@concomitant, group)[as.integer(group),]
  }
  prior
})
setMethod("posterior", signature(object="flexmix", newdata="missing"),
          function(object, newdata, unscaled = FALSE, ...)
{
  ## Return the stored posterior probabilities, optionally unscaled.
  if (unscaled) object@posterior$unscaled else object@posterior$scaled
})
setMethod("posterior", signature(object="FLXdist", newdata="listOrdata.frame"),
          function(object, newdata, unscaled=FALSE,...) {
  ## Posterior probabilities for new data: sum the per-driver
  ## log-likelihood contributions, aggregate within groups, add the
  ## (log) priors and normalise with the log-sum-exp trick.
  comp <- lapply(object@components, "[[", 1)
  postunscaled <- posterior(object@model[[1]], newdata, comp, ...)
  for (m in seq_along(object@model)[-1]) {
    comp <- lapply(object@components, "[[", m)
    postunscaled <- postunscaled + posterior(object@model[[m]], newdata, comp,
                                             ...)
  }
  groups <- .FLXgetGrouping(object@formula, newdata)
  prior <- prior(object, newdata = newdata)
  if(length(groups$group)>0)
    postunscaled <- groupPosteriors(postunscaled, groups$group)
  postunscaled <- postunscaled + log(prior)
  if (unscaled) return(exp(postunscaled))
  else return(exp(postunscaled - log_row_sums(postunscaled)))
})
setMethod("posterior", signature(object="FLXM", newdata="listOrdata.frame"),
          function(object, newdata, components, ...) {
  ## Per-component log-likelihoods of a single model driver on new data.
  object <- FLXgetModelmatrix(object, newdata, object@fullformula, lhs = TRUE)
  FLXdeterminePostunscaled(object, components, ...)
})
## Cluster memberships: the stored assignments for a fitted model, or
## the maximum a-posteriori assignment for new data.
setMethod("clusters", signature(object="flexmix", newdata="missing"),
          function(object, newdata, ...)
{
  object@cluster
})
setMethod("clusters", signature(object="FLXdist", newdata="ANY"),
          function(object, newdata, ...)
{
  max.col(posterior(object, newdata, ...))
})
###**********************************************************
setMethod("summary", "flexmix",
          function(object, eps=1e-4, ...){
  ## Summarise a fitted mixture: information criteria plus a table with
  ## the component priors, sizes and the number of observations whose
  ## posterior exceeds eps.
  z <- new("summary.flexmix",
           call = object@call,
           AIC = AIC(object),
           BIC = BIC(object),
           logLik = logLik(object))
  TAB <- data.frame(prior=object@prior,
                    size=object@size)
  rownames(TAB) <- paste("Comp.", seq_len(nrow(TAB)), sep="")
  TAB[["post>0"]] <- if (is.null(object@weights)) colSums(object@posterior$scaled > eps)
  else colSums((object@posterior$scaled > eps) * object@weights)
  ## size / post>0: a ratio near one suggests little overlap between
  ## this component and the others.
  TAB[["ratio"]] <- TAB[["size"]]/TAB[["post>0"]]
  z@comptab = TAB
  z
})
###**********************************************************
flexmix/R/examples.R 0000644 0001762 0000144 00000004113 14404637304 014074 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: examples.R 5079 2016-01-31 12:21:12Z gruen $
#
ExNPreg <- function(n)
{
  ## Artificial regression data with two latent classes of size n each:
  ## gaussian (yn), poisson (yp) and binomial (yb) responses on a common
  ## covariate x, plus grouping ids of size 2 and 4.
  if (n %% 2 != 0) stop("n must be even")
  x <- runif(2 * n, 0, 10)
  first <- seq_len(n)
  second <- n + first
  mp <- exp(c(2 - 0.2 * x[first], 1 + 0.1 * x[second]))
  mb <- binomial()$linkinv(c(x[first] - 5, 5 - x[second]))
  ## Note: random numbers are drawn in the same order as before
  ## (runif, rnorm, rpois, rbinom).
  yn <- c(5 * x[first], 40 - (x[second] - 5)^2) + 3 * rnorm(n)
  yp <- rpois(2 * n, mp)
  yb <- rbinom(2 * n, size = 1, prob = mb)
  data.frame(x = x, yn = yn, yp = yp, yb = yb,
             class = rep(1:2, c(n, n)),
             id1 = factor(rep(first, rep(2, n))),
             id2 = factor(rep(seq_len(n / 2), rep(4, n / 2))))
}
ExNclus = function(n=100)
{
  ## Artificial 2d clustering data: four gaussian clusters of sizes
  ## n, n, 1.5n and 2n with different means and covariance matrices.
  if(n %% 2 != 0) stop("n must be even")
  rbind(mvtnorm::rmvnorm(n, mean=rep(0,2)),
        mvtnorm::rmvnorm(n, mean=c(8,0), sigma=diag(1:2)),
        mvtnorm::rmvnorm(1.5*n, mean=c(-2,6), sigma=diag(2:1)),
        mvtnorm::rmvnorm(2*n, mean=c(4,4), sigma=matrix(c(1,.9,.9,1), 2)))
}
ExLinear <- function(beta, n, xdist="runif", xdist.args=NULL,
                     family=c("gaussian", "poisson"), sd=1, ...)
{
  ## Simulate a finite mixture of (generalised) linear regressions.
  ##
  ## beta: (d+1) x k coefficient matrix (intercept first), one column
  ##       per component; n: component sizes (recycled to length k);
  ## xdist/xdist.args: RNG names and argument lists for the d covariates;
  ## sd: gaussian error sd (recycled to k); ... : shared RNG arguments.
  ## Returns a data.frame with response y, covariates x1..xd and the
  ## true component labels in attribute "clusters".
  family <- match.arg(family)
  X <- NULL
  y <- NULL
  k <- ncol(beta)
  d <- nrow(beta)-1
  n <- rep(n, length.out=k)
  if(family=="gaussian") sd <- rep(sd, length.out=k)
  xdist <- rep(xdist, length.out=d)
  if(is.null(xdist.args)){
    xdist.args <- list(list(...))
  }
  if(!is.list(xdist.args[[1]]))
    xdist.args <- list(xdist.args)
  xdist.args <- rep(xdist.args, length.out=d)
  ## seq_len() instead of 1:k / 1:d: safe against the c(1, 0) blow-up
  ## of 1:0 when a dimension is zero.
  for(i in seq_len(k))
  {
    X1 <- 1
    for(j in seq_len(d)){
      xdist.args[[j]]$n <- n[i]
      X1 <- cbind(X1, do.call(xdist[j], xdist.args[[j]]))
    }
    X <- rbind(X, X1)
    xb <- X1 %*% beta[,i,drop=FALSE]
    if(family=="gaussian")
      y1 <- xb + rnorm(n[i], sd=sd[i])
    else
      y1 <- rpois(n[i], exp(xb))
    y <- c(y, y1)
  }
  ## Drop the intercept column; name the covariates x1..xd.
  X <- X[,-1,drop=FALSE]
  colnames(X) <- paste("x", seq_len(d), sep="")
  z <- data.frame(y=y, X)
  attr(z, "clusters") <- rep(seq_len(k), n)
  z
}
flexmix/R/rFLXmodel.R 0000644 0001762 0000144 00000005667 14404637304 014131 0 ustar ligges users setMethod("rFLXM", signature(model="FLXM", components="list"),
function(model, components, class, ...) {
y <- NULL
for (l in seq_along(components)) {
yl <- as.matrix(rFLXM(model, components[[l]], ...))
if (is.null(y)) y <- matrix(NA, nrow = length(class), ncol = ncol(yl))
y[class == l,] <- yl[class==l,]
}
y
})
setMethod("rFLXM", signature(model = "FLXMRglm", components="FLXcomponent"),
          function(model, components, ...) {
  ## Draw random responses from a single GLM component, dispatching on
  ## the model family.
  family <- model@family
  n <- nrow(model@x)
  if(family == "gaussian") {
    sigma <- components@parameters$sigma
    y <- rnorm(n, mean = components@predict(model@x, ...), sd = sigma)
  }
  else if (family == "binomial") {
    ## Binomial needs the number of trials: either passed as "size" in
    ## ... or recovered from the stored (success, failure) response.
    dotarg = list(...)
    if ("size" %in% names(dotarg))
      size <- dotarg$size
    else {
      if (nrow(model@y)!=n) stop("no y values - specify a size argument")
      size <- rowSums(model@y)
    }
    parms <- components@parameters
    y <- rbinom(n, prob = components@predict(model@x, ...), size=size)
    y <- cbind(y, size - y)
  }
  else if (family == "poisson") {
    y <- rpois(n, lambda = components@predict(model@x, ...))
  }
  else if (family == "Gamma") {
    ## Mean parameterisation: scale = mean / shape.
    shape <- components@parameters$shape
    y <- rgamma(n, shape = shape, scale = components@predict(model@x, ...)/shape)
  }
  else stop("family not supported")
  y
})
setMethod("rFLXM", signature(model = "FLXMRglmfix", components="list"),
          function(model, components, class, ...) {
  ## The FLXMRglmfix design matrix is stacked component-wise.  For each
  ## component extract its block of rows and active columns, draw from
  ## the corresponding plain GLM, and keep the draws for observations
  ## belonging to that component.
  k <- sum(model@nestedformula@k)
  n <- nrow(model@x)/k
  y <- matrix(NA, nrow = length(class), ncol = ncol(model@y))
  model.sub <- as(model, "FLXMRglm")
  for (l in seq_len(k)) {
    rok <- (l-1)*n + seq_len(n)
    model.sub@x <- model@x[rok, as.logical(model@design[l,]), drop=FALSE]
    model.sub@y <- model@y[rok,,drop=FALSE]
    yl <- as.matrix(rFLXM(model.sub, components[[l]], ...))
    y[class==l,] <- yl[class==l,]
  }
  y
})
## Independent multivariate binomial / binary draws: one column per
## success probability.
rmvbinom <- function(n, size, prob) sapply(prob, function(pr) rbinom(n, size, pr))
rmvbinary <- function(n, center) sapply(center, function(pr) rbinom(n, 1, pr))
setMethod("rFLXM", signature(model = "FLXMC", components = "FLXcomponent"),
          function(model, components, class, ...) {
  ## Draw from a model-based clustering component by calling the
  ## r<dist> function matching the driver's distribution name, passing
  ## the component parameters as arguments.
  rmvnorm <- function(n, center, cov) mvtnorm::rmvnorm(n = n, mean = center, sigma = cov)
  dots <- list(...)
  FUN <- paste("r", model@dist, sep = "")
  args <- c(n = nrow(model@x), dots, components@parameters)
  return(do.call(FUN, args))
})
flexmix/R/plot-flexmix.R 0000644 0001762 0000144 00000013530 14404637304 014711 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: plot-flexmix.R 5079 2016-01-31 12:21:12Z gruen $
#
determine_y <- function(h, root) {
  ## Bar heights from a histogram object: square-root scaled for a
  ## rootogram, raw counts otherwise.
  counts <- h$counts
  if (root) sqrt(counts) else counts
}
panel.rootogram <-
  function (x, breaks, equal.widths = TRUE, nint = max(round(log2(length(x)) + 1), 3), alpha = plot.polygon$alpha,
            col = plot.polygon$col, border = plot.polygon$border,
            lty = plot.polygon$lty, lwd = plot.polygon$lwd, subscripts, groups, mark, root = TRUE, markcol, ...)
{
  ## Lattice panel function: a (square-root scaled when root = TRUE)
  ## histogram of posterior probabilities; observations assigned to the
  ## component given by 'mark' are overplotted in 'markcol'.
  x <- as.numeric(x)
  plot.polygon <- trellis.par.get("plot.polygon")
  ## Baseline at y = 0.
  grid.lines(x = c(0.05, 0.95), y = unit(c(0, 0), "native"),
             gp = gpar(col = border, lty = lty, lwd = lwd, alpha = alpha),
             default.units = "npc")
  if (length(x) > 0) {
    if (is.null(breaks)) {
      breaks <- if (equal.widths)
        do.breaks(range(x, finite = TRUE), nint)
      else quantile(x, 0:nint/nint, na.rm = TRUE)
    }
    h <- hist.constructor(x, breaks = breaks, plot = FALSE, ...)
    y <- determine_y(h, root)
    if (!is.null(mark)) {
      ## Histogram of the marked cluster only, on the same breaks.
      h1 <- hist.constructor(x[groups[subscripts] == mark], breaks = h$breaks, plot = FALSE, ...)
      y1 <- determine_y(h1, root)
    }
    nb <- length(breaks)
    if (length(y) != nb - 1)
      warning("problem with hist computations")
    if (nb > 1) {
      panel.rect(x = breaks[-nb], y = 0, height = y, width = diff(breaks),
                 col = col, alpha = alpha, border = border, lty = lty,
                 lwd = lwd, just = c("left", "bottom"))
      if (!is.null(mark)) panel.rect(x = breaks[-nb], y = 0, height = y1, width = diff(breaks),
                                     col = markcol, alpha = alpha, border = border, lty = lty,
                                     lwd = lwd, just = c("left", "bottom"))
    }
  }
}
prepanel.rootogram <-
  function (x, breaks, equal.widths = TRUE, nint = max(round(log2(length(x)) + 1), 3), root = TRUE, ...)
{
  ## Compute axis limits for panel.rootogram, mirroring its histogram
  ## construction so the bars fit the panel exactly.
  if (length(x) < 1)
    list(xlim = NA, ylim = NA, dx = NA, dy = NA)
  else {
    if (is.factor(x)) {
      isFactor <- TRUE
      xlimits <- levels(x)
    }
    else isFactor <- FALSE
    if (!is.numeric(x))
      x <- as.numeric(x)
    if (is.null(breaks)) {
      breaks <- if (equal.widths)
        do.breaks(range(x, finite = TRUE), nint)
      else quantile(x, 0:nint/nint, na.rm = TRUE)
    }
    h <- hist.constructor(x, breaks = breaks, plot = FALSE, ...)
    y <- determine_y(h, root)
    list(xlim = if (isFactor) xlimits else range(x, breaks,
                                                 finite = TRUE), ylim = range(0, y, finite = TRUE),
         dx = 1, dy = 1)
  }
}
## Plot method for fitted flexmix objects: a lattice rootogram (or
## histogram, root = FALSE) of the scaled posterior probabilities, one
## panel per mixture component.  Posteriors below 'eps' are dropped,
## observations of the component 'mark' can be highlighted in 'markcol'.
setMethod("plot", signature(x="flexmix", y="missing"),
function(x, y, mark=NULL, markcol=NULL, col=NULL,
eps=1e-4, root=TRUE, ylim=TRUE, main=NULL, xlab = "", ylab = "",
as.table = TRUE, endpoints = c(-0.04, 1.04), ...){
k <- length(x@prior)
if(is.null(markcol)) markcol <- FullColors[5]
if(is.null(col)) col <- LightColors[4]
if(is.null(main)){
main <- ifelse(root,
"Rootogram of posterior probabilities",
"Histogram of posterior probabilities")
main <- paste(main, ">", eps)
}
## for grouped data only the first observation of each group carries
## an independent posterior
groupfirst <- if (length(x@group)) !duplicated(x@group) else TRUE
## build a long data frame: one row per (observation, component) pair;
## with case weights each observation is replicated according to its
## weight
if (is.null(x@weights))
z <- data.frame(posterior = as.vector(x@posterior$scaled[groupfirst,,drop=FALSE]),
component = factor(rep(seq_len(x@k), each = nrow(x@posterior$scaled[groupfirst,,drop=FALSE])),
levels = seq_len(x@k), labels = paste("Comp.", seq_len(x@k))),
cluster = rep(as.vector(x@cluster[groupfirst]), k))
else
z <- data.frame(posterior = rep(as.vector(x@posterior$scaled[groupfirst,,drop=FALSE]),
rep(x@weights[groupfirst], k)),
component = factor(rep(seq_len(x@k), each = sum(x@weights[groupfirst])),
seq_len(x@k), paste("Comp.", seq_len(x@k))),
cluster = rep(rep(as.vector(x@cluster[groupfirst]), x@weights[groupfirst]), k))
panel <- function(x, subscripts, groups, ...)
panel.rootogram(x, root = root, mark = mark, col = col, markcol = markcol,
subscripts = subscripts, groups = groups, ...)
prepanel <- function(x, ...) prepanel.rootogram(x, root = root, ...)
z <- subset(z, posterior > eps)
cluster <- NULL # make codetools happy
if (is.logical(ylim)) {
## ylim = TRUE: common y scale; FALSE: free y scale per panel
scales <- if (ylim) list() else list(y = list(relation = "free"))
hh <- histogram(~ posterior | component, data = z, main = main, ylab = ylab, xlab = xlab, groups = cluster,
panel = panel, prepanel = prepanel, scales = scales, as.table = as.table, endpoints = endpoints, ...)
}
else hh <- histogram(~ posterior | component, data = z, main = main, ylab = ylab, xlab = xlab, groups = cluster,
ylim = ylim, panel = panel, prepanel = prepanel, as.table = as.table, endpoints = endpoints, ...)
if (root) {
## for rootograms relabel the y axis with the squared tick values so
## the axis shows counts although sqrt(counts) are drawn
hh$yscale.components <- function (lim, packet.number = 0, packet.list = NULL, right = TRUE, ...)
{
comps <- calculateAxisComponents(lim, packet.list = packet.list,
packet.number = packet.number, ...)
comps$at <- sqrt(seq(min(comps$at)^2, max(comps$at)^2, length.out = length(comps$at)))
comps$labels <- format(comps$at^2, trim = TRUE)
list(num.limit = comps$num.limit, left = list(ticks = list(at = comps$at,
tck = 1), labels = list(at = comps$at, labels = comps$labels,
cex = 1, check.overlap = comps$check.overlap)), right = right)
}
}
hh
})
flexmix/R/ziglm.R 0000644 0001762 0000144 00000004053 14404637304 013403 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: ziglm.R 5079 2016-01-31 12:21:12Z gruen $
#
## Zero-inflated GLM driver: an FLXMRglm model whose first component is
## fixed at point mass zero (see the FLXmstep method defined below).
setClass("FLXMRziglm", contains = "FLXMRglm")

## Constructor: build the underlying FLXMRglm model for the requested
## family and wrap it in an FLXMRziglm object with a descriptive name.
FLXMRziglm <- function(formula = . ~ .,
                       family = c("binomial", "poisson"), ...) {
  family <- match.arg(family)
  glm_model <- FLXMRglm(formula, family, ...)
  new("FLXMRziglm", glm_model,
      name = paste("FLXMRziglm", family, sep = ":"))
}
## Build the model matrix via the inherited FLXMRglm method, then check
## that the formula contains an intercept: the zero-inflation component
## is represented by fixing the intercept coefficient at -Inf, so an
## intercept column must be present.
setMethod("FLXgetModelmatrix", signature(model="FLXMRziglm"),
function(model, data, formula, lhs=TRUE, ...) {
model <- callNextMethod(model, data, formula, lhs)
if (attr(terms(model@fullformula), "intercept") == 0)
stop("please include an intercept")
model
})
## Remove components: once the zero-inflation component (component 1)
## is dropped, the model degenerates to a plain FLXMRglm.
setMethod("FLXremoveComponent", signature(model = "FLXMRziglm"),
          function(model, nok, ...) {
            if (1 %in% nok) {
              as(model, "FLXMRglm")
            } else {
              model
            }
          })
## M-step: component 1 is the degenerate zero component (intercept fixed
## at -Inf, all other coefficients 0, zero degrees of freedom); the
## remaining components are fitted by the ordinary FLXMRglm M-step on
## their posterior weight columns.
setMethod("FLXmstep", signature(model = "FLXMRziglm"),
function(model, weights, components, ...) {
coef <- c(-Inf, rep(0, ncol(model@x)-1))
names(coef) <- colnames(model@x)
comp.1 <- model@defineComponent(
list(coef = coef, df = 0, offset = NULL,
family = model@family))
c(list(comp.1),
FLXmstep(as(model, "FLXMRglm"), weights[, -1, drop=FALSE], components[-1]))
})
## Design matrix: the fixed zero component contributes a row of zeros,
## the remaining components use the ordinary FLXMRglm design.
setMethod("FLXgetDesign", signature(object = "FLXMRziglm"),
          function(object, components) {
            glm_design <- FLXgetDesign(as(object, "FLXMRglm"), components[-1])
            rbind(0, glm_design)
          })
## Replace parameters: component 1 is fixed and kept unchanged; new
## parameter values only affect the remaining GLM components.
setMethod("FLXreplaceParameters", signature(object="FLXMRziglm"),
          function(object, components, parms) {
            updated <- FLXreplaceParameters(as(object, "FLXMRglm"),
                                            components[-1], parms)
            c(components[[1]], updated)
          })
## Log-likelihood gradient: the degenerate zero component has no free
## parameters, so delegate to FLXMRglm for components 2, ..., K only.
setMethod("FLXgradlogLikfun", signature(object="FLXMRziglm"),
          function(object, components, weights, ...) {
            FLXgradlogLikfun(as(object, "FLXMRglm"),
                             components[-1],
                             weights[, -1, drop = FALSE])
          })
## Refit via optim: only components 2, ..., K are refitted; rename the
## results so they keep their original component numbers.
setMethod("refit_optim", signature(object = "FLXMRziglm"),
          function(object, components, ...) {
            refitted <- refit_optim(as(object, "FLXMRglm"), components[-1], ...)
            names(refitted) <- paste("Comp", 1 + seq_along(refitted), sep = ".")
            refitted
          })
flexmix/R/stepFlexmix.R 0000644 0001762 0000144 00000013513 14404637304 014572 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: stepFlexmix.R 5079 2016-01-31 12:21:12Z gruen $
#
## Container returned by stepFlexmix(): the list of fitted models, the
## numbers of components tried (k), the number of EM repetitions per k
## (nrep), the matrix of log-likelihoods (one row per k, one column per
## repetition) and the matched call.
setClass("stepFlexmix",
representation(models="list",
k="integer",
nrep="integer",
logLiks="matrix",
call="call"))
## Fit flexmix models repeatedly for each number of components in k and
## keep, per k, the fit with the highest log-likelihood.
##
## Arguments:
##   ...     : passed on to flexmix()
##   k       : numbers of components to try (NULL: flexmix() default)
##   nrep    : number of EM runs per value of k
##   verbose : print progress information?
##   drop    : if only one model remains, return it directly instead of
##             wrapping it in a "stepFlexmix" object
##   unique  : keep only the best model for each distinct k
## Returns a "flexmix" object (drop = TRUE and a single model) or a
## "stepFlexmix" object containing all retained fits.
stepFlexmix <- function(..., k=NULL, nrep=3, verbose=TRUE, drop=TRUE,
                        unique=FALSE)
{
  MYCALL <- match.call()
  MYCALL1 <- MYCALL
  ## run EM nrep times, keep the run with the largest log-likelihood
  bestFlexmix <- function(...)
  {
    z <- new("flexmix", logLik=-Inf)
    logLiks <- rep(NA, length.out = nrep)
    for(m in seq_len(nrep)){
      if(verbose) cat(" *")
      x <- try(flexmix(...))
      if (!is(x, "try-error")) {
        logLiks[m] <- logLik(x)
        if(logLik(x) > logLik(z))
          z <- x
      }
    }
    return(list(z = z, logLiks = logLiks))
  }
  z <- list()
  if(is.null(k)){
    RET <- bestFlexmix(...)
    z[[1]] <- RET$z
    logLiks <- as.matrix(RET$logLiks)
    z[[1]]@call <- MYCALL
    z[[1]]@control@nrep <- nrep
    names(z) <- as.character(z[[1]]@k)
    if(verbose) cat("\n")
  }
  else{
    k <- as.integer(k)
    logLiks <- matrix(nrow = length(k), ncol = nrep)
    for(n in seq_along(k)){
      ns <- as.character(k[n])
      if(verbose) cat(k[n], ":")
      RET <- bestFlexmix(..., k=k[n])
      z[[ns]] <- RET$z
      logLiks[n,] <- RET$logLiks
      ## record the call with the current k so each model is reproducible
      MYCALL1[["k"]] <- as.numeric(k[n])
      z[[ns]]@call <- MYCALL1
      z[[ns]]@control@nrep <- nrep
      if(verbose) cat("\n")
    }
  }
  ## drop entries where no EM repetition reached a finite log-likelihood
  ok <- is.finite(sapply(z, logLik))
  logLiks <- logLiks[ok,,drop=FALSE]
  z <- z[ok]
  rownames(logLiks) <- names(z)
  if (!length(z)) stop("no convergence to a suitable mixture")
  if(drop && (length(z)==1)){
    return(z[[1]])
  }
  else{
    ## BUG FIX: this was 'z <- return(new(...))', which returned
    ## immediately and made 'unique = TRUE' dead code.  Assign first,
    ## then optionally reduce to unique k, then return.
    z <- new("stepFlexmix",
             models=z,
             k=as.integer(names(z)),
             nrep=as.integer(nrep),
             logLiks=logLiks,
             call=MYCALL)
    if(unique) z <- unique(z)
    return(z)
  }
}
###**********************************************************
## Reduce a stepFlexmix object to at most one model per number of
## components: for each distinct k only the model with the largest
## log-likelihood (and its row of logLiks) is kept.
setMethod("unique", "stepFlexmix",
function(x, incomparables=FALSE, ...)
{
z <- list()
K <- sapply(x@models, function(x) x@k)
logLiks <- x@logLiks
keep <- rep(TRUE, nrow(logLiks))
for(k in sort(unique(K))){
n <- which(k==K)
if(length(n)>1){
## several fits ended up with the same k: keep the best one
l <- sapply(x@models[n], logLik)
z[as.character(k)] <- x@models[n][which.max(l)]
keep[n[-which.max(l)]] <- FALSE
}
else
z[as.character(k)] <- x@models[n]
}
logLiks <- logLiks[keep,,drop=FALSE]
rownames(logLiks) <- names(z)
attr(logLiks, "na.action") <- NULL
## record in the stored call that unique filtering was applied
mycall <- x@call
mycall["unique"] <- TRUE
return(new("stepFlexmix",
models=z,
k=as.integer(names(z)),
nrep=x@nrep,
logLiks=logLiks,
call=mycall))
})
###**********************************************************
## Print method: the creating call followed by a table with one row per
## fitted model (iterations, convergence, k, initial k0, log-likelihood
## and the information criteria AIC/BIC/ICL).
setMethod("show", "stepFlexmix",
function(object)
{
cat("\nCall:", deparse(object@call,0.75*getOption("width")),
sep="\n")
cat("\n")
z <- data.frame(iter = sapply(object@models, function(x) x@iter),
converged = sapply(object@models, function(x) x@converged),
k = sapply(object@models, function(x) x@k),
k0 = sapply(object@models, function(x) x@k0),
logLik = sapply(object@models, function(x) logLik(x)),
AIC = AIC(object),
BIC = BIC(object),
ICL = ICL(object))
## NOTE(review): 'na.string' is not a formal argument of
## print.data.frame (that would be 'na.print'), so it is silently
## ignored here -- confirm whether 'na.print = ""' was intended.
print(z, na.string="")
})
## Number of observations for each of the fitted models.
setMethod("nobs", signature(object="stepFlexmix"),
          function(object, ...) {
            sapply(object@models, function(model) nobs(model))
          })
## Collect the log-likelihoods of all fitted models into one "logLik"
## vector, carrying along the per-model "df" and "nobs" attributes.
setMethod("logLik", "stepFlexmix",
          function(object, ..., k = 2)
          {
            per_model <- lapply(object@models, function(model) logLik(model))
            dfs <- sapply(per_model, function(l) attr(l, "df"))
            nobss <- sapply(per_model, function(l) attr(l, "nobs"))
            result <- unlist(per_model)
            attr(result, "df") <- dfs
            attr(result, "nobs") <- nobss
            class(result) <- "logLik"
            result
          })
## ICL criterion for each of the fitted models.
setMethod("ICL", "stepFlexmix",
          function(object, ...) {
            sapply(object@models, function(model) ICL(model, ...))
          })
## EIC criterion for each of the fitted models.
setMethod("EIC", "stepFlexmix",
          function(object, ...) {
            sapply(object@models, function(model) EIC(model, ...))
          })
###**********************************************************
## Extract a single model, either directly by index/name or, for
## which in c("AIC", "BIC", "ICL"), the model minimizing that criterion.
setMethod("getModel", "stepFlexmix",
          function(object, which="BIC")
          {
            if (is.character(which) && which %in% c("AIC", "BIC", "ICL")) {
              criterion <- match.fun(which)
              values <- sapply(object@models, function(model) criterion(model))
              which <- which.min(values)
            }
            object@models[[which]]
          })
###**********************************************************
## Plot the information criteria in 'what' against the number of
## components; the minimum of each criterion is highlighted with a
## filled point, and a legend is added when several criteria are shown.
setMethod("plot", signature(x="stepFlexmix", y="missing"),
function(x, y, what=c("AIC", "BIC", "ICL"), xlab=NULL, ylab=NULL,
legend="topright", ...)
{
X <- x@k
Y <- NULL
## one column of Y per requested criterion, computed by calling the
## criterion function (AIC/BIC/ICL) on the stepFlexmix object
for(w in what){
Y <- cbind(Y, do.call(w, list(object=x)))
}
if(is.null(xlab))
xlab <- "number of components"
if(is.null(ylab)){
if(length(what)==1)
ylab <- what
else
ylab <- ""
}
matplot(X, Y, xlab=xlab, ylab=ylab, type="b", lty=1,
pch=seq_along(what), ...)
if(legend!=FALSE && length(what)>1)
legend(x=legend, legend=what,
pch=seq_along(what),
col=seq_along(what))
## mark the minimizing k for each criterion
for(n in seq_len(ncol(Y))){
m <- which.min(Y[,n])
points(X[m], Y[m,n], pch=16, cex=1.5, col=n)
}
})
flexmix/R/plot-FLXboot.R 0000644 0001762 0000144 00000014210 14404637304 014546 0 ustar ligges users prepanel.parallel.horizontal <-
## Prepanel for the (optionally horizontal) parallel coordinates plot:
## one axis runs over the columns of z, the other is the unit interval.
function (x, y, z, horizontal = TRUE, ...)
{
  axis_limits <- extend.limits(c(1, ncol(as.data.frame(z))), prop = 0.03)
  if (horizontal) {
    list(xlim = axis_limits, ylim = c(0, 1), dx = 1, dy = 1)
  } else {
    list(xlim = c(0, 1), ylim = axis_limits, dx = 1, dy = 1)
  }
}
## Panel function for parallel coordinate plots that supports both
## horizontal and vertical orientation: each column of z is rescaled to
## [0, 1] using 'lower'/'upper' and consecutive columns are connected by
## line segments, optionally coloured by 'groups'.
panel.parallel.horizontal <-
function (x, y, z, subscripts, groups = NULL, col = superpose.line$col,
lwd = superpose.line$lwd, lty = superpose.line$lty, alpha = superpose.line$alpha,
common.scale = FALSE, lower = sapply(z, function(x) min(as.numeric(x),
na.rm = TRUE)), upper = sapply(z, function(x) max(as.numeric(x),
na.rm = TRUE)), horizontal = TRUE, ...)
{
superpose.line <- trellis.par.get("superpose.line")
reference.line <- trellis.par.get("reference.line")
n.r <- ncol(z)
n.c <- length(subscripts)
## recycle graphical parameters: per observation without groups,
## per group level otherwise
if (is.null(groups)) {
col <- rep(col, length = n.c)
lty <- rep(lty, length = n.c)
lwd <- rep(lwd, length = n.c)
alpha <- rep(alpha, length = n.c)
}
else {
groups <- as.factor(groups)[subscripts]
n.g <- nlevels(groups)
gnum <- as.numeric(groups)
col <- rep(col, length = n.g)[gnum]
lty <- rep(lty, length = n.g)[gnum]
lwd <- rep(lwd, length = n.g)[gnum]
alpha <- rep(alpha, length = n.g)[gnum]
}
if (is.function(lower))
lower <- sapply(z, lower)
if (is.function(upper))
upper <- sapply(z, upper)
if (common.scale) {
## use one common range for all columns instead of per-column ranges
lower <- min(lower)
upper <- max(upper)
}
lower <- rep(lower, length = n.r)
upper <- rep(upper, length = n.r)
dif <- upper - lower
## draw one reference axis per variable; nothing to connect if there
## is only a single column
if (n.r > 1) {
if (horizontal) panel.segments(y0 = 0, y1 = 1, x0 = seq_len(n.r), x1 = seq_len(n.r),
col = reference.line$col, lwd = reference.line$lwd,
lty = reference.line$lty)
else panel.segments(x0 = 0, x1 = 1, y0 = seq_len(n.r), y1 = seq_len(n.r),
col = reference.line$col, lwd = reference.line$lwd,
lty = reference.line$lty)
}else return(invisible())
## connect the rescaled values of column i with those of column i+1
for (i in seq_len(n.r - 1)) {
x0 <- (as.numeric(z[subscripts, i]) - lower[i])/dif[i]
x1 <- (as.numeric(z[subscripts, i + 1]) - lower[i + 1])/dif[i +
1]
if (horizontal) panel.segments(y0 = x0, x0 = i, y1 = x1, x1 = i + 1,
col = col, lty = lty, lwd = lwd, alpha = alpha, ...)
else panel.segments(x0 = x0, y0 = i, x1 = x1, y1 = i + 1,
col = col, lty = lty, lwd = lwd, alpha = alpha, ...)
}
invisible()
}
## Panel function for plotting bootstrap parameter estimates: draws the
## confidence bands in 'SD' (one matrix per component, columns lower /
## centre / upper) as grey polygons behind the parallel coordinate
## lines, then overlays the centre as a white line.
confidence.panel.boot <- function(x, y, z, subscripts, lwd = 1, SD = NULL, ..., lower, upper, range = c(0, 1)) {
nc <- ncol(z)
if (missing(lower)) lower <- sapply(z, function(x) quantile(x, range[1]))
if (missing(upper)) upper <- sapply(z, function(x) quantile(x, range[2]))
dif <- upper - lower
if (!is.null(SD)) {
## rescale the bands to the same [0, 1] coordinates as the panel
SD <- lapply(SD, function(x) (x - lower)/dif)
for (l in seq_along(SD)) {
## NOTE(review): 190/225 looks like it may have been meant as
## 190/255 (the usual 8-bit grey fraction) -- confirm.
grid.polygon(y = unit(c(SD[[l]][,1], rev(SD[[l]][,3])), "native"),
x = unit(c(seq_len(nc),rev(seq_len(nc))), "native"),
gp = gpar(fill = rgb(190/225, 190/225, 190/225, 0.5), col = "darkgrey"))
}
}
panel.parallel.horizontal(x, y, z, subscripts, ..., lower = lower, upper = upper)
if (!is.null(SD)) {
for (l in seq_along(SD)) {
llines(y = SD[[l]][,2], x = seq_len(nc), col="white", lwd=lwd, lty = 1)
}
}
}
## Plot method for bootstrap results: a parallel coordinates plot of the
## bootstrapped parameter estimates, optionally (ci = TRUE) with
## asymptotic confidence bands obtained from refitting the original
## model.
setMethod("plot", signature(x = "FLXboot", y = "missing"), function(x, y, ordering = NULL, range = c(0, 1),
ci = FALSE, varnames = colnames(pars), strip_name = NULL, ...) {
k <- x@object@k
pars <- parameters(x)
if (ci) {
x_refit <- refit(x@object)
sd <- sqrt(diag(x_refit@vcov))
## 95% Wald intervals: columns lower / estimate / upper
CI <- x_refit@coef + qnorm(0.975) * cbind(-sd, 0, sd)
indices_prior <- grep("alpha$", names(x_refit@coef))
if (length(indices_prior)) {
## concomitant model present: simulate the coefficients to obtain
## quantile-based intervals for the component priors
z <- mvtnorm::rmvnorm(10000, x_refit@coef[indices_prior,drop=FALSE], x_refit@vcov[indices_prior,indices_prior,drop=FALSE])
Priors <- t(apply(cbind(1, exp(z))/rowSums(cbind(1, exp(z))), 2, quantile, c(0.025, 0.5, 0.975)))
indices <- lapply(seq_len(k), function(i) grep(paste("_Comp.", i, sep = ""), names(x_refit@coef[-indices_prior])))
SD <- lapply(seq_len(k), function(i) rbind(CI[indices[[i]], ], prior = Priors[i,]))
} else {
indices <- lapply(seq_len(k), function(i) grep(paste("_Comp.", i, sep = ""), names(x_refit@coef)))
SD <- lapply(seq_len(k), function(i) CI[indices[[i]], ])
## pad the per-component matrices with zero rows to equal height
mnrow <- max(sapply(SD, nrow))
SD <- lapply(SD, function(x) if (nrow(x) < mnrow) do.call("rbind", c(list(x), as.list(rep(0, mnrow - nrow(x))))) else x)
}
## show gaussian standard deviations on the log scale
if (any("gaussian" %in% sapply(x@object@model, function(x) if (is(x, "FLXMRglm")) x@family else ""))) {
i <- grep("sigma$", colnames(pars))
pars[,i] <- log(pars[,i])
colnames(pars)[i] <- "log(sigma)"
}
} else SD <- NULL
## axis labels for the rescaled [0, 1] y axis
range_name <- vector(mode = "character", length=2)
range_name[1] <- if (range[1] == 0) "Min" else paste(round(range[1]*100), "%", sep = "")
range_name[2] <- if (range[2] == 1) "Max" else paste(round(range[2]*100), "%", sep = "")
## optional colouring by the within-column rank of one parameter
Ordering <- if (is.null(ordering)) NULL else factor(as.vector(apply(matrix(pars[,ordering], nrow = k), 2, function(x) order(order(x)))))
if(is.null(strip_name)) formula = ~ pars else {
opt.old <- options(useFancyQuotes = FALSE)
on.exit(options(opt.old))
formula <- as.formula(paste("~ pars | ", sQuote(strip_name)))
}
pars <- na.omit(pars)
if (!is.null(attr(pars, "na.action")))
Ordering <- Ordering[-attr(na.omit(pars), "na.action")]
parallel.plot <- parallelplot(formula, groups = Ordering, default.scales = list(y = list(at = c(0, 1), labels = range_name),
x = list(alternating = FALSE, axs = "i", tck = 0, at = seq_len(ncol(pars)))), range = range,
panel = confidence.panel.boot, prepanel = prepanel.parallel.horizontal, SD = SD, ...)
parallel.plot$x.scales$labels <- varnames
parallel.plot
})
flexmix/R/flxdist.R 0000644 0001762 0000144 00000005542 14404637304 013742 0 ustar ligges users FLXdist <- function(formula, k = NULL, model=FLXMRglm(), components, concomitant=FLXPconstant())
{
## Construct an unfitted mixture distribution object from given
## component parameter values: k may be a single number of components
## (equal priors) or a vector of prior weights (normalized to sum 1).
mycall <- match.call()
if(is(model, "FLXM")) model <- list(model)
if (length(k)==1) prior <- rep(1/k, k)
else {
prior <- k/sum(k)
}
## a one-row design matrix (intercept plus zeros) for evaluating the
## concomitant model at a reference point
concomitant@x <- matrix(c(1, rep(0, ncol(concomitant@coef))[-1]), nrow = 1)
prior <- as.vector(evalPrior(prior, concomitant))
## strip a grouping specification "y ~ x | group" down to "y ~ x"
lf <- length(formula)
formula1 <- formula
if(length(formula[[lf]])>1 && deparse(formula[[lf]][[1]]) == "|")
formula1[[lf]] <- formula[[lf]][[2]]
for(n in seq(along.with=model)) {
if(is.null(model[[n]]@formula))
model[[n]]@formula <- formula1
else if(length(model[[n]]@formula) == 3 && model[[n]]@formula[[2]] == ".")
model[[n]]@formula <- model[[n]]@formula[-2]
model[[n]]@fullformula <- update.formula(formula1, model[[n]]@formula)
}
if (missing(components)) stop("no parameter values specified")
if (length(components) != length(prior)) stop("components not specified correctly")
## build one FLXcomponent per (component, model) pair from the
## user-supplied parameter lists; note: 'k' is reused as loop index
comp <- list()
for (k in seq(along.with=prior)) {
comp[[k]] <- list()
if (length(components[[k]]) != length(model))
stop("components not specified correctly")
for (n in seq(along.with=model)) {
comp[[k]][[n]] <- FLXcomponent(model[[n]],
components[[k]][[n]])
}
}
new("FLXdist", formula=formula, call=mycall, concomitant=concomitant,
prior=prior, k=length(prior), model=model, components=comp)
}
###**********************************************************
## Generic for building an FLXcomponent object from user-supplied
## parameter values (used by FLXdist()).
setGeneric("FLXcomponent", function(object, ...) standardGeneric("FLXcomponent"))
setMethod("FLXcomponent", signature(object="FLXM"), function(object, components, ...) {
## df is irrelevant for an unfitted distribution object
components$df <- numeric()
## defineComponent may be stored as an expression (evaluated with the
## parameter list as environment) or as a function of the list
if (is(object@defineComponent, "expression"))
eval(object@defineComponent, components)
else
object@defineComponent(components)
})
####
## FLXMRglm variant of FLXcomponent: additionally provides 'offset' and
## 'family' in the local frame.  NOTE(review): these locals appear to be
## intended as fall-back bindings for names used by defineComponent --
## confirm how the expression/function resolves them before relying on
## this.
setMethod("FLXcomponent", signature(object="FLXMRglm"), function(object, components, ...) {
components$df <- numeric()
offset <- NULL
family <- object@family
if (is(object@defineComponent, "expression"))
eval(object@defineComponent, components)
else
object@defineComponent(components)
})
###**********************************************************
## Print method for FLXdist objects: the creating call followed by the
## named vector of component prior probabilities.
setMethod("show", "FLXdist",
          function(object){
            call_width <- 0.75 * getOption("width")
            cat("\nCall:", deparse(object@call, call_width), sep = "\n")
            cat("\nPriors:\n")
            priors <- object@prior
            names(priors) <- paste("Comp.", seq_along(priors), sep = "")
            print(priors)
            cat("\n")
          })
###**********************************************************
## Component prior probabilities given a concomitant variable model;
## the default method returns the prior unchanged.
evalPrior <- function(prior, concomitant) prior
setGeneric("evalPrior", function(prior, concomitant) standardGeneric("evalPrior"))
## Multinomial concomitant model: the priors are the multinomial logit
## probabilities evaluated at the stored design matrix (row-wise
## softmax of x %*% coef).
setMethod("evalPrior", signature(concomitant="FLXPmultinom"), function(prior, concomitant) {
  scores <- exp(concomitant@x %*% concomitant@coef)
  prop.table(scores, 1)
})
flexmix/R/utils.R 0000644 0001762 0000144 00000003217 14404637304 013422 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: utils.R 5079 2016-01-31 12:21:12Z gruen $
#
## Convert a named list into an S4 object of class 'to': each element
## name is (partially) matched against the slot names of the target
## class and the renamed list is passed to new().
##
## Arguments:
##   from : named list of slot values
##   to   : name of the target S4 class
## Returns a new instance of class 'to'; stops if any name does not
## match a slot.
list2object <- function(from, to){
  n <- names(from)
  s <- slotNames(to)
  ## pmatch allows abbreviated slot names; NA marks unmatched entries
  p <- pmatch(n, s)
  if(any(is.na(p)))
    stop(paste("\nInvalid slot name(s) for class",
               to, ":", paste(n[is.na(p)], collapse=" ")))
  names(from) <- s[p]
  do.call("new", c(from, Class=to))
}
## Print one EM progress line: iteration number, a label and the current
## log-likelihood, formatted in fixed-width columns.
printIter <- function(iter, logLik, label="Log-likelihood") {
  cat(formatC(iter, width = 4), label, ":",
      formatC(logLik, width = 12, format = "f"), "\n")
}
## library(colorspace)
## dput(x[c(1,3,5,7,2,4,6,8)])
## x = hcl(seq(0, 360*7/8, length.out = 8), c=30)
## Eight pastel HCL colours (low chroma), used e.g. as histogram fill.
LightColors <- c("#F9C3CD", "#D0D4A8", "#9DDDD5", "#D1CCF5",
"#EDCAB2", "#AFDCB8", "#ACD7ED", "#EFC4E8")
## x = hcl(seq(0, 360*7/8, length.out = 8), c=100, l=65)
## The corresponding eight saturated HCL colours, used for highlighting.
FullColors <- c("#FF648A", "#96A100", "#00BCA3", "#9885FF",
"#DC8400", "#00B430", "#00AEEF", "#F45BE1")
###**********************************************************
## similar defaults to silhouette plots in flexclust
## Sequential HCL palette: n colours of constant hue and chroma with
## luminance increasing linearly from lum[1] to lum[2].
##
## Arguments:
##   n   : number of colours
##   hue : HCL hue
##   chr : HCL chroma
##   lum : luminance range c(from, to)
## Returns a character vector of n hex colour strings.
unipolarCols <- function(n, hue=0, chr=50, lum = c(55, 90))
{
  ## spell out 'length.out' instead of relying on partial matching of
  ## the former 'length=' argument
  lum <- seq(lum[1], lum[2], length.out = n)
  hcl(hue, chr, lum)
}
## Diverging palette built from two unipolar ramps with hues hue[1] and
## hue[2]; for odd n the centre colour is white.
bipolarCols <- function(n, hue=c(10, 130), ...)
{
  if(n%%2){ # n odd
    n2 <- (n-1)/2
    c1 <- unipolarCols(n2, hue[1])
    c2 <- rev(unipolarCols(n2, hue[2]))
    return(c(c1, "white", c2))
  }
  else{ # n even
    n2 <- n/2
    c1 <- unipolarCols(n2, hue[1])
    c2 <- rev(unipolarCols(n2, hue[2]))
    return(c(c1, c2))
  }
}
###**********************************************************
flexmix/R/allGenerics.R 0000644 0001762 0000144 00000006210 14404637304 014506 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: allGenerics.R 5079 2016-01-31 12:21:12Z gruen $
#
## Main user-level generic and the internal EM driver.
setGeneric("flexmix",
function(formula, data=list(), k=NULL,
cluster=NULL, model=NULL, concomitant=NULL, control=NULL,
weights = NULL)
standardGeneric("flexmix"))
setGeneric("FLXfit",
function(model, concomitant, control,
postunscaled=NULL, groups, weights)
standardGeneric("FLXfit"))
###**********************************************************
## Model/concomitant set-up generics.
setGeneric("FLXgetModelmatrix",
function(model, data, ...) standardGeneric("FLXgetModelmatrix"))
setGeneric("FLXfillConcomitant",
function(concomitant, ...) standardGeneric("FLXfillConcomitant"))
###**********************************************************
## Likelihood-based criteria.
setGeneric("logLik")
setGeneric("clogLik", function(object, ...) standardGeneric("clogLik"))
setGeneric("EIC", function(object, ...) standardGeneric("EIC"))
###**********************************************************
## Internal generics dispatched on the model driver during EM and
## refitting; methods are provided per FLXM subclass.
setGeneric("FLXcheckComponent", function(model, ...) standardGeneric("FLXcheckComponent"))
setGeneric("FLXgetK", function(model, ...) standardGeneric("FLXgetK"))
setGeneric("FLXgetObs", function(model) standardGeneric("FLXgetObs"))
setGeneric("FLXmstep", function(model, ...) standardGeneric("FLXmstep"))
setGeneric("FLXremoveComponent", function(model, ...) standardGeneric("FLXremoveComponent"))
setGeneric("FLXdeterminePostunscaled", function(model, ...) standardGeneric("FLXdeterminePostunscaled"))
setGeneric("FLXgetDesign", function(object, ...) standardGeneric("FLXgetDesign"))
setGeneric("FLXreplaceParameters", function(object, ...) standardGeneric("FLXreplaceParameters"))
setGeneric("FLXlogLikfun", function(object, ...) standardGeneric("FLXlogLikfun"))
setGeneric("FLXgradlogLikfun", function(object, ...) standardGeneric("FLXgradlogLikfun"))
setGeneric("VarianceCovariance", function(object, ...) standardGeneric("VarianceCovariance"))
setGeneric("FLXgetParameters", function(object, ...) standardGeneric("FLXgetParameters"))
setGeneric("logLikfun_comp", function(object, ...) standardGeneric("logLikfun_comp"))
setGeneric("getPriors", function(object, ...) standardGeneric("getPriors"))
setGeneric("existGradient", function(object, ...) standardGeneric("existGradient"))
setGeneric("refit_mstep", function(object, newdata, ...) standardGeneric("refit_mstep"))
setGeneric("refit_optim", function(object, ...) standardGeneric("refit_optim"))
###**********************************************************
## Grouping and random number generation for fitted objects.
setGeneric("group", function(object, ...) standardGeneric("group"))
setGeneric("rflexmix", function(object, newdata, ...) standardGeneric("rflexmix"))
setGeneric("rFLXM", function(model, components, ...) standardGeneric("rFLXM"))
## just to make sure that some S3 generics are available in S4
setGeneric("fitted", package = "stats")
setGeneric("predict", package = "stats")
setGeneric("simulate", package = "stats")
setGeneric("summary", package = "base")
setGeneric("unique", package = "base")
setGeneric("allweighted", function(model, control, weights) standardGeneric("allweighted"))
flexmix/R/flexmixFix.R 0000644 0001762 0000144 00000014152 14404637304 014405 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch and Bettina Gruen
# $Id: flexmixFix.R 5079 2016-01-31 12:21:12Z gruen $
#
## Reconcile the requested number of components k with the model's
## nestedformula and cluster argument, and expand the variance
## specification (varFix) to one entry per variance group.
setMethod("FLXcheckComponent", signature(model = "FLXMRfix"), function(model, k, cluster, ...) {
if (sum(model@nestedformula@k)) {
## the nestedformula fixes the total number of components
if (!is.null(k)) {
if (k != sum(model@nestedformula@k)) stop("specified k does not match the nestedformula in the model")
}
else k <- sum(model@nestedformula@k)
}
else {
## no nestedformula given: derive k from the cluster argument
if (is(cluster, "matrix")) {
if (is.null(k)) k <- ncol(cluster)
}
else if (!is.null(cluster)) {
if (is.null(k)) {
cluster <- as(cluster, "integer")
k <- max(cluster)
}
}
if (is.null(k)) stop("either k, cluster or the nestedformula of the model must be specified")
else model@nestedformula <- as(k, "FLXnested")
}
if (length(model@variance) > 1) {
## explicit variance grouping: the group sizes must sum to k
if (sum(model@variance) != k) stop("specified k does not match the specified varFix argument in the model")
}
else if (model@variance) model@variance <- k
else model@variance <- rep(1, k)
model
})
## For FLXMRfix the response rows are stacked once per component, so the
## number of observations is the stacked row count divided by the total
## number of components; FLXgetK returns that total.
setMethod("FLXgetObs", signature(model = "FLXMRfix"),
          function(model) {
            nrow(model@y) / sum(model@nestedformula@k)
          })
setMethod("FLXgetK", signature(model = "FLXMRfix"),
          function(model, ...) {
            sum(model@nestedformula@k)
          })
## Remove the components indexed by 'nok' from an FLXMRfix model: update
## the nestedformula counts, the variance grouping, and cut the matching
## rows/columns out of the stacked x, y, design and segment matrices.
setMethod("FLXremoveComponent", signature(model = "FLXMRfix"), function(model, nok, ...)
{
if (!length(nok)) return(model)
K <- model@nestedformula
## map each removed component index to its nested-formula group
wnok <- sapply(nok, function(i) which(apply(rbind(i > c(0, cumsum(K@k[-length(K@k)])),
i <= c(cumsum(K@k))), 2, all)))
wnok <- table(wnok)
if (length(wnok) > 0) {
K@k[as.integer(names(wnok))] <- K@k[as.integer(names(wnok))] - wnok
if (any(K@k == 0)) {
## drop nested-formula groups that became empty
keep <- K@k != 0
K@k <- K@k[keep]
K@formula <- K@formula[keep]
}
k <- sum(K@k)
model@nestedformula <- K
}
## same bookkeeping for the variance grouping
varnok <- sapply(nok, function(i) which(apply(rbind(i > c(0, cumsum(model@variance[-length(model@variance)])),
i <= c(cumsum(model@variance))), 2, all)))
varnok <- table(varnok)
if (length(varnok) > 0) {
model@variance[as.integer(names(varnok))] <- model@variance[as.integer(names(varnok))] - varnok
if (any(model@variance == 0))
model@variance <- model@variance[model@variance != 0]
}
## keep only the stacked rows not belonging to any removed component,
## and only design columns still used by some remaining component
rok <- which(!apply(model@segment[,nok,drop=FALSE], 1, function(x) any(x)))
model@x <- model@x[rok, which(colSums(model@design[-nok,,drop=FALSE]) > 0), drop=FALSE]
model@y <- model@y[rok,, drop=FALSE]
model@design <- model@design[-nok,,drop=FALSE]
cok <- colSums(model@design) > 0
model@design <- model@design[,cok,drop=FALSE]
model@segment <- model@segment[rok,-nok, drop=FALSE]
model
})
###**********************************************************
## M-step for FLXMRfix: a single weighted fit over the stacked design,
## passing the design matrix and variance grouping through to the
## model's fit function.
setMethod("FLXmstep", signature(model = "FLXMRfix"),
          function(model, weights, ...) {
            w <- as.vector(weights)
            model@fit(model@x, model@y, w, model@design, model@variance)
          })
###**********************************************************
## Per-component unscaled log-posteriors: each component m evaluates its
## logLik on its own segment of the stacked data, restricted to the
## design columns active for that component.
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRfix"), function(model, components, ...)
{
sapply(seq_along(components), function(m)
components[[m]]@logLik(model@x[model@segment[,m], as.logical(model@design[m,]), drop=FALSE],
model@y[model@segment[,m],,drop=FALSE]))
})
###**********************************************************
## Build the model matrices for a model with component-specific
## ("random"), component-invariant ("fixed") and nested terms.
##
## Arguments:
##   random  : formula of the component-specific part (with response
##             when lhs = TRUE)
##   fixed   : formula of the component-invariant part ("~0" for none)
##   nested  : list of formulas of the nested parts
##   data    : data frame / list the formulas are evaluated in
##   lhs     : does 'random' contain a response to extract?
##   xlevels : factor levels to impose (e.g. from a previous fit)
## Returns a list with elements random, fixed, nested (model matrices),
## response, and the collected factor levels (xlevels).
modelMatrix <- function(random, fixed, nested, data=list(), lhs, xlevels = NULL)
{
  if (!lhs)
    random <- random[-2]
  mf.random <- model.frame(random, data=data, na.action = NULL)
  response <- if (lhs) as.matrix(model.response(mf.random)) else NULL
  xlev <- xlevels[names(.getXlevels(terms(mf.random), mf.random))]
  mm.random <- if (is.null(xlev)) model.matrix(terms(mf.random), data=mf.random)
  else model.matrix(terms(mf.random), data=data, xlev = xlev)
  xlevels.random <- .getXlevels(terms(mf.random), mf.random)
  ## combine random and fixed terms into one formula
  randomfixed <- if(identical(paste(deparse(fixed), collapse = ""), "~0")) random
  else update(random, paste("~.+", paste(deparse(fixed[[length(fixed)]]), collapse = "")))
  mf.randomfixed <- model.frame(randomfixed, data=data)
  ## BUG FIX: the factor levels must be looked up in the combined model
  ## frame 'mf.randomfixed'; previously the not-yet-defined object 'mf'
  ## was referenced here (only lazy evaluation kept this from erroring).
  mm.randomfixed <- model.matrix(terms(mf.randomfixed), data=mf.randomfixed,
                                 xlev = xlevels[names(.getXlevels(terms(mf.randomfixed), mf.randomfixed))])
  ## the fixed part is everything in the combined matrix that is not
  ## already part of the component-specific matrix
  mm.fixed <- mm.randomfixed[,!colnames(mm.randomfixed) %in% colnames(mm.random), drop=FALSE]
  xlevels.fixed <- .getXlevels(terms(mf.randomfixed), mf.randomfixed)
  all <- mm.all <- mm.nested <- xlevels.nested <- list()
  for (l in seq_along(nested)) {
    ## add each nested formula on top of random + fixed and keep the
    ## columns that are new relative to the combined matrix
    all[[l]] <- if (identical(paste(deparse(nested[[l]]), collapse = ""), "~0")) randomfixed
    else update(randomfixed, paste("~.+", paste(deparse(nested[[l]][[length(nested[[l]])]]), collapse = "")))
    mf <- model.frame(all[[l]], data=data)
    mm.all[[l]] <- model.matrix(terms(mf), data=mf, xlev = xlevels[names(.getXlevels(terms(mf), mf))])
    mm.nested[[l]] <- mm.all[[l]][,!colnames(mm.all[[l]]) %in% colnames(mm.randomfixed),drop=FALSE]
    xlevels.nested[[l]] <- .getXlevels(terms(mf), mf)
  }
  return(list(random=mm.random, fixed=mm.fixed, nested=mm.nested, response=response, xlevels=c(xlevels.random, xlevels.fixed, unlist(xlevels.nested))))
}
###**********************************************************
## Build the component-by-coefficient design indicator matrix from the
## model matrices returned by modelMatrix(): fixed columns apply to all
## components, nested columns only to the components of their group, and
## the component-specific (random) columns are replicated per component.
modelDesign <- function(mm.all, k) {
design <- matrix(1, nrow=sum(k@k), ncol=ncol(mm.all$fixed))
col.names <- colnames(mm.all$fixed)
nested <- matrix(0, nrow=sum(k@k), ncol=sum(sapply(mm.all$nested, ncol)))
cumK <- c(0, cumsum(k@k))
i <- 0
for (l in seq_along(mm.all$nested)) {
if (ncol(mm.all$nested[[l]])) {
## activate this nested block only for the components of group l
nested[(cumK[l] + 1):cumK[l+1], i+seq_len(ncol(mm.all$nested[[l]]))] <- 1
i <- i+ncol(mm.all$nested[[l]])
col.names <- c(col.names, colnames(mm.all$nested[[l]]))
}
}
design <- cbind(design, nested)
## block-diagonal indicator: each component gets its own copy of the
## component-specific columns
if (ncol(mm.all$random)) design <- cbind(design,
kronecker(diag(sum(k@k)), matrix(1, ncol=ncol(mm.all$random))))
colnames(design) <- c(col.names, rep(colnames(mm.all$random), sum(k@k)))
design
}
###**********************************************************
flexmix/R/flxmcsparse.R 0000644 0001762 0000144 00000002252 14404637304 014607 0 ustar ligges users setClass("FLXMCsparse",
contains = "FLXM")
## S3 method so that a sparse simple_triplet_matrix response can be
## coerced to a data frame the same way a model matrix is.
as.data.frame.simple_triplet_matrix <- function(x, ...) {
as.data.frame.model.matrix(x, ...)
}
## Model matrix set-up for sparse model-based clustering: only the
## response is extracted (into @y); the model matrix @x stays empty
## (zero columns) since FLXMC models have no regressors.
setMethod("FLXgetModelmatrix", signature(model = "FLXMCsparse"),
function(model, data, formula, lhs=TRUE, ...) {
formula <- RemoveGrouping(formula)
if (length(grep("\\|", deparse(model@formula))))
stop("no grouping variable allowed in the model")
if(is.null(model@formula))
model@formula <- formula
model@fullformula <- update(terms(formula, data = data), model@formula)
fullformula <- terms(model@fullformula, data = data)
## NOTE(review): 'fullformula' is itself a terms object, so
## attr(fullformula, "terms") may well be NULL here -- confirm what
## the @terms slot is expected to hold.
model@terms <- attr(fullformula, "terms")
if (lhs) {
## evaluate the formula variables manually (instead of model.frame)
## so the sparse response is not coerced to a dense data frame
env <- environment(fullformula)
vars <- attr(fullformula, "variables")
varnames <- vapply(vars, function(x)
paste(deparse(x, backtick = FALSE), collapse = " "), " ")[-1L]
variables <- eval(vars, data, env)
resp <- attr(fullformula, "response")
response <- variables[[resp]]
model@y <- model@preproc.y(response)
model@x <- matrix(nrow = nrow(model@y), ncol = 0)
} else {
model@x <- matrix(nrow = nrow(as.data.frame(data)), ncol = 0)
}
model
})
flexmix/R/lmer.R 0000644 0001762 0000144 00000015064 14404637304 013224 0 ustar ligges users sigmaMethod <-
## stats::sigma() only exists from R 3.3.0 on; fall back to the
## sigma() exported by lme4 for older R versions.
getExportedValue(if(getRversion() >= "3.3.0") "stats" else "lme4", "sigma")
## Driver class for mixtures of linear mixed models fitted via lme4:
## stores the random-effects formula, the lme4 module list (lmod), the
## lmerControl settings and a preprocessing hook for the random part.
setClass("FLXMRlmer",
representation(random = "formula",
lmod = "list",
control = "ANY",
preproc.z = "function"),
prototype(preproc.z = function(x, ...) x),
contains = "FLXMRglm")
## Build an FLXcomponent for FLXMRlmer from the fitted parameters 'para'
## (fixed-effect coefficients and the Random/Residual variances).  The
## component's logLik evaluates, per grouping level, a multivariate
## normal density with covariance Z' Sigma_random Z + sigma2 I.
defineComponent_lmer <- function(para) {
predict <- function(x, ...) x%*%para$coef
logLik <- function(x, y, lmod, ...) {
z <- as.matrix(lmod$reTrms$Zt)
grouping <- lmod$reTrms$flist[[1]]
llh <- vector(length=nrow(x))
for (i in seq_len(nlevels(grouping))) {
## rows of the data (index1) and of Zt (index2) for this group
index1 <- which(grouping == levels(grouping)[i])
index2 <- rownames(z) %in% levels(grouping)[i]
V <- crossprod(z[index2,index1,drop=FALSE], para$sigma2$Random) %*% z[index2, index1, drop=FALSE] + diag(length(index1)) * para$sigma2$Residual
## spread the joint group log-density evenly over its observations
llh[index1] <- mvtnorm::dmvnorm(y[index1,], mean=predict(x[index1,,drop=FALSE], ...), sigma = V, log=TRUE)/length(index1)
}
llh
}
new("FLXcomponent",
parameters=list(coef=para$coef, sigma2=para$sigma2),
logLik=logLik, predict=predict,
df=para$df)
}
## Driver constructor for mixtures of linear mixed models (gaussian
## response) fitted with lme4; 'random' gives the random-effects part,
## 'weighted' selects weighted ML, 'control' is passed to lmerControl
## and 'eps' is the threshold below which weights are treated as zero.
FLXMRlmer <- function(formula = . ~ ., random, weighted = TRUE,
control = list(), eps = .Machine$double.eps)
{
random <- if (length(random) == 3) random else formula(paste(".", paste(deparse(random), collapse = "")))
missCtrl <- missing(control)
if (missCtrl || !inherits(control, "lmerControl")) {
if (!is.list(control))
stop("'control' is not a list; use lmerControl()")
control <- do.call(lme4::lmerControl, control)
}
object <- new("FLXMRlmer", formula = formula, random = random, control = control,
family = "gaussian", weighted = weighted, name = "FLXMRlmer:gaussian")
## for weighted fitting the random-effect covariates must be balanced:
## verify that every grouping level has the same pattern in Zt
if (weighted) object@preproc.z <- function(lmod) {
if (length(unique(names(lmod[["reTrms"]][["flist"]]))) != 1) stop("only a single variable for random effects is allowed")
for (i in seq_along(lmod[["reTrms"]][["flist"]])) {
DIFF <- t(sapply(levels(lmod[["reTrms"]]$flist[[i]]), function(id) {
index1 <- which(lmod[["reTrms"]]$flist[[i]] == id)
index2 <- rownames(lmod[["reTrms"]]$Zt) == id
sort(apply(lmod[["reTrms"]]$Zt[index2, index1, drop=FALSE], 1, paste, collapse = ""))
}))
if (length(unique(table(lmod[["reTrms"]][["flist"]][[i]]))) != 1 || nrow(unique(DIFF)) != 1)
stop("FLXMRlmer does only work correctly if the covariates of the random effects are the same for all observations")
}
lmod
}
## weighted lmer fit: drops (near-)zero-weight observations, rescales
## the data by sqrt(w), fits via lme4's modular interface and undoes
## the weight scaling in the estimated variances
lmer.wfit <- function(x, y, w, lmod) {
zero.weights <- any(w < eps)
if (zero.weights) {
ok <- w >= eps
w <- w[ok]
lmod[["fr"]] <- lmod[["fr"]][ok, , drop = FALSE]
lmod[["X"]] <- lmod[["X"]][ok, , drop = FALSE]
lmod[["reTrms"]][["Zt"]] <- lmod[["reTrms"]][["Zt"]][, ok, drop = FALSE]
for (i in seq_along(lmod[["reTrms"]][["flist"]])) {
lmod[["reTrms"]][["flist"]][[i]] <- lmod[["reTrms"]][["flist"]][[i]][ok]
}
}
wts <- sqrt(w)
lmod$X <- lmod$X * wts
lmod$fr[[1]] <- lmod$fr[[1]] * wts
devfun <- do.call(lme4::mkLmerDevfun, c(lmod, list(start = NULL, verbose = FALSE,
control = control)))
opt <- lme4::optimizeLmer(devfun, optimizer = control$optimizer,
restart_edge = control$restart_edge, control = control$optCtrl,
verbose = FALSE, start = NULL)
mer <- lme4::mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
## correct residual sd and random-effect covariance for the mean weight
sigma_res <- sigmaMethod(mer) / sqrt(mean(w))
vc <- lme4::VarCorr(mer)
## assemble the block-diagonal random-effects covariance matrix
n <- c(0, cumsum(sapply(vc, ncol)))
Random <- matrix(0, max(n), max(n))
for (i in seq_along(vc)) {
index <- (n[i]+1):n[i+1]
Random[index, index] <- vc[[i]]
}
Random <- Random / mean(w)
list(coefficients = lme4::fixef(mer),
sigma2 = list(Random = Random,
Residual = sigma_res^2),
df = length(lme4::fixef(mer)) + 1 + length(mer@theta))
}
object@defineComponent <- defineComponent_lmer
object@fit <- function(x, y, w, lmod){
fit <- lmer.wfit(x, y, w, lmod)
object@defineComponent(
list(coef = coef(fit),
df = fit$df,
sigma2 = fit$sigma2))
}
object
}
# Set up the model matrices for FLXMRlmer: the grouping variable is
# stripped from the flexmix formula, the inherited GLM method builds the
# x/y matrices, and lme4::lFormula parses the combined fixed- plus
# random-effects formula into the structure stored in the @lmod slot.
setMethod("FLXgetModelmatrix", signature(model="FLXMRlmer"),
          function(model, data, formula, lhs=TRUE, contrasts = NULL, ...)
{
  formula_nogrouping <- RemoveGrouping(formula)
  # A grouping variable is mandatory: if removing it changes nothing,
  # none was given.
  if (identical(paste(deparse(formula_nogrouping), collapse = ""), paste(deparse(formula), collapse = ""))) stop("please specify a grouping variable")
  model <- callNextMethod(model, data, formula, lhs)
  # The random effects are grouped by the flexmix grouping variable.
  random_formula <- update(model@random,
                           paste(".~. |", .FLXgetGroupingVar(formula)))
  fullformula <- model@fullformula
  if (!lhs) fullformula <- fullformula[c(1,3)]
  # Formula handed to lme4: fixed part plus the random-effects term.
  fullformula <- update(fullformula,
                        paste(ifelse(lhs, ".", ""), "~. + ", paste(deparse(random_formula[[3]]), collapse = "")))
  model@fullformula <- update(model@fullformula,
                              paste(ifelse(lhs, ".", ""), "~. |", .FLXgetGroupingVar(formula)))
  model@lmod <- lme4::lFormula(fullformula, data, REML = FALSE, control = model@control)
  # Apply the driver's preprocessing/validation hook (see preproc.z).
  model@lmod <- model@preproc.z(model@lmod)
  model
})
# M-step for FLXMRlmer: refit the mixed model once per component, each
# time using that component's column of posterior weights.
setMethod("FLXmstep", signature(model = "FLXMRlmer"),
          function(model, weights, ...)
{
  apply(weights, 2, function(w) model@fit(model@x, model@y, w, model@lmod))
})
# Unscaled posteriors for FLXMRlmer: per-observation log-likelihoods,
# one column per component.
setMethod("FLXdeterminePostunscaled", signature(model = "FLXMRlmer"), function(model, components, ...) {
  sapply(components, function(x) x@logLik(model@x, model@y, model@lmod))
})
# Draw random responses from a single FLXMRlmer component: within each
# level of the grouping factor the response vector is sampled from a
# multivariate normal with mean x %*% coef and covariance
# Z' Random Z + Residual * I (same covariance as in logLik above).
setMethod("rFLXM", signature(model = "FLXMRlmer", components="FLXcomponent"),
          function(model, components, ...) {
  sigma2 <- components@parameters$sigma2
  z <- as.matrix(model@lmod$reTrms$Zt)
  grouping <- model@lmod$reTrms$flist[[1]]
  y <- matrix(0, nrow=nrow(model@x), ncol = 1)
  for (i in seq_len(nlevels(grouping))) {
    index1 <- which(grouping == levels(grouping)[i])   # observations in group i
    index2 <- rownames(z) %in% levels(grouping)[i]     # rows of Zt belonging to group i
    V <- crossprod(z[index2,index1,drop=FALSE], sigma2$Random) %*% z[index2, index1, drop=FALSE] + diag(length(index1)) * sigma2$Residual
    y[index1, 1] <- mvtnorm::rmvnorm(1, mean=components@predict(model@x[index1,,drop=FALSE], ...), sigma = V)
  }
  y
})
flexmix/R/rflexmix.R 0000644 0001762 0000144 00000007421 14404637304 014121 0 ustar ligges users setMethod("rflexmix", signature(object = "FLXdist", newdata="numeric"), function(object, newdata, ...) {
  # `newdata` given as a number of draws: build an empty data frame with
  # that many rows and delegate to the data-frame method.
  newdata <- data.frame(matrix(nrow = as.integer(newdata), ncol = 0))
  rflexmix(object, newdata = newdata, ...)
})
# Simulate from a mixture distribution given new data: rebuild the
# component model matrices and the concomitant model for `newdata`,
# then delegate to the "flexmix" method.
setMethod("rflexmix", signature(object = "FLXdist", newdata="listOrdata.frame"), function(object, newdata, ...) {
  groups <- .FLXgetGrouping(object@formula, newdata)
  object@model <- lapply(object@model, FLXgetModelmatrix, newdata, object@formula, lhs=FALSE)
  # Without an explicit grouping, every observation forms its own group.
  group <- if (length(groups$group)) groups$group else factor(seq_len(FLXgetObs(object@model[[1]])))
  object@concomitant <- FLXgetModelmatrix(object@concomitant, data = newdata,
                                          groups = list(group=group,
                                                        groupfirst = groupFirst(group)))
  rflexmix(new("flexmix", object, group=group, weights = NULL), ...)
})
# Simulate responses from a fitted "flexmix" object using its stored
# model matrices: sample a component per group from the (possibly
# concomitant-dependent) prior, then draw responses for each of the N
# component-specific models.  Returns list(y, group, class).
setMethod("rflexmix", signature(object = "flexmix", newdata="missing"), function(object, newdata, ...) {
  N <- length(object@model)
  object <- undo_weights(object)
  group <- group(object)
  prior <- determinePrior(object@prior, object@concomitant, group)
  # One multinomial draw per group; `class` holds the sampled component
  # index, expanded back to observation level via `group`.
  class <- apply(prior, 1, function(x) rmultinom(1, size = 1, prob = x))
  class <- if (is.matrix(class)) t(class) else as.matrix(class)
  class <- max.col(class)[group]
  y <- vector("list", N)
  for (i in seq_len(N)) {
    comp <- lapply(object@components, function(x) x[[i]])
    yi <- rFLXM(object@model[[i]], comp, class, group, ...)
    # Derive response column names from the model formula's left-hand
    # side where possible, otherwise generate "y.<model>.<col>" labels.
    form <- object@model[[i]]@fullformula
    names <- if(length(form) == 3) form[[2]] else paste("y", i, seq_len(ncol(yi)), sep = ".")
    if (ncol(yi) > 1) {
      # Multivariate response: an LHS call like cbind(a, b) supplies
      # one name per column; a single name is expanded with indices.
      if (inherits(names, "call"))
        names <- as.character(names[-1])
      if (length(names) != ncol(yi)) {
        if (length(names) == 1) names <- paste(as.character(names)[1], i, seq_len(ncol(yi)), sep = ".")
        else stop("left hand side not specified correctly")
      }
    }
    else if (inherits(names, "call")) names <- deparse(names)
    colnames(yi) <- as.character(names)
    y[[i]] <- yi
  }
  list(y = y, group=group, class = class)
})
###**********************************************************
# Default prior determination: without a concomitant variable model the
# component weights are the same for every group, so simply replicate
# the prior vector once per group (rows = groups, columns = components).
determinePrior <- function(prior, concomitant, group) {
  n_groups <- length(unique(group))
  matrix(rep(prior, each = n_groups), nrow = n_groups, ncol = length(prior))
}
# Generic for computing the matrix of a-priori component probabilities
# (one row per group), dispatching on the concomitant variable model.
setGeneric("determinePrior", function(prior, concomitant, group)
           standardGeneric("determinePrior"))
# Prior probabilities under a multinomial logit concomitant model:
# row-wise softmax of the linear predictors x %*% coef.  The predictors
# are shifted by their row maximum before exponentiating; this leaves
# the softmax value mathematically unchanged but prevents exp() from
# overflowing to Inf (and the resulting NaN probabilities) for large
# linear predictors.
setMethod("determinePrior", signature(concomitant="FLXPmultinom"), function(prior, concomitant, group) {
  eta <- concomitant@x %*% concomitant@coef
  eta <- eta - apply(eta, 1, max)
  exps <- exp(eta)
  exps/rowSums(exps)
})
# Expand a weighted "flexmix" fit back to one row per observation by
# replicating each row of the model matrices according to its case
# weight; needed before simulating so every observation gets its own
# draw.  Clears the weights slot afterwards.
undo_weights <- function(object) {
  if (!is.null(object@weights)) {
    for (i in seq_along(object@model)) {
      object@model[[i]]@x <- apply(object@model[[i]]@x, 2, rep, object@weights)
      object@model[[i]]@y <- apply(object@model[[i]]@y, 2, rep, object@weights)
    }
    # Expand the concomitant model matrix exactly once, outside the
    # loop: the previous code re-expanded it per component model, which
    # errors (rep() with a 'times' vector of the wrong length) as soon
    # as more than one model is present.
    object@concomitant@x <- apply(object@concomitant@x, 2, rep, object@weights)
    if (length(object@group) > 0)
      object@group <- rep(object@group, object@weights)
    object@weights <- NULL
  }
  object
}
###**********************************************************
# simulate() method for mixture models: produce `nsim` draws via
# rflexmix(), with seed bookkeeping modelled on stats::simulate() --
# the RNG state used is recorded in the "seed" attribute and, when a
# seed is supplied, the global RNG state is restored on exit.
setMethod("simulate", signature("FLXdist"),
          function(object, nsim, seed = NULL, ...) {
  # Make sure .Random.seed exists before trying to read it.
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  else {
    R.seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv, inherits = FALSE))
  }
  ans <- lapply(seq_len(nsim), function(i) rflexmix(object, ...)$y)
  # NOTE(review): each element of `ans` is a *list* of response
  # matrices, for which ncol() returns NULL -- verify that this branch
  # behaves as intended for multivariate responses.
  if (all(sapply(ans, ncol) == 1)) ans <- as.data.frame(ans)
  attr(ans, "seed") <- RNGstate
  ans
})
flexmix/R/lattice.R 0000644 0001762 0000144 00000007466 14404637304 013721 0 ustar ligges users #
# Copyright (C) Deepayan Sarkar
# Internal code copied from package lattice for use in flexmix
#
# Compute histogram counts without plotting.  When `breaks` is an
# explicit numeric vector of cut points, the include.lowest/right
# conventions are forwarded; otherwise (a rule name or a single number)
# only `right` applies.  Internal code copied from package lattice.
hist.constructor <-
function (x, breaks, include.lowest = TRUE, right = TRUE, ...)
{
    vals <- as.numeric(x)
    explicit.breaks <- is.numeric(breaks) && length(breaks) > 1
    if (explicit.breaks) {
        hist(vals, breaks = breaks, plot = FALSE,
             include.lowest = include.lowest, right = right)
    } else {
        hist(vals, breaks = breaks, right = right, plot = FALSE)
    }
}
# Call FUN with the named arguments in `args`, silently dropping any
# argument FUN does not accept unless FUN has a ... formal (in which
# case everything is passed through).  Internal code copied from
# package lattice.
checkArgsAndCall <-
function (FUN, args)
{
    accepted <- names(formals(FUN))
    if (!("..." %in% accepted))
        args <- args[intersect(names(args), accepted)]
    do.call(FUN, args)
}
# Compute axis tick positions and labels for a (possibly log-scaled)
# numeric range.  Internal code copied from package lattice.
#   x              - data values, or a length-2 vector taken as the range.
#   at, labels     - FALSE means "compute automatically"; otherwise
#                    user-supplied positions/labels.
#   logsc          - FALSE (linear), TRUE (base 10), a numeric base,
#                    or "e".
#   equispaced.log - if TRUE, ticks on a log scale are equally spaced.
# Returns list(at, labels, check.overlap, num.limit).
formattedTicksAndLabels <-
function (x, at = FALSE, used.at = NULL, labels = FALSE, logsc = FALSE,
    ..., num.limit = NULL, abbreviate = NULL, minlength = 4,
    format.posixt = NULL, equispaced.log = TRUE)
{
    rng <- if (length(x) == 2)
        as.numeric(x)
    else range(as.numeric(x))
    # Normalise the log-scale specification: TRUE means base 10.
    if (is.logical(logsc) && logsc)
        logsc <- 10
    have.log <- !is.logical(logsc)
    if (have.log)
        logbase <- if (is.numeric(logsc))
            logsc
        else if (logsc == "e")
            exp(1)
        else stop("Invalid value of 'log'")
    # Label prefix on a log scale, e.g. "10^".
    logpaste <- if (have.log)
        paste(as.character(logsc), "^", sep = "")
    else ""
    # Overlap checking only when both positions and labels are automatic.
    check.overlap <- if (is.logical(at) && is.logical(labels))
        TRUE
    else FALSE
    if (is.logical(at)) {
        # Automatic tick positions: axisTicks() for genuine log-scale
        # ticks, pretty() otherwise.
        at <- if (have.log && !equispaced.log)
            checkArgsAndCall(axisTicks, list(usr = log10(logbase^rng),
                log = TRUE, axp = NULL, ...))
        else checkArgsAndCall(pretty, list(x = x[is.finite(x)],
            ...))
    }
    else if (have.log && (length(at) > 0)) {
        # User-supplied positions on a log scale arrive in data units
        # and are transformed to log units here.
        if (is.logical(labels))
            labels <- as.character(at)
        at <- log(at, base = logbase)
    }
    if (is.logical(labels)) {
        if (have.log && !equispaced.log) {
            labels <- as.character(at)
            at <- log(at, logbase)
        }
        else labels <- paste(logpaste, format(at, trim = TRUE),
            sep = "")
    }
    list(at = at, labels = labels, check.overlap = check.overlap,
        num.limit = rng)
}
# Assemble axis tick positions and labels for the data in `x`,
# restricted to the numeric limits reported by the tick computation.
# Internal code copied from package lattice.
calculateAxisComponents <-
function (x, ..., packet.number, packet.list, abbreviate = NULL,
    minlength = 4)
{
    # Degenerate case: nothing to label.
    if (all(is.na(x)))
        return(list(at = numeric(0), labels = numeric(0), check.overlap = TRUE,
            num.limit = c(0, 1)))
    comps <- formattedTicksAndLabels(x, ...)
    limits <- range(comps$num.limit)
    inside <- comps$at >= min(limits) & comps$at <= max(limits)
    comps$at <- comps$at[inside]
    comps$labels <- comps$labels[inside]
    # The `abbreviate` argument is logical, so this call still resolves
    # to base::abbreviate.
    if (is.logical(abbreviate) && abbreviate)
        comps$labels <- abbreviate(comps$labels, minlength)
    comps
}
# Extend axis limits by a padding proportion or to a fixed total length.
# Internal code copied from package lattice.
#   lim    - numeric limits of length 2, a character vector of factor
#            levels, or NA.
#   length - desired total length of the extended range.
#   axs    - "i" (exact limits, no padding) or "r" (padded).
#   prop   - padding proportion; defaults to lattice's axis.padding
#            option.
extend.limits <-
function (lim, length = 1, axs = "r", prop = if (axs == "i") 0 else lattice.getOption("axis.padding")$numeric)
{
    if (all(is.na(lim)))
        NA_real_
    else if (is.character(lim)) {
        # Factor-like limits: pad the 1..nlevels index range.
        c(1, length(lim)) + c(-1, 1) * if (axs == "i")
            0.5
        else lattice.getOption("axis.padding")$factor
    }
    else if (length(lim) == 2) {
        # Decreasing limits: re-evaluate this call with reversed limits
        # and reverse the result.
        if (lim[1] > lim[2]) {
            ccall <- match.call()
            ccall$lim <- rev(lim)
            ans <- eval.parent(ccall)
            return(rev(ans))
        }
        if (!missing(length) && !missing(prop))
            stop("'length' and 'prop' cannot both be specified")
        if (length <= 0)
            stop("'length' must be positive")
        if (!missing(length)) {
            # Convert the requested total length into the equivalent
            # padding proportion.
            prop <- (as.numeric(length) - as.numeric(diff(lim)))/(2 *
                as.numeric(diff(lim)))
        }
        if (lim[1] == lim[2])
            lim + 0.5 * c(-length, length)
        else {
            d <- diff(as.numeric(lim))
            lim + prop * d * c(-1, 1)
        }
    }
    else {
        print(lim)
        stop("improper length of 'lim'")
    }
}
flexmix/R/multcomp.R 0000644 0001762 0000144 00000003661 14757625043 014135 0 ustar ligges users #
# Copyright (C) 2004-2016 Friedrich Leisch
# $Id: multcomp.R 5298 2025-02-26 14:50:01Z gruen $
#
###*********************************************************
# Generic for general linear hypothesis tests (via multcomp::glht) on
# the component-specific coefficients of a mixture model.
setGeneric("flxglht",
           function(model, linfct, ...) standardGeneric("flxglht"))
# For a fitted "flexmix" object: refit() first (presumably to obtain
# the coefficient covariance matrix), then dispatch on the refit result.
setMethod("flxglht", signature(model="flexmix", linfct="character"),
          function(model, linfct, ...)
{
  model <- refit(model)
  flxglht(model, linfct, ...)
})
# General linear hypothesis tests on the coefficients of a refitted
# mixture ("FLXRoptim").  `linfct` selects one of two predefined
# hypothesis sets: "zero" tests each coefficient against zero, "tukey"
# performs all pairwise comparisons of corresponding coefficients
# across components.  Further arguments are passed on to glht().
setMethod("flxglht", signature(model="FLXRoptim", linfct="character"),
          function(model, linfct, ...)
{
  if (!requireNamespace("multcomp", quietly = TRUE)) {
    stop("install package multcomp to use this function")
  }
  if(length(model@components)>1)
    stop("Can currently handle only models with one response!\n")
  type <- match.arg(linfct, c("zero", "tukey"))
  k <- model@k
  cf <- model@coef
  vc <- model@vcov
  # Build the full coefficient names "model.1_Comp.<k>_coef.<name>" and
  # keep only those actually present in the fit.
  nc <- rownames(model@components[[1]][[1]])
  nc <- as.vector(outer(paste("model.1_Comp.", 1:k, "_coef.", sep=""),
                        nc, paste, sep=""))
  ## FIXME: for zero-inflated models some components are missing
  nc <- nc[nc %in% names(cf)]
  cf <- cf[nc]
  vc <- vc[nc, nc]
  # Shorten the names to "C<k>.<name>" for readable output.
  nc <- sub("model.1_Comp.(.*)_coef.", "C\\1.", nc)
  names(cf) <- colnames(vc) <- rownames(vc) <- nc
  if(type=="zero"){
    # Identity contrast matrix: each coefficient tested against zero.
    linfct <- diag(length(cf))
    rownames(linfct) <- nc
  }
  else{
    # Tukey-style all-pairs contrasts between components, replicated for
    # each of the p coefficients via the Kronecker product.
    k <- length(cf)/length(rownames(model@components[[1]][[1]]))
    p <- length(cf) / k
    # NOTE(review): the group sizes rep(3, k) look like arbitrary
    # placeholders -- confirm that contrMat's Tukey contrasts depend
    # only on the number of groups.
    tmp <- rep(3, k)
    Ktmp <- multcomp::contrMat(tmp, "Tukey")
    linfct <- kronecker(diag(p), Ktmp)
    colnames(linfct) <- names(cf)
    rownames(linfct) <- 1:nrow(linfct)
    # Label each contrast row as "<plus term>-<minus term>".
    for (i in 1:nrow(linfct))
      rownames(linfct)[i] <-
        paste(colnames(linfct)[linfct[i,] == 1],
              colnames(linfct)[linfct[i,] == -1], sep = "-")
  }
  multcomp::glht(multcomp::parm(coef = cf, vcov = vc), linfct = linfct, ...)
})
flexmix/R/multinom.R 0000644 0001762 0000144 00000003401 14404637304 014121 0 ustar ligges users setClass("FLXMRmultinom",
         # Driver class for multinomial logit component models; all slots
         # are inherited from the GLM driver class.
         contains = "FLXMRglm")
# Driver for finite mixtures of multinomial logit models.  The response
# is converted to an indicator matrix and each component is fitted with
# nnet.default; the first category serves as the baseline.  `...` is
# passed on to nnet.default.
FLXMRmultinom <- function(formula=.~., ...)
{
  z <- new("FLXMRmultinom", weighted=TRUE, formula=formula,
           family = "multinom", name=paste("FLXMRglm", "multinom", sep=":"))
  # Convert the response to a 0/1 indicator matrix with one column per
  # category; after factor coding the categories must cover 1..max.
  z@preproc.y <- function(x){
    x <- as.integer(factor(x))
    if (min(x) < 1 | length(unique(x)) != max(x))
      stop("x needs to be coercible to an integer vector containing all numbers from 1 to max(x)")
    y <- matrix(0, nrow = length(x), ncol = max(x))
    y[cbind(seq_along(x), x)] <- 1
    y
  }
  z@defineComponent <- function(para) {
    # Category probabilities via the multinomial logit link; the first
    # category is the baseline with linear predictor 0.
    predict <- function(x) {
      p <- tcrossprod(x, para$coef)
      eta <- cbind(1, exp(p))
      eta/rowSums(eta)
    }
    # Log-probability of each observation's observed category.
    logLik <- function(x, y) {
      log(predict(x))[cbind(seq_len(nrow(y)), max.col(y, "first"))]
    }
    new("FLXcomponent",
        parameters=list(coef=para$coef),
        logLik=logLik, predict=predict,
        df=para$df)
  }
  # Weighted M-step: fit via nnet.default (skip-layer network with
  # softmax output); the mask fixes the baseline category's weights at
  # zero so only p-1 sets of coefficients are estimated.
  z@fit <- function(x, y, w, component){
    r <- ncol(x)
    p <- ncol(y)
    if (p < 2) stop("Multinom requires at least two components.")
    mask <- c(rep(0, r + 1), rep(c(0, rep(1, r)), p - 1))
    fit <- nnet.default(x, y, w, mask = mask, size = 0,
                        skip = TRUE, softmax = TRUE, censored = FALSE,
                        rang = 0, trace=FALSE, ...)
    # Dress the result up as a "multinom" object so coef() dispatches to
    # the multinom method.
    fit$coefnames <- colnames(x)
    fit$weights <- w
    fit$vcoefnames <- fit$coefnames[seq_len(ncol(x))]
    fit$lab <- seq_len(ncol(y))
    class(fit) <- c("multinom", "nnet")
    coef <- coef(fit)
    z@defineComponent(list(coef = coef, df = length(coef)))
  }
  z
}
# Signal that no analytic gradient is implemented for the multinomial
# driver.
setMethod("existGradient", signature(object = "FLXMRmultinom"),
          function(object) FALSE)
flexmix/vignettes/ 0000755 0001762 0000144 00000000000 14757625373 013751 5 ustar ligges users flexmix/vignettes/mymclust.R 0000644 0001762 0000144 00000002053 14404637307 015746 0 ustar ligges users mymclust <- function (formula = .~., diagonal = TRUE)
{
  # Example FlexMix driver (from the package vignette): model-based
  # Gaussian clustering.  With `diagonal = TRUE` the component
  # covariance matrices are restricted to be diagonal.
  retval <- new("FLXMC", weighted = TRUE,
                formula = formula, dist = "mvnorm",
                name = "my model-based clustering")
  # Wrap fitted parameters (center, cov, df) into an FLXcomponent.
  retval@defineComponent <- function(para) {
    # Multivariate normal log-density of each observation.
    logLik <- function(x, y) {
      mvtnorm::dmvnorm(y, mean = para$center,
                       sigma = para$cov, log = TRUE)
    }
    # The component "prediction" is its mean vector, replicated for
    # every observation.
    predict <- function(x) {
      matrix(para$center, nrow = nrow(x),
             ncol = length(para$center), byrow = TRUE)
    }
    new("FLXcomponent", parameters =
          list(center = para$center, cov = para$cov),
        df = para$df, logLik = logLik, predict = predict)
  }
  # Weighted M-step: weighted mean and covariance of the responses.
  retval@fit <- function(x, y, w, ...) {
    para <- cov.wt(y, wt = w)[c("center", "cov")]
    df <- (3 * ncol(y) + ncol(y)^2)/2   # p means + p(p+1)/2 covariances
    if (diagonal) {
      para$cov <- diag(diag(para$cov))
      df <- 2 * ncol(y)                 # p means + p variances
    }
    retval@defineComponent(c(para, df = df))
  }
  retval
}
flexmix/vignettes/mixture-regressions.Rnw 0000644 0001762 0000144 00000220176 14404637307 020504 0 ustar ligges users %
% Copyright (C) 2008 Bettina Gruen and Friedrich Leisch
% $Id: mixture-regressions.Rnw $
%
\documentclass[nojss]{jss}
\usepackage{amsfonts}
\title{FlexMix Version 2: Finite Mixtures with Concomitant
Variables and Varying and Constant Parameters}
\Plaintitle{FlexMix Version 2: Finite Mixtures with Concomitant
Variables and Varying and Constant Parameters}
\Shorttitle{FlexMix Version 2}
\author{Bettina Gr{\"u}n\\
Wirtschaftsuniversit{\"a}t Wien \And
Friedrich Leisch\\
Universit\"at f\"ur Bodenkultur Wien}
\Plainauthor{Bettina Gr{\"u}n, Friedrich Leisch}
\Address{
Bettina Gr\"un\\
Institute for Statistics and Mathematics\\
Wirtschaftsuniversit{\"a}t Wien\\
Welthandelsplatz 1\\
1020 Wien, Austria\\
E-mail: \email{Bettina.Gruen@R-project.org}\\
Friedrich Leisch\\
Institut f\"ur Angewandte Statistik und EDV\\
Universit\"at f\"ur Bodenkultur Wien\\
Peter Jordan Stra\ss{}e 82\\
1190 Wien, Austria\\
E-mail: \email{Friedrich.Leisch@boku.ac.at}
}
\Abstract{
This article is a (slightly) modified version of
\cite{mixtures:Gruen+Leisch:2008a}, published in the \emph{Journal
of Statistical Software}.
\pkg{flexmix} provides infrastructure for flexible fitting of finite
mixture models in \proglang{R} using the expectation-maximization
(EM) algorithm or one of its variants. The functionality of the
package was enhanced. Now concomitant variable models as well as
varying and constant parameters for the component specific
generalized linear regression models can be fitted. The application
of the package is demonstrated on several examples, the
implementation described and examples given to illustrate how new
drivers for the component specific models and the concomitant
variable models can be defined.
}
\Keywords{\proglang{R}, finite mixture models, generalized linear models, concomitant variables}
\Plainkeywords{R, finite mixture models, generalized linear models, concomitant variables}
\usepackage{amsmath, listings}
\def\argmax{\mathop{\rm arg\,max}}
%% \usepackage{Sweave} prevent automatic inclusion
\SweaveOpts{width=9, height=4.5, eps=FALSE, keep.source=TRUE}
<>=
options(width=60, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE)
library("graphics")
library("stats")
library("flexmix")
library("lattice")
ltheme <- canonical.theme("postscript", FALSE)
lattice.options(default.theme=ltheme)
data("NPreg", package = "flexmix")
data("dmft", package = "flexmix")
source("myConcomitant.R")
@
%%\VignetteIndexEntry{FlexMix Version 2: Finite Mixtures with Concomitant Variables and Varying and Constant Parameters}
%%\VignetteDepends{flexmix}
%%\VignetteKeywords{R, finite mixture models, model based clustering, latent class regression}
%%\VignettePackage{flexmix}
\begin{document}
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Introduction}\label{sec:introduction}
Finite mixture models are a popular technique for modelling unobserved
heterogeneity or to approximate general distribution functions in a
semi-parametric way. They are used in a lot of different areas such as
astronomy, biology, economics, marketing or medicine. An overview on
mixture models is given in \cite{mixtures:Everitt+Hand:1981},
\cite{mixtures:Titterington+Smith+Makov:1985},
\cite{mixtures:McLachlan+Basford:1988}, \cite{mixtures:Boehning:1999},
\cite{mixtures:McLachlan+Peel:2000} and
\cite{mixtures:Fruehwirth-Schnatter:2006}.
Version 1 of \proglang{R} package \pkg{flexmix} was introduced in
\cite{mixtures:Leisch:2004}. The main design principles of the package
are extensibility and fast prototyping for new types of mixture
models. It uses \proglang{S}4 classes and methods
\citep{mixtures:Chambers:1998} as implemented in the \proglang{R}
package \pkg{methods} and exploits advanced features of \proglang{R} such as
lexical scoping \citep{mixtures:Gentleman+Ihaka:2000}. The package
implements a framework for maximum likelihood estimation with the
expectation-maximization (EM) algorithm
\citep{mixtures:Dempster+Laird+Rubin:1977}. The main focus is on
finite mixtures of regression models and it allows for multiple
independent responses and repeated measurements. The EM algorithm can
be controlled through arguments such as the maximum number of
iterations or a minimum improvement in the likelihood to continue.
Newly introduced features in the current package version are
concomitant variable models \citep{mixtures:Dayton+Macready:1988} and
varying and constant parameters in the component specific regressions.
Varying parameters follow a finite mixture, i.e., several groups exist
in the population which have different parameters. Constant parameters
are fixed for the whole population. This model is similar to
mixed-effects models \citep{mixtures:Pinheiro+Bates:2000}. The main
difference is that in this application the distribution of the varying
parameters is unknown and has to be estimated. Thus the model is
actually closer to the varying-coefficients modelling framework
\citep{mixtures:Hastie+Tibshirani:1993}, using convex combinations of
discrete points as functional form for the varying coefficients.
The extension to constant and varying parameters allows for example to
fit varying intercept models as given in
\cite{mixtures:Follmann+Lambert:1989} and \cite{mixtures:Aitkin:1999}.
These models are frequently applied to account for overdispersion in
the data where the components follow either a binomial or Poisson
distribution. The model was also extended to include nested varying
parameters, i.e.~this makes it possible to have groups of components with the
same parameters \citep{mixtures:Gruen+Leisch:2006,
mixtures:Gruen:2006}.
In Section~\ref{sec:model-spec-estim} the extended model class is
presented together with the parameter estimation using the EM
algorithm. In Section~\ref{sec:using-new-funct} examples are given to
demonstrate how the new functionality can be used. An overview on the
implementational details is given in Section~\ref{sec:implementation}.
The new model drivers are presented and changes made to improve the
flexibility of the software and to enable the implementation of the
new features are discussed. Examples for writing new drivers for the
component specific models and the concomitant variable models are
given in Section~\ref{sec:writing-your-own}. This paper gives a short
overview on finite mixtures and the package in order to be
self-contained. A more detailed introduction to finite mixtures and
the package \pkg{flexmix} can be found in \cite{mixtures:Leisch:2004}.
All computations and graphics in this paper have been done with
\pkg{flexmix} version
\Sexpr{packageDescription("flexmix",fields="Version")} and
\proglang{R} version \Sexpr{getRversion()} using Sweave
\citep{mixtures:Leisch:2002}. The newest release version of \pkg{flexmix} is
always available from the Comprehensive \proglang{R} Archive Network at
\url{http://CRAN.R-project.org/package=flexmix}. An up-to-date version of this paper
is contained in the package as a vignette, giving full access to the \proglang{R} code
behind all examples shown below. See \code{help("vignette")} or
\cite{mixtures:Leisch:2003} for details on handling package vignettes.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Model specification and estimation}\label{sec:model-spec-estim}
A general model class of finite mixtures of regression models is
considered in the following. The mixture is assumed to consist of $K$
components where each component follows a parametric distribution.
Each component has a weight assigned which indicates the a-priori
probability for an observation to come from this component and the
mixture distribution is given by the weighted sum over the $K$
components. If the weights depend on further variables, these are
referred to as concomitant variables.
In marketing choice behaviour is often modelled in dependence of
marketing mix variables such as price, promotion and display. Under
the assumption that groups of respondents with different price,
promotion and display elasticities exist mixtures of regressions are
fitted to model consumer heterogeneity and segment the market.
Socio-demographic variables such as age and gender have often been
shown to be related to the different market segments even though they
generally do not perform well when used to a-priori segment the
market. The relationships between the behavioural and the
socio-demographic variables is then modelled through concomitant
variable models where the group sizes (i.e.~the weights of the
mixture) depend on the socio-demographic variables.
The model class is given by
\begin{align*}
h(y|x, w, \psi) &= \sum_{k = 1}^K
\pi_k(w, \alpha) f_k(y|x,\theta_{k})\\
&= \sum_{k = 1}^K \pi_k(w, \alpha) \prod_{d=1}^D
f_{kd}(y_d|x_d,\theta_{kd}),
\end{align*}
where $\psi$ denotes the vector of all parameters for the mixture
density $h()$ and is given by $(\alpha, (\theta_k)_{k=1,\ldots,K})$.
$y$ denotes the response, $x$ the predictor and $w$ the concomitant
variables. $f_k$ is the component specific density function.
Multivariate variables $y$ are assumed to be dividable into $D$
subsets where the component densities are independent between the
subsets, i.e.~the component density $f_k$ is given by a product over
$D$ densities which are defined for the subset variables $y_d$ and
$x_d$ for $d=1,\ldots,D$. The component specific parameters are given
by $\theta_k = (\theta_{kd})_{d=1,\ldots,D}$. Under the assumption
that $N$ observations are available the dimensions of the variables
are given by $y = (y_d)_{d=1,\ldots,D} \in \mathbb{R}^{N \times
\sum_{d=1}^D k_{yd}}$, $x = (x_d)_{d=1,\ldots,D} \in \mathbb{R}^{N
\times \sum_{d=1}^D k_{xd}}$ for all $d = 1,\ldots,D$ and $w \in
\mathbb{R}^{N \times k_w}$. In this notation $k_{yd}$ denotes the
dimension of the $d^{\textrm{th}}$ response, $k_{xd}$ the dimension of
the $d^{\textrm{th}}$ predictors and $k_w$ the dimension of the
concomitant variables. For mixtures of GLMs each of the $d$ responses
will in general be univariate, i.e.~multivariate responses will be
conditionally independent given the segment memberships.
For the component weights $\pi_k$ it holds $\forall w$ that
\begin{equation}\label{eq:prior}
\sum_{k=1}^K \pi_k(w,\alpha) = 1 \quad \textrm{and} \quad
\pi_k(w, \alpha) > 0, \, \forall k,
\end{equation}
where $\alpha$ are the parameters of the concomitant variable model.
For the moment focus is given to finite mixtures where the component
specific densities are from the same parametric family, i.e.~$f_{kd}
\equiv f_d$ for notational simplicity. If $f_d$ is from the
exponential family of distributions and for each component a
generalized linear model is fitted
\citep[GLMs;][]{mixtures:McCullagh+Nelder:1989} these models are also
called GLIMMIX models \citep{mixtures:Wedel+DeSarbo:1995}. In this
case the component specific parameters are given by $\theta_{kd} =
(\beta'_{kd}, \phi_{kd})$ where $\beta_{kd}$ are the regression
coefficients and $\phi_{kd}$ is the dispersion parameter.
The component specific parameters $\theta_{kd}$ are either restricted
to be equal over all components, to vary between groups of components
or to vary between all components. Variation between groups of components is
referred to as varying parameters with one level of nesting. A disjoint
partition $K_c$, $c = 1,\ldots,C$ of the set $\tilde{K} :=
\{1\ldots,K\}$ is defined for the regression coefficients. $C$ is the
number of groups of the regression coefficients at the nesting level.
The regression coefficients are accordingly split into three groups:
\begin{align*}
\beta_{kd} &= (\beta'_{1d}, \beta'_{2,c(k)d},
\beta'_{3,kd})',
\end{align*}
where $c(k) = \{c = 1,\ldots, C: k \in K_c\}$.
Similarly, a disjoint partition $K_v$, $v = 1,\ldots,V$, of $\tilde{K}$
can be defined for the dispersion parameters if nested varying
parameters are present. $V$ denotes the number of groups of the
dispersion parameters at the nesting level. This gives:
\begin{align*}
\phi_{kd} &= \left\{\begin{array}{ll}
\phi_{d} & \textrm{for constant parameters}\\
\phi_{kd} & \textrm{for varying parameters}\\
\phi_{v(k)d} & \textrm{for nested varying parameters}
\end{array}\right.
\end{align*}
where $v(k) = \{v = 1,\ldots,V: k \in K_v\}$. The nesting structure of
the component specific parameters is also described in
\cite{mixtures:Gruen+Leisch:2006}.
Different concomitant variable models are possible to determine the
component weights \citep{mixtures:Dayton+Macready:1988}. The mapping
function only has to fulfill condition \eqref{eq:prior}. In the
following a multinomial logit model is assumed for the $\pi_k$ given
by
\begin{equation*}
\pi_k(w,\alpha) =
\frac{e^{w'\alpha_k}}{\sum_{u = 1}^K
e^{w'\alpha_u}} \quad \forall k,
\end{equation*}
with $\alpha = (\alpha'_k)'_{k=1,\ldots,K}$ and $\alpha_1 \equiv 0$.
%%-------------------------------------------------------------------------
\subsection{Parameter estimation}\label{sec:estimation}
The EM algorithm \citep{mixtures:Dempster+Laird+Rubin:1977} is the
most common method for maximum likelihood estimation of finite mixture
models where the number of components $K$ is fixed. The EM algorithm
applies a missing data augmentation scheme. It is assumed that a
latent variable $z_n \in \{0,1\}^K$ exists for each observation $n$
which indicates the component membership, i.e.~$z_{nk}$ equals 1 if
observation $n$ comes from component $k$ and 0 otherwise. Furthermore
it holds that $\sum_{k=1}^K z_{nk}=1$ for all $n$. In the EM algorithm
these unobserved component memberships $z_{nk}$ of the observations
are treated as missing values and the data is augmented by estimates
of the component membership, i.e.~the estimated a-posteriori
probabilities $\hat{p}_{nk}$. For a sample of $N$ observations
$\{(y_1, x_1, w_1), \ldots, (y_N, x_N, w_N)\}$ the EM algorithm is
given by:
\begin{description}
\item[E-step:] Given the current parameter estimates $\psi^{(i)}$ in
the $i$-th iteration, replace the missing data $z_{nk}$ by the
estimated a-posteriori probabilities
\begin{align*}
\hat{p}_{nk} & = \frac{\displaystyle
\pi_k(w_n, \alpha^{(i)}) f(y_n| x_n,
\theta_k^{(i)})
}{\displaystyle
\sum_{u = 1}^K \pi_u(w_n, \alpha^{(i)}) f(y_n |x_n,
\theta_u^{(i)})
}.
\end{align*}
\item[M-step:] Given the estimates for the a-posteriori probabilities
$\hat{p}_{nk}$ (which are functions of $\psi^{(i)}$), obtain new
estimates $\psi^{(i+1)}$ of the parameters by maximizing
\begin{align*}
Q(\psi^{(i+1)}|\psi^{(i)}) &= Q_1(\theta^{(i+1)} | \psi^{(i)}) +
Q_2(\alpha^{(i+1)} | \psi^{(i)}),
\end{align*}
where
\begin{align*}
Q_1(\theta^{(i+1)} | \psi^{(i)}) &= \sum_{n = 1}^N \sum_{k = 1}^K
\hat{p}_{nk} \log(f(y_n | x_n, \theta_k^{(i+1)}))
\end{align*}
and
\begin{align*}
Q_2(\alpha^{(i+1)}| \psi^{(i)}) &= \sum_{n = 1}^N
\sum_{k = 1}^K \hat{p}_{nk}
\log(\pi_k(w_n, \alpha^{(i+1)})).
\end{align*}
$Q_1$ and $Q_2$ can be maximized separately. The maximization of $Q_1$
gives new estimates $\theta^{(i+1)}$ and the maximization of $Q_2$
gives $\alpha^{(i+1)}$. $Q_1$ is maximized separately for each
$d=1,\ldots,D$ using weighted ML estimation of GLMs and $Q_2$ using
weighted ML estimation of multinomial logit models.
\end{description}
Different variants of the EM algorithm exist such as the stochastic EM
\citep[SEM;][]{mixtures:Diebolt+Ip:1996} or the classification EM
\citep[CEM;][]{mixtures:Celeux+Govaert:1992}. These two variants are
also implemented in package \pkg{flexmix}. For both variants an
additional step is made between the expectation and maximization
steps. This step uses the estimated a-posteriori probabilities and
assigns each observation to only one component, i.e.~classifies it
into one component. For SEM this assignment is determined in a
stochastic way while it is a deterministic assignment for CEM. For the
SEM algorithm the additional step is given by:
\begin{description}
\item[S-step:] Given the a-posteriori probabilities draw
\begin{align*}
\hat{z}_n &\sim \textrm{Mult}((\hat{p}_{nk})_{k=1,\ldots,K}, 1)
\end{align*}
where $\textrm{Mult}(\theta, T)$ denotes the multinomial distribution
with success probabilities $\theta$ and number of trials $T$.
\end{description}
Afterwards, the $\hat{z}_{nk}$ are used instead of the
$\hat{p}_{nk}$ in the M-step. For the CEM the additional step is given
by:
\begin{description}
\item[C-step:] Given the a-posteriori probabilities define
\begin{align*}
\hat{z}_{nk} &= \left\{\begin{array}{ll}
1&\textrm{if } k = \min\{ l : \hat{p}_{nl} \geq \hat{p}_{nu}\, \forall u=1,\ldots,K\}\\
0&\textrm{otherwise}.
\end{array}\right.
\end{align*}
\end{description}
Please note that in this step the observation is assigned to the
component with the smallest index if the same maximum a-posteriori
probability is observed for several components.
Both of these variants have been proposed to improve the performance
of the EM algorithm, because the ordinary EM algorithm tends to
converge rather slowly and only to a local optimum. The convergence
behavior can be expected to be better for the CEM than ordinary EM
algorithm, while SEM can escape convergence to a local optimum.
However, the CEM algorithm does not give ML estimates because it
maximizes the complete likelihood. For SEM good approximations of the
ML estimator are obtained if the parameters where the maximum
likelihood was encountered are used as estimates. Another possibility
for determining parameter estimates from the SEM algorithm could be
the mean after discarding a suitable number of burn-ins. An
implementational advantage of both variants is that no weighted
maximization is necessary in the M-step.
It has been shown that the values of the likelihood are monotonically
increased during the EM algorithm. On the one hand this ensures the
convergence of the EM algorithm if the likelihood is bounded, but on
the other hand only the detection of a local maximum can be
guaranteed. Therefore, it is recommended to repeat the EM algorithm
with different initializations and choose as final solution the one
with the maximum likelihood. Different initialization strategies for
the EM algorithm have been proposed, as its convergence to the optimal
solution depends on the initialization
\citep{mixtures:Biernacki+Celeux+Govaert:2003,mixtures:Karlis+Xekalaki:2003}.
Proposed strategies are for example to first make several runs of the
SEM or CEM algorithm with different random initializations and then
start the EM at the best solution encountered.
The component specific parameter estimates can be determined
separately for each $d=1,\ldots,D$. For simplicity of presentation the
following description assumes $D=1$. If all parameter estimates vary
between the component distributions they can be determined separately
for each component in the M-step. However, if also constant or nested
varying parameters are specified, the component specific estimation
problems are not independent from each other any more. Parameters have
to be estimated which occur in several or all components and hence,
the parameters of the different components have to be determined
simultaneously for all components. The estimation problem for all
component specific parameters is then obtained by replicating the
vector of observations $y = (y_n)_{n=1,\ldots,N}$ $K$ times and
defining the covariate matrix $X = (X_{\textrm{constant}},
X_{\textrm{nested}}, X_{\textrm{varying}})$ by
\begin{align*}
&X_{\textrm{constant}} = \mathbf{1}_K \otimes (x'_{1,n})_{n=1,\ldots,N}\\
&X_{\textrm{nested}} = \mathbf{J} \odot (x'_{2,n})_{n=1,\ldots,N}\\
&X_{\textrm{varying}} = \mathbf{I}_K \otimes(x'_{3,n})_{n=1,\ldots,N},
\end{align*}
where $\mathbf{1}_K$ is a vector of 1s of length $K$, $\mathbf{J}$ is
the incidence matrix for each component $k=1,\ldots,K$ and each
nesting group $c = 1,\ldots,C$ and hence is of dimension $K \times C$, and
$\mathbf{I}_K$ is the identity matrix of dimension $K \times K$.
$\otimes$ denotes the Kronecker product and $\odot$ the Khatri-Rao
product (i.e., the column-wise Kronecker product). $x_{m,n}$ are the
covariates of the corresponding coefficients $\beta_{m,.}$ for
$m=1,2,3$. Please note that the weights used for the estimation are
the a-posteriori probabilities which are stacked for all components,
i.e.~a vector of length $N K$ is obtained.
Due to the replication of data in the case of constant or nested
varying parameters the amount of memory needed for fitting the mixture
model to large datasets is substantially increased and it might be
easier to fit only varying coefficients to these datasets. To
overcome this problem it could be considered to implement special data
structures in order to avoid storing the same data multiple times for
large datasets.
Before each M-step the average component sizes (over the given data
points) are checked and components which are smaller than a given
(relative) minimum size are omitted in order to avoid too small
components where fitting problems might arise. This strategy has
already been recommended for the SEM algorithm
\citep{mixtures:Celeux+Diebolt:1988} because it allows one to determine
the suitable number of components in an automatic way given that the
a-priori specified number of components is large enough. This
recommendation is based on the assumption that the redundant
components will be omitted during the estimation process if the
algorithm is started with too many components. If omission of small
components is not desired the minimum size required can be set to
zero. All components will be then retained throughout the EM algorithm
and a mixture with the number of components specified in the
initialization will be returned. The algorithm is stopped if the
relative change in the log-likelihood is smaller than a pre-specified
$\epsilon$ or the maximum number of iterations is reached.
For model selection different information criteria are available: AIC,
BIC and ICL \citep[Integrated Complete
Likelihood;][]{mixtures:Biernacki+Celeux+Govaert:2000}. They are of
the form twice the negative log-likelihood plus the number of parameters
times $k$ where $k=2$ for the AIC and $k$ equals the logarithm of the
number of observations for the BIC. The ICL is the same as the BIC
except that the complete likelihood (where the missing class
memberships are replaced by the assignments induced by the maximum
a-posteriori probabilities) instead of the likelihood is used.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Using the new functionality}
\label{sec:using-new-funct}
In the following model fitting and model selection in \proglang{R} is
illustrated on several examples including mixtures of Gaussian,
binomial and Poisson regression models, see also
\cite{mixtures:Gruen:2006} and \cite{mixtures:Gruen+Leisch:2007a}.
More examples for mixtures of GLMs are provided as part of the
software package through a collection of artificial and real world
datasets, most of which have been previously used in the literature
(see references in the online help pages). Each dataset can be loaded
to \proglang{R} with \code{data("}\textit{name}\code{")} and the fitting of the proposed
models can be replayed using \code{example("}\textit{name}\code{")}. Further details on
these examples are given in a user guide which can be accessed using
\code{vignette("regression-examples", package="flexmix")} from within
\proglang{R}.
%%-----------------------------------------------------------------------
\subsection{Artificial example}\label{sec:artificial-example}
In the following the artificial dataset \code{NPreg} is used which
has already been used in \cite{mixtures:Leisch:2004} to illustrate the
application of package \pkg{flexmix}. The data comes from two latent
classes of size \Sexpr{nrow(NPreg)/2} each and for each of the classes
the data is drawn with respect to the following structure:
\begin{center}
\begin{tabular}{ll}
Class~1: & $ \mathit{yn} = 5x+\epsilon$\\
Class~2: & $ \mathit{yn} = 15+10x-x^2+\epsilon$
\end{tabular}
\end{center}
with $\epsilon\sim N(0,9)$, see the left panel of
Figure~\ref{fig:npreg}. The dataset \code{NPreg} also includes a
response $\mathit{yp}$ which is given by a generalized linear model following a
Poisson distribution and using the logarithm as link function. The
parameters of the mean are given for the two classes by:
\begin{center}
\begin{tabular}{ll}
Class~1: & $ \mu_1 = 2 - 0.2x$\\
Class~2: & $ \mu_2 = 1 + 0.1x$.
\end{tabular}
\end{center}
This signifies that given $x$ the response $\mathit{yp}$ in group $k$ follows a
Poisson distribution with mean $e^{\mu_k}$, see the right panel of
Figure~\ref{fig:npreg}.
\setkeys{Gin}{width=\textwidth}
\begin{figure}
\centering
<>=
par(mfrow=c(1,2))
plot(yn~x, col=class, pch=class, data=NPreg)
plot(yp~x, col=class, pch=class, data=NPreg)
@
\caption{Standard regression example (left) and Poisson regression (right).}
\label{fig:npreg}
\end{figure}
This model can be fitted in \proglang{R} using the commands:
<<>>=
suppressWarnings(RNGversion("3.5.0"))
set.seed(1802)
library("flexmix")
data("NPreg", package = "flexmix")
Model_n <- FLXMRglm(yn ~ . + I(x^2))
Model_p <- FLXMRglm(yp ~ ., family = "poisson")
m1 <- flexmix(. ~ x, data = NPreg, k = 2, model = list(Model_n, Model_p),
control = list(verbose = 10))
@
If the dimensions are independent the component specific model for
multivariate observations can be specified as a list of models for
each dimension.
The estimation can be controlled with the \code{control} argument
which is specified with an object of class \code{"FLXcontrol"}. For
convenience also a named list can be provided which is used to
construct and set the respective slots of the \code{"FLXcontrol"}
object. Elements of the control object are \code{classify} to select
ordinary EM, CEM or SEM, \code{minprior} for the minimum relative size
of components, \code{iter.max} for the maximum number of iterations
and \code{verbose} for monitoring. If \code{verbose} is a positive
integer the log-likelihood is reported every \code{verbose} iterations
and at convergence together with the number of iterations made. The
default is to not report any log-likelihood information during the
fitting process.
The estimated model \code{m1} is of class \code{"flexmix"} and the
result of the default plot method for this class is given in
Figure~\ref{fig:root1}. This plot method uses package \pkg{lattice}
\citep{mixtures:Sarkar:2008} and the usual parameters can be specified
to alter the plot, e.g.~the argument \code{layout} determines the
arrangement of the panels. The returned object is of class
\code{"trellis"} and the plotting can also be influenced by the
arguments of its show method.
The default plot prints rootograms (i.e., a histogram of the square
root of counts) of the a-posteriori probabilities of each observation
separately for each component. For each component the observations
with a-posteriori probabilities less than a pre-specified $\epsilon$
(default is $10^{-4}$) for this component are omitted in order to
avoid that the bar at zero dominates the plot
\citep{mixtures:Leisch:2004a}. Please note that the labels of the
y-axis report the number of observations in each bar, i.e.~the squared
values used for the rootograms.
\begin{figure}
\centering
<>=
print(plot(m1))
@
\caption{The plot method for \code{"flexmix"} objects, here obtained
by \code{plot(m1)}, shows rootograms of the posterior class
probabilities.}
\label{fig:root1}
\end{figure}
More detailed information on the estimated parameters with respect to
standard deviations and significance tests can be obtained with
function \code{refit()}. This function determines the
variance-covariance matrix of the estimated parameters by using the
inverted negative Hessian matrix as computed by the general purpose
optimizer \code{optim()} on the full likelihood of the
model. \code{optim()} is initialized in the solution obtained with the
EM algorithm. For mixtures of GLMs we also implemented the gradient,
which speeds up convergence and gives more precise estimates of the
Hessian.
Naturally, function \code{refit()} will also work for models which
have been determined by applying some model selection strategy
depending on the data (AIC, BIC, \ldots). The same caution is
necessary as when using \code{summary()} on standard linear models
selected using \code{step()}: The p-values shown are not correct
because they have not been adjusted for the fact that the same data
are used to select the model and compute the p-values. So use them
only in an exploratory manner in this context, see also
\cite{mixtures:Harrell:2001} for more details on the general problem.
The returned object can be inspected using \code{summary()} with
arguments \code{which} to specify if information for the component
model or the concomitant variable model should be shown and
\code{model} to indicate for which dimension of the component models
this should be done. Selecting \code{model=1} gives the parameter
estimates for the dimension where the response variable follows a
Gaussian distribution.
<<>>=
m1.refit <- refit(m1)
summary(m1.refit, which = "model", model = 1)
@
\begin{figure}
\centering
<>=
print(plot(m1.refit, layout = c(1,3), bycluster = FALSE,
main = expression(paste(yn *tilde(" ")* x + x^2))),
split= c(1,1,2,1), more = TRUE)
print(plot(m1.refit, model = 2,
main = expression(paste(yp *tilde(" ")* x)),
layout = c(1,2), bycluster = FALSE),
split = c(2,1,2,1))
@
\caption{The default plot for refitted \code{"flexmix"} objects, here
obtained by \code{plot(refit(m1), model = 1)} and
\code{plot(refit(m1), model = 2)}, shows the coefficient estimates
and their confidence intervals.}
\label{fig:refit}
\end{figure}
The default plot method for the refitted \code{"flexmix"} object
depicts the estimated coefficients with corresponding confidence
intervals and is given in Figure~\ref{fig:refit}. It can be seen that
for the first model the confidence intervals of the coefficients of
the intercept and the quadratic term of \code{x} overlap with zero.
A model where these coefficients are set to zero can be estimated with
the model driver function \code{FLXMRglmfix()} and the following
commands for specifying the nesting structure. The argument
\code{nested} needs input for the number of components in each group
(given by \code{k}) and the formula which determines the model matrix
for the nesting (given by \code{formula}). This information can be
provided in a named list.
For the restricted model the element \code{k} is a vector with two 1s
because each of the components has different parameters. The formulas
specifying the model matrices of these coefficients are
\verb/~ 1 + I(x^2)/ for an intercept and a quadratic term of $x$
for component 1 and \code{~ 0} for no additional coefficients for
component 2. The EM algorithm is initialized in the previously fitted
model by passing the posterior probabilities in the argument
\code{cluster}.
<<>>=
Model_n2 <- FLXMRglmfix(yn ~ . + 0, nested = list(k = c(1, 1),
formula = c(~ 1 + I(x^2), ~ 0)))
m2 <- flexmix(. ~ x, data = NPreg, cluster = posterior(m1),
model = list(Model_n2, Model_p))
m2
@
Model selection based on the BIC would suggest the smaller model which
also corresponds to the true underlying model.
<<>>=
c(BIC(m1), BIC(m2))
@
%%-----------------------------------------------------------------------
\subsection{Beta-blockers dataset}
\label{sec:beta-blockers}
The dataset is analyzed in \cite{mixtures:Aitkin:1999,
mixtures:Aitkin:1999a} using a finite mixture of binomial regression
models. Furthermore, it is described in
\citet[p.~165]{mixtures:McLachlan+Peel:2000}. The dataset is from a
22-center clinical trial of beta-blockers for reducing mortality after
myocardial infarction. A two-level model is assumed to represent the
data, where centers are at the upper level and patients at the lower
level. The data is illustrated in Figure~\ref{fig:beta}.
First, the center information is ignored and a binomial logit
regression model with treatment as covariate is fitted using
\code{glm}, i.e.~$K=1$ and it is assumed that the different centers
are comparable:
<<>>=
data("betablocker", package = "flexmix")
betaGlm <- glm(cbind(Deaths, Total - Deaths) ~ Treatment,
family = "binomial", data = betablocker)
betaGlm
@
The residual deviance suggests that overdispersion is present in the
data. In the next step the intercept is allowed to follow a mixture
distribution given the centers. This signifies that the component
membership is fixed for each center. This grouping is specified in
\proglang{R} by adding \code{| Center} to the formula similar to the
notation used in \pkg{nlme} \citep{mixtures:Pinheiro+Bates:2000}.
Under the assumption of homogeneity within centers identifiability of
the model class can be ensured as induced by the sufficient conditions
for identifiability given in \cite{mixtures:Follmann+Lambert:1991} for
binomial logit models with varying intercepts and
\cite{mixtures:Gruen+Leisch:2008} for multinomial logit models with
varying and constant parameters. In order to determine the suitable
number of components, the mixture is fitted with different numbers of
components.
<<>>=
betaMixFix <- stepFlexmix(cbind(Deaths, Total - Deaths) ~ 1 | Center,
model = FLXMRglmfix(family = "binomial", fixed = ~ Treatment),
k = 2:4, nrep = 5, data = betablocker)
@
The returned object is of class \code{"stepFlexmix"} and printing the
object gives the information on the number of iterations until
termination of the EM algorithm, a logical indicating if the EM
algorithm has converged, the log-likelihood and some model information
criteria. The plot method compares the fitted models using the
different model information criteria.
<<>>=
betaMixFix
@
A specific \code{"flexmix"} model contained in the
\code{"stepFlexmix"} object can be selected using \code{getModel()}
with argument \code{which} to specify the selection criterion. The
best model with respect to the BIC is selected with:
<<>>=
betaMixFix_3 <- getModel(betaMixFix, which = "BIC")
betaMixFix_3 <- relabel(betaMixFix_3, "model", "Intercept")
@
The components of the selected model are ordered with respect to the
estimated intercept values.
In this case a model with three components is selected with respect to
the BIC. The fitted values for the model with three components are
given in Figure~\ref{fig:beta} separately for each component and the
treatment and control groups.
The fitted parameters of the component specific models can be accessed
with:
<<>>=
parameters(betaMixFix_3)
@
Please note that the coefficients of variable \code{Treatment} are the
same for all three components.
\begin{figure}
\centering
<>=
library("grid")
betablocker$Center <- with(betablocker, factor(Center, levels = Center[order((Deaths/Total)[1:22])]))
clusters <- factor(clusters(betaMixFix_3), labels = paste("Cluster", 1:3))
print(dotplot(Deaths/Total ~ Center | clusters, groups = Treatment, as.table = TRUE,
data = betablocker, xlab = "Center", layout = c(3, 1),
scales = list(x = list(cex = 0.7, tck = c(1, 0))),
key = simpleKey(levels(betablocker$Treatment), lines = TRUE, corner = c(1,0))))
betaMixFix.fitted <- fitted(betaMixFix_3)
for (i in 1:3) {
seekViewport(trellis.vpname("panel", i, 1))
grid.lines(unit(1:22, "native"), unit(betaMixFix.fitted[1:22, i], "native"), gp = gpar(lty = 1))
grid.lines(unit(1:22, "native"), unit(betaMixFix.fitted[23:44, i], "native"), gp = gpar(lty = 2))
}
@
\setkeys{Gin}{width=0.8\textwidth}
\caption{Relative number of deaths for the treatment and the control
group for each center in the beta-blocker dataset. The centers are
sorted by the relative number of deaths in the control group. The
lines indicate the fitted values for each component of the
3-component mixture model with varying intercept and
constant parameters for treatment.}
\label{fig:beta}
\end{figure}
The variable \code{Treatment} can also be included in the varying part
of the model. This signifies that a mixture distribution is assumed
where for each component different values are allowed for the
intercept and the treatment coefficient. This mixture distribution can
be specified using function \code{FLXMRglm()}. Again it is assumed
that the heterogeneity is only between centers and therefore the
aggregated data for each center can be used.
<<>>=
betaMix <- stepFlexmix(cbind(Deaths, Total - Deaths) ~ Treatment | Center,
model = FLXMRglm(family = "binomial"), k = 3, nrep = 5,
data = betablocker)
betaMix <- relabel(betaMix, "model", "Treatment")
parameters(betaMix)
c(BIC(betaMixFix_3), BIC(betaMix))
@
The difference between model \code{betaMix} and \code{betaMixFix\_3} is
that the treatment coefficients are the same for all three components
for \code{betaMixFix\_3} while they have different values for
\code{betaMix} which can easily be seen when comparing the fitted
component specific parameters. The larger model \code{betaMix} which
also allows varying parameters for treatment has a higher BIC and
therefore the smaller model \code{betaMixFix\_3} would be preferred.
The default plot for \code{"flexmix"} objects gives a rootogram of the
posterior probabilities for each component. Argument \code{mark} can
be used to inspect with which components the specified component
overlaps as all observations are coloured in the different panels
which are assigned to this component based on the maximum a-posteriori
probabilities.
\begin{figure}
\centering
<>=
print(plot(betaMixFix_3, nint = 10, mark = 1, col = "grey", layout = c(3, 1)))
@
\caption{Default plot of \code{"flexmix"} objects where the
observations assigned to the first component are
marked.}\label{fig:default}
\end{figure}
\begin{figure}
\centering
<>=
print(plot(betaMixFix_3, nint = 10, mark = 2, col = "grey", layout = c(3, 1)))
@
\caption{Default plot of \code{"flexmix"} objects where the observations
assigned to the second component are marked.}\label{fig:default-2}
\end{figure}
The rootogram indicates that the components are well separated. In
Figure~\ref{fig:default} it can be seen that component 1 is completely
separated from the other two components, while
Figure~\ref{fig:default-2} shows that component 2 has a slight overlap
with both other components.
The cluster assignments using the maximum a-posteriori probabilities
are obtained with:
<<>>=
table(clusters(betaMix))
@
The estimated probabilities of death for each component for the
treated patients and those in the control group can be obtained with:
<<>>=
predict(betaMix,
newdata = data.frame(Treatment = c("Control", "Treated")))
@
or by obtaining the fitted values for two observations (e.g.~rows 1
and 23) with the desired levels of the predictor \code{Treatment}
<<>>=
betablocker[c(1, 23), ]
fitted(betaMix)[c(1, 23), ]
@
A further analysis of the model is possible with function
\code{refit()} which returns the estimated coefficients together with
the standard deviations, z-values and corresponding p-values. Please
note that the p-values are only approximate in the sense that they
have not been corrected for the fact that the data has already been
used to determine the specific fitted model.
<<>>=
summary(refit(betaMix))
@
Given the estimated treatment coefficients we now also compare this
model to a model where the treatment coefficient is assumed to be the
same for components 1 and 2. Such a model is specified using the model
driver \code{FLXMRglmfix()}. As the first two components are assumed
to have the same coefficients for treatment and for the third component
the coefficient for treatment shall be set to zero the argument
\code{nested} has \code{k = c(2,1)} and \code{formula =
c(\~{}Treatment, \~{})}.
<<>>=
ModelNested <- FLXMRglmfix(family = "binomial", nested = list(k = c(2, 1),
formula = c(~ Treatment, ~ 0)))
betaMixNested <- flexmix(cbind(Deaths, Total - Deaths) ~ 1 | Center,
model = ModelNested, k = 3, data = betablocker,
cluster = posterior(betaMix))
parameters(betaMixNested)
c(BIC(betaMix), BIC(betaMixNested), BIC(betaMixFix_3))
@
The comparison of the BIC values suggests that the nested model with
the same treatment effect for two components and no treatment effect
for the third component is the best.
%%-----------------------------------------------------------------------
\subsection[Productivity of Ph.D. students in biochemistry]{Productivity of Ph.D.~students in biochemistry}
\label{sec:bioChemists}
<>=
data("bioChemists", package = "flexmix")
@
This dataset is taken from \cite{mixtures:Long:1990}. It contains
\Sexpr{nrow(bioChemists)} observations from academics who obtained
their Ph.D.~degree in biochemistry in the 1950s and 60s. It includes
\Sexpr{sum(bioChemists$fem=="Women")} women and
\Sexpr{sum(bioChemists$fem=="Men")} men. The productivity was measured
by counting the number of publications in scientific journals during
the three years period ending the year after the Ph.D.~was received.
In addition data on the productivity and the prestige of the mentor
and the Ph.D.~department was collected. Two measures of family
characteristics were recorded: marriage status and number of children
of age 5 and lower by the year of the Ph.D.
First, mixtures with one, two and three components and only varying
parameters are fitted, and the model minimizing the BIC is selected.
This is based on the assumption that unobserved heterogeneity is
present in the data due to latent differences between the students in
order to be productive and achieve publications. Starting with the
most general model to determine the number of components using
information criteria and checking for possible model restrictions
after having the number of components fixed is a common strategy in
finite mixture modelling
\citep[see][]{mixtures:Wang+Puterman+Cockburn:1996}. Function
\code{refit()} is used to determine confidence intervals for the
parameters in order to choose suitable alternative models. However, it
has to be noted that in the course of the procedure these confidence
intervals will not be correct any more because the specific fitted
models have already been determined using the same data.
<<>>=
data("bioChemists", package = "flexmix")
Model1 <- FLXMRglm(family = "poisson")
ff_1 <- stepFlexmix(art ~ ., data = bioChemists, k = 1:3, model = Model1)
ff_1 <- getModel(ff_1, "BIC")
@
The selected model has \Sexpr{ff_1@k} components. The estimated
coefficients of the components are given in
Figure~\ref{fig:coefficients-1} together with the corresponding 95\%
confidence intervals using the plot method for objects returned by
\code{refit()}. The plot shows that the confidence intervals of the
parameters for \code{kid5}, \code{mar}, \code{ment} and \code{phd}
overlap for the two components. In a next step a mixture with two
components is therefore fitted where only a varying intercept and a
varying coefficient for \code{fem} is specified and all other
coefficients are constant. The EM algorithm is initialized with the
fitted mixture model using \code{posterior()}.
\begin{figure}
\centering
<>=
print(plot(refit(ff_1), bycluster = FALSE,
scales = list(x = list(relation = "free"))))
@
\caption{Coefficient estimates and confidence intervals for the model
with only varying parameters.}\label{fig:coefficients-1}
\end{figure}
<<>>=
Model2 <- FLXMRglmfix(family = "poisson", fixed = ~ kid5 + mar + ment)
ff_2 <- flexmix(art ~ fem + phd, data = bioChemists,
cluster = posterior(ff_1), model = Model2)
c(BIC(ff_1), BIC(ff_2))
@
If the BIC is used for model comparison the smaller model including
only varying coefficients for the intercept and \code{fem} is
preferred. The coefficients of the fitted model can be obtained using
\code{refit()}:
<<>>=
summary(refit(ff_2))
@
It can be seen that the coefficient of \code{phd} does not differ
significantly from zero for either component and might be omitted.
This again improves the BIC.
<<>>=
Model3 <- FLXMRglmfix(family = "poisson", fixed = ~ kid5 + mar + ment)
ff_3 <- flexmix(art ~ fem, data = bioChemists, cluster = posterior(ff_2),
model = Model3)
c(BIC(ff_2), BIC(ff_3))
@
The coefficients of the restricted model without \code{phd} are given
in Figure~\ref{fig:coefficients-2}.
\begin{figure}[t]
\centering
<>=
print(plot(refit(ff_3), bycluster = FALSE, scales = list(x = list(relation = "free"))))
@
\caption{Coefficient estimates and confidence intervals for the model
with varying and constant parameters where the variable \code{phd}
is not used in the regression.}\label{fig:coefficients-2}
\end{figure}
An alternative model would be to assume that gender does not directly
influence the number of articles but has an impact on the segment
sizes.
<<>>=
Model4 <- FLXMRglmfix(family = "poisson", fixed = ~ kid5 + mar + ment)
ff_4 <- flexmix(art ~ 1, data = bioChemists, cluster = posterior(ff_2),
concomitant = FLXPmultinom(~ fem), model = Model4)
parameters(ff_4)
summary(refit(ff_4), which = "concomitant")
BIC(ff_4)
@
This suggests that the proportion of women is lower in the second
component which is the more productive segment.
The alternative modelling strategy where homogeneity is assumed at the
beginning and a varying intercept is added if overdispersion is
observed leads to the following model which is the best with respect
to the BIC.
<<>>=
Model5 <- FLXMRglmfix(family = "poisson", fixed = ~ kid5 + ment + fem)
ff_5 <- flexmix(art ~ 1, data = bioChemists, cluster = posterior(ff_2),
model = Model5)
BIC(ff_5)
@
\begin{figure}
\centering
\setkeys{Gin}{width=0.8\textwidth}
<>=
pp <- predict(ff_5, newdata = data.frame(kid5 = 0,
mar = factor("Married", levels = c("Single", "Married")),
fem = c("Men", "Women"), ment = mean(bioChemists$ment)))
matplot(0:12, sapply(unlist(pp), function(x) dpois(0:12, x)),
type = "b", lty = 1, xlab = "Number of articles", ylab = "Probability")
legend("topright", paste("Comp.", rep(1:2, each = 2), ":",
c("Men", "Women")), lty = 1, col = 1:4, pch = paste(1:4), bty = "n")
@
\caption{The estimated productivity for each component for men and women.}
\label{fig:estimated}
\end{figure}
\setkeys{Gin}{width=0.98\textwidth}
In Figure~\ref{fig:estimated} the estimated distribution of
productivity for model \code{ff\_5} is given separately for men and
women as well as for each component where for all other variables the
mean values are used for the numeric variables and the most frequent
category for the categorical variables. The two components differ in
that component 1 contains the students who publish no article or only
a single article, while the students in component 2 write on average
several articles. With a constant coefficient for gender women publish
fewer articles than men in both components.
This example shows that different optimal models are chosen for
different modelling procedures. However, the distributions induced by
the different variants of the model class may be similar and therefore
it is not surprising that they then will have similar BIC values.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Implementation}\label{sec:implementation}
The new features extend the available model class described in
\cite{mixtures:Leisch:2004} by providing infrastructure for
concomitant variable models and for fitting mixtures of GLMs with
varying and constant parameters for the component specific parameters.
The implementation of the extensions of the model class made it
necessary to define a better class structure for the component
specific models and to modify the fit functions \code{flexmix()} and
\code{FLXfit()}.
An overview on the \proglang{S}4 class structure of the package is
given in Figure~\ref{fig:class structure}. There is a class for
unfitted finite mixture distributions given by \code{"FLXdist"} which
contains a list of \code{"FLXM"} objects which determine the component
specific models, a list of \code{"FLXcomponent"} objects which specify
functions to determine the component specific log-likelihoods and
predictions and which contain the component specific parameters, and
an object of class \code{"FLXP"} which specifies the concomitant
variable model. Class \code{"flexmix"} extends \code{"FLXdist"}. It
represents a fitted finite mixture distribution and it contains the
information about the fitting with the EM algorithm in the object of
class \code{"FLXcontrol"}. Repeated fitting with the EM algorithm with
different number of components is provided by function
\code{stepFlexmix()} which returns an object of class
\code{"stepFlexmix"}. Objects of class \code{"stepFlexmix"} contain
the list of the fitted mixture models for each number of components in
the slot \code{"models"}.
\setkeys{Gin}{width=.9\textwidth}
\begin{figure}[t]
\centering
\includegraphics{flexmix}
\caption{UML class diagram \citep[see][]{mixtures:Fowler:2004} of the
\pkg{flexmix} package.}
\label{fig:class structure}
\end{figure}
\setkeys{Gin}{width=\textwidth}
For the component specific model a virtual class \code{"FLXM"} is
introduced which (currently) has two subclasses: \code{"FLXMC"} for
model-based clustering and \code{"FLXMR"} for clusterwise regression,
where predictor variables are given. Additional slots have been
introduced to allow for data preprocessing and the construction of the
components was separated from the fit and is implemented using lexical
scoping \citep{mixtures:Gentleman+Ihaka:2000} in the slot
\code{defineComponent}. \code{"FLXMC"} has an additional slot
\code{dist} to specify the name of the distribution of the variable.
In the future functionality shall be provided for sampling from a
fitted or unfitted finite mixture. Using this slot observations can be
generated by using the function which results from adding an \code{r}
at the beginning of the distribution name. This allows to only
implement the (missing) random number generator functions and
otherwise use the same method for sampling from mixtures with
component specific models of class \code{"FLXMC"}.
For \code{flexmix()} and \code{FLXfit()} code blocks which are model
dependent have been identified and different methods implemented.
Finite mixtures of regressions with varying, nested and constant
parameters were a suitable model class for this identification task as
they are different from models previously implemented. The main
differences are:
\begin{itemize}
\item The number of components is related to the component specific
model and the omission of small components during the EM algorithm
impacts on the model.
\item The parameters of the component specific models can not be
determined separately in the M-step and a joint model matrix is
needed.
\end{itemize}
This makes it also necessary to have different model dependent methods
for \code{fitted()} which extracts the fitted values from a
\code{"flexmix"} object, \code{predict()} which predicts new values
for a \code{"flexmix"} object and \code{refit()} which refits an
estimated model to obtain additional information for a
\code{"flexmix"} object.
%%-----------------------------------------------------------------------
\subsection{Component specific models with varying and constant
parameters}\label{sec:comp-models-with}
A new M-step driver is provided which fits finite mixtures of GLMs
with constant and nested varying parameters for the coefficients and
the dispersion parameters. The class \code{"FLXMRglmfix"} returned by
the driver \code{FLXMRglmfix()} has the following additional slots
with respect to \code{"FLXMRglm"}:
\begin{description}
\item[\code{design}:] An incidence matrix indicating which columns of
the model matrix are used for which component,
i.e.~$\mathbf{D}=(\mathbf{1}_K,\mathbf{J}, \mathbf{I}_K)$.
\item[\code{nestedformula}:] An object of class \code{"FLXnested"}
containing the formula for the nested regression coefficients and
the number of components in each $K_c$, $c \in C$.
\item[\code{fixed}:] The formula for the constant regression
coefficients.
\item[\code{variance}:] A logical indicating if different variances
shall be estimated for the components following a Gaussian
distribution or a vector specifying the nested structure for
estimating these variances.
\end{description}
The difference between estimating finite mixtures including only
varying parameters using models specified with \code{FLXMRglm()} and
those with varying and constant parameters using function
\code{FLXMRglmfix()} is hidden from the user, as only the specified
model is different. The fitted model is also of class \code{"flexmix"}
and can be analyzed using the same functions as for any model fitted
using package \pkg{flexmix}. The methods used are the same except if
the slot containing the model is accessed and method dispatching is
made via the model class. New methods are provided for models of class
\code{"FLXMRglmfix"} for functions \code{refit()}, \code{fitted()} and
\code{predict()} which can be used for analyzing the fitted model.
The implementation allows repeated measurements by specifying a
grouping variable in the formula argument of \code{flexmix()}.
Furthermore, it has to be noticed that the model matrix is determined
by updating the formula of the varying parameters successively with
the formula of the constant and then of the nested varying parameters.
This ensures that if a mixture distribution is fitted for the
intercept, the model matrix of a categorical variable includes only
the remaining columns for the constant parameters to have full column
rank. However, this updating scheme makes it impossible to estimate a
constant intercept while allowing varying parameters for a categorical
variable.
For this model one big model matrix is constructed where the
observations are repeated $K$ times and suitable columns of zero
added. The coefficients of all $K$ components are determined
simultaneously in the M-step, while if only varying parameters are
specified the maximization of the likelihood is made separately for
all components. For large datasets the estimation of a combination of
constant and varying parameters might therefore be more challenging
than only varying parameters.
%% -----------------------------------------------------------------------
\subsection{Concomitant variable models}\label{sec:conc-vari-models}
For representing concomitant variable models the class \code{"FLXP"}
is defined. It specifies how the concomitant variable model is fitted
using the concomitant variable model matrix as predictor variables and
the current a-posteriori probability estimates as response variables.
The object has the following slots:
\begin{description}
\item[\code{fit}:] A \code{function (x, y, ...)} returning the fitted
values for the component weights during the EM algorithm.
\item[\code{refit}:] A \code{function (x, y, ...)} used for refitting
the model.
\item[\code{df}:] A \code{function (x, k, ...)} returning the degrees
of freedom used for estimating the concomitant variable model given
the model matrix \code{x} and the number of components \code{k}.
\item[\code{x}:] A matrix containing the model matrix of the
concomitant variables.
\item[\code{formula}:] The formula for determining the model matrix
\code{x}.
\item[\code{name}:] A character string describing the model, which is
only used for print output.
\end{description}
Two constructor functions for concomitant variable models are provided
at the moment. \code{FLXPconstant()} is for constant component weights
without concomitant variables and for multinomial logit models
\code{FLXPmultinom()} can be used. \code{FLXPmultinom()} has its own
class \code{"FLXPmultinom"} which extends \code{"FLXP"} and has an
additional slot \code{coef} for the fitted coefficients. The
multinomial logit models are fitted using package \pkg{nnet}
\citep{mixtures:Venables+Ripley:2002}.
%%-----------------------------------------------------------------------
\subsection{Further changes}
The estimation of the model with the EM algorithm was improved by
adapting the variants to correspond to the CEM and SEM variants as
outlined in the literature. To make this more explicit it is now also
possible to use \code{"CEM"} or \code{"SEM"} to specify an EM variant
in the \code{classify} argument of the \code{"FLXcontrol"}
object. Even though the SEM algorithm can in general not be expected
to converge the fitting procedure is also terminated for the SEM
algorithm if the change in the relative log-likelihood is smaller than
the pre-specified threshold. This is motivated by the fact that for
well separated clusters the posteriors might converge to an indicator
function with all weight concentrated in one component. The fitted
model with the maximum likelihood encountered during the SEM algorithm
is returned.
For discrete data in general multiple observations with the same
values are given in a dataset. A \code{weights} argument was added to
the fitting function \code{flexmix()} in order to avoid repeating
these observations in the provided dataset. The specification is
through a \code{formula} in order to allow selecting a column of the
data frame given in the \code{data} argument. The weights argument
allows to avoid replicating the same observations and hence enables
more efficient memory use in these applications. This possibility is
especially useful in the context of model-based clustering for
mixtures of Poisson distributions or latent class analysis with
multivariate binary observations.
In order to be able to apply different initialization strategies such
as for example first running several different random initializations
with CEM and then switching to ordinary EM using the best solution
found by CEM for initialization a \code{posterior()} function was
implemented. \code{posterior()} also takes a \code{newdata} argument
and hence, it is possible to apply subset strategies for large
datasets as suggested in
\cite{mixtures:Wehrens+Buydens+Fraley:2004}. The returned matrix of
the posterior probabilities can be used to specify the \code{cluster}
argument for \code{flexmix()} and the posteriors are then used as
weights in the first M-step.
The default plot methods now use trellis graphics as implemented in
package \pkg{lattice} \citep{mixtures:Sarkar:2008}. Users familiar
with the syntax of these graphics and with the plotting and printing
arguments will find the application intuitive as a lot of plotting
arguments are passed to functions from \pkg{lattice} as for example
\code{xyplot()} and \code{histogram()}. In fact only new panel,
pre-panel and group-panel functions were implemented. The returned
object is of class \code{"trellis"} and the show method for this class
is used to create the plot.
Function \code{refit()} was modified and has now two different
estimation methods: \code{"optim"} and \code{"mstep"}. The default
method \code{"optim"} determines the variance-covariance matrix of the
parameters from the inverse Hessian of the full log-likelihood. The
general purpose optimizer \code{optim()} is used to maximize the
log-likelihood and initialized in the solution obtained with the EM
algorithm. For mixtures of GLMs there are also functions implemented
to determine the gradient which can be used to speed up
convergence.
The second method \code{"mstep"} is only a raw approximation. It
performs an M-step where the a-posteriori probabilities are treated as
given instead of estimated and returns for the component specific
models nearly complete \code{"glm"} objects which can be further
analyzed. The advantage of this method is that the return value is
basically a list of standard \code{"glm"} objects, such that the
regular methods for this class can be used.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Writing your own drivers}\label{sec:writing-your-own}
Two examples are given in the following to demonstrate how new drivers
can be provided for concomitant variable models and for component
specific models. Easy extensibility is one of the main implementation
aims of the package and it can be seen that writing new drivers
requires only a few lines of code for providing the constructor
functions which include the fit functions.
%%-----------------------------------------------------------------------
\subsection{Component specific models: Zero-inflated
models}\label{sec:component-models}
\lstset{frame=trbl,basicstyle=\small\tt,stepnumber=5,numbers=left}
In Poisson or binomial regression models it can be often encountered
that the observed number of zeros is higher than expected. A mixture
with two components where one has mean zero can be used to model such
data. These models are also referred to as zero-inflated models
\citep[see for example][]{mixtures:Boehning+Dietz+Schlattmann:1999}.
A generalization of this model class would be to fit mixtures with
more than two components where one component has a mean fixed at
zero. So this model class is a special case of a mixture of
generalized linear models where (a) the family is restricted to
Poisson and binomial and (b) the parameters of one component are
fixed. For simplicity the implementation assumes that the component
with mean zero is the first component. In addition we assume that the
model matrix contains an intercept and to have the first component
absorbing the excess zeros the coefficient of the intercept is set to
$-\infty$ and all other coefficients are set to zero.
Hence, to implement this model using package \pkg{flexmix} an
appropriate model class is needed with a corresponding convenience
function for construction. During the fitting of the EM algorithm
using \code{flexmix()} different methods for this model class are
needed when determining the model matrix (to check the presence of an
intercept), to check the model after a component is removed and for
the M-step to account for the fact that the coefficients of the first
component are fixed. For all other methods those available for
\code{"FLXMRglm"} can be re-used. The code is given in
Figure~\ref{fig:ziglm.R}.
\begin{figure}
\centering
\begin{minipage}{0.98\textwidth}
\lstinputlisting{ziglm.R}
\end{minipage}
\caption{Driver for a zero-inflated component specific model.}
\label{fig:ziglm.R}
\end{figure}
The model class \code{"FLXMRziglm"} is defined as extending
\code{"FLXMRglm"} in order to be able to inherit methods from this
model class. For construction of a \code{"FLXMRziglm"} class the
convenience function \code{FLXMRziglm()} is used which calls
\code{FLXMRglm()}. The only differences are that the family is
restricted to binomial or Poisson, that a different name is assigned
and that an object of the correct class is returned.
The presence of the intercept in the model matrix is checked in
\code{FLXgetModelmatrix()} after using the method available for
\code{"FLXMRglm"} models as indicated by the call to
\code{callNextMethod()}. During the EM algorithm
\code{FLXremoveComponent()} is called if one component is removed. For
this model class it checks if the first component has been removed and
if this is the case the model class is changed to \code{"FLXMRglm"}.
In the M-step the coefficients of the first component are fixed and
not estimated, while for the remaining components the M-step of
\code{"FLXMRglm"} objects can be used. During the EM algorithm
\code{FLXmstep()} is called to perform the M-step and returns a list
of \code{"FLXcomponent"} objects with the fitted parameters. A new
method for this function is needed for \code{"FLXMRziglm"} objects in
order to account for the fixed coefficients in the first component,
i.e.~for the first component the \code{"FLXcomponent"} object is
constructed and concatenated with the list of \code{"FLXcomponent"}
objects returned by using the \code{FLXmstep()} method for
\code{"FLXMRglm"} models for the remaining components.
Similar modifications are necessary in order to be able to use
\code{refit()} for this model class. The code for implementing the
\code{refit()} method using \code{optim()} for \code{"FLXMRziglm"} is
not shown, but can be inspected in the source code of the package.
\subsubsection{Example: Using the driver}
This new M-step driver can be used to estimate a zero-inflated Poisson
model to the data given in
\cite{mixtures:Boehning+Dietz+Schlattmann:1999}. The dataset
\code{dmft} consists of count data from a dental epidemiological study
for evaluation of various programs for reducing caries collected among
school children from an urban area of Belo Horizonte (Brazil). The
variables included are the number of decayed, missing or filled teeth
(DMFT index) at the beginning and at the end of the observation
period, the gender, the ethnic background and the specific treatment
for \Sexpr{nrow(dmft)} children.
The model can be fitted with the new driver function using the
following commands:
<<>>=
data("dmft", package = "flexmix")
Model <- FLXMRziglm(family = "poisson")
Fitted <- flexmix(End ~ log(Begin + 0.5) + Gender + Ethnic + Treatment,
model = Model, k = 2 , data = dmft, control = list(minprior = 0.01))
summary(refit(Fitted))
@
Please note that \cite{mixtures:Boehning+Dietz+Schlattmann:1999} added
the predictor \code{log(Begin + 0.5)} to serve as an offset in order
to be able to analyse the improvement in the DMFT index from the
beginning to the end of the study. The linear predictor with the
offset subtracted is intended to be an estimate for
$\log(\mathbb{E}(\textrm{End})) - \log(\mathbb{E}(\textrm{Begin}))$.
This is justified by the fact that for a Poisson distributed variable
$Y$ with mean between 1 and 10 it holds that $\mathbb{E}(\log(Y +
0.5))$ is approximately equal to $\log(\mathbb{E}(Y))$.
$\log(\textrm{Begin} + 0.5)$ can therefore be seen as an estimate for
$\log(\mathbb{E}(\textrm{Begin}))$.
The estimated coefficients with corresponding confidence intervals are
given in Figure~\ref{fig:dmft}. As the coefficients of the first
component are restricted a-priori to minus infinity for the intercept
and to zero for the other variables, they are of no interest and only
the second component is plotted. The box ratio can be modified as for
\code{barchart()} in package \pkg{lattice}. The code to produce this
plot is given by:
<>=
print(plot(refit(Fitted), components = 2, box.ratio = 3))
@
\begin{figure}
\centering
\setkeys{Gin}{width=0.9\textwidth}
<>=
<>
@
\caption{The estimated coefficients of the zero-inflated model for the
\code{dmft} dataset. The first component is not plotted as this
component captures the inflated zeros and its coefficients are fixed
a-priori.}
\label{fig:dmft}
\end{figure}
%%-----------------------------------------------------------------------
\subsection{Concomitant variable models}\label{sec:concomitant-models}
If the concomitant variable is a categorical variable, the multinomial
logit model is equivalent to a model where the component weights for
each level of the concomitant variable are determined by the mean
values of the a-posteriori probabilities. The driver which implements
this \code{"FLXP"} model is given in Figure~\ref{fig:myConcomitant.R}.
A name for the driver has to be specified and a \code{fit()}
function. In the \code{fit()} function the mean posterior probability
for all observations with the same covariate points is determined,
assigned to the corresponding observations and the full new
a-posteriori probability matrix returned. By contrast \code{refit()}
only returns the new a-posteriori probability matrix for the number of
unique covariate points.
\lstset{frame=trbl,basicstyle=\small\tt,stepnumber=5,numbers=left}
\begin{figure}
\centering
\begin{minipage}{0.98\textwidth}
\lstinputlisting{myConcomitant.R}
\end{minipage}
\caption{Driver for a concomitant variable model where the component
weights are determined by averaging over the a-posteriori
probabilities for each level of the concomitant variable.}
\label{fig:myConcomitant.R}
\end{figure}
\subsubsection{Example: Using the driver}
If the concomitant variable model returned by \code{myConcomitant()}
is used for the artificial example in
Section~\ref{sec:using-new-funct} the same fitted model is returned as
if a multinomial logit model is specified. An advantage is that in
this case no problems occur if the fitted probabilities are close to
zero or one.
<>=
Concomitant <- FLXPmultinom(~ yb)
MyConcomitant <- myConcomitant(~ yb)
set.seed(1234)
m2 <- flexmix(. ~ x, data = NPreg, k = 2, model = list(Model_n, Model_p),
concomitant = Concomitant)
m3 <- flexmix(. ~ x, data = NPreg, k = 2, model = list(Model_n, Model_p),
cluster = posterior(m2), concomitant = MyConcomitant)
@
<<>>=
summary(m2)
summary(m3)
@
For comparing the estimated component weights for each value of $\mathit{yb}$
the following function can be used:
<<>>=
## Recompute the fitted prior component weights from the concomitant
## variable model of a fitted "flexmix" object and return one row per
## unique covariate point (duplicated rows of the concomitant model
## matrix are dropped).
determinePrior <- function(object) {
object@concomitant@fit(object@concomitant@x,
posterior(object))[!duplicated(object@concomitant@x), ]
}
@
<<>>=
determinePrior(m2)
determinePrior(m3)
@
Obviously the fitted values of the two models correspond to each
other.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section{Summary and outlook}\label{sec:summary-outlook}
Package \pkg{flexmix} was extended to cover finite mixtures of GLMs
with (nested) varying and constant parameters. This allows for example the
estimation of varying intercept models. In order to be
able to characterize the components given some variables concomitant
variable models can be estimated for the component weights.
The implementation of these extensions has triggered some
modifications in the class structure and in the fit functions
\code{flexmix()} and \code{FLXfit()}. For certain steps, as e.g.~the
M-step, methods which depend on the component specific models are
defined in order to enable the estimation of finite mixtures of GLMs
with only varying parameters and those with (nested) varying and
constant parameters with the same fit function. The flexibility of
this modified implementation is demonstrated by illustrating how a
driver for zero-inflated models can be defined.
In the future diagnostic tools based on resampling methods shall be
implemented as bootstrap results can give valuable insights into the
model fit \citep{mixtures:Gruen+Leisch:2004}. A function which
conveniently allows to test linear hypotheses about the parameters
using the variance-covariance matrix returned by \code{refit()} would
be a further valuable diagnostic tool.
The implementation of zero-inflated Poisson and binomial regression
models are a first step towards relaxing the assumption that all
component specific distributions are from the same parametric family.
As mixtures with components which follow distributions from different
parametric families can be useful for example to model outliers
\citep{mixtures:Dasgupta+Raftery:1998,mixtures:Leisch:2008}, it is intended to also make
this functionality available in \pkg{flexmix} in the future.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section*{Computational details}
<>=
SI <- sessionInfo()
pkgs <- paste(sapply(c(SI$otherPkgs, SI$loadedOnly), function(x)
paste("\\\\pkg{", x$Package, "} ",
x$Version, sep = "")), collapse = ", ")
@
All computations and graphics in this paper have been done using
\proglang{R} version \Sexpr{getRversion()} with the packages
\Sexpr{pkgs}.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\section*{Acknowledgments}
This research was supported by the Austrian Science Foundation
(FWF) under grants P17382 and T351. Thanks also to Achim Zeileis for
helpful discussions on implementation details and an anonymous
referee for asking a good question about parameter significance which
initiated the new version of function \code{refit()}.
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\bibliography{mixture}
%%-----------------------------------------------------------------------
%%-----------------------------------------------------------------------
\end{document}
flexmix/vignettes/flexmix.bib 0000644 0001762 0000144 00000023352 14404637307 016105 0 ustar ligges users @STRING{jcgs = {Journal of Computational and Graphical Statistics} }
@STRING{tuwien = {Technische Universit{\"a}t Wien, Vienna, Austria} }
@STRING{jasa = {Journal of the American Statistical Association} }
@Article{ flexmix:Aitkin:1996,
author = {Murray Aitkin},
title = {A General Maximum Likelihood Analysis of
Overdispersion in Generalized Linear Models},
journal = {Statistics and Computing},
year = 1996,
volume = 6,
pages = {251--262}
}
@Article{ flexmix:Aitkin:1999,
author = {Murray Aitkin},
title = {A General Maximum Likelihood Analysis of Variance
Components in Generalized Linear Models},
journal = {Biometrics},
year = 1999,
volume = 55,
pages = {117--128}
}
@Article{ flexmix:Aitkin:1999a,
author = {Murray Aitkin},
title = {Meta-Analysis by Random Effect Modelling in
Generalized Linear Models},
journal = {Statistics in Medicine},
year = 1999,
volume = 18,
number = {17--18},
month = {September},
pages = {2343--2351}
}
@Manual{ flexmix:Buyske:2003,
title = {{R} Package \texttt{mmlcr}: Mixed-Mode Latent Class
Regression},
author = {Steve Buyske},
year = 2003,
note = {version 1.3.2},
url = {http://www.stat.rutgers.edu/~buyske/software.html}
}
@Book{ flexmix:Chambers:1998,
author = {John M. Chambers},
title = {Programming with Data: A Guide to the {S} Language},
publisher = {Springer Verlag},
year = 1998,
address = {Berlin, Germany}
}
@Article{ flexmix:DeSarbo+Cron:1988,
author = {Wayne S. DeSarbo and W. L. Cron},
title = {A Maximum Likelihood Methodology for Clusterwise
Linear Regression},
journal = {Journal of Classification},
year = 1988,
volume = 5,
pages = {249--282}
}
@Article{ flexmix:Dempster+Laird+Rubin:1977,
author = {A.P. Dempster and N.M. Laird and D.B. Rubin},
title = {Maximum Likelihood from Incomplete Data via the
{EM}-Algorithm},
journal = {Journal of the Royal Statistical Society, B},
volume = 39,
pages = {1--38},
year = 1977
}
@Article{ flexmix:Diebolt+Robert:1994,
author = {J. Diebolt and C. P. Robert},
title = {Estimation of Finite Mixture Distributions Through
{B}ayesian Sampling},
journal = {Journal of the Royal Statistical Society, Series B},
year = 1994,
volume = 56,
pages = {363--375}
}
@Book{ flexmix:Everitt+Hand:1981,
author = {Brian S. Everitt and David J. Hand},
title = {Finite Mixture Distributions},
publisher = {Chapman and Hall},
address = {London},
year = 1981
}
@Article{ flexmix:Follmann+Lambert:1989,
author = {Dean A. Follmann and Diane Lambert},
title = {Generalizing Logistic Regression by Non-Parametric
Mixing},
journal = jasa,
volume = 84,
number = 405,
month = {March},
pages = {295--300},
year = 1989
}
@Article{ flexmix:Fraley+Raftery:2002,
author = {Chris Fraley and Adrian E. Raftery},
title = {Model-Based Clustering, Discriminant Analysis and
Density Estimation},
journal = jasa,
year = 2002,
volume = 97,
pages = {611-631}
}
@TechReport{ flexmix:Fraley+Raftery:2002a,
author = {Chris Fraley and Adrian E. Raftery},
title = {{MCLUST}: Software for Model-Based Clustering,
Discriminant Analysis and Density Estimation},
institution = {Department of Statistics, University of Washington},
year = 2002,
number = 415,
address = {Seattle, WA, USA},
url = {http://www.stat.washington.edu/raftery}
}
@Article{ flexmix:Gentleman+Ihaka:2000,
author = {Robert Gentleman and Ross Ihaka},
title = {Lexical Scope and Statistical Computing},
journal = jcgs,
year = 2000,
volume = 9,
number = 3,
pages = {491--508},
keywords = {statistical computing, function closure, lexical
scope, random number generators}
}
@InProceedings{ flexmix:Gruen+Leisch:2006,
author = {Bettina Gr{\" u}n and Friedrich Leisch},
title = {Fitting Finite Mixtures of Linear Regression Models
with Varying \& Fixed Effects in \textsf{R}},
booktitle = {Compstat 2006---Proceedings in Computational
Statistics},
pages = {853--860},
editor = {Alfredo Rizzi and Maurizio Vichi},
publisher = {Physica Verlag},
address = {Heidelberg, Germany},
isbn = {3-7908-1708-2},
year = 2006
}
@InProceedings{ flexmix:Grun+Leisch:2004,
author = {Bettina Gr{\" u}n and Friedrich Leisch},
title = {Bootstrapping Finite Mixture Models},
booktitle = {Compstat 2004---Proceedings in Computational
Statistics},
year = 2004,
editor = {Jaromir Antoch},
publisher = {Physica Verlag},
address = {Heidelberg, Germany},
isbn = {3-7908-1554-3},
pages = {1115--1122},
pdf =
{http://www.stat.uni-muenchen.de/~leisch/papers/Grun+Leisch-2004.pdf}
}
@MastersThesis{ flexmix:Grun:2002,
author = {Bettina Gr{\"u}n},
title = {{I}dentifizierbarkeit von multinomialen
{M}ischmodellen},
school = tuwien,
year = 2002,
note = {Kurt Hornik and Friedrich Leisch, advisors}
}
@Article{ flexmix:Hennig:2000,
author = {Christian Hennig},
title = {Identifiability of Models for Clusterwise Linear
Regression},
journal = {Journal of Classification},
volume = 17,
pages = {273--296},
year = 2000
}
@InProceedings{ flexmix:Leisch:2004,
author = {Friedrich Leisch},
title = {Exploring the Structure of Mixture Model Components},
booktitle = {Compstat 2004---Proceedings in Computational
Statistics},
year = 2004,
editor = {Jaromir Antoch},
publisher = {Physica Verlag},
address = {Heidelberg, Germany},
isbn = {3-7908-1554-3},
pages = {1405--1412},
pdf =
{http://www.stat.uni-muenchen.de/~leisch/papers/Leisch-2004.pdf}
}
@Article{ flexmix:Leisch:2004a,
author = {Friedrich Leisch},
title = {{FlexMix}: A General Framework for Finite Mixture
Models and Latent Class Regression in {R}},
journal = {Journal of Statistical Software},
year = 2004,
volume = 11,
number = 8,
doi = {10.18637/jss.v011.i08},
}
@Book{ flexmix:McLachlan+Peel:2000,
author = {Geoffrey McLachlan and David Peel},
title = {Finite Mixture Models},
publisher = {John Wiley and Sons Inc.},
year = 2000
}
@Manual{ flexmix:R-Core:2004,
title = {R: A Language and Environment for Statistical
Computing},
author = {{R Development Core Team}},
organization = {R Foundation for Statistical Computing},
address = {Vienna, Austria},
year = 2004,
isbn = {3-900051-07-0},
url = {http://www.R-project.org}
}
@InProceedings{ flexmix:Tantrum+Murua+Stuetzle:2003,
author = {Jeremy Tantrum and Alejandro Murua and Werner
Stuetzle},
title = {Assessment and Pruning of Hierarchical Model Based
Clustering},
booktitle = {Proceedings of the ninth ACM SIGKDD International
Conference on Knowledge Discovery and Data Mining},
pages = {197--205},
year = 2003,
publisher = {ACM Press},
address = {New York, NY, USA},
isbn = {1-58113-737-0},
}
@Book{ flexmix:Titterington+Smith+Makov:1985,
author = {D.M. Titterington and A.F.M. Smith and U.E. Makov},
title = {Statistical Analysis of Finite Mixture
Distributions},
publisher = {John Wiley and Sons Inc.},
year = 1985
}
@InProceedings{ flexmix:Urbanek+Theus:2003,
author = {Simon Urbanek and Martin Theus},
title = {{iPlots}---High Interaction Graphics for {R}},
booktitle = {Proceedings of the 3rd International Workshop on
Distributed Statistical Computing, Vienna, Austria},
editor = {Kurt Hornik and Friedrich Leisch and Achim Zeileis},
year = 2003,
url =
{http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/},
note = {{ISSN 1609-395X}}
}
@Book{ flexmix:Venables+Ripley:2002,
title = {Modern Applied Statistics with S},
author = {William N. Venables and Brian D. Ripley},
publisher = {Springer Verlag},
edition = {Fourth},
address = {New York},
year = 2002,
isbn = {0-387-95457-0}
}
@Article{ flexmix:Wang+Cockburn+Puterman:1998,
author = {Peiming Wang and Iain M. Cockburn and Martin
L. Puterman},
title = {Analysis of Patent Data---{A}
Mixed-{P}oisson-Regression-Model Approach},
journal = {Journal of Business \& Economic Statistics},
year = 1998,
volume = 16,
number = 1,
pages = {27--41}
}
@Article{ flexmix:Wang+Puterman+Cockburn:1996,
author = {Peiming Wang and Martin L. Puterman and Iain
M. Cockburn and Nhu D. Le},
title = {Mixed {P}oisson Regression Models with Covariate
Dependent Rates},
journal = {Biometrics},
year = 1996,
volume = 52,
pages = {381--400}
}
@Article{ flexmix:Wang+Puterman:1998,
author = {Peiming Wang and Martin L. Puterman},
title = {Mixed Logistic Regression Models},
journal = {Journal of Agricultural, Biological, and
Environmental Statistics},
year = 1998,
volume = 3,
number = 2,
pages = {175--200}
}
@Article{ flexmix:Wedel+DeSarbo:1995,
author = {Michel Wedel and Wayne S. DeSarbo},
title = {A Mixture Likelihood Approach for Generalized Linear
Models},
journal = {Journal of Classification},
year = 1995,
volume = 12,
pages = {21--55}
}
@Book{ flexmix:Wedel+Kamakura:2001,
author = {Michel Wedel and Wagner A. Kamakura},
title = {Market Segmentation -- Conceptual and Methodological
Foundations},
publisher = {Kluwer Academic Publishers},
year = 2001,
address = {Boston, MA, USA},
edition = {2nd}
}
flexmix/vignettes/regression-examples.Rnw 0000644 0001762 0000144 00000124072 14404637307 020440 0 ustar ligges users \documentclass[nojss]{jss}
\usepackage{amsfonts,bm,amsmath,amssymb}
%%\usepackage{Sweave} %% already provided by jss.cls
%%%\VignetteIndexEntry{Applications of finite mixtures of regression models}
%%\VignetteDepends{flexmix}
%%\VignetteKeywords{R, finite mixture model, generalized linear model, latent class regression}
%%\VignettePackage{flexmix}
\title{Applications of finite mixtures of regression models}
<>=
library("stats")
library("graphics")
library("flexmix")
@
\author{Bettina Gr{\"u}n\\
Wirtschaftsuniversit{\"a}t Wien \And
Friedrich Leisch\\
Universit\"at f\"ur Bodenkultur Wien}
\Plainauthor{Bettina Gr{\"u}n, Friedrich Leisch}
\Address{
Bettina Gr\"un\\
Institute for Statistics and Mathematics\\
Wirtschaftsuniversit{\"a}t Wien\\
Welthandelsplatz 1\\
1020 Wien, Austria\\
E-mail: \email{Bettina.Gruen@R-project.org}\\
Friedrich Leisch\\
Institut f\"ur Angewandte Statistik und EDV\\
Universit\"at f\"ur Bodenkultur Wien\\
Peter Jordan Stra\ss{}e 82\\
1190 Wien, Austria\\
E-mail: \email{Friedrich.Leisch@boku.ac.at}
}
\Abstract{
Package \pkg{flexmix} provides functionality for fitting finite
mixtures of regression models. The available model class includes
generalized linear models with varying and fixed effects for the
component specific models and multinomial logit models for the
concomitant variable models. This model class includes random
intercept models where the random part is modelled by a finite
mixture instead of a-priori selecting a suitable distribution.
The application of the package is illustrated on various datasets
which have been previously used in the literature to fit finite
mixtures of Gaussian, binomial or Poisson regression models. The
\proglang{R} commands are given to fit the proposed models and
additional insights are gained by visualizing the data and the
fitted models as well as by fitting slightly modified models.
}
\Keywords{\proglang{R}, finite mixture models, generalized linear models, concomitant variables}
\Plainkeywords{R, finite mixture models, generalized linear models, concomitant variables}
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
\begin{document}
\SweaveOpts{engine=R, echo=true, height=5, width=8, eps=FALSE, keep.source=TRUE}
\setkeys{Gin}{width=0.8\textwidth}
<>=
options(width=70, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE)
suppressWarnings(RNGversion("3.5.0"))
set.seed(1802)
library("lattice")
ltheme <- canonical.theme("postscript", FALSE)
lattice.options(default.theme=ltheme)
@
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
\section{Introduction}
Package \pkg{flexmix} provides infrastructure for flexible fitting of
finite mixture models. The design principles of the package allow
easy extensibility and rapid prototyping. In addition, the main focus
of the available functionality is on fitting finite mixtures of
regression models, as other packages in \proglang{R} exist which have
specialized functionality for model-based clustering, such as
e.g.~\pkg{mclust} \citep{flexmix:Fraley+Raftery:2002a} for finite
mixtures of Gaussian distributions.
\cite{flexmix:Leisch:2004a} gives a general introduction into the
package outlining the main implementational principles and
illustrating the use of the package. The paper is also contained as a
vignette in the package. An example for fitting mixtures of Gaussian
regression models is given in \cite{flexmix:Gruen+Leisch:2006}. This
paper focuses on examples of finite mixtures of binomial logit and
Poisson regression models. Several datasets which have been
previously used in the literature to demonstrate the use of finite
mixtures of regression models have been selected to illustrate the
application of the package.
The model class covered are finite mixtures of generalized linear
models with focus on binomial logit and Poisson regressions. The
regression coefficients as well as the dispersion parameters of the
component specific models are assumed to vary for all components, vary
between groups of components, i.e.~to have a nesting, or to be fixed
over all components. In addition it is possible to specify concomitant
variable models in order to be able to characterize the components.
Random intercept models are a special case of finite mixtures with
varying and fixed effects as fixed effects are assumed for the
coefficients of all covariates and varying effects for the intercept.
These models are often used to capture overdispersion in the data
which can occur for example if important covariates are omitted in the
regression. It is then assumed that the influence of these covariates
can be captured by allowing a random distribution for the intercept.
This illustration does not only show how the package \pkg{flexmix} can
be used for fitting finite mixtures of regression models but also
indicates the advantages of using an extension package of an
environment for statistical computing and graphics instead of a
stand-alone package as available visualization techniques can be used
for inspecting the data and the fitted models. In addition users
already familiar with \proglang{R} and its formula interface should find
the model specification and a lot of commands for exploring the fitted
model intuitive.
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
\section{Model specification}
Finite mixtures of Gaussian regressions with concomitant variable
models are given by:
\begin{align*}
H(y\,|\,\bm{x}, \bm{w}, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s(\bm{w},
\bm{\alpha}) \textrm{N}(y\,|\, \mu_s(\bm{x}), \sigma^2_s),
\end{align*}
where $\textrm{N}(\cdot\,|\, \mu_s(\bm{x}), \sigma^2_s)$ is the
Gaussian distribution with mean $\mu_s(\bm{x}) = \bm{x}' \bm{\beta}^s$
and variance $\sigma^2_s$. $\Theta$ denotes the vector of all
parameters of the mixture distribution and the dependent variables are
$y$, the independent $\bm{x}$ and the concomitant $\bm{w}$.
Finite mixtures of binomial regressions with concomitant variable
models are given by:
\begin{align*}
H(y\,|\,T, \bm{x}, \bm{w}, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s(\bm{w},
\bm{\alpha}) \textrm{Bi}(y\,|\,T, \theta_s(\bm{x})),
\end{align*}
where $\textrm{Bi}(\cdot\,|\,T, \theta_s(\bm{x}))$ is the binomial
distribution with number of trials equal to $T$ and success
probability $\theta_s(\bm{x}) \in (0,1)$ given by
$\textrm{logit}(\theta_s(\bm{x})) = \bm{x}' \bm{\beta}^s$.
Finite mixtures of Poisson regressions are given by:
\begin{align*}
H(y \,|\, \bm{x}, \bm{w}, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s(\bm{w},
\bm{\alpha}) \textrm{Poi} (y \,|\, \lambda_s(\bm{x})),
\end{align*}
where $\textrm{Poi}(\cdot\,|\,\lambda_s(\bm{x}))$ denotes the Poisson
distribution and $\log(\lambda_s(\bm{x})) = \bm{x}'\bm{\beta}^s$.
For all these mixture distributions the coefficients are split into
three different groups depending on if fixed, nested or varying
effects are specified:
\begin{align*}
\bm{\beta}^s &=
(\bm{\beta}_1, \bm{\beta}^{c(s)}_{2}, \bm{\beta}^{s}_3)
\end{align*}
where the first group represents the fixed, the second the nested and
the third the varying effects. For the nested effects a partition
$\mathcal{C} = \{c_s \,|\, s = 1,\ldots,S\}$ of the $S$ components is
determined where $c_s = \{s^* = 1,\ldots,S \,|\, c(s^*) = c(s)\}$. A
similar splitting is possible for the variance of mixtures of Gaussian
regression models.
The function for maximum likelihood (ML) estimation with the
Expectation-Maximization (EM) algorithm is \code{flexmix()} which is
described in detail in \cite{flexmix:Leisch:2004a}. It takes as
arguments a specification of the component specific model and of the
concomitant variable model. The component specific model with
varying, nested and fixed effects can be specified with the M-step
driver \code{FLXMRglmfix()} which has arguments \code{formula} for the
varying, \code{nested} for the nested and \code{fixed} for the fixed
effects. \code{formula} and \code{fixed} take an argument of class
\code{"formula"}, whereas \code{nested} expects an object of class
\code{"FLXnested"} or a named list specifying the nested structure
with a component \code{k} which is a vector of the number of
components in each group of the partition and a component
\code{formula} which is a vector of formulas for each group of the
partition. In addition there is an argument \code{family} which has
to be one of \code{gaussian}, \code{binomial}, \code{poisson} or
\code{Gamma} and determines the component specific distribution
function as well as an \code{offset} argument. The argument
\code{varFix} can be used to determine the structure of the dispersion
parameters.
If only varying effects are specified the M-step driver
\code{FLXMRglm()} can be used which only has an argument \code{formula}
for the varying effects and also a \code{family} and an \code{offset}
argument. This driver has the advantage that in the M-step the
weighted ML estimation is made separately for each component which
signifies that smaller model matrices are used. If a mixture model
with a lot of components $S$ is fitted to a large data set with $N$
observations and the model matrix used in the M-step of
\code{FLXMRglm()} has $N$ rows and $K$ columns, the model matrix used in
the M-step of \code{FLXMRglmfix()} has $S N$ rows and up to $S K$
columns.
In general the concomitant variable model is assumed to be a
multinomial logit model, i.e.~:
\begin{align*}
\pi_s(\bm{w},\bm{\alpha}) &=
\frac{e^{\bm{w}'\bm{\alpha}_s}}{\sum_{u = 1}^S
e^{\bm{w}'\bm{\alpha}_u}} \quad \forall s,
\end{align*}
with $\bm{\alpha} = (\bm{\alpha}'_s)_{s=1,\ldots,S}$ and
$\bm{\alpha}_1 \equiv \bm{0}$. This model can be fitted in
\pkg{flexmix} with \code{FLXPmultinom()} which takes as argument
\code{formula} the formula specification of the multinomial logit
part. For fitting the function \code{nnet()} is used from package
\pkg{MASS} \citep{flexmix:Venables+Ripley:2002} with the independent
variables specified by the formula argument and the dependent
variables are given by the a-posteriori probability estimates.
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
\section[Using package flexmix]{Using package \pkg{flexmix}}
In the following datasets from different areas such as medicine,
biology and economics are used. There are three subsections: for
finite mixtures of Gaussian regressions, for finite mixtures of
binomial regression models and for finite mixtures of Poisson
regression models.
%%-------------------------------------------------------------------------
\subsection{Finite mixtures of Gaussian regressions}
This artificial dataset with 200 observations is given in
\cite{flexmix:Gruen+Leisch:2006}. The data is generated from a
mixture of Gaussian regression models with three components. There is
an intercept with varying effects, an independent variable $x1$, which
is a numeric variable, with fixed effects and another independent
variable $x2$, which is a categorical variable with two levels, with
nested effects. The prior probabilities depend on a concomitant
variable $w$, which is also a categorical variable with two levels.
Fixed effects are also assumed for the variance. The data is
illustrated in Figure~\ref{fig:artificialData} and the true underlying
model is given by:
\begin{align*}
H(y\,|\,(x1, x2), w, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s(w,
\bm{\alpha}) \textrm{N}(y\,|\, \mu_s, \sigma^2),
\end{align*}
with $\bm{\beta}^s = (\beta^s_{\textrm{Intercept}},
\beta^{c(s)}_{\textrm{x1}}, \beta_{\textrm{x2}})$. The nesting
signifies that $c(1) = c(2)$ and $\beta^{c(3)}_{\textrm{x1}} = 0$.
The mixture model is fitted by first loading the package and the
dataset and then specifying the component specific model. In a first
step a component specific model with only varying effects is
specified. Then the fitting function \code{flexmix()} is called
repeatedly using \code{stepFlexmix()}. Finally, we order the
components such that they are in ascending order with respect to the
coefficients of the variable \code{x1}.
<>=
set.seed(2807)
library("flexmix")
data("NregFix", package = "flexmix")
Model <- FLXMRglm(~ x2 + x1)
fittedModel <- stepFlexmix(y ~ 1, model = Model, nrep = 3, k = 3,
data = NregFix, concomitant = FLXPmultinom(~ w))
fittedModel <- relabel(fittedModel, "model", "x1")
summary(refit(fittedModel))
@
The estimated coefficients indicate that the components differ for the
intercept, but that they are not significantly different for the
coefficients of $x2$. For $x1$ the coefficient of the first component
is not significantly different from zero and the confidence intervals
for the other two components overlap. Therefore we fit a modified
model, which is equivalent to the true underlying model. The
previously fitted model is used for initializing the EM algorithm:
<>=
Model2 <- FLXMRglmfix(fixed = ~ x2, nested = list(k = c(1, 2),
formula = c(~ 0, ~ x1)), varFix = TRUE)
fittedModel2 <- flexmix(y ~ 1, model = Model2,
cluster = posterior(fittedModel), data = NregFix,
concomitant = FLXPmultinom(~ w))
BIC(fittedModel)
BIC(fittedModel2)
@
The BIC suggests that the restricted model should be preferred.
\begin{figure}[tb]
\centering
\setkeys{Gin}{width=0.95\textwidth}
<>=
plotNregFix <- NregFix
plotNregFix$w <- factor(NregFix$w, levels = 0:1, labels = paste("w =", 0:1))
plotNregFix$x2 <- factor(NregFix$x2, levels = 0:1,
labels = paste("x2 =", 0:1))
plotNregFix$class <- factor(NregFix$class, levels = 1:3, labels = paste("Class", 1:3))
print(xyplot(y ~ x1 | x2*w, groups = class, data = plotNregFix, cex = 0.6,
auto.key = list(space="right"), layout = c(2,2)))
@
\setkeys{Gin}{width=0.8\textwidth}
\caption{Sample with 200 observations from the artificial example.}
\label{fig:artificialData}
\end{figure}
<>=
summary(refit(fittedModel2))
@
The coefficients are ordered such that the fixed coefficients are
first, the nested varying coefficients second and the varying
coefficients last.
%%-------------------------------------------------------------------------
\subsection{Finite mixtures of binomial logit regressions}
%%-------------------------------------------------------------------------
\subsubsection{Beta blockers}
The dataset is analyzed in \cite{flexmix:Aitkin:1999,
flexmix:Aitkin:1999a} using a finite mixture of binomial regression
models. Furthermore, it is described in
\cite{flexmix:McLachlan+Peel:2000} on page 165. The dataset is from a
22-center clinical trial of beta-blockers for reducing mortality after
myocardial infarction. A two-level model is assumed to represent the
data, where centers are at the upper level and patients at the lower
level. The data is illustrated in Figure~\ref{fig:beta} and the model
is given by:
\begin{align*}
H(\textrm{Deaths} \,|\, \textrm{Total}, \textrm{Treatment},
\textrm{Center}, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s \textrm{Bi}(
\textrm{Deaths} \,|\, \textrm{Total}, \theta_s).
\end{align*}
First, the center classification is ignored and a binomial logit
regression model with treatment as covariate is fitted using
\code{glm}, i.e.~$S=1$:
<>=
data("betablocker", package = "flexmix")
betaGlm <- glm(cbind(Deaths, Total - Deaths) ~ Treatment,
family = "binomial", data = betablocker)
betaGlm
@
In the next step the center classification is included by allowing a
random effect for the intercept given the centers, i.e.~the
coefficients $\bm{\beta}^s$ are given by
$(\beta^s_{\textrm{Intercept|Center}}, \beta_{\textrm{Treatment}})$.
This signifies that the component membership is fixed for each center.
In order to determine the suitable number of components, the mixture
is fitted with different numbers of components and the BIC information
criterion is used to select an appropriate model. In this case a model
with three components is selected. The fitted values for the model
with three components are given in Figure~\ref{fig:beta}.
<>=
betaMixFix <- stepFlexmix(cbind(Deaths, Total - Deaths) ~ 1 | Center,
model = FLXMRglmfix(family = "binomial", fixed = ~ Treatment),
k = 2:4, nrep = 3, data = betablocker)
betaMixFix
@
\begin{figure}
\centering
<>=
library("grid")
betaMixFix_3 <- getModel(betaMixFix, "3")
betaMixFix_3 <- relabel(betaMixFix_3, "model", "Intercept")
betablocker$Center <- with(betablocker, factor(Center, levels = Center[order((Deaths/Total)[1:22])]))
clusters <- factor(clusters(betaMixFix_3), labels = paste("Cluster", 1:3))
print(dotplot(Deaths/Total ~ Center | clusters, groups = Treatment, as.table = TRUE,
data = betablocker, xlab = "Center", layout = c(3, 1), scales = list(x = list(draw = FALSE)),
key = simpleKey(levels(betablocker$Treatment), lines = TRUE, corner = c(1,0))))
betaMixFix.fitted <- fitted(betaMixFix_3)
for (i in 1:3) {
seekViewport(trellis.vpname("panel", i, 1))
grid.lines(unit(1:22, "native"), unit(betaMixFix.fitted[1:22, i], "native"), gp = gpar(lty = 1))
grid.lines(unit(1:22, "native"), unit(betaMixFix.fitted[23:44, i], "native"), gp = gpar(lty = 2))
}
@
\caption{Relative number of deaths for the treatment and the control
group for each center in the beta blocker dataset. The centers are
sorted by the relative number of deaths in the control group. The
lines indicate the fitted values for each component of the 3-component
mixture model with random intercept and fixed effect for treatment.}
\label{fig:beta}
\end{figure}
In addition the treatment effect can also be included in the random
part of the model. As then all coefficients for the covariates and the
intercept follow a mixture distribution the component specific model
can be specified using \code{FLXMRglm()}. The coefficients are
$\bm{\beta}^s=(\beta^s_{\textrm{Intercept|Center}},
\beta^s_{\textrm{Treatment|Center}})$, i.e.~it is assumed that the
heterogeneity is only between centers and therefore the aggregated
data for each center can be used.
<>=
betaMix <- stepFlexmix(cbind(Deaths, Total - Deaths) ~ Treatment | Center,
model = FLXMRglm(family = "binomial"), k = 3, nrep = 3,
data = betablocker)
summary(betaMix)
@
The full model with a random effect for treatment has a higher BIC and
therefore the smaller model would be preferred.
The default plot of the returned \code{flexmix} object is a rootogram
of the a-posteriori probabilities where observations with a-posteriori
probabilities smaller than \code{eps} are omitted. With argument
\code{mark} the component is specified to have those observations
marked which are assigned to this component based on the maximum
a-posteriori probabilities. This indicates which components overlap.
<>=
print(plot(betaMixFix_3, mark = 1, col = "grey", markcol = 1))
@
The default plot of the fitted model indicates that the components are
well separated. In addition component 1 has a slight overlap with
component 2 but none with component 3.
The fitted parameters of the component specific models can be accessed
with:
<>=
parameters(betaMix)
@
The cluster assignments using the maximum a-posteriori probabilities
are obtained with:
<>=
table(clusters(betaMix))
@
The estimated probabilities for each component for the treated
patients and those in the control group can be obtained with:
<>=
predict(betaMix,
newdata = data.frame(Treatment = c("Control", "Treated")))
@
or
<>=
fitted(betaMix)[c(1, 23), ]
@
A further analysis of the model is possible with function
\code{refit()} which returns the estimated coefficients together with
the standard deviations, z-values and corresponding p-values:
<>=
summary(refit(getModel(betaMixFix, "3")))
@
The printed coefficients are ordered to have the fixed effects before
the varying effects.
%%-----------------------------------------------------------------------
\subsubsection{Mehta et al. trial}
This dataset is similar to the beta blocker dataset and is also
analyzed in \cite{flexmix:Aitkin:1999a}. The dataset is visualized in
Figure~\ref{fig:mehta}. The observation for the control group in
center 15 is slightly conspicuous and might classify as an outlier.
The model is given by:
\begin{align*}
H(\textrm{Response} \,|\, \textrm{Total}, \bm{\Theta}) &= \sum_{s = 1}^S
\pi_s \textrm{Bi}( \textrm{Response} \,|\, \textrm{Total}, \theta_s),
\end{align*}
with $\bm{\beta}^s = (\beta^s_{\textrm{Intercept|Site}},
\beta_{\textrm{Drug}})$. This model is fitted with:
<>=
data("Mehta", package = "flexmix")
mehtaMix <- stepFlexmix(cbind(Response, Total - Response)~ 1 | Site,
model = FLXMRglmfix(family = "binomial", fixed = ~ Drug),
control = list(minprior = 0.04), nrep = 3, k = 3, data = Mehta)
summary(mehtaMix)
@
One component only contains the observations for center 15 and in
order to be able to fit a mixture with such a small component it is
necessary to modify the default argument for \code{minprior} which is
0.05. The fitted values for this model are given separately for each
component in Figure~\ref{fig:mehta}.
\begin{figure}
\centering
<>=
Mehta$Site <- with(Mehta, factor(Site, levels = Site[order((Response/Total)[1:22])]))
clusters <- factor(clusters(mehtaMix), labels = paste("Cluster", 1:3))
print(dotplot(Response/Total ~ Site | clusters, groups = Drug, layout = c(3,1),
data = Mehta, xlab = "Site", scales = list(x = list(draw = FALSE)),
key = simpleKey(levels(Mehta$Drug), lines = TRUE, corner = c(1,0))))
mehtaMix.fitted <- fitted(mehtaMix)
for (i in 1:3) {
seekViewport(trellis.vpname("panel", i, 1))
sapply(1:nlevels(Mehta$Drug), function(j)
grid.lines(unit(1:22, "native"), unit(mehtaMix.fitted[Mehta$Drug == levels(Mehta$Drug)[j], i], "native"), gp = gpar(lty = j)))
}
@
\caption{Relative number of responses for the treatment and the
control group for each site in the Mehta et al.~trial dataset
together with the fitted values. The sites are sorted by the
relative number of responses in the control group.}
\label{fig:mehta}
\end{figure}
If also a random effect for the coefficient of $\textrm{Drug}$ is
fitted, i.e.~$\bm{\beta}^s = (\beta^s_{\textrm{Intercept|Site}},
\beta^s_{\textrm{Drug|Site}})$, this is estimated by:
<>=
mehtaMix <- stepFlexmix(cbind(Response, Total - Response) ~ Drug | Site,
model = FLXMRglm(family = "binomial"), k = 3, data = Mehta, nrep = 3,
control = list(minprior = 0.04))
summary(mehtaMix)
@
The BIC is smaller for the larger model and this indicates that the
assumption of an equal drug effect for all centers is not confirmed by
the data.
Given Figure~\ref{fig:mehta} a two-component model with fixed
treatment is also fitted to the data where site 15 is omitted:
<>=
Mehta.sub <- subset(Mehta, Site != 15)
mehtaMix <- stepFlexmix(cbind(Response, Total - Response) ~ 1 | Site,
model = FLXMRglmfix(family = "binomial", fixed = ~ Drug),
data = Mehta.sub, k = 2, nrep = 3)
summary(mehtaMix)
@
%%-----------------------------------------------------------------------
\subsubsection{Tribolium}
A finite mixture of binomial regressions is fitted to the Tribolium
dataset given in \cite{flexmix:Wang+Puterman:1998}. The data was
collected to investigate whether the adult Tribolium species Castaneum
has developed an evolutionary advantage to recognize and avoid eggs of
its own species while foraging, as beetles of the genus Tribolium are
cannibalistic in the sense that adults eat the eggs of their own
species as well as those of closely related species.
The experiment isolated a number of adult beetles of the same species
and presented them with a vial of 150 eggs (50 of each type), the eggs
being thoroughly mixed to ensure uniformity throughout the vial. The
data gives the consumption data for adult Castaneum species. It
reports the number of Castaneum, Confusum and Madens eggs,
respectively, that remain uneaten after two day exposure to the adult
beetles. Replicates 1, 2, and 3 correspond to different occasions on
which the experiment was conducted. The data is visualized in
Figure~\ref{fig:tribolium} and the model is given by:
\begin{align*}
H(\textrm{Remaining} \,|\, \textrm{Total}, \bm{\Theta}) &= \sum_{s = 1}^S
\pi_s(\textrm{Replicate}, \bm{\alpha}) \textrm{Bi}( \textrm{Remaining} \,|\, \textrm{Total}, \theta_s),
\end{align*}
with $\bm{\beta}^s = (\beta^s_{\textrm{Intercept}},
\bm{\beta}_{\textrm{Species}})$. This model is fitted with:
<>=
data("tribolium", package = "flexmix")
TribMix <- stepFlexmix(cbind(Remaining, Total - Remaining) ~ 1,
k = 2:3, model = FLXMRglmfix(fixed = ~ Species, family = "binomial"),
concomitant = FLXPmultinom(~ Replicate), data = tribolium)
@
The model which is selected as the best in
\cite{flexmix:Wang+Puterman:1998} can be estimated with:
<>=
modelWang <- FLXMRglmfix(fixed = ~ I(Species == "Confusum"),
family = "binomial")
concomitantWang <- FLXPmultinom(~ I(Replicate == 3))
TribMixWang <- stepFlexmix(cbind(Remaining, Total - Remaining) ~ 1,
data = tribolium, model = modelWang, concomitant = concomitantWang,
k = 2)
summary(refit(TribMixWang))
@
\begin{figure}
\centering
<>=
clusters <- factor(clusters(TribMixWang), labels = paste("Cluster", 1:TribMixWang@k))
print(dotplot(Remaining/Total ~ factor(Replicate) | clusters, groups = Species,
data = tribolium[rep(1:9, each = 3) + c(0:2)*9,], xlab = "Replicate",
auto.key = list(corner = c(1,0))))
@
\caption{Relative number of remaining beetles for each
  replicate. The different panels correspond to the cluster
  assignments based on the a-posteriori probabilities of the model
  suggested in \cite{flexmix:Wang+Puterman:1998}.}
\label{fig:tribolium}
\end{figure}
\cite{flexmix:Wang+Puterman:1998} also considered a model where they
omit one conspicuous observation. This model can be estimated with:
<>=
TribMixWangSub <- stepFlexmix(cbind(Remaining, Total - Remaining) ~ 1,
k = 2, data = tribolium[-7,], model = modelWang,
concomitant = concomitantWang)
@
%%-----------------------------------------------------------------------
\subsubsection{Trypanosome}
The data is used in \cite{flexmix:Follmann+Lambert:1989}. It is from
a dosage-response analysis where the proportion of organisms belonging
to different populations shall be assessed. It is assumed that
organisms belonging to different populations are indistinguishable
other than in terms of their reaction to the stimulus. The
experimental technique involved inspection under the microscope of a
representative aliquot of a suspension, all organisms appearing within
two fields of view being classified either alive or dead. Hence the
total numbers of organisms present at each dose and the number showing
the quantal response were both random variables. The data is
illustrated in Figure~\ref{fig:trypanosome}.
The model which is proposed in \cite{flexmix:Follmann+Lambert:1989}
is given by:
\begin{align*}
H(\textrm{Dead} \,|\,\bm{\Theta}) &= \sum_{s = 1}^S \pi_s
\textrm{Bi}( \textrm{Dead} \,|\, \theta_s),
\end{align*}
where $\textrm{Dead} \in \{0,1\}$ and with $\bm{\beta}^s =
(\beta^s_{\textrm{Intercept}}, \bm{\beta}_{\textrm{log(Dose)}})$. This
model is fitted with:
<>=
data("trypanosome", package = "flexmix")
TrypMix <- stepFlexmix(cbind(Dead, 1-Dead) ~ 1, k = 2, nrep = 3,
data = trypanosome, model = FLXMRglmfix(family = "binomial",
fixed = ~ log(Dose)))
summary(refit(TrypMix))
@
The fitted values are given in Figure~\ref{fig:trypanosome} together
with the fitted values of a generalized linear model in order to
facilitate comparison of the two models.
\begin{figure}
\centering
<>=
# Aggregate the individual binary outcomes into dead/alive counts per
# dose level to plot observed death rates.
tab <- with(trypanosome, table(Dead, Dose))
Tryp.dat <- data.frame(Dead = tab["1",], Alive = tab["0",],
Dose = as.numeric(colnames(tab)))
plot(Dead/(Dead+Alive) ~ Dose, data = Tryp.dat)
# Fitted values of an ordinary binomial GLM, for comparison.
Tryp.pred <- predict(glm(cbind(Dead, 1-Dead) ~ log(Dose), family = "binomial", data = trypanosome), newdata=Tryp.dat, type = "response")
# Fitted values of the mixture model, aggregated over components.
TrypMix.pred <- predict(TrypMix, newdata = Tryp.dat, aggregate = TRUE)[[1]]
lines(Tryp.dat$Dose, Tryp.pred, lty = 2)
lines(Tryp.dat$Dose, TrypMix.pred, lty = 3)
legend(4.7, 1, c("GLM", "Mixture model"), lty=c(2, 3), xjust=0, yjust=1)
@
\caption{Relative number of deaths for each dose level together with
the fitted values for the generalized linear model (``GLM'') and the
random intercept model (``Mixture model'').}
\label{fig:trypanosome}
\end{figure}
%%-------------------------------------------------------------------------
\subsection{Finite mixtures of Poisson regressions}
% %%-----------------------------------------------------------------------
\subsubsection{Fabric faults}
The dataset is analyzed using a finite mixture of Poisson regression
models in \cite{flexmix:Aitkin:1996}. Furthermore, it is described in
\cite{flexmix:McLachlan+Peel:2000} on page 155. It contains 32
observations on the number of faults in rolls of a textile fabric. A
random intercept model is used where a fixed effect is assumed for the
logarithm of length:
<>=
# Fabric faults: two-component Poisson mixture with a common fixed
# effect for log(Length); only the intercept varies over components
# (random-intercept model).
data("fabricfault", package = "flexmix")
fabricMix <- stepFlexmix(Faults ~ 1, model = FLXMRglmfix(family="poisson",
fixed = ~ log(Length)), data = fabricfault, k = 2, nrep = 3)
summary(fabricMix)
summary(refit(fabricMix))
# Grid of lengths on which the component-specific means are predicted.
Lnew <- seq(0, 1000, by = 50)
fabricMix.pred <- predict(fabricMix, newdata = data.frame(Length = Lnew))
@
The intercept of the first component is not significantly different
from zero for a significance level of 0.05. We therefore also fit a
modified model where the intercept is a-priori set to zero for the
first component. This nested structure is given as part of the model
specification with argument \code{nested}.
<>=
# Modified model: the nested structure fixes the intercept of the first
# component at zero a-priori; the EM algorithm is initialized with the
# a-posteriori probabilities of the previous fit.
fabricMix2 <- flexmix(Faults ~ 0, data = fabricfault,
cluster = posterior(fabricMix),
model = FLXMRglmfix(family = "poisson", fixed = ~ log(Length),
nested = list(k=c(1,1), formula=list(~0,~1))))
summary(refit(fabricMix2))
fabricMix2.pred <- predict(fabricMix2,
newdata = data.frame(Length = Lnew))
@
The data and the fitted values for each of the components for both
models are given in Figure~\ref{fig:fabric}.
\begin{figure}
\centering
<>=
# Observed data with the component-wise fitted curves of both models
# overlaid (solid: full model, dashed: zero-intercept model).
plot(Faults ~ Length, data = fabricfault)
sapply(fabricMix.pred, function(y) lines(Lnew, y, lty = 1))
sapply(fabricMix2.pred, function(y) lines(Lnew, y, lty = 2))
legend(190, 25, paste("Model", 1:2), lty=c(1, 2), xjust=0, yjust=1)
@
\caption{Observed values of the fabric faults dataset together with
the fitted values for the components of each of the two fitted
models.}
\label{fig:fabric}
\end{figure}
%%-----------------------------------------------------------------------
\subsubsection{Patent}
The patent data given in \cite{flexmix:Wang+Cockburn+Puterman:1998}
consist of 70 observations on patent applications, R\&D spending and
sales in millions of dollar from pharmaceutical and biomedical
companies in 1976 taken from the National Bureau of Economic Research
R\&D Masterfile. The observations are displayed in
Figure~\ref{fig:patent}. The model which is chosen as the best
in \cite{flexmix:Wang+Cockburn+Puterman:1998} is given by:
\begin{align*}
H(\textrm{Patents} \,|\, \textrm{lgRD}, \textrm{RDS}, \bm{\Theta}) &=
\sum_{s = 1}^S \pi_s(\textrm{RDS}, \bm{\alpha}) \textrm{Poi} ( \textrm{Patents} \,|\, \lambda_s),
\end{align*}
and $\bm{\beta}^s = (\beta^s_{\textrm{Intercept}}, \beta^s_{\textrm{lgRD}})$.
The model is fitted with:
<>=
# Patent data: three-component Poisson regression of Patents on lgRD,
# with a multinomial logit concomitant variable model in RDS driving
# the component weights.
data("patent", package = "flexmix")
ModelPat <- FLXMRglm(family = "poisson")
FittedPat <- stepFlexmix(Patents ~ lgRD, k = 3, nrep = 3,
model = ModelPat, data = patent, concomitant = FLXPmultinom(~ RDS))
summary(FittedPat)
@
The fitted values for the component specific models and the
concomitant variable model are given in Figure~\ref{fig:patent}. The
plotting symbol of the observations corresponds to the induced
clustering given by \code{clusters(FittedPat)}.
This model is modified to have fixed effects for the logarithmized
R\&D spendings, i.e.~$\bm{\beta}^s = (\beta^s_{\textrm{Intercept}},
\beta_{\textrm{lgRD}})$. The already fitted model is used for
initialization, i.e.~the EM algorithm is started with an M-step given
the a-posteriori probabilities.
<>=
# Constrained variant: the lgRD coefficient is common to all components
# (fixed effect); the previous fit supplies the initial clustering, so
# EM starts with an M-step given those a-posteriori probabilities.
ModelFixed <- FLXMRglmfix(family = "poisson", fixed = ~ lgRD)
FittedPatFixed <- flexmix(Patents ~ 1, model = ModelFixed,
cluster = posterior(FittedPat), concomitant = FLXPmultinom(~ RDS),
data = patent)
summary(FittedPatFixed)
@
The fitted values for the component specific models and the
concomitant variable model of this model are also given in
Figure~\ref{fig:patent}.
\begin{figure}
\centering
\setkeys{Gin}{width=0.95\textwidth}
<>=
# Grid of log(R&D) values on which the component curves are evaluated.
lgRDv <- seq(-3, 5, by = 0.05)
newdata <- data.frame(lgRD = lgRDv)
# Combine the observed data and the component-wise predictions of a
# fitted model into one data frame suitable for lattice plotting.
plotData <- function(fitted) {
with(patent, data.frame(Patents = c(Patents,
unlist(predict(fitted, newdata = newdata))),
lgRD = c(lgRD, rep(lgRDv, 3)),
class = c(clusters(fitted), rep(1:3, each = nrow(newdata))),
type = rep(c("data", "fit"), c(nrow(patent), nrow(newdata)*3))))
}
plotPatents <- cbind(plotData(FittedPat), which = "Wang et al.")
plotPatentsFixed <- cbind(plotData(FittedPatFixed), which = "Fixed effects")
plotP <- rbind(plotPatents, plotPatentsFixed)
# Evaluate the concomitant (multinomial logit) model on an RDS grid;
# the component probabilities are the softmax of the linear predictors.
rds <- seq(0, 3, by = 0.02)
x <- model.matrix(FittedPat@concomitant@formula, data = data.frame(RDS = rds))
plotConc <- function(fitted) {
E <- exp(x%*%fitted@concomitant@coef)
data.frame(Probability = as.vector(E/rowSums(E)),
class = rep(1:3, each = nrow(x)),
RDS = rep(rds, 3))
}
plotConc1 <- cbind(plotConc(FittedPat), which = "Wang et al.")
plotConc2 <- cbind(plotConc(FittedPatFixed), which = "Fixed effects")
plotC <- rbind(plotConc1, plotConc2)
# Left: observations as points, component fits as lines, per model.
print(xyplot(Patents ~ lgRD | which, data = plotP, groups=class, xlab = "log(R&D)",
panel = "panel.superpose", type = plotP$type,
panel.groups = function(x, y, type = "p", subscripts, ...)
{
ind <- plotP$type[subscripts] == "data"
panel.xyplot(x[ind], y[ind], ...)
panel.xyplot(x[!ind], y[!ind], type = "l", ...)
},
scales = list(alternating=FALSE), layout=c(1,2), as.table=TRUE),
more=TRUE, position=c(0,0,0.6, 1))
# Right: component probabilities from the concomitant variable model.
print(xyplot(Probability ~ RDS | which, groups = class, data = plotC, type = "l",
scales = list(alternating=FALSE), layout=c(1,2), as.table=TRUE),
position=c(0.6, 0.01, 1, 0.99))
@
\caption{Patent data with the fitted values of the component specific
models (left) and the concomitant variable model (right) for the
model in \citeauthor{flexmix:Wang+Cockburn+Puterman:1998} and with
fixed effects for $\log(\textrm{R\&D})$. The plotting symbol for
each observation is determined by the component with the maximum
a-posteriori probability.}
\label{fig:patent}
\end{figure}
\setkeys{Gin}{width=0.8\textwidth}
With respect to the BIC the full model is better than the model with
the fixed effects. However, fixed effects have the advantage that the
different components differ only in their baseline and the relation
between the components in return of investment for each additional
unit of R\&D spending is constant. Due to a-priori domain knowledge
this model might seem more plausible. The fitted values for the
constrained model are also given in Figure~\ref{fig:patent}.
%%-----------------------------------------------------------------------
\subsubsection{Seizure}
The data is used in \cite{flexmix:Wang+Puterman+Cockburn:1996} and is
from a clinical trial where the effect of intravenous gamma-globulin
on suppression of epileptic seizures is studied. There are daily
observations for a period of 140 days on one patient, where the first
27 days are a baseline period without treatment, the remaining 113
days are the treatment period. The model proposed in
\cite{flexmix:Wang+Puterman+Cockburn:1996} is given by:
\begin{align*}
H(\textrm{Seizures} \,|\, (\textrm{Treatment}, \textrm{log(Day)},
\textrm{log(Hours)}), \bm{\Theta}) &= \sum_{s = 1}^S \pi_s
\textrm{Poi} ( \textrm{Seizures} \,|\, \lambda_s),
\end{align*}
where $\bm{\beta}^s = (\beta^s_{\textrm{Intercept}},
\beta^s_{\textrm{Treatment}}, \beta^s_{\textrm{log(Day)}},
\beta^s_{\textrm{Treatment:log(Day)}})$ and $\textrm{log(Hours)}$ is
used as offset. This model is fitted with:
<>=
# Seizure data: two-component Poisson mixture with a treatment-by-time
# interaction; log(Hours) enters as an offset so rates are modelled.
data("seizure", package = "flexmix")
seizMix <- stepFlexmix(Seizures ~ Treatment * log(Day), data = seizure,
k = 2, nrep = 3, model = FLXMRglm(family = "poisson",
offset = log(seizure$Hours)))
summary(seizMix)
summary(refit(seizMix))
@
A different model with different contrasts to directly estimate the
coefficients for the jump when changing between base and treatment
period is given by:
<>=
# Reparameterized model: Day is rescaled by 27 (the last baseline day)
# so the coefficients directly estimate the jump at the change between
# baseline and treatment; initialized from the previous posteriors.
seizMix2 <- flexmix(Seizures ~ Treatment * log(Day/27),
data = seizure, cluster = posterior(seizMix),
model = FLXMRglm(family = "poisson", offset = log(seizure$Hours)))
summary(seizMix2)
summary(refit(seizMix2))
@
A different model which allows no jump at the change between base and
treatment period is fitted with:
<>=
# Nested-slope model (no treatment main effect): allows no jump at the
# baseline/treatment change point, only a different slope per period.
seizMix3 <- flexmix(Seizures ~ log(Day/27)/Treatment, data = seizure,
cluster = posterior(seizMix), model = FLXMRglm(family = "poisson",
offset = log(seizure$Hours)))
summary(seizMix3)
summary(refit(seizMix3))
@
With respect to the BIC criterion the smaller model with no jump is
preferred. This is also the more intuitive model from a practitioner's
point of view, as it does not seem to be plausible that starting the
treatment already gives a significant improvement, but improvement
develops over time. The data points together with the fitted values
for each component of the two models are given in
Figure~\ref{fig:seizure}. It can clearly be seen that the fitted
values are nearly equal which also supports the smaller model.
\begin{figure}
\centering
<>=
# Daily seizure rates (counts per hour) by period, with the fitted
# component means of models 1 and 3 overlaid.
plot(Seizures/Hours~Day, pch = c(1,3)[as.integer(Treatment)], data=seizure)
abline(v=27.5, lty=2, col="grey")  # boundary between baseline and treatment
legend(140, 9, c("Baseline", "Treatment"), pch=c(1, 3), xjust=1, yjust=1)
matplot(seizure$Day, fitted(seizMix)/seizure$Hours, type="l", add=TRUE, lty = 1, col = 1)
matplot(seizure$Day, fitted(seizMix3)/seizure$Hours, type="l", add=TRUE, lty = 3, col = 1)
legend(140, 7, paste("Model", c(1,3)), lty=c(1, 3), xjust=1, yjust=1)
@
\caption{Observed values for the seizure dataset together with the
fitted values for the components of the two different models.}
\label{fig:seizure}
\end{figure}
%%-----------------------------------------------------------------------
\subsubsection{Ames salmonella assay data}
The Ames salmonella assay dataset was used in
\cite{flexmix:Wang+Puterman+Cockburn:1996}. They propose a model
given by:
\begin{align*}
H(\textrm{y} \,|\, \textrm{x}, \bm{\Theta}) &= \sum_{s = 1}^S \pi_s
\textrm{Poi} ( \textrm{y} \,|\, \lambda_s),
\end{align*}
where $\bm{\beta}^s = (\beta^s_{\textrm{Intercept}},
\beta_{\textrm{x}}, \beta_{\textrm{log(x+10)}})$. The model is fitted with:
<>=
# Salmonella assay: two-component Poisson mixture with common fixed
# effects for the dose x and log(x + 10); only the intercept is
# component specific.
data("salmonellaTA98", package = "flexmix")
salmonMix <- stepFlexmix(y ~ 1, data = salmonellaTA98, k = 2, nrep = 3,
model = FLXMRglmfix(family = "poisson", fixed = ~ x + log(x + 10)))
@
\begin{figure}
\centering
<>=
# Observations labelled by their induced cluster, with the fitted mean
# curve of each component overlaid.
salmonMix.pr <- predict(salmonMix, newdata=salmonellaTA98)
plot(y~x, data=salmonellaTA98,
pch=as.character(clusters(salmonMix)),
xlab="Dose of quinoline", ylab="Number of revertant colonies of salmonella",
ylim=range(c(salmonellaTA98$y, unlist(salmonMix.pr))))
for (i in 1:2) lines(salmonellaTA98$x, salmonMix.pr[[i]], lty=i)
@
\caption{Means and classification for assay data according to the
estimated posterior probabilities based on the fitted model.}
\label{fig:almes}
\end{figure}
%%-----------------------------------------------------------------------
\section{Conclusions and future work}
Package \pkg{flexmix} can be used to fit finite mixtures of
regressions to datasets used in the literature to illustrate these
models. The results can be reproduced and additional insights can be
gained using visualization methods available in \proglang{R}. The fitted
model is an object in \proglang{R} which can be explored using
\code{show()}, \code{summary()} or \code{plot()}, as suitable methods
have been implemented for objects of class \code{"flexmix"} which are
returned by \code{flexmix()}.
In the future it would be desirable to have more diagnostic tools
available to analyze the model fit and compare different models. The
use of resampling methods would be convenient as they can be applied
to all kinds of mixture models and would therefore suit well the
purpose of the package which is flexible modelling of various finite
mixture models. Furthermore, an additional visualization method for the
fitted coefficients of the mixture would facilitate the comparison of
the components.
%%-----------------------------------------------------------------------
\section*{Computational details}
<>=
# Collect name and version of every attached/loaded package for the
# computational-details section; the quadruple backslash survives
# Sweave escaping to yield the LaTeX macro \pkg{} in the output.
SI <- sessionInfo()
pkgs <- paste(sapply(c(SI$otherPkgs, SI$loadedOnly), function(x)
paste("\\\\pkg{", x$Package, "} ",
x$Version, sep = "")), collapse = ", ")
@
All computations and graphics in this paper have been done using
\proglang{R} version \Sexpr{getRversion()} with the packages
\Sexpr{pkgs}.
%%-----------------------------------------------------------------------
\section*{Acknowledgments}
This research was supported by the Austrian Science Foundation
(FWF) under grant P17382 and the Austrian Academy of Sciences
({\"O}AW) through a DOC-FFORTE scholarship for Bettina Gr{\"u}n.
%%-----------------------------------------------------------------------
\bibliography{flexmix}
\end{document}
flexmix/vignettes/flexmix.png 0000644 0001762 0000144 00000227461 14404637307 016144 0 ustar ligges users ‰PNG
IHDR Ý ²êÛ sBITÛáOà pHYs jŒw IDATxœìÝi@WÛ7ðIØqW6w©UqƒZ¨¨Xµ¨`Å]¬Õ‚€hk«µRën¥7ŠâŠ÷ÝÞÔ®ÕªuC±*¶¨ˆ²+*BBï‡yïy¦3É0Ùføÿ>M®9sÎ5I€äâÌIuu5 ‚“Š @…º €8P— ‡¹Ø €±¨¬¬ÌÉÉ; 0;;»†
Š üê2 ðÿ=~üØÍÍMì, ÀPÃÃÃÅÎ þ×1 ˆóe ÀdŒ?þåË—bgâøòË/ýýýÅÎBÏP— “qüøñ‚‚±³ qŒ1Bìôu PY³fݺu; ÐÞ£G²²²ÄÎ ¸ . ªuïÞýرcbg Ú[¹rå¬Y³ÄÎ ¸`Ý_ q`¾ ˜$[[Û«W¯ŠPbbbBB‚ØYê2 `’¤R©‡‡‡ØY€ÙÛÛ‹‚Áá:&