segmented/MD5 (md5 checksum manifest for the package files: DESCRIPTION, NAMESPACE, NEWS, the R/ sources, data/, inst/CITATION and the man/ pages; checksums and tar headers omitted)
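## Illustrative usage sketch (not part of the package sources): it shows how the
## user-level entry points defined in the R/ files below (segmented(), stepmented(),
## and the coef() method with 'include.psi') are typically called on an existing lm
## fit. The data and object names (x, y, fit0, fit1, fit2) are hypothetical.
if (FALSE) {
  library(segmented)
  set.seed(1)
  x <- 1:100
  y <- 2 + 0.5 * pmax(x - 60, 0) + rnorm(100, sd = 2)   # broken-line signal with a kink at x = 60
  fit0 <- lm(y ~ x)
  fit1 <- segmented(fit0, seg.Z = ~x, npsi = 1)    # estimate one breakpoint in the slope of x
  coef(fit1, include.psi = TRUE)                   # regression coefficients plus the estimated psi
  fit2 <- stepmented(fit0, seg.Z = ~x, npsi = 1)   # estimate one jump-point (piecewise-constant shift)
}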
segmented/R/coef.segmented.r

coef.segmented<-function(object, include.psi=FALSE, ...){
  b<- object$coefficients
  if(include.psi){
    psi<- object$psi[,"Est."]
    b[rownames(object$psi)]<-psi
  }
  b
}

segmented/R/stepmented.R

`stepmented` <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), keep.class=FALSE, var.psi=FALSE, ...){
  UseMethod("stepmented")
}

segmented/R/step.lm.fit.r

step.lm.fit<-function(y, x.lin, Xtrue, PSI, ww, offs, opz, return.all.sol=FALSE){
#----------------------
  search.minWO<-function(h, psi, psi.old, X, y, w, offs, n.ok, id.fix.psi=NULL) { # with weights and offset
    psi.ok<- psi*h + psi.old*(1-h)
    psi.ok[id.fix.psi]<- psi.old[id.fix.psi]
    PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE)
    U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI)
    #if (pow[1] != 1) U1 <- U1^pow[1]
    obj1 <- try(mylmWO(cbind(X, U1), y, w, offs), silent = TRUE)
    #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE)
    L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #r<-sum(obj1$residuals^2 * w)
    L1
  }
#----------------------
  search.min<-function(h, psi, psi.old, X, y, w, offs, n.ok, id.fix.psi=NULL) { # WITHOUT weights and offset
    psi.ok<- psi*h + psi.old*(1-h)
    psi.ok[id.fix.psi]<- psi.old[id.fix.psi]
    PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE)
    U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI)
    #if (pow[1] != 1) U1 <- U1^pow[1]
    obj1 <- try(mylm(cbind(X, U1), y), silent = TRUE)
    #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE)
    L1 <- if(class(obj1)[1] == "try-error") L0 + 10 else obj1$L0
    L1
  }
#----------------------
  toMatrix<-function(.x, ki){
    # repeats each column .x[,j] ki[j] times
    if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)")
    if(all(ki==1)) return(.x)
    M<-vector("list", length=length(ki))
    for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE)
    do.call(cbind, M)
  }
### -----
  isZero <- function(v) sapply(v, function(.x) identical(.x,0))
###------
  mylmWO<-function(x,y,w=1,offs=0){ # with weights and offset
    sw<- sqrt(w)
    x1<-x*sw
    y1<-(y-offs)*sw
    b<-drop(solve(crossprod(x1),crossprod(x1,y1)))
    fit<-x%*%b #drop(tcrossprod(x,t(b)))
    r<-y-fit
    #o<- .lm.fit(x=x1, y=y1)
    #b<- o$coefficients
    #r<-o$residuals
    #fit<- y + r
    o<-list(coefficients=b,fitted.values=fit, residuals=r, L0=sum(w*r^2), df.residual=length(y)-length(b))
    o
  }
  mylm<-function(x,y,w,offs){
    b<-drop(solve(crossprod(x),crossprod(x,y)))
    fit<-x%*%b #drop(tcrossprod(x,t(b)))
    r<-y-fit
    o<-list(coefficients=b,fitted.values=fit, residuals=r, L0=sum(r^2), df.residual=length(y)-length(b))
    o
  }
#-----------
  if(var(offs)<=0 && var(ww)<=0){
    fitter<-function(x, y, w, offs) .lm.fit(x=x, y=y) #list(coefficients=drop(solve(crossprod(x), crossprod(x, y))))
    mylmOK <- mylm
    search.minOK <- search.min
  } else {
    fitter<-function(x, y, w, offs) .lm.fit(x=sqrt(w)*x, y=sqrt(w)*(y-offs))
    mylmOK <- mylmWO
    search.minOK <- search.minWO
  }
##----------
  adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) }
#------------
  tol<-opz$toll
  display<-opz$display
  it.max<-opz$it.max
  dev0<-opz$dev0
  useExp.k<-opz$useExp.k
  #min.step<- opz$min.step #=.0001
  #conv.psi<-opz$conv.psi #=FALSE
  alpha<-opz$alpha
  #browser()
  limZ <- if(is.null(opz$limZ)) apply(Xtrue, 2, quantile,
names=FALSE, probs=alpha) else opz$limZ #limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) fix.npsi<-opz$fix.npsi agg<-opz$agg h<-opz$h npsii<-opz$npsii npsi<- sum(npsii) #opz$npsi P<-length(npsii) #P<-opz$P digits<-opz$digits rangeZ<-opz$rangeZ #browser() # pos.vec <- 1:npsi # pos <- vector("list", P) # ind <- 0 pos<- tapply(1:npsi, rep(1:P, npsii), list) i <- 0 agg <- rep(agg, npsi) # direz <- matrix(NA, it.max, npsi) # conv <- rep(FALSE, npsi) # ind.conv <- NULL n<-length(y) #================== n.ok <- if(!is.null(opz$n.ok)) opz$n.ok else n #================== #browser() plin<-ncol(x.lin) epsilon<-10 k.values<-dev.values<- NULL psi.values <-list() #psi.values[[length(psi.values) + 1]] <- NA #PSI0<- matrix(psi0, n, npsi, byrow = TRUE) XREG <- cbind(x.lin, Xtrue>PSI) psi0<-PSI[1,] if(it.max==0){ obj <- lm.wfit(x = XREG, y = y, w = ww, offset = offs) L1 <- sum(obj$residuals^2 * ww) obj$epsilon <- epsilon idZ<-(plin+1):(plin+ncol(PSI)) b<- obj$coef[idZ] obj <- list(obj = obj, psi = PSI[1,], psi.values = psi.values, idU=ncol(x.lin)+1:(length(psi0)), rangeZ = rangeZ, beta.c=b, epsilon = epsilon, SumSquares.no.gap = L1, id.warn = TRUE) return(obj) } if(!opz$usestepreg){ dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi psi.values[[length(psi.values) + 1]] <- NA #nessun psi } #browser() if(is.null(opz$fit.psi0)){ #modello con psi iniziale.. obj0 <- try(mylmOK(XREG, y, w=ww, offs=offs), silent = TRUE) L0<- obj0$L0 #sum(obj0$residuals[1:n.ok]^2*ww[1:n.ok]) #+100 #perche' avevo messo +100? } else { L0 <- opz$fit.psi0$L0 } n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi0 #psi iniziali #============================================== if (display) { unlpsi<- unlist(psi0) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",0), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%5.0f", NA), " n.psi = ",formatC(Lp,digits=0,format="f"), " ini.psi = ",paste(formatC(unlpsi[1:min(5,Lp)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } id.warn <- FALSE #browser() low <- apply(Xtrue[,unique(colnames(Xtrue)),drop=FALSE], 2, min) up <- apply(Xtrue[,unique(colnames(Xtrue)),drop=FALSE], 2, max) #browser() #low <-limZ[1,unique(colnames(limZ))] #up <-limZ[2,unique(colnames(limZ))] #INVECE CHE low e up, USARE limZ che dipendono da alpha??? #no perche' poi quando si forma gruppi si formano NA.. 
#L1<-L0+10 tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) #============================================== #browser() idZ<-(plin+1):(plin+ncol(PSI)) idW<-(plin+ncol(PSI)+1): (plin+2*ncol(PSI)) #browser() deltaAll<-matrix(NA,it.max,length(psi0)) while (abs(epsilon) > tol) { i <- i + 1 #if(i==5) agg<-agg/2 #if(i==1) browser() xx <- Xtrue[,cumsum(npsii),drop=FALSE] for (p in 1:P) { psis <- sort(psi0[pos[[p]]]) gruppi <- cut(xx[,p], breaks = c(low[p] - 0.1, psis, up[p]), labels = FALSE) if(any(is.na(gruppi))) stop(paste("too many breaks for step term #", p, "?"), call.=TRUE) points <- c(low[p], psis, up[p]) right <- c(low[p], points[2:(npsii[p] + 1)] + agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[3:(npsii[p] + 2)] - points[2:(npsii[p] + 1)]), NA) left <- c(NA, points[2:(npsii[p] + 1)] - agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[2:(npsii[p] + 1)] - points[1:npsii[p]]), up[p]) for (j in 1:(npsii[p] + 1)) { xx.j <- xx[,p][gruppi == j] xx[,p][gruppi == j] <- right[j] + (xx.j - points[j]) * ((left[j + 1] - right[j])/(points[j + 1] - points[j])) } } XX<-toMatrix(xx, npsii) PSI<- matrix(psi0, n, npsi, byrow = TRUE) W <- (1/(2 * abs(XX - PSI))) Z <- (XX * W + 1/2) XREG <- cbind(x.lin, Z, W) obj <- fitter(XREG, y, ww, offs)# lm.wfit(x = X, y = y, w = w, offset = offs) b <- obj$coefficients[idZ] g <- obj$coefficients[idW] if(any(isZero(c(b, g)))) { # obj <- lm.wfit(y = y, x = XREG, offset = offs, w=ww) # b<- obj$coef[idZ] # g<- obj$coef[idW] # if(any(is.na(c(b, g)))){ if(return.all.sol) return(list(dev=dev.values, psi=psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } psi1 <- -g/b psi1<- psi0+ h*(psi1-psi0) psi1<- adj.psi(psi1, limZ) psi1<-unlist(tapply(psi1, opz$id.psi.group, sort), use.names =FALSE) #la f e' chiaramente a gradino per cui meglio dividere.. a0<-optimize(search.minOK, c(0,.5), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs, n.ok=n.ok, tol=tolOp[i]) a1<-optimize(search.minOK, c(.5,1), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs, n.ok=n.ok, tol=tolOp[i]) a<-if(a0$objective<=a1$objective) a0 else a1 #a0<-optimize(search.min, c(0,.33), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a1<-optimize(search.min, c(.33,.66), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a2<-optimize(search.min, c(.66, 1), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a<-if(a0$objective<=a1$objective) a0 else a1 #a<-if(a$objective<=a2$objective) a else a2 if(a$objectivePSI1) #obj1 <- try(mylm(XREG1, y, ww, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(XREG1, y, ww, offs), silent = TRUE) #questa e' la proposta di Salvo di ridurre 'agg' quando la soluzione "balla".. 
deltaAll[i,]<-delta<- psi1-psi0 # if(i>1){ # #browser() # agg<-ifelse(sign(deltaAll[i-1,])!=sign(deltaAll[i,]), agg/2, agg) # } if (display) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(psi1) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",i), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ",formatC(Lp,digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) #max(abs((psi1 -psi0)/psi0)) else L0<-L1 k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi1 dev.values[length(dev.values) + 1] <- L0 if (i >= it.max) { id.warn <- TRUE break } psi0<-psi1 } #end while_it #browser() psi1 <-unlist(tapply(psi1, opz$id.psi.group, sort)) PSI<- matrix(psi1, n, npsi, byrow = TRUE) U <- 1*(Xtrue>PSI) #ATTENZIONE .. Assume che obj sia stato stimato sempre! obj<-list(obj=obj, psi=psi1, psi.values=psi.values, rangeZ=rangeZ, SumSquares.no.gap=L1, idU=idZ, beta.c=b, it=i, epsilon=epsilon, id.warn=id.warn, U=U) return(obj) } #end jump.fit segmented/R/confint.segmented.lme.r0000644000176200001440000000405614415476776017031 0ustar liggesusersconfint.segmented.lme <- function(object, parm, level = 0.95, obj.boot, ...) { ci.boot <- function(m, conf.level = 0.95) { # computes three boot CI m: object returned by bootNP() est.orig <- m$coef[1, ] se.orig <- m$se[1, ] zalpha <- -qnorm((1 - conf.level)/2) EST <- m$coef[-1, ] # percentile CIt <- CIN <- CIperc <- apply(EST, 2, quantile, prob = c((1 - conf.level)/2, (conf.level + (1 - conf.level)/2)), na.rm = TRUE) # Normal-based SE <- apply(EST, 2, sd, na.rm = TRUE) CIN[1, ] <- est.orig - zalpha * SE CIN[2, ] <- est.orig + zalpha * SE # t-boot Tdistr <- (EST - matrix(m$coef[1, ], ncol = length(est.orig), nrow = nrow(m$coef) - 1, byrow = TRUE))/m$se[-1, ] quantT <- apply(Tdistr, 2, quantile, prob = c((1 - conf.level)/2, (conf.level + (1 - conf.level)/2)), na.rm = TRUE) CIt[1, ] <- est.orig - quantT[2, ] * se.orig CIt[2, ] <- est.orig - quantT[1, ] * se.orig ris <- list(norm = CIN, perc = CIperc, t = CIt) ris } opz <- list(...) if (missing(obj.boot)) { if (is.null(opz$B)) { r <- object$lme.fit$varFix SE <- sqrt(diag(r)) est <- object$lme.fit$coef$fixed zalpha <- -qnorm((1 - level)/2) CIN <- rbind(est - zalpha * SE, est + zalpha * SE) rownames(CIN) <- paste(100 * c((1 - level)/2, (level + (1 - level)/2)), "%", sep = "") } else { obj.boot <- vcov.segmented.lme(object, B = opz$B, ret.b=TRUE, seed = opz$seed, it.max.b = opz$it.max.b) CIN <- ci.boot(obj.boot, level) } } else { CIN <- ci.boot(obj.boot, level) } if(!missing(parm)){ if(is.character(parm)) { parm <- match(parm, names(fixef(object))) if(any(is.na(parm))) stop("invalid names in parm") } if(is.list(CIN)) CIN <- lapply(CIN, function(x)x[,parm, drop=FALSE]) else CIN<-CIN[,parm,drop=FALSE] } return(CIN) } segmented/R/stepmented.lm.R0000644000176200001440000005130114757372126015346 0ustar liggesusersstepmented.lm <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), keep.class=FALSE, var.psi=FALSE, ...) 
{ # --------- mylm<-function(x,y,w=1,offs=0){ x1<-x*sqrt(w) y<-y-offs y1<-y*sqrt(w) XtX <- crossprod(x1) b<-drop(solve(XtX,crossprod(x1,y1))) fit<-drop(tcrossprod(x,t(b))) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b), invXtX=solve(XtX), w=w) o } #----------- toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } #----------- agg<- 1-control$fc it.max<- control$it.max tol<- control$toll display<- control$visual digits <- control$digits min.step <- control$min.step #conv.psi <- control$conv.psi alpha <- control$alpha fix.npsi <- control$fix.npsi n.boot <- control$n.boot break.boot<- control$break.boot + 2 seed<- control$seed fix.npsi<-control$fix.npsi h<-control$h #----------- #browser() #if(!(inherits(obj,"lm") || is.vector(obj) || is.ts(obj))) stop("obj should be a 'lm' fit, a 'vector' or 'ts' object") if(!inherits(obj,"lm")) stop("obj should be a 'lm' fit") y.only.vector <- FALSE Fo0 <- formula(obj) if(missing(seg.Z)) { #if(length(all.vars(formula(obj)))==1) seg.Z<- as.formula(paste("~", "id")) assign("id",1:length(obj$residuals),parent.frame()) #id<-1:length(obj$residuals) # if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if("V" %in% sub("V[1-9]*[0-9]","V", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'V', 'V1', .. are not allowed") if("U" %in% sub("U[1-9]*[0-9]","U", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'U', 'U1', .. are not allowed") if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") if(missing(psi)){ if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 #if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? } else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } n.psi<- length(unlist(psi)) #browser() #if(missing(x)) x<-1:n #if(missing(psi)) { # if(missing(npsi)) npsi<-1 # psi<-min(x)+cumsum(rep(diff(range(x))/(npsi+1),npsi)) #} ##========================================================================= #--- preso da segmented.lm orig.call<-Call<-mf<-obj$call orig.call$formula<- mf$formula<-formula(obj) #per consentire lm(y~.) 
m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") if(class(mf$formula)[1]=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula) mfExt<- mf mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+") )) } mf <- eval(mf, parent.frame()) n<-nrow(mf) #questo serve per inserire in mfExt le eventuali variabili contenute nella formula con offset(..) nomiOff<-setdiff(all.vars(formula(obj)), names(mf)) if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep="")) nomiTUTTI<-all.vars(mfExt$formula) #comprende anche altri nomi (ad es., threshold) "variabili" nomiNO<-NULL for(i in nomiTUTTI){ r<-try(eval(parse(text=i), parent.frame()), silent=TRUE) if(class(r)[1]!="try-error" && length(r)==1 && !is.function(r) && !i%in%names(mf)) nomiNO[[length(nomiNO)+1]]<-i } if(!is.null(nomiNO)) mfExt$formula<-update.formula(mfExt$formula,paste(".~.-", paste( nomiNO, collapse="-"), sep="")) mfExt<-eval(mfExt, parent.frame()) #mf <- mfExt #browser() if(nrow(mf)!=nrow(mfExt)) stop("missing values in any stepmented covariate?") ww <- as.vector(model.weights(mf)) offs <- as.vector(model.offset(mf)) if (is.null(ww)) ww <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) mt <- attr(mf, "terms") interc<-attr(mt,"intercept") y <- model.response(mf, "any") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf, obj$contrasts) namesXREG0<-colnames(XREG) nameLeftSlopeZero<-setdiff(all.vars(seg.Z), names(coef(obj))) #in questo modo riconosce che sin(x*pi) NON e' x, ad esempio. namesXREG0<-setdiff(namesXREG0, nameLeftSlopeZero) id.duplic<-match(all.vars(formula(obj)),all.vars(seg.Z),nomatch=0)>0 if(any(id.duplic)) { new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE] new.XREGseg<-data.matrix(new.mf) XREG<-cbind(XREG,new.XREGseg) } id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG) XREGseg<-XREG[,id.n.Seg,drop=FALSE] XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE] XREG<-XREG[,unique(colnames(XREG)), drop=FALSE] n <- nrow(XREG) #browser() Z<-lapply(apply(XREGseg,2,list),unlist) #prende anche i nomi! 
name.Z <- names(Z) <- colnames(XREGseg) if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("'psi' or 'npsi' have to be *named* when there are multiple stepmented variables") id.nomiZpsi <- match(names(Z), names(psi)) if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of 'seg.Z' and 'psi' do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) if(any(is.na(psi[[i]]))) psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) } } else { for(i in 1:length(psi)) { if(any(is.na(psi[[i]]))) psi[[i]]<- (min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) } } #########==================== SE PSI FIXED id.psi.fixed <- FALSE if(!is.null(fixed.psi)){ id.psi.fixed <- TRUE if(is.numeric(fixed.psi) && n.Seg==1) { fixed.psi<-list(fixed.psi) names(fixed.psi)<-all.vars(seg.Z) } if(is.list(fixed.psi)) { if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ") } else { stop(" 'fixed.psi' has to be a named list ") } fixed.psi<-lapply(fixed.psi, sort) Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n) n.fixed.psi<-sapply(fixed.psi, length) rip.nomi <- rep( names(fixed.psi), n.fixed.psi) rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x)) colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="") PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE) fixedU<-(Zfixed-PSI)*(Zfixed>PSI) XREG<-cbind(XREG, fixedU) } #########====================END SE PSI FIXED initial.psi<-psi a <- sapply(psi, length) #n. di psi per ogni covariate #per evitare che durante il processo iterativo i psi non siano ordinati id.psi.group <- rep(1:length(a), times = a) #identificativo di apparteneza alla variabile Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) psi <- unlist(psi) #se psi e' numerico, la seguente linea restituisce i valori ordinati all'interno della variabile.. psi<-unlist(tapply(psi,id.psi.group,sort)) k <- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) #controllo se psi e' ammissibile.. c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") colnames(Z) <- nomiZ <- rep(nome, times = a) ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(.x) {1:.x}))) #browser() nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ, sep = ".") initial <- psi obj0 <- obj dev0 <-sum(ww*obj$residuals^2) list.obj <- list(obj) nomiOK<-nomiU # invXtX<-if(!is.null(obj$qr)) chol2inv(qr.R(obj$qr)) else NULL #(XtX)^{-1} # Xty<-crossprod(XREG,y) # opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, # nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, # conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc) x.lin <-XREG rangeZ <- apply(Z, 2, range) #browser() plin<-ncol(x.lin) #if(!is.list(psi)) psi<-list(psi) #P <- length(psi) #n. 
variabili con breakpoints #npsii <- sapply(psi, length) #n di psi for each covariate P<-n.Seg npsii<-a npsi<- sum(npsii) #Xtrue<-Z #psi0 <- unlist(psi) #PSI<- matrix(psi0, n, npsi, byrow=TRUE) #if(ncol(x)!=P) stop("errore") #Xtrue<-toMatrix(x, npsii) #browser() if(it.max == 0) { U <- (Z>PSI) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf, if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) #if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) psi <- cbind(psi, psi, 0) rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") colnames(psi) <- c("Initial", "Est.", "St.Err") obj$psi <- psi return(obj) } c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) #browser() opz<-list(toll=tol, dev0=dev0, display=display, it.max=it.max, agg=agg, digits=digits, rangeZ=rangeZ, usestepreg=FALSE, id.psi.group=id.psi.group, h=h, limZ=NULL, #nomiOK=nomiOK, , visualBoot=visualBoot, invXtX=invXtX, Xty=Xty, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, npsii=npsii, seed=control$seed, fit.psi0=control$fit.psi0) # ################################################################################# # #### jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) # ################################################################################# if(n.boot<=0){ obj<- step.lm.fit(y, x.lin, Z, PSI, ww, offs, opz, return.all.sol=FALSE) } else { #browser() #if("seed" %in% names(control)) set.seed(control$seed) obj<-step.lm.fit.boot(y, x.lin, Z, PSI, ww, offs, opz, n.boot, break.boot=break.boot) seed<- obj$seed } # if(!is.list(obj)){ # warning("No breakpoint estimated", call. = FALSE) # return(obj0) # } #browser() id.warn<-obj$id.warn it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart #i beta.c corripondono ai psi NON ordinati!!! # ##Nelle funzioni step i beta.c NON servono. 
Righe sotto commentate l'8/3/24 #beta.c<- obj$beta.c #beta.c<-unlist(tapply(psi, id.psi.group, function(.x)beta.c[order(.x)])) #unlist(lapply(unique(id.psi.group), function(.x) beta.c[id.psi.group==.x][order(psi[id.psi.group==.x])])) psi<-unlist(tapply(psi, id.psi.group, sort)) Z0<-apply(Z,2,sort) psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j] INF DEN <- abs(Z - PSI.mid) DEN <- apply(DEN, 2, function(.x) pmax(.x, sort(.x)[2]/2)) #pmax(.x, diff(range(.x))/1000)) #xx=Xtrue - PSI.mid #ss=n^(-.8) #den <- -xx+2*xx*pnorm(xx/ss)+2*ss*dnorm(xx/ss) #.05*log(cosh((x-.5)/.05))) V <- (1/(2 * DEN)) colnames(V)<-nomiV U <- (Z * V + 1/2) colnames(U)<-nomiU Vxb <- -V #* rep(-beta.c, each = nrow(V)) nomiVxb <- gsub("V", "psi", nomiV) nnomi <- c(nomiU, nomiVxb) #browser() for(i in 1:ncol(U)) { mfExt[nomiU[i]]<-mf[nomiU[i]] <- U[,i] mfExt[nomiVxb[i]]<-mf[nomiVxb[i]] <- Vxb[,i] } Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) objF <- update(obj0, formula = Fo, evaluate=FALSE, data = mfExt) #eliminiamo subset, perche' se e' del tipo subset=x>min(x) allora continuerebbe a togliere 1 osservazione if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL objF<-eval(objF, envir=mfExt) objF$offset<- obj0$offset objW<-objF #browser() #se1=predict.lm(objF, se.fit=TRUE) #ff<-1.934+1.61*(x>.605) #matplot(x, cbind(ff, ff-2*se$se.fit, ff+2*se$se.fit), type="l") #controllo se qualche coeff e' NA.. isNAcoef<-any(is.na(objF$coefficients)) #browser() if (isNAcoef) { nameNA.psi <- names(objF$coefficients)[which(is.na(objF$coefficients))] nameNA.U <- gsub("psi", "U", nameNA.psi) if (fix.npsi) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("coef ", nameNA.psi, " is NA: breakpoint(s) at the boundary or too close together", call. = FALSE) } else { warning("some estimate is NA (too many breakpoints?): removing ", length(nameNA.psi), " jump-point(s)", call. = FALSE) Fo <- update(Fo, paste(".~ .-", nameNA.U, "-", nameNA.psi)) objF <- update(obj0, formula = Fo, evaluate = TRUE, data = mfExt) if (!is.null(objF[["subset"]])) objF[["subset"]] <- NULL #objF$offset <- obj0$offset idNA.psi <- match(nameNA.psi, nomiVxb) nomiVxb <- setdiff(nomiVxb, nameNA.psi) nomiU <- setdiff(nomiU, nameNA.U) Z <- Z[, -idNA.psi, drop = FALSE] PSI.mid<- PSI.mid[, -idNA.psi, drop = FALSE] id.psi.group <- id.psi.group[-idNA.psi] psi <- psi[-idNA.psi] psi.rounded <- psi.rounded[, -idNA.psi, drop = FALSE] } } #organizziamo i risultati da restituire per psi... colnames(psi.rounded)<-names(psi)<-nomiVxb rownames(psi.rounded)<-c("inf [","sup (") #browser() ris.psi<-matrix(NA,length(psi), 3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi #ris.psi[,3]<-sqrt(vv) a<-tapply(id.psi.group, id.psi.group, length) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". 
Allora costruisco a.ok a.ok<-NULL nomiFINALI<-unique(nomiZ) for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) if(length(psi)!=length(initial.psi)){ ris.psi[,1]<- NA } else { initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) ris.psi[,1]<-initial #if(stop.if.error) ris.psi[,1]<-initial } objF$psi <- ris.psi objF$psi.rounded <- psi.rounded #objW<-objF #stima il modello "vero" (non-working) U<- (Z > PSI.mid) colnames(U)<-nomiU X <- cbind(x.lin, U) objF$objW<- objW objF$obj.ok<-mylm(X, y, w=ww, offs=offs) #coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) objF$fitted.values<-objF$obj.ok$fitted.values objF$residuals<- objF$obj.ok$residuals objF$coefficients[names(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients #objF$coefficients[1:length(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients objF$coefficients[nomiVxb] <-psi.rounded[1,] objF$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #Z = name.Z objF$rangeZ<-obj$rangeZ objF$Z<-Z[,unique(name.Z),drop=FALSE] objF$call <- match.call() objF$orig.call<-orig.call objF$psi.history <- psi.values objF$it <- it objF$epsilon <- obj$epsilon objF$id.warn <- id.warn if(n.boot>0) objF$seed <- seed class(objF) <- c("stepmented", class(obj0)) #Un effetto aggiuntivo.. Z.in.obj<-intersect(all.vars(Fo0), all.vars(seg.Z)) if(length(Z.in.obj)>0){ tt<-terms(Fo0)#, specials=Z.in.obj) #id<-match(Z.in.obj, all.vars(Fo0))-1 #1 e' per la risposta.. id<-match(Z.in.obj, intersect(all.vars(Fo0), names(mf)))-1 nome<-attr(tt,"term.labels")[id] Fo.ok<-as.formula(paste("~0", nome, sep="+")) f.x<-matrix(NA, 150, ncol(objF$Z[,Z.in.obj,drop=FALSE])) #prima era nrow(objF$Z) invece che 100 for(j in 1:length(Z.in.obj)){ idPsi <- nomiVxb[endsWith(nomiVxb, paste(".", Z.in.obj[j], sep = ""))] #psi <- coef(objF)[idPsi] dd<-data.frame(seq(min(objF$Z[,Z.in.obj[j]]), max(objF$Z[,Z.in.obj[j]]), l=nrow(f.x))) names(dd)<- Z.in.obj[j] M<-model.matrix(Fo.ok, data=dd) f.x[,j]<-M%*% coef(objF)[colnames(M)] } colnames(f.x)<-Z.in.obj objF$f.x<-f.x } objF$psi<- objF$psi[,-1,drop=FALSE] #rimuovi la colonna Initial if(var.psi){ Cov <- vcov.stepmented(objF, k=NULL) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) objF$psi[,"St.Err"]<-sqrt(vv) objF$vcov<- Cov } #Cov[nomiVxb, ]<- Cov[, nomiVxb] <- 0 # var.Tay<-function(est1,est2,v1,v2,v12){ # r<- est1/est2 # vv<-(v1+v2*r^2-2*r*v12)/est2^2 # vv} # # varPsi<- rep(NA, length(nomiU)) # for(j in 1:length(nomiU)){ # num<-objF$coefficients[nomiVxb[j]] # den<-objF$coefficients[nomiU[j]] # v.g <-Cov[nomiVxb[j],nomiVxb[j]] # v.b<- Cov[nomiU[j],nomiU[j]] # cov.g.b <- Cov[nomiVxb[j],nomiU[j]] # #if(is.null(rho)) # rho<-mean(Xtrue[, nomiZ[j] ,drop=TRUE]=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi) alpha<-1-alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, 
size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(seg.num.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], opz.boot)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(seg.num.fit(yy, XREG, Z.orig, PSI, weights, opz.boot)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } #if(k==7) browser() ### se est.psi.boot non e' cambiato (e puoi vederlo da all.est.psi.boot), allora cambialo! PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o<-try(suppressWarnings(seg.num.fit(y, XREG, Z.orig, PSI, w, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow = TRUE) o<-try(suppressWarnings(seg.num.fit(y, XREG, Z, PSI1, w, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) {o0<-o; k.psi.change<- k} est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if(visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), #" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " opt.dev = ", sprintf("%1.5f", as.numeric(strsplit(format(o0$SumSquares.no.gap, scientific=TRUE), "e")[[1]][1])), " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } #id.uguali<-(round(diff(all.selected.ss[c(k-1,k-2)]),6)==0)+id.uguali } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) # SS.ok<-min(all.selected.ss) # id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) # psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) # est.psi0<-psi.mean # devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ #quando vengono restituiti psi troppo vicini e l'SE non si puo' calcolare, possiamo distanziarli.. #Pero' il processo deve essere esteso nel caso in cui ci sono 3 psi vicini.. 
min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k])0) #stop.if. If type="bic" or "aic", the search of number of break can be stopped when the last 'stop.if' fits provide higher aic/bic value #th: When the distance between 2 estimated breakpoints is <=th, only one is retained. Default is th = drop(diff(rangeZ)/100) #mettere l'opzione di gdl=n.changepoint e poi (df*log(n))^alpha dove alpha=1.01 (vedi...) #sel1() si usa per G>1 #=== if(stop.if<=0) stop("'stop.if' should be an integer (at least 4, probably)") stop.if<-ceiling(stop.if) f<-function(x, soglia){ #restituisce l'indice del vettore x t.c. il valore e' il piu' piccolo #tra quelli che sono minori della soglia id <- (x<=soglia) xx <- x[id] ind <- (1:length(x))[id] id.ok <- which.min(xx) ind[id.ok] } #=== sel1 <- function(y, x, G, Kmax, type="bic",th=th,refit=FALSE,check.dslope=TRUE, msg=TRUE, bonferroni=FALSE, olm0, control){ #, a=1 #BIC<-function(obj){ # n <-length(obj$residuals) # r <- n*log(sum(obj$residuals^2)/n) + (n-obj$df.residual)*(log(n)^a) #- 1 # r #} ICname<- if(type=="bic") "BIC" else "AIC" control1<-control control1$n.boot = 0 control1$tol <- .001 # BIC.f<-if(type=="bic") BIC else AIC #--- drop.close <-function(all.psi, th){ if(length(all.psi)==1) return(all.psi) all.psi <- c(m1, sort(all.psi[!is.na(all.psi)]), m2) id<- which(diff(all.psi)<=th)[1] while(!is.na(id) && length(id)>0){ all.psi <- all.psi[-(id+1)] id<- which(diff(all.psi)<=th)[1] } #all.psi<- all.psi[-c(1, length(all.psi))] all.psi <- setdiff(all.psi,c(m1, m2)) all.psi } #start.. 
n<-length(y) K1<-ceiling(Kmax/G) #browser() #x<-1:n #n1<-ceiling(n/G) #cutvalues <- c(seq(1,n,by=n1),n+1) cutvalues <- c(min(x), cumsum(rep(sum(range(x))/G, G))) id<-cut(x, cutvalues, right=FALSE, labels=FALSE) r<-vector("list",G) #browser() for(i in 1:G){ #if(i==4) browser() yy<-y[id==i] xx<-x[id==i] olm<-lm(yy~xx) #, data=d) .a <- capture.output(r[[i]] <-try(suppressWarnings(selgmented(olm, ~xx, type=type, Kmax=K1, refit=FALSE, msg=FALSE, G=1, control=control, check.dslope=FALSE) ), silent=TRUE)) if(inherits(r[[i]],"try-error")) r[[i]]<- olm n.psi<- if(is.null(r[[i]]$psi) || any(is.na(r[[i]]$psi[,"Est."]))) 0 else nrow(r[[i]]$psi) n.psi<- if(n.psi<10) paste("", n.psi) else paste(n.psi) if(msg) { cat("\n##### subset ", paste(i, ": ...",sep="")) cat(" ",n.psi,"selected breakpoints \n") } } #browser() all.psi <-unlist(sapply(r, function(.x) .x$psi[,"Est."])) psi.fromG <- drop.close(all.psi,th) psi.removed<- setdiff(all.psi, psi.fromG) psi.removed<- psi.removed[!is.na(psi.removed)] if(length(psi.removed)>=1 && msg){ cat(paste("\n", length(psi.removed), "breakpoint(s) removed for closeness (see argument 'th')\n")) } all.psi <- psi.fromG olm <- olm0 #lm(y~x) newpsi <- cutvalues[-c(1, length(cutvalues))] if(msg){ cat(paste("\n => Assessing the", length(newpsi), "cutoff(s) as breakpoint(s). Computing the", ICname, "values.. \n")) } all.psi<-sort(c(all.psi, newpsi)) #tutti i psi #elimina quelli vicini.. psi.withCut <- all.psi <-drop.close(all.psi,th) #browser() ######## ORA stima modelli con un numero decrescente di psi... tvalueU <- psi0 <- all.psi list.psi <- list.fit<-NULL #browser() #se nella riduzione del numero di psi il modello non viene stimato, allora piuttosto che passargli i valori di psi # da cui ha arbitrariamente eliminato il primo, passagli il numero di psi.. #fit.ok<-TRUE idbreak<-FALSE conv <- bicVa <- NULL #browser() if(msg) cat(" no. breakpoints: ", length(all.psi)) control2<-control1 control2$n.boot=6 while(length(tvalueU)>=1){ .a <- capture.output(os0 <- try(suppressWarnings( segmented(olm, ~x, psi=all.psi, control=control2)), silent=TRUE)) if(!inherits(os0, "segmented")) { .a <- capture.output(os0 <- try(suppressWarnings( segmented(olm, ~x, npsi=length(all.psi), control=control1)), silent=TRUE)) } if(inherits(os0, "segmented")) { conv[length(conv)+1]<- 1 bicVa[length(bicVa)+1] <- BIC.f(os0) list.fit[[length(list.fit)+1]] <- os0 list.psi[[length(list.psi)+1]]<- os0$psi[,"Est."] if(length(os0$psi[,"Est."])==1) break tvalueU<- abs(summary(os0)$coefficients[os0$nameUV$U,3]) idU <- which.min(abs(tvalueU)) #man mano che rimuovi i psi, se il t della diffSlope del psi che stai rimuovendo e' > soglia, fermati! ## allora non continuare a toglierli.. #===>>>NOOOOO!! puo' accadere poi che toglie qualche psi, si ferma perche' tutti i t sono grandi, ma il bic e' piu' basso #in un modello precedente.. Quindi alla fine il criterio non e' ne bic e neanche tutti i t significativi. #Allora, coerentemente con il caso di G=1, la "verifica delle slope nonsignif, va fatta sul modello #selezionato con il BIC!! Quindi commentiamo le righe di sotto (il idbreak si potrebbe pure eliminare) #e il controllo checkdslope lo facciamo dopo sul modello selezionato dal bic. 
#browser() #if(check.dslope){ # soglia <- if(!bonferroni) qnorm(1-alpha/2) else qnorm(1-alpha/(2* length(os0$nameUV$U))) # if(abs(tvalueU[idU])>soglia) {idbreak=TRUE; break} #} all.psi <- os0$psi[-idU,"Est."] } else { conv[length(conv)+1]<- 0 list.psi[[length(list.psi)+1]]<-list.fit[[length(list.fit)+1]] <- NA tvalueU<-all.psi<-all.psi[-1] bicVa[length(bicVa)+1] <- if(is.numeric(bicVa[length(bicVa)])) {bicVa[length(bicVa)]+1} else {1e4} } if(msg) cat(" ..", length(all.psi)) #browser() if(length(bicVa)>stop.if && all( na.omit(rev(diff(bicVa))[1:stop.if])>0) && (sum(na.omit(rev(conv)[1:stop.if]))>0)) {idbreak<-TRUE; break} #na.omit() sta per eliminare gli NA che si creano se diff(bicVa) ha dimensione < stop.if #la sum(na.omit(rev(conv)[1:stop.if]))>0 indica che il controllo sui valori del bic va fatto solo #se gli ultimi fit non sono tutti insuccessi. } if(msg) cat(" .. 0\n") bicV <- sapply(list.fit, function(.x) if(inherits(.x, "segmented")) BIC.f(.x) else NA) if(length(bicV)!=length(bicVa)) stop("Errore inatteso 1") bicV<- bicVa #browser() if(idbreak){ if(msg) cat(paste(" =>", length(psi.withCut)-length(bicVa), "unevaluated model(s) due to", stop.if, "increasing A/BIC value(s)..\n")) #se si ferma prima, significa che sono stati valutati length(bicV) modelli con numero di psi da #"length(psi.withCut)" fino a "length(psi.withCut)-length(bicV)+1" #length(psi.withCut)-length(bicV)+1 r <- r0 <- list.fit[[which.min(bicV)]] npsi.ok <- (length(psi.withCut):(length(psi.withCut)-length(bicV)+1))[which.min(bicV)] nameBIC<-paste((length(psi.withCut):(length(psi.withCut)-length(bicV)+1))) } else { if(length(bicV)!= length(psi.withCut)) stop("Errore nella dim!!") list.fit[[length(list.fit)+1]] <- olm bicV<- c(bicV, BIC.f(olm)) r <- r0 <- list.fit[[which.min(bicV)]] npsi.ok <- ((length(psi.withCut)):0)[which.min(bicV)] if(npsi.ok!=length(r$psi[,"Est."])) stop("Unexpected error..") nameBIC<-paste((length(psi.withCut)):0) } names(bicV)<-nameBIC #DUBBIO: il controllo ed eliminazione dei psi lo facciamo all'interno del "while(length(tvalueU)>1)"? # ed inoltre al modello selezionato un ultimo fit con boot bisognerebbe darlo... #browser() if(inherits(r, "segmented")){ #ALTRO CONTROLLO SULLA VICINANZA DEI psi.. all.psi <- sort(r$psi[,"Est."]) #all.psi <- drop.close(all.psi,th) #r$psi<-all.psi #browser() cont1 <- length(all.psi)>0 && (length(all.psi)!=length(drop.close(all.psi,th))) while(cont1){ start.psi<-drop.close(all.psi,th) r0$call$psi<- start.psi .a <- capture.output(r <- suppressWarnings(try(update(r0))), type="message") all.psi<- if(inherits(r, "segmented")) r$psi[,"Est."] else start.psi[-1] cont1<- !(inherits(r, "segmented") && (length(all.psi)==length(drop.close(all.psi,th)))) cont1 <- cont1 && length(all.psi)>0 } if(inherits(r, "segmented")){ # all.psi<- r$psi[,"Est."] # soglia <- if(!bonferroni) qnorm(1-alpha/2) else qnorm(1-alpha/(2* length(r$nameUV$U)) ) # rm.id <- which(abs(summary(r)$coefficients[r$nameUV$U, 3]) <= soglia) # #=============== # if(check.dslope && length(rm.id)>0){ # # se devi controllare le slopeDiff # # #ALTRO CONTROLLO SULLA SIGNIF DELLE SLOPE-DIFF # #Non puoi eliminarli tutti assieme, perche' una volta che il piu' piccolo e stato eliminato #gli altri possono cambiare.. 
#browser() rm.after.check <- 0 all.psi<- r$psi[,"Est."] #browser() if(check.dslope){ soglia <- if(!bonferroni) qnorm(1-alpha/2) else qnorm(1-alpha/(2* length(r$nameUV$U))) tU <- abs(summary(r)$coefficients[r$nameUV$U, 3]) rm.id <- f(tU, soglia) while(length(rm.id)>0){ rm.after.check <- rm.after.check+1 all.psi <- all.psi[-rm.id] if(length(all.psi)<=0) break r0$call$psi=quote(all.psi) .a <- capture.output(r <- suppressWarnings(try(update(r0), silent=TRUE))) #senza boot if(!inherits(r,"segmented")){ #facciamo un altro tentativo.. #r0$call$psi=all.psi #control$alpha <- .005 r0$call$control<-control #con boot .a <- capture.output(r<-suppressWarnings(try(r<-update(r0), silent=TRUE))) } if(inherits(r,"segmented")){ tU <- abs(summary(r)$coefficients[r$nameUV$U, 3]) #rm.id <- which.min(tU[tU<=soglia]) rm.id <- f(tU, soglia) all.psi<-r$psi[,"Est."] } else { rm.id<-1 } } if(length(all.psi)<=0 || is.null(r$psi) ) { if(msg) warning("All found psi had a non-significant slope diff and have been removed", call.=TRUE, immediate.=TRUE) n.psi.ok<-0 r<- olm } else { if(msg) cat(paste(" => ", rm.after.check, " breakpoint(s) removed due to 'small' slope difference\n", sep="")) if(refit){ all.psi<- r$psi[,"Est."] r$call$psi=quote(all.psi) r$call$control<- quote(control) .a <- capture.output(r <- try(suppressWarnings(update(r0)))) n.psi.ok <- if(is.null(r$psi)) 0 else nrow(r$psi) } else { all.psi<- r$psi[,"Est."] n.psi.ok <- nrow(r$psi) } } } else {#end if(check.dslope) if(refit){ all.psi<- r$psi[,"Est."] r$call$psi=quote(all.psi) r$call$control<- quote(control) .a <- capture.output(r <- try(suppressWarnings(update(r)))) n.psi.ok <- if(is.null(r$psi)) 0 else nrow(r$psi) } else { all.psi<- r$psi[,"Est."] n.psi.ok <- nrow(r$psi) } } } else { #stop("Errore inatteso 1") r<-olm n.psi.ok<-0 } } else { #stop("Errore inatteso 2") r<-olm n.psi.ok<-0 } if(msg) cat("\n####### Overall: ... ", n.psi.ok, "selected breakpoint(s) \n\n") #cat(" ##### Overall ", nrow(r$psi),"selected breakpoints \n") #if(!is.list(r)) r<- olm bic.values=rbind(as.numeric(names(bicV)), bicV) rownames(bic.values)<-c("no. breakpoints", ICname) r$selection.psi <-list(bic.values=bic.values, npsi=n.psi.ok, cutvalues=cutvalues) ##cutvalues #including the extremes if(refit) r$selection.psi$psi.before.refit <-all.psi if(plot.ic) plot(t(bic.values), type="o"); points(n.psi.ok, min(bic.values[2,]), pch=19, cex=1.1) r #r<-segmented(olm, ~x, psi=r$psi[,"Est."], control=seg.control(n.boot=10,alpha=.01)) #fix.npsi=FALSE #r } #fine sel1() #===================================================================== # BIC<-function(obj, a=1){ # #Se a=1 questo e' il BIC classico (a meno di una costante) # n <-length(obj$residuals) # r <- n*log(sum(obj$residuals^2)/n) + (n-obj$df.residual)*(log(n)^a) #- 1 # r # } #===================================================================== type<-match.arg(type) if(!type%in%c("bic","aic") && Kmax!=2) stop("Kmax>2 is not (yet?) allowed with hypothesis testing procedures. 
Set type='bic' or 'aic'", call.=FALSE) if(!type%in%c("bic","aic") && G>1) stop("G>1 is allowed only with type='bic' or 'aic' ", call.=FALSE) #===================================================================== if(is.numeric(olm)){ y<-olm if(missing(seg.Z)){ Z<-x<- 1:length(y) } else { nomeX<-all.vars(seg.Z) if(length(nomeX)==1) { Z<- eval(parse(text=nomeX)) } else { stop("a single segmented variable should be specified in 'seg.Z' ") } } olm <- lm(y~x) } else { if(!inherits(olm,"lm")) stop("'olm' does not appear a (g)lm fit") y<- model.response(model.frame(olm)) if(is.matrix(y)){ nomeY<-colnames(y) } else { nomeY<- all.vars(formula(olm))[1] } #browser() if(missing(seg.Z)){ nomeX <- setdiff(all.vars(formula(olm)), nomeY) if(length(nomeX)==0 || length(nomeX)>1 || any(is.na(nomeX))) stop("I cannot determine the segmented variable") seg.Z<- as.formula(paste("~", nomeX )) Z <- olm$model[[nomeX]] if(!is.numeric(Z)) stop("The variable in the starting model does not appear to be numeric") } else { if(length(all.vars(seg.Z))>1) stop("Multiple variables are not allowed in seg.Z") nomeX<-all.vars(seg.Z) Z <- if(nomeX%in%all.vars(formula(olm))) olm$model[[nomeX]] else eval(parse(text=nomeX)) } } m1 <-min(Z) m2 <-max(Z) if(is.null(th)) th <- diff(range(Z))/100 if(G==1){ build.mf<-function(o, data=NULL){ #returns the dataframe including the possibly untransformed variables, #including weight and offset fo<-formula(o) if(!is.null(o$weights)) fo<-update.formula(fo,paste("~.+",all.vars(o$call$weights), sep="")) if(!is.null(o$call$offset)) fo<-update.formula(fo,paste("~.+",all.vars(o$call$offset), sep="")) if(!is.null(o$call$subset)) fo<-update.formula(fo,paste("~.+",all.vars(o$call$subset), sep="")) #o$call$formula<-fo if(is.null(o$call$data)) { R<-get_all_vars(fo) } else { R<-get_all_vars(fo, data=eval(o$call$data)) } R } #browser() if(type%in%c("bic","aic")){ control1<-control control1$n.boot = 0 control1$tol <- .001 #default e' .00001 control1$alpha<-.01 #se lo aumenti puo' non funzionare bene se ci sono molti psi da selezionare.. 
ICname<- if(type=="bic") "BIC" else "AIC" BIC.f<-if(type=="bic") BIC else AIC bicM0 <- BIC.f(olm) Kmax <- min(floor((olm$df.residual-1)/2), Kmax) npsi<-1:Kmax startpsi<-vector("list", length(npsi)) conv<-bic.values<- rep(NA, length(npsi)) if(!is.null(olm$call$data)) assign(paste(olm$call$data), eval(olm$call$data, envir=parent.frame() )) npsiVeri<-0 #fit with 1 breakpoint .a<-capture.output(os<- suppressWarnings(try(segmented(olm, seg.Z, npsi=1, control=control1), silent=TRUE))) ris<- NULL ris[[1]] <- os #<- suppressWarnings(try(segmented(olm, seg.Z, npsi=1, control=control1), silent=TRUE)) #if fails try boot restating if(inherits(os, "try-error")) { .a <- capture.output(os<- suppressWarnings(try(segmented(olm, seg.Z, npsi=1, control=control), silent=TRUE))) ris[[1]]<- os } if(inherits(os, "segmented")){ #if(is.null(th)) th <- drop(diff(os$rangeZ)/100) bic.values[1]<- BIC.f(os) Z<- os$model[,os$nameUV$Z] estpsi <- os$psi[,"Est."] M<-matrix(c(m1 ,rep(estpsi, each=2), m2), ncol=2, nrow=length(estpsi)+1, byrow=TRUE) psi0 <- sum(M[which.max(apply(M,1,diff)),])/2 startpsi[[1]] <- sort(c(estpsi, psi0)) conv[1] <- 1 npsiVeri[length(npsiVeri)+1]<- length(estpsi) } else { estpsi <- (m1+m2)/2 M<-matrix(c(m1 ,rep(estpsi, each=2), m2), ncol=2, nrow=length(estpsi)+1, byrow=TRUE) #psi0 <- sum(M[which.max(apply(M,1,diff)),])/2 startpsi[[1]] <- estpsi bic.values[1]<- BIC.f(olm)+1 conv[1] <- 0 npsiVeri[length(npsiVeri)+1]<- 1 } i=1 #ponilo =1 if(Kmax>=2){ if(msg) { flush.console() cat(paste("No. of breakpoints: ")) } earlyStop<- FALSE #========================================================================================== #= inizio for #browser() for(i in 2:Kmax){ #source("C:/dati/lavori/segmented/segIntermedio/segmented/R/selgmented.R") #if(i==3) browser() .a <- capture.output(os<-suppressWarnings(try(segmented(olm, seg.Z, psi=startpsi[[i-1]], control=control1), silent=TRUE))) if(msg) { flush.console() cat(paste(i,".. ")) } if(inherits(os, "segmented")) { conv[i]<-1 estpsi <- os$psi[,"Est."] bic.values[i]<- BIC.f(os) #-2*logLik(ris[[i]]))+ edf*log(n)*Cn ris[[length(ris)+1]]<- os npsiVeri[length(npsiVeri)+1]<-length(estpsi) id<- which(diff(estpsi)<=th)+1 if(length(id)>0){ #se ci sono psi troppo vicini.. #elimina quelli "vicini" aggiungine altri in modo da stimare un altro modello con lo stesso numero di psi #Quindi alla fine dovresti avere 2 o piu' bic per uno stesso numero di breakpoints estpsi <- estpsi[-id] M<-matrix(c(m1 ,rep(estpsi, each=2), m2), ncol=2, nrow=length(estpsi)+1, byrow=TRUE) diffpsi <- apply(M,1,diff) psi0<-NULL for(j in 1:(length(id))) { psi0[length(psi0)+1] <- sum(M[which(diffpsi==rev(sort(diffpsi))[j]),])/2 } startpsi[[i]] <- sort(c(estpsi, psi0)) } else { #aggiungi uno starting psi M<- matrix(c(m1 ,rep(estpsi, each=2), m2), ncol=2, nrow=length(estpsi)+1, byrow=TRUE) diffpsi <- apply(M,1,diff) id.max.diffpsi <- which.max(diffpsi) psi0 <- sum(M[id.max.diffpsi,])/2 startpsi[[i]] <- sort(c(estpsi, psi0)) } } else { #Vuoi provare a ri-stimarlo con il boot rest? 
#se non e' arrivato a convergenza: # 1) prima prova a cambiare gli starting psi # 2) prova con il boot restart M<-matrix(c(m1 ,rep(startpsi[[i-1]], each=2), m2), ncol=2, nrow=length(startpsi[[i-1]])+1, byrow=TRUE) diffpsi <- apply(M,1,diff) psi0 <- sum(M[which.max(diffpsi),])/2 start.ora<- sort(c(psi0, M[-c(which.min(diffpsi), nrow(M)),2])) startpsi[[i-1]] <- start.ora .a <- capture.output(os<-suppressWarnings(try(segmented(olm, seg.Z, psi=start.ora, control=control1), silent=TRUE))) if(!inherits(os, "segmented")) { #vai con il boot restrat .a <- capture.output(os<-suppressWarnings(try(segmented(olm, seg.Z, psi=start.ora, control=control), silent=TRUE))) } if(inherits(os, "segmented")) { conv[i]<-1 estpsi <- os$psi[,"Est."] bic.values[i]<- BIC.f(os) #-2*logLik(ris[[i]]))+ edf*log(n)*Cn ris[[length(ris)+1]]<- os npsiVeri[length(npsiVeri)+1]<-length(estpsi) #aggiungi uno starting psi M<-matrix(c(m1 ,rep(estpsi, each=2), m2), ncol=2, nrow=length(estpsi)+1, byrow=TRUE) diffpsi <- apply(M,1,diff) id.max.diffpsi <- which.max(diffpsi) psi0 <- sum(M[id.max.diffpsi,])/2 startpsi[[i]] <- sort(c(estpsi, psi0)) } else { #se dopo 3 tentativi non e' arrivato a conv comunque aggiungi uno starting psi, # il bic e' bic.precedente+1 e ris[[i]] sara' NA #control1$alpha<-.005 ris[[length(ris)+1]]<- NA conv[i]<-0 bic.values[i]<- bic.values[i-1]+1 M<-matrix(c(m1 ,rep(startpsi[[i-1]], each=2), m2), ncol=2, nrow=length(startpsi[[i-1]])+1, byrow=TRUE) diffpsi <- apply(M,1,diff) psi0 <- sum(M[which.max(diffpsi),])/2 #aggiunge un psi ai precedenti startpsi[[i]] <- sort(c(startpsi[[i-1]], psi0)) npsiVeri[length(npsiVeri)+1]<- length(startpsi[[i-1]]) } } #if(i==8) browser() #un controllo su bic values.. fermarsi SE gli ultimi K sono NA oppure sono crescenti!! #bic.values<-bic.values[!is.na(bic.values)] #ind1 e' se gli ultimi modelli non sono arrivati a convergenza per cui il bic e' stato aumentato di 1 rispetto al precedente.. #poiche' ci possono essere piu' bic per uno stesso numero di break, ne devi considerare solo uno (il minimo) #altrimenti -se i bic per uno stesso numero di break sono decrescenti- la valutazione del bic crescente e' sballata #if(i==5) browser() bicValuesTest<-tapply(na.omit(c(bicM0,bic.values)), npsiVeri, min) ind1<-(i>=stop.if && all(diff(rev(na.omit(bicValuesTest)))[1:3]==-1)) ind2<-(i>=stop.if&& length(bicValuesTest)>stop.if && all(rev(na.omit(diff(bicValuesTest)))[1:stop.if]>0)) if(ind1 || ind2) {earlyStop<-TRUE;break} } #end in for(2 in Kmax) #browser() #if(msg) cat(paste(" =>", length(psi.withCut)-length(bicVa), "unevaluated model(s) due to", stop.if, "increasing A/BIC value(s)..\n")) if(msg) cat("\n") } else { #se Kmax=1 earlyStop<-FALSE } #npsiVeri <-sapply(ris, function(.x) if(is.list(.x))nrow(.x$psi) else NA ) #[1:i] #npsiVeri <- npsiVeri[!is.na(npsiVeri)] #if(length(npsiVeri)!=max(npsiVeri)) { # id.psi.repl<-which(diff(npsiVeri)==0) # sapply(id.psi.repl, function(.x) which(npsiVeri==.x)) #} bic.valuesOrig <- bic.values #bic.values<- bic.values#[1:i] #bic.values[!is.na(bic.values)] bic.values <- c(bicM0, bic.values) bic.values <- bic.values[1:length(npsiVeri)] names(bic.values)<-npsiVeri n.psi.ok<- npsiVeri[which.min(bic.values)] #browser() if(n.psi.ok==0){ m<-matrix(NA,1,1, dimnames=list(NULL, "Est.")) olm$selection.psi<- list(bic.values=bic.values, npsi=n.psi.ok) olm$psi<-m if(msg){ if(earlyStop) cat(paste("(search truncated at ", i, " breakpoints due to increasing values of ", ICname ,") \n", sep="")) cat(paste("\n",ICname, " to detect no. 
of breakpoints:\n",sep="")) print(bic.values) cat(paste("\nNo. of selected breakpoints: ", n.psi.ok, " \n")) } bic.values=rbind(npsiVeri, bic.values) rownames(bic.values)<-c("npsi", paste(ICname, "value",sep="")) selection.psi <- list(bic.values=bic.values, npsi=n.psi.ok) if(return.fit) { #browser() olm$selection.psi <- selection.psi return(olm) } else { return(list(selection.psi=selection.psi)) #return(list(bic.values=bic.values, npsi=n.psi.ok)) } } #browser() if(Kmax==n.psi.ok && msg) warning(paste("The best",ICname, "value at the boundary. Increase 'Kmax'?"), call.=FALSE, immediate. = TRUE) id.best<- which.min(bic.values[-1]) #browser() r<- r0 <- ris[[id.best]] #browser() rm.after.check <- 0 all.psi<- r$psi[,"Est."] if(check.dslope){ soglia <- if(!bonferroni) qnorm(1-alpha/2) else qnorm(1-alpha/(2* length(r$nameUV$U))) tU <- abs(summary(r)$coefficients[r$nameUV$U, 3]) #if(length(tU[tU<=soglia])==length(tU)) #anche se tutti i t<= soglia fai comunque la procedura, perche' #riducendo i psi, i tU potrebbero cambiare #rm.id <- which.min(tU[tU<=soglia]) rm.id <- f(tU, soglia) while(length(rm.id)>0){ rm.after.check <- rm.after.check+1 all.psi <- all.psi[-rm.id] if(length(all.psi)<=0) break r0$call$psi=quote(all.psi) .a <- capture.output(r <- suppressWarnings(try(update(r0), silent=TRUE))) #senza boot if(!inherits(r,"segmented")){ #facciamo un altro tentativo.. #r0$call$psi=all.psi #control$alpha <- .005 r0$call$control<-control #con boot .a <- capture.output(r<-suppressWarnings(try(r<-update(r0), silent=TRUE))) } if(inherits(r,"segmented")){ tU <- abs(summary(r)$coefficients[r$nameUV$U, 3]) #rm.id <- which.min(tU[tU<=soglia]) rm.id <- f(tU, soglia) all.psi<-r$psi[,"Est."] } else { rm.id<-1 } } #browser() if(length(all.psi)<=0 || is.null(r$psi) ) { n.psi.ok<-0 r<- olm } else { n.psi.ok <- nrow(r$psi) } } else { #end if(check.dslope) #ATTENZIONE: SE NON HA SELEZIONATO BREAKPOINTS??? n.psi.ok<-length(all.psi) #in realta' gia' c'e' "n.psi.ok" } if(refit && length(all.psi)>0){ r$call$psi<- all.psi #control$alpha <- .005 r$call$control<-quote(control) #con boot r <- update(r) } if(plot.ic) { if(rm.after.check!=0){ warning(paste("Some psi have been removed due to check on the slope difference; the", ICname, "plot could be misleading"), call.=FALSE) } plot(npsiVeri, bic.values, xlab=" No. of breakpoints", ylab=ICname, type="o") points(n.psi.ok, min(bic.values), pch=19, cex=1.1) } #browser() if(msg){ if(earlyStop) cat(paste("(search truncated at ", i, " breakpoints due to ", stop.if, " increasing values of ", ICname ,") \n", sep="")) cat(paste("\n",ICname, " to detect no. of breakpoints:\n",sep="")) print(bic.values) add.msg <- if(rm.after.check==0) " \n" else paste(" (", rm.after.check, " breakpoint(s) removed due to small slope diff)\n", sep="") cat(paste("\nNo. of selected breakpoints:", n.psi.ok, add.msg)) } if(!return.fit) { bic.values=rbind(npsiVeri, bic.values) rownames(bic.values)<-c("npsi", paste(ICname, "value",sep="")) r<-list(selection.psi=list(bic.values=bic.values, npsi=n.psi.ok)) return(r) } bic.values=rbind(npsiVeri, bic.values) rownames(bic.values)<-c("npsi", paste(ICname, "value",sep="")) r$selection.psi <- list(bic.values=bic.values, npsi=n.psi.ok) if(refit) r$selection.psi$psi.before.refit <-all.psi return(r) } else { #end aic/bic. 
Quindi se score o davies alpha.adj<-alpha/Kmax p1<- if(type=="score") pscore.test(olm, seg.Z, n.break=2)$p.value else davies.test(olm)$p.value p1.label<-"p-value '0 vs 2' " if(p1>alpha.adj){ p2.label<-"p-value '0 vs 1' " p2<- if(type=="score") pscore.test(olm, seg.Z, n.break=1)$p.value else p1 #davies.test(olm)$p.value if(!bonferroni) alpha.adj<- alpha if(p2>alpha.adj) { out<-olm } else { out<-segmented(olm, seg.Z, npsi=1, control=control) } } else { p2.label<-"p-value '1 vs 2' " ################# #browser() #MF<-build.mf(olm) #olm<-update(olm, data=MF) #olm$call$data<-quote(MF) #olm<-update(olm, data=model.frame(olm)) #questo e' necessario per far funzionare davies.test() sotto.. ################ if(type=="score") { o1<-segmented(olm, seg.Z, npsi=1, control=control) p2<-pscore.test(o1, seg.Z, more.break=TRUE)$p.value } else { #KK<-new.env() #olm1<-update(olm, data=model.frame(o1)) #o1<- update(o1, obj=olm1) MF<-build.mf(olm) olm<-update(olm, data=MF) #olm$call$data<-quote(MF) #olm<-update(olm, data=model.frame(olm)) #questo e' necessario per far funzionare davies.test() sotto.. o1 <- segmented(olm, seg.Z, npsi = 1, control = control) p2<- davies.test(o1, seg.Z)$p.value } if(!bonferroni) alpha.adj<-alpha if(p2>alpha.adj) { o1<-segmented(olm, seg.Z, npsi=1, control=control) #cat("One breakpoint detected\n") out<-o1 } else { o2<-segmented(olm, seg.Z, npsi=2, control=control) #cat("Two breakpoint detected\n") out<-o2 } } n.psi.ok<-length(out$psi[,"Est."]) x2<- -2*sum(log(c(p1,p2))) p<-1-pchisq(x2, df=2*2) r<-list(pvalues=c(p1=p1, p2=p2, p=p), npsi=n.psi.ok) attr(r, "label")<- p2.label if(!return.fit) { return(r) } if(msg){ cat("Hypothesis testing to detect no. of breakpoints\n") type <- chartr(strsplit(type,"")[[1]][1], toupper(strsplit(type,"")[[1]][1]), type) #serve per render maiuscola la prima lettera.. cat(paste("statistic:", type," level:", alpha, " Bonferroni correction:", bonferroni, "\n")) cat(paste(p1.label, "= ", format.pval(p1,4), " ", p2.label, "= ", format.pval(p2,4) , " \nOverall p-value = ", format.pval(p,4),"\n",sep="")) cat(paste("No. of selected breakpoints: ", n.psi.ok, "\n")) } out$selection.psi<-r return(out) }#end if(score o davies) } else { #se G>1 r <- sel1(y=y, x=Z, G=G, Kmax=Kmax, type=type, th=th, refit=refit, check.dslope = check.dslope, msg=msg, bonferroni=bonferroni, olm=olm, control=control) r } } segmented/R/points.segmented.r0000644000176200001440000000371514616155603016114 0ustar liggesuserspoints.segmented <-function(x, term, interc=TRUE, link=TRUE, rev.sgn=FALSE, transf=I, .vcov=NULL, .coef=NULL, const=0, v=TRUE, ...){ #-------------- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #------------- if(missing(term)){ if(length(x$nameUV$Z)>1 ){ stop("please, specify `term'") } else { term<-x$nameUV$Z } } else { term<- if(is.numeric(term)) x$nameUV$Z[term] else term } if(!term%in%x$nameUV$Z) stop("unknown 'term' ") opz<-list(...) 
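    # A minimal usage sketch of this method (illustrative only; 'os' denotes a
    # hypothetical segmented fit with segmented variable 'xvar'):
    #   plot(os, term = "xvar")
    #   points(os, term = "xvar", col = 3)   #adds the fitted values at the estimated breakpoints
    #                                        #and, with v=TRUE (the default), vertical guide segments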
if(is.null(opz$col)) opz$col <- 2 if(is.null(opz$lty)) opz$lty <- 3 nameV<- x$nameUV$V[f.U(x$nameUV$V, term)] psii<- x$psi[nameV, "Est."] d<-data.frame(a=psii) names(d)<-term opz$y<-broken.line(x,d, se.fit=FALSE, interc=interc, link=TRUE, .coef=.coef, .vcov=.vcov)[[1]] #browser() if(rev.sgn) psii<- -psii opz$x<- psii if(is.null(opz$cex)) opz$cex<-1.25 if(is.null(opz$lwd)) opz$lwd<-1.6 opz$y <- opz$y + const #browser() if(inherits(x, "glm") && !link) { opz$y<- x$family$linkinv(opz$y) } opz$y<-do.call(transf, list(opz$y)) do.call(points, opz) if(v) segments(psii, par()$usr[3], psii, opz$y, lty = opz$lty, col=opz$col ) invisible(NULL) } segmented/R/seg.r0000644000176200001440000000264714617430275013411 0ustar liggesusersseg <- function(x, npsi=1, psi=NA, est=NA, R=NA, fixed.psi=NULL, by=NULL, f.x=I){ #------------ r<-x r<- if(!is.null(by)) cbind(r,by) else cbind(r) nome <- deparse(substitute(x)) if(is.null(by)) { if(is.matrix(x) && ncol(x)>=2) { colnames(r) <- if(is.null(colnames(x))) paste(nome, 1:ncol(x), sep="") else colnames(x) } else { colnames(r) <-nome } nome <- colnames(r) #la riga sotto la lascio perche' cosi' come nomeBy restitusce "NULL" piuttostoc he NULL attr(r,"nomeBy")<-deparse(substitute(by), backtick = TRUE, width.cutoff = 500) } else { if(is.matrix(by)) { if(!is.numeric(by)) stop(" the matrix should be numeric") colnames(r)[1]<-nome attr(r,"nomeBy")<-paste(colnames(by), collapse=",") } else { #r<-cbind(x, as.factor(by)) #mettere questo cosi' puo' funzionare anche se by e' "character" o "numeric"? colnames(r)<-c(nome, "by") #Perche' non usare attr(r,"nomeBy") attr(r,"nomeBy")<-deparse(substitute(by), backtick = TRUE, width.cutoff = 500) } } attr(r,"nomeX")<- nome attr(r,"psi")<- psi attr(r,"npsi")<- npsi attr(r,"est")<- est attr(r,"R")<- R attr(r,"fix.psi")<- fixed.psi attr(r,"f.x")<- f.x attr(r, "by")<-by attr(r,"levelsBy")<-levels(by) #class(r) <- c("withAttributes", class(r)) r } segmented/R/predict.segmented.r0000644000176200001440000002436314666071520016234 0ustar liggesusers#new predict.segmented predict.segmented<-function(object, newdata, se.fit=FALSE, interval=c("none","confidence", "prediction"), type = c("link", "response"), na.action=na.omit,# "terms"), level=0.95, .coef=NULL, ...){ blockdiag <- function(...) { args <- list(...) nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } dummy.matrix<-function(x.values, x.name, obj.seg, psi.est=TRUE, isV=FALSE, .coef=NULL){ #given the segmented fit 'obj.seg' and a segmented variable x.name with corresponding values x.values, #this function simply returns a matrix with columns (x, (x-psi)_+, -b*I(x>psi)) #if obj.seg does not include the coef for the linear "x", the returned matrix is ((x-psi)_+, -b*I(x>psi)) f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. 
      #  it differs from NULL), the indices of 'nomiU' matching 'term' are returned
      k<-length(nomiU)
      nomiUsenzaU<-strsplit(nomiU, "\\.")
      nomiU.ok<-vector(length=k)
      for(i in 1:k){
        nomi.i<-nomiUsenzaU[[i]][-1]
        if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".")
        nomiU.ok[i]<-nomi.i
      }
      if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term]
      return(nomiU.ok)
    }
    estcoef <- if(is.null(.coef)) coef(obj.seg) else .coef
    if(length(isV)==1) isV<-c(FALSE,isV)
    n<-length(x.values)
    #the following lines (wrongly) selected both "U1.x" and "U1.neg.x" (when "x" and "neg.x" were both segmented covariates)
    #nameU<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$U, value = TRUE)
    #nameV<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$V, value = TRUE)
    nameU<-obj.seg$nameUV$U[f.U(obj.seg$nameUV$U,x.name)]
    nameV<-obj.seg$nameUV$V[f.U(obj.seg$nameUV$V,x.name)] #grep(x.name, obj.seg$nameUV$V, value = TRUE)
    if(is.null(obj.seg$constr)){
      diffSlope<-estcoef[nameU]
    } else {
      diffSlope<-drop(obj.seg$constr$invA.RList[[match(x.name, obj.seg$nameUV$Z)]]%*%estcoef[nameU])[-1]
    }
    est.psi<-obj.seg$psi[nameV,"Est."]
    se.psi<-obj.seg$psi[nameV, "St.Err"]
    k<-length(est.psi)
    PSI <- matrix(rep(est.psi, rep(n, k)), ncol = k)
    SE.PSI <- matrix(rep(se.psi, rep(n, k)), ncol = k)
    newZ<-matrix(x.values, nrow=n,ncol=k, byrow = FALSE)
    dummy1<-if(isV[1]) (newZ-PSI)*pnorm((newZ-PSI)/SE.PSI) else (newZ-PSI)*(newZ>PSI) #pmax(newZ-PSI,0)
    if(psi.est){
      V<-if(isV[2]) -pnorm((newZ-PSI)/SE.PSI) else -(newZ>PSI) #ifelse(newZ>PSI,-1,0)
      dummy2<- if(k==1) V*diffSlope else V%*%diag(diffSlope) #t(diffSlope*t(-I(newZ>PSI)))
      newd<-cbind(x.values,dummy1,dummy2)
      #colnames(newd)[1]<- x.name
      colnames(newd)<-c(x.name,sub("psi","U", nameV), nameV)
    } else {
      newd<-cbind(x.values,dummy1)
      #colnames(newd)[1]<- x.name
      colnames(newd)<-c(x.name, sub("psi","U", nameV))
    }
    #if(!x.name%in%names(coef(obj.seg))) newd<-newd[,-1,drop=FALSE] #the main term is always returned..
#aggiungi (eventualmente) le colonne relative ai psi noti all.psi<-obj.seg$indexU[[x.name]] if(length(all.psi)!=k){ nomi.psi.noti<-setdiff(names(all.psi),nameU) psi.noti<-setdiff(all.psi, est.psi) PSI.noti <- matrix(rep(psi.noti, rep(n, length(psi.noti))), ncol = length(psi.noti)) nomi<-c(colnames(newd),nomi.psi.noti) newd<-cbind(newd, (newZ-PSI.noti)*(newZ>PSI.noti)) colnames(newd)<-nomi } return(newd) } estcoef <- if(is.null(.coef)) coef(object) else .coef if(is.null(names(estcoef))) stop("the coef estimates should be named") nLin<- length(setdiff(names(coef(object)), c(object$nameUV$U,object$nameUV$V))) nSeg<- length(object$nameUV$Z) type<-match.arg(type) interval<-match.arg(interval) #browser() if(inherits(object, "glm") && object$family$family!="gaussian" && interval=="prediction") stop("prediction intervals are not allowed with non-gaussian glm") nameU<-object$nameUV$U nameV<-object$nameUV$V nameZ<-object$nameUV$Z #browser() if(missing(newdata)){ X <- model.matrix(object) idNA<- rep(FALSE, nrow(X)) } else { #browser() #nomiLin <- setdiff(all.vars(formula(object))[-1], c(object$nameUV$U,object$nameUV$V)) nomiLin <- setdiff(all.vars(as.formula(paste("~",paste(formula(object))[3]))), c(object$nameUV$U,object$nameUV$V)) if(any(is.na(match(nomiLin, names(newdata))))) stop(" 'newdata' should includes all variables") #devi trasformare la variabili segmented attraverso dummy.matrix() na.arg <- deparse(substitute(na.action)) idNA<- !complete.cases(newdata) if(any(idNA)){ newdata<-na.omit(newdata) } if(!na.arg%in%c("na.omit","na.pass")) stop("na.action should be 'na.omit' or 'na.pass'") n<-nrow(newdata) r<-NULL if(length(object$call$obj)>0){ #se l'ogg e' stato ottenuto da segmented.* # Fo<- formula(delete.response(terms(formula(eval(object$call$obj))))) # idSeg<- object$nameUV$Z %in% all.vars(Fo) # if(any(!idSeg)){ # Fo<- update.formula(Fo, as.formula(paste("~.+", paste(object$nameUV$Z[!idSeg], collapse="+")))) # } #nomiTerms, a differenza di nomiLin, include eventuali poly(w,2) nomiTerms<-setdiff(attr(terms(formula(object)),"term.labels"),c(object$nameUV$U,object$nameUV$V)) idSeg<- object$nameUV$Z %in% nomiLin #potresti mettere anche "nomiTerms" if(any(!idSeg)){ nomiTerms <- c(nomiTerms, object$nameUV$Z[!idSeg]) } Fo<-as.formula(paste("~.+", paste(nomiTerms, collapse="+"))) M<-model.matrix(Fo, data=newdata, contrasts=object$contrasts, xlev = object$xlevels) } else { #se l'ogg e' stato ottenuto da segreg #browser() Fo<-as.formula(object$nameUV$formulaSegAllTerms) if(any(all.vars(Fo)%in%names(object$xlevels))){ M<-model.matrix(Fo, data=newdata, contrasts = object$contrasts, xlev=object$xlevels) } else { M<-model.matrix(Fo, data=newdata) } #nomiLin<- all.vars(object$formulaLin)[-1] #non funziona se la rispo e' cbind(y,n-y) nomiLin <- all.vars(as.formula(paste("~",paste(object$formulaLin)[3]))) if(any(!nomiLin%in%all.vars(Fo))){ #nomiLinOK<- nomiLin[!nomiLin%in%all.vars(Fo)] terminLin<-attr(terms(object$formulaLin),"term.labels")[!nomiLin%in%all.vars(Fo)] Fo <- as.formula(paste("~.-1+",paste(terminLin,collapse="+"))) #Fo <- update.formula(Fo, as.formula(paste("~.+",paste(terminLin,collapse="+")))) M1<-model.matrix(Fo, data=newdata, contrasts = object$contrasts, xlev=object$xlevels) M<-cbind(M, M1) #[,nomiLinOK,drop=FALSE]) } } for(i in 1:length(nameZ)){ x.values <- M[,nameZ[i]] DM<-dummy.matrix(x.values, nameZ[i], object) r[[i]]<-DM } #browser() X <-data.matrix(matrix(unlist(r), nrow=n, byrow = FALSE)) colnames(X)<- unlist(sapply(r, colnames)) X<-cbind(M,X) X<-X[,unique(colnames(X)),drop=FALSE] 
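    # A minimal usage sketch of predict.segmented (illustrative only; 'os' is a
    # hypothetical segmented fit including the segmented variable 'xvar'):
    #   nd <- data.frame(xvar = seq(0, 10, l = 50))
    #   predict(os, newdata = nd, se.fit = TRUE)             #fitted values and standard errors
    #   predict(os, newdata = nd, interval = "confidence")   #pointwise confidence intervals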
if("(Intercept)" %in% names(estcoef)) X<-cbind("(Intercept)"=1,X) } if(!is.null(object$constr)){ for(i in 1:length(nameZ)){ nomeU.i<-grep(object$nameUV$Z[i], object$nameUV$U, value=TRUE) idU.i <- match(nomeU.i, names(estcoef)) coef.new<-drop(object$constr$invA.RList[[i]]%*%estcoef[nomeU.i]) names(coef.new)<-c(object$nameUV$Z[i], paste("U",1:(length(coef.new)-1),".",object$nameUV$Z[i],sep="" )) estcoef<-append(estcoef[-idU.i], coef.new, after=idU.i[1]-1) } } X<-X[,names(estcoef),drop=FALSE] if(length(setdiff(colnames(X),names(estcoef)))>0) stop("error in the names (of the supplied newdata)") #browser() colnomi<- colnames(X) colnomi.noV <- setdiff(colnomi, nameV) X.noV <- X[, colnomi.noV, drop=FALSE] estcoef.noV<- estcoef[colnomi.noV] #ignora eventuali altre variabili contenute in newdata #nomiOK<- intersect(names(estcoef.noV), colnames(X.noV)) #X.noV<- X.noV[, nomiOK, drop=FALSE] #estcoef.noV<-estcoef.noV[nomiOK] mu <- eta<- drop(X.noV%*% estcoef.noV) if(!is.null(object$offset)) mu<- eta<- eta+ object$offset X <- X[,c(colnomi.noV, nameV),drop=FALSE] if(inherits(object, "glm") && type=="response") { mu<-object$family$linkinv(mu) } #browser() if(interval!="none" || se.fit){ V <- vcov(object) if(!is.null(object$constr)){ B=if(nLin>0) append(list(diag(nLin)), object$constr$invA.RList, 1) else object$constr$invA.RList B=append(B, list(diag(length(nameV))), 2) B= do.call(blockdiag, B) V <- B %*% V %*% t(B) } else { X <- X[,colnames(V)] } se <- sqrt(rowSums((X %*% V) * X)) if(inherits(object, "glm")) { if(type=="response") se <- abs(object$family$mu.eta(eta))*se z<-abs(qnorm((1-level)/2)) s2<-summary(object)$dispersion } else { z <- abs(qt((1-level)/2, df=object$df.residual)) s2<- summary(object)$sigma^2 } if(any(idNA) && na.arg=="na.pass"){ mu0<-mu se0<-se mu<-se<- rep(NA, length(idNA)) mu[!idNA]<-mu0 se[!idNA]<-se0 } if(interval=="confidence"){ mu<-cbind(fit=mu, lwr=mu-z*se, upr=mu+z*se) } if(interval=="prediction"){ mu<-cbind(fit=mu, lwr=mu-z*sqrt(se^2+s2), upr=mu+z*sqrt(se^2+s2)) } } else { if(any(idNA)&& na.arg=="na.pass"){ mu0<-mu mu<- rep(NA, length(idNA)) mu[!idNA]<-mu0 } } if(se.fit) { mu <- list(fit=mu, se.fit=se, df= object$df.residual, residual.scale=sqrt(s2)) if(!inherits(object, "glm")) mu$df<- object$df.residual } return(mu) } segmented/R/stepmented.numeric.r0000644000176200001440000004440414757372152016445 0ustar liggesusersstepmented.numeric <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), keep.class=FALSE, var.psi=FALSE, ..., pertV=0, centerX=FALSE, adjX=NULL, weights=NULL) { #, only.mean=TRUE #pertV come calcolare la variabile V=1/(2*abs(Xtrue-PSI)? i psi devono essere diversi dalle x_i # utilizzare i psi stimati che tipcamente sono diversi? (perV=0) # oppure i psi.mid che sicuramente sono (o meglio dovrebbero essere) tra due x_i... # --------- only.mean=TRUE if(!only.mean){ if(!missing(psi)) warning("If only.mean=FALSE, 'psi' is ignored. Use 'npsi'.") if(missing(npsi)) npsi=1 if(length(npsi)==1) npsi=c(npsi,npsi) #o <- stepVar(y=obj, npsi=c(1,1), itmax=10, display=TRUE, control=control, ...) 
#return(o) npsiM=npsi[1] npsiV=npsi[2] itmax=20 display=control$visual control$visual<-FALSE y<-obj x <- 1:length(y)#/length(y) psiM<-(min(x)+ diff(range(x))*(1:npsiM)/(npsiM+1)) psiV<-(min(x)+ diff(range(x))*(1:npsiV)/(npsiV+1)) if(npsiM>0) { oM <- stepmented.numeric(y, psi=psiM, control=control) } else { oM<- lm(y~1) psiM.r<-psiM<-NA } ly <- log(oM$residuals^2) coefM<-matrix(NA, itmax,npsiM*2+1) coefV<-matrix(NA, itmax,npsiV*2+1) #o0<-lm(ly~1) assign("ly", ly, envir=parent.frame()) #browser() for (i in 1:itmax){ #if(i==3) browser() coefM[i,]<-oM$coefficients assign("ly", ly, envir=parent.frame()) oV <- stepmented.numeric(ly, npsi = npsiV, control=control) #oV <- stepmented.lm(o0, psi = psiV, control=control) psiV <- oV$psi[,"Est."] psiV.r <- oV$psi.rounded[1,] coefV[i,]<-oV$coefficients ww <- 1 / exp(oV$fitted.values) #o <- lm(y ~ 1, weights = ww) #browser() if(npsiM>0){ psiM<- oM$psi[,"Est."] oM <- stepmented.numeric(y, psi = psiM, weights=ww, control=control) #var.psi=FALSE psiM <- oM$psi[,"Est."] psiM.r<- oM$psi.rounded[1,] } #if(display) cat("iteration:", i, " psi:", oV$psi.rounded[1,], " est:", round(oV$obj.ok$coefficients[1:min(3,length(oV$obj.ok$coefficients))],3),"\n") if(display) cat("it:", i, " psi(mean):", psiM.r, " psi(dispersion):", psiV.r, "\n") #est:", round(oV$obj.ok$coefficients[1:min(3,length(oV$obj.ok$coefficients))],3),"\n") ly.old<-ly ly <- log(oM$residuals^2) #if(i==5) browser() if(sum( (ly-ly.old)/ly.old)^2<=.0001) break } #browser() r<-list(fitMean=oM, fitDisp=oV, coefIter=cbind(coefM,NA,coefV)) return(r) } mylm.W<-function(x,y,w=1){ x1<-x*sqrt(w) y1<-y*sqrt(w) XtX <- crossprod(x1) b<-drop(solve(XtX,crossprod(x1,y1))) fit<-drop(tcrossprod(x,t(b))) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b), invXtX=solve(XtX), L0=sum(w*r^2)) o } mylm.noW<-function(x,y,w=1){ XtX <- crossprod(x) b<-drop(solve(XtX,crossprod(x,y))) fit<-drop(x%*%b) #tcrossprod(x,t(b))) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b), invXtX=solve(XtX), L0=sum(r^2)) o } mylm<- if(is.null(weights)) mylm.noW else mylm.W #----------- toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } #----------- agg<- 1-control$fc it.max<- control$it.max tol<- control$toll display<- control$visual digits <- control$digits min.step <- control$min.step conv.psi <- control$conv.psi alpha <- control$alpha fix.npsi <- control$fix.npsi n.boot <- control$n.boot break.boot<- control$break.boot +2 seed<- control$seed fix.npsi<-control$fix.npsi h<-control$h #----------- #browser() if(!is.vector(obj)) stop(" 'obj' should be a numerical vector ") #if(is.vector(obj) || is.ts(obj)){ #if(is.matrix(obj) && ncol(obj)>1) stop("if matrix 'obj' should have 1 column") #obj<-drop(obj) if(!missing(seg.Z) && length(all.vars(seg.Z))>1) stop(" multiple seg.Z allowed only with (g)lm models") Fo0<-as.formula(paste(deparse(substitute(obj))," ~ 1", sep="")) y.only.vector <- TRUE y<-obj if(missing(seg.Z)) { x<-1:length(y) min.x<- min(x) name.Z <- "index" # if(is.null(adjX)) adjX<-FALSE } else { x<-eval(parse(text=all.vars(seg.Z))) name.Z <- all.vars(seg.Z) # adjX= FALSE } min.x<- min(x) if(is.null(adjX)) { adjX<- if(min.x>=1000) TRUE else FALSE } if(adjX) x<- x - min.x if(missing(psi)){ if(missing(npsi)) npsi<-1 #stop(" 
psi or npsi have to be provided ") psi<- seq(min(x), max(x), l=npsi+2)[-c(1, npsi+2)] #psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) } else { npsi<-length(psi) } initial.psi<- psi n<-length(y) a<- npsi n.Seg<-1 Z <- matrix(x, ncol=a, nrow=n, byrow = FALSE) XREG <- matrix(1, nrow=n, ncol=1) ww<-rep(1, n) #offs<-rep(0,n) PSI<-matrix(psi, ncol=a, nrow=n, byrow = TRUE) #name.Z <- if(missing(seg.Z)) "id" else all.vars(seg.Z) nomiU<-paste("U", 1:a, ".", name.Z,sep="") nomiV<-paste("V", 1:a, ".", name.Z,sep="") colnames(Z)<-nomiZ<-rep(name.Z, a) id.psi.group <- rep(1:length(a), times = a) orig.call<-NULL #################################################### # invXtX<-if(!is.null(obj$qr)) chol2inv(qr.R(obj$qr)) else NULL #(XtX)^{-1} # Xty<-crossprod(XREG,y) # opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, # nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, # conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc) #x<- Z x.lin <-XREG #if(is.vector(x)) x<-as.matrix(x) #dev0<- n*var(y) #sum(mylm(x.lin, y, ww)$residuals^2*ww) #dev0<- if(!display) var(y)*n else sum(mylm(x.lin, y)$residuals^2) dev0 <- if(is.null(weights)) var(y)*(n-1) else sum(weights*(y-weighted.mean(y, weights))^2) rangeZ <- apply(Z, 2, range) #browser() plin<-ncol(x.lin) #if(!is.list(psi)) psi<-list(psi) #P <- length(psi) #n. variabili con breakpoints #npsii <- sapply(psi, length) #n di psi for each covariate P<-n.Seg npsii<-a npsi<- sum(npsii) Xtrue<-Z #psi0 <- unlist(psi) #PSI<- matrix(psi0, n, npsi, byrow=TRUE) #if(ncol(x)!=P) stop("errore") #Xtrue<-toMatrix(x, npsii) #browser() if(it.max == 0) { mfExt<- data.frame(y, Z) names(mfExt)<-c(all.vars(Fo0), name.Z) ripetizioni<-unlist(tapply(nomiZ, nomiZ, function(.x)1:length(.x))) U <- (Xtrue>PSI) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf, if(!is.null(weights)) obj <- update(obj, weights=weights) if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) #if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) psi <- cbind(psi, psi, 0) rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") colnames(psi) <- c("Initial", "Est.", "St.Err") obj$psi <- psi return(obj) } c1 <- apply((Xtrue <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Xtrue >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) opz<-list(toll=tol, dev0=dev0, display=display, it.max=it.max, agg=agg, digits=digits, rangeZ=rangeZ, id.psi.group=id.psi.group,h=h, #nomiOK=nomiOK, visualBoot=visualBoot, invXtX=invXtX, Xty=Xty, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, npsii=npsii, seed=control$seed) # ################################################################################# # #### jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) # ################################################################################# 
#browser() if(is.null(weights)){ if(n.boot<=0){ obj<- step.ts.fit(y, x.lin, Xtrue, PSI, opz, return.all.sol=FALSE) } else { #if("seed" %in% names(control)) set.seed(control$seed) obj<-step.ts.fit.boot(y, x.lin, Xtrue, PSI, opz, n.boot, break.boot=break.boot) seed <- obj$seed } } else { if(n.boot<=0){ obj<- step.num.fit(y, x.lin, Xtrue, PSI, weights, opz, return.all.sol=FALSE) } else { #if("seed" %in% names(control)) set.seed(control$seed) obj<-step.num.fit.boot(y, x.lin, Xtrue, PSI, weights, opz, n.boot, break.boot=break.boot) seed <- obj$seed } } # if(!is.list(obj)){ # warning("No breakpoint estimated", call. = FALSE) # return(obj0) # } #chol2inv(qr.R(obj$obj$qr)) id.warn<-obj$id.warn it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart #i beta.c corripondono ai psi NON ordinati!!! beta.c<- obj$beta.c beta.c<-unlist(tapply(psi, id.psi.group, function(.x)beta.c[order(.x)])) #unlist(lapply(unique(id.psi.group), function(.x) beta.c[id.psi.group==.x][order(psi[id.psi.group==.x])])) psi<-unlist(tapply(psi, id.psi.group, sort)) Z0<-apply(Z,2,sort) psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j] INF DEN <- abs(Xtrue - PSI.mid) DEN <- apply(DEN, 2, function(.x) pmax(.x, sort(.x)[2]/2)) #pmax(.x, diff(range(.x))/1000)) V <- (1/(2 * DEN)) #k=10 #V <- (1/(2 * k*log(cosh((Xtrue - PSI.mid)/k)))) colnames(V)<-nomiV if(centerX){ XtrueS <- scale(Xtrue, TRUE, scale=FALSE) meanX<-attr(XtrueS, "scaled:center") attr(XtrueS, "scaled:center")<-NULL U <- (XtrueS * V + 1/2) } else { U <- (Xtrue * V + 1/2) } colnames(U)<-nomiU if(pertV>0){ #puoi usare o psi.mid o psi.rounded+eps.. Il secondo porta ad una cor ancora piu' bassa della prima.. 0.89 vs 0.96 if(pertV==1){ PSI.mid <- matrix(psi.mid, n, npsi, byrow = TRUE) V <- (1/(2 * abs(Xtrue - PSI.mid))) } else { PSI.mid <- matrix(psi.rounded[1,], n, npsi, byrow = TRUE) V <- (1/(2 * abs(Xtrue - PSI.mid + .0001))) } } # browser() # # V <- -V # o<-lm(y~U+V) # # #return(o) # # #o<- .lm.fit(y=y, x=cbind(1,U, -V)) # #o$psi <- psi # var.Tay<-function(est1,est2,v1,v2,v12){ # r<- est1/est2 # vv<-(v1+v2*r^2-2*r*v12)/est2^2 # vv # } # # # Cov<-vcov(o) # num.g<-o$coefficients[3] # den.b<-o$coefficients[2] # v.g <-Cov[3,3] # v.b <-Cov[2,2] # # rho<-mean(Xtrue< as.numeric(psi)) # rho<- rho^(sqrt(2/n)) # cov.g.b<- rho*sqrt(v.g*v.b) # var.Tay(num.g, den.b, v.g, v.b, cov.g.b) # # -V* drop(beta.c) # # return(o) # Vxb <- -V# * rep(-beta.c, each = nrow(V)) nomiVxb <- gsub("V", "psi", nomiV) nnomi <- c(nomiU, nomiVxb) #XREG <- cbind(x.lin, Z, W) #obj <- lm.wfit(y = y, x = XREG, offset = offs, w=ww ) # source("stepmented.lm.R") #################### #return(list(psi.rounded=psi.rounded)) ### ==========>>>>>>>>>>>>>> ## e i PESI!!!!!!!!!!!!!!!!!!!!!!!!!!!!!????! # A questo punto e' inutile stimare objW, il modello con W, tanto non riesci ad ottenere una misura del SE.. # QUINDI puoi semplicemente stimare il modello con U soltanto e poi aggiustare i df.residuals ## ========================================================================================================= ### Fo <- update.formula(Fo0, as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) mfExt <- data.frame(1, U, Vxb) colnames(mfExt)<-c("(Intercept)", nnomi) objF <- lm(Fo, weights=weights, data = mfExt) #browser() objW<-objF #X1 <- mfExt$psi1.index*beta.c[[1]] #Off <- mfExt$psi1.index*beta.c[[1]]*psi[[1]] #yy <- y-Off #a <- lm(yy~ U1.index + X1, data=mfExt) #l'SE di b(X1) dovrebbe essere quello per di psi.... #controllo se qualche coeff e' NA.. 
isNAcoef<-any(is.na(objF$coefficients)) #browser() if(isNAcoef) { nameNA.psi <- names(objF$coefficients)[which(is.na(objF$coefficients))] nameNA.U <- gsub("psi", "U", nameNA.psi) if(fix.npsi) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("coef ", nameNA.psi, " is NA: breakpoint(s) at the boundary or too close together", call. = FALSE) } else { warning("some estimate is NA (too many breakpoints?): removing ", length(nameNA.psi), " jump-point(s)", call. = FALSE) Fo <- update(Fo, paste(".~ .-", nameNA.U, "-", nameNA.psi)) objF <- lm(Fo, data = mfExt) idNA.psi <- match(nameNA.psi, nomiVxb) nomiVxb <- setdiff(nomiVxb, nameNA.psi) nomiU <- setdiff(nomiU, nameNA.U) Xtrue <- Xtrue[, -idNA.psi, drop = FALSE] PSI.mid<- PSI.mid[, -idNA.psi, drop = FALSE] id.psi.group <- id.psi.group[-idNA.psi] psi <- psi[-idNA.psi] psi.rounded <- psi.rounded[, -idNA.psi, drop = FALSE] } } #organizziamo i risultati da restituire per psi.. colnames(psi.rounded)<-names(psi)<-nomiVxb rownames(psi.rounded)<-c("inf [","sup (") # Cov <- vcov(objF) # # var.Tay<-function(est1,est2,v1,v2,v12){ # r<- est1/est2 # vv<-(v1+v2*r^2-2*r*v12)/est2^2 # vv} # # # #browser() # # #var.Tay(num, den, v.g, v.b, cov.g.b) # varPsi<- rep(NA, length(nomiU)) # for(j in 1:length(nomiU)){ # num<-objF$coefficients[nomiVxb[j]] # den<-objF$coefficients[nomiU[j]] # v.g <-Cov[nomiVxb[j],nomiVxb[j]] # v.b<- Cov[nomiU[j],nomiU[j]] # cov.g.b <- Cov[nomiVxb[j],nomiU[j]] # #if(is.null(rho)) { # rho<-mean(Xtrue[, nomiZ[j] ,drop=TRUE] SE piu' piccoli.. # rho<- rho^(sqrt(1/n)) # #} # cov.g.b<- rho*sqrt(v.g*v.b) # varPsi[j]<-var.Tay(num, den, v.g, v.b, cov.g.b) # } # names(varPsi) <- nomiVxb # # #browser() # Cov[nomiVxb, ]<- Cov[, nomiVxb] <- 0 # diag(Cov)[nomiVxb]<-varPsi # #Cov[nomiVxb, nomiVxb ]<- varPsi # # # #browser() # #var.Tay(num, den, v.g, v.b, cov.g.b) # # id <- match(nomiVxb, names(coef(objF))) # vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) ris.psi <-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi #ris.psi[,3]<-sqrt(vv) ## solo per simulazioni #browser() #ris.psi<-cbind(ris.psi, # st0=sqrt(var.Tay(num, den, v.g, v.b, 0)), # st99=sqrt(var.Tay(num, den, v.g, v.b, .99*sqrt(v.g*v.b)))) a<-tapply(id.psi.group, id.psi.group, length) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". Allora costruisco a.ok a.ok<-NULL nomiFINALI<-unique(nomiZ) for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) if(length(psi)!=length(initial.psi)){ ris.psi[,1]<- NA } else { initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) ris.psi[,1]<-initial #if(stop.if.error) ris.psi[,1]<-initial } #================================================= ##RI-AGGIUNGI IL MINIMO!!!!!!!!!! if(adjX){ #ATTENZIONE.. e se ci sono piu' breakpoints o piu' variabili (con piu' breakpoints)?? 
psi.rounded<- psi.rounded + min.x ris.psi[,2] <- ris.psi[,2] + min.x } objF$psi <- ris.psi objF$psi.rounded <- psi.rounded #stima il modello "vero" (non-working) U <- (Xtrue > PSI.mid) colnames(U)<-nomiU X <- cbind(x.lin,U) #browser() if(is.null(weights)) weights=1 objF$obj.ok<-mylm(X, y, w=weights) #coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) objF$objW<- objW objF$fitted.values<-objF$obj.ok$fitted.values objF$residuals<- objF$obj.ok$residuals objF$coefficients[1:length(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients objF$coefficients[nomiVxb] <-psi.rounded[1,] objF$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #Z = name.Z objF$rangeZ<-obj$rangeZ objF$Z <- Z[,unique(name.Z),drop=FALSE] if(adjX) { objF$Z <- objF$Z + min.x objF$rangeZ<- objF$rangeZ + min.x } objF$call <- match.call() objF$orig.call<-orig.call objF$psi.history <- psi.values objF$it <- it objF$epsilon <- obj$epsilon objF$id.warn <- id.warn #objF$rho<-rho objF$psi<- objF$psi[,-1,drop=FALSE] #rimuovi la colonna Initial if(n.boot>0) objF$seed <- seed #browser() if(var.psi){ Cov <- vcov.stepmented(objF, k=NULL) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) objF$psi[,"St.Err"]<-sqrt(vv) objF$vcov<- Cov } class(objF) <- c("stepmented","lm") return(objF) } segmented/R/segmented.lm.R0000644000176200001440000006102714757370260015154 0ustar liggesusers`segmented.lm` <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...) { build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } ##===inizio funzione if(missing(seg.Z)) { if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if("V" %in% sub("V[1-9]*[0-9]","V", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'V', 'V1', .. are not allowed") if("U" %in% sub("U[1-9]*[0-9]","U", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'U', 'U1', .. are not allowed") if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") if(missing(psi)) { if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 #if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? 
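    # A minimal usage sketch of the 'psi'/'npsi' arguments handled above (illustrative
    # only; 'dat' is a hypothetical data frame with columns y, x and z):
    #   o0 <- lm(y ~ x + z, data = dat)
    #   os1 <- segmented(o0, seg.Z = ~x, npsi = 2)                              #automatic starting values
    #   os2 <- segmented(o0, seg.Z = ~x + z, psi = list(x = c(2, 7), z = 0.5))  #user-supplied starting values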
} else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } fc<- min(max(abs(control$fc),.8),1) #min.step<-control$min.step alpha<-control$alpha it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow #conv.psi<-control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # } last <- control$last K<-control$K h<-control$h #------------------------------- orig.call<-Call<-mf<-obj$call orig.call$formula<- mf$formula<-formula(obj) #per consentire lm(y~.) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") if(class(mf$formula)[1]=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula) #orig.call$formula<-update.formula(orig.call$formula, paste("~.-",all.vars(seg.Z))) # #genn 2013. dalla versione 0.2.9-4 ho tolto if(length(.. Tra l'altro non capisco perche' lo avevo fatto # if(length(all.vars(formula(obj)))>1){ # mf$formula<-update.formula(mf$formula,paste(paste(seg.Z,collapse=".+"),"+",paste(all.vars(formula(obj))[-1],collapse="+"))) # } else { # mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) # } #nov 2013 dalla versione 0.3-0.0 (che dovrebbe essere successiva alla 0.2-9.5) viene creato anche il modelframe esteso che comprende # termini "originali", prima che fossero trasformati (Ad es., x prima che ns(x) costruisca le basi). 
Questo permette di avere termini # ns(), poly(), bs() nel modello di partenza mfExt<- mf mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #mfExt$formula<- update.formula(mfExt$formula,paste(paste(seg.Z,collapse=".+"),"+",paste(all.vars(formula(obj)),collapse="+"))) # mfExt$formula<- if(!is.null(obj$call$data)) # update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),"-",obj$call$data,sep="")) # else update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),sep="")) #----------- # browser() if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+") )) } #browser() #mf <- try(eval(mf, parent.frame(max(1,sys.parent()))), silent=TRUE) #if(inherits(mf, "try-error")) mf <- try(eval(mf, parent.frame()), silent=TRUE) # mf <- eval(mf, parent.frame()) n<-nrow(mf) #questo serve per inserire in mfExt le eventuali variabili contenute nella formula con offset(..) nomiOff<-setdiff(all.vars(formula(obj)), names(mf)) if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep="")) #---------------------------------------------------- #ago 2014 c'e' la questione di variabili aggiuntive... nomiTUTTI<-all.vars(mfExt$formula) #comprende anche altri nomi (ad es., threshold) "variabili" nomiNO<-NULL for(i in nomiTUTTI){ #r<-try(eval(parse(text=i), parent.frame(max(1,sys.parent()))), silent=TRUE) # #if(inherits(r,"try-error")) r<-try(eval(parse(text=i), parent.frame()), silent=TRUE) if(class(r)[1]!="try-error" && length(r)==1 && !is.function(r) && !i%in%names(mf)) nomiNO[[length(nomiNO)+1]]<-i } #nomiNO dovrebbe contenere i nomi delle "altre variabili" (come th in subset=x0) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiAgg, collapse="+"), sep="")) # # # mfExt <- try(eval(mfExt, parent.frame(max(1,sys.parent()))), silent=TRUE) # if(inherits(mfExt, "try-error")) mfExt <- try(eval(mfExt, parent.frame()), silent=TRUE) mfExt <-eval(mfExt, parent.frame()) # mfExt <- eval(mfExt, parent.frame(max(1,sys.parent()))) #mantieni in mfExt solo le variabili che NON ci sono in mf (cosi la funzione occupa meno spazio..) #mfExt<-mfExt[,setdiff(names(mfExt), names(mf)),drop=FALSE] weights <- as.vector(model.weights(mf)) offs <- as.vector(model.offset(mf)) mt <- attr(mf, "terms") interc<-attr(mt,"intercept") y <- model.response(mf, "any") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf, obj$contrasts) #il cambio in mf da "offset(_nomevar_)" al "_nomevar_" deve avvenire dopo "model.matrix(mt, mf, contrasts)" # if(!is.null(offs)){ # #id.offs<-pmatch("offset",names(mf)) #questa identifica il nome offset(..). ELiminarlo dal dataframe? non conviene altrimenti nel model.frame non risulta l'offset # id.offs<- which(grepl("(offset)", names(mf))) #per consentire anche offset come argomento di glm() # names(mf)[id.offs]<- all.vars(formula(paste("~", names(mf)[id.offs])), functions=FALSE) # } namesXREG0<-colnames(XREG) #nameLeftSlopeZero<-setdiff(all.vars(seg.Z), all.vars(formula(obj))) nameLeftSlopeZero<-setdiff(all.vars(seg.Z), names(coef(obj))) #in questo modo riconosce che sin(x*pi) NON e' x, ad esempio. 
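    # A sketch of the situation the extended model frame is meant to handle (illustrative
    # only; 'dat' is hypothetical, and ns() requires the splines package): transformed
    # terms such as ns(), poly() or bs() may appear in the starting fit, e.g.
    #   o0 <- lm(y ~ ns(z, 3) + x, data = dat)
    #   os <- segmented(o0, seg.Z = ~x, npsi = 1)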
namesXREG0<-setdiff(namesXREG0, nameLeftSlopeZero) id.duplic<-match(all.vars(formula(obj)),all.vars(seg.Z),nomatch=0)>0 if(any(id.duplic)) { #new.mf<-mf[,id.duplic,drop=FALSE] new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE] new.XREGseg<-data.matrix(new.mf) XREG<-cbind(XREG,new.XREGseg) } n.psi<- length(unlist(psi)) id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG) XREGseg<-XREG[,id.n.Seg,drop=FALSE] #XREG<-XREG[,-id.n.Seg,drop=FALSE] #XREG<-model.matrix(obj0) non va bene perche' non elimina gli eventuali mancanti in seg.Z.. #Due soluzioni #XREG<-XREG[,colnames(model.matrix(obj)),drop=FALSE] #XREG<-XREG[,match(c("(Intercept)",all.vars(formula(obj))[-1]),colnames(XREG),nomatch =0),drop=FALSE] XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE] XREG<-XREG[,unique(colnames(XREG)), drop=FALSE] ################# #if(ncol(XREGseg)==1 && length(psi)==1 && n.psi==1 && !any(is.na(psi))) { if(psi==Inf) psi<-median(XREGseg)} ################# #browser() n <- nrow(XREG) Z<-lapply(apply(XREGseg,2,list),unlist) #prende anche i nomi! name.Z <- names(Z) <- colnames(XREGseg) if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("Z and psi have to be *named* list") id.nomiZpsi <- match(names(Z), names(psi)) if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] psiQ<-psiE<-psi if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) psiQ[[i]]<-quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) psiE[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) psiQ[[i]] else psiE[[i]] } } else { for(i in 1:length(psi)) { psiQ[[i]]<-quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) psiE[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) psiQ[[i]] else psiE[[i]] } } if(control$quant) { initial<-unlist(psiE) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } else { initial<-unlist(psiQ) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } #########==================== SE PSI FIXED id.psi.fixed <- FALSE if(!is.null(fixed.psi)){ id.psi.fixed <- TRUE if(is.numeric(fixed.psi) && n.Seg==1) { fixed.psi<-list(fixed.psi) names(fixed.psi)<-all.vars(seg.Z) } if(is.list(fixed.psi)) { if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ") } else { stop(" 'fixed.psi' has to be a named list ") } fixed.psi<-lapply(fixed.psi, sort) Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n) n.fixed.psi<-sapply(fixed.psi, length) rip.nomi <- rep( names(fixed.psi), n.fixed.psi) rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x)) colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="") PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE) fixedU<-(Zfixed-PSI)*(Zfixed>PSI) XREG<-cbind(XREG, fixedU) } #########====================END SE PSI FIXED initial.psi<-psi a <- sapply(psi, length) #per evitare che durante il processo iterativo i psi non siano ordinati id.psi.group <- rep(1:length(a), times = a) #identificativo di apparteneza alla variabile # #Znew <- list() #for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], a[i]) #Z <- 
matrix(unlist(Znew), nrow = n) Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) psi <- unlist(psi) #se psi e' numerico, la seguente linea restituisce i valori ordinati all'interno della variabile.. psi<-unlist(tapply(psi,id.psi.group,sort)) k <- ncol(Z) PSI <- matrix(psi, n, k, byrow=TRUE) #rep(psi, rep(n, k)), ncol = k) #controllo se psi e' ammissibile.. c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") colnames(Z) <- nomiZ <- rep(nome, times = a) ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(.x) {1:.x}))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ, sep = ".") #forse non serve crearsi l'ambiente KK, usa mf.. #obj <- update(obj, formula = Fo, data = mf) #if (model.frame) obj$model <- mf #controlla che model.frame() funzioni sull'oggetto restituito # KK <- new.env() # for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), objframe$model[[i]], envir = KK) if (it.max == 0) { #mf<-cbind(mf, mfExt) U <- (Z>PSI)*(Z-PSI) #pmax((Z - PSI), 0) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) #e' necessario il for? puoi usare colnames(U)<-nomiU;mf[nomiU]<-U for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf, if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) psi <- cbind(psi, psi, 0) rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") colnames(psi) <- c("Initial", "Est.", "St.Err") #names(psi)<-paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") obj$psi <- psi return(obj) } #XREG <- model.matrix(obj) creata sopra #o <- model.offset(objframe) #w <- model.weights(objframe) id.noOW <- if(is.null(weights) && is.null(offs)) TRUE else FALSE if (is.null(weights)) weights <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) initial <- psi obj0 <- obj dev0<-sum(weights*obj$residuals^2) list.obj <- list(obj) # psi.values <- NULL nomiOK<-nomiU #invXtX<-if(!is.null(obj$qr)) chol2inv(qr.R(obj$qr)) else NULL #(XtX)^{-1} #Xty<-crossprod(XREG,y) invXtX<-Xty<-NULL #browser() if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, usesegreg=FALSE, tol.opt=control$tol.opt, nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, #conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, fc=fc, seed=control$seed, fit.psi0=control$fit.psi0, min.n=control$min.n, limZ=NULL, rangeZ=NULL, nomiSeg=unique(colnames(Z)), PSI1=PSI1) # for(.i in unique(colnames(Z))) { # #poni min(z)=0, cosi solve() in step.lm.fit non ha problemi. 
# if(.i %in% colnames(XREG)) XREG[,.i]<- XREG[,.i] - min(XREG[,.i]) # } if(n.boot<=0){ obj<-seg.lm.fit(y, XREG, Z, PSI ,weights, offs, opz) } else { obj<-seg.lm.fit.boot(y, XREG, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) #seed<- obj$seed } if(!is.list(obj)){ warning("Estimation failed. Too many breakpoints? Returning a lm fit..", call. = FALSE) return(obj0) } seed<- obj$seed #browser() if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) id.psi.group<-obj$id.psi.group nomiOK<-obj$nomiOK #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) #nomi delle variabili con breakpoint stimati! #nomiFINALI<-sub("U[1-9].", "", nomiOK) #nomi originali delle variabili con breakpoint stimati! nomiFINALI<- unique(sub("U[1-9]*[0-9].", "", nomiOK)) #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. = FALSE) it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V id.warn<-obj$id.warn rangeZ<-obj$rangeZ idU<-obj$idU idV<- max(idU)+(1:ncol(V)) obj<-obj$obj k<-length(psi) beta.c<-coef(obj)[idU] #beta.c<-coef(obj)[paste("U", 1:ncol(U), sep = "")] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) forma.nomiU<-function(xx,yy)paste("U",1:xx, ".", yy, sep="") forma.nomiVxb<-function(xx,yy)paste("psi",1:xx, ".", yy, sep="") nomiU <- unlist(mapply(forma.nomiU, length.psi, nomiFINALI)) #invece di un ciclo #paste("U",1:length.psi[i], ".", name.Z[i]) nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, nomiFINALI)) #########========================= SE PSI FIXED psi.list<-vector("list", length=length(unique(nomiZ))) names(psi.list)<-unique(nomiZ) #names(psi)<-nomiZ #se e' una procedure automatica nomiZ puo essere piu lungo dei breakpoints "rimasti" names(psi)<-rep(nomiFINALI, length.psi) for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } #mf<-cbind(mf, mfExt) #questo creava ripetizioni.. for(i in 1:ncol(U)) { mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] mfExt[nomiVxb[i]]<-mf[nomiVxb[i]]<-Vxb[,i] } nnomi <- c(nomiU, nomiVxb) #browser() Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) #########========================= SE PSI FIXED if(id.psi.fixed){ for(i in 1:ncol(fixedU)) mfExt[colnames(fixedU)[i]]<-mf[colnames(fixedU)[i]]<-fixedU[,i] Fo<-update.formula(Fo, paste(c("~.",colnames(fixedU)), collapse="+")) } #objF <- update(obj0, formula = Fo, data = KK) objF <- update(obj0, formula = Fo, evaluate=FALSE, data = mfExt) #eliminiamo subset, perche' se e' del tipo subset=x>min(x) allora continuerebbe a togliere 1 osservazione if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL objF<-eval(objF, envir=mfExt) objF$offset<- obj0$offset isNAcoef<-any(is.na(objF$coefficients)) if(isNAcoef){ if(stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi),"\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } else { warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. 
= FALSE) Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) objF <- update(obj0, formula = Fo, evaluate=TRUE, data = mfExt) names(psi)<-nomiVxb objF$psi<-psi return(objF) } } #browser() #if(!gap){ names.coef<-names(objF$coefficients) #questi codici funzionano e si basano sull'assunzioni che le U e le V siano ordinate.. #names(obj$coefficients)[match(c(paste("U",1:k, sep=""), paste("V",1:k, sep="")), names(coef(obj)))]<- nnomi #names(obj$coefficients)[match(c(nomiU, nomiVxb), names(coef(obj)))]<- nnomi #names(obj$coefficients)[c(idU,idV)]<- nnomi #objF$coefficients[names.coef]<-obj$coefficients[names.coef] #sostituisce tutti i coef (gli ultimi sono 0) #Attenzione: quando obj0 contiene interazioni, il rispettivo coeff viene messo alla fine di tutti gli altri (anche di psi), # per cui bisogna fare attenzione a come vengono sostituiti i coeffs #browser() #objF$coefficients<-obj$coefficients if(ncol(XREG)>0) objF$coefficients[match(names(objF$coefficients), names(obj$coefficients),0)] <- obj$coefficients[1:ncol(XREG)] objF$coefficients[nomiU] <- obj$coefficients[idU] objF$coefficients[nomiVxb]<- 0 #names(objF$coefficients)<-names.coef objF$fitted.values<-obj$fitted.values objF$residuals<-obj$residuals #objF$qr<-obj$qr #NON credo.. #} Cov <- vcov(objF) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) #if(length(initial)!=length(psi)) initial<-rep(NA,length(psi)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. ris.psi<-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(vv) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". Allora costruisco a.ok a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial #psi <- cbind(initial, psi, sqrt(vv)) #rownames(psi) <- colnames(Cov)[id] objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call<-orig.call objF$indexU<-build.all.psi(psi.list, fixed.psi) #browser() if(model) objF$model <- mf #objF$mframe <- data.frame(as.list(KK)) if(n.boot>0) objF$seed <- seed class(objF) <- c("segmented", class(obj0)) objF$psi[,"Initial"]<-NA list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] return(list.obj) } segmented/R/stepmented.glm.R0000644000176200001440000005616314757372106015526 0ustar liggesusersstepmented.glm <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), keep.class=FALSE, var.psi=FALSE, ...) 
{ # --------- toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } #----------- agg<- 1-control$fc maxit.glm <- control$maxit.glm it.max<- control$it.max tol<- control$toll display<- control$visual digits <- control$digits #min.step <- control$min.step #conv.psi <- control$conv.psi alpha <- control$alpha fix.npsi <- control$fix.npsi n.boot <- control$n.boot break.boot<- control$break.boot +2 seed<- control$seed fix.npsi<-control$fix.npsi h<- control$h #----------- if(!(inherits(obj,"glm") || is.vector(obj) || is.ts(obj))) stop("obj should be a 'glm' fit, a 'vector' or 'ts' object") if(is.vector(obj) || is.ts(obj)){ #if(is.matrix(obj) && ncol(obj)>1) stop("if matrix 'obj' should have 1 column") #obj<-drop(obj) if(!missing(seg.Z) && length(all.vars(seg.Z))>1) stop(" multiple seg.Z allowed only with (g)lm models") Fo0<-as.formula(paste(deparse(substitute(obj))," ~ 1", sep="")) y.only.vector<-TRUE if(is.vector(obj)) { y<-obj x<-if(missing(seg.Z)) 1:length(y) else eval(parse(text=all.vars(seg.Z))) } else { y<- as.vector(obj) x<- if(missing(seg.Z)) seq(tsp(obj)[1], tsp(obj)[2], by=tsp(obj)[3] ) else eval(parse(text=all.vars(seg.Z))) } if(missing(psi)){ if(missing(npsi)) npsi<-1 #stop(" psi or npsi have to be provided ") psi<- seq(min(x), max(x), l=npsi+2)[-c(1, npsi+2)] #psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) } else { npsi<-length(psi) } initial.psi<-psi n<-length(y) a<- npsi n.Seg<-1 Z<-matrix(x, ncol=a, nrow=n, byrow = FALSE) XREG<-matrix(1, nrow=n, ncol=1) ww<-rep(1, n) offs<-rep(0,n) PSI<-matrix(psi, ncol=a, nrow=n, byrow = TRUE) name.Z <- if(missing(seg.Z)) "id" else all.vars(seg.Z) nomiU<-paste("U", 1:a, ".", name.Z,sep="") nomiV<-paste("V", 1:a, ".", name.Z,sep="") colnames(Z)<-nomiZ<-rep(name.Z, a) id.psi.group <- rep(1:length(a), times = a) orig.call<-NULL fam<- list(...)$family if(!(class(fam)[1]=="family" && is.list(fam))) stop(" family in ... should be specified like 'gaussian()'") dev0<-1e+6 #################################################### } else { #se obj e' un glm y.only.vector <- FALSE Fo0 <- formula(obj) fam <- family(obj) if(missing(seg.Z)) { #if(length(all.vars(formula(obj)))==1) seg.Z<- as.formula(paste("~", "id")) assign("id",1:length(obj$residuals),parent.frame()) #id<-1:length(obj$residuals) # if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if("V" %in% sub("V[1-9]*[0-9]","V", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'V', 'V1', .. are not allowed") if("U" %in% sub("U[1-9]*[0-9]","U", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'U', 'U1', .. 
are not allowed") if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") if(missing(psi)){ if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 #if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? } else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } n.psi<- length(unlist(psi)) #browser() #if(missing(x)) x<-1:n #if(missing(psi)) { # if(missing(npsi)) npsi<-1 # psi<-min(x)+cumsum(rep(diff(range(x))/(npsi+1),npsi)) #} ##========================================================================= #--- preso da segmented.lm orig.call<-Call<-mf<-obj$call orig.call$formula<- mf$formula<-formula(obj) #per consentire lm(y~.) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") if(class(mf$formula)[1]=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula) mfExt<- mf mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+") )) } mf <- eval(mf, parent.frame()) n<-nrow(mf) #questo serve per inserire in mfExt le eventuali variabili contenute nella formula con offset(..) nomiOff<-setdiff(all.vars(formula(obj)), names(mf)) if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep="")) nomiTUTTI<-all.vars(mfExt$formula) #comprende anche altri nomi (ad es., threshold) "variabili" nomiNO<-NULL for(i in nomiTUTTI){ r<-try(eval(parse(text=i), parent.frame()), silent=TRUE) if(class(r)[1]!="try-error" && length(r)==1 && !is.function(r) && !i%in%names(mf)) nomiNO[[length(nomiNO)+1]]<-i } if(!is.null(nomiNO)) mfExt$formula<-update.formula(mfExt$formula,paste(".~.-", paste( nomiNO, collapse="-"), sep="")) mfExt<-eval(mfExt, parent.frame()) #mf <- mfExt ww <- as.vector(model.weights(mf)) offs <- as.vector(model.offset(mf)) if (is.null(ww)) ww <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) mt <- attr(mf, "terms") interc<-attr(mt,"intercept") y <- model.response(mf, "any") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf, obj$contrasts) namesXREG0<-colnames(XREG) nameLeftSlopeZero<-setdiff(all.vars(seg.Z), names(coef(obj))) #in questo modo riconosce che sin(x*pi) NON e' x, ad esempio. 
namesXREG0<-setdiff(namesXREG0, nameLeftSlopeZero)
id.duplic<-match(all.vars(formula(obj)),all.vars(seg.Z),nomatch=0)>0
if(any(id.duplic)) {
    new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE]
    new.XREGseg<-data.matrix(new.mf)
    XREG<-cbind(XREG,new.XREGseg)
}
id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG)
XREGseg<-XREG[,id.n.Seg,drop=FALSE]
XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE]
XREG<-XREG[,unique(colnames(XREG)), drop=FALSE]
#n <- nrow(XREG)
#browser()
Z<-lapply(apply(XREGseg,2,list),unlist) # keeps the names as well!
name.Z <- names(Z) <- colnames(XREGseg)
if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){
    psi <- list(as.numeric(psi))
    names(psi)<-name.Z
}
if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("'psi' or 'npsi' have to be *named* when there are multiple stepmented variables")
id.nomiZpsi <- match(names(Z), names(psi))
if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of 'seg.Z' and 'psi' do not match")
nome <- names(psi)[id.nomiZpsi]
psi <- psi[nome]
if(id.npsi){
    for(i in 1:length(psi)) {
        K<-length(psi[[i]])
        if(any(is.na(psi[[i]]))) psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))
    }
} else {
    for(i in 1:length(psi)) {
        if(any(is.na(psi[[i]]))) psi[[i]]<- (min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))
    }
}
#########==================== IF PSI FIXED
id.psi.fixed <- FALSE
if(!is.null(fixed.psi)){
    id.psi.fixed <- TRUE
    if(is.numeric(fixed.psi) && n.Seg==1) {
        fixed.psi<-list(fixed.psi)
        names(fixed.psi)<-all.vars(seg.Z)
    }
    if(is.list(fixed.psi)) {
        if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ")
    } else {
        stop(" 'fixed.psi' has to be a named list ")
    }
    fixed.psi<-lapply(fixed.psi, sort)
    Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n)
    n.fixed.psi<-sapply(fixed.psi, length)
    rip.nomi <- rep( names(fixed.psi), n.fixed.psi)
    rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x))
    colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="")
    PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE)
    fixedU<-(Zfixed-PSI)*(Zfixed>PSI)
    XREG<-cbind(XREG, fixedU)
}
#########====================END IF PSI FIXED
initial.psi<-psi
a <- sapply(psi, length) # number of psi for each covariate
# to prevent the psi from becoming unordered during the iterative process
id.psi.group <- rep(1:length(a), times = a) # identifies which variable each psi belongs to
Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n)
psi <- unlist(psi)
# if psi is numeric, the following line returns the values sorted within each variable..
psi<-unlist(tapply(psi,id.psi.group,sort))
k <- ncol(Z)
PSI <- matrix(rep(psi, rep(n, k)), ncol = k)
# check whether the starting psi are admissible..
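## --- Editor's note: illustrative sketch, not part of the package source. ---
## When 'psi' entries are NA, the loop above places K starting jump-points
## evenly inside the observed range of each variable:
##     psi_k = min(z) + (max(z) - min(z)) * k/(K+1),   k = 1, ..., K.
## For instance (hypothetical values):
#   z <- runif(50, 0, 10)
#   K <- 2
#   min(z) + diff(range(z)) * (1:K)/(K + 1)   # two equally spaced starting psi
## (The admissibility check for these starting values continues right below.)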
c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") colnames(Z) <- nomiZ <- rep(nome, times = a) ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(.x) {1:.x}))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ, sep = ".") initial <- psi obj0 <- obj dev0<- obj0$dev #sum(obj$residuals^2) list.obj <- list(obj) nomiOK<-nomiU } # invXtX<-if(!is.null(obj$qr)) chol2inv(qr.R(obj$qr)) else NULL #(XtX)^{-1} # Xty<-crossprod(XREG,y) # opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, # nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, # conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc) #x<- Z x.lin <-XREG rangeZ <- apply(Z, 2, range) plin<-ncol(x.lin) #if(!is.list(psi)) psi<-list(psi) #P <- length(psi) #n. variabili con breakpoints #npsii <- sapply(psi, length) #n di psi for each covariate P<-n.Seg npsii<-a npsi<- sum(npsii) Xtrue<-Z #psi0 <- unlist(psi) #PSI<- matrix(psi0, n, npsi, byrow=TRUE) #if(ncol(x)!=P) stop("errore") #Xtrue<-toMatrix(x, npsii) if(it.max == 0) { U <- (Xtrue>PSI) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf, if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) #if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) psi <- cbind(psi, psi, 0) rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") colnames(psi) <- c("Initial", "Est.", "St.Err") obj$psi <- psi return(obj) } c1 <- apply((Xtrue <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Xtrue >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) eta0 <- if(is.null(control$eta)) obj$linear.predictors else control$eta opz<-list(toll=tol, dev0=dev0, display=display, it.max=it.max, agg=agg, digits=digits, rangeZ=rangeZ, usestepreg=FALSE, fam=fam, maxit.glm=maxit.glm, id.psi.group=id.psi.group, h=h, eta0=eta0, limZ=NULL, #nomiOK=nomiOK, visualBoot=visualBoot, invXtX=invXtX, Xty=Xty, conv.psi=conv.psi, min.step=min.step, alpha=alpha, fix.npsi=fix.npsi, npsii=npsii, seed=control$seed, fit.psi0=control$fit.psi0) opz$Nboot <- 0 # ################################################################################# # #### jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) # ################################################################################# if(n.boot<=0){ obj<- step.glm.fit(y, x.lin, Xtrue, PSI, ww, offs, opz, return.all.sol=FALSE) } else { #if("seed" %in% names(control)) set.seed(control$seed) obj<-step.glm.fit.boot(y, x.lin, Xtrue, PSI, ww, offs, opz, n.boot, break.boot=break.boot) seed <- control$seed } # 
if(!is.list(obj)){ # warning("No breakpoint estimated", call. = FALSE) # return(obj0) # } #browser() id.warn<-obj$id.warn it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart #i beta.c corripondono ai psi NON ordinati!!! beta.c<- obj$beta.c beta.c<-unlist(tapply(psi, id.psi.group, function(.x)beta.c[order(.x)])) #unlist(lapply(unique(id.psi.group), function(.x) beta.c[id.psi.group==.x][order(psi[id.psi.group==.x])])) psi<-unlist(tapply(psi, id.psi.group, sort)) Z0<-apply(Z,2,sort) psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j] INF DEN <- abs(Xtrue - PSI.mid) DEN <- apply(DEN, 2, function(.x) pmax(.x, sort(.x)[2]/2)) #pmax(.x, diff(range(.x))/1000)) V <- (1/(2 * DEN)) colnames(V)<-nomiV U <- (Xtrue * V + 1/2) colnames(U)<-nomiU #Vxb <- V %*% diag(-beta.c, ncol = length(beta.c)) Vxb <- -V # * rep(-beta.c, each = nrow(V)) nomiVxb <- gsub("V", "psi", nomiV) nnomi <- c(nomiU, nomiVxb) if(y.only.vector){ Fo <- update.formula(Fo0, as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) mf <-data.frame(1,U,Vxb) colnames(mf)<-c("(Intercept)",nnomi) objF <- glm(Fo, weights = ww, offset = offs, family = fam, data = mf) } else { for(i in 1:ncol(U)) { mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] mfExt[nomiVxb[i]]<-mf[nomiVxb[i]]<-Vxb[,i] } Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) #la seguente linea si potrebbe rimuovere perche' in mfExt c'e' gia' tutto.. if(is.matrix(y)&& (fam$family=="binomial" || fam$family=="quasibinomial")){ mfExt<-cbind(mfExt[[1]], mfExt[,-1]) } objF <- update(obj0, formula = Fo, evaluate=FALSE, data = mfExt) #eliminiamo subset, perche' se e' del tipo subset=x>min(x) allora continuerebbe a togliere 1 osservazione if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL objF<-eval(objF, envir=mfExt) objF$offset<- obj0$offset } objW<-objF #controllo se qualche coeff e' NA.. isNAcoef<-any(is.na(objF$coefficients)) #browser() if (isNAcoef) { nameNA.psi <- names(objF$coefficients)[which(is.na(objF$coefficients))] nameNA.U <- gsub("psi", "U", nameNA.psi) if(fix.npsi) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("coef ", nameNA.psi, " is NA: breakpoint(s) at the boundary or too close together", call. = FALSE) } else { warning("some estimate is NA (too many breakpoints?): removing ", length(nameNA.psi), " jump-point(s)", call. = FALSE) Fo <- update(Fo, paste(".~ .-", nameNA.U, "-", nameNA.psi)) if(y.only.vector){ objF <- glm(Fo, data = mfExt, family=fam) } else { objF <- update(obj0, formula = Fo, evaluate = TRUE, data = mfExt) if (!is.null(objF[["subset"]])) objF[["subset"]] <- NULL } idNA.psi <- match(nameNA.psi, nomiVxb) nomiVxb <- setdiff(nomiVxb, nameNA.psi) nomiU <- setdiff(nomiU, nameNA.U) Xtrue <- Xtrue[, -idNA.psi, drop = FALSE] PSI.mid<- PSI.mid[, -idNA.psi, drop = FALSE] id.psi.group <- id.psi.group[-idNA.psi] psi <- psi[-idNA.psi] psi.rounded <- psi.rounded[, -idNA.psi, drop = FALSE] } } #organizziamo i risultati da restituire per psi.. 
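## --- Editor's note: illustrative aside, not part of the package source. ---
## For a pure jump (stepmented) fit, any psi between two adjacent observed
## x-values gives exactly the same fitted values, so the estimate is naturally
## reported as an interval; cf. the 'psi.rounded' object named just below,
## whose two rows give the lower/upper ends of that interval. A tiny check:
#   x <- 1:6
#   y <- c(0, 0, 0, 1, 1, 1)
#   f1 <- lm(y ~ I(x > 3.2))
#   f2 <- lm(y ~ I(x > 3.8))
#   all.equal(fitted(f1), fitted(f2))   # TRUE: same step fit for any psi in (3, 4)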
colnames(psi.rounded)<-names(psi)<-nomiVxb rownames(psi.rounded)<-c("inf [","sup (") # Cov <- vcov(objF) # var.Tay<-function(est1,est2,v1,v2,v12){ # r<- est1/est2 # vv<-(v1+v2*r^2-2*r*v12)/est2^2 # vv} # # varPsi<- rep(NA, length(nomiU)) # for(j in 1:length(nomiU)){ # num<-objF$coefficients[nomiVxb[j]] # den<-objF$coefficients[nomiU[j]] # v.g <-Cov[nomiVxb[j],nomiVxb[j]] # v.b<- Cov[nomiU[j],nomiU[j]] # cov.g.b <- Cov[nomiVxb[j],nomiU[j]] # #if(is.null(rho)) # rho<-mean(Xtrue[, nomiZ[j] ,drop=TRUE] PSI.mid) colnames(U)<-nomiU X <- cbind(x.lin,U) #objF$obj.ok<-mylm(X, y, w=ww, offs=offs) #coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) objF$obj.ok<-glm.fit(X, y, weights =ww, offset =offs, family=fam) #, control = glm.control(maxit = maxit.glm), etastart = eta0) #browser() objF$obj.ok$invXtX <- chol2inv(qr.R(objF$obj.ok$qr)) objF$objW<- objW objF$fitted.values<-objF$obj.ok$fitted.values objF$residuals<- objF$obj.ok$residuals #objF$coefficients[1:length(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients objF$coefficients[names(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients objF$coefficients[nomiVxb] <-psi.rounded[1,] objF$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #Z = name.Z objF$rangeZ<-obj$rangeZ objF$Z<-Z[,unique(name.Z),drop=FALSE] objF$linear.predictors<-objF$obj.ok$linear.predictors objF$deviance<- objF$obj.ok$deviance objF$aic<- objF$obj.ok$aic + 2*ncol(Z) #k ##################################### objF$weights<-objF$obj.ok$weights # in stepmented.lm non serve.. ##################################### objF$call <- match.call() objF$orig.call<-orig.call objF$psi.history <- psi.values objF$it <- it objF$epsilon <- obj$epsilon objF$id.warn <- id.warn if(n.boot>0) objF$seed <- seed #browser() #class(objF) <- if(y.only.vector) "stepmented" else c("stepmented", class(obj0)) #Un effetto aggiuntivo.. if(!y.only.vector){ Z.in.obj<-intersect(all.vars(Fo0), all.vars(seg.Z)) if(length(Z.in.obj)>0){ tt<-terms(Fo0)#, specials=Z.in.obj) #id<-match(Z.in.obj, all.vars(Fo0))-1 #1 e' per la risposta.. id<-match(Z.in.obj, intersect(all.vars(Fo0), names(mf)))-1 nome<-attr(tt,"term.labels")[id] Fo.ok<-as.formula(paste("~0", nome, sep="+")) f.x<-matrix(NA, 150, ncol(objF$Z[,Z.in.obj,drop=FALSE])) for(j in 1:length(Z.in.obj)){ #browser() #dd<-data.frame(objF$Z[,j]) idPsi <- nomiVxb[endsWith(nomiVxb, paste(".", Z.in.obj[j], sep = ""))] psi <- coef(objF)[idPsi] dd<-data.frame(seq(min(objF$Z[,Z.in.obj[j]]), max(objF$Z[,Z.in.obj[j]]), l=nrow(f.x))) names(dd)<- Z.in.obj[j] M<-model.matrix(Fo.ok, data=dd) #M<-M[seq(ceiling(length(psi)/2),by=1,l=nrow(f.x)),,drop=FALSE] f.x[,j]<-M%*% coef(objF)[colnames(M)] } colnames(f.x)<-Z.in.obj objF$f.x<-f.x } } objF$psi<- objF$psi[,-1,drop=FALSE] #rimuovi la colonna Initial if(var.psi){ Cov <- vcov.stepmented(objF, k=NULL) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) objF$psi[,"St.Err"]<-sqrt(vv) objF$vcov<- Cov } class(objF) <- c("stepmented", class(objF)) return(objF) } segmented/R/seg.Ar.fit.boot.r0000644000176200001440000002466314757621067015505 0ustar liggesusersseg.Ar.fit.boot<-function(obj, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. 
Quello semiparam dipende dal non-errore di extract.psi<-function(lista){ #non serve avere una versione di extract.psi() come quella di seg.lm.fit.boot perche' #qui i primi valori di lista[[1]] e lista[[2]] sempre si riferiscono al modello senza psi #serve per estrarre il miglior psi.. dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- if(is.null(opz$seed)){ mY <- mean(obj$residuals) sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } visualBoot<-opz$visualBoot opz.boot<-opz opz1<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz.boot$it.max<-20 opz1$it.max <-0 n<-nrow(Z) rangeZ <- apply(Z, 2, range) #serve sempre alpha <- opz$alpha limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha)) o0<-try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, PSI, opz)), silent=TRUE) if(!is.list(o0)){ o0<-try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, opz$PSI1, opz)), silent=TRUE) } if(!is.list(o0)) { o0<- seg.Ar.fit(obj, XREG, Z, PSI, opz, return.all.sol=TRUE) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(seg.Ar.fit(obj, Z, PSI1, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z #if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 alpha <- .1 n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... 
diff.selected.ss <- rev(diff(na.omit(all.selected.ss)))
#if(length(na.omit(diff(all.selected.ss[1:n.boot.rev])))==(n.boot.rev-1) && all(round(diff(all.selected.ss[1:n.boot.rev]),6)==0)){
if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){
    qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i]))
    qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi)
    alpha<- 1-alpha
    est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE))
}
########################### 25/7/24 #####
est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort))
#########################################
PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow = TRUE)
if(jt) Z<-apply(Z.orig,2,jitter)
if(nonParam){
    id<-sample(n, size=size.boot, replace=TRUE)
    o.boot<-try(suppressWarnings(seg.Ar.fit(obj, XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], opz.boot)), silent=TRUE)
} else {
    yy<-fitted.ok+sample(residuals(o0), size=n, replace=TRUE)
    ##----> o.boot<-try(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz.boot), silent=TRUE)
    # actually the response should be "yy", to be changed in mfExt
    o.boot<- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z.orig, PSI, opz.boot)), silent=TRUE)
}
if(is.list(o.boot)){
    all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi
} else {
    est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2]))
    est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort))
}
PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE)
#opz$h<-max(opz$h*.9, .2)
opz$it.max<-opz$it.max+1
o <- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z.orig, PSI, opz, return.all.sol=TRUE)), silent=TRUE)
if(!is.list(o) && random){
    est.psi0<-apply(limZ,2,function(r) runif(1,r[1],r[2]))
    PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE)
    o <- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, PSI1, opz1)), silent=TRUE)
    count.random<-count.random+1
}
if(is.list(o)){
    if(!"coef"%in%names(o$obj)) o<-extract.psi(o)
    all.est.psi[k,]<-o$psi
    all.ss[k]<-o$SumSquares.no.gap
    if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o
    est.psi0<-o0$psi
    all.selected.psi[k,] <- est.psi0
    all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap))
}
if (visualBoot) {
    flush.console()
    #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1])
    cat(paste("boot sample = ", sprintf("%2.0f",k),
        " opt.llik = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), -o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), # was format="fg"
        " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"),
        " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x)
        sep=""), "\n")
}
asss<-na.omit(all.selected.ss)
if(length(asss)>break.boot){
    if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break
}
} #end n.boot
all.selected.psi<-rbind(est.psi00,all.selected.psi)
all.selected.ss<-c(ss00, all.selected.ss)
ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss)
if(is.null(o0$obj)){
    # when the returned psi are too close and their SE cannot be computed, we can push them apart..
    # However, the process should be extended to handle the case where 3 psi are close together..
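## --- Editor's note: schematic sketch of the bootstrap-restart scheme used in
## the loop above; the names below are hypothetical, not the package's internals.
## Each iteration (i) refits on a resampled data set starting from the current
## best breakpoints, (ii) uses the bootstrap estimates as fresh starting values
## on the original data, and (iii) keeps the solution with the best objective,
## which helps the algorithm escape local optima of the non-smooth objective.
#   boot_restart <- function(fit_fun, data, psi0, B = 10) {
#     best <- fit_fun(data, psi0)
#     for (b in seq_len(B)) {
#       db    <- data[sample(nrow(data), replace = TRUE), , drop = FALSE]
#       cand  <- try(fit_fun(db, best$psi), silent = TRUE)
#       start <- if (inherits(cand, "try-error")) psi0 else cand$psi
#       new   <- try(fit_fun(data, start), silent = TRUE)
#       if (!inherits(new, "try-error") && new$dev < best$dev) best <- new
#     }
#     best
#   }
## (The adjustment for too-close psi, described just above, continues below.)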
min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k])1 #if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? } else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } fc<- min(max(abs(control$fc),.8),1) maxit.glm <- control$maxit.glm it.max <- old.it.max<- control$it.max #min.step<-control$min.step alpha<-control$alpha digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. 
Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}#warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K<-control$K #h<-min(abs(control$h),1) h<- control$h if(h<1) it.max<-it.max+round(it.max/2) orig.call<-Call<-mf<-obj$call orig.call$formula<-mf$formula<-formula(obj) #per consentire lm(y~.) m <- match(c("formula", "data", "subset", "weights", "na.action","etastart","mustart","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") #non so a che serva la seguente linea.. if(class(mf$formula)[1]=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula) mfExt<- mf mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+") )) } #browser() mf <- eval(mf, parent.frame()) #mf <- eval(mf, parent.frame(max(1,sys.parent()))) n<-nrow(mf) #La linea sotto serve per inserire in mfExt le eventuali variabili contenute nella formula con offset(..) # o anche variabili che rientrano in espressioni (ad es., y/n o I(y*n)) nomiOff<-setdiff(all.vars(formula(obj)), names(mf)) if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep="")) #ago 2014 c'e' la questione di variabili aggiuntive... nomiTUTTI<-all.vars(mfExt$formula) #comprende anche altri nomi (ad es., threshold) "variabili" nomiNO<-NULL #dovrebbe contenere for(i in nomiTUTTI){ r<-try(eval(parse(text=i), parent.frame()), silent=TRUE) # parent.frame(max(1,sys.parent()))) if(class(r)[1]!="try-error" && length(r)==1 && !is.function(r) && !i%in%names(mf)) nomiNO[[length(nomiNO)+1]]<-i } #nomiNO dovrebbe contenere i nomi delle "altre variabili" (come th in subset=x=2) mf[nomeRispo[1]]<-weights*y id.duplic<-match(all.vars(formula(obj)),all.vars(seg.Z),nomatch=0)>0 if(any(id.duplic)) { #new.mf<-mf[,id.duplic,drop=FALSE] new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE] new.XREGseg<-data.matrix(new.mf) XREG<-cbind(XREG,new.XREGseg) } n.psi<- length(unlist(psi)) id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG) XREGseg<-XREG[,id.n.Seg,drop=FALSE] XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE] XREG<-XREG[,unique(colnames(XREG)), drop=FALSE] n <- nrow(XREG) #Z <- list(); for (i in colnames(XREGseg)) Z[[length(Z) + 1]] <- XREGseg[, i] Z<-lapply(apply(XREGseg,2,list),unlist) #prende anche i nomi! 
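## --- Editor's note: minimal usage sketch (simulated data, hypothetical values),
## not part of the package source. The machinery below is what a user reaches
## through a call such as:
#   set.seed(1)
#   x  <- 1:100
#   mu <- exp(0.02 * x + 0.08 * pmax(x - 60, 0))   # true breakpoint at 60
#   y  <- rpois(100, mu)
#   o  <- glm(y ~ x, family = poisson)
#   os <- segmented(o, seg.Z = ~x, npsi = 1)       # estimates one breakpoint in x
## (Set-up of the segmented variables continues below.)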
name.Z <- names(Z) <- colnames(XREGseg) if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("Z and psi have to be *named* list") id.nomiZpsi <- match(names(Z), names(psi)) if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] psiQ<-psiE<-psi if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) psiQ[[i]]<-quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) psiE[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) psiQ[[i]] else psiE[[i]] } } else { for(i in 1:length(psi)) { psiQ[[i]]<-quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) psiE[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) psiQ[[i]] else psiE[[i]] } } if(control$quant) { initial<-unlist(psiE) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } else { initial<-unlist(psiQ) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } #########==================== SE PSI FIXED id.psi.fixed <- FALSE if(!is.null(fixed.psi)){ id.psi.fixed <- TRUE if(is.numeric(fixed.psi) && n.Seg==1) { fixed.psi<-list(fixed.psi) names(fixed.psi)<-all.vars(seg.Z) } if(is.list(fixed.psi)) { if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ") } else { stop(" 'fixed.psi' has to be a named list ") } fixed.psi<-lapply(fixed.psi, sort) Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n) n.fixed.psi<-sapply(fixed.psi, length) rip.nomi <- rep( names(fixed.psi), n.fixed.psi) rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x)) colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="") PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE) fixedU<-(Zfixed-PSI)*(Zfixed>PSI) XREG<-cbind(XREG, fixedU) } #########====================END SE PSI FIXED initial.psi<-psi a <- sapply(psi, length)#b <- rep(1:length(a), times = a) id.psi.group <- rep(1:length(a), times = a) #identificativo di appartenenza alla variabile #Znew <- list() #for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], a[i]) #Z <- matrix(unlist(Znew), nrow = n) Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) psi <- unlist(psi) psi<-unlist(tapply(psi,id.psi.group,sort)) k <- ncol(Z) PSI <- matrix(psi, n, k, byrow=TRUE) #rep(psi, rep(n, k)), ncol = k) colnames(Z) <- nomiZ <- rep(nome, times = a) ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(xxx) {1:xxx}))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ, sep = ".") # KK <- new.env() # for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), objframe$model[[i]], envir = KK) if (it.max == 0) { #mf<-cbind(mf, mfExt) U <- (Z>PSI)*(Z-PSI) #pmax((Z - PSI), 0) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) #e' necessario il for? 
puoi usare colnames(U)<-nomiU;mf[nomiU]<-U for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) #obj <- update(obj, formula = Fo, data = KK) obj <- update(obj, formula = Fo, data = mfExt, evaluate=FALSE) if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) names(psi)<-paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") obj$psi <- psi return(obj) } if (is.null(weights)) weights <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) fam <- family(obj) initial <- psi obj0 <- obj dev0<-obj$dev list.obj <- list(obj) nomiOK<-nomiU if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) eta0<- if(is.null(control$eta)) obj$linear.predictors else control$eta opz<-list(toll=toll, h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, nomiOK=nomiOK, usesegreg=FALSE, fam=fam, maxit.glm=maxit.glm, id.psi.group=id.psi.group, gap=gap, tol.opt=control$tol.opt, limZ=NULL, rangeZ=NULL, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi,eta0=eta0, # min.step=min.step, pow=pow, visualBoot=visualBoot, digits=digits, fc=fc, seed=control$seed, fit.psi0=control$fit.psi0, min.n=control$min.n, PSI1=PSI1) #browser() if(n.boot<=0){ obj<-seg.glm.fit(y, XREG, Z, PSI, weights, offs, opz) } else { obj<-seg.glm.fit.boot(y, XREG, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) #jt, nonParam } if(!is.list(obj)){ warning("Estimation failed. Too many breakpoints? Returning a glm fit..", call. = FALSE) return(obj0) } seed<- obj$seed #browser() id.psi.group<-obj$id.psi.group nomiOK<-obj$nomiOK nomiFINALI<-unique(sub("U[1-9]*[0-9].", "", nomiOK)) #nomi originali delle variabili con breakpoint stimati! #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. = FALSE) it<-obj$it psi<-obj$psi k<-length(psi) psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V # #commentati il 28/5 solo per imitare segmented.lm # for(jj in colnames(V)) { # VV<-V[, which(colnames(V)==jj),drop=FALSE] # sumV<-abs(rowSums(VV)) # # if( (any(diff(sumV)>=2) #se ci sono due breakpoints equivalenti # # || any(table(sumV)<=1))) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") # if(any(table(sumV)<=1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") # } rangeZ<-obj$rangeZ idU<-obj$idU obj <- obj$obj beta.c<- obj$coefficients[idU] #beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) #psi.values[[length(psi.values) + 1]] <- psi #in LM e' commentata.. id.warn <- FALSE if (n.boot<=0 && it > it.max) { #it >= (it.max+1) warning("max number of iterations attained", call. 
= FALSE) id.warn <- TRUE } #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) forma.nomiU<-function(xx,yy)paste("U",1:xx, ".", yy, sep="") forma.nomiVxb<-function(xx,yy)paste("psi",1:xx, ".", yy, sep="") nomiU <- unlist(mapply(forma.nomiU, length.psi, nomiFINALI)) #invece di un ciclo #paste("U",1:length.psi[i], ".", name.Z[i]) nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, nomiFINALI)) #########========================= SE PSI FIXED psi.list<-vector("list", length=length(unique(nomiZ))) names(psi.list)<-unique(nomiZ) #names(psi)<-nomiZ #se e' una procedure automatica nomiZ puo' essere piu lungo dei breakpoints "rimasti" names(psi)<-rep(nomiFINALI, length.psi) for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } ########=================================== #se nomiOK sopra contiene gia' le U1.x,ecc... perche' non fare?nomiVxb<-sub("U","psi", nomiOK) #mf<-cbind(mf, mfExt) for(i in 1:ncol(U)) { mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] mfExt[nomiVxb[i]]<-mf[nomiVxb[i]]<-Vxb[,i] } # for (i in 1:ncol(U)) { # assign(nomiU[i], U[, i], envir = KK) # assign(nomiVxb[i], Vxb[, i], envir = KK) # } nnomi <- c(nomiU, nomiVxb) Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) #########========================= SE PSI FIXED if(id.psi.fixed){ for(i in 1:ncol(fixedU)) mfExt[colnames(fixedU)[i]]<-mf[colnames(fixedU)[i]]<-fixedU[,i] Fo<-update.formula(Fo, paste(c("~.",colnames(fixedU)), collapse="+")) } #la seguente linea si potrebbe rimuovere perche' in mfExt c'e' gia' tutto.. if(is.matrix(y)&& (fam$family=="binomial" || fam$family=="quasibinomial")){ mfExt<-cbind(mfExt[[1]], mfExt[,-1]) } objF <- update(obj0, formula = Fo, data = mfExt, family=obj0$family, evaluate=FALSE) if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL objF<-eval(objF, envir=mfExt) #C'e' un problema..controlla obj (ha due "(Intercepts)" - bhu.. al 27/03/14 non mi sembra! #Puo' capitare che psi sia ai margini e ci sono 1 o 2 osservazioni in qualche intervallo. Oppure ce ne # sono di piu' ma hanno gli stessi valori di x objF$offset<- obj0$offset isNAcoef<-any(is.na(objF$coefficients)) if(isNAcoef){ if(stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi),"\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE)} else { warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE) Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) objF <- update(obj0, formula = Fo, evaluate=TRUE, data = mfExt) names(psi)<-nomiVxb objF$psi<-psi return(objF) } } #aggiornare qui i weights???? 
(piuttosto che sotto) #------>>> #------>>> #------>>> #browser() #if(!gap){ names.coef<-names(objF$coefficients) if(ncol(XREG)>0) objF$coefficients[match(names(objF$coefficients), names(obj$coefficients),0)] <- obj$coefficients[1:ncol(XREG)] objF$coefficients[nomiU] <- obj$coefficients[idU] objF$coefficients[nomiVxb]<- 0 objF$fitted.values<-obj$fitted.values objF$linear.predictors<-obj$linear.predictors objF$residuals<-obj$residuals objF$deviance<-obj$deviance objF$aic<-obj$aic + 2*ncol(Z) #k objF$weights<-obj$weights #} Cov <- vcov(objF) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) #if(length(initial)!=length(psi)) initial<-rep(NA,length(psi)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. ris.psi<-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(vv) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". Allora costruisco a.ok a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } # initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- (it - 1) #browser() objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call<-orig.call ###########################PSI FIXED objF$indexU<-build.all.psi(psi.list, fixed.psi) if (model) objF$model <- mf #objF$mframe <- data.frame(as.list(KK)) if(n.boot>0) objF$seed<- seed #employed.Random.seed objF$psi[,"Initial"]<-NA class(objF) <- c("segmented", class(obj0)) list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] return(list.obj) } segmented/R/predict.stepmented.R0000644000176200001440000002534214666303216016367 0ustar liggesusers#new predict.segmented predict.stepmented<-function(object, newdata, se.fit=FALSE, interval=c("none","confidence", "prediction"), type = c("link", "response"), na.action=na.omit, level=0.95, .coef=NULL, .vcov=NULL, apprx.fit=c("none","cdf"), apprx.se=c("cdf","none"), ...){ blockdiag <- function(...) { args <- list(...) 
nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } dummy.matrix<-function(x.values, x.name, obj.seg, psi.est=TRUE, isV=FALSE, .coef=NULL, k=NULL){ #given the segmented fit 'obj.seg' and a segmented variable x.name with corresponding values x.values, #this function simply returns a matrix with columns (x, (x-psi)_+, -b*I(x>psi)) #if obj.seg does not include the coef for the linear "x", the returned matrix is ((x-psi)_+, -b*I(x>psi)) f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } estcoef <- if(is.null(.coef)) coef(obj.seg) else .coef if(length(isV)==1) isV<-c(FALSE,isV) n<-length(x.values) # nameU<-obj.seg$nameUV$U[f.U(obj.seg$nameUV$U,x.name)] # nameV<-obj.seg$nameUV$V[f.U(obj.seg$nameUV$V,x.name)] #grep(x.name, obj.seg$nameUV$V, value = TRUE) #browser() nameU<-object$nameUV$U nameV<-gsub("V","psi", object$nameUV$V) nameU<- nameU[f.U(nameU,x.name)] nameV<- nameV[f.U(nameV, x.name)] #grep(x.name, obj.seg$nameUV$V, value = TRUE) if(is.null(obj.seg$constr)){ diffSlope<-estcoef[nameU] } else { diffSlope<-drop(obj.seg$constr$invA.RList[[match(x.name, obj.seg$nameUV$Z)]]%*%estcoef[nameU])[-1] } #browser() est.psi<-obj.seg$psi[nameV,"Est."] se.psi<-obj.seg$psi[nameV, "St.Err"] npsi <- length(est.psi) PSI <- matrix(est.psi, n, ncol = npsi, byrow=TRUE) minZ <- object$rangeZ[1, x.name] maxZ <- object$rangeZ[2, x.name] Z01<- (x.values-minZ)/(maxZ-minZ) PSI01 <- (PSI-minZ)/(maxZ-minZ) est.psi01<- (est.psi-minZ)/(maxZ-minZ) newd<-matrix(,length(x.values), length(nameU)+length(nameV)) colnames(newd)<-c(sub("psi","U", nameV), nameV) #browser() for(j in 1:npsi){ if(is.null(k)){ idU<-match(nameU[j], nameU) snr.idU<-abs(estcoef[nameU][idU])/sigma(object) ss01=n^(-(.6 + .5*log(snr.idU)/sqrt(snr.idU) -abs(est.psi01[j]-.5)^(1/2)/length(object$residuals)^(1/2))) ss<- ss01*(maxZ-minZ) } else { ss=n^k } newd[ , nameU[idU]] <- pnorm((x.values-est.psi[j])/ss) newd[ , nameV[idU]] <- -(estcoef[nameU][idU]/ss)*dnorm((x.values-est.psi[j])/ss) #newd<-cbind(x.values,dummy1,dummy2) } all.psi<-obj.seg$indexU[[x.name]] if(!is.null(all.psi) && length(all.psi)!=npsi){ newZ<-matrix(x.values, length(x.values), npsi) nomi.psi.noti<-setdiff(names(all.psi),nameU) psi.noti<-setdiff(all.psi, est.psi) PSI.noti <- matrix(rep(psi.noti, rep(n, length(psi.noti))), ncol = length(psi.noti)) nomi<-c(colnames(newd),nomi.psi.noti) newd<-cbind(newd, (newZ-PSI.noti)*(newZ>PSI.noti)) colnames(newd)<-nomi } #browser() U<-sapply(est.psi, function(.x) 1*(x.values>.x)) colnames(U) <- nameU newd<-list(U=U, newd=newd) #colnames(newd)[1]<-x.name #browser() return(newd) } #end dummy.matrix() estcoef <- if(is.null(.coef)) coef(object) else .coef if(is.null(names(estcoef))) stop("the coef estimates should be named") nLin<- length(setdiff(names(coef(object)), c(object$nameUV$U,object$nameUV$V))) nSeg<- 
length(object$nameUV$Z) type<-match.arg(type) interval<-match.arg(interval) apprx.fit <-match.arg(apprx.fit) apprx.se <-match.arg(apprx.se) if(inherits(object, "glm") && object$family$family!="gaussian" && interval=="prediction") stop("prediction intervals are not allowed with non-gaussian glm") nameU<-object$nameUV$U nameV<-gsub("V","psi", object$nameUV$V) nameZ<-object$nameUV$Z #browser() if(missing(newdata)){ X <- model.matrix.stepmented(object, type = apprx.se) X.noV <- model.matrix.stepmented(object, type = "no") colnomi.noV <-colnames(X.noV) if(apprx.fit=="cdf") X.noV[,nameU]<-X[,nameU] idNA<- rep(FALSE, nrow(X)) } else { #browser() #nomiLin <- setdiff(all.vars(formula(object))[-1], c(object$nameUV$U,object$nameUV$V)) nomiLin <- setdiff(all.vars(as.formula(paste("~",paste(formula(object))[3]))), c(nameU, nameV)) if(any(is.na(match(nomiLin, names(newdata))))) stop(" 'newdata' should includes all variables") na.arg <- deparse(substitute(na.action)) idNA<- !complete.cases(newdata) if(any(idNA)){ newdata<-na.omit(newdata) } if(!na.arg%in%c("na.omit","na.pass")) stop("na.action should be 'na.omit' or 'na.pass'") n<-nrow(newdata) Ulist<-r<-NULL if(length(object$call$obj)>0){ #se l'ogg e' stato ottenuto da segmented.* # Fo<- formula(delete.response(terms(formula(eval(object$call$obj))))) # idSeg<- object$nameUV$Z %in% all.vars(Fo) # if(any(!idSeg)){ # Fo<- update.formula(Fo, as.formula(paste("~.+", paste(object$nameUV$Z[!idSeg], collapse="+")))) # } #nomiTerms, a differenza di nomiLin, include eventuali poly(w,2) nomiTerms<-setdiff(attr(terms(formula(object)),"term.labels"), c(nameU, nameV)) idSeg<- object$nameUV$Z %in% nomiLin #potresti mettere anche "nomiTerms" if(any(!idSeg)){ nomiTerms <- c(nomiTerms, object$nameUV$Z[!idSeg]) } Fo<-as.formula(paste("~.+", paste(nomiTerms, collapse="+"))) M<-model.matrix(Fo, data=newdata, contrasts=object$contrasts, xlev = object$xlevels) } else { #se l'ogg e' stato ottenuto da stepreg #browser() Fo<-as.formula(object$nameUV$formulaSegAllTerms) if(any(all.vars(Fo)%in%names(object$xlevels))){ M<-model.matrix(Fo, data=newdata, contrasts = object$contrasts, xlev=object$xlevels) } else { M<-model.matrix(Fo, data=newdata) } #nomiLin<- all.vars(object$formulaLin)[-1] #non funziona se la rispo e' cbind(y,n-y) nomiLin <- all.vars(as.formula(paste("~",paste(object$formulaLin)[3]))) if(any(!nomiLin%in%all.vars(Fo))){ #nomiLinOK<- nomiLin[!nomiLin%in%all.vars(Fo)] terminLin<-attr(terms(object$formulaLin),"term.labels")[!nomiLin%in%all.vars(Fo)] Fo <- as.formula(paste("~.-1+",paste(terminLin,collapse="+"))) #Fo <- update.formula(Fo, as.formula(paste("~.+",paste(terminLin,collapse="+")))) M1<-model.matrix(Fo, data=newdata, contrasts = object$contrasts, xlev=object$xlevels) M<-cbind(M, M1) #[,nomiLinOK,drop=FALSE]) } } #end se ogg e' stato ottenuto da segreg #browser() for(i in 1:length(nameZ)){ x.values <- M[,nameZ[i]] DM<- dummy.matrix(x.values, nameZ[i], object, k=list(...)$k) Ulist[[i]]<- DM$U r[[i]]<-DM$newd } #browser() X <-data.matrix(matrix(unlist(r), nrow=n, byrow = FALSE)) colnames(X)<- unlist(sapply(r, colnames)) X<-cbind(M,X) if("(Intercept)" %in% names(estcoef)) X<-cbind("(Intercept)"=1,X) #X<-X[,unique(colnames(X)),drop=FALSE] X<- X[, names(estcoef)] U<- data.matrix(matrix(unlist(Ulist), nrow=n, byrow = FALSE)) colnames(U) <- nameU X.noV<-X X.noV[,nameU]<-U colnomi<- colnames(X) colnomi.noV <- setdiff(colnomi, nameV) X.noV <- X.noV[, colnomi.noV, drop=FALSE] } #end if non-missing(newdata) if(length(setdiff(colnames(X),names(estcoef)))>0) stop("error in 
the names (of the supplied newdata)") estcoef.noV<- estcoef[colnomi.noV] #ignora eventuali altre variabili contenute in newdata #nomiOK<- intersect(names(estcoef.noV), colnames(X.noV)) #X.noV<- X.noV[, nomiOK, drop=FALSE] #estcoef.noV<-estcoef.noV[nomiOK] mu <- eta<- drop(X.noV%*% estcoef.noV) if(!is.null(object$offset)) mu<- eta<- eta+ object$offset #ATTENZIONE c'e' il problema dell'appaiamento dei nomi!!! #il problema e' che estcoef non ha sempre nomi!! if(inherits(object, "glm") && type=="response") { mu<-object$family$linkinv(mu) } if(!se.fit && interval=="none"){ if(any(idNA) && na.arg=="na.pass"){ mu0<-mu mu<- rep(NA, length(idNA)) mu[!idNA]<-mu0 } return(mu) } else { # se if(interval!="none" || se.fit) #browser() V <- if(is.null(.vcov)) vcov.stepmented(object, type=apprx.se, ...) else .vcov if(!is.null(object$constr)){ B=if(nLin>0) append(list(diag(nLin)), object$constr$invA.RList, 1) else object$constr$invA.RList B=append(B, list(diag(length(nameV))), 2) B= do.call(blockdiag, B) V <- B %*% V %*% t(B) } else { X <- X[,colnames(V)] #semplicemente elimina e riordina le colonne di X } se <- sqrt(rowSums((X %*% V) * X)) if(inherits(object, "glm")) { if(type=="response") se <- abs(object$family$mu.eta(eta))*se z<-abs(qnorm((1-level)/2)) s2<- sigma(object)^2 #summary(object)$dispersion } else { z <- abs(qt((1-level)/2, df=object$df.residual)) s2<- sigma(object)^2 #summary(object)$sigma^2 } if(any(idNA) && na.arg=="na.pass"){ mu0<-mu se0<-se mu<-se<- rep(NA, length(idNA)) mu[!idNA]<-mu0 se[!idNA]<-se0 } if(interval=="confidence"){ mu<-cbind(fit=mu, lwr=mu-z*se, upr=mu+z*se) } if(interval=="prediction"){ mu<-cbind(fit=mu, lwr=mu-z*sqrt(se^2+s2), upr=mu+z*sqrt(se^2+s2)) } if(se.fit) { mu <- list(fit=mu, se.fit=se, df= object$df.residual, residual.scale=sqrt(s2)) if(!inherits(object, "glm")) mu$df<- object$df.residual } return(mu) } } segmented/R/seg.Ar.fit.r0000644000176200001440000003545614603515430014527 0ustar liggesusersseg.Ar.fit<-function(obj, XREG, Z, PSI, opz, return.all.sol=FALSE){ #----------------- search.min<-function(h, psi, psi.old, XREG) { psi.ok<- psi*h + psi.old*(1-h) PSI <- matrix(psi.ok, nrow=n, ncol = length(psi.ok), byrow=TRUE) U <- (Z - PSI) * (Z > PSI) #X<-cbind(XREG, U1) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- suppressWarnings(try(eval(call.noV), silent=TRUE)) if (class(obj1)[1] == "try-error") obj1 <- suppressWarnings(try(eval(call.noV.noinit), silent=TRUE)) L1 <- if (class(obj1)[1] == "try-error") (L0 + 10) else (-obj1$loglik) L1 } # call.ok #la call con U e V # call.ok.noinit # call.noV # call.noV.noinit # useExp.k=TRUE # #----------------- # est.k<-function(x1,y1,L0){ # ax<-log(x1) # .x<-cbind(1,ax,ax^2) # b<-drop(solve(crossprod(.x),crossprod(.x,y1))) # const<-b[1]-L0 # DD<-sqrt(b[2]^2-4*const*b[3]) # kk<-exp((-b[2]+ DD) /(2*b[3])) # return(round(kk)) # # # ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0 # # a<-uniroot(ff, c(log(x[4]), 3.4)) # } # #----------------- # dpmax<-function(x,y,pow=1){ # #deriv pmax # if(pow==1) -(x>y) #ifelse(x>y, -1, 0) # else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) # } #----------- in.psi<-function(LIM, PSI, ret.id=TRUE){ #check if psi is inside the range a<-PSI[1,]LIM[2,] is.ok<- !a & !b #TRUE se psi e' OK if(ret.id) return(is.ok) isOK<- all(is.ok) && all(!is.na(is.ok)) isOK} #------------ far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE, fc=.93) { #check if psi are far from the boundaries ..s # returns TRUE, if fine. 
#id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI" non se ne accorge, mentre Z>=PSI, si.. Il contrario e vero con estremo inf e Z>PSI nSeg<-length(unique(id.psi.group)) npsij<-tapply(id.psi.group,id.psi.group,length) nj<-sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z>PSI)[,id.psi.group==.x,drop=FALSE])+1) }, simplify = FALSE) ff<-id.far.ok<-vector("list",length=nSeg) for(i in 1:nSeg){ if(length(nj[[i]])!=npsij[i]+1) nj[[i]]<- tabulate(rowSums((Z>=PSI)[,id.psi.group==i,drop=FALSE])+1) id.ok<-(nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] & id.ok[-1] #esattamente uguale al numero di psi del gruppo i ff[[i]]<-ifelse(diff(nj[[i]])>0, 1/fc, fc) } id.far.ok<-unlist(id.far.ok) ff<-unlist(ff) if(!ret.id) {return(all(id.far.ok)) } else { attr(id.far.ok,"factor") <- ff return(id.far.ok) } #if(ret.id) return(id.far.ok) else return(all(id.far.ok)) } #end far.psi #----------- adj.psi<-function(psii, LIM) {pmin(pmax(LIM[1,],psii),LIM[2,])} #----------- n<-nrow(Z)#length(y) #min.step<-opz$min.step rangeZ <- apply(Z, 2, range) alpha<-opz$alpha limZ <- apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1], alpha[2])) digits<-opz$digits pow<-opz$pow nomiOK<-opz$nomiOK toll<-opz$toll h <-opz$h #conv.psi<-opz$conv.psi gap<-opz$gap #stop.if.error<-opz$stop.if.error #fix.npsi<-opz$fix.npsi fix.npsi<-opz$stop.if.error dev.new<-opz$dev0 visual<-opz$visual id.psi.group<-opz$id.psi.group it.max<-old.it.max<-opz$it.max psi<-PSI[1,] psi<-adj.psi(psi, limZ) PSI<-matrix(psi,nrow=n, ncol=ncol(PSI), byrow=TRUE) fc<-opz$fc names(psi)<-id.psi.group epsilon <- 10 dev.values<-psi.values <- NULL #id.psi.ok<-rep(TRUE, length(psi)) it <- 0 epsilon <- 10 k.values<-dev.values<- NULL psi.values <-list() psi.values[[length(psi.values) + 1]] <- NA #browser() nomiU<- opz$nomiU nomiV<- opz$nomiV call.ok <- opz$call.ok #la call con U e V call.ok.noinit<-call.ok call.ok.noinit$init<-NULL call.noV <- opz$call.noV #la call con U ONLY call.noV.noinit<-call.noV call.noV.noinit$init<-NULL toll<-opz$toll k<-ncol(Z) mio.init<-NULL mio.init.noV<-NULL if(!in.psi(limZ,PSI,FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control", call.=FALSE) if(!far.psi(Z,PSI,id.psi.group,FALSE)) stop("psi starting values too close each other or at the boundaries. Please change them (e.g. set 'quant=TRUE' in seg.control()), or decrease their number.", call. = FALSE) n.psi1<-ncol(Z) #============================================== U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1] colnames(U)<-nomiU #if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(try(eval(call.noV.noinit), silent=TRUE)) if(class(obj0)[1]=="try-error") stop("The first fit with U variables does not work..", call.=FALSE)#obj0 <- suppressWarnings(eval(call.noV.noinit)) ##a volte con i valori iniziali arima() )non converge!! 
Quindi provo senza init L0<- -obj0$loglik if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- obj0 #lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) L1 <- L0 obj$coef <- c(obj$coef, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE) return(obj) } n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- opz$dev0 #del modello iniziale (senza psi) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi #psi iniziali #============================================== if (visual) { cat(paste("iter = ", sprintf("%2.0f",0), " llik = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), -L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.0f", NA), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " ini.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #============================================== id.warn <- FALSE id.psi.changed<-rep(FALSE, it.max) pLin<- opz$pLin idU <- (1:ncol(U))+pLin idV <- (1:ncol(U))+max(idU) tolOp <-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) #browser() while (abs(epsilon) > toll) { it<-it+1 #if(it==2) browser() n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if(n.psi1!=n.psi0){ U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1] if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(try(eval(call.noV), silent=TRUE)) if(class(obj0)[1]=="try-error") obj0 <- suppressWarnings(eval(call.noV.noinit)) ##a volte con i valori iniziali arima() )non converge!! Quindi provo senza init L0<- -obj0$loglik } V <- -(Z>PSI) #dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0) X <- cbind(XREG, U, V) #rownames(X) <- NULL #colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = "")) obj <- suppressWarnings(try(eval(call.ok), silent=TRUE)) if(class(obj)[1]=="try-error") obj <- suppressWarnings(eval(call.ok.noinit)) ##a volte con i valori iniziali arima() )non converge!! Quindi provo senza init beta.c <- obj$coef[idU] #[paste("U", 1:ncol(U), sep = "")] gamma.c <- obj$coef[idV] #[paste("V", 1:ncol(V), sep = "")] if(any(is.na(c(beta.c, gamma.c)))){ if(fix.npsi) { if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } else { id.coef.ok<-!is.na(gamma.c) psi<-psi[id.coef.ok] if(length(psi)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. 
= FALSE) return(0) } gamma.c<-gamma.c[id.coef.ok] beta.c<-beta.c[id.coef.ok] Z<-Z[, id.coef.ok, drop=FALSE] rangeZ <- rangeZ[,id.coef.ok, drop=FALSE] limZ <- limZ[,id.coef.ok, drop=FALSE] nomiOK<-nomiOK[id.coef.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.coef.ok] names(psi)<-id.psi.group } } psi.old<-psi psi <- psi.old + h*gamma.c/beta.c #+h*gamma.c/beta.c psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, opz$id.psi.group, sort), use.names =FALSE) a<-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, XREG=XREG, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if(!is.null(digits)) psi<-round(psi, digits) PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) #--modello con il nuovo psi U<-(Z-PSI)*(Z>PSI) #in seg.(g)lm.fit l'ho chiamata U.. #if(pow[1]!=1) U<-U^pow[1] if (visual) { flush.console() cat(paste("iter = ", sprintf("%2.0f",it), " llik = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), -L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.0f", k), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0<-L1 #U <-U1 #le ho sempre chiamate U, non c'e' bisogno di ri-assegnare.. k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 #Mi sa che non servono i controlli.. soprattutto se non ha fatto step-halving #check if i psi ottenuti sono nel range o abbastanza lontani id.psi.far <-far.psi(Z, PSI, id.psi.group, TRUE, fc=opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if(!all(id.psi.ok)){ if(fix.npsi){ # psi<-adj.psi(psi, limZ) #within range!!! # id.psi.far<-far.psi(Z, PSI, id.psi.group, TRUE) psi<-psi*ifelse(id.psi.far,1,.9) PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) id.psi.changed[it]<-TRUE } else { Z<-Z[, id.psi.ok, drop=FALSE] PSI<-PSI[, id.psi.ok, drop=FALSE] rangeZ <- rangeZ[,id.psi.ok,drop=FALSE] limZ <- limZ[,id.psi.ok,drop=FALSE] nomiOK<-nomiOK[id.psi.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.psi.ok] psi.old<- psi.old[id.psi.ok] psi<- psi[id.psi.ok] names(psi)<-id.psi.group if(ncol(PSI)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while_it ##============================================================================= if(id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.",sep=""), call. = FALSE) if(id.warn) warning(paste("max number of iterations (", it,") attained",sep=""), call. = FALSE) attr( psi.values, "dev") <- dev.values attr( psi.values, "k")<- k.values #ordina i breakpoints.. psi<-unlist(tapply(psi, id.psi.group, sort)) names(psi)<-id.psi.group names.coef<-names(coef(obj)) #names(obj$coefficients) #obj e' quello vecchio che include U1,.. V1,... #browser() #PSI.old<-PSI PSI <- matrix(psi, n, ncol = length(psi), byrow = TRUE) #U e V possono essere cambiati (rimozione/ordinamento psi.. 
) per cui si deve ricalcolare il tutto, altrimenti sarebbe uguale a U1 e obj1 #if(sd(PSI-PSI.old)>0 || id.psi.changed[length(id.psi.changed)]){ V <- (Z>PSI) U <- (Z-PSI)*V V <- -V colnames(U)<-paste("U", 1:ncol(U), sep = "") colnames(V)<-paste("V", 1:ncol(V), sep = "") #X <- cbind(XREG, U, V) #rownames(X) <- NULL obj <- suppressWarnings(try(eval(call.noV), silent=TRUE)) L1<- -obj$loglik obj$coef<-c(obj$coef, rep(0,ncol(V))) names(obj$coef)<-names.coef obj$epsilon <- epsilon obj$it <- it obj<-list(obj=obj,it=it,psi=psi, psi.values=psi.values, U=U,V=V,rangeZ=rangeZ, epsilon=epsilon,nomiOK=nomiOK, SumSquares.no.gap=L1, id.psi.group=id.psi.group,id.warn=id.warn) #inserire id.psi.ok? return(obj) } # if(return.all.sol) { # obj.noV <- suppressWarnings(eval(call.noV)) #, envir=mfExt # #mio.init.noV<-obj.noV$coef # #mio.init.noV<- c(0,obj.noV$coef[-1]) # dev.new1 <- -obj.noV$loglik # #dev.new1 <- sum(mylm(x = cbind(XREG, U), y = y, w = w, offs = offs)$residuals^2) # } # # beta.c<-coef(obj)[nomiU] # gamma.c<-coef(obj)[nomiV] segmented/R/segmented.default.r0000644000176200001440000005715314610436362016227 0ustar liggesuserssegmented.default<-function (obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class = FALSE, ...) { #if("|" %in% all.names(formula(obj))) { # nomeY<-all.vars(formula(obj))[1] #nome Y # nomiX<-strsplit(as.character(formula(obj))[3],"\\|")[[1]][1] # nomiX.disp <-strsplit(as.character(formula(obj))[3],"\\|")[[1]][2] # Fo.charac <- paste(nomeY,nomiX,sep="~") # Fo <- as.formula(Fo.charac) # Fo.conDisp <- as.formula(paste(Fo.charac,nomiX.disp,sep="|")) #} if(is.ts(obj)) { o<-segmented.numeric(obj, seg.Z, psi, npsi, fixed.psi, control, model, keep.class=FALSE, ...) return(o) } build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } ##===inizio funzione update.formula1<-function(old,new,...,opt=1){ #se old e' una formula che contiene "|", questa funzione aggiorna old con new, # se opt=1 "new" viene inclusa solo nella prima parte e la formula restuita contiene "|" # se opt=2, la parte dopo |, viene aggiunta insieme a "new" e quindi la formula restituita NON contiene | if("|" %in% all.names(old)) { nomeY<-all.vars(old)[1] #nome Y nomiX<-strsplit(as.character(old)[3],"\\|")[[1]][1] nomiX.disp <-strsplit(as.character(old)[3],"\\|")[[1]][2] if(opt==2){ nomiX.all<-paste(nomiX, nomiX.disp,sep="+") Fo.charac <- paste(nomeY,nomiX.all,sep="~") Fo <- as.formula(Fo.charac) } else { Fo.charac <- paste(nomeY,nomiX,sep="~") Fo <- as.formula(paste(Fo.charac,nomiX.disp,sep="|")) } return(Fo) } else { update.formula(old,new,...) } } dpmax <- function(x, y, pow = 1) { if (pow == 1) -(x > y) else -pow * ((x - y) * (x > y))^(pow - 1) } if (is.null(control$fn.obj)) fn.obj <- "-as.numeric(logLik(x))" else fn.obj <- control$fn.obj if (missing(seg.Z)) { if (length(all.vars(formula(obj))) == 2) seg.Z <- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } if("V" %in% sub("V[1-9]*[0-9]","V", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'V', 'V1', .. 
are not allowed") if("U" %in% sub("U[1-9]*[0-9]","U", c(all.vars(seg.Z), all.vars(formula(obj) )[-1]))) stop("variable names 'U', 'U1', .. are not allowed") if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") n.Seg <- length(all.vars(seg.Z)) id.npsi <- FALSE if (missing(psi)) { if (n.Seg == 1) { if (missing(npsi)) npsi <- 1 npsi <- lapply(npsi, function(.x) .x) if (length(npsi) != length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi) <- all.vars(seg.Z) } else { #if (missing(npsi)) # stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call. = FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if (length(npsi) != n.Seg) stop(" 'npsi' and seg.Z should have the same length") if (!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi <- lapply(npsi, function(.x) rep(NA, .x)) id.npsi <- TRUE } else { if (n.Seg == 1) { if (!is.list(psi)) { psi <- list(psi) names(psi) <- all.vars(seg.Z) } } else { if (!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if (n.Seg != length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if (!all(names(psi) %in% all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } fc<- min(max(abs(control$fc),.8),1) min.step <- control$min.step alpha <- control$alpha it.max <- old.it.max <- control$it.max digits <- control$digits toll <- control$toll if (toll < 0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) stop.if.error <- fix.npsi <- control$fix.npsi break.boot=control$break.boot n.boot <- control$n.boot size.boot <- control$size.boot gap <- control$gap random <- control$random pow <- control$pow conv.psi <- control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if (n.boot > 0) { # if (!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed <- control$seed # } # else { # employed.Random.seed <- eval(parse(text = paste(sample(0:9, # size = 6), collapse = ""))) # set.seed(employed.Random.seed) # } # if (visual) { # visual <- FALSE # visualBoot <- TRUE # } # if (!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K <- control$K h <- control$h orig.call <- Call <- mf <- obj$call orig.call$formula <- mf$formula <- formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") if (class(mf$formula)[1] == "name" && !"~" %in% paste(mf$formula)) mf$formula <- eval(mf$formula) #mf$formula <- update.formula(mf$formula, paste(seg.Z, collapse = ".+")) mf$formula <- update.formula1(mf$formula, paste(seg.Z, collapse = ".+"), opt=2) mfExt <- mf if (!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset) || !is.null(obj$call$id)) { mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste(c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset), all.vars(obj$call$id)), collapse = "+"))) } if(!is.null(obj$call$random)) { nomiRandom <- all.vars(obj$call$random) if(is.list(eval(obj$call$random))) nomiRandom <- c(nomiRandom, names(eval(obj$call$random))) mfExt$formula<-update.formula(mf$formula, paste(".~.+", paste(nomiRandom, collapse="+"))) } if (inherits(obj, "svyglm")) { mydesign <- eval(obj$call$design) 
mf$data=quote(mydesign$variables) mfExt$data=quote(mydesign$variables) mf<- eval(mf) } else { mf <- eval(mf, parent.frame()) } n <- nrow(mf) nomiOff <- setdiff(all.vars(formula(obj)), names(mf)) if (length(nomiOff) >= 1) mfExt$formula <- update.formula(mfExt$formula, paste(".~.+", paste(nomiOff, collapse = "+"), sep = "")) nomiTUTTI <- all.vars(mfExt$formula) nomiNO <- NULL for (i in nomiTUTTI) { r <- try(eval(parse(text = i), parent.frame()), silent = TRUE) if (class(r)[1] != "try-error" && length(r) == 1 && !is.function(r) && !i%in%names(mf)) nomiNO[[length(nomiNO) + 1]] <- i } if (!is.null(nomiNO)) mfExt$formula <- update.formula(mfExt$formula, paste(".~.-", paste(nomiNO, collapse = "-"), sep = "")) if(inherits(obj, "svyglm")){ mfExt <- eval(mfExt) } else { mfExt <- eval(mfExt, parent.frame()) } if (inherits(obj, "coxph")) { is.Surv <- NA rm(is.Surv) for (i in 1:ncol(mfExt)) { if (is.Surv(mfExt[, i])) aa <- mfExt[, i][, 1:ncol(mfExt[, i])] } mfExt <- cbind(aa, mfExt) } id.seg <- match(all.vars(seg.Z), names(mfExt)) name.Z <- names(mfExt)[id.seg] Z <- mfExt[, id.seg, drop = FALSE] n.psi <- length(unlist(psi)) if (ncol(Z) == 1 && is.vector(psi) && (is.numeric(psi) || is.na(psi))) { psi <- list(as.numeric(psi)) names(psi) <- name.Z } id.psi <- match(colnames(Z), names(psi)) if ((ncol(Z) != length(psi)) || any(is.na(id.seg))) stop("Length or names of Z and psi do not match") #nome <- names(psi)[id.psi] #psi <- psi[nome] if (id.npsi) { for (i in 1:length(psi)) { K <- length(psi[[i]]) if (any(is.na(psi[[i]]))) psi[[i]] <- if (control$quant) { quantile(Z[, i], prob = seq(0, 1, l = K + 2)[-c(1, K + 2)], names = FALSE) } else { (min(Z[, i]) + diff(range(Z[, i])) * (1:K)/(K + 1)) } } } else { for (i in 1:length(psi)) { if (any(is.na(psi[[i]]))) psi[[i]] <- if (control$quant) { quantile(Z[, i], prob = seq(0, 1, l = K + 2)[-c(1, K + 2)], names = FALSE) } else { (min(Z[, i]) + diff(range(Z[, i])) * (1:K)/(K + 1)) } } } #########==================== SE PSI FIXED id.psi.fixed <- FALSE if(!is.null(fixed.psi)){ id.psi.fixed <- TRUE if(is.numeric(fixed.psi) && n.Seg==1) { fixed.psi<-list(fixed.psi) names(fixed.psi)<-all.vars(seg.Z) } if(is.list(fixed.psi)) { if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ") } else { stop(" 'fixed.psi' has to be a named list ") } fixed.psi<-lapply(fixed.psi, sort) Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n) n.fixed.psi<-sapply(fixed.psi, length) rip.nomi <- rep( names(fixed.psi), n.fixed.psi) rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x)) colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="") PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE) fixedU<-(Zfixed-PSI)*(Zfixed>PSI) #XREG<-cbind(XREG, fixedU) } #########====================END SE PSI FIXED initial.psi <- psi a <- sapply(psi, length) id.psi.group <- rep(1:length(a), times = a) Z <- matrix(unlist(mapply(function(x, y) rep(x, y), Z, a, SIMPLIFY = TRUE)), nrow = n) colnames(Z) <- nomiZ.vett <- rep(name.Z, times = a) psi <- unlist(psi) psi <- unlist(tapply(psi, id.psi.group, sort)) k <- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) c1 <- apply((Z <= PSI), 2, all) c2 <- apply((Z >= PSI), 2, all) if (sum(c1 + c2) != 0 || is.na(sum(c1 + c2))) stop("starting psi out of the admissible range") ripetizioni <- as.vector(unlist(tapply(id.psi.group, id.psi.group, function(x) 1:length(x)))) nomiU <- paste("U", ripetizioni, sep = 
"") nomiU <- paste(nomiU, nomiZ.vett, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ.vett, sep = ".") nnomi <- c(nomiU, nomiV) U <- (Z - PSI) * (Z > PSI) if (pow[1] != 1) U <- U^pow[1] colnames(U) <- nomiU V <- -(Z > PSI) for (i in 1:k) { mfExt[nomiU[i]] <- U[, i] mfExt[nomiV[i]] <- V[, i] } if(id.psi.fixed){ for(i in 1:ncol(fixedU)) mfExt[colnames(fixedU)[i]]<-fixedU[,i] #mf[colnames(fixedU)[i]]<- mf e' definito sopra... Ma serve??? Fo <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(c(nnomi,colnames(fixedU)), collapse = "+"))), opt=1) Fo.noV <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(c(nomiU,colnames(fixedU)), collapse = "+"))), opt=1) } else { Fo <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(nnomi, collapse = "+"))), opt=1) Fo.noV <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))), opt=1) } call.ok <- update(obj, Fo, evaluate = FALSE, data = mfExt) call.noV <- update(obj, Fo.noV, evaluate = FALSE, data = mfExt) if(class(obj)[1] == "rq") { #inherits(obj, "rq") call.noV$tau <- call.ok$tau <- obj$tau } #browser() if (inherits(obj, "svyglm")){ #mydesign <- eval(obj$call$design) for (i in 1:k) { mydesign$variables[nomiU[i]] <- U[, i] mydesign$variables[nomiV[i]] <- V[, i] } call.ok$design<- call.noV$design<-quote(mydesign) call.ok$data<-call.noV$data<-NULL } if (it.max == 0) { if (!is.null(call.noV[["subset"]])) call.noV[["subset"]] <- NULL obj1 <- eval(call.noV, envir = mfExt) return(obj1) } initial <- psi obj0 <- obj dev0 <- eval(parse(text = fn.obj), list(x = obj)) if (length(dev0) <= 0) stop("error in the objective to be minimized, see 'fn.obj' in ?seg.control") if (length(dev0) > 1) stop("the objective to be minimized is not scalar, see 'fn.obj' in ?seg.control") if (is.na(dev0)) dev0 <- 10 list.obj <- list(obj) nomiOK <- nomiU if(is.null(alpha)) alpha<- max(.05, 1/nrow(PSI)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) opz <- list(toll = toll, h = h, stop.if.error = stop.if.error, dev0 = dev0, visual = visual, it.max = it.max, nomiOK = nomiOK, id.psi.group = id.psi.group, gap = gap, visualBoot = visualBoot, pow = pow, digits = digits, conv.psi = conv.psi, alpha = alpha, fix.npsi = fix.npsi, min.step = min.step, seed=control$seed, min.n=control$min.n) opz$call.ok <- call.ok opz$call.noV <- call.noV opz$formula.orig <- formula(obj) opz$nomiU <- nomiU opz$nomiV <- nomiV opz$fn.obj <- fn.obj opz$fc=fc if (inherits(obj, "svyglm")) opz$mydesign <- mydesign opz <- c(opz, ...) if (n.boot <= 0) { obj <- seg.def.fit(obj, Z, PSI, mfExt, opz) } else { obj <- seg.def.fit.boot(obj, Z, PSI, mfExt, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot=break.boot) seed<- obj$seed } if (!is.list(obj)) { warning("No breakpoint estimated", call. = FALSE) return(obj0) } if (!is.null(obj$obj$df.residual) && !is.na(obj$obj$df.residual)) { if (obj$obj$df.residual == 0) warning("no residual degrees of freedom (other warnings expected)", call. = FALSE) } id.psi.group <- obj$id.psi.group nomiU <- nomiOK <- obj$nomiOK nomiVxb <- sub("U", "psi", nomiOK) nomiFINALI <- unique(sub("U[1-9]*[0-9].", "", nomiOK)) nomiSenzaPSI <- setdiff(name.Z, nomiFINALI) if (length(nomiSenzaPSI) >= 1) warning("no breakpoints found for: ", paste(nomiSenzaPSI, " "), call. 
= FALSE) it <- obj$it psi <- obj$psi #########SE PSI FIXED psi.list<-vector("list", length=length(unique(name.Z))) names(psi.list)<-name.Z names(psi)<-nomiZ.vett for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } psi.values <- if (n.boot <= 0) obj$psi.values else obj$boot.restart U <- obj$U V <- obj$V id.warn <- obj$id.warn for (jj in colnames(V)) { VV <- V[, which(colnames(V) == jj), drop = FALSE] sumV <- abs(rowSums(VV)) if (any(table(sumV) <= 1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") } rangeZ <- obj$rangeZ mfExt <- obj$mfExt names(mfExt)[match(obj$nomiV, names(mfExt))] <- nomiVxb R <- obj$R R.noV <- obj$R.noV r <- obj$r obj <- obj$obj k <- length(psi) #coef(obj) ha gia i nomi corretti... #all.coef <- coef(obj) #names(all.coef) <- c(names(coef(obj0)), nomiU, nomiVxb) #beta.c <- all.coef[nomiU] beta.c<-unlist( unique(coef(obj)[nomiU])) #beta.c<-coef(obj)[nomiU] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) nnomi <- c(nomiU, nomiVxb) Fo <- update.formula1(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+"))), opt=1) ############### if (inherits(obj, "svyglm")){ for (i in 1:ncol(U)) { mydesign$variables[nomiU[i]] <- U[, i] mydesign$variables[nomiVxb[i]] <- Vxb[, i] } } else { for (i in 1:ncol(U)) { mfExt[nomiU[i]] <- mf[nomiU[i]] <- U[, i] mfExt[nomiVxb[i]] <- mf[nomiVxb[i]] <- Vxb[, i] } } ############# DA GUARDARE!!!!!!!!!!!!!!!!!!!! #se ci sono fixed.psi if(id.psi.fixed){ for(i in 1:ncol(fixedU)) mfExt[colnames(fixedU)[i]]<-mf[colnames(fixedU)[i]]<-fixedU[,i] Fo<-update.formula(Fo, paste(c("~.",colnames(fixedU)), collapse="+")) } ############# objF <- update(obj0, Fo, evaluate = FALSE, data = mfExt) if (!is.null(objF[["subset"]])) objF[["subset"]] <- NULL if (is.null(opz$constr)) opz$constr <- 0 if ((opz$constr %in% 1:2) && class(obj0)[1] == "rq") { objF$method <- "fnc" objF$R <- quote(R) objF$r <- quote(r) } #browser() if (inherits(obj, "svyglm")){ objF$design<- call.noV$design<-quote(mydesign) objF$data<-call.noV$data<-NULL } objF <- eval(objF, envir = mfExt) objF$offset <- obj0$offset isNAcoef <- any(is.na(coef(objF))) if (isNAcoef) { if (stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } else { warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE) Fo <- update.formula1(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))), opt=1) objF <- if ((opz$constr %in% 1:2) && class(obj0)[1] == "rq") { update(obj0, formula = Fo, R = R.noV, r = r, method = "fnc", evaluate = TRUE, data = mfExt) } else { update(obj0, Fo, evaluate = TRUE, data = mfExt) } names(psi) <- nomiVxb objF$psi <- psi return(objF) } } #4/12/19: modifica fatta per consentire betareg.. Attenzione #semplicemente controlla se la componente "coef*" e' una lista o no.. #COSA succede con geese models? 
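## Illustrative sketch, not from the package sources: the U/V columns built
## above encode the broken-line reparameterization, U = (x - psi) * (x > psi)
## for the slope change and V = -(x > psi) for the breakpoint update term. With
## psi treated as known, the segmented fit is an ordinary linear model in x and
## U. Toy, self-contained example (simulated data, names are arbitrary):
set.seed(1)
x <- 1:100
y <- 2 + 0.1 * x + 1.5 * pmax(x - 60, 0) + rnorm(100, sd = 0.5)
U <- (x - 60) * (x > 60)                     # same construction as above, psi = 60
coef(lm(y ~ x + U))                          # ~ (2, 0.1, 1.5): slope change on 'U'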
# #giugno 20: aggiunto un tentativo "estimate" per consentire oggetti censReg nomeCoef<-grep("coef", names(objF), value = TRUE) if(length(nomeCoef)==0) nomeCoef<-grep("estimate", names(objF), value = TRUE) if(!is.list(objF[[nomeCoef]])){ objF[[nomeCoef]][names(obj[[nomeCoef]])]<-obj[[nomeCoef]] #names.coef <- names(coef(objF)) #names(obj[[nomeCoef]]) <- names(objF[[nomeCoef]]) #objF[[nomeCoef]][names.coef] <- coef(obj)[names.coef] } else { #names.coef <- names(objF[[grep("coef", names(objF), value = TRUE)]][[1]]) names(obj[[nomeCoef]][[1]]) <- names(objF[[nomeCoef]][[1]]) objF[[nomeCoef]][[1]] <- obj[[nomeCoef]][[1]] objF[[nomeCoef]][[2]] <- obj[[nomeCoef]][[2]] } if (!is.null(objF$maximum)) objF$maximum <- obj$maximum if (!is.null(objF$pseudo.r.squared)) objF$pseudo.r.squared <- obj$pseudo.r.squared if (!is.null(objF$geese$beta)) objF$geese$beta <- obj$coefficients #oppure objF$coefficients? if (!is.null(objF$geese$gamma)) objF$geese$gamma <- obj$geese$gamma if (!is.null(objF$geese$alpha)) objF$geese$alpha <- obj$geese$alpha if (!is.null(objF$fitted.values)) objF$fitted.values <- obj$fitted.values if (!is.null(objF$residuals)) objF$residuals <- obj$residuals if (!is.null(objF$linear.predictors)) objF$linear.predictors <- obj$linear.predictors if (!is.null(objF$deviance)) objF$deviance <- obj$deviance if (!is.null(objF$weights)) objF$weights <- obj$weights if (!is.null(objF$aic)) objF$aic <- obj$aic + 2 * k if (!is.null(objF$loglik)) objF$loglik <- obj$loglik if (!is.null(objF$rho)) objF$rho <- obj$rho if (!is.null(objF$dual)) objF$dual <- obj$dual if (!is.null(objF$penalized.deviance)) objF$penalized.deviance <- obj$penalized.deviance if (!is.null(objF$ModifiedScores)) objF$ModifiedScores <- c(obj$ModifiedScores, rep(0, k)) Cov <- try(vcov(objF), silent = TRUE) if(inherits(Cov, "try-error")){ #if (class(Cov) == "try-error") { warning("cannot compute the covariance matrix", call. = FALSE) vv <- NA } else { vv <- Cov[nomiVxb, nomiVxb, drop=FALSE] vv<-sqrt(diag(vv)) } ris.psi <- matrix(NA, length(psi), 3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[, 2] <- psi ris.psi[, 3] <- vv a <- tapply(id.psi.group, id.psi.group, length) a.ok <- NULL for (j in name.Z) { if (j %in% nomiFINALI) { a.ok[length(a.ok) + 1] <- a[1] a <- a[-1] } else { a.ok[length(a.ok) + 1] <- 0 } } initial <- unlist(mapply(function(x, y) { if (is.na(x)[1]) rep(x, y) else x }, initial.psi[nomiFINALI], a.ok[a.ok != 0], SIMPLIFY = TRUE)) if (opz$stop.if.error) ris.psi[, 1] <- initial objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) objF$id.group <- if (length(name.Z) <= 1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call <- orig.call ############## PSI FIXED objF$indexU<-build.all.psi(psi.list, fixed.psi) ############## if (model) objF$model <- mf if (n.boot > 0) objF$seed <- seed if (keep.class) class(objF) <- c("segmented", class(obj0)) objF$psi[,"Initial"]<-NA list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] warning("The returned fit is OK, but not of class 'segmented'.\n If interested, call explicitly the segmented methods (plot.segmented, confint.segmented,..)", call. 
= FALSE) return(list.obj) } segmented/R/step.num.fit.boot.r0000644000176200001440000002145014757620753016127 0ustar liggesusersstep.num.fit.boot <- function(y, XREG, Z, PSI, w, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #browser() #------------- if(is.null(opz$seed)){ mY <- mean(y) sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } #-------------- visualBoot<-opz$display opz$display<-FALSE #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <- 0 opz0<- opz opz0$agg <- .2 n<-length(y) alpha <- opz$alpha limZ <- apply(Z, 2, quantile, names = FALSE, probs = alpha) rangeZ <- apply(Z, 2, range) #serve sempre #browser() o0 <-try(suppressWarnings(step.num.fit(y, XREG, Z, PSI, w, opz0, return.all.sol=FALSE)), silent=TRUE) #o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) if(!is.list(o0)) { o0<- suppressWarnings(step.num.fit(y, XREG, Z, PSI, w, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(step.num.fit(y, XREG, Z, PSI1, w, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z count.random<-0 
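## Illustrative usage sketch, not from the package sources: a typical call to
## the segmented() generic (dispatched to segmented.lm/segmented.glm or to the
## default method above, depending on the class of the starting fit). Data are
## simulated; object names are arbitrary:
set.seed(2)
x  <- 1:100
y  <- 3 + 0.05 * x - 1.2 * pmax(x - 40, 0) + rnorm(100, sd = 0.4)
o  <- lm(y ~ x)
os <- segmented(o, seg.Z = ~x, npsi = 1)     # or psi = 40 for an explicit start
os$psi                                       # Initial / Est. / St.Err table (see above)
slope(os)                                    # slopes of the two segments
confint(os)                                  # confidence interval for the breakpoint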
agg.values<-seq(.2,.05,l=n.boot) ###INIZIO BOOT alpha<-.1 corr=1.2 n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ #browser() qpsi <- sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi.cor <- sapply(1:ncol(Z),function(i)mean((corr*est.psi0[i])>=Z[,i])) qpsi <- ifelse(abs(qpsi-.5)<=.2, qpsi.cor, alpha) alpha<-1-alpha corr<-1/corr est.psi0 <- sapply(1:ncol(Z),function(i)quantile(Z[,i], probs=qpsi[i],names=FALSE)) est.psi0 <- adj.psi(est.psi0, limZ) #est.psi0<- jitter(est.psi0, amount=min(diff(est.psi0))) } ############################ 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ########################################## PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(step.num.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(step.num.fit(yy, XREG, Z.orig, PSI, weights, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 opz$agg<-agg.values[k] opz$Nboot <- k o <-try(suppressWarnings(step.num.fit(y, XREG, Z.orig, PSI, w, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o <-try(suppressWarnings(step.num.fit(y, XREG, Z, PSI1, w, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(est.psi0) Lp<-length(unlpsi) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(Lp,digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. 
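## Illustrative sketch, not from the package sources: a stripped-down version
## of the bootstrap-restart idea used in the loop above. Each iteration (i)
## re-estimates the breakpoint on a bootstrap resample, starting from the
## current best value, and (ii) restarts the fit on the original data from that
## estimate, keeping whichever solution lowers the objective. A toy
## one-breakpoint RSS and a deliberately local search stand in for the real
## fitters; the names (rss, local_fit, ...) are invented for the illustration:
set.seed(3)
x <- sort(runif(80, 0, 10)); y <- 1 + x + 2 * pmax(x - 6, 0) + rnorm(80, sd = 2)
rss <- function(psi, x, y) sum(resid(lm(y ~ x + pmax(x - psi, 0)))^2)
local_fit <- function(start, x, y, w = 1.5)
  optimize(rss, c(start - w, start + w), x = x, y = y)$minimum
best <- local_fit(2, x, y)                    # local search from a poor start
for (b in 1:10) {
  id   <- sample(seq_along(y), replace = TRUE)
  psiB <- local_fit(best, x[id], y[id])       # (i) breakpoint on the resample
  cand <- local_fit(psiB, x, y)               # (ii) restart on the original data
  if (rss(cand, x, y) < rss(best, x, y)) best <- cand
}
best                                          # typically ends up near the true 6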
asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) #SS.ok<-min(all.selected.ss) #id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) #psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) #est.psi0<-psi.mean # #devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0 <- try(step.num.fit(y, XREG, Z, PSI1, w, opz1), silent=TRUE) warning("The final fit can be unreliable (possibly mispecified segmented relationship)", call.=FALSE, immediate.=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris o0$seed<-seed #rm(.Random.seed, envir=globalenv()) return(o0) } segmented/R/seg.def.fit.r0000644000176200001440000004163114603520331014707 0ustar liggesusersseg.def.fit<-function (obj, Z, PSI, mfExt, opz, return.all.sol = FALSE) { useExp.k = TRUE search.min<-function(h, psi, psi.old) { #, mfExt , X, y, w, offs psi.ok<- psi*h + psi.old*(1-h) PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] if(is.null(opz$mydesign)){ for (i in 1:ncol(U1)) { mfExt[nomiU[i]] <- U1[, i] } } else { for (i in 1:ncol(U1)) { mydesign$variables[nomiU[i]] <- U1[, i] } } obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else eval(parse(text = fn.obj), list(x = obj1)) L1 } est.k <- function(x1, y1, L0) { ax <- log(x1) .x <- cbind(1, ax, ax^2) b <- drop(solve(crossprod(.x), crossprod(.x, y1))) const <- b[1] - L0 DD <- sqrt(b[2]^2 - 4 * const * b[3]) kk <- exp((-b[2] + DD)/(2 * b[3])) return(round(kk)) } dpmax <- function(x, y, pow = 1) { if (pow == 1) -(x > y) else -pow * ((x - y) * (x > y))^(pow - 1) } in.psi <- function(LIM, PSI, ret.id = TRUE) { a <- PSI[1, ] < LIM[1, ] b <- PSI[1, ] > LIM[2, ] is.ok <- !a & !b if (ret.id) return(is.ok) isOK <- all(is.ok) && all(!is.na(is.ok)) isOK } far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE, fc=.93) { #check if psi are far from the boundaries ..s # returns TRUE, if fine. #id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI" non se ne accorge, mentre Z>=PSI, si.. 
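## Illustrative note, not from the package sources: what the far.psi() helper
## defined here computes. For each segmented variable, rowSums(Z > PSI) counts
## how many breakpoints every observation exceeds; adding 1 gives the interval
## index and tabulate() the number of observations per interval. Intervals with
## fewer than 2 points flag a breakpoint as too close to a neighbour or to the
## boundary. Toy example (z, psi are arbitrary illustrative objects):
z   <- 1:10
psi <- c(3.5, 9.5)                            # only one point lies beyond the 2nd psi
PSI <- matrix(psi, nrow = length(z), ncol = 2, byrow = TRUE)
nj  <- tabulate(rowSums(z > PSI) + 1)         # points per interval: 3 6 1
nj >= 2                                       # TRUE TRUE FALSE -> 2nd psi "not far"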
Il contrario e' vero con estremo inf e Z>PSI nSeg<-length(unique(id.psi.group)) npsij<-tapply(id.psi.group,id.psi.group,length) nj<-sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z>PSI)[,id.psi.group==.x,drop=FALSE])+1) }, simplify = FALSE) ff<-id.far.ok<-vector("list",length=nSeg) for(i in 1:nSeg){ if(length(nj[[i]])!=npsij[i]+1) nj[[i]]<- tabulate(rowSums((Z>=PSI)[,id.psi.group==i,drop=FALSE])+1) id.ok<-(nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] & id.ok[-1] #esattamente uguale al numero di psi del gruppo i ff[[i]]<-ifelse(diff(nj[[i]])>0, 1/fc, fc) } id.far.ok<-unlist(id.far.ok) ff<-unlist(ff) if(!ret.id) {return(all(id.far.ok)) } else { attr(id.far.ok,"factor") <- ff return(id.far.ok) } #if(ret.id) return(id.far.ok) else return(all(id.far.ok)) } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } fn.costr <- function(n.psi, isLeft = 1, isInterc = 1) { IU <- -diag(n.psi) sumU <- diag(n.psi) sumU[row(sumU) > col(sumU)] <- 1 if (isLeft) { sumU <- cbind(1, sumU) IU <- diag(c(1, -rep(1, n.psi))) } A <- rbind(IU, sumU) if (isInterc) { A <- rbind(0, A) A <- cbind(c(1, rep(0, nrow(A) - 1)), A) } A <- cbind(A, matrix(0, nrow(A), n.psi)) A } vincoli <- FALSE c1 <- apply((Z <= PSI), 2, all) c2 <- apply((Z >= PSI), 2, all) if (sum(c1 + c2) != 0 || is.na(sum(c1 + c2))) stop("psi out of the range") n <- nrow(Z) #min.step <- opz$min.step rangeZ <- apply(Z, 2, range) alpha <- opz$alpha limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) digits <- opz$digits pow <- opz$pow nomiOK <- opz$nomiOK toll <- opz$toll hh <- opz$h conv.psi <- opz$conv.psi gap <- opz$gap stop.if.error <- opz$stop.if.error fix.npsi <- opz$fix.npsi dev.new <- opz$dev0 visual <- opz$visual id.psi.group <- opz$id.psi.group it.max <- old.it.max <- opz$it.max fc<-opz$fc psi <- PSI[1, ] psi<-adj.psi(psi, limZ) PSI<-matrix(psi,nrow=n, ncol=ncol(PSI), byrow=TRUE) names(psi) <- id.psi.group epsilon <- 10 dev.values <- psi.values <- NULL it <- 0 epsilon <- 10 k.values <- dev.values <- NULL psi.values <- list() psi.values[[length(psi.values) + 1]] <- NA nomiU <- opz$nomiU nomiV <- opz$nomiV call.ok <- opz$call.ok call.noV <- opz$call.noV toll <- opz$toll mydesign<- opz$mydesign #browser() if (!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range", call. = FALSE) if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi starting values too close each other or at the boundaries. Please change them (e.g. set 'quant=TRUE' in seg.control()), or decrease their number.", call. = FALSE) n.psi1 <- ncol(Z) if (is.null(opz$constr)) opz$constr <- 0 if ((opz$constr %in% 1:2) && class(obj)[1] == "rq") { vincoli <- TRUE call.ok$method <- "fnc" call.ok$R <- quote(R) call.ok$r <- quote(r) call.noV$method <- "fnc" call.noV$R <- quote(R.noV) call.noV$r <- quote(r) } fn.obj <- opz$fn.obj U <- ((Z - PSI) * (Z > PSI)) colnames(U) <- nomiU #if (pow[1] != 1) U <- U^pow[1] obj0 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) if ("try-error" %in% class(obj0)) stop("The first fit with U variables does not work..", call. 
= FALSE) L0 <- eval(parse(text = fn.obj), list(x = obj0)) if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- obj0 #lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) L1 <- L0 #sum(obj$residuals^2 * w) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE, nomiV = nomiV, nomiU = nomiU, mfExt = mfExt, mydesign=mydesign) return(obj) } n.intDev0 <- nchar(strsplit(as.character(L0), "\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- opz$dev0 dev.values[length(dev.values) + 1] <- L0 psi.values[[length(psi.values) + 1]] <- psi if (visual) { cat(paste("iter = ", sprintf("%2.0f", 0), " min.f = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " k = ", sprintf("%2.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } id.warn <- FALSE id.psi.changed<-rep(FALSE, it.max) tolOp <-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) while (abs(epsilon) > toll) { it <- it + 1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if (n.psi1 != n.psi0) { U <- ((Z - PSI) * (Z > PSI)) if (pow[1] != 1) U <- U^pow[1] for (i in 1:ncol(U)) { mfExt[nomiU[i]] <- U[, i] } obj0 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) L0 <- eval(parse(text = fn.obj), list(x = obj0)) } V <- dpmax(Z, PSI, pow = pow[2]) if(is.null(opz$mydesign)){ for (i in 1:n.psi1) { mfExt[nomiU[i]] <- U[, i] mfExt[nomiV[i]] <- V[, i] } } else { for (i in 1:n.psi1) { mydesign$variables[nomiU[i]] <- U[, i] mydesign$variables[nomiV[i]] <- V[, i] } } R <- fn.costr(ncol(U), 1, 1) R.noV <- R[, -((ncol(R) - 1) + seq_len(ncol(U))), drop = FALSE] r <- rep(0, nrow(R)) obj <- suppressWarnings(eval(call.ok, envir = mfExt)) beta.c <- unlist( unique(coef(obj)[nomiU])) gamma.c <-unlist( unique(coef(obj)[nomiV])) #browser() if (any(is.na(c(beta.c, gamma.c)))) { if (fix.npsi) { if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call. 
= FALSE) } else { id.coef.ok <- !is.na(gamma.c) psi <- psi[id.coef.ok] gamma.c <- gamma.c[id.coef.ok] beta.c <- beta.c[id.coef.ok] Z <- Z[, id.coef.ok, drop = FALSE] rangeZ <- rangeZ[, id.coef.ok, drop = FALSE] limZ <- limZ[, id.coef.ok, drop = FALSE] nomiOK <- nomiOK[id.coef.ok] id.psi.group <- id.psi.group[id.coef.ok] names(psi) <- id.psi.group } } psi.old <- psi psi <- psi.old + hh*gamma.c/beta.c psi<- adj.psi(psi, limZ) a<-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, opz$id.psi.group, sort), use.names =FALSE) if (!is.null(digits)) psi <- round(psi, digits) #PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) PSI <- matrix(psi, ncol=length(psi), nrow=n, byrow = TRUE) U <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U <- U^pow[1] if(is.null(opz$mydesign)){ for(i in 1:ncol(U)) mfExt[nomiU[i]] <- U[, i] } else { for(i in 1:ncol(U)) mydesign$variables[nomiU[i]] <- U[, i] } # obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) # L1 <- if (class(obj1)[1] == "try-error") # L0 + 10 # else eval(parse(text = fn.obj), list(x = obj1)) # use.k <- k <- 1 # L1.k <- NULL # L1.k[length(L1.k) + 1] <- L1 # # while (L1 > L0) { # # k <- k + 1 # # use.k <- if (useExp.k) # # 2^(k - 1) # # else k # # psi <- psi.old + (gamma.c/beta.c)/(use.k * h) # # if (!is.null(digits)) # # psi <- round(psi, digits) # # PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) # # U <- (Z - PSI) * (Z > PSI) # # if (pow[1] != 1) # # U <- U^pow[1] # # if(is.null(opz$mydesign)){ # # for (i in 1:ncol(U)) mfExt[nomiU[i]] <- U[, i] # # #obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) # # } else { # # for (i in 1:ncol(U)) mydesign$variables[nomiU[i]] <- U[, i] # # #obj1 <- suppressWarnings(try(eval(call.noV), silent = TRUE)) # # } # # obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) # # L1 <- if (class(obj1)[1] == "try-error") # # L0 + 10 # # else eval(parse(text = fn.obj), list(x = obj1)) # # L1.k[length(L1.k) + 1] <- L1 # # if (1/(use.k * h) < min.step) { # # break # # } # # } if (visual) { flush.console() cat(paste("iter = ", sprintf("%2.0f", it), " min.f = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L1), " k = ", sprintf("%2.3f", use.k), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " est.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } epsilon <- if (conv.psi) max(abs((psi - psi.old)/psi.old)) else (L0 - L1)/(abs(L0) + 0.1) L0 <- L1 #U<-U1 k.values[length(k.values) + 1] <- use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE, fc=opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if (!all(id.psi.ok)) { if (fix.npsi) { #psi <- psi * ifelse(id.psi.far, 1, 0.9) psi <- psi * ifelse(id.psi.far, 1, attr(id.psi.far, "factor")) PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) id.psi.changed[it]<-TRUE } else { Z <- Z[, id.psi.ok, drop = FALSE] PSI <- PSI[, id.psi.ok, drop = FALSE] rangeZ <- rangeZ[, id.psi.ok, drop = FALSE] limZ <- limZ[, id.psi.ok, drop = FALSE] nomiOK <- nomiOK[id.psi.ok] id.psi.group <- id.psi.group[id.psi.ok] psi.old <- psi.old[id.psi.ok] psi <- psi[id.psi.ok] names(psi) <- id.psi.group if (ncol(PSI) <= 0) { warning(paste("All 
breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while ############################################################################################### if(id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.",sep=""), call. = FALSE) if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. = FALSE) attr(psi.values, "dev") <- dev.values attr(psi.values, "k") <- k.values psi <- unlist(tapply(psi, id.psi.group, sort)) names(psi) <- id.psi.group names.coef <- names(coef(obj)) PSI.old <- PSI PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) #if(sd(PSI-PSI.old)>0 || id.psi.changed[length(id.psi.changed)]){ U <- (Z - PSI) * (Z > PSI) colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") if(is.null(opz$mydesign)){ for (i in 1:n.psi1) { mfExt[nomiU[i]] <- U[, i] mfExt[nomiV[i]] <- V[, i] } } else { mydesign$variables[nomiU[i]] <- U[, i] mydesign$variables[nomiV[i]] <- V[, i] } obj <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE)) L1 <- eval(parse(text = fn.obj), list(x = obj)) #} else { # obj <- obj1 #} nomeCoef <- grep("coef", names(obj), value = TRUE) if(length(nomeCoef)==0){ nomeCoef <- grep("estimate", names(obj), value = TRUE) } if(length(nomeCoef)==0) stop("I can't extract the estimated coefficients") if(is.list(obj[[nomeCoef]])) { obj[[nomeCoef]][[1]] <- c(obj[[nomeCoef]][[1]], rep(0, ncol(V))) names(obj[[nomeCoef]][[1]]) <- names.coef[1:length(obj[[nomeCoef]][[1]])] } else { nomiconV<- c( names(obj[[nomeCoef]]), sub("V", "psi", nomiV)) obj[[nomeCoef]] <- c(obj[[nomeCoef]], rep(0, ncol(V))) #se i coeff includono un altro parametro (ed es., la varianza come per censReg), l'ordine deve essere # rispettato.. mentre "names.coef" #names(obj[[nomeCoef]]) <- names.coef names(obj[[nomeCoef]]) <- nomiconV } obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = id.warn, nomiV = nomiV, nomiU = nomiU, mfExt = mfExt, mydesign=mydesign) if (vincoli) { obj$R <- R obj$R.noV <- R.noV obj$r <- r } return(obj) } segmented/R/davies.test.r0000644000176200001440000003061714415477002015055 0ustar liggesusers#se n=1000: value out of range in 'gammafn' #warning se "lm" con "glm"??? `davies.test` <- function (obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), type=c("lrt","wald"), values=NULL, dispersion=NULL) { # extract.t.value.U<-function(x){ # #estrae il t-value dell'ultimo coeff in un oggetto restituito da lm.fit # #non serve... 
in realta' viene usata extract.t.value.U.glm() # #x<-x$obj # R<-qr.R(x$qr) # p<-ncol(R) # n<-length(x$fitted.values) # invR<-backsolve(R,diag(p)) # hat.sigma2<-sum(x$residuals^2)/(n-p) # #solve(crossprod(qr.X(x$qr))) # V<-tcrossprod(invR)*hat.sigma2 # tt<-x$coefficients[p]/sqrt(V[p,p]) # tt} #------------------------------------------------------------------------------- daviesLM<-function(y, z, xreg, weights, offs, values, k, alternative){ #Davies test with sigma unknown #-------------- #> gammaA<-function(x){ # x^(x-.5)*exp(-x)*sqrt(2*pi)*(1+1/(12*x)+1/(288*x^2)-139/(51840*x^3) -571/(2488320*x^4)) # } #exp(lgamma()) fn="pmax(x-p,0)" y<-y-offs n<-length(y) n1<-length(values) RIS<-matrix(NA,n1,2) X.psi<-matrix(NA,n,length(fn)) df.res<- n - ncol(xreg) - length(fn) for(i in 1:n1){ for(j in 1:length(fn)) X.psi[,j]<-eval(parse(text=fn[[j]]), list(x=z, p=values[i])) xx1.new<-cbind(X.psi,xreg) #lrt #mu1.new<-xx1.new%*%solve(crossprod(xx1.new), crossprod(xx1.new,y)) #rss1<-sum((y-mu1.new)^2) #sigma2<-if(missing(sigma)) rss1/(n-ncol(xx1.new)) else sigma^2 #RIS[i]<-((rss0-rss1)/ncol(X.psi))/sigma2 #Wald invXtX1<-try(solve(crossprod(sqrt(weights)*xx1.new)), silent=TRUE) if(class(invXtX1)[1]!="try-error"){ hat.b<-drop(invXtX1%*%crossprod(weights*xx1.new,y)) mu1.new<-xx1.new%*%hat.b devE<-sum((weights*(y-mu1.new)^2)) hat.sigma<- sqrt(devE/df.res) RIS[i,1]<-hat.b[1]/(hat.sigma*sqrt(invXtX1[1, 1])) Z<-hat.b[1]/(sqrt(invXtX1[1, 1])) D2<- Z^2 + devE RIS[i,2]<-Z^2/D2 #beta } } valori<-values[!is.na(RIS[,1])] RIS<- RIS[!is.na(RIS[,1]),] V<-sum(abs(diff(asin(RIS[,2]^.5)))) onesided <- TRUE if (alternative == "less") { M <- min(RIS[,1]) best<-valori[which.min(RIS[,1])] p.naiv <- pt(M, df=df.res, lower.tail = TRUE) } else if (alternative == "greater") { M <- max(RIS[,1]) best<-valori[which.max(RIS[,1])] p.naiv <- pt(M, df=df.res, lower.tail = FALSE) } else { M <- max(abs(RIS[,1])) best<-valori[which.max(abs(RIS[,1]))] p.naiv <- pt(M, df=df.res, lower.tail = FALSE) onesided <- FALSE } u<-M^2/((n-ncol(xx1.new))+ M^2) approxx<-V*(((1-u)^((df.res-1)/2))*gamma(df.res/2+.5))/(2*gamma(df.res/2)*pi^.5) p.adj <- p.naiv + approxx p.adj <- ifelse(onesided, 1, 2) * p.adj p.adj<-list(p.adj=p.adj, valori=valori, ris.valori=RIS[,1], best=best) return(p.adj) # M<-max(abs(RIS[,1])) # u<-M^2/((n-ncol(xx1.new))+ M^2) # approxx<-V*(((1-u)^((df.res-1)/2))*gamma(df.res/2+.5))/(2*gamma(df.res/2)*pi^.5) # p.naiv<-pt(-abs(M), df=df.res) #naive p-value # p.adj<-2*(p.naiv+approxx) #adjusted p-value (upper bound) # p.adj<-min(p.adj, 1) # p.adj<-list(p.adj=p.adj, valori=values, ris.valori=RIS[,1], approxx=approxx, p.naiv=p.naiv) # return(p.adj) } #-------------------------------- daviesGLM<-function(y, z, xreg, weights, offs, values=NULL, k, list.glm, alternative){ #Davies test for GLM (via LRT or Wald) est.dispGLM<-function(object){ df.r <- object$df.residual dispersion <- if(object$family$family%in%c("poisson","binomial")) 1 else object$dev/df.r dispersion } extract.t.value.U.glm<-function(object,dispersion,isGLM=TRUE){ #estrae il t-value dell'ultimo coeff in un oggetto restituito da lm.wfit/glm.fit est.disp <- FALSE df.r <- object$df.residual if (is.null(dispersion)) dispersion <- if(isGLM&&(object$family$family%in%c("poisson","binomial"))) 1 else if (df.r > 0) { est.disp <- TRUE if (any(object$weights == 0)) warning("observations with zero weight not used for calculating dispersion") sum((object$weights * object$residuals^2)[object$weights > 0])/df.r } else { est.disp <- TRUE NaN } dispersion<-max(c(dispersion, 1e-10)) p <- 
object$rank p1 <- 1L:p Qr <- object$qr coef.p <- object$coefficients[Qr$pivot[p1]] covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p)) covmat <- dispersion * covmat.unscaled tvalue <- coef.p[1]/sqrt(covmat[1,1]) #<0.4.0-0 era coef.p[p]/sqrt(covmat[p,p]) tvalue }#end extract.t.value.U.glm #-------------- fn<-"pmax(x-p,0)" dev0<-list.glm$dev0 eta0<-list.glm$eta0 family=list.glm$family type<-list.glm$type dispersion<-list.glm$dispersion n<-length(y) r<-length(fn) n1<-length(values) RIS<-rep(NA, n1) X.psi<-matrix(,n,length(fn)) for(i in 1:n1){ for(j in 1:length(fn)) X.psi[,j]<-eval(parse(text=fn[[j]]), list(x=z, p=values[i])) xreg1<-cbind(X.psi,xreg) o1<-glm.fit(x = xreg1, y = y, weights = weights, offset = offs, family=family, etastart=eta0) dev<-o1$dev if (is.list(o1) && ncol(xreg1)==o1$rank) { RIS[i]<- if(type=="lrt") sqrt((dev0-dev)/est.dispGLM(o1))*sign(o1$coef[1]) else extract.t.value.U.glm(o1,dispersion) } } valori<-values[!is.na(RIS)] ris.valori<-RIS[!is.na(RIS)] V<-sum(abs(diff(ris.valori))) #-----Questo e' se il test di riferimento e' una \chi^2_r. (Dovresti considerare il LRT non segnato) #V<-sum(abs(diff(sqrt(RIS))))#nota sqrt #M<- max(RIS) #approxx<-(V*(M^((r-1)/2))*exp(-M/2)*2^(-r/2))/gamma(r/2) #p.naiv<-1-pchisq(M,df=r) #naive p-value #p.adj<-min(p.naiv+approxx,1) #adjusted p-value (upper bound) onesided <- TRUE if (alternative == "less") { M <- min(ris.valori) best<-valori[which.min(ris.valori)] p.naiv <- pnorm(M, lower.tail = TRUE) } else if (alternative == "greater") { M <- max(ris.valori) best<-valori[which.max(ris.valori)] p.naiv <- pnorm(M, lower.tail = FALSE) } else { M <- max(abs(ris.valori)) best<-valori[which.max(abs(ris.valori))] p.naiv <- pnorm(M, lower.tail = FALSE) onesided <- FALSE } approxx<-V*exp(-(M^2)/2)/sqrt(8*pi) p.adj <- p.naiv + approxx p.adj <- ifelse(onesided, 1, 2) * p.adj p.adj<-list(p.adj=p.adj, valori=valori, ris.valori=ris.valori, best=best) return(p.adj) } #------------------------------------------------------------------------------- if(!inherits(obj, "lm")) stop("A 'lm', 'glm', or 'segmented' model is requested") if(missing(seg.Z)){ if(inherits(obj, "segmented") && length(obj$nameUV$Z)==1) seg.Z<- as.formula(paste("~", obj$nameUV$Z )) if(!inherits(obj, "segmented") && length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) } else { #if(class(seg.Z)!="formula") stop("'seg.Z' should be an one-sided formula") #if(!is(seg.Z,"formula")) stop("'seg.Z' should be an one-sided formula") if(!inherits(seg.Z,"formula")) stop("'seg.Z' should be an one-sided formula") } if(k<=1) stop("k>1 requested! k>=10 is recommended") if(k<10) warnings("k>=10 is recommended") alternative <- match.arg(alternative) type <- match.arg(type) #if(length(all.vars(seg.Z))>1) warning("multiple segmented variables ignored in 'seg.Z'",call.=FALSE) name.Z <- all.vars(seg.Z) if(length(name.Z)>1) stop("Only a single segmented variable can be specified in 'seg.Z' ") isGLM<-"glm"%in%class(obj) ##is(obj, "glm") Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) formulaOrig<-formula(obj) if(class(obj)[1]=="segmented"){ #10/12/20. 
se segmented e' stato chiamato con un oggetto m (cioe m<-lm()), allora digitando eval(obj$call$obj) #restituiva 'm' ottenuto dal comando di sopra.. match(c("formula",.. #sembra che mettendo eval(obj$call$obj, parent.frame()) o anche eval(obj$call$obj, envir=0) riesca ad ignorare #l'oggetto m che si trova nell'ambiente della funzione if(!is.null(eval(obj$call$obj, parent.frame())$call$data)) mf$data <- eval(obj$call$obj, parent.frame())$call$data mf$formula<-update.formula(mf$formula,paste("~.-",paste(obj$nameUV$V, collapse="-"))) for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) formulaOrig<-update.formula(formulaOrig, paste("~.-",paste(obj$nameUV$V, collapse="-"))) } mf <- eval(mf, parent.frame()) weights <- as.vector(model.weights(mf)) offs <- as.vector(model.offset(mf)) if(!is.null(Call$weights)){ #"(weights)"%in%names(mf) names(mf)[which(names(mf)=="(weights)")]<-all.vars(Call$weights) #as.character(Call$weights) #aggiungere??? # mf["(weights)"]<-weights } mt <- attr(mf, "terms") interc<-attr(mt,"intercept") y <- model.response(mf, "any") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf, obj$contrasts) n <- nrow(XREG) if (is.null(weights)) weights <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) name.Z <- all.vars(seg.Z) Z<-XREG[,match(name.Z, colnames(XREG))] if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] list.glm<-list(dev0=obj$dev, eta0=obj$linear.predictor, family=family(obj), type=type, dispersion=dispersion) if(is.null(values)) values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) #values<-seq(min(z), max(z), length=k+2) #values<-values[-c(1,length(values))] if(class(obj)[1]=="lm" || identical(class(obj),c("segmented","lm")) ) { if(n<=300) { rr<-daviesLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, alternative=alternative) } else { list.glm$family<-gaussian() list.glm$type<-"wald" rr<-daviesGLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, list.glm=list.glm, alternative=alternative) } } if(identical(class(obj),c("glm","lm")) || identical(class(obj),c("segmented","glm","lm"))) rr<-daviesGLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, list.glm=list.glm, alternative=alternative) best<-rr$best p.adj<-rr$p.adj valori<-rr$valori ris.valori<-rr$ris.valori if(is.null(obj$family$family)) { famiglia<-"gaussian" legame<-"identity"} else { famiglia<-obj$family$family legame<-obj$family$link } out <- list(method = "Davies' test for a change in the slope", # data.name=paste("Model = ",famiglia,", link =", legame, # "\nformula =", as.expression(formulaOrig), # "\nsegmented variable =", name.Z), data.name=paste("formula =", as.expression(formulaOrig), ", method =", obj$call[[1]] , "\nmodel =",famiglia,", link =", legame, if(isGLM) paste(", statist =", type) else NULL , "\nsegmented variable =", name.Z), statistic = c("'best' at" = best), parameter = c(n.points = length(valori)), p.value = min(p.adj,1), alternative = alternative, process=cbind(psi.values=valori, stat.values=ris.valori)) class(out) <- "htest" return(out) } segmented/R/seg.control.R0000644000176200001440000000146714610435354015023 0ustar liggesusers`seg.control` <- function(n.boot=10, display=FALSE, tol=1e-5, it.max=30, fix.npsi=TRUE, K=10, quant=FALSE, maxit.glm=NULL, h=1.25, break.boot=5, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=TRUE, seed=NULL, fn.obj=NULL, digits=NULL, alpha=NULL, fc=.95, check.next=TRUE, tol.opt=NULL, fit.psi0=NULL, 
eta=NULL,min.nj=2){ list(toll=tol,it.max=it.max,visual=display,stop.if.error=NULL, K=K,last=TRUE, maxit.glm=maxit.glm,h=h,n.boot=n.boot, size.boot=size.boot, gap=FALSE, jt=jt, break.boot=break.boot, nonParam=nonParam, random=random, pow=c(1,1), seed=seed, quant=quant, fn.obj=fn.obj, digits=digits, conv.psi=FALSE, alpha=alpha, fix.npsi=fix.npsi, fc=fc, check.next=check.next, tol.opt=tol.opt, fit.psi0=fit.psi0, eta=eta,min.n=min.nj)} segmented/R/seg.glm.fit.boot.r0000644000176200001440000002732214757621127015712 0ustar liggesusersseg.glm.fit.boot<-function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random: if TRUE, when the algorithm fails in minimizing f(y), random numbers are used as final estimates. # If the algorithm fails in minimizing f(y*), the final estimates (to be used as starting values with # the original responses y) *always* are replaced by random numbers (regardless of the random argument) #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore del primo tentativo #show.history() se c'e' stato boot restart potrebbe produrre un grafico 2x1 di "dev vs it" and "no.of distinct vs it" #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]] psi.values<-lista[[2]] if(any(is.na(psi.values[[1]]))) {#se la 1 componente e' NA (fino alla versione 2.0-3 era cosi'... perche' in dev.values c'erano # anche i valori relativi al modello senza psi.. ) dev.values<-dev.values[-1] #remove the 1st one referring to model without psi psi.values<-psi.values[-1] } dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(dev.no.gap=dev.ok, psi=psi.ok) r } #------------- #browser() if(is.null(opz$seed)){ mY <- mean(as.numeric(y)) sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } if(!nonParam){ nonParam<-TRUE warning("`nonParam' set to TRUE for segmented glm..", call.=FALSE) } visualBoot<-opz$visualBoot #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <- 0 #opz1 viene usata solo quando diversi tentativi di stimare il modello falliscono... n<-length(y) alpha<-opz$alpha #limZ <- apply(Z, 2, quantile, names=FALSE, probs=c(alpha,1-alpha)) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ rangeZ <- if(is.null(opz$rangeZ)) apply(Z, 2, range) else opz$rangeZ #rangeZ <- apply(Z, 2, range) #serve sempre #13/03/24: #In realta' non ci sarebbe neanche bisogno di questo fit:... Comunque, facciamolo in economia.. 
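# Editor's note (illustration, not in the original sources): when opz$seed is NULL the seed above
# is derived from the leading non-zero digits of mean(y). For a hypothetical response whose mean
# prints as 12.3045, the decimal separator is dropped ("123045"), zeros are removed and the first
# five digits are kept, so the call amounts to set.seed(12345). This makes bootstrap restarting
# reproducible on the same data unless a different seed is supplied via seg.control(seed=...).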
opz0 <- opz opz0$maxit.glm <- 2 o0<-try(suppressWarnings(seg.glm.fit(y, XREG, Z, PSI, w, offs, opz0)), silent=TRUE) if(!is.list(o0)) { o0<-try(suppressWarnings(seg.glm.fit(y, XREG, Z, opz0$PSI1, w, offs, opz0, return.all.sol=FALSE)), silent=TRUE) } if(!is.list(o0)) { o0<- suppressWarnings(seg.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$dev.no.gap eta0 <- o0$eta0 if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("semiparametric boot requires reasonable fitted values. Try a different psi or use nonparam boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(seg.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$dev.no.gap eta0 <- o0$eta0 } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 eta0 <- NULL } } n.intDev0<-nchar(strsplit(as.character(ss00), options()$OutDec)[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 alpha<-.1 n.boot.rev<- 3 #3 o 4? #===================================== for(k in seq(n.boot)){ opz$eta0 <- eta0 ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) #if(length(na.omit(diff(all.selected.ss[1:n.boot.rev])))==(n.boot.rev-1) && all(round(diff(all.selected.ss[1:n.boot.rev]),6)==0)){ if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1,alpha,qpsi) alpha<- 1-alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### #spostato sotto #est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(seg.glm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(seg.glm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) #opz$it.max<-opz$it.max+1 opz$Nboot <- k o<-try(suppressWarnings(seg.glm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o<-try(suppressWarnings(seg.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$dev.no.gap 
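# Editor's note (sketch of the logic that follows, not in the original sources): the next check
# implements a keep-the-best rule; a refit started from the bootstrap-perturbed breakpoints
# replaces the current solution o0 only when its deviance does not exceed the best value found so
# far, so the sequence stored in all.selected.ss is non-increasing across the bootstrap samples.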
if(o$dev.no.gap<=ifelse(is.list(o0), o0$dev.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$dev.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) eta0 <- o0$eta0 } est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) if (visualBoot) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), #" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$dev.no.gap), #formatC(L1,width=8, digits=5,format="f"), #was format="fg" " opt.dev = ", sprintf("%1.5f", as.numeric(strsplit(format(o0$dev.no.gap, scientific=TRUE), "e")[[1]][1])), " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) #browser() if(is.null(o0$obj)){ #when the returned psi are too close and the SE cannot be computed, we can move them apart.. #However the procedure should be extended to the case where there are 3 close psi.. min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi of the same segmented variable id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #for each psi compute the min and the max of the segments before and after psi. #if these segments have at most min.n observations, use fictitious min and max values to prevent the adjusted psi #from producing segments with too few observations.. M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k])1) { var.diff<-FALSE warning(" 'var.diff' set to FALSE with multiple segmented variables", call.=FALSE) } #browser() nomiU<-object$nameUV$U nomiV<-object$nameUV$V nomiPsi<- sub("V", "psi", nomiV) idU <-match(nomiU,names(coef(object)[!is.na(coef(object))])) idV <-match(nomiPsi,names(coef(object)[!is.na(coef(object))])) #beta.c<- coef(object)[nomiU] #for the default method.. but is it needed???? if("stepmented.default" == as.character(object$call)[1]){ summ <- c(summary(object, ...), object["psi"]) summ[c("it","epsilon")]<-object[c("it","epsilon")] #v<-try(vcov(object), silent=TRUE) #if(class(v)!="try-error") v<-sqrt(diag(v)) return(summ) } #browser() VAR <- if(!is.null(.vcov)) .vcov else vcov(object,...)
se <- sqrt(diag(VAR)) object$psi[,"St.Err"] <- se[nomiPsi] #if("lm"%in%class(object) && !"glm"%in%class(object)){ if(inherits(object, "lm") && !inherits(object, "glm")){ #object$rank include i psi, mentre object$qr$rank no. #Affinche' summary.lm() funzioni, e' necessario che object$rank non tenga conto del numero di psi.. #quindi qua (e anche nei lm sopra) modifichiamo il valore di rank.. #NB: questo problema NON si presenta se sono state usate le funzioni stepmented.* in cui anche object$qr$rank tiene gia' conto # dei psi (perche' hanno stimato il modello con le variabili W per cercare di ottenere una qualche misura del se) object$rank <- object$qr$rank #object$rank - nrow(object$psi) # summ <- c(suppressWarnings(summary.lm(object, ...)), object["psi"]) # summ$Ttable <-summ$coefficients # b <- coef(object, FALSE) # b<-b[b!=0] # summ <- list(Ttable=matrix(NA, length(b), 4, dimnames = list(names(b),c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))), # psi=object[["psi"]], sigma=sigma(object), call=object$call, df=c(length(coef(object)), object$df.residual, length(coef(object)) ) ) # summ$Ttable[,"Estimate"] <- b # summ$Ttable[,"Std. Error"] <- se[1:length(b)] #se[rownames(summ$coefficients)] # summ$Ttable[,"t value"] <- summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] # summ$Ttable[,"Pr(>|t|)"] <- 2*pt(abs(summ$Ttable[,"t value"]), df=object$df.residual, lower.tail = FALSE) # summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] summ <- c(suppressWarnings(summary.lm(object, ...)), object["psi"]) summ$Ttable <- summ$coefficients summ$Ttable[, "Std. Error"] <- se[rownames(summ$coefficients)] summ$Ttable[, "t value"] <- summ$Ttable[, "Estimate"]/summ$Ttable[,"Std. Error"] summ$Ttable[, "Pr(>|t|)"] <- 2 * pt(abs(summ$Ttable[,"t value"]), df = object$df.residual, lower.tail = FALSE) if(var.diff){ stop("not allowed") # modifica gli SE Qr <- object$qr p <- object$rank #n.parametri stimati p1 <- 1L:p inv.XtX <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) X <- qr.X(Qr,FALSE) attr(X, "assign") <- NULL K<-length(unique(object$id.group)) #n.gruppi (=n.psi+1) dev.new<-tapply(object$residuals, object$id.group, function(.x){sum(.x^2)}) summ$df.new<-tapply(object$residuals, object$id.group, function(.x){(length(.x)-eval(parse(text=p.df)))}) if(any(summ$df.new<=0)) stop("nonpositive df when computig the group-specific variances.. reduce 'p.df'?", call. = FALSE) summ$sigma.new<-sqrt(dev.new/summ$df.new) sigma.i<-rowSums(model.matrix(~0+factor(object$id.group))%*%diag(summ$sigma.new)) var.b<-inv.XtX%*%crossprod(X*sigma.i)%*%inv.XtX #sqrt(rowSums((X %*% V) * X)) dimnames(var.b)<-dimnames(summ$cov.unscaled) summ$cov.var.diff<-var.b summ$Ttable[,2]<-sqrt(diag(var.b)) summ$Ttable[,3]<-summ$Ttable[,1]/summ$Ttable[,2] summ$Ttable[,4]<- 2 * pt(abs(summ$Ttable[,3]),df=object$df.residual, lower.tail = FALSE) dimnames(summ$Ttable) <- list(names(object$coefficients)[Qr$pivot[p1]], c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) } summ$Ttable[idU,4]<-NA if(all(!is.na(idV))) summ$Ttable<-summ$Ttable[-idV,] summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) summ$var.diff<-var.diff summ$short<-short summ$psi.rounded <- object$psi.rounded class(summ) <- c("summary.stepmented", "summary.lm") return(summ) } if(inherits(object, "glm")){ #browser() #23/4/24 mi sono reso conto che con gaussian GLM viene stampato "t-value" e non z-value... # Per cui piuttostoche i nomi, metto gli indici delle colonne.. 
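# Editor's note (assumption based on the code below): for gaussian GLM fits summary.glm() labels
# the third column "t value", so the p-values are taken from a t distribution on
# object$df.residual degrees of freedom, i.e. 2*pt(abs(t), df, lower.tail=FALSE); for all other
# families the Wald statistic is referred to the standard normal, 2*pnorm(abs(z), lower.tail=FALSE).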
object$rank <- object$qr$rank summ <- c(suppressWarnings(summary.glm(object, ...)), object["psi"]) summ$Ttable <-summ$coefficients summ$Ttable[,"Std. Error"] <- se[rownames(summ$coefficients)] # summ$Ttable[,"z value"] <- summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] # summ$Ttable[,"Pr(>|z|)"] <- 2*pnorm(abs(summ$Ttable[,"z value"]), lower.tail = FALSE) # summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] summ$Ttable[,3] <- summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] summ$Ttable[,4] <- if(object$family$family=="gaussian") 2*pt(abs(summ$Ttable[,3]), df=object$df.residual, lower.tail = FALSE) else 2*pnorm(abs(summ$Ttable[,3]), lower.tail = FALSE) # summ$Ttable[,"Estimate"]/summ$Ttable[,"Std. Error"] summ$Ttable[idU,4]<-NA if(all(!is.na(idV))) summ$Ttable<-summ$Ttable[-idV,] summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) summ$short<-short summ$psi.rounded <- object$psi.rounded class(summ) <- c("summary.stepmented", "summary.glm") return(summ) } if("Arima"%in%class(object)){ stop("stepmented arima model not allowed") #da controllare coeff<-object$coef v<-sqrt(diag(object$var.coef)) Ttable<-cbind(coeff[-idV],v[-idV],coeff[-idV]/v[-idV]) colnames(Ttable)<-c("Estimate","Std. Error","t value") object$Ttable<-Ttable object$short<-short summ<-object summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) summ$psi.rounded <- object$psi.rounded class(summ) <- c("summary.stepmented", "summary.Arima") return(summ) } } segmented/R/pscore.test.R0000644000176200001440000006361714533572475015056 0ustar liggesuserspscore.test <- function(obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), values=NULL, dispersion=NULL, df.t=NULL, more.break=FALSE, n.break=1, only.term=FALSE, break.type=c("break","jump")) { #------------------------------------------------------------------------------- test.Sc2<-function(y, z, xreg, sigma=NULL, values=NULL, fn="pmax(x-p,0)", df.t="Inf", alternative, w=NULL, offs=NULL, nbreaks=1, ties.ok=FALSE, only.term=FALSE){ #xreg: la matrice del disegno del modello nullo. Se mancante viene assunta solo l'intercetta. #Attenzione che se invXtX e xx vengono entrambe fornite, non viene fatto alcun controllo #invXtX: {X'X}^{-1}. if missing it is computed from xreg #sigma: the sd. If missing it is computed from data (under the *null* model) #values: the values with respect to ones to compute the average term. If NULL 10 values from min(z) to max(z) are taken. 
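# Editor's sketch of the statistic computed below (not part of the original comments): writing m
# for the row means of pmax(z - psi_k, 0) over the K candidate breakpoints psi_k, and H for the
# hat matrix of the null design xreg, the unweighted one-break statistic is
#    m'(I - H) y / (sigma * sqrt(m'(I - H) m)),
# referred to a t distribution on df.t degrees of freedom (two-sided by default); an analogous
# weighted form is used when prior weights w are supplied.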
if(!is.null(offs)) y<-y-offs n<-length(y) if(missing(xreg)) xreg<-cbind(rep(1,n)) id.ok<-complete.cases(cbind(y,z,xreg)) y<-y[id.ok] z<-z[id.ok] xreg<-xreg[id.ok,,drop=FALSE] idv <- which(apply(xreg,2,sd)!=0) xreg[,idv]<-scale(xreg[,idv]) n<-length(y) k=ncol(xreg) #per un modello ~1+x if(is.null(values)) values<-seq(min(z), max(z), length=10) n1<-length(values) PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi if(is.matrix(z)) { X1<-matrix(z[,1], nrow=n, ncol=n1, byrow=FALSE) X2<-matrix(z[,2], nrow=n, ncol=n1, byrow=FALSE) X<-eval(parse(text=fn), list(x=X1, y=X2, p=PSI)) #X<-pmax(X1-X2,0) pmaxMedio<-rowMeans(X) } else { X1<-matrix(z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z if(length(fn)<=1){ X<-eval(parse(text=fn), list(x=X1, p=PSI)) #X<-pmax(X1-PSI,0) pmaxMedio <- rowMeans(X) if(nbreaks>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } } else { pmaxMedio<-matrix(NA,n,length(fn)) #list.X<-vector("list", length=length(fn)) for(j in 1:length(fn)){ #list.X[[j]]<-eval(parse(text=fn[j]), list(x=X1, p=PSI)) X<-eval(parse(text=fn[[j]]), list(x=X1, p=PSI)) pmaxMedio[,j]<-rowMeans(X) } } } if(only.term) return(pmaxMedio) if(is.null(w)) { invXtX<-solve(crossprod(xreg)) IA<- -xreg%*%tcrossprod(invXtX, xreg) .a1<-1+diag(IA) id<-col(IA)==row(IA) IA[id]<-.a1 pIA<- drop(crossprod(pmaxMedio,IA)) sc<- drop(pIA %*% y) v.s<- pIA %*% pmaxMedio #pIA%*% pmaxMedio #tcrossprod(pIA, pmaxMedio) } else { invXtX<-solve(crossprod(sqrt(w)*xreg)) #I-hat matrix #dovrebbe essere diag(n) - xreg%*%tcrossprod(invXtX, xreg*w) ma non funziona se n e' grande (12000) IA<- xreg%*%tcrossprod(invXtX, xreg*w) .a1<-1-diag(IA) id<-col(IA)==row(IA) IA[id]<-.a1 pIA<- drop(crossprod(pmaxMedio*w,IA)) #sc<-t(pmaxMedio*w) %*% IA %*% y sc<- drop(pIA %*% y) pIA<-crossprod(pmaxMedio*w,IA/sqrt(w)) v.s<- pIA %*% (pmaxMedio*w) #crossprod(pIA, pmaxMedio*w) #t(pmaxMedio*w) %*% crossprod(t(IA)/sqrt(w))%*%(w*pmaxMedio) } ris<-if(nbreaks==1) drop(sc/(sigma*sqrt(v.s))) else drop(crossprod(sc,solve(v.s,sc)))/(sigma^2) #if(length(fn)<=1 && cadj) ris<- sign(ris)*sqrt((ris^2)*(1-(3-(ris^2))/(2*n))) #passa alla F.. df.t<-eval(parse(text=df.t)) p2<- if(nbreaks==1) 2*pt(abs(ris), df=df.t, lower.tail=FALSE) else pchisq(ris, df=nbreaks, lower.tail=FALSE)#pf((ris/nbreaks)/(sigma^2), df1=nbreaks, df2=df.t, lower.tail =FALSE)# pvalue<-switch(alternative, less = pt(ris, df=df.t, lower.tail =TRUE) , greater = pt(ris, df=df.t, lower.tail =FALSE) , two.sided = p2) #pvalue<- 2*pt(abs(ris), df=df.t, lower.tail =FALSE) r<-c(ris, pvalue)#, pmaxMedio) r #return(pmaxMedio) } #------------------------------------------------------------------------------- scGLM<-function(y, z, xreg, family, values = NULL, size=1, weights.var, fn="pmax(x-p,0)", alternative=alternative){ #score test for GLM (NON USATO!) #size (only if family=binomial()) #weights.var: weights to be used for variance computations. 
# If missing, the weights come from the null fit
output<-match.arg(output) n<-length(y) if(missing(xreg)) xreg<-cbind(rep(1,n)) id.ok<-complete.cases(cbind(y,z,xreg)) y<-y[id.ok] z<-z[id.ok] xreg<-xreg[id.ok,,drop=FALSE] n<-length(y) if(family$family=="poisson") size=1 if(length(size)==1) size<-rep(size,n) yN<-y/size k=ncol(xreg) #for a model ~1+x if(is.null(values)) values<-seq(min(z), max(z), length=10) n1<-length(values) PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(was X2) matrix of the psi values X1<-matrix(z, nrow=n, ncol=n1, byrow=FALSE) #matrix of the variable Z X<-eval(parse(text=fn), list(x=X1, p=PSI)) #X<-pmax(X1-X2,0) pmaxMedio<-rowMeans(X) o<-glm.fit(yN, x=xreg, weights=size, family=family) r<-y-(o$fitted*size) sc<-drop(crossprod(r, pmaxMedio)) # if(output=="unst.score") return(drop(sc)) p <- o$rank Qr <- o$qr COV <- chol2inv(Qr$qr[1:p, 1:p, drop = FALSE]) #vcov(glm(y~x, family=poisson)) A<-xreg%*%COV%*%crossprod(xreg, diag(o$weights)) h<- drop(tcrossprod(pmaxMedio, diag(n)- A)) if(missing(weights.var)) weights.var<-o$weights v.s<- drop(crossprod(h*sqrt(weights.var))) #t(h)%*%diag(exp(lp))%*%h ris<-if(length(fn)<=1) sc/sqrt(v.s) else drop(crossprod(sc,solve(v.s,sc))) # if(output=="score") return(drop(ris)) pvalue<- switch(alternative, less = pnorm(ris, lower.tail =TRUE) , greater = pnorm(ris, lower.tail =FALSE) , two.sided = 2*pnorm(abs(ris), lower.tail =FALSE) ) # pvalue<- if(length(fn)<=1) 2*pnorm(abs(ris), lower.tail =FALSE) else pchisq(ris,df=length(fn), lower.tail =FALSE) # NB: if you compute ris<-drop(t(sc)%*%solve(v.s,sc))/(length(fn)*sigma^2) you must use pf(ris,df1=length(fn),df2=df.t, lower.tail =FALSE) return(c(ris, pvalue)) } #---------------------------------------------------- if(!inherits(obj, "lm")) stop("A '(g)lm' or 'segmented-(g)lm' model is required") break.type<-match.arg(break.type) #if(!(break.type %in% 1:2)) stop(" 'break.type' should be 1 or 2") fn=if(break.type=="break") "pmax(x-p,0)" else "1*(x>p)" ties.ok=FALSE if(missing(seg.Z)){ if(inherits(obj, "segmented") && length(obj$nameUV$Z)==1) seg.Z<- as.formula(paste("~", obj$nameUV$Z )) if(!inherits(obj, "segmented") && length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) } else { #if(class(seg.Z)!="formula") stop("'seg.Z' should be a one-sided formula") #if(!is(seg.Z,"formula")) stop("'seg.Z' should be a one-sided formula") if(!inherits(seg.Z,"formula")) stop("'seg.Z' should be a one-sided formula") } if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") name.Z <- all.vars(seg.Z) if(length(name.Z)>1) stop("Only one variable can be specified in 'seg.Z' ") nomiU.term<-grep(name.Z, obj$nameUV$U, value=TRUE) #U terms relating to the variable name.Z #if length(nomiU.term)==0 the variable in seg.Z is not in the model (the left slope is assumed to be 0) if(length(nomiU.term)==0 && more.break) warning(paste("variable", name.Z, "has no breakpoint.. 'more.break=TRUE' ignored"), call.=FALSE) #browser() if(k<=1) stop("k>1 requested! k>=10 is recommended") if(k<10) warning("k>=10 is recommended") alternative <- match.arg(alternative) if(!n.break%in%1:2) stop(" 'n.break' should be 1 or 2", call.
= FALSE) if(n.break==2) alternative<-"two.sided" isGLM<-"glm"%in%class(obj) #is(obj, "glm") #================================================================== if(isGLM){ if (is.null(dispersion)) dispersion <- summary.glm(obj)$dispersion if(inherits(obj, "segmented")){ #============se e' GLM segmented if(more.break && !name.Z %in% obj$nameUV$Z) stop(" 'more.break' is meaningful only if at least 1 breakpoint has been estimated") Call<-mf<-obj$orig.call #del GLM formulaSeg <-formula(obj) #contiene le variabili U e le psi formulaNull<- update.formula(formulaSeg, paste("~.-",paste(obj$nameUV$V, collapse="-"))) #rimuovi le variabili "psi.." #se length(nomiU.term)==0 la variabile in seg.Z non e' nel modello (si sta assumendo che la left slope ==0) if(!more.break && length(nomiU.term)>0){ if(length(nomiU.term)>1) stop(" 'more.break=FALSE' does not work with multiple breakpoints referring to the same variable specified in seg.Z", call. = FALSE) formulaNull <-update.formula(formulaNull,paste("~.-",paste(nomiU.term, collapse="-"))) #non contiene U del termine di interesse, MA contiene eventuali altri termini U } mf$formula<-formulaNull mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #se il modello inziale non contiene seg.Z.. if(!is.null(obj$orig.call$offset) || !is.null(obj$orig.call$weights) || !is.null(obj$orig.call$subset)){ mf$formula <- update.formula(mf$formula, paste(".~.+", paste(c(all.vars(obj$orig.call$offset), all.vars(obj$orig.call$weights), all.vars(obj$orig.call$subset)), collapse = "+"))) } m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") #browser() #for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) #mf <- eval(mf, parent.frame()) mf$data <- quote(obj$model) mf <- eval(mf) mt <- attr(mf, "terms") #interc<-attr(mt,"intercept") y <- model.response(mf, "any") X0<- if (!is.empty.model(mt)) model.matrix(mt, mf) Z<-X0[ ,match(name.Z, colnames(X0))] n<-length(Z) if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) n1<-length(values) X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. length(fn)<=1; fn="pmax(x-p,0)" definita sopra.. pmaxMedio <-as.matrix(rowMeans(X)) if(n.break>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } if(only.term) return(pmaxMedio) #necessario salvare pmaxMedio in mf??? 
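# Editor's note (sketch, not in the original sources): pmaxMedio built above is the single
# 'average' regressor of the score test; each column pmax(Z - values[i], 0) is one candidate
# broken-line term and rowMeans() collapses the K candidates into one column (a second, further
# averaged column is appended when n.break = 2). The Rao score statistic below is then obtained by
# projecting this regressor off the null fit with qr.resid() and scaling by the dispersion.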
mf$pmaxMedio<-pmaxMedio Call$formula<- formulaNull Call$data<-quote(mf) obj0<-eval(Call) # pos<-1 # assign("mf", mf, envir=as.environment(pos)) # r<-as.numeric(as.matrix(add1(obj0, ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) ws <- sqrt(obj0$weights[obj0$weights>0]) res<-obj0$residuals[obj0$weights>0] zw <- ws * res A <- qr.resid(obj0$qr, ws * pmaxMedio[obj0$weights>0,]) u<-t(A)%*% zw v<-crossprod(A) r<-if(n.break==1) u/sqrt(v*dispersion) else t(u)%*% solve(v) %*%u/dispersion #r<- (colSums(as.matrix(A * zw))/sqrt(colSums(as.matrix(A * A)))/sqrt(dispersion)) p2<- if(n.break==1) 2*pnorm(abs(r), lower.tail=FALSE) else pchisq(r, df=n.break, lower.tail=FALSE) pvalue<- switch(alternative, less = pnorm(r, lower.tail =TRUE) , greater = pnorm(r, lower.tail =FALSE) , two.sided = p2) r<-c(r, pvalue) # ================fine se e' GLM+segmented. } else { #=================Se e' GLM NON segmented Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") formulaNull <- formula(obj) mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #aggiunto 12/03/18 (non trovava la variable weights perche' era salvata in mf come "(weights)") if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mf$formula <-update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+"))) } mf <- eval(mf, parent.frame()) mt <- attr(mf, "terms") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf) n <- nrow(XREG) Z<- XREG[,match(name.Z, colnames(XREG))] if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] #questo preso da LM...funziona? # Call<-mf<-obj$call # mf$formula<-formula(obj) # m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) # mf <- mf[c(1, m)] # mf$drop.unused.levels <- TRUE # mf[[1L]] <- as.name("model.frame") # formulaNull <- formula(obj) # mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) # mf<-eval(mf) #parent.frame()? # Z<- mf[[name.Z]] # y <- model.response(mf, "any") # weights <- as.vector(model.weights(mf)) # offset <- as.vector(model.offset(mf)) # #XREG <-model.matrix(formula(obj), data=build.mf(obj)) # #XREG<-XREG[,colnames(XREG0)] # XREG<- model.matrix(update.formula(formula(obj),~.), mf) # # if(length(weights)>0) formulaNull <- update.formula(formulaNull, # paste(".~.+ weights(", obj$call$weights, ")",sep="")) #====================================================================== if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) n1<-length(values) PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. 
length(fn)<=1 pmaxMedio<-as.matrix(rowMeans(X)) if(n.break>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } if(only.term) return(pmaxMedio) #r<-as.numeric(as.matrix(add1(update(obj, data=mf), ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) #Call$formula<- formulaNull #Call$data<-quote(mf) #obj0<-eval(Call) ws <- sqrt(obj$weights[obj$weights>0]) res<-obj$residuals[obj$weights>0] zw <- ws * res A <- qr.resid(obj$qr, ws * pmaxMedio[obj$weights>0,]) u<-t(A)%*% zw v<-crossprod(A) r<-if(n.break==1) u/sqrt(v*dispersion) else t(u)%*% solve(v) %*%u/dispersion #r<- (colSums(as.matrix(A * zw))/sqrt(colSums(as.matrix(A * A)))/sqrt(dispersion)) p2<- if(n.break==1) 2*pnorm(abs(r), lower.tail=FALSE) else pchisq(r, df=n.break, lower.tail=FALSE) pvalue<- switch(alternative, less = pnorm(r, lower.tail =TRUE) , greater = pnorm(r, lower.tail =FALSE) , two.sided = p2) r<-c(r, pvalue) #fine se e' un GLM NON segmented } } else { ##============================== Se e' un LM.. if(is.null(dispersion)) dispersion<- summary(obj)$sigma^2 if(is.null(df.t)) df.t <- obj$df.residual #df.ok<- if(!is.null(df.t)) df.t else obj$df.residual if(inherits(obj, "segmented")){ #===== se e' LM+segmented if(more.break && !name.Z %in% obj$nameUV$Z) stop(" stop 'more.break' is meaningful only if at least 1 breakpoint has been estimated", call.=FALSE ) # Call<-mf<-obj$orig.call # mf$formula<-formula(obj) # m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) # mf <- mf[c(1, m)] # mf$drop.unused.levels <- TRUE # mf[[1L]] <- as.name("model.frame") # mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) # #formulaOrig<-formula(obj) # if(!is.null(obj$orig.call$offset) || !is.null(obj$orig.call$weights) || !is.null(obj$orig.call$subset)){ # mf$formula <- update.formula(mf$formula, # paste(".~.+", paste(c(all.vars(obj$orig.call$offset), # all.vars(obj$orig.call$weights), # all.vars(obj$orig.call$subset)), # collapse = "+"))) # } # # browser() # # # mf$formula<-update.formula(mf$formula,paste("~.-",paste(obj$nameUV$V, collapse="-"))) #rimuovi le variabili "psi.." # if(!more.break) { # if(length(nomiU.term)>1) stop(" 'more.break=FALSE' does not work with multiple breakpoints referring to the same variable specified in seg.Z", call. = FALSE) # #ovvero il test funziona per un solo breakpoint.. # mf$formula<-update.formula(mf$formula,paste("~.-",paste(nomiU.term, collapse="-"))) #rimuovi il termine U in questione, cioe' solo per una variabile # #altre variabili "U" relative a piu' variabili devono rimanere.. # } # formulaNull <- formula(mf) ############### #PERCHE' NON estrarre direttamente la model.matrix(obj)? #===> se la variabile NON e' nel modello (perche' left slope=0) non so come recuperarla #X <-model.matrix(mf) #non funziona se c'e' log(y).. #X <- X[, !(colnames(X) %in% obj$nameUV$V), drop=FALSE] #perche' poi # if(more.break) { # #ATTENZIONE... A QUANTO PARE SE LA RISPOSTA E' CON log(y), non la trova perche' in model.frame(obj) e' salvata come log(y), #mentre lui cerca y.. Quindi cambiare i nomi? 
# eval.parent(obj$orig.call$data, n=1) #mf$data<-quote(model.frame(obj)) # mf<-eval(mf) # } else { # mf <- eval(mf, parent.frame()) # } #for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) if(!is.null(obj$formulaLin)) stop(" 'pscore()' does not work with objects returned by segreg()") mf<-model.frame(obj) y <- model.response(mf, "any") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) fo<-formula(obj) #NB formula(mf), stranamente, e' del modello lineare di partenza.. non include le U e le V!! fo <-update.formula(fo ,paste("~.-",paste(obj$nameUV$V, collapse="-"))) #escludi tutte le V if(!more.break) { if(length(nomiU.term)>1) stop(" 'more.break=FALSE' does not work with multiple breakpoints referring to the same variable specified in seg.Z", call. = FALSE) #ovvero il test funziona per un solo breakpoint.. fo <-update.formula(fo,paste("~.-",paste(nomiU.term, collapse="-"))) #rimuovi il termine U in questione, cioe' solo per una variabile #altre variabili "U" relative a piu' variabili devono rimanere.. } formulaNull <- fo if(length(weights)>0) formulaNull <- update.formula(formulaNull, paste(".~.+ weights(", obj$orig.call$weights, ")",sep="")) X0<-model.matrix(fo , mf) #Z<-X0[ ,match(name.Z, colnames(X0))] Z<- mf[[name.Z]] #COSA SUCCEDE SE IL MODELLO DI PARTENZA NON INCLUDE IL TERMINE LINEARE? #X0<- if (!is.empty.model(mt)) model.matrix(mt, mf) #E' quella del modello lineare di partenza.. strano! #Z<-X0[ ,match(name.Z, colnames(X0))] #browser() #mt <- attr(mf, "terms") #interc<-attr(mt,"intercept") n<-length(Z) if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) # n1<-length(values) # X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z # PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) # X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. length(fn)<=1; fn="pmax(x-p,0)" definita sopra.. # mf$pmaxMedio<- pmaxMedio <-rowMeans(X) r<-test.Sc2(y=y, z=Z, xreg=X0, sigma=sqrt(dispersion), values=values, fn=fn, df.t=df.t, alternative=alternative, w=weights, offs=offset, nbreaks=n.break, ties.ok=FALSE, only.term=only.term) #---fine se e' LM+segmented. } else { #browser() #=================Se e' LM (non segmented) Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") formulaNull <- formula(obj) mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #modifiche sett 2021 #XREG0 <-model.matrix(obj) mf<-eval(mf) #parent.frame()? 
Z<- mf[[name.Z]] y <- model.response(mf, "any") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) #XREG <-model.matrix(formula(obj), data=build.mf(obj)) #XREG<-XREG[,colnames(XREG0)] XREG<- model.matrix(update.formula(formula(obj),~.), mf) if(length(weights)>0) formulaNull <- update.formula(formulaNull, paste(".~.+ weights(", obj$call$weights, ")",sep="")) # #aggiunto 12/03/18 (non trovava la variable weights perche' era salvata in mf come "(weights)") # if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ # mf$formula <-update.formula(mf$formula, # paste(".~.+", paste( # c(all.vars(obj$call$offset), # all.vars(obj$call$weights), # all.vars(obj$call$subset)), # collapse = "+"))) # } # mf <- eval(mf, parent.frame()) # y <- model.response(mf, "any") # weights <- as.vector(model.weights(mf)) # offset <- as.vector(model.offset(mf)) # # mt <- attr(mf, "terms") # XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf) # n <- nrow(XREG) # Z<- XREG[,match(name.Z, colnames(XREG))] # if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) r<-test.Sc2(y=y, z=Z, xreg=XREG, sigma=sqrt(dispersion), values=values, fn=fn, df.t=df.t, alternative=alternative, w=weights, offs=offset, nbreaks=n.break, ties.ok=FALSE, only.term=only.term) #r<-as.numeric(as.matrix(add1(update(obj, data=mf), ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) } } #end se LM ################################################# if(only.term) return(r) ################################################# if(is.null(obj$family$family)) { famiglia<-"gaussian" legame<-"identity" } else { famiglia<-obj$family$family legame<-obj$family$link } #browser() if(break.type=="break"){ msg.alt <- if(n.break==1) " breakpoint) " else " breakpoints) " } else { msg.alt <- if(n.break==1) " jumpoint) " else " jumpoints) " } if(more.break) msg.alt <- paste(" additional", msg.alt,sep="") msg.alt <- paste(alternative," (",n.break , msg.alt , sep="") out <- list(method = "Score test for one/two changes in the slope", data.name=paste("formula =", as.expression(formulaNull), "\nbreakpoint for variable =", name.Z, "\nmodel =",famiglia,", link =", legame ,", method =", obj$call[[1]]), statistic = c(`observed value` = r[1]), parameter = c(n.points = length(values)), p.value = r[2], #alternative = paste(alternative, " (",n.break ,"breakpoint ) ") #alternative = paste(alternative," (",n.break ,if(n.break==1) " breakpoint) " else " breakpoints) ", sep="") alternative=msg.alt) class(out) <- "htest" return(out) } segmented/R/print.segmented.lme.r0000644000176200001440000000366014415477004016506 0ustar liggesusersprint.segmented.lme<- function (x, digits = max(3, getOption("digits") - 3), ...) 
{ #datacall<- eval(x$call$obj)$call$data #datacall<- if(is.call(eval(x$call$obj))) eval(x$call$obj)$data else eval(x$call$obj)$call$data datacall<- x$misc$datacall xx<-x LL<-x$lme.fit.noG$logLik x<-x$lme.fit dd <- x$dims cat("Segmented linear mixed-effects model fit by ") cat(if (x$method == "REML") "REML\n" else "maximum likelihood\n") cat(" Data:", datacall, "\n") if (!is.null(x$call$subset)) { cat(" Subset:", deparse(asOneSidedFormula(x$call$subset)[[2L]]), "\n") } cat(" Log-", if (x$method == "REML") "restricted-" else "", "likelihood (approx): ", format(LL), "\n", sep = "") if(!is.null(xx$history.boot.restart)) { n.sol<-length(unique(xx$history.boot.restart[,"psi"])) cat(" Bootstrap restarting on", nrow(xx$history.boot.restart), "samples;", n.sol, "different solution(s)\n") } #cat(" \n psi.link =", xx$call$psi.link, "\n") cat("\n") fixF <- x$call$fixed cat("Fixed:", deparse(if (inherits(fixF, "formula") || is.call(fixF) || is.name(fixF)) x$call$fixed else lapply(fixF, function(el) as.name(deparse(el)))), "\n") print(fixef(xx), ...) #<-xx e' l'oggetto segmented.lme cat(" psi.link =", xx$call$psi.link) cat("\n\n") print(summary(x$modelStruct), sigma = x$sigma, ...) cat("Number of Observations:", dd[["N"]]) cat("\nNumber of Groups: ") Ngrps <- dd$ngrps[1:dd$Q] if ((lNgrps <- length(Ngrps)) == 1) { cat(Ngrps, "\n") } else { sNgrps <- 1:lNgrps aux <- rep(names(Ngrps), sNgrps) aux <- split(aux, array(rep(sNgrps, lNgrps), c(lNgrps, lNgrps))[!lower.tri(diag(lNgrps))]) names(Ngrps) <- unlist(lapply(aux, paste, collapse = " %in% ")) cat("\n") print(rev(Ngrps), ...) } invisible(x) } segmented/R/draw.history.R0000644000176200001440000001111714605760267015224 0ustar liggesusersdraw.history<-function(obj,term,...){ #show.history() se c'e' stato boot restart potrebbe produrre un grafico 2x1 di "dev vs it" and "no.of distinct vs it" #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- if(missing(term)){ if(length(obj$nameUV$Z)>1 ) {stop("please, specify `term'")} else {term<-obj$nameUV$Z} } #browser() opz<-list(...) range.ok<-obj$rangeZ[,term] id.ok<- f.U(rownames(obj$psi), term) est.psi<-obj$psi[id.ok,"Est."] if(is.null(opz$ylim)) opz$ylim<-range.ok if(is.null(opz$col)) opz$col<-1 if(is.null(opz$pch)) opz$pch<-1:length(est.psi) if(is.null(opz$xlab)) opz$xlab<-"iterations" if(is.null(opz$ylab)) opz$ylab<-paste("breakpoint ","(",term,")",sep="") if(is.null(opz$type)) opz$type<-"o" opz$xaxt<-"n" #browser() if(is.null(obj$seed)) { #NO boot if(all(diff(sapply(obj$psi.history, length)[-1])==0)){ #non-autom (elemento [1] e' NA) A<-t(matrix(unlist(obj$psi.history)[-1],nrow=nrow(obj$psi),byrow=FALSE)) colnames(A)<-rownames(obj$psi) opz$x<-0:(nrow(A)-1) opz$y<-A[,id.ok] par(mfrow=c(1,2)) do.call(matplot, opz) #matplot(0:(nrow(A)-2), A[-1,id.ok],type="o",pch=1:length(est.psi),col=1, # xlab=, ylab=, # ylim=range.ok, xaxt="n",...) 
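# Illustrative usage (editor's addition; 'fit' is hypothetical):
#   fit <- segmented(lm(y ~ x), seg.Z = ~x, psi = 5, control = seg.control(n.boot = 0))
#   draw.history(fit, "x")
# Without bootstrap restarting (no stored seed, e.g. n.boot = 0) this branch is used: the left
# panel tracks the breakpoint estimate over the iterations and the right panel the corresponding
# deviance; with bootstrap restarting the 'boot' branch further below plots the selected RSS and
# the number of distinct solutions across replicates.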
axis(1,at=0:(nrow(A)-1),cex.axis=.7) abline(h=est.psi,lty=3,col=opz$col) plot(0:(nrow(A)-1), attr(obj$psi.history,"dev")[-1], ylab="deviance", xlab="iterations", type="o", xaxt="n") axis(1,at=0:(nrow(A)-1),cex.axis=.7) abline(h = min(attr(obj$psi.history,"dev")),lty=3,col=opz$col) } else { #automatic psihist<-obj$psi.history[-1] id.iter<-rep(1:length(psihist), times=sapply(psihist, length)) psi.history<-unlist(psihist) nomi<-unlist(sapply(psihist, names)) d<-data.frame(iter=id.iter, psi=psi.history, nomi=nomi) #match the names of the $psi.history components (which are indices 1,2,..) with the name of the variable 'term' ii<-unique(names(obj$psi.history[[length(obj$psi.history)]])[id.ok]) if(length(ii)>1) stop("some error in the names?..") with(d[d$nomi==ii,], plot(iter, psi, xlab=opz$xlab, ylab=opz$ylab, xaxt="n",...)) axis(1,at=unique(d$iter),cex.axis=.7) #if you really want to match the estimates across the different iterations #(so that the points can then be joined by lines in the plot. Obviously some lines will be broken) # for(i in 1:length(obj$psi.history)) { # a<-obj$psi.history[[i]] # for(j in 1:length(est.psi)){ # psij<-est.psi[j] #a<- ..names match # r[i,j]<-a[which.min(abs(a-psij))] # a<-setdiff(a, r[i,j]) } } else { #if boot par(mfrow=c(1,2)) plot(obj$psi.history$all.selected.ss, type="b", xlab="bootstrap replicates", ylab="RSS (selected values)", xaxt="n", pch=20) axis(1,at=1:length(obj$psi.history$all.selected.ss),cex.axis=.7) #uniqueness of the solutions if(is.vector(obj$psi.history$all.selected.psi)){ psi.matr<-m<-matrix(obj$psi.history$all.selected.psi, ncol=1) } else { psi.matr<-m<-obj$psi.history$all.selected.psi[,id.ok,drop=FALSE] } for(i in 1:nrow(m)) m[i,]<-apply(psi.matr[1:i,,drop=FALSE],2,function(xx)length(unique(xx))) m<-t(t(m)+.1*(0:(ncol(m)-1))) matplot(1:nrow(m),m, pch=1:ncol(m), type="b", col=1:ncol(m), ylab="no.
of distinct solutions",xlab="bootstrap replicates", xaxt="n") axis(1,at=1:nrow(m),cex.axis=.7) } } #end_fn segmented/R/step.glm.fit.r0000644000176200001440000002644414726071266015150 0ustar liggesusersstep.glm.fit<-function(y, x.lin, Xtrue, PSI, ww, offs, opz, return.all.sol=FALSE){ #---------------------- search.min<-function(h, psi, psi.old, X, y, w, offs, id.fix.psi=NULL) { psi.ok<- psi*h + psi.old*(1-h) psi.ok[id.fix.psi]<- psi.old[id.fix.psi] PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(suppressWarnings(glm.fit(x = cbind(X, U1), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[i]), etastart = eta0)), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$dev attr(L1, "eta") <- obj1$linear.predictor L1 } toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } ### ----- # mylm<-function(x,y,w=1,offs=0){ # x1<-x*sqrt(w) # y<-y-offs # y1<-y*sqrt(w) # b<-drop(solve(crossprod(x1),crossprod(x1,y1))) # fit<-drop(tcrossprod(x,t(b))) # r<-y-fit # o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) # o # } #----------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #------------ #----------- fam<-opz$fam maxit.glm<-opz$maxit.glm #-------------- tol<-opz$toll display<-opz$display it.max<-opz$it.max #dev0<-opz$dev0 useExp.k<-opz$useExp.k min.step<- opz$min.step #=.0001 conv.psi<-opz$conv.psi #=FALSE alpha<-opz$alpha #limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) limZ <- if(is.null(opz$limZ)) apply(Xtrue, 2, quantile, names=FALSE, probs=alpha) else opz$limZ fix.npsi<-opz$fix.npsi agg<-opz$agg hh <-opz$h npsii<-opz$npsii npsi<- sum(npsii) #opz$npsi P<-length(npsii) #P<-opz$P digits<-opz$digits rangeZ<-opz$rangeZ # pos.vec <- 1:npsi # pos <- vector("list", P) # ind <- 0 pos<- tapply(1:npsi, rep(1:P, npsii), list) i <- 0 agg <- rep(agg, npsi) # direz <- matrix(NA, it.max, npsi) # conv <- rep(FALSE, npsi) # ind.conv <- NULL n<-length(y) plin<-ncol(x.lin) epsilon<-10 k.values<-dev.values<- NULL psi.values <-list() psi.values[[length(psi.values) + 1]] <- NA #PSI0<- matrix(psi0, n, npsi, byrow = TRUE) XREG <- cbind(x.lin, Xtrue>PSI) if(it.max==0){ obj <- suppressWarnings(glm.fit(x = XREG, y = y, offset = offs, weights = ww, family = fam)) L1 <- obj$dev obj$epsilon <- epsilon idZ<-(plin+1):(plin+ncol(PSI)) b<- obj$coef[idZ] obj <- list(obj = obj, psi = PSI[1,], psi.values = psi.values, rangeZ = rangeZ, beta.c=b, epsilon = epsilon, SumSquares.no.gap = L1, id.warn = TRUE) return(obj) } if(!opz$usestepreg){ dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi psi.values[[length(psi.values) + 1]] <- NA #nessun psi } if(is.null(opz$fit.psi0)){ obj <- suppressWarnings(glm.fit(x = XREG, y = y, offset = offs, weights = ww, family = fam, etastart=opz$eta0)) L0 <- obj$dev eta0 <- obj$linear.predictors } else { L0 <- opz$fit.psi0$L0 eta0 <- opz$fit.psi0$eta0 } n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) #dev.values[length(dev.values) + 1] <- dev0#opz$dev0 #del modello iniziale (senza psi) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi0<-PSI[1,] 
psi.values[[length(psi.values) + 1]] <- psi0 #psi iniziali if(is.null(maxit.glm)){ Nboot <- if(is.null(opz$Nboot)) 0 else opz$Nboot maxit.glm1 <- rep(1:it.max + Nboot, 1:it.max+1) #2*rep(1:it.max, 1:it.max) maxit.glm1 <- pmin(maxit.glm1, 25) } else { maxit.glm1 <- rep(maxit.glm, it.max) } #============================================== if (display) { unlpsi<- unlist(psi0) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",0), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%5.0f", NA), " n.psi = ",formatC(Lp,digits=0,format="f"), " ini.psi = ",paste(formatC(unlpsi[1:min(5,Lp)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } id.warn <- FALSE low <- apply(Xtrue[,unique(colnames(Xtrue)),drop=FALSE], 2, min) up <- apply(Xtrue[,unique(colnames(Xtrue)),drop=FALSE], 2, max) L1<-L0+10 tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) #============================================== idZ<-(plin+1):(plin+ncol(PSI)) idW<-(plin+ncol(PSI)+1): ( plin+2*ncol(PSI)) while (abs(epsilon) > tol) { i <- i + 1 #if(i==1) browser() xx <- Xtrue[,cumsum(npsii),drop=FALSE] for (p in 1:P) { psis <- sort(psi0[pos[[p]]]) gruppi <- cut(xx[,p], breaks = c(low[p] - 0.1, psis, up[p]), labels = FALSE) if(any(is.na(gruppi))) stop(paste("too many breaks for step term #", p, "?"), call.=TRUE) points <- c(low[p], psis, up[p]) right <- c(low[p], points[2:(npsii[p] + 1)] + agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[3:(npsii[p] + 2)] - points[2:(npsii[p] + 1)]), NA) left <- c(NA, points[2:(npsii[p] + 1)] - agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[2:(npsii[p] + 1)] - points[1:npsii[p]]), up[p]) #if(any(is.na(left))| any(is.na(right))) stop(paste("too many breaks for step term #", p, "?"), call.=TRUE) for (j in 1:(npsii[p] + 1)) { xx.j <- xx[,p][gruppi == j] xx[,p][gruppi == j] <- right[j] + (xx.j - points[j]) * ((left[j + 1] - right[j])/(points[j + 1] - points[j])) } } XX<-toMatrix(xx, npsii) PSI<- matrix(psi0, n, npsi, byrow = TRUE) W <- (1/(2 * abs(XX - PSI))) Z <- (XX * W + 1/2) XREG <- cbind(x.lin, Z, W) #obj<-try(mylm(XREG,y,w=ww,offs=offs), silent = TRUE) #if(class(obj)[1]=="try-error") # obj <- lm.wfit(y = y, x = XREG, offset = offs, w=ww ) #b <- obj$coef[(2:(sum(k) + 1))] #g <- obj$coef[((sum(k) + 2):(2 * sum(k) + 1))] obj <- suppressWarnings(glm.fit(x = XREG, y = y, offset = offs, weights = ww, family = fam, control = glm.control(maxit = maxit.glm1[i]), etastart = eta0)) #idZ<-(plin+1):(plin+ncol(Z)) #idW<-(plin+ncol(Z)+1): ( plin+ncol(Z)+ncol(W)) b<- obj$coef[idZ] g<- obj$coef[idW] if(any(is.na(c(b, g)))){ if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } psi1 <- -g/b psi1<- psi0+ opz$h*(psi1-psi0) #aggiusta la stima di psi.. psi1<- adj.psi(psi1, limZ) psi1<-unlist(tapply(psi1, opz$id.psi.group, sort), use.names =FALSE) #if(i==1) browser() #la f e' chiaramente a gradino per cui meglio dividere.. 
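# Editor's note (not in the original comments): the profile objective in h evaluated below is
# essentially piecewise constant, because the fitted step function only changes when a candidate
# breakpoint crosses an observed value of the covariate; a single optimize() call over [0,1] can
# therefore stop on a flat stretch, so the search is split into [0, 0.5] and [0.5, 1] and the
# better of the two minima is kept.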
a0<-optimize(search.min, c(0,.5), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs, tol=tolOp[i]) a1<-optimize(search.min, c(.5,1), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs, tol=tolOp[i]) a <-if(a0$objective<=a1$objective) a0 else a1 #a0<-optimize(search.min, c(0,.33), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a1<-optimize(search.min, c(.33,.66), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a2<-optimize(search.min, c(.66,1), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) #a<-if(a2$objective<=a$objective) a2 else a if(a$objectivePSI1) #obj1 <- try(mylm(XREG1, y, ww, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(XREG1, y, ww, offs), silent = TRUE) delta<- psi1-psi0 if (display) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(psi1) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",i), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ",formatC(Lp,digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0<-L1 k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi1 dev.values[length(dev.values) + 1] <- L0 if (i >= it.max) { id.warn <- TRUE break } psi0<-psi1 } #end while_it #browser() psi1 <-unlist(tapply(psi1, opz$id.psi.group, sort)) PSI<- matrix(psi1, n, npsi, byrow = TRUE) U <- 1*(Xtrue>PSI) #ATTENZIONE .. Assume che obj sia stato stimato sempre! obj<-list(obj=obj, psi=psi1, psi.values=psi.values, rangeZ=rangeZ, SumSquares.no.gap=L1, beta.c=b, it=i, epsilon=epsilon, id.warn=id.warn, U=U, eta0=eta0) return(obj) } #end jump.fit segmented/R/segmented.R0000644000176200001440000000023514415477002014530 0ustar liggesusers`segmented` <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), model=TRUE, ...){ UseMethod("segmented") } segmented/R/step.glm.fit.boot.r0000644000176200001440000002325514757620767016121 0ustar liggesusersstep.glm.fit.boot <- function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- #--------------------------------------------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #--- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. 
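# Editor's summary of the bootstrap-restart scheme implemented below (a sketch, not authoritative):
#   1. fit the stepmented model from the supplied starting values PSI;
#   2. for each of the n.boot samples, refit on resampled data (case resampling when nonParam=TRUE,
#      semiparametric resampling of residuals otherwise) to obtain perturbed breakpoints;
#   3. refit the original data starting from those perturbed breakpoints, keeping the solution
#      with the smallest objective found so far;
#   4. possibly stop before n.boot samples, as noted before the stopping rule inside the loop.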
#dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi #psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.values<-lista[[1]] psi.values<-lista[[2]] if(any(is.na(psi.values[[1]]))) {#se la 1 componente e' NA (fino alla versione 2.0-3 era cosi'... perche' in dev.values c'erano # anche i valori relativi al modello senza psi.. ) dev.values<-dev.values[-1] #remove the 1st one referring to model without psi psi.values<-psi.values[-1] } dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- #browser() if(is.null(opz$seed)){ mY <- mean(as.numeric(y)) sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } visualBoot<-opz$display opz$display<-FALSE #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz #opz1 viene usata solo quando diversi tentativi di stimare il modello falliscono... opz1$it.max <-0 opz0 <- opz opz0$maxit.glm <- 2 opz0$agg<-.2 n<-length(y) alpha<-opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = alpha) #c(alpha, 1 - alpha)) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=alpha) else opz$limZ rangeZ <- apply(Z, 2, range) #serve sempre o0 <-try(suppressWarnings(step.glm.fit(y, XREG, Z, PSI, w, offs, opz0, return.all.sol=FALSE)), silent=TRUE) #browser() if(!is.list(o0)) { o0<- suppressWarnings(step.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap eta0 <- o0$eta0 if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(step.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap eta0 <- o0$eta0 } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 eta0 <- NULL } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z count.random<-0 agg.values<-seq(.2,.05,l=n.boot) ###INIZIO BOOT alpha<-.1 corr=1.2 n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ #if(k==7) browser() ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... 
opz$eta0 <- eta0 diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ #browser() qpsi <- sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi.cor <- sapply(1:ncol(Z),function(i)mean((corr*est.psi0[i])>=Z[,i])) qpsi <- ifelse(abs(qpsi-.5)<=.2, qpsi.cor, alpha) alpha<-1-alpha corr<-1/corr est.psi0 <- sapply(1:ncol(Z),function(i)quantile(Z[,i], probs=qpsi[i],names=FALSE)) est.psi0 <- adj.psi(est.psi0, limZ) #est.psi0<- jitter(est.psi0, amount=min(diff(est.psi0))) } ############################ 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ########################################## PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(step.glm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(step.glm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 opz$agg<-agg.values[k] # opz$Nboot <- k # o <-try(suppressWarnings(step.glm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) #if(k==8) browser() if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o <-try(suppressWarnings(step.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-suppressWarnings(try(extract.psi(o), silent=TRUE)) #if(class(o)!="try-error"){ if(!inherits(o, "try-error")){ #if(k==8) browser() all.est.psi[k,]<-o$psi[!is.na(o$psi)] all.ss[k]<- o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-L0<-o0$SumSquares.no.gap eta0 <- o0$eta0 } } if (visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(est.psi0) Lp<-length(unlpsi) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(Lp, digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. 
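# Editor's illustration (not in the original sources): with break.boot = 5, the seg.control()
# default, the check below leaves the loop as soon as the last four differences of the selected
# objective values, rounded to six decimals, are all zero, i.e. five consecutive bootstrap
# samples produced no improvement.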
asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) #SS.ok<-min(all.selected.ss) #id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) #psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) #est.psi0<-psi.mean # #devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0),byrow=TRUE) o0 <- try(step.glm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) warning("The final fit can be unreliable (possibly mispecified stepmented relationship)", call.=FALSE, immediate.=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris o0$seed <- seed #rm(.Random.seed, envir=globalenv()) return(o0) } segmented/R/confint.stepmented.R0000644000176200001440000000475114610167706016377 0ustar liggesusers`confint.stepmented` <- function(object, parm, level=0.95, method=c("delta", "score", "gradient"), round=TRUE, cheb=FALSE, #var.diff=FALSE digits=max(4, getOption("digits") - 1), .coef=NULL, .vcov=NULL, ...){ method<-match.arg(method) if(method!="delta") stop("Only delta allowed") if(missing(parm)) { nomeZ<- object$nameUV$Z } else { if(is.numeric(parm)) parm<-object$nameUV$Z[parm] if(! all(parm %in% object$nameUV$Z)) stop("invalid 'parm' name", call.=FALSE) nomeZ<-parm } if(length(nomeZ)>1) { warning("There are multiple stepmented terms. The first is taken", call.=FALSE, immediate. = TRUE) nomeZ<-nomeZ[1] } nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U nomiPsi <- gsub("V","psi", nomiV) Cov<-vcov.stepmented(object, type="cdf", ...) id <- match(nomiPsi, names(coef(object))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) psi<-object$psi[nomiPsi,"Est."] se<- sqrt(vv) #[nomiPsi] npsi<-length(psi) if(cheb){ z<-1/sqrt(1-level) } else { z<-if("lm"%in%class(object)) abs(qt((1-level)/2,df=object$df.residual)) else abs(qnorm((1-level)/2)) } #browser() #z=abs(qnorm((1-level)/2)) Z<-object$Z Z0<-apply(Z,2,sort) #browser() inf<-pmax(psi -z*se, object$rangeZ[1,]) sup<-pmin(psi +z*se, object$rangeZ[2,]) #ripeti i nomi delle variabili stepmented tante volte quanti sono i psi.. #nomiZripetuti<- sub("\\.", "", sub("psi[1-9].","", nomiPsi)) #Il 19/2/ email di Matti Lehtonen che fa notare che con la linea di sopra venivano eliminati i "." dai nomi delle variabili.. 
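## [Illustrative sketch, not part of the original source] The interval computed above is the
## Wald-type one, psi.hat -/+ z * SE(psi.hat), truncated to the observed range of the
## stepmented variable; with cheb = TRUE the normal/t quantile is replaced by the
## distribution-free Chebyshev multiplier 1/sqrt(1 - level). A self-contained numeric
## illustration with toy values:
# psi.hat <- 0.43; se.psi <- 0.06; level <- 0.95
# z  <- abs(qnorm((1 - level)/2))     # large-sample quantile (qt(., df) is used for lm fits)
# zc <- 1/sqrt(1 - level)             # Chebyshev multiplier used when cheb = TRUE
# c(psi.hat - z  * se.psi, psi.hat + z  * se.psi)   # usual Wald interval
# c(psi.hat - zc * se.psi, psi.hat + zc * se.psi)   # wider, distribution-free interval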
#Era stata messa nell'eventualita' che qualche variabile avesse >10 breakpoints #I codici di sotto sono consentiti fino a 99 changepoints per variabile nomiZripetuti <- sub("psi[1-9]*[0-9].","", nomiPsi) #nomiZripetuti <- sub("psi[1-9].", "", nomiZripetuti) #browser() if(round){ inf.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[, nomiZripetuti[j]] PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylmWO(cbind(X, U1), y, w), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #sum(obj1$residuals^2 * w) L1 } #--------------------------------- search.min<-function(h, psi, psi.old, X, y, w) { psi.ok<- psi*h + psi.old*(1-h) #PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) PSI <- matrix(psi.ok, nrow=n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylm(cbind(X, U1), y), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #sum(obj1$residuals^2) L1 } # search.min<-function(h, psi, psi.old, X, y, w) { # psi.ok<- psi*h + psi.old*(1-h) # PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) # U1 <- (Z - PSI) * (Z > PSI) # obj1 <- try(mylm(cbind(X, U1), y, w), silent = TRUE) # if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(x=cbind(X, U1), y, w), silent = TRUE) # L1 <- if (class(obj1)[1] == "try-error") L0 + 10 # else sum(w*obj1$residuals^2) # L1 # } # est.k <- function(x1, y1, L0) { # ax <- log(x1) # .x <- cbind(1, ax, ax^2) # b <- drop(solve(crossprod(.x), crossprod(.x, y1))) # const <- b[1] - L0 # DD <- sqrt(b[2]^2 - 4 * const * b[3]) # kk <- exp((-b[2] + DD)/(2 * b[3])) # return(round(kk)) # } # dpmax <- function(x, y, pow = 1) { # if (pow == 1) # -(x > y) # else -pow * ((x - y) * (x > y))^(pow - 1) # } mylmWO <- function(x, y, w, offs = 0) { sw <- sqrt(w) x1 <- x * sw y <- y - offs y1 <- y * sw b <- drop(solve(crossprod(x1), crossprod(x1, y1))) fit <- x%*%b #drop(tcrossprod(x, t(b))) r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(w*r^2), df.residual = length(y) - length(b)) o } #---------------------------- mylm <- function(x, y, w, offs) { b <- drop(solve(crossprod(x), crossprod(x, y))) fit <- x%*%b # r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(r^2), df.residual = length(y) - length(b)) o } #---------------------------- if(!opz$id.weights){ fitter<-function(x, y, w) .lm.fit(x=x, y=y) #list(coefficients=drop(solve(crossprod(x), crossprod(x, y)))) mylmOK <- mylm search.minOK <- search.min } else { fitter<-function(x, y, w) .lm.fit(x=sqrt(w)*x, y=sqrt(w)*y) mylmOK <- mylmWO search.minOK <- search.minWO } isZero <- function(v) sapply(v, function(.x) identical(.x,0)) # mylm <- function(x, y, w=1) { # x1<-x*sqrt(w) # y1<-y*sqrt(w) # b <- drop(solve(crossprod(x1), crossprod(x1, y1))) # fit <- drop(tcrossprod(x, t(b))) # r <- y - fit # o <- list(coefficients = b, fitted.values = fit, residuals = r, # df.residual = length(y) - length(b)) # o # } mylmADD <- function(invXtX, X, v, Xty, y) { vtv <- sum(v^2) Xtv <- crossprod(X, v) m <- invXtX %*% Xtv d <- drop(1/(vtv - t(Xtv) %*% m)) r <- -d * m invF <- invXtX + d * tcrossprod(m) newINV <- rbind(cbind(invF, r), c(t(r), d)) b <- crossprod(newINV, c(Xty, sum(v * y))) fit <- tcrossprod(cbind(X, v), t(b)) r <- y - fit o <- 
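## [Illustrative sketch, not part of the original source] mylmWO() above solves a weighted
## least-squares problem by rescaling rows with sqrt(w) and then applying ordinary normal
## equations; the toy check below confirms it matches lm.wfit():
# set.seed(2); n <- 50
# X  <- cbind(1, rnorm(n)); y <- drop(X %*% c(1, 2) + rnorm(n)); w <- runif(n, .5, 2)
# sw <- sqrt(w)
# b1 <- drop(solve(crossprod(X * sw), crossprod(X * sw, y * sw)))  # sqrt(w) rescaling
# b2 <- coef(lm.wfit(X, y, w))
# all.equal(unname(b1), unname(b2))                                # TRUE
## Bypassing lm.wfit() in the inner loops presumably avoids the overhead of the full QR
## machinery.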
list(coefficients = b, fitted.values = fit, residuals = r) o } in.psi <- function(LIM, PSI, ret.id = TRUE) { a <- PSI[1, ] < LIM[1, ] b <- PSI[1, ] > LIM[2, ] is.ok <- !a & !b if (ret.id) return(is.ok) isOK <- all(is.ok) && all(!is.na(is.ok)) isOK } far.psi <- function(Z, PSI, id.psi.group, ret.id = TRUE, fc = 0.93) { nSeg <- length(unique(id.psi.group)) npsij <- tapply(id.psi.group, id.psi.group, length) nj <- sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z > PSI)[, id.psi.group == .x, drop = FALSE]) + 1) }, simplify = FALSE) ff <- id.far.ok <- vector("list", length = nSeg) for (i in 1:nSeg) { if (length(nj[[i]]) != npsij[i] + 1) nj[[i]] <- tabulate(rowSums((Z >= PSI)[, id.psi.group == i, drop = FALSE]) + 1) id.ok <- (nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] & id.ok[-1] ff[[i]] <- ifelse(diff(nj[[i]]) > 0, 1/fc, fc) } id.far.ok <- unlist(id.far.ok) ff <- unlist(ff) if (!ret.id) { return(all(id.far.ok)) } else { attr(id.far.ok, "factor") <- ff return(id.far.ok) } } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } n <- length(y) #min.step <- opz$min.step rangeZ <- apply(Z, 2, range) alpha <- opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha)) limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) psi <- PSI[1, ] psi<-adj.psi(psi, limZ) PSI<-matrix(psi,nrow=n, ncol=ncol(PSI), byrow=TRUE) npsi<- length(psi) id.psi.group <- opz$id.psi.group #conv.psi <- opz$conv.psi hh <- opz$h digits <- opz$digits pow <- opz$pow nomiOK <- opz$nomiOK toll <- opz$toll gap <- opz$gap fix.npsi <- opz$stop.if.error dev.new <- opz$dev0 visual <- opz$visual it.max <- old.it.max <- opz$it.max fc <- opz$fc names(psi) <- id.psi.group it <- 0 epsilon <- 10 k.values <- dev.values <- NULL psi.values <- list() psi.values[[length(psi.values) + 1]] <- NA sel.col.XREG <- unique(sapply(colnames(XREG), function(x) match(x, colnames(XREG)))) if (is.numeric(sel.col.XREG)) XREG <- XREG[, sel.col.XREG, drop = FALSE] invXtX <- opz$invXtX Xty <- opz$Xty #browser() if(!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control.", call. = FALSE) if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi starting values too close each other or at the boundaries. Please change them (e.g. set 'quant=TRUE' in seg.control()), or decrease their number.", call. = FALSE) n.psi1 <- ncol(Z) #U <- ((Z - PSI) * (Z > PSI)) V <- (Z > PSI) #dpmax(Z, PSI, pow = pow[2]) U <- (Z - PSI) * V V<- -V obj0 <-list(residuals=rep(1,3)) L0 <- var(y)*n #sum(obj0$residuals^2) if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- lm.wfit(x = cbind(XREG, U), y = y, w = w) L1 <- sum(obj$residuals^2 * w) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE, idU=seq_along(psi)+ncol(XREG), idV=NULL) return(obj) } #XREG<-cbind(1,Z[,1]) #for(.i in opz$nomiSeg) { ##poni min(z)=0, cosi solve() in step.lm.fit non ha problemi. # if(.i %in% colnames(XREG)) XREG[,.i] <- XREG[,.i] - min(XREG[,.i]) #} #in seg.num.fit() la xreg ha sempre e solo l'interc e la seconda colonna il termine segmented! 
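## [Illustrative sketch, not part of the original source] mylmADD() above refits the model
## after appending one column 'v' without refactorising: it updates (X'X)^{-1} through the
## standard partitioned-inverse (rank-one) formulae. A self-contained check of that update:
# set.seed(3); n <- 40
# X <- cbind(1, rnorm(n)); v <- rnorm(n)
# invXtX <- solve(crossprod(X))
# Xtv <- crossprod(X, v); m <- invXtX %*% Xtv
# d <- drop(1/(sum(v^2) - crossprod(Xtv, m)))
# newINV <- rbind(cbind(invXtX + d * tcrossprod(m), -d * m), c(-d * m, d))
# all.equal(newINV, solve(crossprod(cbind(X, v))), check.attributes = FALSE)  # TRUE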
minZ <- min(XREG[,2]) XREG[,2]<-XREG[,2]- minZ n.intDev0 <- nchar(strsplit(as.character(L0), "\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- opz$dev0 dev.values[length(dev.values) + 1] <- L0 psi.values[[length(psi.values) + 1]] <- psi if (visual) { cat(paste("iter = ", sprintf("%2.0f", 0), " dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " k = ", sprintf("%2.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } id.warn <- FALSE id.psi.changed <- rep(FALSE, it.max) nomiUV<-c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(U), sep = "")) idU <- seq_along(psi)+ncol(XREG) idV <- seq_along(psi)+max(idU) #============================================== inizio ciclo tolOp <-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) while (abs(epsilon) > toll) { it <- it + 1 #if(it==1) browser() n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if (n.psi1 != n.psi0) { U <- ((Z - PSI) * (Z > PSI)) #if (pow[1] != 1) U <- U^pow[1] obj0 <- try(mylm(cbind(XREG, U), y), silent = TRUE) if(class(obj0)[1] == "try-error") obj0 <- .lm.fit(cbind(XREG, U), y) L0 <- sum(obj0$residuals^2) } # V <- (Z > PSI) #dpmax(Z, PSI, pow = pow[2]) # U <- (Z - PSI) * V # V<- -V X <- cbind(XREG, U, V) #colnames(X)[2:ncol(X)] <- nomiUV obj <- fitter(x = X, y = y, w=w) #puoi usare .lm.fit(), ma i coeff non stimati non sono NA ma zero! vedi seg.lm in cumSeg.. beta.c <- obj$coefficients[idU] #coef(obj)[paste("U", 1:ncol(U), sep = "")] gamma.c <- obj$coefficients[idV] # coef(obj)[paste("V", 1:ncol(V), sep = "")] if (any(isZero(c(beta.c, gamma.c)))) { if (fix.npsi) { if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call. = FALSE) } else { id.coef.ok <- gamma.c!=0#!is.na(gamma.c) psi <- psi[id.coef.ok] if (length(psi) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } gamma.c <- gamma.c[id.coef.ok] beta.c <- beta.c[id.coef.ok] Z <- Z[, id.coef.ok, drop = FALSE] rangeZ <- rangeZ[, id.coef.ok, drop = FALSE] limZ <- limZ[, id.coef.ok, drop = FALSE] nomiOK <- nomiOK[id.coef.ok] id.psi.group <- id.psi.group[id.coef.ok] names(psi) <- id.psi.group } } psi.old <- psi psi <- psi.old + hh*gamma.c/beta.c #aggiusta la stima di psi.. 
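## [Illustrative note, not part of the original source] The update above is the core of the
## algorithm: with working covariates U = (z - psi)*I(z > psi) and V = -I(z > psi), the
## fitted coefficients beta.c (of U) and gamma.c (of V) give the step
##     psi.new = psi.old + h * gamma.c / beta.c,   0 < h <= 1,
## and the step length is then refined by the one-dimensional line search over h in (0, 1)
## performed just below via optimize(). A toy stand-in for that line-search call:
# rss <- function(h) (h - 0.3)^2 + 1              # hypothetical profile objective in h
# optimize(rss, interval = c(0, 1))$minimum       # ~ 0.3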
psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, opz$id.psi.group, sort), use.names =FALSE) a<-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, X=XREG, y=y, w=w, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if (!is.null(digits)) psi <- round(psi, digits) PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) V <- (Z > PSI) U <- (Z - PSI) * V V <- -V if (visual) { flush.console() cat(paste("iter = ", sprintf("%2.0f", it), " dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L1), " k = ", sprintf("%2.3f", use.k), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " est.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0 <- L1 k.values[length(k.values) + 1] <- use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE, fc = opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if (!all(id.psi.ok)) { if (fix.npsi) { psi <- psi * ifelse(id.psi.far, 1, attr(id.psi.far, "factor")) PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) id.psi.changed[it] <- TRUE } else { Z <- Z[, id.psi.ok, drop = FALSE] PSI <- PSI[, id.psi.ok, drop = FALSE] rangeZ <- rangeZ[, id.psi.ok, drop = FALSE] limZ <- limZ[, id.psi.ok, drop = FALSE] nomiOK <- nomiOK[id.psi.ok] id.psi.group <- id.psi.group[id.psi.ok] psi.old <- psi.old[id.psi.ok] psi <- psi[id.psi.ok] names(psi) <- id.psi.group if (ncol(PSI) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while.. ############################################################################## if (id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.", sep = ""), call. = FALSE) if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. 
= FALSE) attr(psi.values, "dev") <- dev.values attr(psi.values, "k") <- k.values psi <- unlist(tapply(psi, id.psi.group, sort)) names(psi) <- id.psi.group names.coef <- names(obj$coefficients) #PSI.old <- PSI PSI <- matrix(psi, n, ncol = length(psi), byrow = TRUE) #if (sd(PSI - PSI.old) > 0 || id.psi.changed[length(id.psi.changed)]) { U <- (Z - PSI) * (Z > PSI) colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") # if(opz$id.weights){ # obj <- lm.wfit(x = cbind(XREG, U), y = y, w=w) # L1 <- sum(w*obj$residuals^2) # } else { # obj <- .lm.fit(x = cbind(XREG, U), y = y) # L1 <- sum(obj$residuals^2) # } #browser() obj <- mylmOK(x = cbind(XREG, U), y = y, w = w) L1 <- obj$L0 obj$coefficients[1] <- obj$coefficients[1]-sum(obj$coefficients[2]*minZ) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = id.warn, idU=idU, idV=idV) return(obj) } segmented/R/vcov.segmented.R0000644000176200001440000000545314415477000015511 0ustar liggesusers#vc<-function(obj){ # invXtX<-chol2inv(qr.R(obj$qr)) #(XtX)^{-1} # V<-vcov.segmented(obj,is=TRUE) # s2<- if(inherits(obj, "glm")) summary.glm(obj)$dispersion else summary.lm(obj)$sigma^2 # s2*V%*% invXtX %*% V #} vcov.segmented<-function(object, var.diff=FALSE, is=FALSE, ...){ #if(is && inherits(object, "Arima")) { # warning("is=TRUE ignored with Arima fits", call.=FALSE) if(is && !inherits(object, "lm")) { warning("is=TRUE ignored. Only works with lm or glm fits", call.=FALSE) is<-FALSE } if(is){ if(var.diff) warning("option 'var.diff=TRUE' ignored with 'is=TRUE' ", call.=FALSE) X<-model.matrix(object) #qr.X(object$qr) piu efficiente? nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U for(i in 1:length(nomiV)){ nomeU<-nomiU[i] nomeV<-nomiV[i] nomepsi<-strsplit(nomeV,"\\.")[[1]][1] #solo "psi1" o "psi2",.. e' meglio estrarre il "psi1" perche' il nome della variabile puo' contenere un punto.. nomeZ<-gsub(paste(nomepsi,".",sep=""),"",nomeV) #estrae il nome della variabile.. Z<-X[,nomeZ] est.psi<- object$psi[nomeV,"Est."] se.psi<- object$psi[nomeV,"St.Err"] #questo e' se i coefficienti non sono nominati "coefficients" nomeCoef<-grep("coef", names(object), value = TRUE) if(length(nomeCoef)==0) nomeCoef<-grep("estimate", names(object), value = TRUE) X[,nomeV]<- (-object[[nomeCoef]][nomeU])*pnorm((Z-est.psi)/se.psi) } s2<- if(inherits(object, "glm")) summary.glm(object)$dispersion else summary.lm(object)$sigma^2 w<-object$weights if(is.null(w)) w<-1 v<-s2*solve(crossprod(X*sqrt(w))) return(v) } else { if(inherits(object, "Arima")){ v<-object$var.coef return(v) } if(inherits(object, "glm")){ if(var.diff) warning("option 'var.diff=TRUE' ignored with 'glm' objects", call.=FALSE) so <- summary.glm(object, correlation = FALSE, ...) v<-so$dispersion * so$cov.unscaled return(v) } if(inherits(object, "lm")){ if(var.diff){ if(length(object$nameUV$Z)>1) { var.diff<-FALSE warning("var.diff set to FALSE with multiple segmented variables", call.=FALSE) } v<-summary.segmented(object, var.diff=TRUE, correlation = FALSE, ...)$cov.var.diff } else { so<-summary.segmented(object, var.diff=FALSE, correlation = FALSE, ...) v<-so$sigma^2 * so$cov.unscaled #object$cov.unscaled.is } return(v) } else { #in tutti gli altri casi.. 
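## [Illustrative sketch, not part of the original source] With is = TRUE the method above
## replaces each indicator column V of the design with a smooth surrogate,
## -beta.U * pnorm((z - psi.hat)/SE(psi.hat)), before computing sigma^2 * (X'WX)^{-1}
## (an 'induced smoothing' style covariance). The surrogate column alone, with toy values:
# z <- sort(runif(100)); psi.hat <- 0.5; se.psi <- 0.05; bU <- 1.3
# V.smooth <- -bU * pnorm((z - psi.hat)/se.psi)   # smooth version of -bU * I(z > psi.hat)
# plot(z, V.smooth, type = "l")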
if(class(object)[1]=="segmented") class(object)<-class(object)[-1] v<-vcov(object) #paste("vcov.",class(object),sep="") return(v) } } #end else is } #end fn segmented/R/plot.segmented.r0000644000176200001440000004712614616154421015557 0ustar liggesusersplot.segmented<-function (x, term, add = FALSE, res = FALSE, conf.level = 0, interc=TRUE, link = TRUE, res.col = grey(.15, alpha = .4), rev.sgn = FALSE, const = NULL, shade=FALSE, rug=!add, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, smoos=NULL, hide.zeros=FALSE, leg="topleft", psi.lines=FALSE, ...){ #put leg=NA if you do not want the legend.. #funzione plot.segmented che consente di disegnare anche i pointwise CI f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-------------- enl.range<-function(..., enlarge=TRUE){ #modifica il min dei valori in ... r<-range(...) if(enlarge) r[1]<-if(sign(r[1])>0) r[1]*.9 else r[1]*1.1 r } #-------------- #se l'oggetto e' segmented.Arima il nome dell'eventuale interc va sostituito.. #if((all(class(x)==c("segmented", "Arima")))) names(x$coef)<-gsub("intercept", "(Intercept)", names(coef(x))) if(all(c("segmented", "Arima") %in% class(x))) names(x$coef)<-gsub("intercept", "(Intercept)", names(x$coef)) covv <- if(is.null(.vcov)) vcov(x, is=is, var.diff=var.diff) else .vcov if(!is.null(.coef)) { estcoef<- .coef } else { estcoef <- coef(x) if(is.null(estcoef)) estcoef <- x$coef if(is.null(estcoef)) stop("No coeffs in the fit? Please use '.coef'") } if(length(estcoef)==0) stop("No coefficient in the object fit?") #browser() if(!all(dim(covv)==c(length(estcoef), length(estcoef)))) stop("dimension of cov matrix and estimated coeffs do not match", call. = FALSE) #-------------- linkinv <- !link if (inherits(x, what = "glm", which = FALSE) && linkinv && !is.null(x$offset) && res) stop("residuals with offset on the response scale?") if(conf.level< 0 || conf.level>.9999) stop("meaningless 'conf.level'") if ((inherits(x, what = "glm", which = FALSE) && linkinv) || res) { if(!(identical(transf, I) || identical(transf, "I"))) {transf<-I; warning("'transf' set to I with 'res=TRUE' and/or 'link=FALSE'.")} } if(missing(term)) { if (length(x$nameUV$Z) > 1) { stop("please, specify `term'") } else { term <- x$nameUV$Z } } else { #browser() if(is.numeric(term)) term <- x$nameUV$Z[term] #if(!is.character(term)) stop("please specify correctly 'term' ") #term<- deparse(substitute(term)) #if(dterm %in% x$nameUV$Z) term<-dterm if (!isTRUE(all(term %in% x$nameUV$Z))) stop(paste("Unknown term. It should be numeric or one of: ", paste(" '", x$nameUV$Z, "' ", sep="", collapse=""))) } if(length(term)>1){ opz<-list(...) 
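## [Illustrative usage sketch, not part of the original source] A typical call of this plot
## method on a toy segmented fit (one breakpoint in x), adding a pointwise confidence band
## and the partial residuals:
# set.seed(1)
# x <- seq(0, 1, length.out = 100)
# y <- 2 + 1.5 * pmax(x - 0.5, 0) + rnorm(100, sd = 0.1)
# o <- segmented(lm(y ~ x), seg.Z = ~x, psi = 0.4)
# plot(o, term = "x", conf.level = 0.95, shade = TRUE, res = TRUE)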
cols<- if(!is.null(opz$col)) opz$col else 1:length(term)+1 cols <- rep(cols, l=length(term)) res.cols<- rep(res.col, l=length(term)) lwds<- if("lwd"%in% names(opz)) opz$lwd else 2 lwds<- rep(lwds, l=length(term)) ltys<- if("lty"%in% names(opz)) opz$lty else 1 ltys<- rep(ltys, l=length(term)) cexs<- if("cex"%in% names(opz)) opz$cex else .75 cexs<- rep(cexs, l=length(term)) pchs<- if("pch"%in% names(opz)) opz$pch else 19 pchs<- rep(pchs, l=length(term)) if(!is.null(opz$ylim)) { Ylim <- opz$ylim } else { if(inherits(x, "glm")){ if(link){ Ylim <- if(!res) range(x$linear.predictors) else range(x$linear.predictors+x$residuals) } else { Ylim <- if(!res) range(x$fitted.values) else range(x$fitted.values+ residuals(x, "response")) } } else { Ylim <- if(!res) range(x$fitted.values) else range(x$fitted.values+x$residuals) } } Ylab <- if(!is.null(opz$ylab)) opz$ylab else paste(formula(x))[2] idTerm <- if(is.numeric(term)) term else match(term, x$nameUV$Z) nomeX <- intersect(strsplit(x$nameUV$Z,":")[[idTerm[1]]], unlist(strsplit(x$nameUV$Z,":")[idTerm[-1]])) Xlab <- if(!is.null(opz$xlab)) opz$xlab else nomeX Xlim<- if(!is.null(opz$xlim)) opz$xlim else range(x$model[,nomeX]) int.all<-rep(NA, length(term)) plot.segmented(x, term[1], add = add, res = res, conf.level = conf.level, interc=interc, link = link, res.col = res.cols[1], rev.sgn = rev.sgn, const = const, shade=shade, rug=FALSE, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, smoos=NULL, hide.zeros=TRUE, col=cols[1], ylim=Ylim, xlim=Xlim, ylab=Ylab,xlab=Xlab, lty=ltys[1],pch=pchs[1],lwd=lwds[1],cex=cexs[1]) Term<- if(is.numeric(term[1])) x$nameUV$Z[term[1]] else term[1] int.all[1]<-interc.gr<- strsplit(Term, ":")[[1]][2] points.segmented(x, term[1], col=cols[1], const=estcoef[interc.gr], v=psi.lines, pch=20, link=link) for(j in 2:length(term)){ plot.segmented(x, term[j], add = TRUE, res = res, conf.level = conf.level, interc=interc, link = link, res.col = res.cols[j], rev.sgn = rev.sgn, const = const, shade=shade, rug=FALSE, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, smoos=NULL, hide.zeros=TRUE,col=cols[j], lty=ltys[j],pch=pchs[j],lwd=lwds[j],cex=cexs[j]) Term<- if(is.numeric(term[j])) x$nameUV$Z[term[j]] else term[j] int.all[j]<-interc.gr<- strsplit(Term, ":")[[1]][2] points.segmented(x, term[j], col=cols[j], const = estcoef[interc.gr], v=psi.lines, pch=20, link=link) } if(!is.na(leg)) { legend(leg, int.all, col=cols, lty=1, lwd=1.5, bty="n") } } else { if(is.null(const)){ interc.gr<- strsplit(term, ":")[[1]][2] const<- estcoef[interc.gr] if(is.na(const)) const<-0 } if(!is.numeric(const)) stop(" 'const' should be NULL (default) or numeric") opz <- list(...) 
col.shade<-if(!is.null(opz$col.shade)) adjustcolor(opz$col.shade, .15) else adjustcolor("grey", .4) cols<- if("col"%in% names(opz)) opz$col else 2 lwds<- if("lwd"%in% names(opz)) opz$lwd else 2 ltys<- if("lty"%in% names(opz)) opz$lty else 1 cexs<- if("cex"%in% names(opz)) opz$cex else .75 pchs<- if("pch"%in% names(opz)) opz$pch else 19 ylabs<- if("ylab"%in% names(opz)) opz$ylab else paste("Effect of ", term, sep = " ") xlabs<- if("xlab"%in% names(opz)) opz$xlab else term a <- intercept(x, term, digits=20, .vcov=covv, .coef=estcoef)[[1]][, "Est."] #Poiche' intercept() restituisce quantita' che includono sempre l'intercetta del modello, questa va eliminata se interc=FALSE idInterc<-grep("ntercept",names(estcoef)) if(!interc && length(idInterc)==1) a<- a-estcoef[idInterc] b <- slope(x, term, digits=20, .coef=estcoef, .vcov=covv)[[1]][, "Est."] #browser() id <- f.U(rownames(x$psi), term) est.psi <- x$indexU[[term]] val <- sort(c(est.psi, x$rangeZ[, term])) #vettorializza i cols, lwds, ltys cols<-rep(cols, l=length(est.psi)+1) lwds<-rep(lwds, l=length(est.psi)+1) ltys<-rep(ltys, l=length(est.psi)+1) #---------aggiunta per gli IC rangeCI<-NULL vall<-sort(c(seq(min(val), max(val), l=100), est.psi, est.psi+1e-5)) #ciValues<-predict.segmented(x, newdata=vall, se.fit=TRUE, type=tipo, level=conf.level) vall.list<-list(vall) names(vall.list)<-term if(conf.level>0) { k.alpha<- if(all(c("segmented","lm") %in% class(x))) abs(qt((1-conf.level)/2, x$df.residual)) else abs(qnorm((1-conf.level)/2)) ciValues<-broken.line(x, vall.list, link=link, interc=interc, se.fit=TRUE, isV=isV, is=is, var.diff=var.diff, p.df=p.df, .vcov=covv, .coef=estcoef) #se gli passi covv, gli argomenti is e var.diff NON servono perche li ignora.. ciValues<-cbind(ciValues$fit, ciValues$fit- k.alpha*ciValues$se.fit, ciValues$fit + k.alpha*ciValues$se.fit) + const #---> transf... ciValues<-apply(ciValues, 2, transf) rangeCI<-range(ciValues) #ciValues e' una matrice di length(val)x3. Le 3 colonne: stime, inf, sup #polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = "gray", border=NA) } #--------- a.ok <- c(a[1], a) b.ok <- c(b[1], b) y.val <- a.ok + b.ok * val + const a.ok1 <- c(a, a[length(a)]) b.ok1 <- c(b, b[length(b)]) y.val <- y.val1 <- a.ok1 + b.ok1 * val + const s <- 1:(length(val) - 1) if(rev.sgn) val <- -val m <- cbind(val[s], y.val1[s], val[s + 1], y.val[s + 1]) #xvalues <- if(all(c("segmented", "Arima") %in% class(x))) x$Z[,1] else model.matrix(x)[,term] #x$model[, term] #browser() if(res || dens.rug || rug){ if(inherits(x,"Arima")){ xvalues <-x$Z[,1] } else { M <- model.matrix.segmented(x) #il 18/4/24 mi sono accorto che con ogg ottenuti da segmented.* con leftmost pendenza nulla non funzionava #perche' model.matrix.segmented non restituiva la variabile (non inserita nel modello (g)lm di partenza..) 
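## [Illustrative sketch, not part of the original source] The band drawn when conf.level > 0
## is built exactly as above: predictions from broken.line() on a fine grid, plus/minus a t
## (or normal) quantile times the pointwise standard error. Schematically, for a segmented
## lm fit 'o' with segmented variable 'x' (placeholder names):
# xs <- seq(min(o$model$x), max(o$model$x), length.out = 100)
# nd <- list(x = xs)                               # the name must match the segmented variable
# pr <- broken.line(o, nd, se.fit = TRUE)
# k  <- abs(qt((1 - 0.95)/2, df = o$df.residual))
# band <- cbind(fit = pr$fit, low = pr$fit - k * pr$se.fit, up = pr$fit + k * pr$se.fit)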
if(!term %in% colnames(M) && term%in%names(x$model)) M<-cbind(M, x$model[,term,drop=FALSE] ) if(term %in% colnames(M)) { xvalues <- M[,term] } else { id.segTerm<-which(sapply(names(x$nameUV$formulaSeg), function(.x) startsWith(term,.x))) xvalues <- model.matrix(x$nameUV$formulaSeg[[id.segTerm]], data=x$model)[,term] } } if(rev.sgn) xvalues <- -xvalues } #browser() if(res){ new.d<-data.frame(ifelse(rep(rev.sgn, length(xvalues)),-xvalues, xvalues)) names(new.d)<-term fit0 <- broken.line(x, new.d, link = link, interc=interc, se.fit=FALSE, .vcov=covv, .coef=estcoef)$fit } #------------------------------------------------------------------------------- if (inherits(x, what = "glm", which = FALSE) && linkinv) { #se GLM con link=FALSE (ovvero linkinv=TRUE) fit <- if (res) #predict.segmented(x, ifelse(rep(rev.sgn, length(xvalues)),-xvalues,xvalues), type=tipo) + resid(x, "response") + const #broken.line(x, term, gap = show.gap, link = link) + resid(x, "response") + const fit0 + resid(x, "response") + const else x$family$linkinv(c(y.val, y.val1)) xout <- sort(c(seq(val[1], val[length(val)], l = 50), val[-c(1, length(val))], pmax(val[-c(1, length(val))]*1.0001, val[-c(1, length(val))]*.9999))) l <- suppressWarnings(approx(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), xout = xout)) val[length(val)]<- if(rev.sgn) min(l$x) else max(l$x) #aggiunto 11/09/17.. if else il 9/3/21 id.group <- cut(l$x, val, labels=FALSE, include.lowest =TRUE, right=TRUE) #xout <- sort(c(seq(val[1], val[length(val)], l = 150), val[-c(1, length(val))],val[-c(1, length(val))]*1.0001)) #l <- suppressWarnings(approx(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), xout = xout)) #val[length(val)]<-max(l$x) #aggiunto 11/09/17 #id.group <- cut(l$x, val, FALSE, TRUE) yhat <- l$y xhat <- l$x m[, c(2, 4)] <- x$family$linkinv(m[, c(2, 4)]) if (!add) { plot(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), type = "n", xlab = xlabs, ylab = ylabs, main = opz$main, sub = opz$sub, cex.axis = opz$cex.axis, cex.lab = opz$cex.lab, xlim = opz$xlim, ylim = if(is.null(opz$ylim)) enl.range(fit, rangeCI, enlarge=dens.rug) else opz$ylim ) if(dens.rug){ density <- density(xvalues) # the height of the densityity curve max.density <- max(density$y) # Get the boundaries of the plot to # put the density polygon at the x-line plot_coordinates <- par("usr") # get the "length" and range of the y-axis y.scale <- plot_coordinates[4] - plot_coordinates[3] # transform the y-coordinates of the density # to the lower 10% of the plotting panel density$y <- (0.1 * y.scale / max.density) * density$y + plot_coordinates[3] ## plot the polygon polygon( density$x , density$y , border = FALSE , col = dens.col) box() } if(rug) { #usare rug()? segments(xvalues, rep(par()$usr[3],length(xvalues)), xvalues, rep(par()$usr[3],length(xvalues))+ abs(diff(par()$usr[3:4]))/80) } } if (res) { if(hide.zeros) { fit <- fit[abs(xvalues)>1e-8] xvalues <- xvalues[abs(xvalues)>1e-8] } if(is.null(smoos)) { smoos <- if(length(xvalues)>10000) TRUE else FALSE } if(smoos){ smoothScatter(xvalues, fit, add=TRUE, nrpoints = 0, colramp= colorRampPalette(c("white", res.col))) } else { points(xvalues, fit, cex = cexs, pch = pchs, col = res.col) } } if(conf.level>0){ if(rev.sgn) vall<- -vall if(shade) { polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = col.shade, border=NA) } else { #browser() id.group1 <- cut(vall, val, labels=FALSE, include.lowest =TRUE, right=TRUE) #serve per gli IC.. 
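## [Illustrative note, not part of the original source] When there are many residuals
## (smoos = TRUE, which is the default for more than 10000 points) the code above shades a
## 2-d density instead of overplotting the individual points; the same device in isolation:
# set.seed(4)
# xx <- rnorm(2e4); yy <- xx + rnorm(2e4)
# smoothScatter(xx, yy, nrpoints = 0)   # density shading instead of 20000 points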
for (i in 1:max(id.group1)) matlines(vall[id.group1 == i], ciValues[id.group1 == i,-1], type="l", lty=2, col=cols[i]) #matlines(vall, ciValues[,-1], type="l", lty=2, col=cols) } } yhat <- x$family$linkinv(yhat) if (length(cols) == 1) cols <- rep(cols, max(id.group)) if (length(lwds) == 1) lwds <- rep(lwds, max(id.group)) if (length(ltys) == 1) ltys <- rep(ltys, max(id.group)) for (i in 1:max(id.group)) { lines(xhat[id.group == i], yhat[id.group == i], col = cols[i], lwd = lwds[i], lty = ltys[i]) if(prev.trend) lines(xhat[xhat>est.psi[i]], x$family$linkinv((a[i]+b[i]*xhat)[xhat>est.psi[i]]), col=cols[i], lwd = lwds[i]*.65, lty = 2) } #------------------------------------------------------------------------------- } else { #se LM o "GLM con link=TRUE (ovvero linkinv=FALSE)" ##---> transf!!! y.val<- do.call(transf, list(y.val)) y.val1<-do.call(transf, list(y.val1)) r <- cbind(val, y.val) r1 <- cbind(val, y.val1) rr <- rbind(r, r1) fit <- c(y.val, y.val1) if (res) { ress <- if (inherits(x, what = "glm", which = FALSE)) residuals(x, "working") #* sqrt(x$weights) mgcv::gam() usa " ..*sqrt(x$weights)/mean(sqrt(x$weights))" else resid(x) #if(!is.null(x$offset)) ress<- ress - x$offset #fit <- broken.line(x, term, gap = show.gap, link = link, interc = TRUE) + ress + const #fit <- predict.segmented(x, ifelse(rep(rev.sgn, length(xvalues)),-xvalues,xvalues), type=tipo) + ress + const fit <- fit0 + ress + const } if (!add) plot(rr, type = "n", xlab = xlabs, ylab = ylabs, main = opz$main, sub = opz$sub, xlim = opz$xlim, cex.axis = opz$cex.axis, cex.lab = opz$cex.lab, #ylim = if(is.null(opz$ylim)) enl.range(fit, rangeCI, enlarge=dens.rug) else opz$ylim) ylim = if(is.null(opz$ylim)) enl.range(fit, rangeCI, do.call(transf, list(m[, c(2,4)])), enlarge=dens.rug) else opz$ylim) if(dens.rug){ density <- density(xvalues) # the height of the densityity curve max.density <- max(density$y) # Get the boundaries of the plot to # put the density polygon at the x-line plot_coordinates <- par("usr") # get the "length" and range of the y-axis y.scale <- plot_coordinates[4] - plot_coordinates[3] # transform the y-coordinates of the density # to the lower 10% of the plotting panel density$y <- (0.1 * y.scale / max.density) * density$y + plot_coordinates[3] ## plot the polygon polygon(density$x , density$y , border = F , col = dens.col) box() } if(rug) {segments(xvalues, rep(par()$usr[3],length(xvalues)), xvalues, rep(par()$usr[3],length(xvalues))+ abs(diff(par()$usr[3:4]))/80)} if (res) { if(hide.zeros) { fit <- fit[abs(xvalues)>1e-8] xvalues <- xvalues[abs(xvalues)>1e-8] } if(is.null(smoos)) { smoos <- if(length(xvalues)>10000) TRUE else FALSE } if(smoos){ smoothScatter(xvalues, fit, add=TRUE, nrpoints = 0, colramp= colorRampPalette(c("white", res.col))) } else { #browser() points(xvalues, fit, cex = cexs, pch = pchs, col = res.col) } } if(rev.sgn) vall<- -vall if(conf.level>0) { if(shade) { polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = col.shade, border=NA) } else { #infittire vall, soprattutto in prossimita' dei psi? id.group1 <- cut(vall, val, labels=FALSE, include.lowest =TRUE, right=TRUE) #serve per gli IC.. 
for (i in 1:max(id.group1)) matlines(vall[id.group1 == i], ciValues[id.group1 == i,-1], type="l", lty=2, col=cols[i]) #VECCHIO: matlines(vall, ciValues[,-1], type="l", lty=2, col=cols) } } #aggiunto 06/2019 perche' sotto disegnava linee (e non curve) # segments(m[, 1], do.call(transf, list(m[, 2])), m[, 3], do.call(transf, list(m[, 4])), # col = cols, lwd = lwds, lty = ltys) #--- # modificato 8/2/21.. adesso le linee si uniscono sempre. #.. con valori tipo 2010 (date), non si uniscono.. #comunque vall ha piu' valori di xout, quindi e' sufficiente assegnare xout<-vall (01/10/2021) #xout <- sort(c(seq(val[1], val[length(val)], l = 50), val[-c(1, length(val))], # pmax(val[-c(1, length(val))]*1.0001, val[-c(1, length(val))]*.9999))) #if(rev.sgn) vall<- -vall xout <- vall l <- suppressWarnings(approx(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), xout = xout)) val[length(val)]<- if(rev.sgn) min(l$x) else max(l$x) #aggiunto 11/09/17; messo il if .. else 9/3/21 #id.group <- cut(l$x, val, labels=FALSE, include.lowest =TRUE, right=TRUE) id.group <- cut(vall, val, labels=FALSE, include.lowest =TRUE, right=TRUE) #e' come id.group1 #--- xhat <- l$x yhat <- l$y yhat <- do.call(transf, list(yhat)) #transf(yhat) if (length(cols) == 1) cols <- rep(cols, max(id.group)) if (length(lwds) == 1) lwds <- rep(lwds, max(id.group)) if (length(ltys) == 1) ltys <- rep(ltys, max(id.group)) for (i in 1:max(id.group)) { lines(xhat[id.group == i], yhat[id.group == i], col = cols[i], lwd = lwds[i], lty = ltys[i]) #if(conf.level>0 && !shade) matlines(vall[id.group1 == i], ciValues[id.group1 == i,-1], type="l", lty=2, col=cols[i]) if(prev.trend) lines(xhat[xhat>est.psi[i]], (a[i]+b[i]*xhat)[xhat>est.psi[i]], col=cols[i], lwd = lwds[i]*.65, lty = 2) } # if(prev.trend){ # for(i in 1:(length(est.psi)+1)) lines(xhat[xhat>est.psi[i]], a[i]+b[i]*xhat)[xhat>est.psi[i]], col=cols[i], lwd = lwds[i]*.7, lty = 2) # } } invisible(NULL) } } segmented/R/coef.stepmented.r0000644000176200001440000000060614604750473015710 0ustar liggesuserscoef.stepmented <-function(object, include.psi=TRUE, ...){ #browser() b<- object$coefficients #solo coeffs lineari (senza psi) if(!all(match( colnames(object$psi.rounded), names(b), 0)>0)) { psi<- object$psi.rounded[1,] names(psi)<-colnames(object$psi.rounded) b <- c(b, psi) } if(!include.psi) { b[match( colnames(object$psi.rounded), names(b), 0)] <-0 } b } segmented/R/vcov.segmented.lme.R0000644000176200001440000000674214415477002016271 0ustar liggesusersvcov.segmented.lme <-function(object, B=0, ret.b=FALSE, ...){ bootNP<-function(fit, B=50, seed=NULL, it.max.b=6){ #Non parametric boot for slme4 #fit: un oggetto di classe "segmented.lme" #----------------------- update.lme.call<-function (old.call, fixed., ..., evaluate=FALSE) { call <- old.call extras <- match.call(expand.dots = FALSE)$... if (!missing(fixed.)) call$fixed <- update.formula(call$fixed, fixed.) if (length(extras) > 0) { existing <- !is.na(match(names(extras), names(call))) for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if (any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if (evaluate) eval(call, parent.frame()) else call } #--------- if(is.null(B) || B<=0) stop("'B>0' is requested") newData<-fit$lme.fit$data rnfGrp<- fit$lme.fit.noG$groups if(ncol(rnfGrp)>1) warning("the innermost grouping variable is used", call. 
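## [Illustrative sketch, not part of the original source] bootNP() above estimates the
## fixed-effects covariance by a nonparametric cluster bootstrap: whole subjects (levels of
## the innermost grouping factor) are resampled with replacement and the model is refitted
## on each resample. The resampling step in isolation, with placeholder data 'd' and
## grouping column 'id':
# ids   <- unique(d$id)
# b.ids <- sample(ids, replace = TRUE)
# d.b   <- do.call(rbind, lapply(b.ids, function(i) d[d$id == i, , drop = FALSE]))
# ## refit the model on d.b and store fixef(); var() across the stored replicates then
# ## estimates the covariance of the fixed effects.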
= FALSE) nome.id <-names(rnfGrp)[ncol(rnfGrp)] #name of the innermost grouping variable var.id<-newData[, nome.id] #idLevels<-levels(var.id) idLevels<-levels(fit$lme.fit$groups[[ncol(rnfGrp)]]) N<-nlevels(fit$lme.fit$groups[[ncol(rnfGrp)]]) #n. of "subjects" nomeRispo<-all.vars(formula(fit$lme.fit))[1] #AGGIUSTA la risposta newData[,nomeRispo]<-newData[,nomeRispo] + fit$Off o.b<-fit$boot.call call.b<-update(object=fit, obj=o.b, data=newD, it.max=it.max.b, start=list(kappa0=startKappa0,kappa=startingKappa), display=FALSE, evaluate=FALSE) call.b$random <- fit$randomCALL startingKappa<-extract.psi(fit) startKappa0<- startingKappa[1] startingKappa<-startingKappa[-1] nomiKappa<-names(startingKappa) nomiKappa<-sapply(strsplit(nomiKappa, "G\\."),function(x)x[2]) names(startingKappa) <- nomiKappa est<-fixef(fit$lme.fit) se<-sqrt(diag(vcov(fit$lme.fit))) fitt<-fitted.segmented.lme(fit, level=0) COEF<-SE<-matrix(,B,length(est)) FIT<-matrix(,B,length(fitt)) if(!is.null(seed)) set.seed(seed) for(i in seq(B)){ #build the boot sample #idx<-sample(N, replace=TRUE) #idx<-sample(1:N, size=N, replace=TRUE) #idx<-levels(fit$lme.fit$groups[[1]])[idx] #newD<-do.call("rbind",lapply(idx, function(x)newData[newData$id==x,])) #newD$y.b<- newD$y idx<-sample(idLevels, size=N, replace=TRUE) newD <- do.call("rbind",lapply(idx, function(x)newData[newData[,nome.id]==x,])) newD$y.b<- newD[,nomeRispo] fit.b<-try(suppressWarnings(eval(call.b)), silent=TRUE) #envir=newD) if(is.list(fit.b)){ Tt<- summary(fit.b[[1]])$tTable #usa summary.lme COEF[i,]<-Tt[,1] #coef SE[i,]<-Tt[,2] #se FIT[i,]<-fitted.segmented.lme(fit.b, level=0) } } r<-list(coef=rbind(est,COEF),se=rbind(se,SE), fitted=rbind(fitt,FIT)) r } #-------------------- extract.psi<-function(obj){ #questa funzione restituisce i "kappa", ovvero i coeff di psi.. nomiG<-obj$namesGZ$nomiG b<-fixef(obj[[1]])[c("G0",nomiG)] b } #-------------------- opz<-list(...) opz$fit <- object opz$B <- B if(B<=0) { r <- object$lme.fit$varFix } else { #browser() r <- do.call(bootNP, opz) #bootNP(object, B= B, seed=opz$seed, opz$it.max.b=6) if(!ret.b) r<- var(r$coef[-1,]) } return(r) } segmented/R/slope.R0000644000176200001440000003442514703703717013714 0ustar liggesusers`slope` <- function(ogg, parm, conf.level=0.95, rev.sgn=FALSE, APC=FALSE, .vcov=NULL, .coef=NULL, use.t=NULL, by=NULL, interc=TRUE, level=0, ..., digits = max(4, getOption("digits") - 2)){ # se e' un "newsegmented" # if(!is.null(ogg$R.slope)) { # covv<-old.coef.var(ogg) # ogg$coefficients<- covv$b # covv<- covv$cov # ogg$psi<-old.psi(ogg) # ogg$nameUV<-old.nomi(ogg) # } else { # covv<-try(vcov(ogg,...), silent=TRUE) # } slopeM<-function(obj, by=NULL, conf.level=0.95, vcov.=NULL, level=level, ... ){ #=========>> da provare con by con piu' termini e se leftSlope=0 #obj: the segmented.lme object #by: a named list indicating covariate names and corresponding values affecting the fitted segmented relationship. # Example: by=list(group="2",z2=.2) #conf.level: for pointwise CI.. #withI: if TRUE, the fitted lines are plotted with intercept (if included in the model) #vcov.: the fixed effect cov matrix. If NULL is computed by vcov.segmented.lme #drop.var: possible coefficient names to be removed before computing the segmented relationship (E.g. the group-specific intercept..) #obj[[1]] -> obj$lme.fit #obj[[2]] -> obj$lme.fit.noG #------------------ V<- if(is.null(vcov.)) vcov.segmented.lme(obj) else vcov. 
#object$lme.fit$varFix if(!is.null(by) && !is.list(by)) stop("if provided, 'by' should be a (named) list of scalars") Z<-obj$Z nomeZ<-obj$namesGZ$nameZ beta.noG<- fixef(obj$lme.fit.noG) beta.all<-fixef(obj$lme.fit) #browser() beta.G<-beta.all[setdiff(names(fixef(obj$lme.fit)), names(beta.noG))] nomiCoef<-names(beta.noG) if(!is.null(by)) { a<-by #isZero<-sapply(a, function(x) x==0) if(!all(sapply(a, length)==1)) stop("vectors in 'by' are not allowed") nomiOK<-const<-idList<-vector("list", length(a)) values<-vector(,length(a)) for(i in 1:length(a)) { nomiOK[i]<-nomeOK <- if(is.character(a[[i]])) paste(names(a[i]),a[[i]], sep="") else names(a[i]) #replace 0 if(a[[i]]==0) a[[i]]<- 1e-16 #per la left slope bLeftSlope<-c(beta.noG[paste(obj$namesGZ$nameZ,":",nomeOK, sep="")], beta.noG[paste(nomeOK, ":", obj$namesGZ$nameZ, sep="")]) bLeftSlope<-bLeftSlope[!is.na(bLeftSlope)] if(!is.character(a[[i]])) bLeftSlope<-bLeftSlope*a[[i]] if(length(bLeftSlope)<=0) bLeftSlope<-NA #per la slope-diff bU<-beta.noG[paste("U", nomeOK, sep=".")] if(!is.character(a[[i]])) bU <-bU*a[[i]] const[[i]]<-c(bLeftSlope, bU) const[[i]]<- ifelse(is.na(const[[i]]),0,const[[i]]) idList[[i]]<-names(c(bLeftSlope, bU)) values[i]<-ifelse(is.character(a[[i]]),1,a[[i]]) } #browser() const<-matrix(unlist(const),2, byrow=FALSE) colnames(const)<-names(by) nomiNOdiff <- names(which(colSums(const)==0)) if(length(nomiNOdiff)>0) warning("The value of", paste(" '", paste(nomiNOdiff, collapse=" and "),"' ",sep=""), "supplied in 'by' does not modify the baseline slopes estimates", call. = FALSE) nomiCoef<- c(nomeZ, "U", unlist(idList)) ########################################## } else { #se 'by' e' NULL const<-matrix(0,2,1) nomiCoef<- c(nomeZ, "U") values<-c(1,1) } ########################################## #browser() final.names<-setdiff(nomiCoef, c("G0",obj$namesGZ$nomiG,"")) final.names<-final.names[!is.na(final.names)] #prepara la matrice del disegno.. X<-matrix(1, 2, 2) #SOLO un breakpoint, quindi 2 segmenti.. X[row(X)0){ re<-ranef(obj[['lme.fit.noG']]) re.left<- if(obj$namesGZ$nameZ%in%colnames(re)) re[,obj$namesGZ$nameZ] else rep(0, nrow(re)) re.right<- if("U"%in%colnames(re)) re.left+re[,"U"] else re.left ris<- data.frame("leftSlope"=re.left+ris["leftSlope","Est."], "rightSlope"=re.right+ris["rightSlope","Est."]) rownames(ris)<-rownames(re) } ris } #se metti la linea sotto, poi NON funziona con oggetti che provengono da segmented.default #if(!inherits(ogg, "stepmented") || !inherits(ogg, "segmented") || !inherits(ogg, "segmented.lme")) stop("slope() works only for..") if(inherits(ogg, "stepmented")){ covv <- if(is.null(.vcov)) vcov(ogg, ...) else .vcov estcoef<- if(is.null(.coef)) coef(ogg) else .coef if(length(estcoef)==0) stop("No coefficient in the object fit?") if(!all(dim(covv)==c(length(estcoef), length(estcoef)))) stop("dimension of cov matrix and estimated coeffs do not match", call. = FALSE) nomepsi<-rownames(ogg$psi) #OK nomeU<-ogg$nameUV$U nomeZ<-ogg$nameUV$Z if(missing(parm)) { nomeZ<- ogg$nameUV$Z } else { if(! 
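## [Illustrative sketch, not part of the original source] In the computation below, each
## segment slope is essentially a cumulative sum of the fitted coefficients (the left slope
## plus the successive slope differences U1, U2, ...), and its covariance is obtained by
## pre/post-multiplying the coefficient covariance with the lower-triangular matrix of ones
## that performs those cumulative sums. A toy version with hypothetical coefficient values:
# b <- c(x = 0.8, U1.x = -0.5, U2.x = 0.3)       # left slope and two slope differences
# A <- matrix(1, 3, 3); A[row(A) < col(A)] <- 0  # lower-triangular matrix of ones
# drop(A %*% b)                                  # slopes of the three segments: 0.8 0.3 0.6
# ## with V = vcov of b, the covariance of the slopes is A %*% V %*% t(A)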
all(parm %in% ogg$nameUV$Z)) { stop("invalid parm") } else {nomeZ<-parm} } nomi<-names(estcoef) index<-vector(mode = "list", length = length(nomeZ)) for(i in 1:length(nomeZ)) { idU <- ogg$nameUV$U[endsWith(ogg$nameUV$U, paste(".", nomeZ[i], sep = ""))] if (interc && "(Intercept)" %in% nomi) idU <- c("(Intercept)", idU) index[[i]]<- idU } if(!is.null(use.t)){ k0<- if(use.t) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) } else { k0<-if(inherits(ogg, "lm", which=TRUE)==2) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) } #k<-if("lm"%in%class(ogg)) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) Ris<-list() #digits <- max(3, getOption("digits") - 3) for(i in 1:length(index)){ #ind<-as.numeric(na.omit(unlist(index[[i]]))) ind<- match(na.omit(unlist(index[[i]])),nomi) M<-matrix(1,length(ind),length(ind)) M[row(M)2 && (inherits(ogg, "segmented") || inherits(ogg, "segmented.lme"))){ if(class(ogg)[1]=="segmented.lme"){ a<-slopeM(ogg, conf.level=conf.level, vcov.=.vcov, by=by, level=level, ...) return(a) } else { covv <- if(is.null(.vcov)) vcov(ogg, ...) else .vcov if(is.null(.coef)) { estcoef<- coef(ogg) if(is.null(estcoef)) estcoef <- ogg$coef if(is.null(estcoef)) stop("No coeffs in the fit? Please use '.coef'") } else { estcoef<- .coef } #browser() if(length(estcoef)==0) stop("No coefficient in the object fit?") if(!all(dim(covv)==c(length(estcoef), length(estcoef)))) stop("dimension of cov matrix and estimated coeffs do not match", call. = FALSE) nomepsi<-rownames(ogg$psi) #OK nomeU<-ogg$nameUV$U nomeZ<-ogg$nameUV$Z if(missing(parm)) { nomeZ<- ogg$nameUV$Z if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(nomeZ)) } else { if(! all(parm %in% ogg$nameUV$Z)) { stop("invalid parm") } else {nomeZ<-parm} } if(length(rev.sgn)!=length(nomeZ)) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) nomi<-names(estcoef) index<-vector(mode = "list", length = length(nomeZ)) for(i in 1:length(nomeZ)) { index[[i]]<- match(c(nomeZ[i],ogg$nameUV$U[grep(nomeZ[i], ogg$nameUV$U)]), names(estcoef),0) n.psi.est.i <- length(ogg$nameUV$V[grep(nomeZ[i], ogg$nameUV$V)]) if(length(ogg$indexU[[nomeZ[i]]])!=n.psi.est.i){ #se ci sono anche psi fissi if(!is.null(ogg$constr)){ stop("slope() does not work with constraints and fixed psi") } else { index[[i]]<-match(c(nomeZ[i], names(ogg$indexU[[nomeZ[i]]])), nomi, 0) } } } if(!is.null(use.t)){ k0<- if(use.t) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) } else { k0<-if(inherits(ogg, "lm", which=TRUE)==2) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) } #k<-if("lm"%in%class(ogg)) abs(qt((1-conf.level)/2,df=ogg$df.residual)) else abs(qnorm((1-conf.level)/2)) Ris<-list() #digits <- max(3, getOption("digits") - 3) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) #browser() for(i in 1:length(index)){ ind<- index[[i]] #as.numeric(unlist(index[[i]])) ind<-ind[ind!=0] cof<-estcoef[ind] if(is.null(ogg$constr)){ M<-matrix(1,length(ind),length(ind)) M[row(M) y range for all # subjects xscale=1 => x range for all subjects # ...: arguments to pass to plotSegLme(), especially col.l, lwd.l, lty.l for the # individual lines, and col.p, lty.p, lwd.p for the population # lines (provided 'pop=T'). 
NB 'col' refers to points (provided that 'res=TRUE') # col, col.l and col.p may be *vectors* (will be recycled) # lines: if FALSE, points (rather than lines) are plotted (useful if the segmented profile depends on # additional covariates and cannot be displayed) x11() #quartz()? #vline: if TRUE, a (dashed) vertical line is drawn to emphasize the individual breakpoint. #======================================================================== return.psi <- function(obj, level) { # restituisce predizioni dei RE cahngepoints per diversi livelli # di nested. NON completa #-------------------- fn.re <- function(obj) { # restituisce un array n x n.ranef x terms n ? il n. totale # delle misurazioni.. n.ranef ? il n. dei random effects # (tipicamente ? 1, >1 con nested..) terms ? il n. dei # termini coinvolti nei random effects (ad es., intercept, x # ..) ro <- ranef(obj) n.levels <- ncol(obj$groups) #n. dei livelli casuali (ad es., se nested..) if (n.levels <= 1) { ro <- list(ro) names(ro) <- names(obj$groups) } nomi.levels <- names(obj$groups) #nomi degli effetti casuali names(ranef(obj)) n.terms <- sapply(ro, ncol) nomiTermini <- unique(as.vector(unlist(sapply(ro, colnames)))) tutti <- array(0, c(nrow(obj$groups), ncol(obj$groups), max(n.terms)), dimnames = list(NULL, names(obj$groups), nomiTermini)) for (nome in nomiTermini) { for (j in nomi.levels) { if (nome %in% names(ro[[j]])) { for (i in unique(obj$groups[, j])) tutti[obj$groups[, j] == i, j, nome] <- ro[[j]][rownames(ro[[j]]) == i, nome] } } } tutti } #--------------------- if (missing(level)) level <- ncol(obj[[2]]$groups) psi <- rowSums(obj$misc$matrix.psi[, 1:(level + 1), drop = FALSE]) if (level == 0) { names(psi) <- NULL } else { if (level < ncol(obj[[2]]$groups)) names(psi) <- sapply(strsplit(names(psi), "/"), function(.x) .x[max(level, 1)]) } return(psi) # obj<-obj[[1]] RE<-fn.re(obj) if(! 'G0' %in% dimnames(RE)[[3]]) # stop('no 'G0' term!') n.ranef<-dim(RE)[2] psi.i<-vector('list', # ncol(obj$groups)) for(j in 1:n.ranef) { valori.sum<- # rowSums(RE[, 1:j,'G0',drop=FALSE]) valori<- tapply(valori.sum, # obj$groups[,j], function(x)x[1]) # names(valori)<-names(tapply(obj$groups[,j], obj$groups[,j], # function(x)x[1])) psi.i[[j]]<-valori } names(psi.i) <- # colnames(obj$groups) attr(psi.i, 'grpNames') <- # attr(ranef(obj), 'grpNames') return(psi.i) kappa0i <- kappa0+ki # etai<-kappa0i if(id.z.psi) { kappa.old<-kappa #length=1 # kappa<-fixed.effects(obj)[nomiG] #esclude G0.. # etai<-etai+drop(Z.psi%*%kappa) } psi.ex<-if(psi.link=='logit') # inv.logit(etai,min.Z,max.Z) else etai } #======================================================================== plotSegLme <- function(obj, id = stop("'id' should be provided"), add = FALSE, res = TRUE, pop = FALSE, yscale = -1, xscale = -1, text.leg = paste("id =", id), pos.leg = NULL, vline = FALSE, xLab, yLab, level, lines = TRUE, opzione = 1, ...) { #line.col=1, res.col=grey(.7), # Simply plots (or adds) the observed data and the segmented fitted lines for subject # 'id' #--- # obj: an object of class 'segmented.lme' id: the subject 'id' add: if FALSE, a new # plot is produced with observations and fitted lines superimposed. res: if TRUE the # observations (partial residuals) are added; otherwise only the fitted lines pop: if # TRUE the population-level estimate of the segmented relationship is added.. yscale # if <0, the y-scale refers to the values of 'id' only; otherwise the overall range # relevant to *all* subjects (useful for comparisons) main: the plot title. 
It can be # '' leg: if !NULL it can be one of 'top', 'topright',... and the id subject is put # on the plot. vline: if TRUE lines: if FALSE, points (rather than lines) are plotted # (useful if the segmented profile depends on additional covariates and cannot be # displayed) #...: argomenti da passare al plot, compresi 'col.l' e 'lwd.l' 'lty.l' # che servono per le segmented lines individuali e col.p, lty.p, lwd.p che servono # per le linee di pop (se pop=TRUE) Problema: se ci sono nested re i levels dell # innermost factor vengono modificati 'lev1/lev2' (che rappresentano i nomi dei # coef() o ranef) dove lev1 e' il livello dell'altro fattore. Quindi o in 'id' si # specifica il livello costruito 'lev1/lev2' oppure i nomi dei coef() si devono # modificare estraendo solo il valore dopo il '\' rnfGrp <- obj$lme.fit.noG$groups if (missing(level)) level <- ncol(rnfGrp) # con nested funziona ma i valori dei fitted sono 'strani..', cio? dovrebbero essere # comunque una relazione segmented S? ? giusto solo i psi sono diversi... if (level != ncol(rnfGrp)) stop("Currently only the innermost level (i.e., ", ncol(rnfGrp), ") is allowed") if (level > ncol(rnfGrp) || level < 0) stop(" 'level' should be an integer in [0, ", ncol(rnfGrp), "]") # rownames(coef(obj[[2]])) sono the innermost levels nomi <- levels(rnfGrp[, max(1, level)]) #level 0 non funziona if (!(id %in% nomi)) stop("the specified 'id' is not consistent with 'level' (should be ", nomi[1], ",", nomi[2], ",..,", nomi[length(nomi)], ")") # psi<- obj$psi.i[[paste(id)]] in realt? si possono ottenere i psi a diversi livelli # di nested psi <- if (ncol(obj$misc$matrix.psi) <= 1) obj$misc$matrix.psi[, 1] else rowSums(obj$misc$matrix.psi[, 1:(level + 1)]) # se ci sono nested, i nomi di psi saranno sempre con '/' che non e' coerente se # level=1 if (level < ncol(rnfGrp)) { if (ncol(rnfGrp) >= 2) { names(psi) <- sapply(strsplit(names(psi), "/"), function(.x) .x[level]) } } psi <- psi[[paste(id)]] nameID <- names(rnfGrp)[level] y <- resid(obj$lme.fit.noG) + fitted(obj$lme.fit.noG) # if (level < ncol(rnfGrp)) names(y) <- obj$lme.fit.noG$groups[, max(level, 0)] range.ok <- range(y) XY <- cbind(obj$Z, y) x <- XY[names(y) == id, 1] y <- XY[names(y) == id, 2] if (yscale < 0) range.ok <- range(y) #browser() range.ok[1] <- if (sign(range.ok[1]) > 0) range.ok[1] * 0.98 else range.ok[1] * 1.03 range.ok[2] <- if (sign(range.ok[2]) > 0) range.ok[2] * 1.03 else range.ok[2] * 0.98 # x<-obj$Z rangeX.ok <- range(obj$Z) # x<-x[names(obj$Z)==id] if (xscale < 0) rangeX.ok <- range(x) opz <- list(...) 
#browser() opz$x <- x opz$y <- y #browser() if (is.null(opz$col)) opz$col <- grey(.7) if (is.null(opz$cex)) opz$cex <- 1.5 if (is.null(opz$pch)) opz$pch <- 19 if (is.null(opz$ylim)) opz$ylim <- range.ok if (is.null(opz$xlim)) opz$xlim <- rangeX.ok if (!res) opz$type <- "n" if (is.null(opz$ylab)) opz$ylab<- all.vars(formula(obj[[2]]))[1] if (is.null(opz$xlab)) opz$xlab<- obj$namesGZ$nameZ #if (missing(yLab)) yLab <- "response" #if (missing(xLab)) xLab <- obj$namesGZ$nameZ #opz$ylab <- yLab #opz$xlab <- xLab # set col.p, lwd.p, lty.p for *population* lines (provided pop=TRUE) if (!is.null(opz$p.col)) { p.col <- opz$p.col opz$p.col <- NULL } else { p.col <- 1 } if (!is.null(opz$p.lty)) { lty.p <- opz$p.lty opz$p.lty <- NULL } else { p.lty <- 2 } if (!is.null(opz$p.lwd)) { p.lwd <- opz$p.lwd opz$p.lwd <- NULL } else { p.lwd <- 1.5 } # set col.l, lwd.l, lty.l for *individual* lines (or points if 'lines=FALSE') if (!is.null(opz$l.pch)) { l.pch <- opz$l.pch opz$l.pch <- NULL } else { l.pch <- 3 } if (!is.null(opz$l.lty)) { l.lty <- opz$l.lty opz$l.lty <- NULL } else { l.lty <- 1 } if (!is.null(opz$l.col)) { l.col <- opz$l.col opz$l.col <- NULL } else { l.col <- 1 } if (!is.null(opz$l.lwd)) { l.lwd <- opz$l.lwd opz$l.lwd <- NULL } else { l.lwd <- 2 } if (!is.null(opz$t.col)) { t.col <- opz$t.col opz$t.col <- NULL } else { t.col <- 1 } if (!add) do.call(plot, opz) if (!is.null(pos.leg)){ legg <- if(id.name) paste(nameID, id, sep = " = ") else id legend(pos.leg, legend = legg, bty = "n", text.col=t.col) } ff <- fitted(obj$lme.fit.noG, level = level) mu <- ff[names(ff) == id] #? fitted.segmented.lme(fit,1) m <- cbind(x, mu) m <- m[order(m[, 1]), ] #browser() if (!lines) { points(m, col = l.col, pch = l.pch, lwd = l.lwd) #do.call(points, opz) } else { if (opzione == 1) { mL <- m[m[, 1] <= psi, , drop = FALSE] if (nrow(mL) > 1) { # if(length(unique(round(diff(mL[,1])/diff(mL[,2]), 6)))>1) # stop('Apparently non-segmented profile for unit #', id, '.. try # 'lines=FALSE'\n') fL <- splinefun(mL[, 1], mL[, 2]) f.psi <- fL(psi) } else { mR <- m[m[, 1] >= psi, , drop = FALSE] # if(length(unique(round(diff(mR[,1])/diff(mR[,2]), 6)))>1) # stop('Apparently non-segmented profile for unit #', id,'.. try # 'lines=FALSE'\n') fR <- splinefun(mR[, 1], mR[, 2]) f.psi <- fR(psi) } # lines(c( mL[,1], psi, mR[,1]), c( mL[,2], f.psi, mR[,2]), col=col.l, # lwd=lwd.l, lty=lty.l) lines(c(m[1, 1], psi, m[nrow(m), 1]), c(m[1, 2], f.psi, m[nrow(m), 2]), col = l.col, lwd = l.lwd, lty = l.lty) } else { nomiCoef.ok <- intersect(c("(Intercept)", obj$namesGZ$nameZ, "U"), colnames(coef(obj$lme.fit.noG))) nomiCoef.ok <- c(nomiCoef.ok, obj$namesGZ$nomiUx) coef.ok <- coef(obj$lme.fit.noG, level = level)[id, nomiCoef.ok] psi.ok <- return.psi(obj, level = level)[id] #dipende da eventuali termini fissi in z.psi, e dal livello di nesting. # adesso devi copstruire la matrice del disegno. ######################################################### ### GUARDARE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ######################################################### #browser() xvar<-seq(min(x), max(x), l=100) coef.ok<- as.numeric(coef.ok) if(!is.null(obj$namesGZ$nomiUx)) { coef.ok[3] <- coef.ok[3]+ coef.ok["bUx"]* obj$lme.fit.noG$data[,obj$namesGZ$nomiUx] } mu.ok <- cbind(1, xvar, pmax(xvar-psi.ok,0))%*%coef.ok lines(xvar, mu.ok, col = l.col, lwd = l.lwd, lty = l.lty) # come fare se ci sono variabili U.x? coef.ok include il coef ma c'? bisogno # del valore corrispondente all'unita' id.. 
# obj$lme.fit.noG$data[,obj$namesGZ$nomiUx] e poi selezionare per l'unita' # 'id' E se ci sono interazioni con intercetta e left slope??? } } if(vline) segments(psi, par()$usr[3], psi, f.psi, lty = 3, col = l.col) #browser() if(attr(obj$psi.i,"is.break")[paste(id)]){ points(psi, par()$usr[3] * 1, pch = "X", col = l.col, cex = 1.2) points(psi, f.psi, pch = "x", col = l.col, cex = 1, lwd=1.5) } #browser() # codici vecchi.. #left side mL<-m[m[,1]<=psi, ,drop=FALSE] fL<-splinefun(mL[,1], # mL[,2]) new.xL<- c(min(mL[,1]), psi) #right side mR<-m[m[,1]>=psi, ,drop=FALSE] # fR<-splinefun(mR[,1], mR[,2]) new.xR<- c(psi, max(mR[,1])) lines(new.xL, # fL(new.xL), col=1, lwd=2) lines(new.xR, fR(new.xR), col=1, lwd=2) if(vline) # segments(psi, par()$usr[3], psi, fR(psi), lty=3, col=1) if (pop) { #browser() # mu<-fitted(obj[[2]])[names(fitted(obj[[2]]))==id] #e' # fitted.segmented.lme(fit,1) # mu<-fitted(obj[[2]],0)[names(fitted(obj[[2]],0))==id] #e' # fitted.segmented.lme(fit,0) # mu<-fitted(obj, level=0)[names(fitted(obj, level=0))==id] #funziona solo se i # dati sono ordinati (osservazioni dello stesso individuo vicine..). Per cui il # 14/7 messo la seguente: browser() mu<-fitted(obj[[2]], # level=0)[names(fitted(obj[[2]], level=ncol(obj[[2]]$groups)))==id] mu <- fitted(obj, level = 0) mu <- mu[names(mu) == id] # mu<-mu[names(fitted(obj, level=ncol(obj[[2]]$groups)))==id] psi <- obj$fixed.psi[[paste(id)]] m <- cbind(x, mu) m <- m[order(m[, 1]), ] # mL<-m[m[,1]<=psi, ,drop=FALSE] if(nrow(mL)>1){ fL<-splinefun(mL[,1], mL[,2]) # f.psi<-fL(psi) } else { mR<-m[m[,1]>=psi, ,drop=FALSE] fR<-splinefun(mR[,1], # mR[,2]) f.psi<-fR(psi) } lines(c( m[1,1], psi, m[nrow(m),1]), c( m[1,2], f.psi, # m[nrow(m),2]), col=col.l, lwd=lwd.l) left side m1 <- m[m[, 1] <= psi, , drop = FALSE] # right side m2 <- m[m[, 1] >= psi, , drop = FALSE] if (nrow(m1) > 0) { f1 <- splinefun(m1[, 1], m1[, 2]) estremo <- if (nrow(m2) > 0) psi else min(psi, max(m1[, 1])) new.x1 <- c(min(m1[, 1]), estremo) } if (nrow(m2) > 0) { f2 <- splinefun(m2[, 1], m2[, 2]) # new.x1<- seq(psi, max(m1[,1]), l=200) estremo <- if (nrow(m1) > 0) psi else max(psi, min(m2[, 1])) new.x2 <- c(estremo, max(m2[, 1])) } if (nrow(m1) > 0) { if (nrow(m1) > 1) lines(new.x1, f1(new.x1), col = p.col, lwd = p.lwd, lty = p.lty) else lines(new.x1, c(f1(new.x1)[1], f2(new.x2)[1]), col = p.col, lwd = p.lwd, lty = p.lty) } if (nrow(m2) > 0) { if (nrow(m2) > 1) lines(new.x2, f2(new.x2), col = p.col, lwd = p.lwd, lty = p.lty) else lines(new.x2, c(f1(new.x1)[2], f2(new.x2)[2]), col = p.col, lwd = p.lwd, lty = p.lty) } points(psi, par()$usr[3] * 1.015, pch = 4, col = p.col) # segments(psi, par()$usr[3], psi, f1(psi), lty=3, col=1) } } #======================================================================== plotmarg<-function(obj, by=NULL, add=FALSE, conf.level=0, pos.leg=NULL, withI=TRUE, vcov.=NULL, shade=FALSE, drop.var=NULL, text.leg, ... ){ #=========>> da provare con by con piu' termini e se leftSlope=0 #obj: the segmented.lme object #by: a named list indicating covariate names and corresponding values affecting the fitted segmented relationship. # Example: by=list(group="2",z2=.2) #conf.level: for pointwise CI.. #pos.leg if different from NULL, a legend is added on the plot at "pos.leg" # (which should be one a single keyword from the list "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right" and "center") #withI: if TRUE, the fitted lines are plotted with intercept (if included in the model) #vcov.: the fixed effect cov matrix. 
If NULL is computed by vcov.segmented.lme #shade: logical (ignored if conf.level=0) #drop.var: possible coefficient names to be removed before computing the segmented relationship (E.g. the group-specific intercept..) #...: further arguments (col, lty, lwd, xlim, ylim, xlab,..) to be passed to matplot()/matlines(). Can be vectors (e.g. lty=c(2,1,2) and lwd=c(1,2,1) #text.leg: if specified, it is legend to be added, provided pos.leg has been specified. #obj[[1]] -> obj$lme.fit #obj[[2]] -> obj$lme.fit.noG #------------------ if((conf.level<0 || conf.level>=1)) stop(" 'conf.level' is meaningless'") if(conf.level>0){ V<- if(is.null(vcov.)) vcov.segmented.lme(obj) else vcov. #object$lme.fit$varFix } #s # if(conf.level!=0 && !is.null(by)) stop("Not yet implemented..") if(!is.null(by) && !is.list(by)) stop("if provided, 'by' should be a (named) list of scalars") opz<-list(...) if("pos.legend" %in% names(opz)) warning("'pos.legend' ignored.. Do you mean 'pos.leg'?", call.=FALSE) if(!is.null(pos.leg)) pos.leg<- match.arg(pos.leg, c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center")) Z<-obj$Z nomeZ<-obj$namesGZ$nameZ beta.noG<- fixef(obj$lme.fit.noG) beta.all<-fixef(obj$lme.fit) beta.G<-beta.all[setdiff(names(fixef(obj$lme.fit)), names(beta.noG))] nomiCoef<-names(beta.noG) if(!is.null(by)) { a<-by #isZero<-sapply(a, function(x) x==0) if(!all(sapply(a, length)==1)) stop("vectors in 'by' are not allowed") nomiOK<-const<-idList<-vector("list", length(a)) values<-vector(,length(a)) for(i in 1:length(a)) { nomiOK[i]<-nomeOK <- if(is.character(a[[i]])) paste(names(a[i]),a[[i]], sep="") else names(a[i]) #replace 0 if(a[[i]]==0) a[[i]]<- 1e-16 #per l'intercetta bInterc<- beta.noG[nomeOK] if(!is.character(a[[i]])) bInterc<-bInterc*a[[i]] #per la left slope bLeftSlope<-c(beta.noG[paste(obj$namesGZ$nameZ,":",nomeOK, sep="")], beta.noG[paste(nomeOK, ":", obj$namesGZ$nameZ, sep="")]) bLeftSlope<-bLeftSlope[!is.na(bLeftSlope)] if(!is.character(a[[i]])) bLeftSlope<-bLeftSlope*a[[i]] if(length(bLeftSlope)<=0) bLeftSlope<-NA #per la slope-diff bU<-beta.noG[paste("U", nomeOK, sep=".")] if(!is.character(a[[i]])) bU <-bU*a[[i]] #per il changepoint bG<-beta.G[paste("G", nomeOK, sep=".")] if(!is.character(a[[i]])) bG <-bG*a[[i]] const[[i]]<-c(bInterc, bLeftSlope, bU, bG) const[[i]]<- ifelse(is.na(const[[i]]),0,const[[i]]) idList[[i]]<-names(c(bInterc, bLeftSlope, bU, bG)) values[i]<-ifelse(is.character(a[[i]]),1,a[[i]]) } #browser() const<-matrix(unlist(const),4, byrow=FALSE) colnames(const)<-names(by) nomiNOdiff <- names(which(colSums(const)==0)) if(length(nomiNOdiff)>0) warning("The", paste(" '", nomiNOdiff,"' ",sep=""), "value supplied in 'by' does not modify the baseline line", call. = FALSE) nomiCoef<- c("(Intercept)", nomeZ, "U", "G0", unlist(idList)) ########################################## } else { #se 'by' e' NULL const<-matrix(0,4,1) nomiCoef<- c("(Intercept)", nomeZ, "U", "G0") values<-rep(1,4) } ########################################## #browser() #prepara la matrice del disegno.. 
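# Usage sketch (not run): the plot method calls plotmarg() when level=0 to draw the
# population-level (fixed-effects) segmented relationship. 'os' and the covariate
# name 'group' are hypothetical:
#   plot(os, level = 0, conf.level = 0.95, shade = TRUE,
#        pos.leg = "topleft", text.leg = "baseline")               # baseline fit + 95% pointwise CI
#   plot(os, level = 0, by = list(group = "2"), add = TRUE, col = 2) # fit shifted for group "2"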
est.psi.fixed <-fixef(obj$lme.fit)["G0"]+ sum(const[4,]) if(obj$call$psi.link=="logit") est.psi.fixed <- plogis(est.psi.fixed) Z.new<-as.numeric(sort(c(seq(min(Z),max(Z),l=100), est.psi.fixed))) U<-pmax(Z.new-est.psi.fixed,0) X<-cbind(1,Z.new,U) #colnames(X)<-c("(Intercept)",nomeZ,"U") Ident<-diag(ncol(X)) M<-vector("list", length=ncol(const)) for(j in 1:ncol(const)) M[[j]]<-values[j]*Ident[, which(const[-4,j]!=0), drop=FALSE] M<-cbind(Ident, do.call("cbind", M)) if(!withI) { M<-M[,-1] nomiCoef <-setdiff(nomiCoef, "(Intercept)") } final.names<-setdiff(nomiCoef, c("G0",obj$namesGZ$nomiG,"")) final.names<-final.names[!is.na(final.names)] #browser() if(!is.null(drop.var)){ colnames(M)<-final.names final.names <-setdiff(final.names, drop.var) M<-M[, final.names] } XX<- X%*%M r<-fit<- XX %*% beta.noG[final.names] if (conf.level > 0) { zalpha<- -qnorm((1-conf.level)/2) V<-V[final.names,final.names] SE.fit<-sqrt(rowSums((XX %*% V) * XX)) #sqrt(diag(X%*%Var%*%t(X))) r<-cbind(fit-zalpha*SE.fit, fit, fit+zalpha*SE.fit) opz$lty<-c(2,1,2) } # b<-beta.noG[c("(Intercept)",nomeZ,"U")] # b<-b+ rowSums(const[1:3,,drop=FALSE]) # b<-b[!is.na(b)] # r<-fit<- drop(X %*% b) if(is.null(opz$col)) opz$col<-"1" if(is.null(opz$type)) opz$type<-"l" if(is.null(opz$lty)) opz$lty<-1 if(is.null(opz$lwd)) opz$lwd<-1.8 if(is.null(opz$xlab)) opz$xlab<-obj$namesGZ$nameZ if(is.null(opz$ylab)) opz$ylab<-all.vars(formula(obj[[1]]))[1] if(!is.null(opz$alpha.f)) {alpha.f<-opz$alpha.f;opz$alpha.f<-NULL} else {alpha.f<-.15} opz$x<-Z.new opz$y<-r if(add) do.call(matlines, opz) else do.call(matplot, opz) if (shade && conf.level>0) polygon(c(Z.new, rev(Z.new)), c(r[, 1], rev(r[, 3])), col = adjustcolor(opz$col, alpha.f), border = NA) #browser() if(!is.null(pos.leg)) { if(!is.null(by)){ id<-apply(const,2,function(.x) any(.x!=0)) #funziona sempre? #se sono tutti 0, significa che stai disegnando la segmented baseline.. #Ma perche' questo dovrebbe influenzare la scelta di mettere la legenda? proviamo a metterlo sempre TRUE #id<-TRUE leg<-paste(names(by)[id],unlist(by)[id],sep="=",collapse=" ") } else { leg<-NULL } if(!is.null(text.leg)) leg <- text.leg try(legend(pos.leg, leg, bty="n", col=min(opz$col), lty=min(opz$lty), lwd=max(opz$lwd)), silent=TRUE) } } #======================================================================== #inizio funzione if(level==0){ plotmarg(x, by=by, add=add, conf.level=conf.level, pos.leg = pos.leg, withI=withI, vcov.=vcov., shade=shade, drop.var=drop.var, text.leg=text.leg, ...) } else { obj <- x rnfGrp <- obj$lme.fit.noG$groups if (missing(level)) level <- ncol(rnfGrp) if (level != ncol(rnfGrp)) stop("Currently only the innermost level (i.e., ", ncol(rnfGrp), ") is allowed") opz <- list(...) 
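# Usage sketch (not run): subject-specific profiles (level > 0). 'os' is a hypothetical
# segmented.lme fit whose grouping factor has levels "1","2",...
#   plot(os, id = c("1", "4", "7"), n.plot = c(1, 3), res = TRUE,
#        pop = TRUE, ci.psi.pop = 0.95)   # three panels, population line, 95% CI for the fixed psi
#   plot(os, n.plot = c(1, 1), pop = TRUE) # all individual profiles overlaid in a single panel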
Ylab <- if (is.null(opz$ylab)) all.vars(formula(obj[[1]]))[1] else opz$ylab Xlab <- if (is.null(opz$xlab)) obj$namesGZ$nameZ else opz$xlab opz$xlab <- opz$ylab <- NULL opz$pop <- pop opz$res <- res opz$xLab <- "" opz$yLab <- "" opz$main <- "" opz$xaxt <- "n" opz$yaxt <- "n" opz$pos.leg <- pos.leg opz$yscale <- yscale opz$xscale <- xscale opz$vline <- vline opz$level <- level opz$lines <- lines opz$obj <- quote(obj) if (is.null(id)) id <- names(obj$psi.i) #levels(rnfGrp[,ncol(rnfGrp)]) if (missing(n.plot)) n.plot <- if (length(id) <= 1) c(1, 1) else c(3, ceiling(length(id)/3)) if(prod(n.plot)!=1) id <- id[1:min(prod(n.plot), length(id))] # color of individual lines l.col <- if (!is.null(opz$l.col)) opz$l.col else 1 l.col <- rep(l.col, length(id)) l.lwd <- if (!is.null(opz$l.lwd)) opz$l.lwd else 1 l.lwd <- rep(l.lwd, length(id)) l.lty <- if (!is.null(opz$l.lty)) opz$l.lty else 1 l.lty <- rep(l.lty, length(id)) col <- if (!is.null(opz$col)) opz$col else grey(.7) #for residuals.. col <- rep(col, length(id)) p.col <- if (!is.null(opz$p.col)) opz$p.col else 1 p.lty <- if (!is.null(opz$p.lty)) opz$p.lty else 3 p.lwd <- if (!is.null(opz$p.lwd)) opz$p.lwd else 1 #p.col <- rep(p.col, length(id)) t.col <- if (!is.null(opz$t.col)) opz$t.col else 1 #for legend text.. t.col <- rep(t.col, length(id)) #browser() # if(dev.cur()==1) { #se non e' aperto alcun device.. #unico grafico con tutti i profili individuali.. #====================================================== xlim<- if(!is.null(opz$xlim)) opz$xlim else range(x$Z) ylim<- if(!is.null(opz$ylim)) opz$ylim else NULL k<-1 if(prod(n.plot)==1 && length(id)>1){ plotSegLme(obj=x, id=id[k], add=FALSE, pop=FALSE, res=FALSE, xLab='', yLab='', l.col=l.col[k], l.lwd=l.lwd[k], l.lty=l.lty[k], xlim=xlim, ylim=ylim) for (i in id[-1]) { k<-k+1 plotSegLme(obj=x, id=i, add=TRUE, pop=FALSE, res=FALSE, xLab='', yLab='', l.col=l.col[k], l.lwd=l.lwd[k], l.lty=l.lty[k]) # main='', xaxt='n', yaxt='n', leg=leg, yscale=yscale, # vline=vline, xscale=xscale, level=level, ...) #opz$id <- i #if (col.l.id) opz$col.l <- col.l[k] #if (col.p.id) opz$col.p <- col.p[k] #if (col.id) opz$col <- col[k] #guarda bene il discorso della stima sella relazione only-fixed-effects.. #opz$pop<-FALSE #do.call(plotSegLme, opz) } box() if(pop) plotmarg(obj, add=TRUE, col=p.col, lwd=p.lwd, lty=p.lty) if(ci.psi.pop>=0){ y <- par()$usr[3]+abs(par()$usr[3])/10 points(fixef(obj)["G0"],y,type="p", col=p.col, pch=19, cex=.8) if(ci.psi.pop>0){ ci.psi<-confint.segmented.lme(obj, level=ci.psi.pop)[,"G0"] arrows(min(ci.psi), y, max(ci.psi), y, code=3, angle=90, length=.05, col=p.col) } } return(invisible(NULL)) } old.mar<- par()$mar old.oma<- par()$oma old.mfrow<- par()$mfrow #====================================================== if(length(id)>1){ par(mfrow = n.plot) id.sx <- 1 + n.plot[2] * (0:(n.plot[1] - 1)) #i grafici di sx id.bot <- (prod(n.plot):1)[1:n.plot[2]] #i grafici di sotto if(yscale>=0) { if(xscale>=0) { par(mar = rep(0, 4)) } else { opz$xaxt<-"n" par(mar = c(0,0,2.2,0)) } } else { #opz$yaxt<-NULL if(xscale>=0) { par(mar = c(0,0,0,2.5)) } else { opz$xaxt<-"n" par(mar = c(0,0,2,2.5)) } } par(oma = c(5, 5, 1, 1)) out <- TRUE } else { id.sx <- 1:length(id) id.bot <- 1:length(id) out <- FALSE } k <- 0 opz$add<- add #col.l lo prende anche sui punti???? for (i in id) { #browser() k <- k + 1 # plotSegLme(obj, id=i, pop=pop, res=res, xLab='', yLab='', # main='', xaxt='n', yaxt='n', leg=leg, yscale=yscale, # vline=vline, xscale=xscale, level=level, ...) 
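# Note on the '...' graphical arguments used here (sketch, not run; 'os' hypothetical):
# l.col/l.lwd/l.lty affect the individual fitted lines, p.col/p.lwd/p.lty the population
# lines (when pop=TRUE), t.col the legend text, and col/cex/pch the residual points:
#   plot(os, id = "3", res = TRUE, pop = TRUE,
#        col = grey(.6), pch = 1, l.col = 4, l.lwd = 2, p.col = 2, p.lty = 2, t.col = 1)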
opz$id <- i opz$l.col <- l.col[k] opz$p.col <- p.col[k] opz$col <- col[k] #guarda bene il discorso della stima sella relazione only-fixed-effects.. opz$pop<-FALSE do.call(plotSegLme, opz) if(pop) plotmarg(obj, add=TRUE, lty=2) #browser() if(ci.psi.pop>=0){ y <- par()$usr[3]+abs(par()$usr[3])/10 points(fixef(obj)["G0"], y, type="p", col=p.col, pch=19, cex=.8) if(ci.psi.pop>0){ ci.psi<-confint.segmented.lme(obj, level=ci.psi.pop)[,"G0"] arrows(min(ci.psi), y, max(ci.psi), y, code=3, angle=90, length=.05, col=p.col) } } #browser() # tt<-axTicks(1) las=2 #if((xscale>=0)&&(k %in% id.bot)) axis(1, cex.axis = 0.7, at = NULL) else axis(1, labels = FALSE) #if((yscale>=0)&&(k%in%id.sx)) axis(2, labels = TRUE, cex.axis = 0.7) else axis(2, labels = FALSE) #if((k %in% id.bot)) axis(1, cex.axis = 0.7, at = NULL) else axis(1, labels = FALSE) # #if(xscale>=0) { # if((k %in% id.bot)) axis(1, cex.axis = 0.7, at = NULL) # axis(1, labels = FALSE) #} #browser() if((xscale<0)|| ((xscale>=0)&&(k%in%id.bot))) axis(1, labels = TRUE, cex.axis = 0.7) else axis(1, labels = FALSE) #if(((xscale>=0)&&(k%in%id.bot))) axis(1, labels = TRUE, cex.axis = 0.7) else axis(1, labels = FALSE) if((yscale<0)|| ((yscale>=0)&&(k%in%id.sx))) axis(2, labels = TRUE, cex.axis = 0.7) else axis(2, labels = FALSE) } if(length(id)>1) { mtext(Xlab, 1, line = 3, outer = out) mtext(Ylab, 2, line = 3, outer = out) par(mar=old.mar, oma=old.oma, mfrow=old.mfrow) } } } segmented/R/plot.stepmented.r0000644000176200001440000004424414666315144015760 0ustar liggesusersplot.stepmented <- function(x, term, add = FALSE, res = TRUE, conf.level=0, interc = TRUE, add.fx=FALSE, psi.lines = TRUE, link = TRUE, const=NULL, res.col = grey(.15, alpha = .4), surf=FALSE, zero.cor=TRUE, heurs=TRUE, shade=FALSE, se.type=c("cdf","abs","none"), k=NULL, .vcov=NULL, leg="topleft", ...) 
{ #============================= plot2step<-function(object, res, interc, psi.lines, arg, add){ nomiZ<- object$nameUV$Z if(length(nomiZ)!=2) stop("surf=TRUE is allowed only with 2 stepmented covariates") nomiV<- object$nameUV$V nomiPsi<-gsub("V","psi",nomiV) nomiU<- object$nameUV$U m1<-slope(object)[[nomiZ[1]]][,"Est."]-coef(object)["(Intercept)"] m2<-slope(object)[[nomiZ[2]]][,"Est."] if(!interc){ m1<-m1 m2<-m2 -coef(object)["(Intercept)"] } fit <-outer(m2,m1,"+") fit01<- 1- (fit-min(fit))/diff(range(fit)) estpsi=object$psi[,"Est."] estpsi1=estpsi[nomiZ[1]==gsub("psi[1234567890]*[1234567890].","",nomiPsi)] estpsi2=estpsi[nomiZ[2]==gsub("psi[1234567890]*[1234567890].","",nomiPsi)] #browser() if(is.null(arg$xlim)) arg$xlim<-object$rangeZ[,nomiZ[1]] if(is.null(arg$ylim)) arg$ylim<-object$rangeZ[,nomiZ[2]] #browser() if(!add){ plot(1, xlab=nomiZ[1], ylab=nomiZ[2], xaxt="n", yaxt="n", ylim=arg$ylim, xlim=arg$xlim, xaxs="i", yaxs="i") axis(2, at= round( x$rangeZ[, nomiZ[2]]+c(diff(x$rangeZ[, nomiZ[2]])/100,0), 3) , cex.axis=.7) axis(2, at= round(c(estpsi2), 3), cex.axis=.7, las=2) axis(1, at= round(c(estpsi1, object$rangeZ[, nomiZ[1]]), 3), cex.axis=.7) } xvalues<-sort(c(par()$usr[1:2], estpsi1)) yvalues<-sort(c(par()$usr[3:4], estpsi2)) #browser() fcol<-function(.x,R=range(.x))colorRampPalette(c(arg$col, "white"), alpha=TRUE)(200)[findInterval(.x,seq(min(R),max(R),l=200))] #fit01<-fit01[nrow(fit01):1,] for(j in 1:(length(estpsi1)+1)){ A=cbind(rep(xvalues[j],length(yvalues)-1), yvalues[-length(yvalues)]) B=cbind(rep(xvalues[j+1],length(yvalues)-1), yvalues[-1]) cc=if(arg$col==1) grey(fit01[,j], alpha=.75) else fcol(fit01[,j], range(fit01)) if(add) cc<-adjustcolor(cc, alpha.f=.5) rect(A[,1],A[,2],B[,1],B[,2], col=cc, border=NA) } xx<-xvalues[-length(xvalues)]+diff(xvalues)/2 yy<- yvalues[-length(yvalues)]+diff(yvalues)/2 E<-expand.grid(yy,xx) #browser() if(!add){ if(res) { r<-object$residuals+object$fitted.values r<- (r-min(r))/diff(range(r)) r<-arg$cex/2+ (arg$cex*3-arg$cex/2)*r points(object$Z, pch=arg$pch, col=adjustcolor(arg$res.col,.5), cex=r) #points(object$Z, pch=arg$pch, cex=arg$cex, col=adjustcolor(arg$res.col,.5)) } } if(psi.lines) abline(v=estpsi1, h=estpsi2, lty=arg$lty) #browser() #text(E[,2],E[,1], round(as.vector(fit[nrow(fit):1,]),3)) text(E[,2],E[,1], round(as.vector(fit),3)) box() return(invisible(NULL)) #X<-matrix(,, nrow(o$psi)) #X[,i]<- -(model.matrix(o)[,nomiU[i]]-.5)/model.matrix(o)[,nomiPsi[i]] #nomiPsi nomiU } #============================= pred.step.plot<-function(object, k=NULL, apprx=c("cdf","abs","none"), nomeZ, zero.cor=TRUE, V=NULL, ...){ apprx=match.arg(apprx) X=model.matrix.stepmented(object, k=k, type=apprx) if(is.null(V)) V<-vcov.stepmented(object, zero.cor=zero.cor, type=apprx) interc=TRUE nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U nomiPsi<- gsub("V","psi", nomiV) #id.noV<-setdiff(colnames(X), nomiPsi) #browser() #se ci sono piu' variabili segmented seleziona solo i termini di un unica variabile #if(missing(nomeZ)) nomeZ <-nomiZ[1] nomeZ <- if(missing(nomeZ)) nomiZ[1] else unique(nomeZ) nomeU.ok <- grep(paste(".", nomeZ,sep=""), nomiU, value=TRUE) nomePsi.ok <- grep(paste(".", nomeZ,sep=""), nomiPsi, value=TRUE) N=20 vv<-matrix(, N, length(nomePsi.ok)) for(i in 1:length(nomePsi.ok)){ #psi <- object$psi[nomePsi.ok[i],"Est."] psi <- object$psi.rounded[1, nomePsi.ok[i]] #se <- object$psi[nomePsi.ok[i],"St.Err"] se <- sqrt(diag(V))[nomePsi.ok[i]] if(is.na(se)) se<-1e-5 vv[,i] <- qnorm(seq(.0001, .9999,l=N), psi, se) #mettere 2*se??? 
} vv<-as.vector(vv) mi=object$rangeZ[1,nomeZ] ma=object$rangeZ[2, nomeZ] vv<-sort(c(object$psi.rounded[1, nomePsi.ok], seq(mi,ma,l=30), vv)) vv<- vv[vv>=mi & vv<= ma] #browser() if(apprx!="none"){ Xok<-matrix(, length(vv), 2*length(nomePsi.ok)) colnames(Xok)<- c(nomeU.ok,nomePsi.ok) for(i in 1:length(nomePsi.ok)){ Xok[, nomeU.ok[i]] <- spline(object$Z[,nomeZ], X[, nomeU.ok[i]], xout=vv)$y Xok[, nomePsi.ok[i]] <- spline(object$Z[,nomeZ], X[, nomePsi.ok[i]], xout=vv)$y } } else { Xok<-matrix(, length(vv), length(nomeU.ok)) colnames(Xok)<- nomeU.ok for(i in 1:length(nomeU.ok)){ Xok[, nomeU.ok[i]] <- spline(object$Z[,nomeZ], X[, nomeU.ok[i]], xout=vv)$y #Xok[, nomePsi.ok[i]] <- spline(object$Z[,nomeZ], X[, nomePsi.ok[i]], xout=vv)$y } } id.ok = grep(paste(".", nomeZ,sep=""), colnames(X)) psii = object$psi.rounded[1, nomePsi.ok] #psii = object$psi[nomePsi.ok,"Est."] M=sapply(psii, function(.x) 1*(vv>.x)) cof<-object$coefficients[nomeU.ok] id.interc = FALSE if(interc && "(Intercept)" %in% colnames(X)) { id.interc=TRUE id.ok<-c(1, id.ok) Xok<-cbind(1, Xok) M = cbind(1, M) cof<-c(object$coefficients[1], cof) } #browser() se=rowSums((Xok%*%V[id.ok,id.ok])*Xok) if(any(se<=0)) { warning("correcting non-positive st.err 1", call.=FALSE) se[se<=0]<-median(se[se>0]) } se.smooth=sqrt(se) fit.smooth <- drop(Xok[,-match(nomePsi.ok, colnames(Xok), 0)]%*%cof) #======================================= Xok[, nomeU.ok] <- if(id.interc) M[,-1] else M se=rowSums((Xok%*%V[id.ok,id.ok])*Xok) if(any(se<=0)) { warning("correcting non-positive st.err 2", call.=FALSE) se[se<=0]<-median(se[se>0]) } se.nonsmooth=sqrt(se) fit.nonsmooth<-drop(M %*% cof) #browser() g <- cut(vv, breaks = c(mi, psii, ma), labels =FALSE, include.lowest = TRUE) r<-list(values=vv, g=g, fit.nonsmooth=fit.nonsmooth, se.nonsmooth=se.nonsmooth, fit.smooth=fit.smooth, se.smooth=se.smooth) r } #============================= arg <- list(...) se.type <- match.arg(se.type) if (is.null(arg$col)) arg$col = 2#grey(0.4) if (is.null(arg$lwd)) arg$lwd = 2.5 if (is.null(arg$lty)) arg$lty = 1 if (is.null(arg$res.col))arg$res.col= grey(0.7, alpha=.8) if (is.null(arg$cex)) arg$cex = 1.2 if (is.null(arg$pch)) arg$pch = 20 if (is.null(arg$xlim)) arg$xlim = NULL if (is.null(arg$ylim)) arg$ylim = NULL if (is.null(arg$main)) arg$main = NULL if (is.null(arg$sub)) arg$sub = NULL if (is.null(arg$cex.axis)) arg$cex.axis = 1 if (is.null(arg$cex.lab)) arg$cex.lab = 1 if(surf){ if(length(x$nameUV$Z)!=2) stop(" 'surf=TRUE' works only with 2 stepmented terms") plot2step(x, res=res, interc=interc, psi.lines=psi.lines, arg=arg, add=add) return(invisible(NULL)) } #browser() if (missing(term)) { if (length(unique(x$nameUV$Z)) > 1) { stop("please, specify `term'") } else { term <- x$nameUV$Z } } else { if(is.numeric(term)) term <- x$nameUV$Z[term] #if(length(term)>=2) stop(" 'term' should be a scalar") #dterm <- deparse(substitute(term)) #if (dterm %in% x$nameUV$Z) term <- dterm if (!isTRUE(all(term %in% x$nameUV$Z))) stop(paste("Unknown term. It should be numeric or one of: ", paste(" '", x$nameUV$Z, "' ", sep="", collapse=""))) } if(add.fx && !term%in%colnames(x$f.x)) stop("no additional effect for the selected term") if(is.null(.vcov)) .vcov <- vcov.stepmented(x, zero.cor=zero.cor, type=se.type) if(length(term)>1){ opz<-list(...) 
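# Usage sketch (not run) for the multi-term branch below: each stepmented term is drawn
# on a common y-range and a legend is added. 'ostep' is a hypothetical stepmented fit
# with two stepmented covariates:
#   plot(ostep, term = 1:2, conf.level = 0.95, shade = TRUE, col = c(2, 4), leg = "topleft")
#   plot(ostep, term = "x", conf.level = 0.95, se.type = "cdf")   # a single term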
cols<- if(!is.null(opz$col)) opz$col else 1:length(term)+1 cols <- rep(cols, l=length(term)) res.cols<- rep(res.col, l=length(term)) lwds<- if("lwd"%in% names(opz)) opz$lwd else 2 lwds<- rep(lwds, l=length(term)) ltys<- if("lty"%in% names(opz)) opz$lty else 1 ltys<- rep(ltys, l=length(term)) cexs<- if("cex"%in% names(opz)) opz$cex else .75 cexs<- rep(cexs, l=length(term)) pchs<- if("pch"%in% names(opz)) opz$pch else 19 pchs<- rep(pchs, l=length(term)) if(!is.null(opz$ylim)) { Ylim <- opz$ylim } else { if(inherits(x, "glm")){ if(link){ Ylim <- if(!res) range(x$linear.predictors) else range(x$linear.predictors+x$residuals) } else { Ylim <- if(!res) range(x$fitted.values) else range(x$fitted.values+ residuals(x, "response")) } } else { Ylim <- if(!res) range(x$fitted.values) else range(x$fitted.values+x$residuals) } } Ylab <- if(!is.null(opz$ylab)) opz$ylab else paste(formula(x))[2] idTerm <- if(is.numeric(term)) term else match(term, x$nameUV$Z) nomeX <- intersect(strsplit(x$nameUV$Z,":")[[idTerm[1]]], unlist(strsplit(x$nameUV$Z,":")[idTerm[-1]])) Xlab <- if(!is.null(opz$xlab)) opz$xlab else nomeX Xlim<- if(!is.null(opz$xlim)) opz$xlim else range(x$rangeZ[,term]) #range(x$model[,nomeX]) int.all<-rep(NA, length(term)) plot.stepmented(x, term[1], add = add, res = res, conf.level = conf.level, .vcov=.vcov, interc=interc, link = link, res.col = res.cols[1], const = const, shade=shade, surf=FALSE, zero.cor = zero.cor, add.fx=add.fx, heurs=heurs, se.type=se.type, k=k, psi.lines=psi.lines, #rug=FALSE, dens.rug=FALSE, dens.col = grey(0.8), smoos=NULL, hide.zeros=TRUE, #transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, col=cols[1], ylim=Ylim, xlim=Xlim, ylab=Ylab,xlab=Xlab, lty=ltys[1],pch=pchs[1],lwd=lwds[1],cex=cexs[1]) Term<- if(is.numeric(term[1])) x$nameUV$Z[term[1]] else term[1] int.all[1]<-interc.gr<- strsplit(Term, ":")[[1]][2] #points.segmented(x, term[1], col=cols[1], const=estcoef[interc.gr], v=psi.lines, pch=20, link=link) for(j in 2:length(term)){ #browser() plot.stepmented(x, term[j], add = TRUE, res = res, conf.level = conf.level, .vcov=.vcov, interc=interc, link = link, res.col = res.cols[j], const = const, shade=shade, surf=FALSE, zero.cor = zero.cor, add.fx=add.fx, heurs=heurs, se.type=se.type, k=k, psi.lines=psi.lines, #rug=FALSE, dens.rug=FALSE, dens.col = grey(0.8), smoos=NULL, hide.zeros=TRUE, #transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, col=cols[j], ylim=Ylim, xlim=Xlim, ylab=Ylab,xlab=Xlab, lty=ltys[j], pch=pchs[j],lwd=lwds[j], cex=cexs[j]) #hide.zeros smoos=NULL # plot.stepmented(x, term[j], add = TRUE, res = res, conf.level = conf.level, # interc=interc, link = link, res.col = res.cols[j], rev.sgn = rev.sgn, const = const, # shade=shade, rug=FALSE, dens.rug=FALSE, dens.col = grey(0.8), # transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, # smoos=NULL, hide.zeros=TRUE,col=cols[j], # lty=ltys[j],pch=pchs[j],lwd=lwds[j],cex=cexs[j]) Term<- if(is.numeric(term[j])) x$nameUV$Z[term[j]] else term[j] int.all[j]<-interc.gr<- strsplit(Term, ":")[[1]][2] #points.segmented(x, term[j], col=cols[j], const = estcoef[interc.gr], v=psi.lines, pch=20, link=link) } if(!is.na(leg)) { legend(leg, int.all, col=cols, lty=1, lwd=1.5, bty="n") } } else { #se length(term)==1 estcoef <- x$coefficients if(is.null(const)){ interc.gr<- strsplit(term, ":")[[1]][2] const<- estcoef[interc.gr] if(is.na(const)) const<-0 } #browser() idU <- 
x$nameUV$U[endsWith(x$nameUV$U, paste(".", term, sep = ""))] if (interc && "(Intercept)" %in% names(x$coefficients)) { idU <- c("(Intercept)", idU) est.means <- cumsum(x$coefficients[idU]) } else { est.means <- c(0,cumsum(x$coefficients[idU])) } nomiPsi <- sub("V", "psi", x$nameUV$V) idPsi <- nomiPsi[endsWith(nomiPsi, paste(".", term, sep = ""))] #psi <- sort(x$coefficients[idPsi]) psi <- sort(x$psi.rounded[1,idPsi]) rangeZ <- x$rangeZ[, term] Z <- drop(x$Z[, term, drop = TRUE]) m <- min(rangeZ) M <- max(rangeZ) #browser() fit0 <- as.numeric(as.character(cut(Z, breaks = c(m, psi, M), labels = est.means, include.lowest = TRUE))) #y <- x$residuals + fit0 #oppure: x$model[[1]] #browser() y<- fit0 Y <- rep(est.means, each = 2) X <- c(m, rep(psi, each = 2), M) id1 <- seq(1, length(X), by = 2) id2 <- seq(2, length(X), by = 2) if (is.null(arg$xlab)) arg$xlab = colnames(x$Z[, term, drop = FALSE]) if (is.null(arg$ylab)) arg$ylab = names(x$model)[1] #browser() #se c'e' un effetto della x aggiuntivo if(add.fx && !is.null(x$f.x)){ Z100<- seq(min(Z), max(Z), l=nrow(x$f.x)) #Z100a<- sort(c(psi*c(.999,1,1.0001),seq(min(Z), max(Z), l=nrow(x$f.x)))) #-3*length(psi)))) Z100a<- sort(c(as.vector(sapply(psi, function(.x) .x*c(.999,1,1.0001))), seq(min(Z), max(Z), l=nrow(x$f.x)))) g <- cut(Z100a, breaks = c(m, psi, M), labels =FALSE, include.lowest = TRUE) f.x100 <- x$f.x[, term, drop=TRUE] fSpline<-splinefun(Z100, f.x100) f.x.n <-fSpline(Z) y<- y + f.x.n f.x100 <- fSpline(Z100a)+ rep(est.means, as.numeric(table(g))) #cbind(Z100a, g, rep(est.means, as.numeric(table(g)))) #fit0<- fit0 + f.x.n #y<- y + x$f.x[,term, drop=TRUE] #fit0<- fit0 + x$f.x[,term, drop=TRUE] } y<-y+const Y<-Y+const if (inherits(x, what = "glm", which = FALSE) && !link){ y<- x$family$linkinv(y) Y<- x$family$linkinv(Y) if(add.fx && !is.null(x$f.x)) f.x100<- x$family$linkinv(f.x100) } if(conf.level>0){ if(add.fx && !is.null(x$f.x)) stop(" conf.level>0 is not allowed with additional terms") r<-pred.step.plot(x, k=k, apprx=se.type, nomeZ=term, zero.cor=zero.cor, V=.vcov) #browser() vv<-r$values ff<-r$fit.nonsmooth se<-r$se.nonsmooth z<-if(inherits(x, "glm")) abs(qnorm((1-conf.level)/2)) else abs(qt((1-conf.level)/2, df= x$df.residual)) inf= ff-z*se sup= ff+z*se if(heurs && se.type!="none") { inf<-pmin(inf, r$fit.smooth-z*r$se.smooth ) sup<-pmax(sup, r$fit.smooth+z*r$se.smooth ) } #ff=r$fit.smooth #inf=r$fit.smooth-z*r$se.smooth #sup=r$fit.smooth+z*r$se.smooth M=cbind(vv, ff, inf, sup) if(is.null(arg$ylim)) arg$ylim<-range(c(inf, sup)) } return.ci=FALSE #era per fare alcune verifiche.. 
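# Usage sketch (not run): the pointwise band computed just above via pred.step.plot()
# relies on the smoothed approximation of the step terms (se.type); heurs=TRUE widens
# the band with the smooth-approximation envelope. 'ostep' is a hypothetical fit:
#   plot(ostep, term = 1, conf.level = 0.95, se.type = "cdf", shade = TRUE)
#   V <- vcov(ostep, type = "cdf", zero.cor = TRUE)   # covariance actually used for the band
#   plot(ostep, term = 1, conf.level = 0.95, .vcov = V, heurs = FALSE)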
if(return.ci){ return(M) } else { ############################################################### if(res) y<- if(link) y+x$residuals else y + resid(x, "response") if(add) { if(res) points(Z, y, pch = arg$pch, col =res.col, cex = arg$cex) } else { if (res) { plot(Z, y, ylab = arg$ylab, xlab = arg$xlab, pch = arg$pch, col =res.col, cex = arg$cex, xlim = arg$xlim, ylim = arg$ylim, main=arg$main, sub=arg$sub, cex.axis=arg$cex.axis, cex.lab=arg$cex.lab) } else { plot(Z, y, ylab = arg$ylab, xlab = arg$xlab, type = "n", xlim = arg$xlim, ylim = arg$ylim, main=arg$main, sub=arg$sub, cex.axis=arg$cex.axis, cex.lab=arg$cex.lab) } } coll <- rep(arg$col, length(est.means)) ltyy <- rep(arg$lty, length(est.means)) lwdd <- rep(arg$lwd, length(est.means)) if(add.fx && !is.null(x$f.x)){ #limy<-tapply(f.x100, g, function(.x).x[1]) limy<-tapply(f.x100, g, function(.x)c(.x[1], .x[length(.x)])) limy<- unlist(limy) limy<-c(limy[1], rep(limy[2:(length(limy)-1)], each=2), limy[length(limy)]) limy<- matrix(limy, ncol=2, byrow=TRUE) limy<- apply(limy[-c(1, nrow(limy)),,drop=FALSE],1,max) #limy<- do.call(rbind, limy) for(i in 1:length(est.means)) lines(Z100a[g==i], f.x100[g==i], col = coll[i], lwd = lwdd[i], lty = ltyy[i]) #col = arg$col, lwd = arg$lwd, lty = arg$lty) } else { limy<- apply(matrix(Y[-c(1,length(Y))], ncol = 2, byrow = TRUE), 1, max) segments(X[id1], Y[id1], X[id2], Y[id2], col = coll, lwd = lwdd, lty = ltyy) #col = arg$col, lwd = arg$lwd, lty = arg$lty) } # ######################################################## } #browser() if(conf.level>0){ #browser() if(shade) { polygon(c(vv, rev(vv)), c(M[,3],rev(M[,4])), col = adjustcolor(coll, .3), border=NA) } else { for(i in 1:length(est.means)){ matlines(vv[r$g==i], M[r$g==i, c(2,3,4)], lty=c(1,2,2), col=coll[i]) } } #matlines(vv, cbind(ff, ff-z*se, ff+z*se), lty=c(1,2,2), col=coll) } # plot(X,Y, type='s) #browser() if(psi.lines) { #limy<-apply(matrix(Y[-c(1,length(Y))], ncol = 2, byrow = TRUE), 1, max) segments(x0 = psi, y0 = par()$usr[3], x1 = psi, y1 = limy, lty = 3, col = 1) #arg$col) points(psi, rep(par()$usr[3], length(psi)), pch = 19, col = arg$col) } invisible(NULL) } } segmented/R/seg.lm.fit.r0000644000176200001440000004351714725615244014603 0ustar liggesusersseg.lm.fit <-function (y, XREG, Z, PSI, w, offs, opz, return.all.sol = FALSE) { useExp.k = TRUE search.minWO<-function(h, psi, psi.old, X, y, w, offs) { psi.ok<- psi*h + psi.old*(1-h) #PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) PSI <- matrix(psi.ok, nrow=n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylmWO(cbind(X, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #else sum(obj1$residuals^2 * w) L1 } #--------------------------------- search.min<-function(h, psi, psi.old, X, y, w, offs) { psi.ok<- psi*h + psi.old*(1-h) #PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) PSI <- matrix(psi.ok, nrow=n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylm(cbind(X, U1), y), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #else sum(obj1$residuals^2) L1 } #--------------------------------- est.k <- function(x1, y1, L0) { ax <- log(x1) .x <- cbind(1, ax, ax^2) 
b <- drop(solve(crossprod(.x), crossprod(.x, y1))) const <- b[1] - L0 DD <- sqrt(b[2]^2 - 4 * const * b[3]) kk <- exp((-b[2] + DD)/(2 * b[3])) return(round(kk)) } # dpmax <- function(x, y, pow = 1) { # if (pow == 1) # -(x > y) # else -pow * ((x - y) * (x > y))^(pow - 1) # } mylmWO <- function(x, y, w, offs = 0) { sw <- sqrt(w) x1 <- x * sw y <- y - offs y1 <- y * sw b <- drop(solve(crossprod(x1), crossprod(x1, y1))) fit <- x%*%b #drop(tcrossprod(x, t(b))) r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(w*r^2), df.residual = length(y) - length(b)) o } #---------------------------- mylm <- function(x, y, w, offs) { b <- drop(solve(crossprod(x), crossprod(x, y))) fit <- x%*%b # r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(r^2), df.residual = length(y) - length(b)) o } id.w.offs<- var(offs)<=0 && var(w)<=0 if(id.w.offs){ fitter<-function(x, y, w, offs) .lm.fit(x=x, y=y) #list(coefficients=drop(solve(crossprod(x), crossprod(x, y)))) mylmOK <- mylm search.minOK <- search.min #final.fitter<- function(x,y,w,offs) lm.fit(x,y) } else { fitter<-function(x, y, w, offs) .lm.fit(x=sqrt(w)*x, y=sqrt(w)*(y-offs)) mylmOK <- mylmWO search.minOK <- search.minWO #final.fitter<- function(x,y,w,offs) lm.wfit(x,y,w,offs) } # isZero<-function (x, neps = 1, eps = .Machine$double.eps, ...) { # if (is.character(eps)) { # eps <- match.arg(eps, choices = c("double.eps", "single.eps")) # if (eps == "double.eps") { # eps <- .Machine$double.eps # } # else if (eps == "single.eps") { # eps <- sqrt(.Machine$double.eps) # } # } # (abs(x) < neps * eps) # } isZero <- function(v) sapply(v, function(.x) identical(.x,0)) # mylmADD <- function(invXtX, X, v, Xty, y) { # vtv <- sum(v^2) # Xtv <- crossprod(X, v) # m <- invXtX %*% Xtv # d <- drop(1/(vtv - t(Xtv) %*% m)) # r <- -d * m # invF <- invXtX + d * tcrossprod(m) # newINV <- rbind(cbind(invF, r), c(t(r), d)) # b <- crossprod(newINV, c(Xty, sum(v * y))) # fit <- tcrossprod(cbind(X, v), t(b)) # r <- y - fit # o <- list(coefficients = b, fitted.values = fit, residuals = r) # o # } in.psi <- function(LIM, PSI, ret.id = TRUE) { a <- PSI[1, ] < LIM[1, ] b <- PSI[1, ] > LIM[2, ] is.ok <- !a & !b if (ret.id) return(is.ok) isOK <- all(is.ok) && all(!is.na(is.ok)) isOK } far.psi <- function(Z, PSI, id.psi.group, ret.id = TRUE, fc = 0.93) { nSeg <- length(unique(id.psi.group)) npsij <- tapply(id.psi.group, id.psi.group, length) nj <- sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z > PSI)[, id.psi.group == .x, drop = FALSE]) + 1) }, simplify = FALSE) ff <- id.far.ok <- vector("list", length = nSeg) for (i in 1:nSeg) { if(length(nj[[i]]) != npsij[i] + 1) nj[[i]] <- tabulate(rowSums((Z >= PSI)[, id.psi.group == i, drop = FALSE]) + 1) id.ok <- (nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] #& id.ok[-1] #consideriamo solo le ni precedenti ff[[i]] <- ifelse(diff(nj[[i]]) > 0, 1/fc, fc) } id.far.ok <- unlist(id.far.ok) ff <- unlist(ff) if (!ret.id) { return(all(id.far.ok)) } else { attr(id.far.ok, "factor") <- ff return(id.far.ok) } } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } n <- length(y) #min.step <- opz$min.step alpha <- opz$alpha rangeZ <- if(is.null(opz$rangeZ)) apply(Z, 2, range) else opz$rangeZ #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ #browser() psi <- PSI[1, ] psi<-adj.psi(psi, limZ) PSI<-matrix(psi,nrow=n, 
ncol=ncol(PSI), byrow=TRUE) id.psi.group <- opz$id.psi.group conv.psi <- opz$conv.psi hh <- opz$h digits <- opz$digits pow <- opz$pow nomiOK <- opz$nomiOK toll <- opz$toll gap <- opz$gap fix.npsi <- opz$stop.if.error dev.new <- opz$dev0 visual <- opz$visual it.max <- old.it.max <- opz$it.max fc <- opz$fc names(psi) <- id.psi.group it <- 0 epsilon <- 10 k.values <- dev.values <- NULL psi.values <- list() #psi.values[[length(psi.values) + 1]] <- NA sel.col.XREG <- unique(sapply(colnames(XREG), function(x) match(x, colnames(XREG)))) if (is.numeric(sel.col.XREG)) XREG <- XREG[, sel.col.XREG, drop = FALSE] invXtX <- opz$invXtX Xty <- opz$Xty #browser() if (!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control.", call. = FALSE) if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi starting values too close each other or at the boundaries. Please change them (e.g. set 'quant=TRUE' in seg.control()), or decrease their number.", call. = FALSE) n.psi<- n.psi1 <- ncol(Z) V <- (Z > PSI) U <- (Z - PSI) * V V<- -V #if (pow[1] != 1) U <- U^pow[1] if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) L1 <- sum(obj$residuals^2 * w) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, idU=ncol(XREG)+1:(length(psi)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE) return(obj) } # # for(.i in opz$nomiSeg) { ##poni min(z)=0, cosi solve() in step.lm.fit non ha problemi. # if(.i %in% colnames(XREG)) XREG[,.i] <- XREG[,.i] - min(XREG[,.i]) #} # #XREG0<-XREG id.changeCoef <- FALSE if(any(opz$nomiSeg%in%colnames(XREG))) { id.changeCoef <- TRUE nomiSeg<- intersect(opz$nomiSeg, colnames(XREG)) minZ<- apply(XREG[,nomiSeg,drop=FALSE], 2, min) XREG[,nomiSeg] <- sweep(XREG[, nomiSeg, drop=FALSE], 2, minZ) } if(!opz$usesegreg){ dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi psi.values[[length(psi.values) + 1]] <- NA #nessun psi } if(is.null(opz$fit.psi0)){ obj0 <- try(mylmOK(cbind(XREG, U), y, w, offs), silent = TRUE) #if (class(obj0)[1] == "try-error") obj0 <- lm.wfit(cbind(XREG, U), y, w, offs) L0 <- sum(obj0$residuals^2 * w) } else { L0 <- opz$fit.psi0$L0 } n.intDev0 <- nchar(strsplit(as.character(L0), "\\.")[[1]][1]) #n.intDev0 <- nchar(strsplit(format(L0, scientific = FALSE), "\\.")[[1]][1]) #fac.L0<- 10^(nchar(strsplit(format(L0, scientific = FALSE),"\\.")[[1]][2])-1) #browser() dev.values[length(dev.values) + 1] <- L0 psi.values[[length(psi.values) + 1]] <- psi if (visual) { cat(paste("iter = ", sprintf("%2.0f", 0), #" dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), #" dev = ", sprintf(paste("%", n.intDev0 + 6, ".3f", sep = ""), L0*fac.L0), " k = ", sprintf("%5.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } id.warn <- FALSE id.psi.changed <- rep(FALSE, it.max) tolOp <-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) #============================================== inizio ciclo idU <- ncol(XREG)+ 1:n.psi 
idV <- 1:n.psi + max(idU) while (abs(epsilon) > toll) { it <- it + 1 #if(it==1) browser() n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if (n.psi1 != n.psi0) { V<- (Z > PSI) U <- (Z - PSI) * V V<- -V idU <- ncol(XREG)+ 1:n.psi1 idV <- 1:n.psi1 + max(idU) #if (pow[1] != 1) U <- U^pow[1] obj0 <- try(mylm(cbind(XREG, U), y, w, offs), silent = TRUE) if (class(obj0)[1] == "try-error") obj0 <- lm.wfit(cbind(XREG, U), y, w, offs) L0 <- sum(obj0$residuals^2 * w) } X <- cbind(XREG, U, V) #browser() #rownames(X) <- NULL #colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = "")) obj <- fitter(X,y,w,offs)# lm.wfit(x = X, y = y, w = w, offset = offs) #obj <- .lm.fit(x = X, y = y) #beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] #gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")] beta.c <- obj$coefficients[idU] gamma.c <- obj$coefficients[idV] #if (any(is.na(c(beta.c, gamma.c)))) { #if(it==1) browser() if(any(isZero(c(beta.c, gamma.c)))) { if (fix.npsi) { if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call. = FALSE) } else { id.coef.ok <- gamma.c!=0 #!is.na(gamma.c) psi <- psi[id.coef.ok] if (length(psi) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } gamma.c <- gamma.c[id.coef.ok] beta.c <- beta.c[id.coef.ok] Z <- Z[, id.coef.ok, drop = FALSE] rangeZ <- rangeZ[, id.coef.ok, drop = FALSE] limZ <- limZ[, id.coef.ok, drop = FALSE] nomiOK <- nomiOK[id.coef.ok] id.psi.group <- id.psi.group[id.coef.ok] names(psi) <- id.psi.group } } psi.old <- psi psi <- psi.old + hh*gamma.c/beta.c #aggiusta la stima di psi.. #psi<- adj.psi(psi, rangeZ) psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, id.psi.group, sort), use.names =FALSE) a<-optimize(search.minOK, c(0,1), psi=psi, psi.old=psi.old, X=XREG, y=y, w=w, offs=offs, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if (!is.null(digits)) psi <- round(psi, digits) #PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) PSI <- matrix(psi, nrow=n, ncol = length(psi), byrow=TRUE) V <- (Z > PSI) U <- (Z - PSI) * V V <- -V #if (pow[1] != 1) U1 <- U1^pow[1] #obj1 <- try(mylm(cbind(XREG, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(XREG, U1), y, w, offs), silent = TRUE) if (visual) { flush.console() cat(paste("iter = ", sprintf("%2.0f", it), #" dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L1), " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " est.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } #epsilon <- if (conv.psi) max(abs((psi - psi.old)/psi.old)) else (L0 - L1)/(abs(L0) + 0.1) epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0 <- L1 k.values[length(k.values) + 1] <- use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE, fc = opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if (!all(id.psi.ok)) { if (fix.npsi) { psi <- psi * ifelse(id.psi.far, 1, attr(id.psi.far, "factor")) #PSI <- matrix(rep(psi, 
rep(nrow(Z), length(psi))), ncol = length(psi)) PSI <- matrix(psi, nrow=n, ncol = length(psi), byrow=TRUE) id.psi.changed[it] <- TRUE } else { Z <- Z[, id.psi.ok, drop = FALSE] PSI <- PSI[, id.psi.ok, drop = FALSE] rangeZ <- rangeZ[, id.psi.ok, drop = FALSE] limZ <- limZ[, id.psi.ok, drop = FALSE] nomiOK <- nomiOK[id.psi.ok] id.psi.group <- id.psi.group[id.psi.ok] psi.old <- psi.old[id.psi.ok] psi <- psi[id.psi.ok] names(psi) <- id.psi.group if (ncol(PSI) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while.. ############################################################################## if (id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.", sep = ""), call. = FALSE) if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. = FALSE) attr(psi.values, "dev") <- dev.values attr(psi.values, "k") <- k.values psi <- unlist(tapply(psi, id.psi.group, sort)) names(psi) <- id.psi.group #names.coef <- names(obj$coefficients) #PSI.old <- PSI #PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) PSI <- matrix(psi, nrow=n, ncol = length(psi), byrow=TRUE) #if (sd(PSI - PSI.old) > 0 || id.psi.changed[length(id.psi.changed)]) { V <- (Z > PSI) U <- (Z - PSI) * V V<- -V #colnames(U) <- paste("U", 1:ncol(U), sep = "") #colnames(V) <- paste("V", 1:ncol(V), sep = "") #Poiche' servono solo coeff, fitted e resid potrei usare anche mylmOK() o .lm.fit che e' piu' veloce.. #browser() obj <- mylmOK(x = cbind(XREG, U), y = y, w = w, offs = offs) L1 <- obj$L0 # if(id.w.offs){ # obj <- lm.fit(x = cbind(XREG, U), y = y) #mylmOK(x = cbind(XREG, U), y = y, w = w, offset = offs) # L1 <- sum(obj$residuals^2) # } else { # obj <- lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) # # L1 <- sum(obj$residuals^2 * w) # } #browser() idInt<-match("(Intercept)", names(obj$coefficients), 0) if(id.changeCoef) obj$coefficients[idInt] <- obj$coefficients[idInt]-sum(obj$coefficients[nomiSeg]*minZ) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, idU=ncol(XREG)+1:(length(psi)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = id.warn) return(obj) } segmented/R/model.matrix.stepmented.r0000644000176200001440000000636214617134612017377 0ustar liggesusersmodel.matrix.stepmented<-function(object, type=c("cdf","abs","none"), k=NULL, ...){ #ret.rangeZ=FALSE #if(!inherits(object, "segmented")) stop("A 'segmented' fit is requested") #browser() type=match.arg(type) if(type=="abs") stop(" type='abs' not (yet?) implemented") if(inherits(object, "lm")) { X<- qr.X(object$qr, ...) 
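# In the 'cdf' branch further below, the step indicator I(z > psi) of each U column is
# replaced by the smooth approximation pnorm((z - psi)/ss), and the extra 'psi' column
# holds its derivative w.r.t. psi times the U coefficient, -(beta_U/ss)*dnorm((z - psi)/ss);
# ss is a bandwidth depending on n (or set through 'k').
# Small sketch of the approximation (not run; values are arbitrary):
#   z <- seq(0, 1, l = 200); psi <- 0.5; ss <- 0.02
#   step <- as.numeric(z > psi)        # exact, non-differentiable indicator
#   smoo <- pnorm((z - psi)/ss)        # smoothed version used in the U column
#   # matplot(z, cbind(step, smoo), type = "l")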
#if(inherits(object, "glm") ) { if(!is.null(object$weights)) { #questo vale sia per glm che per lm con weights #W<-chol(diag(object$weights)) #X <- X/diag(W) X<- X/sqrt(object$weights) } } else { class(object)<-class(object)[-1] X<-try(model.matrix(object,...), silent=TRUE) if(!is.matrix(X)) X<- model.matrix(object, data=model.frame(object)) } p=ncol(X) n=nrow(X) nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U nomiPsi<- sub("V","psi", nomiV) id.noV<-setdiff(colnames(X), nomiPsi) #se object viene da stepmented() la matrice restituita e' sbagliata #se da stepreg() allora ok.. dropV=FALSE if(strsplit(paste(object$call[[1]]), "\\.")[[1]][1]=="stepmented" && type=="none"){ type<-"cdf" k=-500 dropV=TRUE } if(type=="none") return(X[,id.noV, drop=FALSE]) #se object viene da stepmented() la matrice restituita e' sbagliata #se da stepreg() allora ok.. if(inherits(object, "glm")) { sigma = if(object$family$family%in%c("poisson","binomial")) 1 else sqrt(object$deviance/object$df.residual) } else { sigma = sqrt(sum(object$residuals^2)/object$df.residual) } #browser() maxZ.list<-NULL for(i in 1:length(nomiU)){ nomeZ<- gsub("U[1-9]*[0-9].","",nomiU[i]) Z<-object$Z[,nomeZ] minZ<-min(Z) maxZ<-max(Z) psi<-object$psi[nomiPsi[i],"Est."] if(type%in%c("cdf","abs" )){ Z01<- (Z-minZ)/(maxZ-minZ) psi01<- (psi-minZ)/(maxZ-minZ) if(is.null(k)){ idU<-match(nomiU[i],nomiU) snr.idU<-abs(object$coefficients[nomiU][idU])/sigma #ss01=n^(-(.6 + .3*log(snr.idU) -abs(psi01-.5)^(1/2)/n^(1/2))) ss01=n^(-(.6 + .5*log(snr.idU)/sqrt(snr.idU) -abs(psi01-.5)^(1/2)/n^(1/2))) } else { ss01=n^k } ss<- ss01*(maxZ-minZ) if(type=="cdf"){ X[, nomiU[i]]<- pnorm((Z-psi)/ss) if(nomiPsi[i]%in%colnames(X)) { X[, nomiPsi[i]] <- -(object$coefficients[nomiU[i]]/ss)*dnorm((Z-psi)/ss) } else { nomicolsX<-colnames(X) X <- cbind(X, -(object$coefficients[nomiU[i]]/ss)*dnorm((Z-psi)/ss)) colnames(X)<- c(nomicolsX, nomiPsi[i] ) } } else { xx <- Z-psi den <- -xx+2*xx*pnorm(xx/ss)+2*ss*dnorm(xx/ss) #.05*log(cosh((x-.5)/.05))) V <- (1/(2 * den)) X[, nomiU[i]]<- (Z * V + 1/2) if(nomiPsi[i]%in%colnames(X)) { X[, nomiPsi[i]] <- -object$coefficients[nomiU[i]]*V } else { nomicolsX<-colnames(X) X <- cbind(X, -object$coefficients[nomiU[i]]*V) colnames(X)<- c(nomicolsX, nomiPsi[i] ) } } } maxZ.list[[length(maxZ.list)+1]] <- maxZ-minZ } #browser() #if(ret.rangeZ) attr(X, "rangeZ")<- maxZ.list if(dropV) X<-X[, id.noV, drop=FALSE] return(X) } segmented/R/lines.segmented.R0000644000176200001440000000222714523360254015644 0ustar liggesuserslines.segmented<-function(x, term, bottom=TRUE, shift=FALSE, conf.level=0.95, k=50, pch=18, rev.sgn=FALSE,.vcov=NULL, .coef=NULL,...){ if(missing(term)){ if(length(x$nameUV$Z)>1 ) {stop("please, specify `term'")} else {term<-x$nameUV$Z} } ss<-list(...) metodo<- if(!is.null(ss$method)) ss$method else "delta" colore<- if(is.null(ss$col)) 1 else ss$col usr <- par("usr") h<-(usr[4]-usr[3])/abs(k) y<- if(bottom) usr[3]+h else usr[4]-h m<- confint.segmented(object=x,parm=term,level=conf.level,rev.sgn=rev.sgn,digits=15,method=metodo,.vcov=NULL, .coef=NULL) #m<-r[[term]] #FORSE non e' necessaria #if(rev.sgn) m<- -m #ma invece serve il seguente (se length(psi)=1 e rev.sgn=T): m<-matrix(m,ncol=3) if(nrow(m)>1) m<-m[order(m[,1]),] est.psi<-m[,1] lower.psi<-m[,2] upper.psi<-m[,3] if(length(est.psi)>1) { y<- if(shift) y+seq(-h/2,h/2,length=length(est.psi)) else rep(y,length(est.psi)) } #segments(lower.psi, y, upper.psi, y, ...) arrows(lower.psi, y, upper.psi, y, code=3, angle=90, length=.07, ...) 
points(est.psi,y,type="p",pch=pch,col=colore) } segmented/R/intercept.r0000644000176200001440000001044614624054360014617 0ustar liggesusersintercept<-function (ogg, parm, rev.sgn = FALSE, var.diff = FALSE, .vcov=NULL, .coef=NULL, digits = max(4, getOption("digits") - 2),...){ #corregge in caso di no model intercept -- CHE VOLEVO DIRE?? #forse che adesso funziona se nel modello non c'e' l'interc. #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #if(!inherits(ogg, "segmented.lme") || !inherits(ogg, "segmented")) stop("only segmented fits allowed") covv <- if(is.null(.vcov)) vcov(ogg, ...) else .vcov estcoef<- if(is.null(.coef)) coef(ogg) else .coef if(!all(dim(covv)==c(length(estcoef), length(estcoef)))) stop("dimension of cov matrix and estimated coeffs do not match", call. = FALSE) if (var.diff && length(ogg$nameUV$Z) > 1) { var.diff <- FALSE warning("var.diff set to FALSE with multiple segmented variables", call. = FALSE) } #browser() nomepsi <- rownames(ogg$psi) nomeU <- ogg$nameUV$U nomeZ <- if(inherits(ogg, "segmented.lme")) ogg$namesGZ$nameZ else ogg$nameUV$Z if (missing(parm)) { #nomeZ <- ogg$nameUV$Z if (length(rev.sgn) == 1) rev.sgn <- rep(rev.sgn, length(nomeZ)) } else { if (!all(parm %in%nomeZ)) { stop("invalid parm") } else { nomeZ <- parm } } if (length(rev.sgn) != length(nomeZ)) rev.sgn <- rep(rev.sgn, length.out = length(nomeZ)) nomi <- names(estcoef) #browser() nomi <- nomi[-match(nomepsi, nomi)] Allpsi <- index <- vector(mode = "list", length = length(nomeZ)) # gapCoef<-summary.segmented(ogg)$gap ##eliminato 10/11/15 Ris <- list() rev.sgn <- rep(rev.sgn, length.out = length(nomeZ)) alpha0 <- alpha00 <-0 idInterc<-grep("ntercept",names(estcoef)) if(length(idInterc)>0) if(idInterc!= grep("ntercept",rownames(covv))) stop("intercept name in coeff and vcov do not match") if(length(idInterc)==1) alpha0 <- alpha00 <- estcoef[idInterc] # if("(Intercept)"%in%names(estcoef) || "intercept"%in%names(estcoef)){ # alpha0 <- alpha00 <- estcoef["(Intercept)"]} else {alpha0 <- alpha00 <-0} #per ogni variabile segmented... 
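# The loop below recovers the segment-specific intercepts recursively,
#   alpha_{j+1} = alpha_j - psi_j * (slope difference at psi_j),
# so that, together with slope(), the full parameterization of each segment is available.
# Usage sketch (not run; 'oseg' is a hypothetical segmented fit):
#   intercept(oseg)   # one intercept per segment, for each segmented variable
#   slope(oseg)       # the corresponding segment-specific slopes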
#browser() for (i in 1:length(nomeZ)) { #id.cof.U <- f.U(ogg$nameUV$U, nomeZ[i]) + (match(ogg$nameUV$U[1], nomi)-1) #psii<- ogg$psi[f.U(ogg$nameUV$V, nomeZ[i]) , "Est."] #Allpsi[[i]] <- sort(psii, decreasing = FALSE) #id.cof.U <- id.cof.U[order(psii)] #index[[i]] <- id.cof.U #ind <- as.numeric(na.omit(unlist(index[[i]]))) #cof <- coef(ogg)[ind] #alpha0<-if("(Intercept)"%in%names(estcoef)) estcoef["(Intercept)"] else 0 alpha0<-if(length(idInterc)==1) estcoef[idInterc] else 0 Allpsi[[i]] <- if(inherits(ogg, "segmented.lme")) estcoef["G0"] else ogg$indexU[[nomeZ[i]]] if(is.null(ogg$constr)){ cof<- if(inherits(ogg, "segmented.lme")) estcoef["U"] else estcoef[names(ogg$indexU[[nomeZ[i]]])] } else { index <- match(c(nomeZ[i],ogg$nameUV$U[grep(nomeZ[i], ogg$nameUV$U)]), names(coef(ogg)),0) cof<- drop(ogg$constr$invA.RList[[match(nomeZ[i], ogg$nameUV$Z,0)]]%*%coef(ogg)[index])[-1] #solo le } alpha <- vector(length = length(cof)) #length(ind) for (j in 1:length(cof)) { alpha[j] <- alpha0 - Allpsi[[i]][j] * cof[j] alpha0 <- alpha[j] } cof.out <- c(alpha00, alpha) if(rev.sgn[i]) cof.out <- cof.out[length(cof.out):1] ris <- matrix(cof.out) dimnames(ris) <- list(paste("intercept", 1:nrow(ris), sep = ""), "Est.") Ris[[nomeZ[i]]] <- signif(ris, digits) } Ris } segmented/R/reboot.slme.r0000644000176200001440000001625114415477002015053 0ustar liggesusersreboot.slme <-function(fit, B=10, display=FALSE, metodo=1, frac=1, it.max=6, it.max.b=5, seed=NULL, start=NULL, msg=TRUE){ #metodo: viene passato alla funzione logL. Se 1 la logL che viene calcolata e' quella della componente # fit$lme.fit.noG, namely the logLik from the lme fit without the G variables.. #bootRestart for slme4 #fit: un oggetto di classe "segmented.lme" (anche proveniente da un altra "bootsegMix" call) #frac: size of the boot resample.. #start : un vettor con i nomi (se non fornito gli starting values sono presi da fit) #----------------------- extract.psi<-function(obj){ #questa funzione restituisce i "kappa", ovvero i coeff di psi.. nomiG<-obj$namesGZ$nomiG b<-fixef(obj[[1]])[c("G0",nomiG)] b } #----------------------- update.lme.call<-function (old.call, fixed., ..., evaluate=FALSE) { call <- old.call extras <- match.call(expand.dots = FALSE)$... if (!missing(fixed.)) call$fixed <- update.formula(call$fixed, fixed.) if (length(extras) > 0) { existing <- !is.na(match(names(extras), names(call))) for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if (any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if (evaluate) eval(call, parent.frame()) else call } #--------- #--------- startKappa00<-extract.psi(fit)[1] Z <- fit$Z #segmented covariate rangeZ<-quantile(Z, c(.05,.95), names=FALSE) #quanti soggetti? 
Attenzione se ci sono nested re, sotto non funziona, o meglio da i livelli del outermost group #idLevels <- levels(fit$lme.fit$groups[,ncol(fit$lme.fit$groups)]) #N<- length(idLevels) newData<-fit$lme.fit$data nomeRispo<-all.vars(formula(fit$lme.fit))[1] #AGGIUSTA la risposta newData[,nomeRispo]<-newData[,nomeRispo] + fit$Off nome.id <-names(fit$lme.fit$groups)[ncol(fit$lme.fit$groups)] #name of the innermost grouping variable newData[, nome.id]<- factor(newData[, nome.id]) var.id<-newData[, nome.id] idLevels<-levels(var.id) N<- length(idLevels) o.b<-fit$boot.call #old: start.psi<-extract.psi(fit) #old: est.psi<-start.psi["G0"] #old: call.b<-update(object=fit, obj=o.b, data=newD, psi=est.psi, display=FALSE, evaluate=FALSE) call.b<-update(object=fit, obj=o.b, data=newD, it.max=it.max.b, start=list(kappa0=startKappa0,kappa=startingKappa), display=FALSE, evaluate=FALSE) call.b$random <- fit$randomCALL o.ok<-update.lme.call(o.b, fixed.=paste(nomeRispo,"~."), evaluate=FALSE) #o.ok<-update.lme.call(o.b, fixed.=y~., evaluate=FALSE) #mycall$data=quote(gh) #o.ok<-update.lme.call(o.b, fixed.=y~.,evaluate=FALSE) #old: call.ok<-update(object=fit, obj=o.ok, data=newData, psi=est.psi.b, display=FALSE, evaluate=FALSE) #o.ok$fixed<- update.formula(o.ok$fixed, paste(nomeRispo,"~.")) call.ok<-update(object=fit, obj=o.ok, data=newData, it.max=it.max, start=list(kappa0=startKappa0.b,kappa=startingKappa.b), display=FALSE, evaluate=FALSE) call.ok$n.boot <- call.b$n.boot<-0 call.ok$control <- call.b$control<-quote(seg.control(display=FALSE)) all.L<-all.psi<-NULL it<-0 L0<-L.orig<-logLik(fit$lme.fit.noG)# logL(fit, metodo=metodo) if(display){ flush.console() cat("original data:", 0, " logLik =", formatC(as.numeric(L.orig), 3, format = "f")," psi parms:", formatC(extract.psi(fit),4,format="f"),"\n") } if(is.null(start)){ startingKappa<-extract.psi(fit) startKappa0<- startingKappa[1] startingKappa<-startingKappa[-1] nomiKappa<-names(startingKappa) nomiKappa<-sapply(strsplit(nomiKappa, "G\\."),function(x)x[2]) names(startingKappa) <- nomiKappa } else { nomiG<-sapply(strsplit(fit$namesGZ$nomiG, "G\\."),function(x)x[2]) if(length(intersect(names(start), c("G0", nomiG)))!=length(start)) stop("'start' should include all the changepoint parameters") startKappa0<-start["G0"] startingKappa<-start[-which("G0"%in%names(start))] nomiKappa<-names(startingKappa) } if(is.null(seed)) seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) if(!is.numeric(seed)) stop(" 'seed' is not numeric") set.seed(seed) #browser() for(i in seq(B)){ #build the boot sample #idx<-sample(N, replace=TRUE) #idx<-sample(1:N, size=trunc(N*frac), replace=TRUE) idx<-sample(idLevels, size=trunc(N*frac), replace=TRUE) newD <- do.call("rbind",lapply(idx, function(x)newData[newData[,nome.id]==x,])) newD$y.b<- newD[,nomeRispo] # r<-list(newD=newD, call.b=call.b) # return(r) #-->> CAMBIA STARTING VALUE in call.b if(startKappa0>=rangeZ[2] | startKappa0<=rangeZ[1] ) startKappa0<- jitter(startKappa00,factor=5) #sum(rangeZ)/2 fit.b<-try(suppressWarnings(eval(call.b)), silent=TRUE) #envir=newD) if(!is.list(fit.b)){ # fit.b<-NULL it.b<-0 while(!is.list(fit.b)){ idx<-sample(idLevels, size=trunc(N*frac), replace=TRUE) newD <- do.call("rbind",lapply(idx, function(x)newData[newData[,nome.id]==x,])) newD$y.b<- newD[,nomeRispo] startKappa0<- jitter(startKappa00,factor=5) fit.b<-try(suppressWarnings(eval(call.b)), silent=TRUE) #envir=newD) it.b<-it.b+1 if(it.b>=10) break } } if(is.list(fit.b)){ #old: start.psi.b<-extract.psi(fit.b) #old: 
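# Each bootstrap replicate below resamples whole subjects (cluster bootstrap) with
# replacement, fits the model on the resample with few iterations (it.max.b), uses the
# resulting kappa estimates as starting values for a refit on the original data, and
# keeps the new solution only if the log-likelihood improves.
# Sketch of the resampling step (not run; 'dat' and 'id' are hypothetical):
#   ids   <- sample(levels(factor(dat$id)), replace = TRUE)
#   dat.b <- do.call(rbind, lapply(ids, function(g) dat[dat$id == g, ]))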
est.psi.b<-start.psi.b["G0"] startingKappa.b<-extract.psi(fit.b) startKappa0.b<- startingKappa.b[1] startingKappa.b<-startingKappa.b[-1] #NB "nomiKappa" dovrebbero essere sempre gli stessi names(startingKappa.b) <- nomiKappa fit.ok<-try(suppressWarnings(eval(call.ok)), silent=TRUE) # data=newData) #L1<-if(is.list(fit.ok)) logL(fit.ok, metodo=metodo) else (-Inf) #22/05/18 aggiunto un altro tentativo... ho notato che l'insuccesso pu? dipendere dagli starting value.. if(!is.list(fit.ok)){ call.ok$start<-NULL fit.ok<-try(suppressWarnings(eval(call.ok)), silent=TRUE) } L1<-if(is.list(fit.ok)) as.numeric(logLik(fit.ok)) else (-Inf) } else { stop("the bootstrap fit is unsuccessful") } if(L0=max(z_i) # se nq>0 gli \hat{\psi}_i sono annullati se \hat{\psi}_i<=min(sort(z)[1:nq]) o \hat{\psi}>= max(rev(z)[1:nq] #adjust valore numerico (0,1,2). # Se 0 i psi_i vengono stimati "normalmente" e alla convergenza al vettore numerico dei psi viene assegnato un # vettore di attributi che serve ad etichettare se il breakpoint ? plausibile o meno (secondo il valore di nq) # Se 1 i psi ottenuti alla fine dell'algoritm vengono aggiustati secondo il valore di nq. Ad es., se nq=1 il breakpoint # immediatamente prima del max (o dopo il min) vengono forzati al min/max e cos? sono di fatto annullati; naturalmente il # modello ? ristimato secondo i nuovi psi. Se 2 l'aggiustamento viene fatto durante l'algoritmo.. #--------------------- reboot.slme <-function(fit, B=10, display=FALSE, break.boot=B, metodo=1, frac=1, it.max=6, it.max.b=5, seed=NULL, start=NULL, msg=TRUE){ #metodo: viene passato alla funzione logL. Se 1 la logL che viene calcolata e' quella della componente # fit$lme.fit.noG, namely the logLik from the lme fit without the G variables.. #bootRestart for slme4 #fit: un oggetto di classe "segmented.lme" (anche proveniente da un altra "bootsegMix" call) #frac: size of the boot resample.. #start : un vettor con i nomi (se non fornito gli starting values sono presi da fit) #----------------------- extract.psi<-function(obj){ #questa funzione restituisce i "kappa", ovvero i coeff di psi.. nomiG<-obj$namesGZ$nomiG b<-fixef(obj[[1]])[c("G0",nomiG)] b } #----------------------- update.lme.call<-function (old.call, fixed., ..., evaluate=FALSE) { call <- old.call extras <- match.call(expand.dots = FALSE)$... if (!missing(fixed.)) call$fixed <- update.formula(call$fixed, fixed.) if (length(extras) > 0) { existing <- !is.na(match(names(extras), names(call))) for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if (any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if (evaluate) eval(call, parent.frame()) else call } #--------- #--------- startKappa00<-extract.psi(fit)[1] Z <- fit$Z #segmented covariate rangeZ<-quantile(Z, c(.05,.95), names=FALSE) #quanti soggetti? 
Attenzione se ci sono nested re, sotto non funziona, o meglio da i livelli del outermost group #idLevels <- levels(fit$lme.fit$groups[,ncol(fit$lme.fit$groups)]) #N<- length(idLevels) newData<-fit$lme.fit$data nomeRispo<-all.vars(formula(fit$lme.fit))[1] #AGGIUSTA la risposta newData[,nomeRispo]<-newData[,nomeRispo] + fit$Off nome.id <-names(fit$lme.fit$groups)[ncol(fit$lme.fit$groups)] #name of the innermost grouping variable newData[, nome.id]<- factor(newData[, nome.id]) var.id<-newData[, nome.id] idLevels<-levels(var.id) N<- length(idLevels) o.b<-fit$boot.call #old: start.psi<-extract.psi(fit) #old: est.psi<-start.psi["G0"] #old: call.b<-update(object=fit, obj=o.b, data=newD, psi=est.psi, display=FALSE, evaluate=FALSE) call.b<-update(object=fit, obj=o.b, data=newD, it.max=it.max.b, start=list(kappa0=startKappa0,kappa=startingKappa), display=FALSE, evaluate=FALSE) call.b$random <- fit$randomCALL o.ok<-update.lme.call(o.b, fixed.=paste(nomeRispo,"~."), evaluate=FALSE) #o.ok<-update.lme.call(o.b, fixed.=y~., evaluate=FALSE) #mycall$data=quote(gh) #o.ok<-update.lme.call(o.b, fixed.=y~.,evaluate=FALSE) #old: call.ok<-update(object=fit, obj=o.ok, data=newData, psi=est.psi.b, display=FALSE, evaluate=FALSE) #o.ok$fixed<- update.formula(o.ok$fixed, paste(nomeRispo,"~.")) call.ok<-update(object=fit, obj=o.ok, data=newData, it.max=it.max, start=list(kappa0=startKappa0.b, kappa=startingKappa.b), display=FALSE, evaluate=FALSE) #call.ok$n.boot <- call.b$n.boot<-0 call.ok$control <- call.b$control<-quote(seg.control(display=FALSE, n.boot=0)) all.L<-all.psi<-NULL it<-0 L0<-L.orig<-logLik(fit$lme.fit.noG)# logL(fit, metodo=metodo) if(display){ flush.console() cat("original data:", 0, " logLik =", formatC(as.numeric(L.orig), 3, format = "f")," psi parms:", formatC(extract.psi(fit),4,format="f"),"\n") } if(is.null(start)){ startingKappa<-extract.psi(fit) startKappa0<- startingKappa[1] startingKappa<-startingKappa[-1] nomiKappa<-names(startingKappa) nomiKappa<-sapply(strsplit(nomiKappa, "G\\."),function(x)x[2]) names(startingKappa) <- nomiKappa } else { nomiG<-sapply(strsplit(fit$namesGZ$nomiG, "G\\."),function(x)x[2]) if(length(intersect(names(start), c("G0", nomiG)))!=length(start)) stop("'start' should include all the changepoint parameters") startKappa0<-start["G0"] startingKappa<-start[-which("G0"%in%names(start))] nomiKappa<-names(startingKappa) } #if(is.null(seed)) seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) if(is.null(seed)){ mY <- mean(newData[,nomeRispo]) sepDec<-if(options()$OutDec==".") "\\." 
else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) set.seed(seed) } else { if(is.na(seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { if(!is.numeric(seed)) stop(" 'seed' is not numeric") else set.seed(seed) } } #browser() n.boot.rev<- 3 alpha1<-alpha[1] for(i in seq(B)){ diff.selected.ss <- rev(diff(na.omit(all.L))) if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ #qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<- mean(startKappa0>Z) qpsi<-ifelse(abs(qpsi-.5)<.1, alpha1, qpsi) alpha1<-1-alpha1 #est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) startKappa0 <- quantile(Z, probs=1-qpsi, names=FALSE) } #build the boot sample #idx<-sample(N, replace=TRUE) #idx<-sample(1:N, size=trunc(N*frac), replace=TRUE) idx<-sample(idLevels, size=trunc(N*frac), replace=TRUE) newD <- do.call("rbind",lapply(idx, function(x)newData[newData[,nome.id]==x,])) newD$y.b<- newD[,nomeRispo] # r<-list(newD=newD, call.b=call.b) # return(r) #-->> CAMBIA STARTING VALUE in call.b if(startKappa0>=rangeZ[2] | startKappa0<=rangeZ[1] ) startKappa0<- jitter(startKappa00,factor=5) #sum(rangeZ)/2 fit.b<-try(suppressWarnings(eval(call.b)), silent=TRUE) #envir=newD) if(!is.list(fit.b)){ # fit.b<-NULL it.b<-0 while(!is.list(fit.b)){ idx<-sample(idLevels, size=trunc(N*frac), replace=TRUE) newD <- do.call("rbind",lapply(idx, function(x)newData[newData[,nome.id]==x,])) newD$y.b<- newD[,nomeRispo] startKappa0<- jitter(startKappa00,factor=5) fit.b<-try(suppressWarnings(eval(call.b)), silent=TRUE) #envir=newD) it.b<-it.b+1 if(it.b>=10) break } } if(is.list(fit.b)){ #old: start.psi.b<-extract.psi(fit.b) #old: est.psi.b<-start.psi.b["G0"] startingKappa.b<-extract.psi(fit.b) startKappa0.b<- startingKappa.b[1] startingKappa.b<-startingKappa.b[-1] #NB "nomiKappa" dovrebbero essere sempre gli stessi names(startingKappa.b) <- nomiKappa fit.ok<-try(suppressWarnings(eval(call.ok)), silent=TRUE) # data=newData) #L1<-if(is.list(fit.ok)) logL(fit.ok, metodo=metodo) else (-Inf) #22/05/18 aggiunto un altro tentativo... ho notato che l'insuccesso pu? dipendere dagli starting value.. if(!is.list(fit.ok)){ call.ok$start<-NULL fit.ok<-try(suppressWarnings(eval(call.ok)), silent=TRUE) } L1<-if(is.list(fit.ok)) as.numeric(logLik(fit.ok)) else (-Inf) } else { stop("the bootstrap fit is unsuccessful") } if(L0break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end boot replicates #============================================================================================ fit$history.boot.restart<-cbind(b=1:length(all.psi),psi=all.psi, logL=all.L) fit$seed<-seed #r<-list(seg.lme.fit=fit, history=cbind(b=1:length(all.psi),psi=all.psi, logL=all.L) ) if(msg) cat(" New solution(s) found:", length(unique(all.psi)), "\n") fit } #------------------ fn.re<-function(obj){ #restituisce un array n x n.ranef x terms # n e' il n. totale delle misurazioni.. # n.ranef e' il n. dei random effects (tipicamente e' 1, >1 con nested..) # terms e' il n. dei termini coinvolti nei random effects (ad es., intercept, x ..) ro<-ranef(obj) n.levels<- ncol(obj$groups) #n. dei livelli casuali (ad es., se nested..) 
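# --- Illustrative sketch, not part of the package code: the bootstrap restart above resamples whole
# --- clusters (levels of the grouping factor) with replacement and stacks their rows. The toy base-R
# --- example below (made-up data frame 'd' and column names) mirrors that resampling step; the
# --- if(FALSE) guard ensures it is never executed if this file is sourced.
if (FALSE) {
  set.seed(1)
  d <- data.frame(id = rep(1:5, each = 4), x = rnorm(20))     # toy grouped data
  idLev <- unique(d$id)
  idx <- sample(idLev, size = length(idLev), replace = TRUE)  # resample whole clusters
  d.boot <- do.call(rbind, lapply(idx, function(i) d[d$id == i, , drop = FALSE]))
  table(d.boot$id)   # some clusters appear more than once, some not at all
}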
if(n.levels<=1) { ro<-list(ro) names(ro)<-names(obj$groups) } nomi.levels<-names(obj$groups) #nomi degli effetti casuali names(ranef(obj)) n.terms<-sapply(ro, ncol) nomiTermini<- unique(as.vector(unlist(sapply(ro, colnames)))) tutti<-array(0, c(nrow(obj$groups), ncol(obj$groups), max(n.terms)), dimnames=list(NULL, names(obj$groups), nomiTermini)) for(nome in nomiTermini){ for(j in nomi.levels){ if(nome %in% names(ro[[j]])){ for(i in unique(obj$groups[,j])) tutti[obj$groups[,j]==i,j,nome] <- ro[[j]][rownames(ro[[j]])==i, nome] } } } tutti } #------------------ update.lme.call<-function (old.call, fixed., ..., evaluate=FALSE) { call <- old.call extras <- match.call(expand.dots = FALSE)$... if (!missing(fixed.)) call$fixed <- update.formula(call$fixed, fixed.) if (length(extras) > 0) { existing <- !is.na(match(names(extras), names(call))) for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if (any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if (evaluate) eval(call, parent.frame()) else call } #--------------------------------------------------------------------------- f.pd<-function(obj){ #dato un modello lme 'obj' restituisce una matrice pdMat che deve essere utilizzata come componente random # nelle call "call.ok$random<-list(id=pd)" pdClasse<-class(obj$modelStruct$reStruct[[1]])[1] if(pdClasse=="pdBlocked"){ #assumiamo solo 2 blocchi..(? un LIMITE, ma ? facile generalizzare..) start.v<-unlist(lapply(obj$modelStruct$reStruct[[1]], function(z){as.numeric(z)})) cl1<-class(obj$modelStruct$reStruct[[1]][[1]])[1] cl2<-class(obj$modelStruct$reStruct[[1]][[2]])[1] fo1<-attr(obj$modelStruct$reStruct[[1]][[1]],"formula") fo2<-attr(obj$modelStruct$reStruct[[1]][[2]],"formula") no1<-attr(obj$modelStruct$reStruct[[1]][[1]],"Dimnames")[[1]] no2<-attr(obj$modelStruct$reStruct[[1]][[2]],"Dimnames")[[1]] pd<-pdBlocked(start.v, pdClass = c(cl1,cl2), nam = list(no1, no2), form=list(fo1, fo2)) } else { fo<-attr(obj$modelStruct$reStruct[[1]],"formula") pd <- pdMat(as.numeric(obj$modelStruct$reStruct[[1]]), form = fo, pdClass = pdClasse) } pd} #--------------------------------------------------------------------------- ### #browser() h <- control$h if(!(is.call(obj) || class(obj)[1]=="lme")) stop(" 'obj' should be a lme fit or a lme call") if(missing(psi) && it.max==0) stop("Please supply 'psi' with 'it.max=0'") if(is.call(obj)) { my.call <- obj datacall <- deparse(obj$data) if(is.null(random)) random<-eval(obj$random) } else { my.call <- obj$call datacall<- deparse(obj$call$data) if(is.null(random)) random<-eval(obj$call$random) } #my.call<-if(is.call(obj)) obj else obj$call #datacall<- if(is.call(obj)) deparse(obj$data) else deparse(obj$call$data) #if(is.null(random)) {random<- if(is.call(obj)) eval(obj$random) else eval(obj$call$random) } randomCALL<-random G0random<- sapply(random, function(.x) "G0" %in% all.vars(attr(.x, "formula"))) if(it.max==0 && !any(G0random)) stop("'G0' in the random part is meaningless with 'it.max=0'") # name.group<-nameRandom<-names(random) # if(is.null(random)) { # # A CHE SERVE???????????????? 
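# --- Illustrative sketch, not part of the package code: update.lme.call() above edits a stored model
# --- call (its 'fixed' formula) and optionally re-evaluates it. The hypothetical base-R example below
# --- shows the same "modify the call, then eval()" pattern with lm(); if(FALSE)-guarded, never run.
if (FALSE) {
  d0 <- data.frame(y = rnorm(30), x = rnorm(30), z = rnorm(30))
  fit0 <- lm(y ~ x, data = d0)
  cl <- fit0$call                                       # the stored, unevaluated call
  cl$formula <- update.formula(cl$formula, . ~ . + z)   # edit one argument of the call
  refit <- eval(cl)                                     # re-evaluate the modified call
  coef(refit)
}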
# random=list( # id=pdMat(as.numeric(obj$modelStruct$reStruct[[1]]), # form=attr(obj$modelStruct[[1]][[1]],"formula"), # pdClass=class(obj$modelStruct$reStruct[[1]])[1])) # randomCALL<- if(is.call(obj)) obj$random else obj$call$random # } else { # randomCALL<- random # } if (!is.null(random)) { if (is.list(random)) { nameRandom <- names(random) #nomi dei fattori id if(is.null(nameRandom)) stop("random argument must be a *named* list.") else if(sum(nameRandom == "")) stop("all elements of random list must be named") } else stop("random effects should be specified as named lists") random.vars <- c(unlist(lapply(random, function(x) all.vars(formula(x)))), nameRandom) names(random.vars)<-NULL #per evitare casini.. spesso i nomi erano le variabili stesse.. } else random.vars <- NULL J<-length(random) #if(missing(Z) && missing(seg.Z)) stop(" 'Z' or 'seg.Z' should be provided") #name.Z<-if(missing(seg.Z)) deparse(substitute(Z)) else all.vars(seg.Z) if(missing(seg.Z)) stop(" 'seg.Z' should be provided") name.Z<- all.vars(seg.Z) if(length(name.Z)>1) stop("segmented.lme works with 1 breakpoint only") allNOMI<-unique(c(name.Z, all.vars(my.call$fixed), random.vars, all.vars(z.psi), all.vars(x.diff))) formTUTTI<-as.formula(paste("~.+", paste(allNOMI,collapse="+"))) formTUTTI<-update.formula(my.call$fixed, as.formula(paste("~.+", paste(allNOMI,collapse="+")))) #U and G0 have not yet been defined formTUTTI<-update.formula(formTUTTI, .~.-U-G0) anyFixedG<-FALSE if(!is.null(fixed.parms)){ name.fixed.butG0<-setdiff(names(fixed.parms),"G0") #nomi dei termini fissi escluso G0 anyFixedG<-if(length(name.fixed.butG0)>=1) TRUE else FALSE #ci sono fixed coef nel submodel of psi? if(anyFixedG){ formTUTTI<-update.formula(formTUTTI, as.formula(paste("~.+", paste(name.fixed.butG0,collapse="+")))) } } if(is.null(my.call$data)) stop("`obj' should include the argument `data'") if(missing(data)) { mf<-model.frame(formTUTTI, data=eval(my.call$data), na.action=na.omit) } else { mf<-model.frame(formTUTTI, data=data, na.action=na.omit) } # if (length(allvars)) { # mf$formula <- as.formula(paste(paste(deparse(gp$fake.formula, # backtick = TRUE), collapse = ""), "+", paste(allvars, # collapse = "+"))) # mf <- eval(mf, parent.frame()) # } #adesso si deve ordinare il dataframe.. mf<-mf[order(mf[[nameRandom[J]]]),] nomeRispo<-names(mf)[1] Rispo<-model.response(mf) # #browser() Z <- mf[[name.Z]] #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) limZ <- as.matrix(quantile(Z, names = FALSE, probs = c(alpha[1], alpha[2]))) min.Z<- min(limZ[,1]) max.Z<- max(limZ[,1]) #browser() if(!missing(psi)) { if(length(psi)>1) stop("segmented.lme works with 1 breakpoint only") if(psi<=min(limZ) || psi>=max(limZ)) stop("the provided psi is outside the range, see 'alpha' in seg.control()", call.=FALSE) } id <- mf[[nameRandom[J]]] #the innermost factor if(is.factor(id)) id <-factor(id, levels = unique(id)) ni<- tapply(id, id, length) #vector of cluster sizes N<-length(ni)#n. of clusters (subjects) n<-length(id) #n. 
of total measurements id.x.diff<- FALSE id.z.psi <- FALSE #M.z.psi <- mf[all.vars(z.psi)] # #M.x.diff <- mf[all.vars(x.diff)] # M.z.psi <- model.matrix(z.psi, data = mf) if("(Intercept)"%in%colnames(M.z.psi)) M.z.psi<-M.z.psi[,-match("(Intercept)", colnames(M.z.psi)),drop=FALSE] M.x.diff <- model.matrix(x.diff, data = mf) if("(Intercept)"%in%colnames(M.x.diff)) M.x.diff<-M.x.diff[,-match("(Intercept)", colnames(M.x.diff)),drop=FALSE] fixed<-"U+G0" #fixed<-"U" nomiG<-NULL #se non ci sono explicative nel changepoint (se ci sono poi viene sovrascritto) namesGZ<-list(nameZ=name.Z) Offs.kappa<-0 if(NCOL(M.z.psi)>0){ id.z.psi <- TRUE Z.psi <- data.matrix(M.z.psi) if(anyFixedG){ if(!all(name.fixed.butG0 %in% colnames(M.z.psi))) stop("variable(s) in 'fixed.parms' should be included in 'z.psi'") Offs.kappa<-Fixed.z.psi<-drop(Z.psi[, name.fixed.butG0, drop=FALSE]%*% fixed.parms[name.fixed.butG0]) Z.psi<-Z.psi[,setdiff(colnames(Z.psi), name.fixed.butG0), drop=FALSE] } if(ncol(Z.psi)>0){ nomiG<-paste("G.",colnames(Z.psi),sep="") namesGZ$nomiG<-nomiG fixed<-paste(fixed,paste(nomiG,collapse="+"),sep="+") } else { id.z.psi <- FALSE } } else { #se NCOL(M.z.psi)<=0 if(anyFixedG) stop("variable(s) in 'fixed.parms' should be included in 'z.psi' ") } if(NCOL(M.x.diff)>0) { X.diff <- data.matrix(M.x.diff) id.x.diff <- TRUE nomiUx<-paste("U.",colnames(M.x.diff),sep="") namesGZ$nomiUx<-nomiUx fixed<-paste(fixed,paste(nomiUx,collapse="+"),sep="+") } #================================================================== #Queste funzioni min1() e max1() restituiscono il "quasi" min o max # if(nq>0){ # min1<-function(x,na.rm=FALSE){x<-sort(x)[-(1:nq)];min(x,na.rm=na.rm)} # max1<-function(x,na.rm=FALSE){x<-rev(x)[-(1:nq)];max(x,na.rm=na.rm)} # } else { # min1<-min # max1<-max # } # adjust<-max(min(adjust,2),0) #solo 0,1,2 sono consentiti.. # # #================================================================== # # min.Z<-min1(Z) # max.Z<-max1(Z) mf["U"]<- 1 #rep(1, n) #if(!is.null(obj$data)) my.dd<-cbind(obj$data,my.dd) #browser() #Qua ci possono essere 2 variabili di effetti casuali. Attenzione all'ordine.. il secondo! #if(name.group!="id") mf['id']<-mf[name.group] #costruisci un'altra variabile di clustering con il nome id #correzione per nested r.e: poich? id ? quello "giusto" (costruito prima), allora # mf['id']<-id #E' necessario costruire una nuova id con nome esattamente 'id'??!??! mf[name.Z]<- Z est.kappa0<-TRUE if("G0" %in% names(fixed.parms)) { est.kappa0<-FALSE kappa0<-kappa0Fixed<-fixed.parms["G0"] } if(est.kappa0){ if(!is.null(start$kappa0)) { psi<-if(psi.link=="logit") inv.logit(start$kappa0,min.Z,max.Z) else start$kappa0 } if(missing(psi)){ # formulaFix.Poly<-update.formula(my.call$fixed, paste("~.+",name.Z,"+",paste("I(",name.Z,"^2)",sep=""))) # obj2<-update.lme.call(my.call, fixed = formulaFix.Poly, data=mf, evaluate=TRUE) # psi<- -fixed.effects(obj2)[name.Z]/(2*fixed.effects(obj2)[paste("I(",name.Z,"^2)",sep="")]) psi<-tapply(Z, id, function(.x) sum(range(.x))/2) if(any(psi <= min(Z))||any(psi>=max(Z))) stop("psi estimated by midvalues is outside the range") #the quadratic fit } } else { #se e' fissato e quindi non devi stimarlo psi<- kappa0 } #browser() psi.new <- psi #stime iniziali if(length(psi)!=1 && length(psi)!=N) stop("length(psi) has to be equal to 1 or n. of clusters") if(length(psi) == 1) { psi.new <- rep(psi.new, N) #subj-specific changepoints } psi.ex<-rep(psi.new, ni ) #length = N (n. 
tot obs) #---------------------------------------- mf$U<- (Z-psi.ex)*(Z>psi.ex) #pmax(0, Z-psi.ex) formulaFix.noG<-update.formula(my.call$fixed, paste("~.+","U")) if(id.x.diff){ Ux<- as.matrix(mf$U*X.diff) colnames(Ux)<-nomiUx mf<-cbind(mf,Ux) #$Ux<- my.dd$U*X.diff formulaFix.noG<-update.formula(my.call$fixed, paste(".~.+U+",paste(nomiUx,collapse="+"),sep="")) } #se vuoi assumere i psi fissi (it.max=0) if(it.max==0) { #aggiorna i random effects. Attenzione in tal caso random deve essere "U" ( o "1"). #Se fosse "U+G0" darebbe errore perch? G0 non esiste #Oppure dovresti modificare la formula di random, #attr(random[[1]], "formula")<-update.formula(attr(random[[1]], "formula"), ~.-G0) formulaRand<-formulaRandOrig<-my.call$random call.ok<-update.lme.call(my.call, fixed = formulaFix.noG, random=random, data=mf, evaluate=FALSE) o<-eval(call.ok) return(o) } #end if(it.max=0) #--------------------------------------------------------------------------- #should we fit a preliminary model? extract starting values start.delta0<-start$delta0 if(id.x.diff) start.delta<-start$delta need.prelim<- (is.null(start.delta0) || (id.x.diff && is.null(start.delta))) if(need.prelim){ random.noG <- random for(j in 1:J) attr(random.noG[[j]],"formula")<-update.formula(formula(random[[j]]), ~.-G0) o<-update.lme.call(my.call, fixed=formulaFix.noG, random=random.noG, data=mf, evaluate=TRUE) #o<-update.lme.call(my.call, fixed=formulaFix.noG, data=mf, evaluate=TRUE) delta0i<-unlist(coef(o)["U"]) #length= N if(id.x.diff) delta<-fixed.effects(o)[nomiUx] #length= n.1 } else { delta0i<-if(length(start.delta0)==N) start.delta0 else rep(start.delta0,N) if(id.x.diff) delta<-start.delta[nomiUx] } start.kappa<-start$kappa eta.psi<-0 if(id.z.psi) { if(is.null(start.kappa)) { kappa<- rep(0, ncol(Z.psi)) names(kappa)<-nomiG eta.psi<-rep(0,nrow(Z.psi)) } else { kappa<-start.kappa names(kappa)<-paste("G.",names(kappa),sep="") if((length(kappa)!=NCOL(M.z.psi)) || any(is.na(match(names(kappa), nomiG)))) stop("error in the names/length of start.kappa") eta.psi <- drop(Z.psi%*%kappa) } } ################################# if(anyFixedG) eta.psi<- eta.psi + Offs.kappa #Offs.kappa<-data.matrix(mf[name.fixed.butG0])%*%fixed.parms[name.fixed.butG0] #----------------------------------------------------------- formulaFix<-update.formula(my.call$fixed, paste(".~.+",fixed)) if(!est.kappa0) formulaFix<-update.formula(formulaFix, .~.-G0) formulaRand<-formulaRandOrig<-my.call$random minMax <- cbind(tapply(Z,id,min),tapply(Z,id,max)) #matrice nx2 dei min-max #--------------------------------------------------------- call.ok<-update.lme.call(my.call, fixed = formulaFix, random=random, data=mf, evaluate=FALSE, control = list(msVerbose = FALSE, niterEM = 100, opt = "optim")) if(!is.null(start.pd)) call.ok$random<-quote(list(id=start.pd)) #-------------------------------------------------------- kappa0i <- if(psi.link=="logit") logit(psi.ex,min.Z,max.Z) else psi.ex #length=n if(est.kappa0) kappa0<-mean(kappa0i) ki<- kappa0i - kappa0 etai<- kappa0i + eta.psi psi.ex<-if(psi.link=="logit") inv.logit(etai,min.Z,max.Z) else etai #length=n #---------------------------------------------------------- boot.call<-update.lme.call(my.call, y.b~., data=newData, evaluate=FALSE) #salva la call before modifying obj it <- 1 epsilon <- 9 obj<-o #serve per estrarre la logLik b.new<-rep(.1,length(all.vars(formulaFix))) #la risposta conteggiata in all.vars(formulaFix) conta per l'intercetta while(abs(epsilon) > tol){ #if(it==9) browser() DD<-if(psi.link=="logit") 
(max.Z-min.Z)*exp(etai)/((1+exp(etai))^2) else rep(1,n) V<-ifelse(Z >psi.ex, -1, 0) VD <- V*DD mf$U <- pmax(0, Z-psi.ex) mf$G0<- rep(delta0i,ni)*VD #rowSums(rep(delta0i,ni)*VD) if(id.x.diff){ Ux<- as.matrix(mf$U*X.diff) colnames(Ux)<-nomiUx mf[,which(names(mf)%in%nomiUx)]<-Ux deltaMatrix<-cbind(rep(delta0i,ni), matrix(delta,nrow=length(V),ncol=length(delta),byrow=TRUE)) deltaVDx<-deltaMatrix*VD*cbind(1,M.x.diff) mf$G0<-rowSums(deltaVDx) } if(id.z.psi){ G<-cbind(mf$G0,mf$G0*M.z.psi) colnames(G)<-c("G0",nomiG) mf[,colnames(G)]<-G } dev.old <- obj$logLik #costruisci l'offset e modifica la risposta.. Off<- if(est.kappa0) -kappa0i*mf$G0 else -ki*mf$G0 if(id.z.psi) Off<- Off - drop(as.matrix(mf[nomiG])%*%kappa[nomiG]) mf[nomeRispo]<-Rispo-Off # estimate the model ######################################## obj<-eval(call.ok) ######################################## #formulaFix.noG #random.noG b.old<-b.new b.new<-fixed.effects(obj) ### if(psi.new>max(Z)| psi.new= it.max) break #if(abs(epsilon) <= tol) break it <- it+1 #stopping rules not met: update the estimates ##------------------------------- continua<- (abs(epsilon) > tol && it< it.max) #delta0i<-if(inflate.res) inflate.2residuals(obj, coeff=TRUE)[,"U"] else unlist(coef(obj)["U"]) #length=N if(id.x.diff) delta <- fixed.effects(obj)[nomiUx] delta0i <- unlist(coef(obj)["U"]) kappa0.old <- kappa0 #length=1 kappa0 <- fixed.effects(obj)["G0"] if(est.kappa0 && continua){ kappa0<- if(psi.link=="identity") adj.psi(kappa0, limZ) else max(min(9,kappa0),-9) kappa0 <- kappa0.old + (kappa0 - kappa0.old)*h/2 #questo controllo e' sbagliato se link.psi="logit" #if(kappa0<= min(Z) || kappa0>=max(Z)) stop("estimated psi outside the range") } #browser() kappa0i.old<-kappa0i #length=n #browser() RE<-fn.re(obj) # array n x n.randmEff (2 se sono nested..) x n.termini (U, G0,..) ki<-if("G0" %in% dimnames(RE)[[3]]) rowSums(RE[ , ,"G0", drop=FALSE]) else rep(0,n) #NB RE[ , ,"G0"] ? una matrice di n.obs righe e che ha in ogni colonna i breakpoint relativi ad ogni livello di nesting.. # RE[ , J,"G0"] e' l'innermost J=ncol(RE[ , ,"G0"]) #Quindi i ki sono la somma di tutti i termini random (anche a diversi livelli di nested) kappa0i <- kappa0+ki ########I codici sotto non funzionano con nested r.e. # ki<-if("G0"%in%names(ranef(obj))) unlist(ranef(obj)["G0"]) else rep(0,N) # kappa0i <- kappa0+ki #length=N # #kappa0i <-if(inflate.res) inflate.2residuals(obj, coeff=TRUE)[,"G0"] else unlist(coef(obj)["G0"]) #length=N # kappa0i<-rep(kappa0i,ni) #+ kappa0i.old #length=n # ki<-rep(ki,ni) ########################### etai<-kappa0i if(id.z.psi) { kappa.old<-kappa #length=1 kappa<-fixed.effects(obj)[nomiG] #esclude G0.. etai<-etai+drop(Z.psi%*%kappa) } #questo e' se ci sono parametri con valori *fissati* da non stimare.. if(anyFixedG){ etai <- etai+ Offs.kappa } #browser() psi.old <- psi.ex #length=n.obs psi.ex<-if(psi.link=="logit") inv.logit(etai,min.Z,max.Z) else etai #length=n #eventuale aggiustamento dei psi. # if(adjust==2){ # id.bp<-I(psi.new>minMax[,1]&psi.new= (it.max+1)) break # if(abs(epsilon) <= tol) break #NON serve, c'? il while(abs(epsilon) > tol) #f.pd() la chiamo solo se non ci sono nested r.e. (perch? in quel caso non funziona..) if(J<=1){ #se c'e' SOLO 1 r.e. 
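# --- Illustrative sketch, not part of the package code: when psi.link=="logit" the breakpoint is
# --- mapped to the real line through a rescaled logit, so the estimate cannot leave (min.Z, max.Z);
# --- DD above is d(psi)/d(eta). The functions below (logit2/inv.logit2 are invented names) restate
# --- that mapping with base R only; if(FALSE)-guarded, never run.
if (FALSE) {
  minZ <- 0; maxZ <- 10
  inv.logit2 <- function(eta, a, b) a + (b - a) * exp(eta) / (1 + exp(eta))  # eta -> psi in (a, b)
  logit2     <- function(psi, a, b) log((psi - a) / (b - psi))               # psi -> eta
  eta <- logit2(3.5, minZ, maxZ)
  inv.logit2(eta, minZ, maxZ)                         # recovers 3.5
  DD <- (maxZ - minZ) * exp(eta) / (1 + exp(eta))^2   # d(psi)/d(eta), as in the loop above
  DD
}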
pd<-f.pd(obj) call.ok$random<-quote(list(id=pd)) } } #end_while #--------------------------------------------------------------------------------------- #Adesso devi fare in modo che le linee *veramente si uniscano (no salti), boot restarting e #valore di logLik ed infine aggiorna obj<-eval(call.ok) fixed.noG<-if(is.null(nomiG)) update.formula(call.ok$fixed, paste(".~.-G0",sep="")) else update.formula(call.ok$fixed, paste(".~.-G0-",paste(nomiG, collapse="-"),sep="")) if(is.null(random.noG)){ #se "random.noG" non ? stato specificato in segmented.lme() random.noG<-random #Escludi G0 dalla formula random.. # - #18/6/16: mi sono reso conto che random pu? essere una lista che contiene diverse formula che includono "G0" (ad es., nel caso di r.e.), quindi "G0" si deve # eliminare in ogni formula.. # Just now I don't know what happen if random is a block matrix.. VERIFICARE.. comunque il codice sotto c'e'.. for(j in 1:J){ #J =n. di random cluster (a des., children %in% school,..) #questo sotto ? se random ? una lista e ogni sua componente ha una formula come "attributo".. Dovrebbero rientrare i casi di #semplici e nested r.e. NON con una matrice a blocchi.. if(!is.null(attr(random.noG[[j]], "formula"))){ #semplici e nested r.e. if("G0"%in%all.vars(attr(random.noG[[j]], "formula"))){#se la formula della componente j contiene "G0".. attr(random.noG[[j]], "formula") <- update.formula(attr(random.noG[[j]], "formula"), ~.-G0) } #questo sotto e' se c'e' una matrice a blocchi.. } else { #questo sotto e' se c'e' una matrice a blocchi.. for(k in length(random.noG[[j]])) { if(!is.null(attr(random.noG[[j]][[k]], "formula"))){ #Questo ? se ci sono matrici a blocchi quando if("G0"%in%all.vars(attr(random.noG[[j]][[k]], "formula"))){#se la formula della componente j contiene "G0".. attr(random.noG[[j]][[k]], "formula") <- update.formula(attr(random.noG[[j]][[k]], "formula"), ~.-G0) } } } #end k=1..K } } #end j=1..J } call.ok.noG<-update.lme.call(call.ok, fixed = fixed.noG, random = random.noG) mf[nomeRispo]<-Rispo obj.noG<-eval(call.ok.noG) #if(it >= (it.max+1)) warning("max no. of iterations achieved.. refit.boot() suggested", call. = FALSE) psi.new<-psi.ex[cumsum(ni)] #5/7/18: rownames(ranef(obj)[[J]]) sono del tipo "1/1", cio? tengono conto di eventuali nested.. #names(psi.new)<-rownames(ranef(obj)[[J]]) #names(psi.new)<-levels(unlist(obj$groups)) #names(psi.new)<-levels(id) ##27/6, nuovo: #se id e' numerica levels(id) e' NULL, per cui i psi.new sono senza nomi (e questo da errore in plot.segmented) #names(psi.new)<-levels(factor(id)) #funziona anche con nested r.e.?? #browser() rnfGroups<-obj.noG$groups #names(psi.new)<-levels(rnfGroups[, ncol(rnfGroups)]) #levels ordina per i nomi "nuovi" (se c'? nested 4/10 lo considera prima di 4/9').. names(psi.new)<-rownames(coef(obj.noG)) #oppure unique(rnfGroups[, ncol(rnfGroups)]) attr(psi.new,which="ni")<-table(rnfGroups[, ncol(rnfGroups)]) id.bp<-I(psi.new>=minMax[,1]&psi.new<=minMax[,2]) attr(psi.new,which="is.break")<-id.bp #mf$rispo<-Rispo #o.new<-lme.formula(rispo ~ x + U + U.x.diff, data = mf, random=list(id=pdDiag(~1+x+U)), method=..) 
#return(o.new) if(adjust==1){ #ristima il modello con i nuovi psi ( e le nuove variabili) psi.new[!id.bp] <- tapply(Z,id,max)[!id.bp]# minMax[!id.bp,2] psi.ex <- rep(psi.new, aa) #length=n.obs DD<-fn1(c(rep(kappa0,aa),kappa1), Z.psi ,2, link=psi.link) #length=n.obs V<-ifelse(Z >psi.ex, -1, 0) my.dd$U<- pmax(0, Z -psi.ex) VD <- V*DD deltaMatrix<-cbind(rep(betaa,aa), matrix(delta,nrow=length(V),ncol=length(delta),byrow=TRUE)) deltaVDx<-deltaMatrix*VD*M.x.diff G0<-rowSums(deltaVDx) G<-G0*M.z.psi colnames(G)<-c("G0",paste("G.",colnames(M.z.psi)[-1],collapse="+",sep="")) my.dd<-cbind(my.dd, G) dev.old <- obj$logLik #stima il modello: obj<-eval(call.ok) } #if(id.z.psi) names(kappa)<- colnames(M.z.psi) #? gi? fatto prima RIS <- list("lme.fit"=obj, "lme.fit.noG"=obj.noG, "psi.i"=psi.new, call=match.call()) if(!is.null(fixed.parms)) RIS$fixed.parms<-fixed.parms if(id.z.psi) { RIS$fixed.eta.psi<-drop(as.matrix(cbind(1,M.z.psi[cumsum(ni),]))%*%c(kappa0,kappa)) names(RIS$fixed.eta.psi) <-names(psi.new) } else { RIS$fixed.eta.psi<-rep(kappa0, length(psi.new)) names(RIS$fixed.eta.psi) <-names(psi.new) } if(id.x.diff) { RIS$fixed.eta.delta<-drop(as.matrix(cbind(1,M.x.diff[cumsum(ni),]))%*%fixef(obj)[c("U",nomiUx)]) names(RIS$fixed.eta.delta) <-names(psi.new) } else { RIS$fixed.eta.delta<- rep(fixef(obj)["U"], length(psi.new)) names(RIS$fixed.eta.delta) <-names(psi.new) } RIS$fixed.psi<-if(psi.link=="logit") inv.logit(RIS$fixed.eta.psi,min.Z,max.Z) else RIS$fixed.eta.psi #browser() names(RIS$fixed.psi) <- names(psi.new) RIS$call$psi.link<-psi.link #in questo modo il nome e' "completo".. RIS$boot.call<-boot.call RIS$randomCALL<-randomCALL RIS$misc$datacall<- datacall #browser() #RIS$misc$matrix.psi<- if("G0" %in% dimnames(RE)[[3]]) { RIS$misc$matrix.psi<- cbind(fixed=RIS$fixed.psi,drop(RE[cumsum(ni), , "G0", drop = FALSE])) colnames(RIS$misc$matrix.psi) <- c("fixed", names(obj$groups)) rownames(RIS$misc$matrix.psi) <- names(psi.new)#rownames(ranef(obj)[[J]]) } else { RIS$misc$matrix.psi<- matrix(RIS$fixed.psi, ncol=1) #fixed=RIS$fixed.psi) rownames(RIS$misc$matrix.psi) <- names(psi.new)#rownames(ranef(obj)[[J]]) } RIS$namesGZ<-namesGZ RIS$Off<-Off RIS$rangeZ<- tapply(Z, id, range) names(Z)<-id #names(psi.new) RIS$Z<-Z #browser() class(RIS)<- "segmented.lme" #c("segmented.lme","segmented") #opz.control<-list(...) #if(!is.null(opz.control$n.boot)) n.boot<- opz.control$n.boot if(it >= (it.max+1) && n.boot==0) warning("max no. of iterations achieved.. 'n.boot>0' suggested", call. = FALSE) if(n.boot>0){ if(display) cat("Implementing bootstrap restarting..\n") RIS <- reboot.slme(RIS, B=n.boot, display=display, break.boot=control$break.boot , seed=control$seed, msg=display)#, metodo=1, frac=1, it.max=6, it.max.b=5, start=NULL, msg=TRUE) } RIS } segmented/R/segreg.r0000644000176200001440000010427114757613114014103 0ustar liggesuserssegreg <- function(formula, data, subset, weights, na.action, family=lm, control=seg.control(), transf=NULL, contrasts=NULL, model=TRUE, x=FALSE, var.psi=TRUE, ...){ ##### ==================================================================================== #DA FARE: #1) i psi fissi OK prova #2) eliminare argomento 'transf' #3) se c'e' by consentire un n.psi diverso per categoria di by? ovvero npsi o psi devono essere liste..ok #4) la matrice dei contrasti per imporre vincoli alle slope ok #================================= #Allora considerando seggrowth() qua ci dovrebbero essere problemi in nel predict.. 
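# --- Illustrative usage sketch, not part of the package code: a hedged example of calling segreg()
# --- with a seg() term on simulated data (invented values; see ?segreg and ?seg for the
# --- authoritative interface). Note that the segmented variable enters only via seg(), not as a
# --- separate linear term. if(FALSE)-guarded, never run.
if (FALSE) {
  set.seed(10)
  x <- runif(100, 0, 10)
  y <- 2 + 0.3 * x + 1.5 * pmax(x - 4, 0) + rnorm(100, sd = .5)  # one true breakpoint at 4
  d <- data.frame(x = x, y = y)
  o <- segreg(y ~ seg(x, npsi = 1), data = d)    # segmented linear model via the formula interface
  summary(o)
}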
#================================= # `[.withAttributes` <- function(r, i) { # subset <- NextMethod() # attr(subset, "nomeBy") <- attr(r, "nomeBy") # attr(subset, "nomeX") <- attr(r, "nomeX") # attr(subset, "psi") <- attr(r, "psi") # attr(subset, "npsi") <- attr(r, "npsi") # attr(subset, "est") <- attr(r, "est") # attr(subset, "R") <- attr(r, "R") # attr(subset, "fix.psi") <- attr(r, "fix.psi") # attr(subset, "f.x") <- attr(r, "f.x") # attr(subset, "by") <- attr(r, "by") # attr(subset, "levelsBy") <- attr(r, "levelsBy") # subset # } build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } #================================= fc<- min(max(abs(control$fc),.8),1) min.step<-control$min.step maxit.glm <- control$maxit.glm alpha<-control$alpha it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # # if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K<-control$K h<-control$h if (deparse(substitute(family))=="lm" || (is.character(family) && family=="lm")){ fitter0<-"lm" #get("lm") } else { if (is.character(family)) family<-get(family, mode = "function", envir = parent.frame()) if (is.function(family)) family <- family() if (is.null(family$family)) { print(family) stop("'family' not recognized") } fitter0<-"glm" } s1<-strsplit(as.character(formula)[3],"\\+")[[1]] #separa i termini "additivi".. 
idC<-sapply(sapply(lapply(s1, function(x) grep("seg\\(",x)), function(x) (x>=1)), isTRUE) stringa<-s1[idC] #solo i termini con seg if(any(sapply(stringa, function(.x) grepl("\\* seg\\(", .x)))) stop("invalid usage of symbol '*' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\:seg\\(", .x)))) stop("invalid usage of symbol ':' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\):", .x)))) stop("invalid usage of symbol ':' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\) \\*", .x)))) stop("invalid usage of symbol '*' in conjunction with seg()") call <- match.call() if (missing(data)) data <- environment(formula) tf <- terms(formula, specials = "seg") id.ps<-attr(tf,"specials")$seg #posizione nel modelframe; vettore se ci sono piu' termini..include y ma non da interc #browser() mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0L) #"offset", mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") names(mf)[2]<-"formula" #serve se NON hai usato "formula" #browser() mf <- eval(mf, parent.frame()) #browser() n<-nrow(mf) mt <- attr(mf, "terms") intercMt<-attr(mt,"intercept") interc<-intercMt==1 Y <- model.response(mf, "any") if (length(dim(Y)) == 1L) { nm <- rownames(Y) dim(Y) <- NULL if (!is.null(nm)) names(Y) <- nm } if(!is.null(transf)) { Y.orig <- Y Y <- eval(parse(text=transf), list(y=Y)) transf.inv<-splinefun(Y, Y.orig, ties=min, method="monoH.FC") } if(is.null(alpha)) alpha<- max(.05, 1/length(Y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) #browser() .xlivelli<-.getXlevels(mt, mf) weights <- as.vector(model.weights(mf)) if(!is.null(weights) && !is.numeric(weights)) stop("'weights' must be a numeric vector") if(!is.null(weights) && any(weights < 0)) stop("negative weights not allowed") offs <- as.vector(model.offset(mf)) #browser() #funziona sia nella formula che come argomento? testo.ps<-names(mf)[id.ps] nomiCoefUNPEN<-names(mf)[-c(1,id.ps)] # X <- if(!is.empty.model(mt)){ # model.matrix(mt, mf, contrasts) # } else {stop("error in the design matrix")}#matrix(, NROW(Y), 0L) # attrContr<-attr(X, "contrasts") # n<-nrow(X) #browser() #=========================================================================== #se NON ci sono termini ps #=========================================================================== if(length(testo.ps)<=0) stop("No seg() term. Please, use lm() or glm()") #=========================================================================== #se ci sono termini ps. 
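# --- Illustrative sketch, not part of the package code: how seg() terms can be located in a formula,
# --- mirroring the strsplit()/terms(specials = "seg") bookkeeping above; base R only,
# --- if(FALSE)-guarded, never run.
if (FALSE) {
  fo <- y ~ g + seg(x, npsi = 2) + z
  strsplit(as.character(fo)[3], "\\+")[[1]]   # the additive pieces of the RHS, as text
  tf <- terms(fo, specials = "seg")
  attr(tf, "specials")$seg                    # position of the seg() term among the variables
}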
#=========================================================================== check.estPsi<-function(.x){ #questo e' utile per verificare che estPsi sia o NA oppure di 0,1 (ma non tutti 0) #l'ho fatta perche' se fosse consentito est=c(F,T) (e non FALSE,TRUE) poi T e F venivano presi come nomi di # variabili in predict.segmented if(length(.x)==1 && all(is.na(.x))){ ris<-TRUE } else { if(!is.numeric(.x)){ ris<-FALSE } else { if(length(setdiff(.x,0:1))==0){ ris<- if(all(.x==0)) FALSE else TRUE } else { ris<-FALSE } } } ris } drop.id<-lambda<-S<-B<-BB<-nomiCoefPEN<-nomiVariabPEN<-NULL l<-lapply(testo.ps,function(xx)with(mf,eval(parse(text=xx),envir=data))) nomiPS <-unlist(sapply(l,function(xx)attr(xx,"nomeX"))) psiList <-lapply(l,function(xx)attr(xx,"psi")) #browser() #npsiList <-unlist(sapply(l,function(xx) attr(xx,"npsi"))) npsiList <- lapply(l,function(xx) attr(xx,"npsi")) estList <- lapply(l,function(xx) attr(xx,"est")) if(length(setdiff(drop(unlist(sapply(estList, function(.x){if(any(is.na(.x))) 0 else unique(.x)}))), c(0,1)))>0) stop(" 'est' should include 0's and 1's only") if(length(intersect(nomiCoefUNPEN, nomiPS))>0) stop("any segmented variable included as linear term too?") #browser() #if(!all(sapply(estList,check.estPsi))) stop(" 'est' is misspecified in one or more seg() term") RList <- lapply(l,function(xx) attr(xx,"R")) fixpsiList <- lapply(l,function(xx) attr(xx,"fix.psi")) fxList <- lapply(l,function(xx) attr(xx,"f.x")) byList <- lapply(l,function(xx) attr(xx,"by")) nomiBy <- unlist(sapply(l,function(xx) attr(xx,"nomeBy"))) levelsBy <- lapply(l,function(xx) attr(xx,"levelsBy")) #browser() if(all(sapply(levelsBy, is.null)) && (length(npsiList)!=length(nomiPS))) stop(" 'npsi' is not correctly specified") limZ<-rangeSmooth<-mVariabili<-NULL #se ci sono termini ps()+ps(..,by) il nome delle variabili smooth vengono cambiati per aggiungere la variabile by nomiPS.orig <- nomiPS nomiPS.By <-paste(nomiPS, nomiBy, sep=":") nomiPS <- unlist(lapply(nomiPS.By, function(.x) sub(":NULL", "", .x))) ####se la stessa variabile e' specificata come ps o termine lineare.. if(length(intersect(nomiPS,nomiCoefUNPEN))>=1) stop("The same variable specified in seg() and as linear term") #ATTENZIONE.. se vuoi subito costruire i nomi ps(x), ps(x):z, ecc...usa i: nomiPS.ps<- sapply(nomiPS.orig, function(.x)paste("seg(",list(.x),")",sep="")) nomiPS.ps<-unlist(lapply(paste(nomiPS.ps, nomiBy, sep=":"), function(.x) sub(":NULL", "", .x))) nomiPS.ps.list<-as.list(nomiPS.ps) #serve lista for(j in id.ps) mVariabili[length(mVariabili)+1]<-mf[j] B<- Bfix <- nomiPS.ps.int.list<-vector(length=length(mVariabili) , "list") #BFixed<-BB<-Bderiv #browser() for(j in 1:length(mVariabili)) { if(nomiBy[j]=="NULL"){ # se usuale termine seg() nomiBy.j <- NULL variabileSmooth<- c(mVariabili[[j]]) #c() converte le matrici in vettori, drop() no..! 
#variabileSmooth<- attr(mVariabili[[j]], "f.x")(variabileSmooth) variabileSmooth<- fxList[[j]](variabileSmooth) #for(jj in c("nomeX", "psi", "npsi", "f.x", "nomeBy")) attr(variabileSmooth,jj)<-NULL B[[j]]<- variabileSmooth rangeSmooth[[j]] <- range(variabileSmooth) limZ[[j]] <- quantile(variabileSmooth, names=FALSE, probs=c(alpha[1],alpha[2])) nomiPS.ps.int.list[[j]]<- nomiPS[j] } else { #se ci sono termini by #browser() if(is.null(levelsBy[[j]])){ #se e' vc con variabile continua stop(" 'by' in seg(), if provided, should be a factor") #B[[j]] <-variabileBy*B[[j]] #nomiCoefPEN[[j]]<- sapply(1:ncol(B[[j]]), function(x) gsub(":", paste(".",x, ":", sep="") , nomiPS.ps[j])) } else {#se e' VC con variabile categoriale nomiBy.j <- nomiBy[j] variabileSmooth<- mVariabili[[j]][,-ncol(mVariabili[[j]]),drop=TRUE] variabileSmooth<- fxList[[j]](variabileSmooth) variabileBy<- mVariabili[[j]][, ncol(mVariabili[[j]]),drop=TRUE] M<-model.matrix(~0+factor(variabileBy)) B[[j]]<- lapply(1:ncol(M), function(.x) M[,.x]*variabileSmooth) rangeSmooth[[j]] <- lapply(B[[j]], function(.x) range(.x[.x!=0])) limZ[[j]] <- lapply(B[[j]], function(.x) quantile(.x[.x!=0], names=FALSE, probs=c(alpha[1],alpha[2]))) #browser() cond1 <- is.list(psiList[[j]]) cond2 <- length(names(psiList[[j]]))==length(levelsBy[[j]]) cond3<- length(setdiff(names(psiList[[j]]),levelsBy[[j]]))==0 if(cond1&& cond2 && cond3) psiList[[j]]<- psiList[[j]][levelsBy[[j]]] nomiPS.ps.list[[j]] <-paste(nomiPS.ps[j], levelsBy[[j]], sep="") #"seg(age):sex1" "seg(age):sex2" nomiPS.ps.int.list[[j]]<-gsub("[)]", "", gsub("seg[(]", "", nomiPS.ps.list[[j]])) #age:sexM", "age:sexF" #la linea sotto e' se hai diversi breakpoints nei gruppi.. } } } #end for(j in 1:length(mVariabili)) #browser() #sapply(limZ[[1]], cbind) repl<-pmax(sapply(B,length)*sapply(B,is.list),1) for(i in 1:length(npsiList)){ if(length(npsiList[[i]])==1) { npsiList[[i]] <- rep(npsiList[[i]], repl[i]) if(!is.list(estList[[i]]) && !is.null(levelsBy[[i]])) estList[[i]] <- rep(estList[i], repl[i]) } if(length(nomiPS.ps.int.list[[i]])!=length(npsiList[[i]])) stop(paste(" 'npsi' (its length) is not correctly specified in the seg term:",i)) if(!is.null(names(npsiList[[i]]))){ if(length(setdiff(nomiPS.ps.int.list[[i]], names(npsiList[[i]])))!=0) stop(paste(" 'npsi' (its names) is not correctly specified in the seg term:",i)) } } npsiList <- unlist(npsiList) #browser() if(!any(sapply(psiList,is.list))) psiList <- rep(psiList, repl) if(!any(sapply(estList,is.list))) estList <- rep(estList, repl) if(!any(sapply(RList,is.list))) RList <- rep(RList, repl) if(!any(sapply(fixpsiList,is.list))) fixpsiList<- rep(fixpsiList, repl) nomiPS.orig <- rep(nomiPS.orig, repl) Bfix <- rep(Bfix, repl) while(any(sapply(B,is.list))){ id.vc<-which((sapply(B, is.list)))[1] nc<-length(B[[id.vc]]) B<-append(B, B[[id.vc]], after = id.vc-1) #for(i in 1:length(B[[id.vc+nc]])) BB<-append(BB, BB[id.vc], id.vc-1) B[[id.vc+nc]]<-NULL #BB[[id.vc+nc]]<-NULL nomiCoefPEN <- append(nomiCoefPEN, nomiCoefPEN[[id.vc]], after = id.vc-1) nomiCoefPEN[[id.vc+nc]]<-NULL psiList <- append(psiList, psiList[[id.vc]], after = id.vc-1) psiList[[id.vc+nc]]<-NULL rangeSmooth <- append(rangeSmooth, rangeSmooth[[id.vc]], after = id.vc-1) rangeSmooth[[id.vc+nc]]<-NULL limZ <- append(limZ, limZ[[id.vc]], after = id.vc-1) limZ[[id.vc+nc]]<-NULL estList <- append(estList, estList[[id.vc]], after = id.vc-1) estList[[id.vc+nc]]<-NULL RList <- append(RList, RList[[id.vc]], after = id.vc-1) RList[[id.vc+nc]]<-NULL fixpsiList <- append(fixpsiList, 
fixpsiList[[id.vc]], after = id.vc-1) fixpsiList[[id.vc+nc]]<-NULL #se la lista contiene solo NULL, non funziona... #penMatrixList <- append(penMatrixList, penMatrixList[[id.vc]], after = id.vc-1) #penMatrixList[[id.vc+nc]]<-NULL } #if(!all(sapply(estList,check.estPsi))) stop(" 'est' is misspecified in one or more seg() term") #browser() nomiTerminiSEG<-nomiCoefPSI <-NULL nomiPS.ps.unlist.seg <- unlist(nomiPS.ps.list) nomiPS.ps.unlist <- sub("[)]", "", sub("seg[(]", "",nomiPS.ps.unlist.seg )) names(psiList)<- nomiPS.ps.unlist for(i in 1:length(B)) { #nomiCoefPSI[[i]]<- paste(paste("psi",1:length(psiList[[i]]), sep=""), nomiPS.ps.unlist[i], sep=".") ##oppure sep=".psi" nomiTerminiSEG[[i]]<-rep(nomiPS.ps.unlist[i], length(psiList[[i]])) } #nomiCoefU<-lapply(nomiCoefPSI, function(.x) sub("psi","U",.x )) #nomiCoefZ<-lapply(nomiCoefPSI, function(.x) sub("psi","Z",.x )) nomiSeg<- unique(unlist(nomiTerminiSEG)) #browser() #FINALMENTE (speriamo..:-)) #nomiCoefZ, nomiCoefpsi, nomiCoefU sono liste con nomi che includono sia le possibili interazioni, sia il n. dei breakpoints #Anche nomiTerminiSEG e' della stessa dimensione ma i nomi ignorano il n.dei breakpoints (questa serve per rangeZ) #nomiSeg #npsiList #psiList #estList #RList # if(is.null(names(estList))) { # names(estList)<-nomiSeg # } else { # if(any(sapply(names(estList), function(.x).x==""))) stop(" 'estList' is only partially named. # Or all or no name allowed.") # } #browser() if(any(sapply(estList, is.list))) stop(" One or more 'est' components misspecified") npsiList1<-id.contrR <- rep(NA, length(B)) psiListE<-psiListQ<-psiList for(j in 1:length(B)){ #K<- npsiList[j] K <- if(!is.na(npsiList[nomiSeg[j]])) npsiList[nomiSeg[j]] else npsiList[j] npsiList1[j]<- K if(any(is.na(psiList[[j]]))){ #if(control$quant) { psiListQ[[j]]<- quantile(B[[j]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) #} else { psiListE[[j]]<- (min(B[[j]])+ diff(range(B[[j]]))*(1:K)/(K+1)) #} } else { K<-npsiList1[j]<-length(psiList[[j]]) } if(!is.null(fixpsiList[[j]])) { Bfix[[j]]<- sapply(sort(fixpsiList[[j]]), function(.x) (B[[j]]-.x)*(B[[j]]>.x)) #colnames(Bfix[[j]])<- paste("U", 1:length(fixpsiList[[j]]),".fixed.",nomiPS.orig[j], sep="") colnames(Bfix[[j]])<- paste("U", 1:length(fixpsiList[[j]]),".fixed.",nomiPS.ps.unlist[j], sep="") } #se per qualche termine ci sono le matrici dei vincoli sulle slope #browser() j.ok=match(nomiSeg[j], names(RList), nomatch=0) j.ok <-if(j.ok>0) j.ok else j if(!any(is.na(RList[[j.ok]]))){ RList[[j]] <- RList[[j.ok]] id.contrR[j] <-TRUE } else { j.ok=match(nomiSeg[j], names(estList), nomatch=0) j.ok <-if(j.ok>0) j.ok else j if(!any(is.na(estList[[j.ok]]))){ if(length(estList[[j.ok]])!=(K+1)) stop(" 'est' is not compatible with 'npsi' ") #browser() RList[[j]]<-diag(K+1)[,estList[[j.ok]]==1,drop=FALSE] id.contrR[j] <-TRUE } else { RList[[j]]<-diag(K+1) id.contrR[j] <-FALSE } } } #browser() if(control$quant) { psiList<-psiListQ initial <- unlist(psiListE) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } else { psiList<-psiListE initial <- unlist(psiListQ) PSI1<- matrix(initial, n, length(initial), byrow = TRUE) } #Quindi PSI1 e' una matrice di valori inziali di psi.. 
Vanno usati da seg.lm.fit.boot nel caso in cui i primi PSI #non portino ad adattare un modello #NB: Poiche' ora psiList include i veri numeri dei psi, i codici vanno rilanciati for(i in 1:length(B)) { nomiCoefPSI[[i]]<- paste(paste("psi",1:length(psiList[[i]]), sep=""), nomiPS.ps.unlist[i], sep=".") ##oppure sep=".psi" nomiTerminiSEG[[i]]<-rep(nomiPS.ps.unlist[i], length(psiList[[i]])) } nomiCoefU<-lapply(nomiCoefPSI, function(.x) sub("psi","U",.x )) nomiCoefZ<-lapply(nomiCoefPSI, function(.x) sub("psi","Z",.x )) npsii <- sapply(psiList,length) id.psi.group <- rep(1:length(psiList), sapply(psiList,length)) Z<- lapply(1:length(B), function(.x) matrix(B[[.x]], nrow=n, ncol=npsiList1[[.x]])) Z<- do.call(cbind,Z) colnames(Z) <- unlist(nomiCoefZ) #nomiPS, nomiPS.By, nomiPS.orig, nomiPS.ps, nomiPS.ps.list, nomiCateg, nomiInterCateg, nomiCoefPEN #nomiPS: "x", "z" (vettore) #nomiPS.orig: come "nomiPS" #Se ci sono interazioni (by) #nomiPS "x:g" #nomiPS.orig: "x", "x", "x".. La stessa variabile ripetuta per il n.dei gruppi # #nomiPS.ps: "seg(x)", "seg(z)" (vettore) [con by: "seg(x):g"] #nomiPS.ps.list "seg(x)", "seg(z)" (lista) [lista con by: "seg(x):g1" "seg(x):g2" "seg(x):g3" ..] #nomiInterCateg: "x:g1" "x:g2" "x:g3" .. #======================================================================================================== X <- if(!is.empty.model(mt)){ model.matrix(mt, mf, contrasts) } else {stop("error in the design matrix")}#matrix(, NROW(Y), 0L) attrContr<-attr(X, "contrasts") #n<-nrow(X) X<- X[, !startsWith(colnames(X),"seg("), drop=FALSE] idZ <- unlist(tapply(id.psi.group, id.psi.group, function(.x) c(TRUE, rep(FALSE, length(.x)-1)))) Z.ok<-Z[, idZ, drop=FALSE] colnames(Z.ok) <- nomiPS.ps.unlist X<-cbind(X, Z.ok) #include anche i termini lineari delle variabili segmented #colnames(Z)<- unlist(nomiCoefPEN) initial <- unlist(psiList) PSI <- matrix(initial, n, length(initial), byrow = TRUE) #PSI1 di riserva definito sopra. #browser() #NB la matrie del disegno X include in nomi "seg(x)" e non va bene perche' poi da problemi con i #nomi dei coef dell'oggetto.. Quindi bisogna sostituire questi nomi!!! #non serve perche gia' i nomi sono ok.. #id.segX <-grep( "seg[(]" , colnames(X)) #colnames(X)[id.segX]<-gsub("[)]", "", gsub("seg[(]", "", colnames(X)[id.segX])) if(any(!sapply(Bfix, is.null))){ X<-cbind(X, do.call(cbind, Bfix)) } #browser() #colnames(X)[unlist(id.psList)] <- nomiPS.orig #X[,nomiPS.orig] <- Z[, unique(colnames(Z)), drop=FALSE] #nomiCoefPEN include i nomi le interazioni con i livelli (nel caso vc) e anche del numero dei psi #[1] "U1.x" "U2.x" "U1.z" id.noOW <- if(is.null(weights) && is.null(offs)) TRUE else FALSE if(is.null(weights)) weights<-rep(1,n) orig.offs<-offs if(is.null(offs)) offs<-rep(0,n) #E' realmente necessario assegnare offs e weight con 0 e 1 anche se non servono??? 
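# --- Illustrative sketch, not part of the package code: two building blocks assembled in this part
# --- are the broken-line basis (x - psi) * (x > psi) used for fixed/known breakpoints, and the
# --- n x npsi matrix PSI replicating the starting breakpoints row-wise. Toy values, base R only,
# --- if(FALSE)-guarded, never run.
if (FALSE) {
  x <- 1:10
  psi0 <- c(3, 7)                                      # two starting breakpoints
  U <- sapply(psi0, function(p) (x - p) * (x > p))     # broken-line basis for given breakpoints
  PSI <- matrix(psi0, nrow = length(x), ncol = length(psi0), byrow = TRUE)
  head(cbind(x, U, PSI))
}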
#browser() limZ <-do.call(cbind, lapply(limZ, function(.x){if(is.list(.x)) do.call(cbind, .x) else cbind(.x)} )) #limZ <-matrix(sapply(1:length(npsii), function(.x) rep(limZ[,.x],npsii[.x])), nrow=2, byrow = FALSE) limZ <- do.call(cbind, lapply(1:length(npsii), function(.x) matrix(limZ[,.x],nrow=2,ncol=npsii[.x]))) rangeZ <- do.call(cbind, lapply(1:length(npsii), function(.x) matrix(rangeSmooth[[.x]],nrow=2,ncol=npsii[.x]))) colnames(rangeZ)<-rep(names(npsii), npsii) #unlist(nomiTerminiSEG) #browser() #rangeZ<- matrix(sapply(1:length(npsii), function(.x) rep(rangeSmooth[[.x]],npsii[.x])), nrow=2, byrow = FALSE) invXtX<-Xty<-NULL #browser() opz<-list(toll=toll,h=h,stop.if.error=stop.if.error,L0=NULL,visual=visual,it.max=it.max,nomiOK=unlist(nomiCoefU), usesegreg=TRUE, fam=family, eta0=NULL, maxit.glm=maxit.glm, id.psi.group=id.psi.group, gap=gap, limZ=limZ, rangeZ=rangeZ, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, tol.opt=control$tol.opt, pow=pow, visualBoot=visualBoot, digits=digits, fc=fc, RList=RList, nomiSeg=nomiSeg, seed=control$seed, min.n=control$min.n, PSI1=PSI1) #browser() if(any(id.contrR)){ if(fitter0=="lm"){ # for(.i in nomiSeg) { # #poni min(z)=0, cosi solve() in step.lm.fit non ha problemi. # if(.i %in% colnames(X)) X[,.i]<- X[,.i] - min(X[,.i]) # } if(n.boot <= 0) { obj <- segConstr.lm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <- segConstr.lm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) # seed<- obj$seed } class0<- "lm" if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) } else { if(n.boot<=0){ obj <-segConstr.glm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <-segConstr.glm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) # seed<- obj$seed } class0<-c("glm","lm") } } else { #browser() if(fitter0=="lm"){ if(n.boot <= 0) { obj <- seg.lm.fit(Y, X, Z, PSI, weights, offs, opz) } else { #browser() obj <- seg.lm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) # seed<- obj$seed } class0<-"lm" if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) } else { if(n.boot<=0){ obj <-seg.glm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <-seg.glm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) # seed<- obj$seed } class0<-c("glm","lm") } } if(!is.list(obj)){ warning("Estimation failed. Too many breakpoints? Returning a (g)lm fit..", call. = FALSE) if(fitter0=="lm"){ obj0 <- if(id.noOW) lm.fit(x = X, y = Y) else lm.wfit(x = X, y = Y, w = weights, offset = offs) class(obj0)<-"lm" } else { obj0 <- try(suppressWarnings(glm.fit(X, y = Y, offset = offs, weights = weights, family = opz$fam #control = glm.control(maxit = maxit.glm), )), silent = TRUE) class(obj0)<-c("glm", "lm") } return(obj0) } seed<- obj$seed if(!is.list(obj)){ warning("No breakpoint estimated", call. 
= FALSE) return(invisible(NULL)) } # id.psi.group<-obj$id.psi.group npsi.groups <- tapply(id.psi.group, id.psi.group, length) psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V rangeZ<-obj$rangeZ #browser() colnames(rangeZ) <- unlist(nomiTerminiSEG) it <- obj$it epsilon <- obj$epsilon id.warn <- obj$id.warn k <- length(psi) objU <- obj$obj #beta.c <- coef(objU)[paste("U", 1:ncol(U), sep = "")] beta.c <- coef(objU)[obj$idU] #browser() if(any(id.contrR)) { beta.c <- lapply(1:length(obj$constr$RList), function(i) (obj$constr$invA.RList[[i]]%*%beta.c[unlist(obj$constr$nomiUList)==i])[-1]) beta.c <- unlist(beta.c) nomiSeg <- rep(unique(unlist(nomiTerminiSEG)), sapply(obj$constr$nomiUList,function(.x) length(.x))) replSeg <- unlist(sapply(obj$constr$nomiUList,function(.x) 1:length(.x))) nomiU <- paste(paste("U", replSeg,sep=""), nomiSeg, sep=".") nomiVxb <- sub("U","psi", obj$nomiOK) X <- obj$X } else { nomiU <- obj$nomiOK #nomiOK ma puo' essere cambiato se sono eliminati dei psi nella procedura.. nomiVxb <- sub("U","psi", nomiU) #In realta' nomiU e nomiVxb gia' ci sono (sarebbero nomiCoefU e nomiCoefPSI), # pero' se durante la procedura sono stati cambiati perche' alcuni psi sono stati rimossi..bhu?? #nomiU <-nomiCoefU #nomiVxb<- nomiCoefPSI } #browser() Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) colnames(U)<- nomiU #<- nomiOK colnames(Vxb)<-nomiVxb #<- sub("U","psi", nomiU) se.psi<-rep(NA,k) if(fitter0=="lm"){ objV <- if(id.noOW) lm.fit(x = cbind(X, U, Vxb), y = Y) else lm.wfit(x = cbind(X, U, Vxb), y = Y, w = weights, offset = offs) if(var.psi) { s2 <- sum(weights*objU$residuals^2)/objV$df.residual R <- chol2inv(objV$qr$qr) se.psi <- sqrt(diag(R)*s2)[match(nomiVxb, names(coef(objV)),0)] } } else { objV <- try(suppressWarnings(glm.fit(cbind(X, U, Vxb), y = Y, offset = offs, weights = weights, family = opz$fam, #control = glm.control(maxit = maxit.glm), etastart = objU$linear.predictors)), silent = TRUE) objV$linear.predictors<-objU$linear.predictors objV$deviance<-objU$deviance objV$aic<-objU$aic + 2*ncol(PSI) #k objV$weights<-objU$weights #browser() if (length(offs) && attr(mt, "intercept") > 0L) { #se c'e' un offset devi calcolare la null.deviance (come fa glm()) obj0 <- try(suppressWarnings(glm.fit(X[, "(Intercept)", drop = FALSE], y = Y, offset = offs, weights = weights, family = opz$fam, #control = glm.control(maxit = maxit.glm), etastart = objU$linear.predictors, intercept=TRUE)), silent = TRUE) # obj0 <- eval(call(if (is.function(method)) "method" else method, # x = X[, "(Intercept)", drop = FALSE], y = Y, mustart = fit$fitted.values, # weights = weights, offset = offset, family = family, # control = control, intercept = TRUE)) if (!obj0$converged) warning("fitting to calculate the null deviance did not converge -- increase 'maxit'?") objV$null.deviance <- obj0$deviance } if(var.psi) { R <- chol2inv(objV$qr$qr) s2 <- 1 if(!opz$fam$fam%in%c("poisson","binomial")) s2<- objU$deviance/objV$df.residual se.psi <- sqrt(diag(R)*s2)[match(nomiVxb, names(coef(objV)),0)] } } objV$fitted.values <- objU$fitted.values objV$residuals <- objU$residuals names.coef<-names(objV$coefficients) objV$coefficients <- objU$coefficients pLin<- ncol(X) if(pLin>=1) { names(objV$coefficients) <- c(names.coef[1:pLin], c(nomiU, nomiVxb)) } else { names(objV$coefficients) <- c(nomiU, nomiVxb) } #browser() ris.psi<-matrix(NA,k,3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<- se.psi if(stop.if.error) 
ris.psi[,1]<-initial all.seg.form<-NULL mf1<-mf[1] for(i in 2:ncol(mf)) { if(i %in% id.ps){ l<-attributes(mf[[i]]) if(!is.null(l$by)){ if(!l$nomeBy%in%names(mf)){ m<-data.frame(mf[[i]][,1],l$by) colnames(m) <- c(l$nomeX, l$nomeBy) } else { m<-data.frame(mf[[i]][,1]) colnames(m) <- l$nomeX } all.seg.form[[length(all.seg.form)+1]]<-as.formula( paste("~0+", l$nomeX, "*", l$nomeBy, "-", l$nomeX)) } else { m <- data.frame(mf[[i]]) colnames(m) <- l$nomeX all.seg.form[[length(all.seg.form)+1]]<- as.formula(paste("~", l$nomeX)) } } else { m <- mf[i] } mf1<-cbind(mf1, m) } #browser() #costruisci la formulaLin.. Attenzione non tiene conto di eventuali vincoli sulle pendenze. splitFo <- strsplit(as.character(formula),"[+]") allX.lin<-paste(c(splitFo[[3]][-grep("seg[(]", splitFo[[3]])], unique(nomiPS.orig)), collapse="+") formulaLin <- as.formula(paste(splitFo[[2]], splitFo[[1]], allX.lin)) #browser() names(all.seg.form)<-nomiPS #names(mf) <- nomi.mf objV$terms <- mt if(fitter0=="lm") objV$y<-Y #modificato il 6/5/24.. Non c'e' bisogno.. l'ogg restituit da glm.fit() ha gia' la y if(x) objV$x <- X objV$contrasts <- attrContr objV$xlevels <- .xlivelli objV$call<-call #browser() if (model) { if(any(!sapply(levelsBy,is.null)) && any(id.contrR)) { mf1<- cbind(mf1, Z.ok) } objV$model <- mf1 } objV$na.action <- attr(mf, "na.action") objV$psi <- ris.psi objV$id.warn <- id.warn objV$it <- it objV$epsilon <- obj$epsilon objV$rangeZ <- rangeZ objV$constr <- obj$constr objV$psi.history <- psi.values #browser() nomiPS.orig<-NULL for(i in 1:length(nomiPS)){ nomiPS.orig[[i]]<-if(is.null(levelsBy[[i]])) nomiPS[i] else paste(nomiPS[i], unlist(levelsBy[[i]]), sep="") } nomiPS.orig <- unlist(nomiPS.orig) #nomiPS.orig<-paste(nomiPS, unlist(levelsBy), sep="") names(fixpsiList) <- nomiPS.orig psi.list<-vector("list", length=length(unique(nomiPS.orig))) #browser() names(psi.list)<- nomiPS.orig names(psi)<-rep(nomiPS.orig, npsi.groups) for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } objV$indexU<-build.all.psi(psi.list, fixpsiList) #browser() objV$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiPS.orig) #nomiPS.orig?? objV$nameUV$formulaSeg<- all.seg.form objV$nameUV$formulaSegAllTerms<- paste("~", paste(sapply(all.seg.form, function(.x) strsplit(paste(.x), "~"))[2,],collapse="+")) objV$formulaLin<- formulaLin objV$id.psi.group<- id.psi.group objV$psi[,"Initial"]<-NA if(n.boot>0) objV$seed <- seed #browser() #il comando structure() l'ho messo perche' avevo bisogno che anche in mancanza di offset, l'oggetto finale restituisse # un oggetto con la componente offset NULL. Cosa che non viene fatta con il semplice comando di sotto.. #objV<- if(id.O) c(objV, offset=offs) else c(objV, offset=NULL) objV<- structure(c(objV, list(offset=orig.offs))) class(objV)<-c("segmented", class0) objV } segmented/R/fitted.segmented.lme.R0000644000176200001440000000404114415477004016563 0ustar liggesusersfitted.segmented.lme<-function(object, level=1, sort=TRUE, ...){ #fit: an object of class "segmented.lme" #What about "fitted(oo$lme.fit.noG)" or "fitted(obj,level=1)+fit$Off"? 
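# --- Illustrative sketch, not part of the package code: the level=0 branch below returns
# --- population-level fitted values of a broken-line mixed model, i.e. b0 + b1*x + delta*(x - psi)_+
# --- evaluated at the fixed-effects breakpoint. Hypothetical numbers, base R only,
# --- if(FALSE)-guarded, never run.
if (FALSE) {
  x <- seq(0, 10, by = .5)
  b0 <- 1; b1 <- .4; delta <- 1.2; psi <- 5.3          # hypothetical fixed effects
  mu0 <- b0 + b1 * x + delta * pmax(x - psi, 0)        # level-0 (population) fitted values
  plot(x, mu0, type = "l")
}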
#fitted(fitG,level=1)+fit$Off e' proprio uguale a fitted(fit.noG, level=1) #comunque per level=0 (population parameter) l'identita' non vale, ed e' necessario fare i calcoli # "manualmente" #obj<- object[[1]] #sarebbe fit$lme.fit levelC<- min(level,1) #valori >1 sono riportati ad 1 levelC<-deparse(levelC) #browser() switch(levelC, "0"={ #leftSlope<- if(object$namesGZ$nameZ %in% names(fixef(object[[2]]))) fixef(object[[2]])[object$namesGZ$nameZ] else 0 #b0<-fixef(object[[2]])["(Intercept)"] #if(is.na(b0)) b0<-0 r<-vector("list", length=length(names(object$psi.i))) mu0 <- fitted(object$lme.fit.noG, level=0) for(id in names(object$psi.i)){ diffSlope<-object$fixed.eta.delta[paste(id)] Psi<- object$fixed.psi[paste(id)] x<- object$Z[names(object$Z)==id] #mu<-b0+leftSlope*x+diffSlope*pmax(x-Psi,0) psi.i <- object$psi.i[paste(id)] mu<-mu0[names(mu0)==paste(id)] mu <- mu - diffSlope*pmax(x-psi.i,0) +diffSlope*pmax(x-Psi,0) r[[id]]<-mu } mu<-unlist(r) names(mu) <- names(object$Z) # mu<-fitted(obj,level=0) + fit$Off # if("G0"%in%names(ranef(obj))){ # ni<-tapply(obj$groups[,1], obj$groups[,1], length) # ki<-rep(ranef(obj)[["G0"]],ni) # mu<-mu + ki*obj$data[["G0"]] # } }, "1"={ mu <- fitted(object$lme.fit.noG, level=level) # "1"={ mu<-fitted(obj,level=1)+fit$Off #e' proprio uguale a fitted(fit[[2]], level=1) } ) #end_switch if(sort) mu<- mu[order(names(mu))] return(mu) } segmented/R/stepmented.ts.R0000644000176200001440000003433414757372053015372 0ustar liggesusersstepmented.ts <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control=seg.control(), keep.class=FALSE, var.psi=FALSE, ..., pertV=0, centerX=FALSE, adjX=NULL) { #pertV come calcolare la variabile V=1/(2*abs(Xtrue-PSI)? i psi devono essere diversi dalle x_i # utilizzare i psi stimati che tipcamente sono diversi? (perV=0) # oppure i psi.mid che sicuramente sono (o meglio dovrebbero essere) tra due x_i... #NO OFFSET o PESI!!!! # --------- mylm<-function(x,y){ XtX <- crossprod(x) b <- drop(solve(XtX,crossprod(x,y))) fit <- drop(tcrossprod(b,x)) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b), invXtX=solve(XtX), L0=sum(r^2)) o } #----------- toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } #----------- agg<- 1-control$fc it.max<- control$it.max tol<- control$toll #browser() display<- control$visual digits <- control$digits #min.step <- control$min.step #conv.psi <- control$conv.psi alpha <- control$alpha fix.npsi <- control$fix.npsi n.boot <- control$n.boot break.boot<- control$break.boot + 2 seed<- control$seed fix.npsi<-control$fix.npsi h<-control$h #----------- #if(!(is.vector(obj) || is.ts(obj))) stop("obj should be a 'lm' fit, a 'vector' or 'ts' object") #if(is.vector(obj) || is.ts(obj)){ #if(is.matrix(obj) && ncol(obj)>1) stop("if matrix 'obj' should have 1 column") #obj<-drop(obj) if(!missing(seg.Z) && length(all.vars(seg.Z))>1) stop(" multiple seg.Z allowed only with lm models") Fo0<-as.formula(paste(deparse(substitute(obj))," ~ 1", sep="")) y.only.vector<-TRUE y<- as.vector(obj) Tsp<-tsp(obj) nn <- 1 + round((Tsp[2L] - Tsp[1L]) * Tsp[3L]) #preso da getAnywhere("print.ts") if (length(y) != nn) warning(gettextf("series is corrupt: length %d with 'tsp' implying %d", length(y), nn), domain = NA, call. 
= FALSE) if(missing(seg.Z)) { x<-seq(Tsp[1], Tsp[2], length=length(y) ) name.Z <- "Time" } else { x<-eval(parse(text=all.vars(seg.Z))) name.Z <- all.vars(seg.Z) adjX <- FALSE } min.x<- min(x) if(is.null(adjX)) { adjX<- if(min.x>=1000) TRUE else FALSE } if(adjX) x<- x - min.x if(missing(psi)){ if(missing(npsi)) npsi<-1 #stop(" psi or npsi have to be provided ") psi<- seq(min(x), max(x), l=npsi+2)[-c(1, npsi+2)] #psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) } else { npsi<-length(psi) } initial.psi<-psi n<-length(y) a<- npsi n.Seg<-1 Z<-matrix(x, ncol=a, nrow=n, byrow = FALSE) XREG<-matrix(1, nrow=n, ncol=1) PSI<-matrix(psi, ncol=a, nrow=n, byrow = TRUE) #name.Z <- if(missing(seg.Z)) "id" else all.vars(seg.Z) nomiU<-paste("U", 1:a, ".", name.Z,sep="") nomiV<-paste("V", 1:a, ".", name.Z,sep="") colnames(Z)<-nomiZ<-rep(name.Z, a) id.psi.group <- rep(1:length(a), times = a) orig.call<-NULL #################################################### # invXtX<-if(!is.null(obj$qr)) chol2inv(qr.R(obj$qr)) else NULL #(XtX)^{-1} # Xty<-crossprod(XREG,y) # opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, # nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, # conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc) #x<- Z x.lin <-XREG #if(is.vector(x)) x<-as.matrix(x) #dev0<- if(!display) var(y)*n else sum(mylm(x.lin, y)$residuals^2) #non ci sono altre esplicative, per cui il modello nullo e' sempre con interc. dev0 <- var(y)*(n-1) #mylm(x.lin, y)$L0 rangeZ <- apply(Z, 2, range) #browser() plin<-ncol(x.lin) #if(!is.list(psi)) psi<-list(psi) #P <- length(psi) #n. variabili con breakpoints #npsii <- sapply(psi, length) #n di psi for each covariate P<-n.Seg npsii<-a npsi<- sum(npsii) Xtrue<-Z #psi0 <- unlist(psi) #PSI<- matrix(psi0, n, npsi, byrow=TRUE) #if(ncol(x)!=P) stop("errore") #Xtrue<-toMatrix(x, npsii) #browser() if(it.max == 0) { mfExt<-data.frame(y, Z) names(mfExt) <- c(all.vars(Fo0), name.Z) ripetizioni<-unlist(tapply(nomiZ, nomiZ, function(.x)1:length(.x))) U <- (Xtrue>PSI) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf, if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) #if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) psi <- cbind(psi, psi, 0) rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") colnames(psi) <- c("Initial", "Est.", "St.Err") obj$psi <- psi return(obj) } c1 <- apply((Xtrue <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Xtrue >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) opz<-list(toll=tol, dev0=dev0, display=display, it.max=it.max, agg=agg, digits=digits, rangeZ=rangeZ, id.psi.group=id.psi.group,h=h, #nomiOK=nomiOK, visualBoot=visualBoot, invXtX=invXtX, Xty=Xty, conv.psi=conv.psi,min.step=min.step, alpha=alpha, fix.npsi=fix.npsi, npsii=npsii, seed=control$seed) # 
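#The estimation below is delegated to step.ts.fit() (single fit) or, when n.boot>0, to
#step.ts.fit.boot(), which uses bootstrap restarting and also returns the seed employed.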
################################################################################# # #### jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) # ################################################################################# if(n.boot<=0){ obj<- step.ts.fit(y, x.lin, Xtrue, PSI, opz, return.all.sol=FALSE) } else { #if("seed" %in% names(control)) set.seed(control$seed) obj<-step.ts.fit.boot(y, x.lin, Xtrue, PSI, opz, n.boot, break.boot=break.boot) seed <- obj$seed } # if(!is.list(obj)){ # warning("No breakpoint estimated", call. = FALSE) # return(obj0) # } #chol2inv(qr.R(obj$obj$qr)) id.warn<-obj$id.warn it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart #i beta.c corripondono ai psi NON ordinati!!! beta.c<- obj$beta.c beta.c<-unlist(tapply(psi, id.psi.group, function(.x)beta.c[order(.x)])) #unlist(lapply(unique(id.psi.group), function(.x) beta.c[id.psi.group==.x][order(psi[id.psi.group==.x])])) psi<-unlist(tapply(psi, id.psi.group, sort)) Z0<-apply(Z,2,sort) psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j] INF DEN <- abs(Xtrue - PSI.mid) DEN <- apply(DEN, 2, function(.x) pmax(.x, sort(.x)[2]/2)) #pmax(.x, diff(range(.x))/1000)) V <- (1/(2 * DEN)) colnames(V)<-nomiV if(centerX){ XtrueS <- scale(Xtrue, TRUE, scale=FALSE) meanX<-attr(XtrueS, "scaled:center") attr(XtrueS, "scaled:center")<-NULL U <- (XtrueS * V + 1/2) } else { U <- (Xtrue * V + 1/2) } colnames(U)<-nomiU if(pertV>0){ #puoi usare o psi.mid o psi.rounded+eps.. Il secondo porta ad una cor ancora piu bassa della prima.. 0.89 vs 0.96 if(pertV==1){ PSI.mid <- matrix(psi.mid, n, npsi, byrow = TRUE) V <- (1/(2 * abs(Xtrue - PSI.mid))) } else { PSI.mid <- matrix(psi.rounded[1,], n, npsi, byrow = TRUE) V <- (1/(2 * abs(Xtrue - PSI.mid + .0001))) } } Vxb <- -V# * rep(-beta.c, each = nrow(V)) nomiVxb <- gsub("V", "psi", nomiV) nnomi <- c(nomiU, nomiVxb) #XREG <- cbind(x.lin, Z, W) #obj <- lm.wfit(y = y, x = XREG, offset = offs, w=ww ) # source("stepmented.lm.R") Fo <- update.formula(Fo0, as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) mfExt <- data.frame(1,U,Vxb) colnames(mfExt)<-c("(Intercept)",nnomi) objF <- lm(Fo, data = mfExt) #browser() objW<-objF #controllo se qualche coeff e' NA.. isNAcoef<-any(is.na(objF$coefficients)) #browser() if(isNAcoef) { nameNA.psi <- names(objF$coefficients)[which(is.na(objF$coefficients))] nameNA.U <- gsub("psi", "U", nameNA.psi) if(fix.npsi) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("coef ", nameNA.psi, " is NA: breakpoint(s) at the boundary or too close together", call. = FALSE) } else { warning("some estimate is NA (too many breakpoints?): removing ", length(nameNA.psi), " jump-point(s)", call. = FALSE) Fo <- update(Fo, paste(".~ .-", nameNA.U, "-", nameNA.psi)) objF <- lm(Fo, data = mfExt) idNA.psi <- match(nameNA.psi, nomiVxb) nomiVxb <- setdiff(nomiVxb, nameNA.psi) nomiU <- setdiff(nomiU, nameNA.U) Xtrue <- Xtrue[, -idNA.psi, drop = FALSE] PSI.mid<- PSI.mid[, -idNA.psi, drop = FALSE] id.psi.group <- id.psi.group[-idNA.psi] psi <- psi[-idNA.psi] psi.rounded <- psi.rounded[, -idNA.psi, drop = FALSE] } } #organizziamo i risultati da restituire per psi.. 
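#Collect the breakpoint estimates to be returned: for each jump-point, psi.rounded reports the
#interval of observed covariate values containing it (rows "inf [" and "sup (").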
colnames(psi.rounded)<-names(psi)<-nomiVxb rownames(psi.rounded)<-c("inf [","sup (") # Cov <- vcov(objF) # # var.Tay<-function(est1,est2,v1,v2,v12){ # r<- est1/est2 # vv<-(v1+v2*r^2-2*r*v12)/est2^2 # vv} # # # #browser() # # #var.Tay(num, den, v.g, v.b, cov.g.b) # varPsi<- rep(NA, length(nomiU)) # for(j in 1:length(nomiU)){ # num<-objF$coefficients[nomiVxb[j]] # den<-objF$coefficients[nomiU[j]] # v.g <-Cov[nomiVxb[j],nomiVxb[j]] # v.b<- Cov[nomiU[j],nomiU[j]] # cov.g.b <- Cov[nomiVxb[j],nomiU[j]] # #if(is.null(rho)) { # rho<-mean(Xtrue[, nomiZ[j] ,drop=TRUE] SE piu' piccoli.. # rho<- rho^(sqrt(1/n)) # #} # cov.g.b<- rho*sqrt(v.g*v.b) # varPsi[j]<-var.Tay(num, den, v.g, v.b, cov.g.b) # } # names(varPsi) <- nomiVxb # # #browser() # Cov[nomiVxb, ]<- Cov[, nomiVxb] <- 0 # diag(Cov)[nomiVxb]<-varPsi # #Cov[nomiVxb, nomiVxb ]<- varPsi # # # #browser() # #var.Tay(num, den, v.g, v.b, cov.g.b) # # id <- match(nomiVxb, names(coef(objF))) # vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) ris.psi <-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi #ris.psi[,3]<-sqrt(vv) ## solo per simulazioni #browser() # ris.psi<-cbind(ris.psi, # st0=sqrt(var.Tay(num, den, v.g, v.b, 0)), # st99=sqrt(var.Tay(num, den, v.g, v.b, .99*sqrt(v.g*v.b)))) a<-tapply(id.psi.group, id.psi.group, length) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". Allora costruisco a.ok a.ok<-NULL nomiFINALI<-unique(nomiZ) for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) if(length(psi)!=length(initial.psi)){ ris.psi[,1]<- NA } else { initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) ris.psi[,1]<-initial #if(stop.if.error) ris.psi[,1]<-initial } #browser() id.psi<- x%in%psi.rounded[1,] #================================================= ##RI-AGGIUNGI IL MINIMO!!!!!!!!!! if(adjX){ #ATTENZIONE.. e se ci sono piu' breakpoints o piu' variabili (con piu' breakpoints)?? 
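#If the time index was shifted to start from zero (adjX=TRUE), add min.x back so that the
#breakpoints, the covariate values and the ranges are reported on the original time scale.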
psi.rounded<- psi.rounded + min.x ris.psi[,2] <- ris.psi[,2] + min.x } objF$psi <- ris.psi a<-rep(1:Tsp[3L], l=length(y)) b<-rep(Tsp[1L]:Tsp[2L], each=Tsp[3L])[seq_len(length(y))] break.dates <- paste(b,"(",a,")",sep="")[id.psi] attr(psi.rounded,"break.dates") <- break.dates objF$psi.rounded <- psi.rounded #stima il modello "vero" (non-working) U <- (Xtrue > PSI.mid) colnames(U)<-nomiU X <- cbind(x.lin,U) objF$obj.ok <- mylm(X, y) #coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) objF$objW<- objW objF$fitted.values<-objF$obj.ok$fitted.values objF$residuals<- objF$obj.ok$residuals objF$coefficients[1:length(objF$obj.ok$coefficients)] <- objF$obj.ok$coefficients objF$coefficients[nomiVxb] <-psi.rounded[1,] objF$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #Z = name.Z objF$rangeZ<-obj$rangeZ objF$Z <- Z[,unique(name.Z),drop=FALSE] if(n.boot>0) objF$seed <- seed if(adjX) { objF$Z <- objF$Z + min.x objF$rangeZ<- objF$rangeZ + min.x } objF$call <- match.call() objF$orig.call<-orig.call objF$psi.history <- psi.values objF$it <- it objF$epsilon <- obj$epsilon objF$id.warn <- id.warn #objF$rho<-rho objF$psi<- objF$psi[,-1,drop=FALSE] #rimuovi la colonna Initial if(var.psi){ Cov <- vcov.stepmented(objF, k=NULL) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) objF$psi[,"St.Err"]<-sqrt(vv) objF$vcov<- Cov } class(objF) <- c("stepmented","lm") return(objF) } segmented/R/vcov.stepmented.R0000644000176200001440000001176414617134635015720 0ustar liggesusersvcov.stepmented<-function(object, k=NULL, zero.cor=TRUE, type=c("cdf", "none", "abs"), ...){ #farlo su scala logit??? #k=-1/2.. conservativi #k=-2/3.. ok, ma forse troppo piccoli.. #================ # U.ef<-U.pert<-matrix(, B, p) # if(!missing(seed)) set.seed(seed) # for(i in 1:B){ # id<-sample(n, replace=TRUE) # U.ef[i,]<-colSums(U[id,]) #EF boot # w<-rgamma(n,1,1) #o anche rnorm(n,1,1) # U.pert[i,]<-colSums(U*w) # } # I1<-var(U.ef) # I2<-var(U.pert) # s2<-sum(o$residuals^2)/(n-p) # r<-list(teor= s2*crossprod(X), emp=crossprod(U), ef=I1,pert=I2) #browser() if(!inherits(object, "lm")) stop("A stepmented (g)lm is requested") #calcola il parametro di disp che serve sempre.. if(inherits(object, "glm")){ disp <- object$deviance/object$df.residual if(object$family$family %in% c("binomial","poisson")) disp <-1 } else { ww <- if(is.null(object$weights)) 1 else object$weights disp <- sum(ww*object$residuals^2)/object$df.residual } #browser() type<-match.arg(type) if(type=="abs") stop("type='abs' not (yet?) implemented") b<-coef.stepmented(object, FALSE) b <- b[b!=0] pLin<-length(b) if(type=="none"){ V<-if(is.null(object$obj.ok)) chol2inv(object$qr$qr[1:pLin, 1:pLin, drop = FALSE]) else object$obj.ok$invXtX #ok anche in presenza di pesi va bene! V<- V*disp #browser() colnames(V)<-rownames(V)<- names(b) return(V) } #===================================================== #X0 deve avere le variabili I(x>psi). Se object e' restituito da segreg allora model.matrix.stepmented(object, apprx = "no") #funziona, se restituito da stepmented no. Allora e' meglio mettere #X0<- model.matrix.stepmented(object, apprx = "no") #funziona solo con oggetti stepreg #browser() X0<- model.matrix.stepmented(object, type = "cdf", k=-100)[,1:pLin] X <- model.matrix.stepmented(object, type = type, k=k) #qr.X(object$qr) piu efficiente? 
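#Sandwich-type variance: X0 (built with a very small bandwidth, k=-100) is meant to be close to
#the exact step indicators and is used for the score contributions U, while X uses the smooth
#approximation selected via 'type' and 'k'; the returned matrix is invH %*% crossprod(U) %*% invH.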
X0 <- cbind(X0, X[,setdiff(1:ncol(X),1:ncol(X0))]) #aggiungi i termini relativi ai psi maxZ.list <- attr(X, "rangeZ") attr(X, "rangeZ")<-NULL #browser() p=ncol(X) n=nrow(X) nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U nomiPsi<- sub("V","psi", nomiV) id.noV<-setdiff(colnames(X), nomiPsi) #==================================== #inutile trasf.X(), il lavoro lo fa model.matrix.stepmented # trasf.X<-function(k){ # #trasforma la matrice del disegno di un modello stepmented.. utile per il calcolo della vcov # maxZ.list<-NULL # for(i in 1:length(nomiU)){ # nomeZ<- gsub("U[1-9].","",nomiU[i]) # Z<-object$Z[,nomeZ] # minZ<-min(Z) # maxZ<-max(Z) # psi<-object$psi[nomiPsi[i],"Est."] # Z<- (Z-minZ)/(maxZ-minZ) # psi<- (psi-minZ)/(maxZ-minZ) # if(is.null(k)){ # idU<-match(nomiU[i],nomiU) # snr.idU<-abs(object$coefficients[nomiU][idU])/sigma # ss=n^(-(.6 + .3* log(snr.idU) -abs(psi-.5)^.5/sqrt(n))) # #ss=n^(-(.6 + .07* log(snr.idU)*log10(n) -abs(psi-.5)^.5/sqrt(n))) # #.6 + .3* log(o$coefficients[2]/s) -abs(o$psi[,1]-.5)^.5/sqrt(n) +log(log(log10(n)))/3 # } else { # ss=n^k # } # #browser() # X0[,nomiU[i]]<- 1*(Z>psi) # X[,nomiU[i]]<- pnorm((Z-psi)/ss) # X0[, nomiPsi[i]] <- X[, nomiPsi[i]] <- -(object$coefficients[nomiU[i]]/ss)*dnorm((Z-psi)/ss) # maxZ.list[[length(maxZ.list)+1]]<-maxZ-minZ # } # return(list(X0=X0, X=X, maxZ.list=maxZ.list)) # } #==================================== #R<-trasf.X(k) #browser() #X0<-R$X0 #X<- R$X if(inherits(object, "glm")){ variance = object$family$variance linkinv = object$family$linkinv mu.eta = object$family$mu.eta eta <- object$linear.predictors prior.weights=object$prior.weights mu = linkinv(eta) varg = variance(mu) invgprime = mu.eta(eta) ww<- prior.weights*(invgprime^2/varg) #object$prior.weights*object$weights # if(only.lin){ # invH<- solve(crossprod(sqrt(ww)*X0[,id.noV,drop=FALSE])) # U<-X0[,id.noV,drop=FALSE]*(prior.weights*object$residuals*invgprime/varg) # } invH<- solve(crossprod(sqrt(ww)*X)) U<-X0*(prior.weights*object$residuals*invgprime/varg) } else {#se lm w <- if(is.null(object$weights) || sd(object$weights)==0) 1 else object$weights # if(only.lin){ # invH<- solve(crossprod(sqrt(w)*X0[,id.noV,drop=FALSE])) # U<-X0[,id.noV,drop=FALSE]*(w*object$residuals) # } #browser() invH<- solve(crossprod(sqrt(w)*X)) U<-X0*(w*object$residuals) } INF<- crossprod(U) V =invH %*% INF %*% invH #browser() # for(i in 1:length(nomiPsi)){ # V[,nomiPsi[i]]<-V[,nomiPsi[i]]*maxZ.list[[i]] # V[nomiPsi[i],]<-V[nomiPsi[i],]*maxZ.list[[i]] # } if(zero.cor) V[nomiPsi, id.noV]<- V[id.noV, nomiPsi] <-0 V } segmented/R/seg.def.fit.boot.r0000644000176200001440000002672514757621107015675 0ustar liggesusersseg.def.fit.boot<-function(obj, Z, PSI, mfExt, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di extract.psi<-function(lista){ #serve per estrarre il miglior psi.. 
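#extract.psi(): from the list of objective values and visited psi (as returned with
#return.all.sol=TRUE), drop the first entry (model without breakpoints) and return the psi
#corresponding to the smallest objective.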
dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- #browser() if(is.null(opz$seed)){ if(!is.null(obj$y)) { mY <- mean(obj$y) } else { mY <- if(!is.null(obj$residuals)) mean(obj$residuals) else as.numeric(logLik(obj)) } sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } visualBoot<-opz$visualBoot opz.boot<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-0 n<-nrow(mfExt) rangeZ <- apply(Z, 2, range) #serve sempre alpha <- opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha)) limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) o0<-try(suppressWarnings(seg.def.fit(obj, Z, PSI, mfExt, opz)), silent=TRUE) if(!is.list(o0)){ o0<-try(suppressWarnings(seg.def.fit(obj, Z, opz$PSI1, mfExt, opz)), silent=TRUE) } if(!is.list(o0)) { o0<- seg.def.fit(obj, Z, PSI, mfExt, opz, return.all.sol=TRUE) o0<-extract.psi(o0) if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(suppressWarnings(seg.def.fit(obj, Z, PSI1, mfExt, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n # na<- ,,apply(...,2,function(x)mean(is.na(x))) Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 alpha <- .1 n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... 
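#If the objective has been unchanged over the last n.boot.rev restarts, perturb the current psi
#by moving each one towards a mirrored quantile of its covariate, to escape a flat solution.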
diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) #if(length(na.omit(diff(all.selected.ss[1:n.boot.rev])))==(n.boot.rev-1) && all(round(diff(all.selected.ss[1:n.boot.rev]),6)==0)){ if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1,.8,qpsi) alpha <- 1 - alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(seg.def.fit(obj, Z[id,,drop=FALSE], PSI[id,,drop=FALSE], mfExt[id,,drop=FALSE], opz.boot)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) ##----> o.boot<-try(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz.boot), silent=TRUE) #in realta' la risposta dovrebbe essere "yy" da cambiare in mfExt o.boot<- try(suppressWarnings(seg.def.fit(obj, Z.orig, PSI, mfExt, opz.boot)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o <- try(seg.def.fit(obj, Z.orig, PSI, mfExt, opz, return.all.sol=TRUE), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o <- try(suppressWarnings(seg.def.fit(obj, Z, PSI1, mfExt, opz1)), silent=TRUE) count.random<-count.random+1 } if(is.list(o)){ #if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) if( isFALSE( "coefficients"%in%names(o$obj) || "estimate"%in%names(o$obj) ) ) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.min.f = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ #quando vengono restituiti psi troppo vicini e l'SE non si puo' calcolare, possiamo distanziarli.. #Pero' il processo deve essere esteso nel caso in cui ci sono 3 psi vicini.. 
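#Fallback when the best solution has no fitted object (typically breakpoints too close for the
#standard errors to be computed): adjacent breakpoints are pushed apart so that no segment is
#left with too few observations (as controlled by min.n).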
min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k]) PSI) #if (pow[1] != 1) U1 <- U1^pow[1] for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U1[, id.psi.group==i])%*%invA.RList[[i]] # #nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U1<-do.call(cbind, UList) #la matrice del disegno sara' cbind(X, U1) obj1 <- try(suppressWarnings(glm.fit(x = cbind(X, U1), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$dev L1 } # est.k<-function(x1,y1,L0){ # ax<-log(x1) # .x<-cbind(1,ax,ax^2) # b<-drop(solve(crossprod(.x),crossprod(.x,y1))) # const<-b[1]-L0 # DD<-sqrt(b[2]^2-4*const*b[3]) # kk<-exp((-b[2]+ DD) /(2*b[3])) # return(round(kk)) # # # ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0 # # a<-uniroot(ff, c(log(x[4]), 3.4)) # } # dpmax <- function(x, y, pow = 1) { # if (pow == 1) # -(x > y) # else -pow * ((x - y) * (x > y))^(pow - 1) # } in.psi <- function(LIM, PSI, ret.id = TRUE) { a <- PSI[1, ] <= LIM[1, ] b <- PSI[1, ] >= LIM[2, ] is.ok <- !a & !b if (ret.id) return(is.ok) isOK <- all(is.ok) && all(!is.na(is.ok)) isOK } far.psi <- function(Z, PSI, id.psi.group, ret.id = TRUE, fc = 0.93) { nSeg <- length(unique(id.psi.group)) npsij <- tapply(id.psi.group, id.psi.group, length) nj <- sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z > PSI)[, id.psi.group == .x, drop = FALSE]) + 1) }, simplify = FALSE) ff <- id.far.ok <- vector("list", length = nSeg) for (i in 1:nSeg) { if (length(nj[[i]]) != npsij[i] + 1) nj[[i]] <- tabulate(rowSums((Z >= PSI)[, id.psi.group == i, drop = FALSE]) + 1) id.ok <- (nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] & id.ok[-1] ff[[i]] <- ifelse(diff(nj[[i]]) > 0, 1/fc, fc) } id.far.ok <- unlist(id.far.ok) ff <- unlist(ff) if (!ret.id) { return(all(id.far.ok)) } else { attr(id.far.ok, "factor") <- ff return(id.far.ok) } } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #nuovo per i vincoli RList <- opz$RList nomiUList<-UList<- vector("list",length(RList)) invAList <- lapply(RList, function(.x)rbind(c(1,rep(0,nrow(.x)-1)),diff(diag(nrow(.x))))) invA.RList<-lapply(1:length(RList), function(i) invAList[[i]]%*% RList[[i]]) nomiUList<- lapply(1:length(RList), function(i)rep(i, ncol(RList[[i]]))) #----------- eta0<-opz$eta0 
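#Remaining fitting settings are unpacked from 'opz' below; limZ gives the admissible range for
#each breakpoint (alpha-quantiles of the corresponding column of Z, unless supplied in opz$limZ).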
fam<-opz$fam maxit.glm<-opz$maxit.glm #---------------------------- n<-length(y) min.step<-opz$min.step rangeZ <- apply(Z, 2, range) alpha<-opz$alpha #limZ <- apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ psi<-PSI[1,] id.psi.group<-opz$id.psi.group conv.psi<-opz$conv.psi digits<-opz$digits pow<-opz$pow nomiOK<-opz$nomiOK toll<-opz$toll hh<-opz$h gap<-opz$gap #fix.npsi<-opz$fix.npsi fix.npsi<-opz$stop.if.error dev.new<-opz$dev0 visual<-opz$visual it.max<-old.it.max<-opz$it.max fc<-opz$fc names(psi)<-id.psi.group it <- 0 epsilon <- 10 k.values<-dev.values<- NULL psi.values <-list() #psi.values[[length(psi.values) + 1]] <- NA #id.psi.ok<-rep(TRUE, length(psi)) sel.col.XREG<-unique(sapply(colnames(XREG), function(x)match(x,colnames(XREG)))) if(is.numeric(sel.col.XREG)) XREG<-XREG[,sel.col.XREG,drop=FALSE] #elimina le ripetizioni, ad es. le due intercette.. #invXtX <- opz$invXtX #Xty <- opz$Xty if (!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control.", call. = FALSE) if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi values too close each other. Please change (decreases number of) starting values", call. = FALSE) n.psi1 <- ncol(Z) #browser() Zseg <- XREG[,opz$nomiSeg,drop=FALSE] # XREG <- XREG[, -match(opz$nomiSeg, colnames(XREG)),drop=FALSE] U <- ((Z - PSI) * (Z > PSI)) #if (pow[1] != 1) U <- U^pow[1] for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U[, id.psi.group==i])%*%invA.RList[[i]] # #nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) #la matrice del disegno sara' cbind(X, U) obj0 <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = 3), etastart = eta0)) eta0<- obj0$linear.predictors L0<- obj0$dev if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- obj0 L1 <- L0 obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, X=XREG, idU=ncol(XREG)+1:(ncol(U)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, dev.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE, constr=list(RList=RList, invAList=invAList, invA.RList=invA.RList, nomiUList =nomiUList) ) return(obj) } if(is.null(maxit.glm)){ Nboot <- if(is.null(opz$Nboot)) 0 else opz$Nboot maxit.glm1 <- rep(1:it.max + Nboot, 1:it.max+1) #2*rep(1:it.max, 1:it.max) maxit.glm1 <- pmin(maxit.glm1, 25) } else { maxit.glm1 <- rep(maxit.glm, it.max) } n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) #dev.values[length(dev.values) + 1] <- opz$dev0 #del modello iniziale (senza psi) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi #psi iniziali if (visual) { #questo e' il visual di "lm" cat(paste("iter = ", sprintf("%2.0f", 0), " dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " k = ", sprintf("%2.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } id.warn <- FALSE id.psi.changed <- rep(FALSE, it.max) #============================================== inizio ciclo #browser() #Zseg <- XREG[,opz$nomiSeg,drop=FALSE] #XREG <- 
XREG[, -match(opz$nomiSeg, colnames(XREG)),drop=FALSE] tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) idU=ncol(XREG)+1:(ncol(U)) while (abs(epsilon) > toll) { it<-it+1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if(n.psi1!=n.psi0){ U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1] #if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)) eta0<-obj0$linear.predictors L0< - obj0$dev } else { V <- (Z>PSI) U <- (Z - PSI) * V V <- -V } #V <- dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0) for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U[, id.psi.group==i])%*%invA.RList[[i]] # nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) X <- cbind(XREG, U, V) #rownames(X) <- NULL #colnames(X)[(ncol(XREG) + 1):ncol(U)] <- paste("U", # 1:ncol(U), sep = "") #, paste("V", 1:ncol(V), sep = "")) obj <- suppressWarnings(glm.fit(X, y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)) eta0<-obj$linear.predictors beta.c <- obj$coefficients[idU] # #beta.c <- coef(obj)[ncol(XREG)+(1:ncol(U))] coefUList <- lapply(1:length(RList), function(i) (invA.RList[[i]]%*%beta.c[unlist(nomiUList)==i])[-1]) beta.c <- unlist(coefUList) gamma.c <- coef(obj)[colnames(Z)] if (any(is.na(c(beta.c, gamma.c)))) { if (fix.npsi) { if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call. = FALSE) } else { id.coef.ok <- !is.na(gamma.c) psi <- psi[id.coef.ok] if (length(psi) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } gamma.c <- gamma.c[id.coef.ok] beta.c <- beta.c[id.coef.ok] Z <- Z[, id.coef.ok, drop = FALSE] rangeZ <- rangeZ[, id.coef.ok, drop = FALSE] limZ <- limZ[, id.coef.ok, drop = FALSE] nomiOK <- nomiOK[id.coef.ok] id.psi.group <- id.psi.group[id.coef.ok] names(psi) <- id.psi.group } } psi.old <- psi psi <- psi.old + hh*gamma.c/beta.c #aggiusta la stima di psi.. 
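#The Newton-type proposal above (psi.old + hh*gamma.c/beta.c) is then damped: optimize() searches
#the step fraction in (0,1) minimising the objective along the segment between the old and the
#proposed psi, and the selected fraction is stored in 'use.k'.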
psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, opz$id.psi.group, sort), use.names =FALSE) #browser() a<-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, X=XREG, y=y, w=w, offs=offs, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if (!is.null(digits)) psi <- round(psi, digits) PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] #obj1 <- try(mylm(cbind(XREG, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(XREG, U1), y, w, offs), silent = TRUE) if (visual) { flush.console() cat(paste("iter = ", sprintf("%2.0f",it), " dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.3f", use.k), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0 <- L1 U <- U1 k.values[length(k.values) + 1] <- use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE, fc = opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if (!all(id.psi.ok)) { if (fix.npsi) { psi <- psi * ifelse(id.psi.far, 1, attr(id.psi.far, "factor")) PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) id.psi.changed[it] <- TRUE } else { Z <- Z[, id.psi.ok, drop = FALSE] PSI <- PSI[, id.psi.ok, drop = FALSE] rangeZ <- rangeZ[, id.psi.ok, drop = FALSE] limZ <- limZ[, id.psi.ok, drop = FALSE] nomiOK <- nomiOK[id.psi.ok] id.psi.group <- id.psi.group[id.psi.ok] psi.old <- psi.old[id.psi.ok] psi <- psi[id.psi.ok] names(psi) <- id.psi.group if (ncol(PSI) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while.. ############################################################################## if (id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.", sep = ""), call. = FALSE) if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. 
= FALSE) attr(psi.values, "dev") <- dev.values attr(psi.values, "k") <- k.values psi <- unlist(tapply(psi, id.psi.group, sort)) names(psi) <- id.psi.group names.coef <- names(obj$coefficients) #PSI.old <- PSI PSI <- matrix(psi, n, ncol = length(psi), byrow=TRUE) #if (sd(PSI - PSI.old) > 0 || id.psi.changed[length(id.psi.changed)]) { #browser() V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") U <- (Z - PSI) * (Z > PSI) for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U[, id.psi.group==i])%*%invA.RList[[i]] nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) #X <- cbind(XREG, U, V) colnames(U) <- paste("U", 1:ncol(U), sep = "") obj <- try(suppressWarnings(glm.fit(cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)), silent = TRUE) L1<- obj$dev #browser() obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, X=XREG, idU=ncol(XREG)+1:(ncol(U)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, #SumSquares.no.gap = L1, dev.no.gap=L1, id.psi.group = id.psi.group, id.warn = id.warn, constr=list(RList=RList, invAList=invAList, invA.RList=invA.RList, nomiUList =nomiUList)) #SlopeList <- lapply(1:length(RList), function(i) RList[[i]]%*%beta.c[unlist(nomiUList)==i]) return(obj) } segmented/R/segConstr.glm.fit.boot.r0000644000176200001440000003151214757621024017073 0ustar liggesuserssegConstr.glm.fit.boot <- function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]] psi.values<-lista[[2]] if(any(is.na(psi.values[[1]]))) {#se la 1 componente e' NA (fino alla versione 2.0-3 era cosi'... perche' in dev.values c'erano # anche i valori relativi al modello senza psi.. ) dev.values<-dev.values[-1] #remove the 1st one referring to model without psi psi.values<-psi.values[-1] } dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(dev.no.gap=dev.ok, psi=psi.ok) r } #------------- if(is.null(opz$seed)){ mY <- mean(y) sepDec<-if(options()$OutDec==".") "\\." 
else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } #---------------- visualBoot<-opz$visualBoot #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <- 0 n<-length(y) rangeZ <- apply(Z, 2, range) #serve sempre alpha <- opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ opz0 <- opz opz0$maxit.glm <- 2 #o0<-try(suppressWarnings(seg.glm.fit(y, XREG, Z, PSI, w, offs, opz0)), silent=TRUE) #mettere opz o opz0? o0<-try(suppressWarnings(segConstr.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) if(!is.list(o0)){ o0<-try(suppressWarnings(segConstr.glm.fit(y, XREG, Z, opz$PSI1, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) } if(!is.list(o0)) { o0<- suppressWarnings(segConstr.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<- o0$dev.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(segConstr.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$dev.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n # na<- ,,apply(...,2,function(x)mean(is.na(x))) Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 id.uguali<-0 k.psi.change<- 1 alpha<-.1 n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ #if(k==4) browser() ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... 
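#Each bootstrap iteration: resample the data (case resampling if nonParam, otherwise residual
#resampling) and refit from the current best psi; then refit on the original data starting from
#the bootstrap estimate, keeping that solution only if it lowers the objective.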
diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) #if(length(na.omit(diff(all.selected.ss[1:n.boot.rev])))==(n.boot.rev-1) && all(round(diff(all.selected.ss[1:n.boot.rev]),6)==0)){ if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi) alpha<-1-alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(segConstr.glm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(segConstr.glm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } #if(k==7) browser() ### se est.psi.boot non e' cambiato (e puoi vederlo da all.est.psi.boot), allora cambialo! PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o<-try(suppressWarnings(segConstr.glm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o<-try(suppressWarnings(segConstr.glm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$dev.no.gap if(o$dev.no.gap<=ifelse(is.list(o0), o0$dev.no.gap, 10^12)) {o0<-o; k.psi.change<- k} est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$dev.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if(visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), #" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$dev.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " opt.dev = ", sprintf("%1.5f", as.numeric(strsplit(format(o0$dev.no.gap, scientific=TRUE), "e")[[1]][1])), " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. 
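#Early stopping: leave the bootstrap loop once the selected objective value has stayed the same
#for 'break.boot' consecutive samples.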
asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } #id.uguali<-(round(diff(all.selected.ss[c(k-1,k-2)]),6)==0)+id.uguali } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) #browser() # SS.ok<-min(all.selected.ss) # id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) # psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) # est.psi0<-psi.mean # devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ #quando vengono restituiti psi troppo vicini e l'SE non si puo' calcolare, possiamo distanziarli.. #Pero' il processo deve essere esteso nel caso in cui ci sono 3 psi vicini.. min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. 
M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k]) PSI) for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U1[, id.psi.group==i])%*%invA.RList[[i]] # #nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U1<-do.call(cbind, UList) #la matrice del disegno sara' cbind(X, U1) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylmWO(cbind(X, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 L1 } #========= search.min<-function(h, psi, psi.old, X, y, w, offs) { psi.ok<- psi*h + psi.old*(1-h) PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Z - PSI) * (Z > PSI) for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U1[, id.psi.group==i])%*%invA.RList[[i]] # #nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U1<-do.call(cbind, UList) #la matrice del disegno sara' cbind(X, U1) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylm(cbind(X, U1), y), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 L1 } # est.k <- function(x1, y1, L0) { # ax <- log(x1) # .x <- cbind(1, ax, ax^2) # b <- drop(solve(crossprod(.x), crossprod(.x, y1))) # const <- b[1] - L0 # DD <- sqrt(b[2]^2 - 4 * const * b[3]) # kk <- exp((-b[2] + DD)/(2 * b[3])) # return(round(kk)) # } # dpmax <- function(x, y, pow = 1) { # if (pow == 1) # -(x > y) # else -pow * ((x - y) * (x > y))^(pow - 1) # } mylmWO <- function(x, y, w, offs = 0) { sw <- sqrt(w) x1 <- x * sw y <- y - offs y1 <- y * sw b <- drop(solve(crossprod(x1), crossprod(x1, y1))) fit <- drop(x%*%b) r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(w*r^2), df.residual = length(y) - length(b)) o } mylm <- function(x, y, w, offs) { b <- drop(solve(crossprod(x), crossprod(x, y))) fit <- drop(x%*%b) r <- y - fit o <- list(coefficients = b, fitted.values = fit, residuals = r, L0=sum(r^2), df.residual = length(y) - length(b)) o } id.w.offs <- var(offs)<=0 && var(w)<=0 if(id.w.offs){ fitter<-function(x, y, w, offs) .lm.fit(x=x, y=y) #list(coefficients=drop(solve(crossprod(x), crossprod(x, y)))) mylmOK <- mylm search.minOK <- search.min } else { fitter<-function(x, y, w, offs) .lm.fit(x=sqrt(w)*x, y=sqrt(w)*(y-offs)) mylmOK <- mylmWO search.minOK <- search.minWO } # isZero<-function (x, neps = 1, eps = .Machine$double.eps, ...) 
{ # if (is.character(eps)) { # eps <- match.arg(eps, choices = c("double.eps", "single.eps")) # if (eps == "double.eps") { # eps <- .Machine$double.eps # } # else if (eps == "single.eps") { # eps <- sqrt(.Machine$double.eps) # } # } # (abs(x) < neps * eps) # } isZero <- function(v) sapply(v, function(.x) identical(.x,0)) # mylmADD <- function(invXtX, X, v, Xty, y) { # vtv <- sum(v^2) # Xtv <- crossprod(X, v) # m <- invXtX %*% Xtv # d <- drop(1/(vtv - t(Xtv) %*% m)) # r <- -d * m # invF <- invXtX + d * tcrossprod(m) # newINV <- rbind(cbind(invF, r), c(t(r), d)) # b <- crossprod(newINV, c(Xty, sum(v * y))) # fit <- tcrossprod(cbind(X, v), t(b)) # r <- y - fit # o <- list(coefficients = b, fitted.values = fit, residuals = r) # o # } in.psi <- function(LIM, PSI, ret.id = TRUE) { a <- PSI[1, ] <= LIM[1, ] b <- PSI[1, ] >= LIM[2, ] is.ok <- !a & !b if (ret.id) return(is.ok) isOK <- all(is.ok) && all(!is.na(is.ok)) isOK } far.psi <- function(Z, PSI, id.psi.group, ret.id = TRUE, fc = 0.93) { nSeg <- length(unique(id.psi.group)) npsij <- tapply(id.psi.group, id.psi.group, length) nj <- sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z > PSI)[, id.psi.group == .x, drop = FALSE]) + 1) }, simplify = FALSE) ff <- id.far.ok <- vector("list", length = nSeg) for (i in 1:nSeg) { if (length(nj[[i]]) != npsij[i] + 1) nj[[i]] <- tabulate(rowSums((Z >= PSI)[, id.psi.group == i, drop = FALSE]) + 1) id.ok <- (nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] & id.ok[-1] ff[[i]] <- ifelse(diff(nj[[i]]) > 0, 1/fc, fc) } id.far.ok <- unlist(id.far.ok) ff <- unlist(ff) if (!ret.id) { return(all(id.far.ok)) } else { attr(id.far.ok, "factor") <- ff return(id.far.ok) } } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #nuovo per i vincoli RList <- opz$RList nomiUList<-UList<- vector("list",length(RList)) invAList <- lapply(RList, function(.x)rbind(c(1,rep(0,nrow(.x)-1)),diff(diag(nrow(.x))))) invA.RList<-lapply(1:length(RList), function(i) invAList[[i]]%*% RList[[i]]) nomiUList<- lapply(1:length(RList), function(i)rep(i, ncol(RList[[i]]))) n <- length(y) min.step <- opz$min.step rangeZ <- apply(Z, 2, range) alpha <- opz$alpha #ha gia' 2 componenti! #limZ <- apply(Z, 2, quantile, names = FALSE, probs = alpha) #c(alpha, 1 - alpha)) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ #browser() #for(.i in opz$nomiSeg) { ##poni min(z)=0, cosi solve() in step.lm.fit non ha problemi. # if(.i %in% colnames(XREG)) XREG[,.i] <- XREG[,.i] - min(XREG[,.i]) #} psi <- PSI[1, ] id.psi.group <- opz$id.psi.group conv.psi <- opz$conv.psi hh <- opz$h digits <- opz$digits pow <- opz$pow nomiOK <- opz$nomiOK toll <- opz$toll gap <- opz$gap fix.npsi <- opz$stop.if.error dev.new <- opz$dev0 visual <- opz$visual it.max <- old.it.max <- opz$it.max fc <- opz$fc names(psi) <- id.psi.group it <- 0 epsilon <- 10 k.values <- dev.values <- NULL psi.values <- list() #psi.values[[length(psi.values) + 1]] <- NA sel.col.XREG <- unique(sapply(colnames(XREG), function(x) match(x, colnames(XREG)))) if (is.numeric(sel.col.XREG)) XREG <- XREG[, sel.col.XREG, drop = FALSE] invXtX <- opz$invXtX Xty <- opz$Xty if (!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control.", call. = FALSE) if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi values too close each other. Please change (decreases number of) starting values", call. 
= FALSE) n.psi1 <- ncol(Z) Zseg <- XREG[,opz$nomiSeg,drop=FALSE] # minZ<- apply(Zseg, 2, min) Zseg0<- Zseg Zseg <- sweep(Zseg, 2, minZ) XREG <- XREG[, -match(opz$nomiSeg, colnames(XREG)),drop=FALSE] #browser() U <- ((Z - PSI) * (Z > PSI)) #if (pow[1] != 1) U <- U^pow[1] for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U[, id.psi.group==i])%*%invA.RList[[i]] # #nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) #la matrice del disegno sara' cbind(X, U) if(it.max==0){ colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj <- lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) L1 <- sum(obj$residuals^2 * w) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, X=XREG, idU=ncol(XREG)+1:(ncol(U)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = TRUE, constr=list(RList=RList, invAList=invAList, invA.RList=invA.RList, nomiUList =nomiUList)) return(obj) } obj0 <- try(mylmOK(cbind(XREG, U), y, w, offs), silent = TRUE) #if (class(obj0)[1] == "try-error") obj0 <- lm.wfit(cbind(XREG, U), y, w, offs) L0 <- obj0$L0 #sum(obj0$residuals^2 * w) n.intDev0 <- nchar(strsplit(as.character(L0), "\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- L0 psi.values[[length(psi.values) + 1]] <- psi #browser() if (visual) { cat(paste( "iter = ", sprintf("%2.0f", 0), #" dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } id.warn <- FALSE id.psi.changed <- rep(FALSE, it.max) #============================================== inizio ciclo #browser() #Zseg (a differenza di Z) ha una colonna per ogni variabile segmented, indipendentemente dal n.psi tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) idU <- ncol(XREG)+(1:ncol(U)) idZ <- 1:length(psi) + max(idU) while (abs(epsilon) > toll) { it <- it + 1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if (n.psi1 != n.psi0) { U <- ((Z - PSI) * (Z > PSI)) #if (pow[1] != 1) U <- U^pow[1] obj0 <- try(mylm(cbind(XREG, U), y, w, offs), silent = TRUE) if (class(obj0)[1] == "try-error") obj0 <- lm.wfit(cbind(XREG, U), y, w, offs) L0 <- sum(obj0$residuals^2 * w) } else { #V <- dpmax(Z, PSI, pow = pow[2]) V <- (Z>PSI) U <- (Z - PSI) * V V <- -V } for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg[,i], U[, id.psi.group==i])%*%invA.RList[[i]] # nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) X <- cbind(XREG, U, V) #rownames(X) <- NULL #colnames(X)[(ncol(XREG) + 1):ncol(U)] <- paste("U", # 1:ncol(U), sep = "") #, paste("V", 1:ncol(V), sep = "")) obj <- fitter(X, y, w, offs) #lm.wfit(x = X, y = y, w = w, offset = offs) beta.c <- obj$coefficients[idU] coefUList <- lapply(1:length(RList), function(i) (invA.RList[[i]]%*%beta.c[unlist(nomiUList)==i])[-1]) beta.c <- unlist(coefUList) gamma.c <- obj$coefficients[idZ] #[colnames(Z)] if (any(isZero(c(beta.c, gamma.c)))) { if (fix.npsi) { if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or 
at the boundary causing NA estimates.. too many breakpoints being estimated?", call. = FALSE) } else { id.coef.ok <- !isZero(gamma.c) psi <- psi[id.coef.ok] if (length(psi) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } gamma.c <- gamma.c[id.coef.ok] beta.c <- beta.c[id.coef.ok] Z <- Z[, id.coef.ok, drop = FALSE] rangeZ <- rangeZ[, id.coef.ok, drop = FALSE] limZ <- limZ[, id.coef.ok, drop = FALSE] nomiOK <- nomiOK[id.coef.ok] id.psi.group <- id.psi.group[id.coef.ok] names(psi) <- id.psi.group } } psi.old <- psi psi <- psi.old + hh*gamma.c/beta.c #aggiusta la stima di psi.. psi<- adj.psi(psi, limZ) psi<-unlist(tapply(psi, opz$id.psi.group, sort), use.names =FALSE) #browser() a<-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, X=XREG, y=y, w=w, offs=offs, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if (!is.null(digits)) psi <- round(psi, digits) PSI <- matrix(psi, n, ncol = length(psi), byrow = TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] #obj1 <- try(mylm(cbind(XREG, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(XREG, U1), y, w, offs), silent = TRUE) if (visual) { flush.console() cat(paste( "iter = ", sprintf("%2.0f", it), #" dev = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L1), " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " est.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0 <- L1 U <- U1 k.values[length(k.values) + 1] <- use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE, fc = opz$fc) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if (!all(id.psi.ok)) { if (fix.npsi) { psi <- psi * ifelse(id.psi.far, 1, attr(id.psi.far, "factor")) PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) id.psi.changed[it] <- TRUE } else { Z <- Z[, id.psi.ok, drop = FALSE] PSI <- PSI[, id.psi.ok, drop = FALSE] rangeZ <- rangeZ[, id.psi.ok, drop = FALSE] limZ <- limZ[, id.psi.ok, drop = FALSE] nomiOK <- nomiOK[id.psi.ok] id.psi.group <- id.psi.group[id.psi.ok] psi.old <- psi.old[id.psi.ok] psi <- psi[id.psi.ok] names(psi) <- id.psi.group if (ncol(PSI) <= 0) { warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE) return(0) } } } if (it >= it.max) { id.warn <- TRUE break } } #end while.. ############################################################################## if (id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.", sep = ""), call. = FALSE) if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. 
= FALSE) attr(psi.values, "dev") <- dev.values attr(psi.values, "k") <- k.values psi <- unlist(tapply(psi, id.psi.group, sort)) names(psi) <- id.psi.group names.coef <- names(obj$coefficients) #PSI.old <- PSI PSI <- matrix(psi, n, ncol = length(psi), byrow = TRUE) #if (sd(PSI - PSI.old) > 0 || id.psi.changed[length(id.psi.changed)]) { #browser() V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") U <- (Z - PSI) * (Z > PSI) for(i in 1:length(RList)){#trasforma le U UList[[i]]<- cbind(Zseg0[,i], U[, id.psi.group==i])%*%invA.RList[[i]] nomiUList[[i]]<- rep(i, ncol(UList[[i]]) ) } U<-do.call(cbind, UList) #X <- cbind(XREG, U, V) colnames(U) <- paste("U", 1:ncol(U), sep = "") #obj <- mylmOK(x = cbind(XREG, U), y = y, w = w, offs = offs) #L1 <- obj$L0 #browser() obj <- fitter(cbind(XREG, U), y, w, offs) #lm.wfit() #obj <- lm.wfit(cbind(XREG, U), y, w, offs) L1 <- sum(obj$residuals^2 * w) #browser() #idInt<-match("(Intercept)", names(obj$coefficients),0) #obj$coefficients[idInt] <- obj$coefficients[idInt]-sum(obj$coefficients[opz$nomiSeg]*minZ) obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) obj$df.residual <- length(y) - length(obj$coefficients) obj$fitted.values <- y - obj$residuals #names(obj$coefficients) <- names.coef obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, X=XREG, idU=ncol(XREG)+1:(ncol(U)), U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = id.warn, constr=list(RList=RList, invAList=invAList, invA.RList=invA.RList, nomiUList =nomiUList)) #SlopeList <- lapply(1:length(RList), function(i) RList[[i]]%*%beta.c[unlist(nomiUList)==i]) return(obj) } segmented/R/print.summary.stepmented.R0000644000176200001440000000737514606213225017565 0ustar liggesusers`print.summary.stepmented` <- function(x, short = x$short, var.diff = x$var.diff, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"),...){ cat("\n\t***Regression Model with Step Relationship(s)***\n\n") cat( "Call: \n" ) print( x$call ) cat("\nEstimated Jump-Point(s):\n ") est.psi<-x$psi[,c("Est.","St.Err"),drop=FALSE] #browser() est.psi[,1]<-x$psi.rounded[1,] rownames(est.psi)<-rownames(x$psi) print(round(est.psi,4)) #era "signif(,4)" if(!is.null(attr(x$psi.rounded,"break.dates"))) { cat("\ncorrisponding dates: ", attr(x$psi.rounded,"break.dates"),"\n") #cat(" ", attr(o$psi.rounded,"break.dates"),"\n") } if(short){ cat("\nDifference-in-levels parameter(s):\n") #print(x$Ttable[(nrow(x$Ttable)-nrow(x$psi)+1):nrow(x$Ttable),])} nome<-rownames(x$psi) #nome<-as.character(parse("",text=nome)) #aa<-grep("U",rownames(x$Ttable)) #bb<-unlist(sapply(nome,function(xx){grep(xx,rownames(x$Ttable))},simplify=FALSE,USE.NAMES=FALSE)) #cc<-intersect(aa,bb) #indices of diff-slope parameters nomiU<-rownames(x$gap) #idU<-match(nomiU,rownames(x$Ttable)) print(x$Ttable[nomiU,]) } else {cat("\nCoefficients of the linear terms:\n") if(is.null(dim(x$Ttable))){ print(x$Ttable) #printCoefmat(matrix(x$Ttable,nrow=1,ncol=4,dimnames=list(" ",names(x$Ttable))),has.Pvalue=FALSE) } else { printCoefmat(x$Ttable, digits = digits, signif.stars = signif.stars,na.print = "NA", ...) 
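# Note: printCoefmat() above renders the full table of linear-term estimates, adding
# significance stars when 'signif.stars' is TRUE and printing missing entries as "NA";
# with short = TRUE only the difference-in-levels rows (those named in rownames(x$gap))
# are printed instead.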
} } if("summary.lm"%in%class(x)){ #for lm if(var.diff){ for(i in 1:length(x$sigma.new)){ cat("\nResidual standard error ",i,":", format(signif(x$sigma.new[i], digits)), "on", x$df.new[i], "degrees of freedom")} cat("\n") } else { cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", x$df[2], "degrees of freedom\n") } if (!is.null(x$fstatistic)) { cat("Multiple R-Squared:", formatC(x$r.squared, digits = digits)) cat(", Adjusted R-squared:", formatC(x$adj.r.squared, digits = digits), "\n") } } if("summary.glm"%in%class(x)){ #for glm cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", format(x$dispersion), ")\n\n", apply(cbind(paste(format.default(c("Null", "Residual"), width = 8, flag = ""), "deviance:"), format(unlist(x[c("null.deviance", "deviance")]), digits = max(5, digits + 1)), " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 1, paste, collapse = " "), "AIC: ", format(x$aic, digits = max(4, digits + 1)), "\n", sep = "") } if("summary.Arima"%in%class(x)){#for Arima cm <- x$call$method if (is.null(cm) || cm != "CSS") cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", log likelihood = ", format(round(x$loglik, 2)), ", aic = ", format(round(x$aic, 2)), "\n", sep = "") else cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", part log likelihood = ", format(round(x$loglik, 2)), "\n", sep = "") } #browser() invisible(x) #n.boot<-length(na.omit(..$psi.history$all.ss)) if(x$n.boot>0){ cat("\nBoot restarting based on", x$n.boot, "samples.") #if(x$conv.warn) "*not*" else NULL , "attained in",x$it,"iter. (rel. change",paste(signif(x$epsilon,5),")\n",sep="")) cat("\nNumber of iterations in the last fit:",x$it, "(rel. change", paste(signif(x$epsilon,5),")\n",sep="")) } else { cat("\nConvergence",if(x$conv.warn) "*not*" else NULL , "attained in",x$it,"iterations (rel. change",paste(signif(x$epsilon,5),")\n",sep="")) } } segmented/R/seg.lm.fit.boot.r0000644000176200001440000003161314757620626015544 0ustar liggesusersseg.lm.fit.boot <- function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]] psi.values<-lista[[2]] if(any(is.na(psi.values[[1]]))) {#se la 1 componente e' NA (fino alla versione 2.0-3 era cosi'... perche' in dev.values c'erano # anche i valori relativi al modello senza psi.. 
) dev.values<-dev.values[-1] #remove the 1st one referring to model without psi psi.values<-psi.values[-1] } dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- #browser() if(is.null(opz$seed)){ mY <- mean(y) sepDec<-if(options()$OutDec==".") "\\." else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } visualBoot<-opz$visualBoot #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-0 n<-length(y) #rangeZ <- apply(Z, 2, range) #serve sempre rangeZ <- if(is.null(opz$rangeZ)) apply(Z, 2, range) else opz$rangeZ alpha <- opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ #browser() o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) if(!is.list(o0)) { o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, opz$PSI1, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) } #browser() if(!is.list(o0)) { o0<- suppressWarnings(seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) #ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z count.random<-0 k.psi.change<- 1 alpha<-.1 #browser() n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ #if(k==6) browser() ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) #if(length(na.omit(diff(all.selected.ss[1:n.boot.rev])))==(n.boot.rev-1) && all(round(diff(all.selected.ss[1:n.boot.rev]),6)==0)){ if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi) alpha<-1-alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### spostato sotto.. 
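# Each bootstrap iteration, roughly: (i) if the last few selected SS values are identical,
# est.psi0 is jumped to a complementary quantile of Z (block above) to escape the flat spot;
# (ii) the model is refitted on a resampled dataset (case resampling if nonParam, residual
# resampling otherwise); (iii) the bootstrap estimate is then used as a fresh starting value
# for a fit on the original data, which is kept only when it does not worsen the objective.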
#est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(seg.lm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } #if(k==7) browser() ### se est.psi.boot non e' cambiato (e puoi vederlo da all.est.psi.boot), allora cambialo! PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) #opz$it.max<-opz$it.max+1 #browser() o<-try(suppressWarnings(seg.lm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) {o0<-o; k.psi.change<- k} est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k] <- o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } ########################### 11/12/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### if(visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) #est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) cat(paste("boot sample = ", sprintf("%2.0f",k), #" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " opt.dev = ", sprintf("%1.5f", as.numeric(strsplit(format(o0$SumSquares.no.gap, scientific=TRUE), "e")[[1]][1])), " n.psi = ",formatC(length(est.psi0),digits=0,format="f"), " est.psi = ",paste(formatC(est.psi0,digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } #id.uguali<-(round(diff(all.selected.ss[c(k-1,k-2)]),6)==0)+id.uguali } #end n.boot#==================== #browser() all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) #browser() if(is.null(o0$obj)){ #quando vengono restituiti psi troppo vicini e l'SE non si puo' calcolare, possiamo distanziarli.. #Pero' il processo deve essere esteso nel caso in cui ci sono 3 psi vicini.. 
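# min1()/max1() defined below are trimmed versions of min()/max(): they drop the most
# extreme values before taking the extremum. They are used when breakpoints come out too
# close for the standard errors to be computed, so that the psi being pushed apart should
# still leave roughly min.n observations in each segment.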
min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k])=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi) alpha<-1-alpha est.psi0<-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE)) } ########################### 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(segConstr.lm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(segConstr.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } #if(k==7) browser() ### se est.psi.boot non e' cambiato (e puoi vederlo da all.est.psi.boot), allora cambialo! PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o<-try(suppressWarnings(segConstr.lm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o<-try(suppressWarnings(segConstr.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. 
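# If the refit on the original data succeeded, its solution is kept only when the gap-free
# residual sum of squares is not worse than the current best (o0); the selected psi and SS
# of each iteration are logged in all.selected.psi / all.selected.ss, which also allows the
# loop to stop early once the optimum stops changing.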
if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) {o0<-o; k.psi.change<- k} est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if(visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), #" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " opt.dev = ", sprintf("%1.5f", as.numeric(strsplit(format(o0$SumSquares.no.gap, scientific=TRUE), "e")[[1]][1])), " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } #id.uguali<-(round(diff(all.selected.ss[c(k-1,k-2)]),6)==0)+id.uguali } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) # SS.ok<-min(all.selected.ss) # id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) # psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) # est.psi0<-psi.mean # devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ #quando vengono restituiti psi troppo vicini e l'SE non si puo' calcolare, possiamo distanziarli.. #Pero' il processo deve essere esteso nel caso in cui ci sono 3 psi vicini.. min.n <- opz$min.n-1 if(min.n>1){ min1<- function(x, k=min.n-1){ for(i in 1:k) x<-x[-which.min(x)] min(x) } max1<-function(x,k=min.n-1){ for(i in 1:k) x<-x[-which.max(x)] max(x) } } else { min1<-min max1<-max } npsi <- tapply(opz$id.psi.group, opz$id.psi.group, length) nomiAll <- colnames(rangeZ) #rep(opz$nomiSeg, npsi) nomiSeg <- unique(nomiAll) newPsi<-vector("list", length(npsi) ) for(.j in 1:length(npsi)){ psi.j <- sort(est.psi0[opz$id.psi.group==.j]) #psi della stessa variabile segmented id <- nomiSeg[.j]==nomiAll Z.ok <- unique(Z[, id, drop=FALSE][,1]) m.j <- min(limZ[1,id]) M.j <- max(limZ[2,id]) #h=1/1.05 for(.k in 1:length(psi.j)){ id.group<-cut(Z.ok, c(m.j-10^8, psi.j, M.j+10^8), labels=FALSE) n.j<-tabulate(id.group)#<=min.n #per ogni psi calcola il min e il max dei segmenti prima e dopo psi. #se questi segmenti hanno min.n osservazioni considera u min e max fittizzi per evitare che il nuovo psi #modificato porti a segmenti con bassa numerosita'.. 
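# When the segment on one side of psi holds no more than min.n points, a far-away sentinel
# (+/- 1e6) replaces its trimmed max/min below, which should keep the shifted psi from being
# moved towards that side.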
M.j.k<- if(n.j[.k]>0) max1(Z.ok[id.group==.k]) -10^6*(n.j[.k]<=min.n) else -10^6*(n.j[.k]<=min.n) m.j.k<- if(n.j[.k+1]>0) min1(Z.ok[id.group==.k+1])+10^6*(n.j[.k+1]<=min.n) else 10^6*(n.j[.k]<=min.n) psi.j[.k]<- psi.j[.k] + ifelse(abs(M.j.k-psi.j[.k])psi)) #or ((x-psi)_+, -b*I(x>psi)) if obj.seg does not include the coef for the linear "x" f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } estcoef <- if(is.null(.coef)) coef(obj.seg) else .coef if(length(isV)==1) isV<-c(FALSE,isV) n<-length(x.values) #le seguenti righe selezionavano (ERRONEAMENTE) sia "U1.x" sia "U1.neg.x" (se "x" e "neg.x" erano segmented covariates) #nameU<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$U, value = TRUE) #nameV<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$V, value = TRUE) nameU<-obj.seg$nameUV$U[f.U(obj.seg$nameUV$U,x.name)] nameV<-obj.seg$nameUV$V[f.U(obj.seg$nameUV$V,x.name)] #browser() if(is.null(obj.seg$constr)){ diffSlope<-estcoef[nameU] } else { diffSlope<-drop(obj.seg$constr$invA.RList[[match(x.name, obj.seg$nameUV$Z)]]%*%estcoef[nameU])[-1] } est.psi<-obj.seg$psi[nameV, "Est."] se.psi<-obj.seg$psi[nameV, "St.Err"] if(any(is.na(se.psi))) stop("The St.Err. of psi is NA", call. = FALSE) k<-length(est.psi) PSI <- matrix(rep(est.psi, rep(n, k)), ncol = k) SE.PSI <- matrix(rep(se.psi, rep(n, k)), ncol = k) newZ<-matrix(x.values, nrow=n,ncol=k, byrow = FALSE) dummy1<-if(isV[1]) (newZ-PSI)*pnorm((newZ-PSI)/SE.PSI) else (newZ-PSI)*(newZ>PSI) #pmax(newZ-PSI,0) if(psi.est){ V<-if(isV[2]) -pnorm((newZ-PSI)/SE.PSI) else -(newZ>PSI) #ifelse(newZ>PSI,-1,0) dummy2<- if(k==1) V*diffSlope else V%*%diag(diffSlope) #t(diffSlope*t(-I(newZ>PSI))) colnames(dummy2)<- nameV newd<-cbind(x.values,dummy1,dummy2) colnames(newd)<-c(x.name,sub("psi","U", nameV), nameV) #colnames(newd)[1]<- x.name #colnames(newd)<-c(x.name,nameU, nameV) } else { newd<-cbind(x.values,dummy1) colnames(newd)<-c(x.name, sub("psi","U", nameV)) #colnames(newd)[1]<- x.name #colnames(newd)<-c(x.name,nameU) } #if(!x.name%in%names(estcoef)) newd<-newd[,-1,drop=FALSE] #aggiungi (eventualmente) le colonne relative ai psi noti all.psi<-obj.seg$indexU[[x.name]] if(length(all.psi)!=k){ nomi.psi.noti<-setdiff(names(all.psi),nameU) psi.noti<-setdiff(all.psi, est.psi) PSI.noti <- matrix(rep(psi.noti, rep(n, length(psi.noti))), ncol = length(psi.noti)) nomi<-c(colnames(newd),nomi.psi.noti) newZ<-matrix(newZ, nrow=nrow(newZ), ncol=length(psi.noti)) newd<-cbind(newd, (newZ-PSI.noti)*(newZ>PSI.noti)) colnames(newd)<-nomi } rownames(newd)<-NULL return(newd) } #end dummy.matrix() #-------------- blockdiag <- function(...) { args <- list(...) 
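# blockdiag() builds a block-diagonal matrix from the matrices passed in '...': each block
# is padded left and right with zero columns and the padded rows are stacked with rbind().
# Illustrative example (not run): blockdiag(diag(2), matrix(1, 1, 3)) would give a 3 x 5
# matrix with the 2 x 2 identity in the top-left block and a row of ones in the bottom-right.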
nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } #-------------- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #------------- estcoef<- if(is.null(.coef)) coef(ogg) else .coef if(se.fit) { covv<- if(is.null(.vcov)) vcov.segmented(ogg, ...) else .vcov #---Dalla versione 1.2.0 (20/06/20) ho eliminato il controllo sotto per consentire l'utilizzo #----di modelli che restituivano una cov con dimensione diversa dal numero dei coeff lineari (ad es., censReg) #if(!all(dim(.vcov)==c(length(ogg$coef), length(ogg$coef)))) stop("Incorrect dimension of cov matrix", call. = FALSE) if(!all(dim(covv)==c(length(estcoef), length(estcoef)))) stop("dimension of cov matrix and estimated coeffs do not match", call. = FALSE) } #browser() nomeZ <- ogg$nameUV$Z if(is.null(term)){ term <- nomeZ[1] xvalues<-ogg$model[term] } else { if(is.character(term)) term<- ogg$model[term] if(!is.list(term)) stop("term should be a named list") if(!names(term)%in%nomeZ) stop("term is not a segmented variable") xvalues<-term term<-names(term) } n.seg<-1 # if(is.null(xvalues)){ # if(n.seg>1) stop("there are multiple segmented covariates. 
Please specify one.") # xvalues<-ogg$model[nomeZ] # } # if(is.character(xvalues)){ # if(!xvalues %in% nomeZ) stop("'xvalues' is not a segmented covariate") # xvalues<-ogg$model[xvalues] # } # nomeOK<-names(xvalues) # if(length(nomeOK)>1) stop("Please specify one variable") # if(!nomeOK %in% nomeZ) stop("'names(xvalues)' is not a segmented covariate") nomi <- names(estcoef) #nomiSenzaV <- nomiSenzaU <- nomi #nomiSenzaU[match(nomeU, nomi)] <- "" #nomiSenzaV[match(nomeV, nomi)] <- "" idInterc<-grep("ntercept",names(estcoef)) ste.fit<-fit <- vector(mode = "list", length = 1) n.seg<-1 #browser() ind <- match(c(term,ogg$nameUV$U[grep(term, ogg$nameUV$U)]), nomi, 0) ind<-ind[ind!=0] indV <- match(c(ogg$nameUV$V[grep(term, ogg$nameUV$V)]), nomi, 0) #Xfit<-dummy.matrix(unlist(xvalues), term, ogg, isV=FALSE, .coef=estcoef) #if(se.fit) X<-dummy.matrix(unlist(xvalues), term, ogg, isV=isV, .coef=estcoef) #browser() if(is.null(ogg$constr)){ cof <- estcoef[ind] if(!term%in%names(estcoef)) X <-X[,-1,drop=FALSE] } else { #idU.i <- match(nomeU.i, names(estcoef)) cof<-drop(ogg$constr$invA.RList[[match(term, ogg$nameUV$Z)]]%*%estcoef[ind]) names(cof)<-c(term, paste("U",1:(length(cof)-1),".",term,sep="" )) #estcoef<-append(estcoef[-ind], cof, after=ind[1]-1) } idV <- match(grep(term, ogg$nameUV$V, value = TRUE),colnames(X)) Xfit <- X[, -idV, drop=FALSE] fit<-drop(Xfit%*%cof) if(interc && length(idInterc)==1){ fit <- fit + estcoef[idInterc] #if(se.fit) X<-cbind(1,X) ind <- c(idInterc, ind) } #browser() if(se.fit) { V <- covv[c(ind, indV), c(ind, indV)] if(!is.null(ogg$constr)){ B <- ogg$constr$invA.RList[[match(term, ogg$nameUV$Z)]] B <- do.call(blockdiag, list(diag(interc), B, diag(length(indV)))) V <- B %*% V %*% t(B) } #else { # V <- vcov(object) # X <- X[,colnames(V)] #} ste.fit <- sqrt(rowSums((X %*% V) * X)) #sqrt(diag(X%*%Var%*%t(X))) if(inherits(ogg, what = "glm", FALSE) && !link) { ste.fit <- ogg$family$mu.eta(fit)*ste.fit fit <- ogg$family$linkinv(fit) } fit<-list(fit=fit, se.fit=ste.fit) } else { if(inherits(ogg, what = "glm", FALSE) && !link) fit <- ogg$family$linkinv(fit) fit<-list(fit=fit) } return(fit) } segmented/R/lines.stepmented.R0000644000176200001440000000263714563076506016056 0ustar liggesuserslines.stepmented<-function(x, term, bottom=TRUE, shift=FALSE, conf.level=0.95, k=50, pch=18, .vcov=NULL, .coef=NULL,...){ if(missing(term)){ if(length(x$nameUV$Z)>1 ) { stop("please, specify `term'") } else { term<- x$nameUV$Z } } else { if(is.numeric(term)) term<-x$nameUV$Z[term] } if(!term%in%x$nameUV$Z) stop(" unknown 'term' ") ss<-list(...) metodo<- if(!is.null(ss$method)) ss$method else "delta" colore<- if(is.null(ss$col)) 2 else ss$col usr <- par("usr") h<-(usr[4]-usr[3])/abs(k) y<- if(bottom) usr[3]+h else usr[4]-h m<- confint.stepmented(object=x,parm=term,level=conf.level,digits=15,method=metodo,.vcov=NULL, .coef=NULL) #browser() #m<-r[[term]] #FORSE non e' necessaria #if(rev.sgn) m<- -m #ma invece serve il seguente (se length(psi)=1 e rev.sgn=T): m<-matrix(m, ncol=3) if(nrow(m)>1) m<-m[order(m[,1]),] est.psi<-m[,1] lower.psi<-m[,2] upper.psi<-m[,3] if(length(est.psi)>1) { y<- if(shift) y+seq(-h/2,h/2,length=length(est.psi)) else rep(y,length(est.psi)) } #segments(lower.psi, y, upper.psi, y, ...) #arrows(lower.psi, y, upper.psi, y, code=3, angle=90, length=.07, ...) 
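# The confidence interval of each change point is drawn as a horizontal arrow with flat
# heads (code = 3, angle = 90) between the lower and upper limits, and the point estimate
# is overplotted with points() using the requested 'pch'.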
ss$x0=lower.psi ss$y0=y ss$x1=upper.psi ss$y1=y ss$code=3 ss$angle=90 ss$length=.07 ss$lwd=2 ss$col=colore do.call("arrows",ss) points(est.psi, y, type="p", pch=pch, col=colore) } segmented/R/pwr.seg.r0000644000176200001440000001465414533573705014224 0ustar liggesuserspwr.seg<-function(oseg, pow, n, z="1:n/n", psi, d, s, n.range=c(10,300), X=NULL, break.type=c("break","jump"), alpha=.01, round.n=TRUE, alternative=c("two.sided","greater","less"), msg=TRUE, ci.pow=0){ #===Power analysis in segmented regression== #Given the input values (z, psi, d..), this function returns n (when pow is provided) or pow (when n is provided) #pow, n= the fixed power or sample size. One and only one has to be specified #z= a string indicating the covariate understood to have a segmented effect. Such string should be # expressed as a function of the quantile function having p as argument. Namely something like # qexp(p,..), qbeta(p,..), qunif(p,0,1) or even qunif(p, 1, n) for instance. # It can be a vector meaning the actual covariate, but 'pow' has to be missing. # Namely if the covariate is supplied (and n is known), it is assumed that only the relevant power can be estimated # #psi: the breakpoint location (within the covariate range) #d= the slope difference #s= the response variance #n.max= the max sample size to evaluate (ignored if 'n' is provided). However the function can estimate sample sizes # larger than n.max #X: the design matrix including additional linear variables #alpha= the significance level #round.n= if TRUE the 'estimated' sample size is rounded #authors: Nicoletta D'Angelo and Vito Muggeo #--------------- #Examples: ## pwr.seg(n=100,psi=.5, d=2, s=1) ## pwr.seg(pow=.23,psi=5, d=.5, s=5) ## pwr.seg(pow=.23,z="1:n/n",psi=.5, d=1.5, s=1) pwr<-function(x, psi, d, s, X=NULL, alpha=.01, alt=c("two.sided","greater","less"), change=1){ n<-length(x) v<-quantile(x, c(.02,.98), names=FALSE) if(psi<=v[1] || psi>=v[2]) stop("psi outside the covariate range", call.=FALSE) values <- seq(min(x), max(x), length = 20) n1 <- length(values) PSI <- matrix(values, nrow = n, ncol = n1, byrow = TRUE) X1 <- matrix(x, nrow = n, ncol = n1, byrow = FALSE) if(change==1) { X2<- (X1-PSI)*(X1>PSI) u.psi<-((x-psi)*(x>psi)) } else { X2<-1*(X1>PSI) u.psi<- 1*(x>psi) } pmaxMedio <- rowMeans(X2) #pmaxMedio <- ((x-psi)*(x>psi)) if(is.null(X)) X<-cbind(1,x) ImH <- -X %*% solve(crossprod(X),t(X)) diag(ImH)<- 1+ diag(ImH) th <- d * pmaxMedio %*% ImH %*% u.psi #se <- s * drop(sqrt(pmaxMedio %*% ImH %*% pmaxMedio)) se <- s* sqrt(rowSums(pmaxMedio %*% ImH * pmaxMedio)) # #browser() pw<-switch(alternative, "greater" = pnorm(qnorm(alpha, sd = se, lower.tail = FALSE), mean = th, sd = se, lower.tail = FALSE), "less" = pnorm(qnorm(alpha, sd = se, lower.tail = TRUE), mean = th, sd = se, lower.tail = TRUE), "two.sided"= pnorm(qnorm(alpha/2, sd = se, lower.tail = TRUE), mean = th, sd = se, lower.tail = TRUE) + pnorm(qnorm(alpha/2, sd = se, lower.tail = FALSE), mean = th, sd = se, lower.tail = FALSE) ) pw } #======================================== #browser() x<-z alternative <- match.arg(alternative) break.type <- match.arg(break.type) break.type<-if(break.type=="break") 1 else 2 if(!(break.type %in% 1:2)) stop(" 'break.type' should be 1 or 2") if(missing(oseg)){ if(!is.character(x) && !missing(pow)) stop(" if the covariate is provided, 'pow' has to be missing ") if(!is.character(x) && !missing(n)) stop(" if the covariate is provided, 'n' has to be missing ") if(missing(pow)) { if(is.character(x)){ if(missing(n)) stop(" 'n' or 'pow' have 
to be provided") p<-seq(.001,.999,l=n) x<-eval(parse(text= x)) } else { n<-length(x) } pow<-pwr(x=x, psi=psi,d=d, s=s,X=X, alpha=alpha, alt=alternative, change=break.type) if(msg) cat("Est. power:", paste(round(pow,3)), "\n") else return(pow) } else { if(!missing(n)) stop(" Only 'n' *or* 'pow' can be provided") if(pow<=.0001 || pow>=.9999) stop(" 'pow' should be in (.0001, .9999)") n.values<-round(seq(min(n.range), max(n.range), l=50)) K<-length(n.values) pp<-rep(NA, K) x0<-x for(i in 1:K) { n<-n.values[i] p<-seq(.001,.999,l=n) x<-eval(parse(text= x0)) pp[i]<-pwr(x=x, psi=psi, d=d, s=s, X=X, alpha=alpha, alt=alternative, change=break.type) } #browser() if(length(unique(pp))<=3) { cat(stop("Too few distinct values (", unique(pp), ") in the computed power(s)\n", call.=FALSE)) } r<-cbind(n.values, pp) f<-splinefun(r[,2], r[,1], method="monoH.FC") #method="hyman" a<-f(pow) if(round.n) a<-round(a) if(msg) cat("Est. sample size:", paste(a), "\n") else return(a) } } else { #se c'e' l'ogg "segmented" if(length(oseg$nameUV$V)>1) stop("only models with just 1 breakpoint allowed") #introdurre argomento id.psi??? oseg$nameUV$V[id.psi] #browser() x<-oseg$model[,oseg$nameUV$Z] n<-length(x) psi<-oseg$psi[oseg$nameUV$V,2] d<-coef(oseg)[oseg$nameUV$U] s<-summary(oseg)$sigma X<-model.matrix(oseg) X<-X[, setdiff(colnames(X),c(oseg$nameUV$U, oseg$nameUV$V)),drop=FALSE] pow<-pwr(x=x, psi=psi, d=d, s=s, X=X, alpha=alpha, alt=alternative, change=break.type) if(ci.pow>0){ V<-vcov(oseg)[c(oseg$nameUV$U,oseg$nameUV$V),c(oseg$nameUV$U,oseg$nameUV$V)] powi<-rep(NA,ci.pow) for(i in 1:ci.pow){ r<-MASS::mvrnorm(1, c(d,psi), V) di<-r[1] psii<-r[2] powi[i]<-pwr(x=x, psi=psii, d=di, s=s, X=X, alpha=alpha, alt=alternative, change=break.type) } ci<-quantile(powi,c(.025,.975),names=FALSE) m<- paste("Est. power for the current fit:", paste(round(pow[1],3)), " (", paste(round(ci,3), collapse = ", "),")") pow<-c(pow,ci) } else { m <- paste("Est. power for the current fit:", paste(round(pow[1],3))) } if(msg) cat(m, "\n") #if(msg) cat("Est. power for the current fit:", paste(round(pow,3)), # " (", paste(round(ci,3), collapse = ", "),")", "\n") else return(pow) } } segmented/R/stepreg.r0000644000176200001440000012060414757621452014302 0ustar liggesusers#funziona fino a un certo punto.. #E comunque devi aggiunegere il caso di interazioni... #byList nomiBy sono definiti.. #Vedi C:\dati\lavori\jumpoint\fasola\funzioniSalvo\nuove\perpacchetto #stepreg(y~seg(tt, by=x1, npsi=2)+seg(tt, by=cbind(x2,x3))+seg(tt)) stepreg <- function(formula, data, subset, weights, na.action, family=lm, control=seg.control(), transf=NULL, contrasts=NULL, model=TRUE, x=FALSE, var.psi=FALSE, ...){ ##### ==================================================================================== #DA FARE: #1) i psi fissi OK prova #2) eliminare argomento 'transf' #3) se c'e' by consentire un n.psi diverso per categoria di by? ovvero npsi o psi devono essere liste..ok #4) la matrice dei contrasti per imporre vincoli alle slope ok #================================= #Allora considerando seggrowth() qua ci dovrebbero essere problemi in nel predict.. # --------- #a differenza delle funzioni seg*, nei modelli step*, il modello finale da cui estrarre i fitted viene adattato #NON nelle funzioni step.lm.fit, ma in stepmented.(g)lm.. 
Quindi facciamo lo stesso in stepreg() (anche se non dovrebbe essere cosi'--) step.lm.fitSC.boot<-step.lm.fitSC<-step.glm.fitSC.boot<-step.glm.fitSC<-NULL stepConstr.lm.fit.boot<- stepConstr.lm.fit<- stepConstr.glm.fit.boot<-stepConstr.glm.fit<-NULL mylm<-function(x,y,w=1,offs=0){ x1<-x*sqrt(w) y<-y-offs y1<-y*sqrt(w) XtX <- crossprod(x1) b<-drop(solve(XtX,crossprod(x1,y1))) fit<-drop(tcrossprod(x,t(b))) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b), XtX=XtX, w=w) o } #----------- #================================= build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } #================================= #=============================== check.estPsi<-function(.x){ #questo e' utile per verificare che estPsi sia o NA oppure di 0,1 (ma non tutti 0) #l'ho fatta perche' se fosse consentito est=c(F,T) (e non FALSE,TRUE) poi T e F venivano presi come nomi di # variabili in predict.segmented if(length(.x)==1 && all(is.na(.x))){ ris<-TRUE } else { if(!is.numeric(.x)){ ris<-FALSE } else { if(length(setdiff(.x,0:1))==0){ ris<- if(all(.x==0)) FALSE else TRUE } else { ris<-FALSE } } } ris } #=============================== fc<- min(max(abs(control$fc),.8),1) min.step<-control$min.step maxit.glm <- control$maxit.glm alpha<-control$alpha it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot + 2 n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi display <- control$visual #visualBoot<-FALSE #if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} agg<- 1-control$fc # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # # if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K<-control$K h<-control$h if (deparse(substitute(family))=="lm" || (is.character(family) && family=="lm")){ fitter0<-"lm" #get("lm") } else { if (is.character(family)) family<-get(family, mode = "function", envir = parent.frame()) if (is.function(family)) family <- family() if (is.null(family$family)) { print(family) stop("'family' not recognized") } fitter0<-"glm" } s1<-strsplit(as.character(formula)[3],"\\+")[[1]] #separa i termini "additivi".. 
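# The RHS of the formula is split on "+" and the pieces containing seg() are flagged below;
# e.g. (illustrative names) y ~ x1 + seg(z, npsi = 2) yields roughly
# s1 = c("x1 ", " seg(z, npsi = 2)"), and only the second element is treated as a step term.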
idC<-sapply(sapply(lapply(s1, function(x) grep("seg\\(",x)), function(x) (x>=1)), isTRUE) stringa<-s1[idC] #solo i termini con seg if(any(sapply(stringa, function(.x) grepl("\\* seg\\(", .x)))) stop("invalid usage of symbol '*' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\:seg\\(", .x)))) stop("invalid usage of symbol ':' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\):", .x)))) stop("invalid usage of symbol ':' in conjunction with seg()") if(any(sapply(stringa, function(.x) grepl("\\) \\*", .x)))) stop("invalid usage of symbol '*' in conjunction with seg()") call <- match.call() if (missing(data)) data <- environment(formula) tf <- terms(formula, specials = "seg") id.ps<-attr(tf,"specials")$seg #posizione nel modelframe; vettore se ci sono piu' termini..include y ma non da interc mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0L) #"offset", mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") names(mf)[2]<-"formula" #serve se NON hai usato "formula" mf <- eval(mf, parent.frame()) mt <- attr(mf, "terms") intercMt<-attr(mt,"intercept") interc<-intercMt==1 Y <- model.response(mf, "any") if (length(dim(Y)) == 1L) { nm <- rownames(Y) dim(Y) <- NULL if (!is.null(nm)) names(Y) <- nm } if(!is.null(transf)) { Y.orig <- Y Y <- eval(parse(text=transf), list(y=Y)) transf.inv<-splinefun(Y, Y.orig, ties=min, method="monoH.FC") } if(is.null(alpha)) alpha<- max(.05, 1/length(Y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) #browser() .xlivelli<-.getXlevels(mt, mf) weights <- as.vector(model.weights(mf)) if(!is.null(weights) && !is.numeric(weights)) stop("'weights' must be a numeric vector") if(!is.null(weights) && any(weights < 0)) stop("negative weights not allowed") offs <- as.vector(model.offset(mf)) #browser() #funziona sia nella formula che come argomento? testo.ps <-names(mf)[id.ps] nomiCoefUNPEN<-names(mf)[-c(1,id.ps)] X <- if(!is.empty.model(mt)){ model.matrix(mt, mf, contrasts) } else {stop("error in the design matrix")}#matrix(, NROW(Y), 0L) attrContr<-attr(X, "contrasts") n<-nrow(X) #browser() #=========================================================================== #se NON ci sono termini ps if(length(id.ps)<=0) stop("No seg() term. Please, use lm() or glm()") #se ci sono termini ps. 
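# For each seg() term, the attributes attached by seg() are collected below: the covariate
# name(s), the supplied or requested breakpoints (psi / npsi), the 'est' indicators and the
# constraint matrix R, any fixed breakpoints (fix.psi), the transformation f.x, and the
# 'by' variable together with its levels.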
drop.id<-lambda<-S<-B<-BB<-nomiCoefPEN<-nomiVariabPEN<-NULL l<-lapply(testo.ps,function(xx)with(mf,eval(parse(text=xx),envir=data))) nomiPS <- drop(unlist(sapply(l,function(xx)attr(xx,"nomeX")))) psiList <-lapply(l,function(xx)attr(xx,"psi")) npsiList <- lapply(l,function(xx) attr(xx,"npsi")) estList <- lapply(l,function(xx) attr(xx,"est")) if(length(setdiff(drop(unlist(sapply(estList, function(.x){if(any(is.na(.x))) 0 else unique(.x)}))), c(0,1)))>0) stop(" 'est' should include 0's and 1's only") #if(length(intersect(nomiCoefUNPEN, nomiPS))>0) stop("any segmented variable included as linear term too?") RList <- lapply(l,function(xx) attr(xx,"R")) fixpsiList <- lapply(l,function(xx) attr(xx,"fix.psi")) fxList <- lapply(l,function(xx) attr(xx,"f.x")) byList <- lapply(l,function(xx) attr(xx,"by")) nomiBy <- unlist(sapply(l,function(xx) attr(xx,"nomeBy"))) levelsBy <- lapply(l,function(xx) attr(xx,"levelsBy")) names(byList) <- nomiBy #browser() ## questo e' un tentativo per tenere conto di seg(X, npsi=c(2,1,2...)) if(length(nomiPS)>1 && length(npsiList)==1 && length(npsiList[[1]])== length(nomiPS) ) { id.ps <- seq(id.ps, l=length(nomiPS)) npsiList<-sapply(npsiList[[1]], as.list) estList <- rep(estList, length(nomiPS)) psiList <- rep(psiList, length(nomiPS)) byList <- rep(byList, length(nomiPS) ) nomiBy <- rep(nomiBy, length(nomiPS) ) RList <- rep(RList, length(nomiPS) ) fixpsiList <- rep(fixpsiList, length(nomiPS) ) fxList <- rep(fxList, length(nomiPS) ) levelsBy <-rep(levelsBy, length(nomiPS) ) mf<- as.data.frame(sapply(1:ncol(mf), function(.x) mf[.x])) names(mf)[id.ps]<-nomiPS } if(length(nomiPS)!=length(npsiList)){ #prima era length(psiList), ma dovrebbe essere lo stesso.. L'ho cambiato per tener conto di X matrice.. id.ps <- seq(id.ps, l=length(nomiPS)) npsiList <- rep(npsiList, length(nomiPS) ) estList <- rep(estList, length(nomiPS)) psiList <- rep(psiList, length(nomiPS)) byList <- rep(byList, length(nomiPS) ) nomiBy <- rep(nomiBy, length(nomiPS) ) RList <- rep(RList, length(nomiPS) ) fixpsiList <- rep(fixpsiList, length(nomiPS) ) fxList <- rep(fxList, length(nomiPS) ) levelsBy <-rep(levelsBy, length(nomiPS) ) mf<- as.data.frame(sapply(1:ncol(mf), function(.x) mf[.x])) names(mf)[id.ps]<-nomiPS } if(all(sapply(levelsBy, is.null)) && (length(npsiList)!=length(nomiPS))) stop(" 'npsi' is not correctly specified") #testo.ps <-names(mf)[id.ps] nomiCoefUNPEN<-names(mf)[-c(1,id.ps)] X <- if(!is.empty.model(mt)){ model.matrix(mt, mf, contrasts) } else {stop("error in the design matrix")}#matrix(, NROW(Y), 0L) attrContr<-attr(X, "contrasts") n<-nrow(X) limZ<- rangeSmooth<-mVariabili<-NULL #se ci sono termini ps()+ps(..,by) il nome delle variabili smooth vengono cambiati per aggiungere la variabile by nomiPS.orig <- nomiPS nomiPS.By <-paste(nomiPS, nomiBy, sep=":") nomiPS <- unlist(lapply(nomiPS.By, function(.x) sub(":NULL", "", .x))) ####se la stessa variabile e' specificata come ps o termine lineare.. #if(length(intersect(nomiPS,nomiCoefUNPEN))>=1) stop("The same variable specified in seg() and as linear term") #ATTENZIONE.. 
se vuoi subito costruire i nomi ps(x), ps(x):z, ecc...usa i: nomiPS.ps<- sapply(nomiPS.orig, function(.x)paste("seg(",list(.x),")",sep="")) nomiPS.ps<-unlist(lapply(paste(nomiPS.ps, nomiBy, sep=":"), function(.x) sub(":NULL", "", .x))) nomiPS.ps.list<-as.list(nomiPS.ps) #serve lista for(j in id.ps) mVariabili[length(mVariabili)+1]<-mf[j] B<- Bfix <- nomiPS.ps.int.list<- nomiPS.ps.int.list.All<- vector(length=length(mVariabili) , "list") #BFixed<-BB<-Bderiv #browser() variabiliBY<-vector("list", length(mVariabili)) for(j in 1:length(mVariabili)) { if(nomiBy[j]=="NULL"){ # se usuale termine seg() nomiBy.j <- NULL variabileSmooth<- c(mVariabili[[j]]) #c() converte le matrici in vettori, drop() no..! variabileSmooth<- fxList[[j]](variabileSmooth) #for(jj in c("nomeX", "psi", "npsi", "f.x", "nomeBy")) attr(variabileSmooth,jj)<-NULL B[[j]]<- variabileSmooth rangeSmooth[[j]] <- range(variabileSmooth) limZ[[j]] <- quantile(variabileSmooth, names=FALSE, probs=c(alpha[1],alpha[2])) nomiPS.ps.int.list[[j]]<- nomiPS[j] nomiPS.ps.int.list.All[[j]]<- nomiPS.orig[j] } else { #se ci sono termini by if(is.null(levelsBy[[j]])){ #se e' vc con variabile continua nomiBy.j <- nomiBy[j] B[[j]] <- variabileSmooth<- mVariabili[[j]][,1,drop=TRUE] #variabiliBY[[j]] <- mVariabili[[j]][, -1,drop=TRUE] #byList include le variabili by.. #nomiCoefPEN[[j]]<- sapply(1:ncol(B[[j]]), function(x) gsub(":", paste(".",x, ":", sep="") , nomiPS.ps[j])) rangeSmooth[[j]] <- range(variabileSmooth) limZ[[j]] <- quantile(variabileSmooth, names=FALSE, probs=c(alpha[1],alpha[2])) nomiPS.ps.int.list[[j]]<- nomiPS[j] nomiPS.ps.int.list.All[[j]]<- paste(nomiPS.orig[j], strsplit(nomiBy[j], ",")[[1]], sep=":") if(any(strsplit(nomiBy[j], ",")[[1]]=="")){ #se c'e' un termine senza nome (seg(tt, by=cbind(1,x))) .id <- which(strsplit(nomiBy[j], ",")[[1]]=="") for(.idj in 1:length(.id)) nomiPS.ps.int.list.All[[j]][.idj] <- gsub(":", "", nomiPS.ps.int.list.All[[j]][.idj]) } } else {#se e' VC con variabile categoriale nomiBy.j <- nomiBy[j] variabileSmooth<- mVariabili[[j]][,-ncol(mVariabili[[j]]),drop=TRUE] variabileSmooth<- fxList[[j]](variabileSmooth) #attr( mVariabili[[j]], "f.x")(variabileSmooth) variabileBy<- mVariabili[[j]][, ncol(mVariabili[[j]]),drop=TRUE] M<-model.matrix(~0+factor(variabileBy)) B[[j]]<- lapply(1:ncol(M), function(.x) M[,.x]*variabileSmooth) rangeSmooth[[j]] <- lapply(B[[j]], function(.x) range(.x[.x!=0])) limZ[[j]] <- lapply(B[[j]], function(.x) quantile(.x[.x!=0], names=FALSE, probs=c(alpha[1],alpha[2]))) #browser() cond1 <- is.list(psiList[[j]]) cond2 <- length(names(psiList[[j]]))==length(levelsBy[[j]]) cond3<- length(setdiff(names(psiList[[j]]),levelsBy[[j]]))==0 if(cond1&& cond2 && cond3) psiList[[j]]<- psiList[[j]][levelsBy[[j]]] nomiPS.ps.list[[j]] <-paste(nomiPS.ps[j], levelsBy[[j]], sep="") #"seg(age):sex1" "seg(age):sex2" nomiPS.ps.int.list[[j]]<-gsub("[)]", "", gsub("seg[(]", "", nomiPS.ps.list[[j]])) #age:sexM", "age:sexF" nomiPS.ps.int.list.All[[j]]<- nomiPS.ps.int.list[[j]] } } } #end for(j in 1:length(mVariabili)) allNameOK <- unlist(nomiPS.ps.int.list.All) if(length(allNameOK)!=length(unique(allNameOK))) stop("some error in specification of seg() terms") #browser() repl<-pmax(sapply(B,length)*sapply(B,is.list),1) for(i in 1:length(npsiList)){ if(length(npsiList[[i]])==1) { npsiList[[i]] <- rep(npsiList[[i]], repl[i]) if(!is.list(estList[[i]]) && !is.null(levelsBy[[i]])) estList[[i]] <- rep(estList[i], repl[i]) } if(length(nomiPS.ps.int.list[[i]])!=length(npsiList[[i]])) stop(paste(" 'npsi' (its length) is 
not correctly specified in the seg term:",i)) if(!is.null(names(npsiList[[i]]))){ if(length(setdiff(nomiPS.ps.int.list[[i]], names(npsiList[[i]])))!=0) stop(paste(" 'npsi' (its names) is not correctly specified in the seg term:",i)) } } npsiList <- unlist(npsiList) if(!any(sapply(psiList,is.list))) psiList <- rep(psiList, repl) if(!any(sapply(estList,is.list))) estList <- rep(estList, repl) if(!any(sapply(RList,is.list))) RList <- rep(RList, repl) if(!any(sapply(fixpsiList,is.list))) fixpsiList<- rep(fixpsiList, repl) nomiPS.orig <- rep(nomiPS.orig, repl) Bfix <- rep(Bfix, repl) fixpsiList<- rep(fixpsiList, repl) while(any(sapply(B,is.list))){ id.vc<-which((sapply(B, is.list)))[1] nc<-length(B[[id.vc]]) B<-append(B, B[[id.vc]], after = id.vc-1) #for(i in 1:length(B[[id.vc+nc]])) BB<-append(BB, BB[id.vc], id.vc-1) B[[id.vc+nc]]<-NULL #BB[[id.vc+nc]]<-NULL nomiCoefPEN <- append(nomiCoefPEN, nomiCoefPEN[[id.vc]], after = id.vc-1) nomiCoefPEN[[id.vc+nc]]<-NULL psiList <- append(psiList, psiList[[id.vc]], after = id.vc-1) psiList[[id.vc+nc]]<-NULL rangeSmooth <- append(rangeSmooth, rangeSmooth[[id.vc]], after = id.vc-1) rangeSmooth[[id.vc+nc]]<-NULL limZ <- append(limZ, limZ[[id.vc]], after = id.vc-1) limZ[[id.vc+nc]]<-NULL estList <- append(estList, estList[[id.vc]], after = id.vc-1) estList[[id.vc+nc]]<-NULL RList <- append(RList, RList[[id.vc]], after = id.vc-1) RList[[id.vc+nc]]<-NULL fixpsiList <- append(fixpsiList, fixpsiList[[id.vc]], after = id.vc-1) fixpsiList[[id.vc+nc]]<-NULL #se la lista contiene solo NULL, non funziona... #penMatrixList <- append(penMatrixList, penMatrixList[[id.vc]], after = id.vc-1) #penMatrixList[[id.vc+nc]]<-NULL } #browser() #if(!all(sapply(estList,check.estPsi))) stop(" 'est' is misspecified in one or more seg() term") nomiTerminiSEG<-nomiCoefPSI <- nomiCoefU <- NULL nomiPS.ps.unlist.seg <- unlist(nomiPS.ps.list) nomiPS.ps.unlist <- sub("[)]", "", sub("seg[(]", "",nomiPS.ps.unlist.seg )) names(psiList)<- nomiPS.ps.unlist for(i in 1:length(B)) { #nomiCoefPSI[[i]]<- paste(paste("psi",1:length(psiList[[i]]), sep=""), nomiPS.ps.unlist[i], sep=".") ##oppure sep=".psi" nomiTerminiSEG[[i]]<-rep(nomiPS.ps.unlist[i], length(psiList[[i]])) } #nomiCoefU<-lapply(nomiCoefPSI, function(.x) sub("psi","U",.x )) #nomiCoefZ<-lapply(nomiCoefPSI, function(.x) sub("psi","Z",.x )) nomiSeg<- unique(unlist(nomiTerminiSEG)) #browser() #FINALMENTE (speriamo..:-)) #nomiCoefZ, nomiCoefpsi, nomiCoefU sono liste con nomi che includono sia le possibili interazioni, sia il n. dei breakpoints #Anche nomiTerminiSEG e' della stessa dimensione ma i nomi ignorano il n.dei breakpoints (questa serve per rangeZ) #nomiSeg #npsiList #psiList #estList #RList # if(is.null(names(estList))) { # names(estList)<-nomiSeg # } else { # if(any(sapply(names(estList), function(.x).x==""))) stop(" 'estList' is only partially named. 
# Or all or no name allowed.") # } #browser() if(any(sapply(estList, is.list))) stop(" One or more 'est' components misspecified") npsiList1<-id.contrR <- rep(NA, length(B)) for(j in 1:length(B)){ K <- if(!is.na(npsiList[nomiSeg[j]])) npsiList[nomiSeg[j]] else npsiList[j] npsiList1[j]<- K if(any(is.na(psiList[[j]]))){ if(control$quant) { psiList[[j]]<- quantile(B[[j]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE) } else { psiList[[j]]<- (min(B[[j]])+ diff(range(B[[j]]))*(1:K)/(K+1)) } } else { K<-npsiList1[j]<-length(psiList[[j]]) } if(!is.null(fixpsiList[[j]])) { Bfix[[j]]<- sapply(sort(fixpsiList[[j]]), function(.x) 1*(B[[j]]>.x)) #colnames(Bfix[[j]])<- paste("U", 1:length(fixpsiList[[j]]),".fixed.",nomiPS.orig[j], sep="") colnames(Bfix[[j]])<- paste("U", 1:length(fixpsiList[[j]]),".fixed.",nomiPS.ps.unlist[j], sep="") } #se per qualche termine ci sono le matrici dei vincoli sulle slope j.ok=match(nomiSeg[j], names(RList), nomatch=0) j.ok <-if(j.ok>0) j.ok else j if(!any(is.na(RList[[j.ok]]))){ RList[[j]] <- RList[[j.ok]] id.contrR[j] <-TRUE } else { j.ok=match(nomiSeg[j], names(estList), nomatch=0) j.ok <-if(j.ok>0) j.ok else j if(!any(is.na(estList[[j.ok]]))){ if(length(estList[[j.ok]])!=(K+1)) stop(" 'est' is not compatible with 'n.psi' ") #browser() RList[[j]]<-diag(K+1)[,estList[[j.ok]]==1,drop=FALSE] id.contrR[j] <-TRUE } else { RList[[j]]<-diag(K+1) id.contrR[j] <-FALSE } } #nomiCoefPSI[[j]]<- paste(paste("psi",1:length(psiList[[j]]), sep=""), nomiPS.ps.unlist[j], sep=".") #nomiCoefU[[j]]<- paste(paste("U",1:length(psiList[[j]]), sep=""), nomiPS.ps.int.list.All[[j]], sep=".") #nomiTerminiSEG[[j]]<-rep(nomiPS.ps.unlist[j], length(psiList[[j]])) } for(i in 1:length(B)) { nomiCoefPSI[[i]]<- paste(paste("psi",1:length(psiList[[i]]), sep=""), nomiPS.ps.unlist[i], sep=".") ##oppure sep=".psi" nomiTerminiSEG[[i]]<-rep(nomiPS.ps.unlist[i], length(psiList[[i]])) } #browser() nomiCoefU<-lapply(nomiCoefPSI, function(.x) sub("psi","U",.x )) nomiCoefZ<-lapply(nomiCoefPSI, function(.x) sub("psi","Z",.x )) # npsii <- sapply(psiList,length) id.psi.group <- rep(1:length(psiList), npsii) Z<- lapply(1:length(B), function(.x) matrix(B[[.x]], nrow=n, ncol=npsiList1[[.x]])) Z<- do.call(cbind,Z) #NB11111 colnames(Z) <- unlist(nomiCoefZ) #unlist(nomiTerminiSEG) colnames(Z) <- unlist(nomiTerminiSEG) #nomiPS, nomiPS.By, nomiPS.orig, nomiPS.ps, nomiPS.ps.list, nomiCateg, nomiInterCateg, nomiCoefPEN #nomiPS: "x", "z" (vettore) #nomiPS.orig: come "nomiPS" #Se ci sono interazioni (by) #nomiPS "x:g" #nomiPS.orig: "x", "x", "x".. La stessa variabile ripetuta per il n.dei gruppi # #nomiPS.ps: "seg(x)", "seg(z)" (vettore) [con by: "seg(x):g"] #nomiPS.ps.list "seg(x)", "seg(z)" (lista) [lista con by: "seg(x):g1" "seg(x):g2" "seg(x):g3" ..] #nomiInterCateg: "x:g1" "x:g2" "x:g3" .. #======================================================================================================== #browser() X<- X[, !startsWith(colnames(X),"seg("), drop=FALSE] idZ <- unlist(tapply(id.psi.group, id.psi.group, function(.x) c(TRUE, rep(FALSE, length(.x)-1)))) Z.ok<-Z[, idZ, drop=FALSE] colnames(Z.ok) <- nomiPS.ps.unlist #X<-cbind(X, Z.ok) #Z.ok include anche i termini lineari delle variabili segmented #colnames(Z)<- unlist(nomiCoefPEN) initial <- unlist(psiList) PSI <- matrix(initial, n, length(initial), byrow = TRUE) #browser() #NB la matrie del disegno X include in nomi "seg(x)" e non va bene perche' poi da problemi con i #nomi dei coef dell'oggetto.. Quindi bisogna sostituire questi nomi!!! 
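# At this point Z holds one column per breakpoint, PSI the matching starting values
# (user-supplied psi, or quantiles / equally spaced values when only npsi was given) and
# X the remaining linear terms; below, fixed breakpoints (if any) are appended to X as
# step dummies and the fit is dispatched to the lm/glm step fitters, with or without
# constraints and bootstrap restarting.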
#non serve perche gia' i nomi sono ok.. #id.segX <-grep( "seg[(]" , colnames(X)) #colnames(X)[id.segX]<-gsub("[)]", "", gsub("seg[(]", "", colnames(X)[id.segX])) if(any(!sapply(Bfix, is.null))){ X<-cbind(X, do.call(cbind, Bfix)) } #browser() #colnames(X)[unlist(id.psList)] <- nomiPS.orig #X[,nomiPS.orig] <- Z[, unique(colnames(Z)), drop=FALSE] #nomiCoefPEN include i nomi le interazioni con i livelli (nel caso vc) e anche del numero dei psi #[1] "U1.x" "U2.x" "U1.z" if(is.null(weights)) weights<-rep(1,n) orig.offs<-offs if(is.null(offs)) offs<-rep(0,n) invXtX<-Xty<-NULL if(is.null(alpha)) alpha<- max(.05, 1/length(Y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) #rangeZ <- apply(Z, 2, range) #Z ha molti 0 se e' il prodotto con dummy (quando c'e' seg(x, by)) limZ<-do.call(cbind, lapply(limZ, function(.x){if(is.list(.x)) do.call(cbind, .x) else cbind(.x)} )) #limZ<-matrix(sapply(1:length(npsii), function(.x) rep(limZ[,.x],npsii[.x])), nrow=2, byrow = FALSE) limZ <- do.call(cbind, lapply(1:length(npsii), function(.x) matrix(limZ[,.x],nrow=2,ncol=npsii[.x]))) rangeZ <- do.call(cbind, lapply(1:length(npsii), function(.x) matrix(rangeSmooth[[.x]],nrow=2,ncol=npsii[.x]))) #browser() colnames(rangeZ) <- unlist(nomiTerminiSEG) #sapply(byList, function(.x) {if(length(.x)>0 && !is.matrix(.x)) 1 else ncol(.x)}) #browser() #13/03/24: ho tolto dev0=var(Y)*(n-1) perche' # con Pois con contegg bassi viene molto piccola!! e quindi l'algoritmo non partiva! # e comunque si dovrebbe chiamare L0 # #browser() byList <- lapply(byList, function(.x) if(is.vector(.x)) matrix(.x) else .x) opz<-list(toll=toll,h=h,stop.if.error=stop.if.error, display=display,it.max=it.max,nomiOK=unlist(nomiCoefU), usestepreg=TRUE, fam=family, eta0=NULL, maxit.glm=maxit.glm, id.psi.group=id.psi.group, gap=gap, limZ=limZ, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, pow=pow, #visualBoot=visualBoot, digits=digits, fc=fc, RList=RList, nomiSeg=nomiSeg, seed=control$seed, npsii=npsii, agg=agg, byList=byList,rangeZ=rangeZ, tol.opt=control$tol.opt) #browser() if(any(sapply(levelsBy, is.null)) && any(!sapply(byList, is.null))){ #se ci sono Struct Changes idSC<-TRUE if(fitter0=="lm"){ if(n.boot <=0 ) { obj <- step.lm.fitSC(Y, X, Z, PSI, weights, offs, opz) return(obj) } else { obj <- step.lm.fitSC.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) seed<- obj$seed } } else { if(n.boot <=0 ) { obj <- step.glm.fitSC(Y, X, Z, PSI, weights, offs, opz) return(obj) } else { obj <- step.glm.fitSC.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) seed<- obj$seed } } } else { idSC<-FALSE if(any(id.contrR)){ if(fitter0=="lm"){ if(n.boot <= 0) { obj <- stepConstr.lm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <- stepConstr.lm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) seed<- obj$seed } class0<- "lm" if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) } else { if(n.boot<=0){ obj <-stepConstr.glm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <-stepConstr.glm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) seed<- obj$seed } class0<-c("glm","lm") } } else { if(fitter0=="lm"){ if(n.boot <= 0) { obj <- step.lm.fit(Y, X, Z, PSI, weights, offs, opz) } else { #browser() obj <- 
step.lm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot = n.boot, size.boot = size.boot, random = random, break.boot = break.boot) seed<- obj$seed } class0<-"lm" #if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) } else { if(n.boot<=0){ obj <-step.glm.fit(Y, X, Z, PSI, weights, offs, opz) } else { obj <-step.glm.fit.boot(Y, X, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) seed<- obj$seed } class0<-c("glm","lm") } } } #browser() #da modificare... vedi stepmented.lm o stepmented.glm if(!is.list(obj)){ warning("No breakpoint estimated", call. = FALSE) return(invisible(NULL)) } # id.warn <- obj$id.warn it <- obj$it epsilon <- obj$epsilon psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart #id.psi.group<-obj$id.psi.group #id.psi.group c'e' gia'.... npsi.groups <- tapply(id.psi.group, id.psi.group, length) #Nelle funzioni step servono solo i psi e le U (i beta.c NON servono) #i psi sono gia' ordinati #psi<-unlist(tapply(psi, id.psi.group, sort)) Z0 <-apply(Z,2,sort) npsi<- sum(npsii) ris.psi<-cbind(Est.=psi, St.Err=NA) name.Z <- unlist(nomiTerminiSEG) #e nel caso di Structural changes? U <- obj$U #le U sono calcolate sui psi non-rounded, pero' mi sa che e' lo stesso... psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j]psiTry[j]) Lnew <- final.fit.f(fitter0) if(Lnewpsi.rounded[1,j]) } } } psi.rounded <- psi.roundedOK } #se vuoi fare un controllo anche sulla soluzione precedente #questo potrebbe servire per aumentare un po' la toll di optimize()??? # psi.rounded<-sapply(1:npsi, function(j) Z0[sum(Z0[,j]psiTry1[j]) # Lnew1 <- final.fit.f(fitter0) # psiTry2[j] <- psi.rounded[3,j] #il successivo # U[,j]<- 1*(Z[,j]>psiTry2[j]) # Lnew2 <- final.fit.f(fitter0) # # psiOK.j <- c(psiTry1[j], psi.roundedOK[2,j] , psiTry2[j])[which.min(Lnew1, L0, Lnew2)] # U[,j]<- 1*(Z[,j]>psiOK.j) # psi.roundedOK[1,j]<-psiOK.j # } # # all.psi.ok<-psi.roundedOK[1,] #include le soluzioni # #il problema e' # id=sapply(1:ncol(psi.rounded), function(.x) which(all.psi.ok[.x]==data.frame(psi.rounded)[[.x]])) # psi.roundedOK<-sapply(1:ncol(psi.rounded), function(.x) psi.rounded[id[.x]+0:1,.x]) # colnames(U)<- nomiU <-unlist(nomiCoefU) nomiVxb <- unlist(nomiCoefPSI) nomiV<- gsub("psi", "V", nomiVxb) colnames(psi.rounded)<-rownames(ris.psi)<-names(psi)<-nomiVxb rownames(psi.rounded)<-c("inf [","sup (") se.psi<-rep(NA, npsi) if(fitter0=="lm"){ class0<-"lm" objV <- if(is.null(weights)) lm.fit(cbind(X, U), Y, offset = offs) else lm.wfit(cbind(X, U), Y, weights, offset = offs) objV$df.residual <- objV$df.residual- length(psi) L0 <- sum(weights*objV$residuals^2) if(var.psi) { s2 <- L0/objV$df.residual R <- chol2inv(objV$qr$qr) se.psi <- sqrt(diag(R)*s2)[match(nomiVxb, names(coef(objV)),0)] } } else { class0<-c("glm", "lm") eta0 <- attr(obj$SumSquares.no.gap, "eta") #obj$eta0 objV <- try(suppressWarnings(glm.fit(cbind(X, U), y = Y, offset = offs, weights = weights, family = opz$fam, #control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE) #obj$obj$linear.predictors objV$df.residual <- objV$df.residual- length(psi) L0 <- objV$deviance if (length(offs) && attr(mt, "intercept") > 0L) { #se c'e' un offset devi calcolare la null.deviance (come fa glm()) obj0 <- try(suppressWarnings(glm.fit(X[, "(Intercept)", drop = FALSE], y = Y, offset = offs, weights = weights, family = opz$fam, #control = glm.control(maxit = maxit.glm), etastart = eta0, intercept=TRUE)), silent = TRUE) # obj0 <- 
eval(call(if (is.function(method)) "method" else method, # x = X[, "(Intercept)", drop = FALSE], y = Y, mustart = fit$fitted.values, # weights = weights, offset = offset, family = family, # control = control, intercept = TRUE)) if (!obj0$converged) warning("fitting to calculate the null deviance did not converge -- increase 'maxit'?") objV$null.deviance <- obj0$deviance } if(var.psi) { R <- chol2inv(objV$qr$qr) s2 <- 1 if(!opz$fam$fam%in%c("poisson","binomial")) s2<- L0/objV$df.residual se.psi <- sqrt(diag(R)*s2)[match(nomiVxb, names(coef(objV)),0)] } } if(any(id.new.result) && display) cat(" Better objective found:", L0, "at psi =", psi.rounded[1,], "\n") objV$rank <- objV$rank + length(psi) if(!is.null(objV$aic)){ objV$aic <- objV$aic + 2*length(psi) } #objV$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #Z = name.Z #browser() #objV$nameUV$formulaSegAllTerms<- paste("~",paste(nomiSeg, collapse="+")) #paste("~", paste(sapply(all.seg.form, function(.x) strsplit(paste(.x), "~"))[2,],collapse="+")) objV$rangeZ<-obj$rangeZ objV$call <- match.call() objV$psi<-ris.psi objV$psi.history <- psi.values objV$psi.rounded <- psi.rounded if(n.boot>0) objV$seed <- seed #browser() objV$Z <- Z.ok #Z[,unique(name.Z),drop=FALSE] all.seg.form<-NULL mf1<-mf[1] for(i in 2:ncol(mf)) { if(i %in% id.ps){ l<-attributes(mf[[i]]) if(!is.null(l$by)){ if(!l$nomeBy%in%names(mf)){ m<-data.frame(mf[[i]][,1],l$by) colnames(m) <- c(l$nomeX, l$nomeBy) } else { m<-data.frame(mf[[i]][,1]) colnames(m) <- l$nomeX } all.seg.form[[length(all.seg.form)+1]]<-as.formula( paste("~0+", l$nomeX, "*", l$nomeBy, "-", l$nomeX)) } else { m <- data.frame(mf[[i]]) colnames(m) <- l$nomeX all.seg.form[[length(all.seg.form)+1]]<- as.formula(paste("~", l$nomeX)) } } else { m <- mf[i] } mf1<-cbind(mf1, m) } names(all.seg.form)<-nomiPS #costruisci la formulaLin.. Attenzione non tiene conto di eventuali vincoli sulle pendenze. splitFo <- strsplit(as.character(formula),"[+]") #allX.lin<-paste(c(splitFo[[3]][-grep("seg[(]", splitFo[[3]])], unique(nomiPS.orig)), collapse="+") #anche le variabili seg termLin <- splitFo[[3]][-grep("seg[(]", splitFo[[3]])] if(length(termLin)>0){ #se ci sono altre variabili lineari (i.e. non-seg) allX.lin <- paste(termLin , collapse="+") #solo i termini non-seg! formulaLin <- as.formula(paste(splitFo[[2]], splitFo[[1]], allX.lin)) #formula escluso i termini seg nomiVarLin<- setdiff(all.vars(formulaLin), splitFo[[2]]) #vettore di nomi ("x", "z" ) #termLin <- strsplit(allX.lin,"[+]")[[1]] #se allX.lin e' una formula, vettore di termini ("poly(x,2)", "z") Z.in.obj<-intersect(nomiVarLin, nomiSeg) #nomiSeg=name.Z if(length(Z.in.obj)>0){ f.x<-matrix(NA, 150, ncol(Z.ok[,Z.in.obj,drop=FALSE])) #prima era nrow(objF$Z) invece che 100 for(j in 1:length(Z.in.obj)){ termLin.ok <- grep(Z.in.obj[j], termLin, value=TRUE) dd<-data.frame(seq(min(Z.ok[,Z.in.obj[j]]), max(Z.ok[,Z.in.obj[j]]), l=nrow(f.x))) names(dd)<- Z.in.obj[j] M <- model.matrix(reformulate(termLin.ok, intercept=FALSE), data=dd) f.x[,j]<-M%*% coef(objV)[colnames(M)] } colnames(f.x)<-Z.in.obj objV$f.x<-f.x } } else { formulaLin <- if(interc) as.formula(paste(splitFo[[2]], splitFo[[1]], "1")) else as.formula(paste(splitFo[[2]], splitFo[[1]], "0")) } #browser() objV$nameUV <- list(U = drop(nomiU), V = nomiV, Z = name.Z) #objV$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiPS.orig) #nomiPS.orig?? 
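# The block above also evaluates, on a 150-point grid, the fitted contribution of any
# purely linear terms that involve a segmented variable (e.g. the variable entering
# linearly besides its seg() term) and stores it in objV$f.x; presumably this is what
# the downstream methods use to recover the linear part without rebuilding the model
# matrix.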
objV$nameUV$formulaSeg<- all.seg.form objV$nameUV$formulaSegAllTerms<- paste("~", paste(sapply(all.seg.form, function(.x) strsplit(paste(.x), "~"))[2,],collapse="+")) objV$formulaLin<- formulaLin objV$terms <- mt objV$y<-Y if(x) objV$x <- X objV$contrasts <- attrContr objV$xlevels <- .xlivelli objV$it <- it objV$epsilon <- epsilon objV$id.warn <- id.warn objV<- structure(c(objV, list(offset=orig.offs))) class(objV)<-c("stepmented", class0) objV } segmented/R/seg.glm.fit.r0000644000176200001440000004310014726060241014726 0ustar liggesusersseg.glm.fit<-function(y,XREG,Z,PSI,w,offs,opz,return.all.sol=FALSE){ #------------------------- useExp.k=TRUE #----------------- #NB iniziato a modificare il 28/01 (dopo l'invio della 1.4-0) search.min <- function(h, psi, psi.old, X, y, w, offs) { psi.ok<- psi*h + psi.old*(1-h) #PSI <- matrix(rep(psi.ok, rep(n, length(psi.ok))), ncol = length(psi.ok)) PSI <- matrix(psi.ok, nrow=n, ncol = length(psi.ok), byrow = TRUE) U1 <- (Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(suppressWarnings(glm.fit(x = cbind(X, U1), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$dev attr(L1, "eta") <- obj1$linear.predictor L1 } #---------------- # est.k<-function(x1,y1,L0){ # ax<-log(x1) # .x<-cbind(1,ax,ax^2) # b<-drop(solve(crossprod(.x),crossprod(.x,y1))) # const<-b[1]-L0 # DD<-sqrt(b[2]^2-4*const*b[3]) # kk<-exp((-b[2]+ DD) /(2*b[3])) # return(round(kk)) # # # ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0 # # a<-uniroot(ff, c(log(x[4]), 3.4)) # } # #----------------- # dpmax<-function(x,y,pow=1){ # #deriv pmax # if(pow==1) -(x>y) #ifelse(x>y, -1, 0) # else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) # } #-------------------- in.psi<-function(LIM, PSI, ret.id=TRUE){ #check if psi is inside the range a<-PSI[1,]LIM[2,] is.ok<- !a & !b #TRUE se psi e' OK if(ret.id) return(is.ok) isOK<- all(is.ok) && all(!is.na(is.ok)) isOK} #------------ far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE, fc=.93) { #check if psi are far from the boundaries ..s # returns TRUE, if fine. #id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI" non se ne accorge, mentre Z>=PSI, si.. 
Il contrario e vero con estremo inf e Z>PSI nSeg<-length(unique(id.psi.group)) npsij<-tapply(id.psi.group,id.psi.group,length) nj<-sapply(unique(id.psi.group), function(.x) { tabulate(rowSums((Z>PSI)[,id.psi.group==.x,drop=FALSE])+1) }, simplify = FALSE) ff<-id.far.ok<-vector("list",length=nSeg) for(i in 1:nSeg){ if(length(nj[[i]])!=npsij[i]+1) nj[[i]]<- tabulate(rowSums((Z>=PSI)[,id.psi.group==i,drop=FALSE])+1) id.ok<-(nj[[i]] >= 2) id.far.ok[[i]] <- id.ok[-length(id.ok)] #& id.ok[-1] #esattamente uguale al numero di psi del gruppo i ff[[i]]<-ifelse(diff(nj[[i]])>0, 1/fc, fc) } id.far.ok<-unlist(id.far.ok) ff<-unlist(ff) if(!ret.id) { return(all(id.far.ok)) } else { attr(id.far.ok,"factor") <- ff return(id.far.ok) } #if(ret.id) return(id.far.ok) else return(all(id.far.ok)) } #end far.psi #----------- adj.psi<-function(psii, LIM) {pmin(pmax(LIM[1,],psii),LIM[2,])} #----------- eta0<-opz$eta0 fam<-opz$fam maxit.glm<-opz$maxit.glm #-------------- n<- if(is.matrix(y)) nrow(y) else length(y) min.step<-opz$min.step #rangeZ <- apply(Z, 2, range) alpha<-opz$alpha rangeZ <- if(is.null(opz$rangeZ)) apply(Z, 2, range) else opz$rangeZ limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=c(alpha[1],alpha[2])) else opz$limZ psi<-PSI[1,] #browser() psi<- adj.psi(psi, limZ) PSI<- matrix(psi,nrow=n, ncol=ncol(PSI), byrow=TRUE) id.psi.group<-opz$id.psi.group #conv.psi<-opz$conv.psi digits<-opz$digits pow<-opz$pow nomiOK<-opz$nomiOK toll<-opz$toll hh<-opz$h gap<-opz$gap #fix.npsi<-opz$fix.npsi fix.npsi<-opz$stop.if.error dev.new<-opz$dev0 visual<-opz$visual it.max<-old.it.max<-opz$it.max fc<-opz$fc names(psi)<-id.psi.group it <- 0 epsilon <- 10 k.values<-dev.values<- NULL psi.values <-list() #psi.values[[length(psi.values) + 1]] <- NA #id.psi.ok<-rep(TRUE, length(psi)) sel.col.XREG<-unique(sapply(colnames(XREG), function(x)match(x,colnames(XREG)))) if(is.numeric(sel.col.XREG))XREG<-XREG[,sel.col.XREG,drop=FALSE] #elimina le ripetizioni, ad es. le due intercette.. #================== # invXtX<- opz$invXtX # Xty<-opz$Xty #=================== if(!in.psi(limZ, PSI,FALSE)) stop("starting psi out of the range.. see 'alpha' in seg.control", call.=FALSE) if(!far.psi(Z,PSI,id.psi.group,FALSE)) stop("psi starting values too close each other or at the boundaries. Please change them (e.g. set 'quant=TRUE' in seg.control()), or decrease their number.", call. 
= FALSE) n.psi<-n.psi1<-ncol(Z) #============================================== V <- (Z>PSI) U <- (Z-PSI)*V #pmax((Z - PSI), 0)^pow[1] V<- -V #if(pow[1]!=1) U<-U^pow[1] if(it.max==0){ obj <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, etastart = eta0)) colnames(U) <- paste("U", 1:ncol(U), sep = "") V <- -(Z > PSI) colnames(V) <- paste("V", 1:ncol(V), sep = "") obj$coefficients <- c(obj$coefficients, rep(0, ncol(V))) obj$epsilon <- epsilon obj$it <- it obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, dev.no.gap = obj$dev, id.psi.group = id.psi.group, idU=ncol(XREG)+1:(length(psi)), id.warn = TRUE) return(obj) } #browser() if(!opz$usesegreg){ dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi psi.values[[length(psi.values) + 1]] <- NA #nessun psi } if(is.null(opz$fit.psi0)){ obj <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, etastart = opz$eta0)) L0 <- obj$dev eta0 <- obj$linear.predictors } else { L0 <- opz$fit.psi0$L0 eta0 <- opz$fit.psi0$eta0 } n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi #psi iniziali #browser() #sum(opz$fam$dev.resids(y,mean(y),y)) if(is.null(maxit.glm)){ Nboot <- if(is.null(opz$Nboot)) 0 else opz$Nboot maxit.glm1 <- rep(1:it.max + Nboot, 1:it.max+1) #2*rep(1:it.max, 1:it.max) maxit.glm1 <- pmin(maxit.glm1, 25) } else { maxit.glm1 <- rep(maxit.glm, it.max) } # #============================================== if (visual) { cat(paste("iter = ", sprintf("%2.0f",0), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%5.0f", NA), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " ini.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #============================================== id.warn <- FALSE id.psi.changed<-rep(FALSE, it.max) tolOp <-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) idU <- ncol(XREG)+ 1:n.psi idV <- 1:n.psi + max(idU) #browser() while (abs(epsilon) > toll) { it<-it+1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if(n.psi1!=n.psi0){ V<- (Z > PSI) U <- (Z - PSI) * V V<- -V idU <- ncol(XREG)+ 1:n.psi1 idV <- 1:n.psi1 + max(idU) #if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)) eta0<-obj0$linear.predictors L0< - obj0$dev } # V <- dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0) X <- cbind(XREG, U, V) #rownames(X) <- NULL #colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = "")) obj <- suppressWarnings(glm.fit(X, y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)) eta0<-obj$linear.predictors #beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] #obj$coefficients[idU] #gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")] #obj$coefficients[idV] beta.c <- obj$coefficients[idU] gamma.c <- obj$coefficients[idV] #if(it==2) browser() if(any(is.na(c(beta.c, gamma.c)))){ if(fix.npsi) { #stop("Estimates of 
beta or gamma are NA. Probably too many breakpoints being estimated.", call.=FALSE) if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } else { id.coef.ok<-!is.na(gamma.c) psi<-psi[id.coef.ok] if(length(psi)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } gamma.c<-gamma.c[id.coef.ok] beta.c<-beta.c[id.coef.ok] Z<-Z[, id.coef.ok, drop=FALSE] rangeZ <- rangeZ[,id.coef.ok, drop=FALSE] limZ <- limZ[,id.coef.ok, drop=FALSE] nomiOK<-nomiOK[id.coef.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.coef.ok] names(psi)<-id.psi.group } } psi.old<-psi psi <- psi.old + hh*gamma.c/beta.c psi<- adj.psi(psi, limZ) #limZ or rangeZ??? psi<-unlist(tapply(psi, id.psi.group, sort), use.names =FALSE) #############################aggiusta la stima di psi (nel range.. dopo in limZ) #DIREZIONE #if(it==2) browser() a <-optimize(search.min, c(0,1), psi=psi, psi.old=psi.old, X=XREG, y=y, w=w, offs=offs, tol=tolOp[it]) k.values[length(k.values) + 1] <- use.k <- a$minimum L1<- a$objective #L1.k[length(L1.k) + 1] <- L1<- a$objective psi <- psi*use.k + psi.old* (1-use.k) psi<- adj.psi(psi, limZ) if(!is.null(digits)) psi<-round(psi, digits) #PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) PSI <- matrix(psi, n, length(psi), byrow=TRUE) #--modello con il nuovo psi V <- (Z>PSI) U<-(Z-PSI)*V V<- -V eta0<- attr(a$objective, "eta") #if(pow[1]!=1) U1<-U1^pow[1] # obj1 <- try(suppressWarnings(glm.fit(cbind(XREG, U1), y = y, offset = offs, # weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE) # L1<- if(class(obj1)[1]=="try-error") L0+10 else obj1$dev # use.k<-k<-1 # L1.k<-NULL # L1.k[length(L1.k)+1]<-L1 # # while(L1>L0){ # k<-k+1 # use.k <- if(useExp.k) 2^(k-1) else k # psi <- psi.old + (gamma.c/beta.c)/(use.k*h) # if(!is.null(digits)) psi<-round(psi, digits) # PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) # U1<-(Z-PSI)*(Z>PSI) # if(pow[1]!=1) U1<-U1^pow[1] # obj1 <- try(suppressWarnings(glm.fit(cbind(XREG, U1), y = y, offset = offs, # weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE) # L1<- if(class(obj1)[1]=="try-error") L0+10 else obj1$dev # L1.k[length(L1.k)+1]<-L1 # if(1/(use.k*h)= it.max) { id.warn <- TRUE break } } #end while_it ##============================================================================= if(id.psi.changed[length(id.psi.changed)]) warning(paste("Some psi (", (1:length(psi))[!id.psi.far], ") changed after the last iter.",sep=""), call. = FALSE) if(id.warn) warning(paste("max number of iterations (", it,") attained",sep=""), call. = FALSE) attr( psi.values, "dev") <- dev.values attr( psi.values, "k")<- k.values #ordina i breakpoints.. psi<-unlist(tapply(psi, id.psi.group, sort)) names(psi)<-id.psi.group names.coef<-names(obj$coefficients) #obj e' quello vecchio che include U1,.. V1,... PSI.old<-PSI #PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) PSI <- matrix(psi, nrow=n, ncol = length(psi), byrow=TRUE) #U e V possono essere cambiati (rimozione/ordinamento psi.. 
) per cui si deve ricalcolare il tutto, altrimenti sarebbe uguale a U1 e obj1 #if(sd(PSI-PSI.old)>0 || id.psi.changed[length(id.psi.changed)]){ U <- (Z-PSI)*(Z>PSI) #colnames(U)<-paste("U", 1:ncol(U), sep = "") V <- -(Z>PSI) #colnames(V)<-paste("V", 1:ncol(V), sep = "") obj <- try(suppressWarnings(glm.fit(cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm1[it]), etastart = eta0)), silent = TRUE) L1<- obj$dev obj$coefficients<-c(obj$coefficients, rep(0,ncol(V))) names(obj$coefficients)<-names.coef #browser() obj$epsilon <- as.numeric(epsilon) obj<-list(obj=obj,it=it, psi=psi, psi.values=psi.values, U=U,V=V,rangeZ=rangeZ, epsilon=as.numeric(epsilon),nomiOK=nomiOK, dev.no.gap=L1, id.psi.group=id.psi.group, idU=ncol(XREG)+1:(length(psi)), id.warn=id.warn, eta0=eta0) #inserire id.psi.ok? return(obj) } segmented/R/print.summary.segmented.R0000644000176200001440000000710014424421626017356 0ustar liggesusers`print.summary.segmented` <- function(x, short = x$short, var.diff = x$var.diff, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"),...){ cat("\n\t***Regression Model with Segmented Relationship(s)***\n\n") cat( "Call: \n" ) print( x$call ) cat("\nEstimated Break-Point(s):\n ") est.psi<-x$psi[,-1,drop=FALSE] rownames(est.psi)<-rownames(x$psi) print(round(est.psi,3)) #era "signif(,4)" # cat("\nt value for the gap-variable(s) V: ",x$gap[,3],"\n") #if(any(abs(x$gap[,3])>1.96)) cat(" Warning:", sum(abs(x$gap[,3])>1.96),"gap coefficient(s) significant at 0.05 level\n") if(short){ cat("\nDifference-in-slopes parameter(s):\n") #print(x$Ttable[(nrow(x$Ttable)-nrow(x$psi)+1):nrow(x$Ttable),])} nome<-rownames(x$psi) #nome<-as.character(parse("",text=nome)) #aa<-grep("U",rownames(x$Ttable)) #bb<-unlist(sapply(nome,function(xx){grep(xx,rownames(x$Ttable))},simplify=FALSE,USE.NAMES=FALSE)) #cc<-intersect(aa,bb) #indices of diff-slope parameters nomiU<-rownames(x$gap) #idU<-match(nomiU,rownames(x$Ttable)) print(x$Ttable[nomiU,]) } else {cat("\nCoefficients of the linear terms:\n") if(is.null(dim(x$Ttable))){ print(x$Ttable) #printCoefmat(matrix(x$Ttable,nrow=1,ncol=4,dimnames=list(" ",names(x$Ttable))),has.Pvalue=FALSE) } else { printCoefmat(x$Ttable, digits = digits, signif.stars = signif.stars,na.print = "NA", ...) 
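# printCoefmat() is the standard 'stats' formatter: it prints the coefficient table
# with significance stars (if requested) and handles NA entries; the 'short = TRUE'
# branch above prints only the difference-in-slope (U) rows instead of the full table.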
} } if("summary.lm"%in%class(x)){ #for lm if(var.diff){ for(i in 1:length(x$sigma.new)){ cat("\nResidual standard error ",i,":", format(signif(x$sigma.new[i], digits)), "on", x$df.new[i], "degrees of freedom")} cat("\n") } else { cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", x$df[2], "degrees of freedom\n")} if (!is.null(x$fstatistic)) { cat("Multiple R-Squared:", formatC(x$r.squared, digits = digits)) cat(", Adjusted R-squared:", formatC(x$adj.r.squared, digits = digits), "\n")} } if("summary.glm"%in%class(x)){ #for glm cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", format(x$dispersion), ")\n\n", apply(cbind(paste(format.default(c("Null", "Residual"), width = 8, flag = ""), "deviance:"), format(unlist(x[c("null.deviance", "deviance")]), digits = max(5, digits + 1)), " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 1, paste, collapse = " "), "AIC: ", format(x$aic, digits = max(4, digits + 1)), "\n", sep = "") } if("summary.Arima"%in%class(x)){#for Arima cm <- x$call$method if (is.null(cm) || cm != "CSS") cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", log likelihood = ", format(round(x$loglik, 2)), ", aic = ", format(round(x$aic, 2)), "\n", sep = "") else cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", part log likelihood = ", format(round(x$loglik, 2)), "\n", sep = "") } invisible(x) if(x$n.boot>0){ cat("\nBoot restarting based on", x$n.boot, "samples. Last fit:") #if(x$conv.warn) "*not*" else NULL , "attained in",x$it,"iter. (rel. change",paste(signif(x$epsilon,5),")\n",sep="")) } cat("\nConvergence",if(x$conv.warn) "*not*" else NULL , "attained in",x$it,"iterations (rel. change",paste(signif(x$epsilon,5),")\n",sep="")) } segmented/R/step.ts.fit.boot.r0000644000176200001440000002130614757620734015755 0ustar liggesusersstep.ts.fit.boot <- function(y, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #browser() if(is.null(opz$seed)){ mY <- mean(y) sepDec<-if(options()$OutDec==".") "\\." 
else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } #------------- #obj<- jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) #-------------- visualBoot<-opz$display opz$display<-FALSE #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <- 0 opz0<-opz opz0$agg<-.2 n<-length(y) rangeZ <- apply(Z, 2, range) #serve sempre alpha <- opz$alpha limZ <- apply(Z, 2, quantile, names = FALSE, probs = alpha) o0 <-try(suppressWarnings(step.ts.fit(y, XREG, Z, PSI, opz0, return.all.sol=FALSE)), silent=TRUE) #browser() if(!is.list(o0)) { o0<- suppressWarnings(step.ts.fit(y, XREG, Z, PSI, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(step.ts.fit(y, XREG, Z, PSI1, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z count.random<-0 agg.values<-seq(.2,.05,l=n.boot) ###INIZIO BOOT alpha<-.1 corr=1.2 #browser() n.boot.rev<- 3 #3 o 4? 
for(k in seq(n.boot)){ #if(k==2) browser() #browser() diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ qpsi <- sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi.cor <- sapply(1:ncol(Z),function(i)mean((corr*est.psi0[i])>=Z[,i])) qpsi <- ifelse(abs(qpsi-.5)<=.2, qpsi.cor, alpha) alpha<-1-alpha corr<-1/corr est.psi0 <- sapply(1:ncol(Z),function(i)quantile(Z[,i], probs=qpsi[i],names=FALSE)) est.psi0 <- adj.psi(est.psi0, limZ) #est.psi0<- jitter(est.psi0, amount=min(diff(est.psi0))) } ########################### 25/7/24 ##### est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort)) ######################################### PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(step.ts.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], opz)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(step.ts.fit(yy, XREG, Z.orig, PSI, opz)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2])) est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort)) } PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 opz$agg<-agg.values[k] o <-try(suppressWarnings(step.ts.fit(y, XREG, Z.orig, PSI, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o <-try(suppressWarnings(step.ts.fit(y, XREG, Z, PSI1, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(est.psi0) Lp<-length(unlpsi) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(Lp,digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #conta i valori ss uguali.. cosi puoi fermarti prima.. 
asss<-na.omit(all.selected.ss) if(length(asss)>break.boot){ if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) #SS.ok<-min(all.selected.ss) #id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) #psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) #est.psi0<-psi.mean # #devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0 <- try(step.ts.fit(y, XREG, Z, PSI1, opz1), silent=TRUE) warning("The final fit can be unreliable (possibly mispecified segmented relationship)", call.=FALSE, immediate.=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris o0$seed<-seed #rm(.Random.seed, envir=globalenv()) return(o0) } segmented/R/step.ts.fit.r0000644000176200001440000002362014726062124015001 0ustar liggesusersstep.ts.fit<-function(y, x.lin, Xtrue, PSI, opz, return.all.sol=FALSE){ #---------------------- search.min<-function(h, psi, psi.old, X, y, id.fix.psi=NULL) { psi.ok<- psi*h + psi.old*(1-h) psi.ok[id.fix.psi]<- psi.old[id.fix.psi] PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylm(cbind(X, U1), y), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(.lm.fit(cbind(X, U1), y), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #r<-sum(obj1$residuals^2 * w) L1 } toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } ### ----- mylm <-function(x,y){ #,w=1,offs=0 in step.st.fit() non ci sovrebbero essere w e offs #x1<-x*sqrt(w) #y<-y-offs #y1<-y*sqrt(w) #o<-.lm.fit(y=y,x=x) #b<-o$coefficients #fit<- o$fitted.values #r<-o$residuals b<-drop(solve(crossprod(x),crossprod(x,y))) #x<-x1 #b<-solve(crossprod(x),crossprod(x,y)) #browser() #fit<- drop(tcrossprod(x,t(b))) fit<- drop(x%*%b) r<-y-fit o<-list(coefficients=b, fitted.values=fit, residuals=r, L0=sum(r^2), df.residual=length(y)-length(b)) o } ### ----- isZero <- function(v) sapply(v, function(.x) identical(.x,0)) ###------ #----------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #------------ tol<-opz$toll display<-opz$display it.max<-opz$it.max dev0<-opz$dev0 #useExp.k<-opz$useExp.k #min.step<- opz$min.step #=.0001 #conv.psi<-opz$conv.psi #=FALSE alpha<-opz$alpha #browser() limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) #limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha)) fix.npsi<-opz$fix.npsi agg<-opz$agg h<-opz$h npsii<-opz$npsii npsi<- sum(npsii) #opz$npsi P<-length(npsii) #P<-opz$P digits<-opz$digits rangeZ<-opz$rangeZ # pos.vec <- 1:npsi # pos <- vector("list", P) # ind <- 0 pos<- tapply(1:npsi, rep(1:P, npsii), list) i <- 0 agg <- rep(agg, npsi) #browser() # direz <- matrix(NA, it.max, npsi) # conv <- rep(FALSE, npsi) # ind.conv <- 
NULL n<-length(y) plin<-ncol(x.lin) epsilon<-10 k.values<-dev.values<- NULL psi.values <-list() dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi psi.values[[length(psi.values) + 1]] <- NA #PSI0<- matrix(psi0, n, npsi, byrow = TRUE) #XREG <- cbind(x.lin, Xtrue>PSI) #obj0 <- .lm.fit(x=XREG, y=y) #try(mylm(XREG, y), silent = TRUE) #L0 <- sum(obj0$residuals^2) #*ww if(it.max==0){ obj <- lm.wfit(x = cbind(x.lin, Xtrue>PSI), y = y) L1 <- sum(obj$residuals^2) obj$epsilon <- epsilon idZ<-(plin+1):(plin+ncol(PSI)) b<- obj$coef[idZ] obj <- list(obj = obj, psi = PSI[1,], psi.values = psi.values, rangeZ = rangeZ, beta.c=b, epsilon = epsilon, SumSquares.no.gap = L1, id.warn = TRUE) return(obj) } L0<- mylm(cbind(x.lin, Xtrue>PSI),y)$L0 # valore con psi iniziale n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi0<-PSI[1,] psi.values[[length(psi.values) + 1]] <- psi0 #psi iniziali #============================================== if (display) { unlpsi<- unlist(psi0) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",0), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%5.0f", NA), " n.psi = ",formatC(Lp, digits=0,format="f"), " ini.psi = ",paste(formatC(unlpsi[1:min(5,Lp)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } id.warn <- FALSE low <- apply(Xtrue, 2, min) up <- apply(Xtrue, 2, max) #L1<-L0+10 tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) idZ<-(plin+1):(plin+ncol(PSI)) idW<-(plin+ncol(PSI)+1): ( plin+2*ncol(PSI)) #============================================== while (abs(epsilon) > tol) { i <- i + 1 #if(i==1) browser() xx <- Xtrue[,cumsum(npsii),drop=FALSE] for (p in 1:P) { psis <- sort(psi0[pos[[p]]]) gruppi <- cut(xx[,p], breaks = c(low[p] - 0.1, psis, up[p]), labels = FALSE) if(any(is.na(gruppi))) stop(paste("too many breaks for step term #", p, "?"), call.=TRUE) points <- c(low[p], psis, up[p]) right <- c(low[p], points[2:(npsii[p] + 1)] + agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[3:(npsii[p] + 2)] - points[2:(npsii[p] + 1)]), NA) left <- c(NA, points[2:(npsii[p] + 1)] - agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[2:(npsii[p] + 1)] - points[1:npsii[p]]), up[p]) for (j in 1:(npsii[p] + 1)) { xx.j <- xx[,p][gruppi == j] xx[,p][gruppi == j] <- right[j] + (xx.j - points[j]) * ((left[j + 1] - right[j])/(points[j + 1] - points[j])) } } XX<-toMatrix(xx, npsii) PSI<- matrix(psi0, n, npsi, byrow = TRUE) W <- (1/(2 * abs(XX - PSI))) Z <- (XX * W + 1/2) XREG <- cbind(x.lin, Z, W) #obj<-try(mylm(XREG,y,w=ww,offs=offs), silent = TRUE) #if(class(obj)[1]=="try-error") obj <- .lm.fit(y = y, x = XREG) #obj <- lm.wfit(y = y, x = XREG, offset = offs, w=ww ) #b <- obj$coef[(2:(sum(k) + 1))] #g <- obj$coef[((sum(k) + 2):(2 * sum(k) + 1))] b<- obj$coef[idZ] g<- obj$coef[idW] #if(any(is.na(c(b, g)))){ if(any(isZero(c(b, g)))) { if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } psi1 <- -g/b psi1<- psi0+ h*(psi1-psi0) psi1<- adj.psi(psi1, limZ) #limZ rangeZ psi1<-unlist(tapply(psi1, opz$id.psi.group, sort), use.names =FALSE) #la f e' chiaramente a gradino per cui meglio dividere.. 
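# The profile of the objective is a step function of the step length h, so the line
# search is split in two: optimize() is run on (0, 0.5] and on [0.5, 1) and the better
# of the two minima is kept; search.min() refits the model at
# psi = h * psi.new + (1 - h) * psi.old and returns the residual sum of squares.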
a0<-optimize(search.min, c(0,.5), psi=psi1, psi.old=psi0, X=x.lin, y=y, tol=tolOp[i]) a1<-optimize(search.min, c(.5,1), psi=psi1, psi.old=psi0, X=x.lin, y=y, tol=tolOp[i]) a<-if(a0$objective<=a1$objective) a0 else a1 if(a$objectivePSI1) #obj1 <- try(mylm(XREG1, y, ww, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(XREG1, y, ww, offs), silent = TRUE) delta<- psi1-psi0 if (display) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(psi1) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",i), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ",formatC(Lp, digits=0, format="f"), " est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- (L0 - L1)/(abs(L0) + 0.1) L0<-L1 k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi1 dev.values[length(dev.values) + 1] <- L0 if (i >= it.max) { id.warn <- TRUE break } psi0<-psi1 } #end while_it psi1 <-unlist(tapply(psi1, opz$id.psi.group, sort)) PSI<- matrix(psi1, n, npsi, byrow = TRUE) U <- 1*(Xtrue>PSI) #ATTENZIONE .. Assume che obj sia stato stimato sempre! obj<-list(obj=obj, psi=psi1, psi.values=psi.values, rangeZ=rangeZ, SumSquares.no.gap=L1, beta.c=b, it=i, epsilon=epsilon, id.warn=id.warn, U=U) return(obj) } #end jump.fit segmented/R/confint.segmented.R0000644000176200001440000012454414523174166016206 0ustar liggesusers`confint.segmented` <- function(object, parm, level=0.95, method=c("delta", "score", "gradient"), rev.sgn=FALSE, var.diff=FALSE, is=FALSE, digits=max(4, getOption("digits") - 1), .coef=NULL, .vcov=NULL, ...){ #...: argomenti da passare solo a confintSegIS. Questi sono "h", "d.h", "bw" (bw="(1/n)^(1/2)"), nvalues, msgWarn o useSeg. method<-match.arg(method) cls<-class(object) if(length(cls)==1) cls<-c(cls, cls) if(method%in%c("score", "gradient") && !all(cls[1:2]==c("segmented","lm"))) stop("Score- or Gradient-based CI only work with segmented lm models") if(!is.null(object$constr) && method%in%c("score", "gradient")) stop(" Score/Gradient CI with constrained fits are not allowed") estcoef<-if(is.null(.coef)) coef(object) else .coef COV<- if(is.null(.vcov)) vcov(object,var.diff=var.diff, is=is, ...) else .vcov #=========== #browser() if(missing(parm)) { parm<- object$nameUV$Z if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(parm)) } else { if(is.numeric(parm)) parm<-object$nameUV$Z[parm] } if(! all(parm %in% object$nameUV$Z)) stop("invalid 'parm' name", call.=FALSE) if(length(parm)>1) { warning("There are multiple segmented terms. The first is taken", call.=FALSE, immediate. = TRUE) parm<-parm[1] } #======================================================================================================= #========== metodo Delta #======================================================================================================= confintSegDelta<- function(object, parm, level=0.95, rev.sgn=FALSE, var.diff=FALSE, is=FALSE, ...){ #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. 
it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- blockdiag <- function(...) { args <- list(...) nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } # if(!"segmented"%in%class(object)) stop("A segmented model is needed") if(var.diff && length(object$nameUV$Z)>1) { var.diff<-FALSE warning(" 'var.diff' set to FALSE with multiple segmented variables", call.=FALSE) } #nomi delle variabili segmented: #browser() nomeZ<-parm if(length(rev.sgn)!=length(nomeZ)) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) rr<-list() z<-if("lm"%in%class(object)) abs(qt((1-level)/2,df=object$df.residual)) else abs(qnorm((1-level)/2)) for(i in 1:length(nomeZ)){ #per ogni variabile segmented `parm' (tutte o selezionata).. #nomi.U<-grep(paste("\\.",nomeZ[i],"$",sep=""),object$nameUV$U,value=TRUE) #nomi.V<-grep(paste("\\.",nomeZ[i],"$",sep=""),object$nameUV$V,value=TRUE) nomi.U<- object$nameUV$U[f.U(object$nameUV$U, nomeZ[i])] nomi.V<- object$nameUV$V[f.U(object$nameUV$V, nomeZ[i])] m<-matrix(,length(nomi.V),3) colnames(m)<-c("Est.",paste("CI","(",level*100,"%",")",c(".low",".up"),sep="")) if(!is.null(object$constr)){ R<-object$constr$invA.RList[[i]] diffSlope<-drop(R%*%estcoef[nomi.U])[-1] Rpsi <- blockdiag(R, diag(length(nomi.V))) COV1 <- Rpsi %*% COV[c(nomi.U, nomi.V),c(nomi.U, nomi.V)] %*% t(Rpsi) COV1<-COV1[-1,-1] #la prima linea e' relativa alla prima slope.. NON Serve nomi.U<-gsub("psi", "U", nomi.V) rownames(COV1)<-colnames(COV1)<-c(nomi.U, nomi.V) names(diffSlope)<-nomi.U } else { diffSlope<- estcoef[nomi.U] COV1 <- COV[c(nomi.U, nomi.V),c(nomi.U, nomi.V)] } for(j in 1:length(nomi.V)){ #per ogni psi della stessa variabile segmented.. sel<-c(nomi.V[j],nomi.U[j]) #15/12/20 V e' costruita sopra.. 
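# Delta-method interval computed below: with gamma the (rescaled) gap coefficient and
# beta the difference-in-slope coefficient, and V their 2x2 covariance block, the
# standard error of the breakpoint is
#   SE(psi) = sqrt((V[1,1] + V[2,2]*(gamma/beta)^2 - 2*V[1,2]*(gamma/beta)) / beta^2)
# and the interval is est.psi +/- z * SE(psi), where z is a t quantile for 'lm' fits
# and a standard normal quantile otherwise.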
V<-COV1[sel, sel] #questa e' vcov di (psi,U) #b<-estcoef[sel[2]] #diff-Slope b<- diffSlope[sel[2]] th<-c(b,1) #orig.coef<-drop(diag(th)%*% estcoef[sel]) #sono i (gamma,beta) th*coef(ogg)[sel] orig.coef<-drop(diag(th)%*% c(estcoef[sel[1]], b )) gammma<-orig.coef[1] est.psi<-object$psi[sel[1],2] V<-diag(th)%*%V%*%diag(th) #2x2 vcov() di gamma e beta se.psi<-sqrt((V[1,1]+V[2,2]*(gammma/b)^2-2*V[1,2]*(gammma/b))/b^2) r<-c(est.psi, est.psi-z*se.psi, est.psi+z*se.psi) if(rev.sgn[i]) r<-c(-r[1],rev(-r[2:3])) m[j,]<-r } #end loop j (ogni psi della stessa variabile segmented) #CONTROLLA QUESTO:..sarebbe piu' bello m<-m[order(m[,1]),,drop=FALSE] rownames(m)<-nomi.V #if(nrow(m)==1) rownames(m)<-"" else m<-m[order(m[,1]),] if(rev.sgn[i]) { #m<-m[nrow(m):1,] rownames(m)<-rev(rownames(m)) } rr[[length(rr)+1]]<- m #signif(m,digits) } #end loop i (ogni variabile segmented) names(rr)<-nomeZ return(rr[[1]]) } #end_function #======================================================================================================= #========== metodo Score #======================================================================================================= confintSegIS<-function(obj, parm, d.h=1.5, h=2.5, conf.level=level, ...){ #wrapper per ci.IS().. #d.h: incremento di h.. #se h o d.h sono negativi, tutto il range #========================================================================== #========================================================================== #========================================================================== ci.IS <- function(obj.seg, nomeZ, nomeUj, stat = c("score", "gradient"), transf=FALSE, h = -1, sigma, conf.level = 0.95, use.z = FALSE, is = TRUE, fit.is = TRUE, var.is=TRUE, bw=NULL, smooth = 0, msgWarn = FALSE, n.values = 50, altro = FALSE, cadj = FALSE, plot = FALSE, add=FALSE, agg=FALSE, raw=FALSE, useSeg=FALSE) { #smooth: se 0, i valori decrescenti dello IS score vengono eliminati; porta ad una curva U troppo ripida e quindi IC troppo stretti.. # se 2, B-spline con vincoli di monot e di "passaggio da est.psi" #useSeg, se TRUE (e se smooth>0) viene applicato segmented per selezionare solo i rami con pendenza negativa # dovrebbe essere usato con smooth>0 e se h=-1 (all.range=TRUE) #transf: funziona solo con grad #obj.seg: oggetto restituito da segmented #h: costante per definire il range dei valori di riferimento. Should be >1. # Se NULL viene considerato l'intervallo 'est.psi +/- se*(zalpha*1.5) dove zalpha ? il quantile che dipende da conf.level # Se qualche negativo, viene considerato il range della x dal quantile 0.02 a quello 0.98. # Se >0 il range e' est.psi +/- h* zalpha * se.psi # sigma se mancante viene assunta la stima presa dall'oggetto obj.seg.. # use.z: se TRUE i quantili della z, otherwise la t_{n-p} # stat: which statistic use # agg if TRUE, and plot=TRUE and est.psi!= dalla radice che annulla lo IS score, allora l'IC ? shiftato.. # is, fit.is, var.is: logical, induced smoothing? # plot: la linea nera e' lo score originale (if raw=TRUE) # la linea rossa e' lo score IS # le linea verde e' lo IS score con i pezzi decrescenti eliminati # se useSeg=T aggiunge una linea segmented.. # # # conf.level: confidence levels can be vector # fit.is: i fitted del modello nullo provengono da un modello in cui (x-psi)_+ ? # sostituito dall'approx smooth? # bw: the bandwidth in the kernel.. If NULL the SE(\hat\psi) is used, otherwise use a string, something like "1/n" or "sqrt(1/n)" # cadj: se TRUE l'approx di Ca.... 
che fa riferimentimento ad una Normale # #========================================================================== #========================================================================== #========================================================================== u.psiX <- function(psi, sigma, x, y, XREG = NULL, scale = FALSE, est.psi = NULL, interc = FALSE, pow = c(1, 1), lag = 0, robust = FALSE, GS = FALSE, is = FALSE, se.psi, var.is = TRUE, which.return = 3, fit.is = FALSE, altro = FALSE, cadj = FALSE, transf=FALSE) { # Restituisce score e/o var, e/o score stand. (vedi 'which.return') Inoltre se robust=TRUE calcola la # var robusta est.psi: o NULL oppure uno scalare con attributi 'b' e 'fitted' se lag>0 allora la # variabile V viene modificata nell'intorno di psi. Valori di pow diversi da uno sono ignorati quando # lag>0 pow: due potenze dei termini (x-psi)_+ e I(x>psi) se GS=TRUE calcola la statistica GS. # richiede 'est.psi', e 'scale' ? ignorato which.return. 3 means the scaled score, 1= the unscaled # score, 2=the sqrt(variance) (see the last row) # is: se TRUE lo smoothing indotto al num # var.is: se TRUE lo smooth indotto viene usato anche per il denom (ovvero per la var dello score) # U.is: se TRUE (provided that is=TRUE) the design matrix includes (x-psi)*pnorm((x-psi)/se) rather than pmax(x-psi,0) #altro: se TRUE (and fit.is=TRUE), U.psi = (x-psi)*pnorm((x-psi)/se) + h*dnorm((x-psi)/h) #-------------------------------------------- varUpsi.fn <- function(X, sigma = 1, r = NULL) { #X: the design matrix. The 1st column corresponds to psi #r: the residual vector. If NULL the usual model-based (rather than robust) variance is returned. INF<- if(length(sigma)==1) # (sigma^2)*crossprod(X) else crossprod(X,diag(sigma^2))%*%X INF <- crossprod(X)/(sigma^2) if (is.null(r)) { vv <- INF[1, 1] - (INF[1, -1] %*% solve(INF[-1, -1], INF[-1, 1])) } else { u <- X * r/(sigma^2) V <- crossprod(u) #nrow(X)*var(u) I22 <- solve(INF[-1, -1]) vv <- V[1, 1] - INF[1, -1] %*% I22 %*% V[1, -1] - V[1, -1] %*% I22 %*% INF[-1, 1] + INF[1, -1] %*% I22 %*% V[-1, -1] %*% I22 %*% INF[-1, 1] } return(vv) } # f.f<-function(x,psi,l=0){ x1<-1*I(x>psi) id<-which(x1>=1)[1] id.change <- # max(1,(id-l)):min(length(x),(id+l)) val<-((1/(2*l+1))*( 1:(2*l+1)))[1:length(id.change)] # #if(length(id.change)!=length(val)) return x1[id.change]<-val x1<- -x1 x1 } dpmax <- function(x, y, pow = 1) { # derivata prima di pmax; se pow=1 ? 
-I(x>psi) if (pow == 1) -(x > y) else -pow * (x>y)*(x - y)^(pow - 1) #ifelse(x > y, -1, 0) else -pow * pmax(x - y, 0)^(pow - 1) } if (cadj && which.return != 3) stop("cadj=TRUE can return only the studentized score") if (is && missing(se.psi)) stop("is=TRUE needs se.psi") if (interc) XREG <- cbind(rep(1, length(y)), XREG) if(fit.is) { XX<- if(altro) cbind((x-psi)*pnorm((x - psi)/se.psi)+se.psi*dnorm((x-psi)/se.psi), XREG) else cbind((x-psi)*pnorm((x-psi)/se.psi), XREG) o <- lm.fit(x = XX, y = y) #o <- lm.fit(x = cbind(XREG, (x - psi) * pnorm((x - psi)/se.psi)), y = y) } else { .U<-(x > psi)*(x-psi) if(pow[1]!=1) .U<-.U^pow[1] XX<- cbind(.U, XREG) #cbind(pmax(x - psi, 0)^pow[1], XREG) o <- lm.fit(x = XX, y = y) #o <- lm.fit(x = cbind(XREG, pmax(x - psi, 0)), y = y) #o<-lm(y~0+XREG+pmax(x-psi,0)) } #b <- o$coef[length(o$coef)] b <- o$coef[1] mu <- o$fitted.values n <- length(mu) # if (cadj) sigma <- sqrt(sum(o$residuals^2)/(n - sum(!is.na(o$coef)) - 1)) # V <- if (lag == 0) dpmax(x, psi, pow = pow[2]) else f.f(x, psi, lag) #V <- rowMeans(sapply(x, function(xx){-I(x>xx)})) V<-NULL #serve per il check.. if (GS) { if (is.null(est.psi)) stop("'GS=TRUE' needs 'est.psi'") gs <- b * (sum((y - mu) * V)/(sigma^2)) * (est.psi - psi) gs <- sqrt(pmax(gs, 0)) * sign(est.psi - psi) return(gs) } if(is){ r<- -b*sum(((y-mu)*pnorm((x - psi)/se.psi)))/sigma^2 XX<- if(var.is) cbind(-b*pnorm((x - psi)/se.psi), XX) else cbind(-b*I(x > psi), XX) } else { r<- -b*sum((y-mu)*I(x > psi))/sigma^2 XX<- cbind(-b*I(x > psi), XX) } #XX <- if (is) cbind(-b * pnorm((x - psi)/se.psi), (x - psi)*pnorm((x - psi)/se.psi), XREG) else cbind(b * V, pmax(x - psi, 0)^pow[1], XREG) #r <- drop(crossprod(XX, y - mu))/sigma^2 #if (is && altro) r[1] <- r[1] + (b^2) * se.psi * sum(dnorm((x - psi)/se.psi))/sigma^2 #if (!var.is) XX <- cbind(b * V, pmax(x - psi, 0)^pow[1], XREG) if (scale) { if (!is.null(est.psi)) { # questo e' se devi usare l'inf osservata. Cmq visto che dipende da est.psi e non psi, se scale=TRUE # sarebbe inutile calcolarla ogni volta.. 
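# As the comment above notes: when 'est.psi' is supplied, the score variance is
# evaluated at the estimated breakpoint (observed information) rather than at the
# current 'psi'; it therefore does not change along the psi grid and could, in
# principle, be computed once outside the evaluation over psi values.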
mu <- attr(est.psi, "fitted") est.b <- attr(est.psi, "b") est.psi <- as.numeric(est.psi) #V <- if (lag == 0) dpmax(x, est.psi, pow = pow[2]) else f.f(x, est.psi, lag) #V <- rowMeans(sapply(x, function(xx){-I(x>xx)})) #XX <- cbind(est.b * V, pmax(x - psi, 0)^pow[1], XREG) if(is){ XX<- if(var.is) cbind(-est.b*pnorm((x - est.psi)/se.psi), XX[,-1]) else cbind(-est.b*I(x > est.psi), XX[,-1]) } else { XX<- cbind(-est.b*I(x > est.psi), XX[,-1]) } } # INF<- if(length(sigma)==1) (sigma^2)*crossprod(XX) else crossprod(XX,diag(sigma^2))%*%XX # v.Upsi<-INF[1,1]-(INF[1,-1] %*% solve(INF[-1,-1],INF[-1,1])) rr <- if (robust) (y - mu) else NULL v.Upsi <- try(varUpsi.fn(XX, sigma, r = rr), silent = TRUE) if (!is.numeric(v.Upsi)) return(NA) if (v.Upsi <= 0) return(NA) # r<-r[1]/sqrt(v.Upsi) } names(r) <- NULL #r <- c(r[1], v.Upsi, r[1]/sqrt(max(v.Upsi, 0))) r <- c(r, v.Upsi, r/sqrt(max(v.Upsi, 0))) r <- r[which.return] if (cadj) r <- sign(r) * sqrt((r^2) * (1 - (3 - (r^2))/(2 * n))) r } # per disegnare devi vettorizzare u.psiXV <- Vectorize(u.psiX, vectorize.args = "psi", USE.NAMES = FALSE) #========================================================================== gs.fn <- function(x, y, estpsi, sigma2, psivalue, pow = c(1,1), adj = 1, is = FALSE, sepsi, XREG = NULL, fit.is = FALSE, altro = FALSE, transf=FALSE) { # calcola la statist gradiente #x,y i dati; estpsi la stima di psi #a: la costante per lisciare I(x>psi)-> aI(x>psi)^{a-1} (ignorata se is=TRUE) # # is: se TRUE calcola la GS usando lo score 'naturally smoothed' #adj. Se 0 non fa alcuna modifica e cosi' potrebbe risultare non-positiva. Se 1 e 2 vedi i codici all'interno logitDeriv<-function(kappa) exp(kappa)*diff(intv)/((1+exp(kappa))^2) logit<-function(psi) log((psi-min(intv))/(max(intv)-psi)) logitInv<-function(kappa) (min(intv)+max(intv)*exp(kappa))/(1+exp(kappa)) intv<-quantile(x, probs=c(.02,.98),names=FALSE) if (is && missing(sepsi)) stop("SE(psi) is requested when is=TRUE") k <- length(psivalue) r <- vector(length = k) for (i in 1:k) { psii <- psivalue[i] #prima dell'aggiunta di altro..' # if (fit.is) { # X <- cbind(1, x, (x - psii) * pnorm((x - psii)/sepsi), XREG) # } else { # X <- cbind(1, x, pmax(x - psii, 0), XREG) # } if(fit.is) { X<- if(altro) cbind(1,x, (x-psii)*pnorm((x - psii)/sepsi)+sepsi*dnorm((x-psii)/sepsi), XREG) else cbind(1,x,(x-psii)*pnorm((x-psii)/sepsi), XREG) } else { .U<- (x-psii)*(x>psii) if(pow[1]!=1) .U <- .U^pow[1] X<- cbind(1, x, .U, XREG) #X<- cbind(1,x,pmax(x - psii, 0)^pow[1], XREG) } o <- lm.fit(y = y, x = X) b <- o$coef[3] if (is) { v <- pnorm((x - psii)/sepsi) } else { v <- if (pow[2] == 1) I(x > psii) else pow[2] * pmax(x - psii, 0)^(pow[2] - 1) } if(transf) v<-v * logitDeriv(logit(psii)) r[i] <- -(b/sigma2) * sum((y - o$fitted) * v) r[i] <- if(!transf) r[i]*(estpsi - psii) else r[i]*(logit(estpsi) - logit(psii)) if (altro && fit.is) r[i] <- r[i] + (estpsi - psii) * ((b * sepsi * sum(dnorm((x - psii)/sepsi))) * (b/sigma2)) } if (adj > 0) { r<- if (adj == 1) pmax(r, 0) else abs(r) } if(transf) psivalue<-logit(psivalue) segni<-if(transf) sign(logit(estpsi) - psivalue) else sign(estpsi - psivalue) #plot(psivalue, r, type="o") r <- cbind(psi = psivalue, gs.Chi = r, gs.Norm = sqrt(r) * segni ) r } #========================================================================== monotSmooth <- function(xx, yy, hat.psi, k = 20, w = 0) { # xx: esplicativa yy: yy la risposta hat.psi: la stima del psi k: se ? uno scalare allora il rango # della base, altrimenti i nodi.. 
w: l'esponente per costruire il vettore dei pesi (per dare pi? peso # 'localmente') #------------------- bspline <- function(x, ndx, xlr = NULL, knots, deg = 3, deriv = 0) { # x: vettore di dati xlr: il vettore di c(xl,xr) ndx: n.intervalli in cui dividere il range deg: il # grado della spline #require(splines) if (missing(knots)) { if (is.null(xlr)) { xl <- min(x) - 0.01 * diff(range(x)) xr <- max(x) + 0.01 * diff(range(x)) } else { if (length(xlr) != 2) stop("quando fornito, xlr deve avere due componenti") xl <- xlr[1] xr <- xlr[2] } dx <- (xr - xl)/ndx knots <- seq(xl - deg * dx, xr + deg * dx, by = dx) } B <- splineDesign(knots, x, ord = deg + 1, derivs = rep(deriv, length(x))) # B<-spline.des(knots,x,bdeg+1,0*x) #$design r <- list(B = B, degree = deg, knots = knots) #, dx=dx, nterm=ndx) r #the B-spline base matrix } #end_fn #--------- if (length(k) == 1) r <- bspline(xx, ndx = k) else r <- bspline(xx, knots = k) B <- r$B knots <- r$knots degree <- r$degree D1 <- diff(diag(ncol(B)), diff = 1) d <- drop(solve(crossprod(B), crossprod(B, yy))) # calcola monotone splines. La pen si riferisce solo alle diff dei coef della base!! # rx <- range(xx) nterm <- round(nterm) dx <- (rx[2] - rx[1])/nterm knots <- c(rx[1] + dx * # ((-degree):(nterm - 1)), rx[2] + dx * (0:degree)) B0 <- spline.des(knots, c(min(xx), hat.psi, max(xx)), degree + 1)$design P <- tcrossprod(B0[2, ]) * 10^12 e <- rep(1, length(d)) ww <- (1/(abs(xx - hat.psi) + diff(range(xx))/100))^w it <- 0 while (!isTRUE(all.equal(e, rep(0, length(e))))) { v <- 1 * I(diff(d) > 0) E <- (10^12) * crossprod(D1 * sqrt(v)) #t(D1) %*%diag(v)%*%D1 # d.old <- d #a.new M <- crossprod(B * sqrt(ww)) + E + P #t(B)%*% B + E + P d <- drop(solve(M+.001*diag(ncol(M)), crossprod(B, ww * yy))) #d <- drop(solve(M, t(B)%*% yy)) e <- d - d.old it <- it + 1 if (it >= 20) break } #end_while fit <- drop(B %*% d) return(fit) } #========================================================================== miop<-function(x,y,xs=x, ys=y, h=FALSE,v=FALSE, only.lines=FALSE, top=TRUE, right=TRUE, col.h=grey(.6), col.v=col.h,...){ #disegna il calssico plot(x,y,..) e poi aggiunge le proiezioni orizzontali e/o verticali #x, y : vettori per cui disegnare il grafico #xs, ys: punti rispetto a cui disegnare le proiezioni (default a tutti) #h, v: disegnare le linee horizontal and vertical? #top: le linee v riportarle verso l'alto (TRUE) o il basso? #right: le linee horiz riportarle verso destra (TRUE) o sinistra? #only.lines: se TRUE disegna (aggiungendo in un plot *esistente*) solo le "proiezioni" (linee "v" e "h") if(only.lines) h<-v<-TRUE if(!only.lines) plot(x,y,type="l",...) 
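# miop() draws the statistic profile (unless only.lines = TRUE) and then adds the
# dotted horizontal/vertical projection segments; it is used later to mark the
# confidence limits on the score/gradient plot.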
# col.h<-col.v<-1:length(xs) if(v){ y0<- if(top) par()$usr[4] else par()$usr[3] segments(xs, y0, xs,ys, col=col.v, lty=3) } if(h){ x0<-if(right) par()$usr[2] else par()$usr[1] segments(xs,ys, x0,ys,col=col.h, lty=3, lwd=1.2) } invisible(NULL) } #========================================================================== f.Left<-function(x,y){ yy<-rev(y) xx<-rev(x) idList<-NULL while(any(diff(yy)<0)){ id<-which(diff(yy)<0)[1] idList[length(idList)+1]<- id+1 yy<-yy[-(id+1)] xx<-xx[-(id+1)] } r<-cbind(xx,yy) r } #========================================================================== f.Right<-function(x,y){ #elimina i valori che violano la monotonic xx<-x yy<-y idList<-NULL while(any(diff(yy)>0)){ id<-which(diff(yy)>0)[1] idList[length(idList)+1]<- id+1 yy<-yy[-(id+1)] xx<-xx[-(id+1)] } r<-cbind(xx,yy) r } #========================================================================== #========================================================================== #========================================================================== #browser() stat <- match.arg(stat) if (missing(sigma)) sigma <- summary.lm(obj.seg)$sigma if (cadj) use.z = TRUE zalpha <- if (use.z) -qnorm((1 - conf.level)/2) else -qt((1 - conf.level)/2, df = obj.seg$df.residual) if(!is.numeric(h)) stop(" 'h' should be numeric") if(sign(h)>=0) h<-abs(h[1]) Y <- obj.seg$model[, 1] #la risposta X <- obj.seg$model[, nomeZ] if(is.null(obj.seg$formulaLin)){ formula.lin<- update.formula(formula(obj.seg), paste(".~.", paste("-",paste(obj.seg$nameUV$V,collapse = "-")))) #remove *all* V variables formula.lin<- update.formula(formula.lin, paste(".~.-", nomeUj)) XREG <- model.matrix(formula.lin, data = obj.seg$model) } else { formula.lin <- obj.seg$formulaLin addU<-setdiff(obj.seg$nameUV$U, nomeUj) if(length(addU)>0) formula.lin <- update.formula(formula.lin, paste(". ~ . 
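# The confidence limits are obtained by inverting the monotonised statistic: after
# f.Left()/f.Right() drop the non-monotone pieces, monotone spline interpolants of
# psi as a function of the statistic are built with splinefun(method = "mono"), and
# the lower/upper limits are read off at +z_alpha and -z_alpha respectively.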
+", paste(addU, collapse =" + "))) XREG <- model.matrix(formula.lin, data = data.frame(model.matrix(obj.seg))) } if (ncol(XREG) == 0) XREG <- NULL nomePsij<-sub("U","psi", nomeUj) est.psi <- obj.seg$psi[nomePsij, "Est."] se.psi <- obj.seg$psi[nomePsij, "St.Err"] if (any(h < 0)) { all.range <- TRUE valori <- seq(quantile(X,probs=.05, names=FALSE), quantile(X,probs=.95, names=FALSE), l = n.values) } else { all.range <- FALSE valori <- seq(max(quantile(X,probs=.05, names=FALSE), est.psi - h * se.psi), min(quantile(X,probs=.95, names=FALSE), est.psi + h * se.psi), l = n.values) } n <- length(Y) min.X <- min(X) max.X <- max(X) if(!is.null(bw)) se.psi<-eval(parse(text=bw)) if (stat == "score") { U.valori <- u.psiXV(psi = valori, sigma = sigma, x = X, y = Y, XREG = XREG, is = is, se.psi = se.psi, scale = TRUE, pow = c(1, 1), fit.is = fit.is, altro = altro, cadj = cadj, var.is=var.is, transf=transf) statlab<-"Score statistic" if(plot && raw) U.raw <- u.psiXV(valori, sigma, X, Y, XREG, is=FALSE, scale=TRUE, pow = c(1, 1), fit.is=FALSE, altro =altro, cadj = cadj, var.is=FALSE, transf=transf) } else { U.valori <- gs.fn(X, Y, est.psi, sigma^2, valori, is = is, sepsi = se.psi, XREG = XREG, fit.is = fit.is, altro = altro, transf=transf, pow=c(1,1))[, 3] statlab<-"Gradient statistic" if(plot && raw) U.raw <- gs.fn(X, Y, est.psi, sigma^2, valori, is=FALSE, XREG=XREG, fit.is=FALSE, altro=altro, transf=transf)[,3] } if(any(is.na(U.valori))) { #stop("NA in the statistic values") warning("removing NA in the statistic values") valori<-valori[!is.na(U.valori)] U.valori<-U.valori[!is.na(U.valori)] } logit<-function(psi) log((psi-min(intv))/(max(intv)-psi)) logitInv<-function(kappa) (min(intv)+max(intv)*exp(kappa))/(1+exp(kappa)) intv<-quantile(X, probs=c(.02,.98),names=FALSE) if (stat == "gradient" && transf) { est.psi<- logit(est.psi) valori<- logit(valori) x.lab<- "kappa" } if(plot && !add) { x.lab<-"psi" if(raw) { plot(valori, U.raw, xlab=x.lab, ylab=statlab, type="l") points(valori, U.valori, xlab=x.lab, ylab=statlab, type="l", col=2) } else { plot(valori, U.valori, xlab=x.lab, ylab=statlab, type="l", col=2) } abline(h=0, lty=3) segments(est.psi,0, est.psi, -20, lty=2) } if(prod(range(U.valori))>=0) stop("the signs of stat at extremes are not discordant, increase 'h' o set 'h=-1' ") if(smooth==0){ #rimuovi i pezzi di U.valori decrescenti.. ####left valoriLeft<-valori[valori<=est.psi] #valori[U.valori>=0] UvaloriLeft<-U.valori[valori<=est.psi] #U.valori[U.valori>=0] vLeft<-f.Left(valoriLeft,UvaloriLeft) #rendi monotona la curva.. 
valori.ok<-vLeft[,1] Uvalori.ok<-vLeft[,2] f.interpL <- splinefun(Uvalori.ok, valori.ok, method="mono",ties=min) ####right valoriRight<-valori[valori>=est.psi] #valori[U.valori<0] UvaloriRight<-U.valori[valori>=est.psi] #U.valori[U.valori<0] vRight<-f.Right(valoriRight,UvaloriRight) valori.ok<-vRight[,1] Uvalori.ok<-vRight[,2] f.interpR <- splinefun(Uvalori.ok, valori.ok, method="mono",ties=min) } else { #if smooth>0 if(useSeg){ oseg<-try(suppressWarnings(segmented(lm(U.valori~valori), ~valori, psi=quantile(valori, c(.25,.75),names=FALSE), control=seg.control(n.boot=0, fix.npsi= FALSE))),silent=TRUE) #seg.lm.fit.boot(U.valori, XREG, Z, PSI, w, offs, opz) if(class(oseg)[1]=="try-error"){ oseg<-try(suppressWarnings(segmented(lm(U.valori~valori), ~valori, psi=quantile(valori, .5,names=FALSE), control=seg.control(n.boot=0))),silent=TRUE) } if(class(oseg)[1]=="segmented"){ if(plot) lines(valori, oseg$fitted, lty=3, lwd=1.5) soglie<-oseg$psi[,2] iid<-cut(valori,c(min(valori)-1000, soglie, max(valori)+1000), labels=FALSE) slopes<-cumsum(oseg$coef[2:(length(oseg$coef)-length(soglie))]) slopes<-rep(slopes,table(iid)) valori<-valori[slopes<=0] U.valori<-U.valori[slopes<=0] } } fr<-monotSmooth(valori,U.valori,est.psi,k=7) fr<- fr -(.2/diff(range(valori))) *(valori-mean(valori)) #add a small negative trend to avoid constant values in U.. vLeft<-cbind(valori[valori<=est.psi], fr[valori<=est.psi]) vRight<-cbind(valori[valori>=est.psi], fr[valori>=est.psi]) if(!all.range){ if( (min(valori)> intv[1]) && (fr[1]< max(zalpha))) return("errLeft") if( (max(valori)< intv[2]) && (fr[length(fr)]> min(-zalpha))) return("errRight") } f.interpL<-f.interpR<-splinefun(fr,valori,"m",ties=min) }#end_if smooth L<-f.interpL(zalpha) U<-f.interpR(-zalpha) #browser() #il valore che annulla lo IS score puo' essere differente dalla stima di segmented # quindi salviamo questo "delta": gli IC potrebbero essere aggiustati con IC+delta delta<- est.psi-f.interpL(0) #if(abs((f.interpL(0)-f.interpR(0))/f.interpR(0))>.001) if(plot){ if(!agg) delta<-0 #if(raw) plot(valori, U.raw, xlab="psi", ylab=statlab, type="l") else plot(valori, U.valori, xlab="psi", ylab=statlab, type="n") lines(vLeft, col=3); lines(vRight, col=3) vv<-seq(0,zalpha*1.2,l=50) lines(f.interpL(vv)+delta,vv, col=grey(.8, alpha=.6), lwd=4) vv<-seq(0,-zalpha*1.2,l=50) lines(f.interpR(vv)+delta,vv, col=grey(.8, alpha=.6), lwd=4) points(est.psi, 0, pch=19) miop(c(L,U)+delta,c(zalpha,-zalpha),only.lines=TRUE,top=FALSE, right=FALSE) } if (stat == "gradient" && transf) { L<-logitInv(L) U<-logitInv(U) } L<- pmax(L, quantile(X,probs=.02)) U<- pmin(U,quantile(X,probs=.98)) #r<-cbind(lower=L,upper=U) #rownames(r) <- paste(conf.level) #attr(r, "delta")<-delta r<-c(est.psi, L, U) return(r) } #end fn #-------------------------------------------------------------------------- #========================================================================== #========================================================================== #========================================================================== if(!all(class(obj) == c("segmented","lm"))) stop("A segmented lm object is requested") if(missing(parm)){ nomeZ<- parm<- obj$nameUV$Z } else { if(!all(parm %in% obj$nameUV$Z)) stop("invalid 'parm' ") nomeZ<-parm } if(length(parm)>1) { warning("There are multiple segmented terms. The first is taken", call.=FALSE, immediate. = TRUE) nomeZ<-parm[1] } nomiU.term<-grep(nomeZ, obj$nameUV$U, value=TRUE) #termini U per la *stessa* variabile.. #npsi.term<- length(nomiU.term) #no. 
di breakpoints for the same variable. ra<-matrix(NA, length(nomiU.term), 3) rownames(ra)<- nomiU.term for(U.j in nomiU.term){ if(any(c(d.h, h)<0)) { ra[U.j,]<-ci.IS(obj, nomeZ, U.j, h=-1, conf.level=level, ...) } d.h<-min(max(d.h, 1.5),10) a<-"start" it<-0 while(is.character(a)){ a<- try(ci.IS(obj, nomeZ, U.j, h=h, conf.level=level, ...), silent=TRUE) h<-h*d.h it<-it+1 #cat(it,"\n") if(it>=20) break } #browser() if(class(a)[1]=="try-error"){ nomePsij<-sub("U","psi", U.j) est.psi <- obj$psi[nomePsij, "Est."] X <- obj$model[, nomeZ] a<-c(est.psi, range(X)) warning("The profile Score is not decreasing enough.. returning the whole range as CI") } ra[U.j,]<-a } colnames(ra)<-c("Est.",paste("CI","(",level*100,"%",")",c(".low",".up"),sep="")) rownames(ra)<-sub("U","psi", nomiU.term) ra } #end fn confintSegIS #======================================================================================================= #========== inizio funzione #======================================================================================================= if(method=="delta"){ r<-confintSegDelta(object, parm, level, rev.sgn, var.diff, is, ...) } else { r<-confintSegIS(object, parm, stat=method, conf.level=level, ...) } r<-signif(r,digits) return(r) } segmented/R/summary.segmented.lme.r0000644000176200001440000001054014415477000017036 0ustar liggesuserssummary.segmented.lme<-function(object, .vcov=NULL, digits = max(3, getOption("digits") - 3), ...){ #quale misura di varianza residua ? pi? approriata e deve restituire? da lme.fit o lme.fit.noG #stesso discorso vale per residui, logLik (and friends) #browser() x<- object$lme.fit dd <- x$dims LL<- as.numeric(logLik(object)) #Quali residui? #resd <- resid(object$lme.fit, type = "pearson") resd <- resid(object$lme.fit.noG, type = "pearson") if (length(resd) > 5) { resd <- quantile(resd, na.rm = TRUE) names(resd) <- c("Min", "Q1", "Med", "Q3", "Max") } cat("Segmented mixed-effects model fit by ") cat(if (x$method == "REML") "REML\n" else "maximum likelihood\n") print(data.frame(AIC = AIC(object), BIC = BIC(object), logLik = LL, row.names = " "), ...) # if(!is.null(object$history.boot.restart)) { # n.sol<-length(unique(object$history.boot.restart[,"psi"])) # cat("\nFit based on", nrow(object$history.boot.restart), "boot samples,", n.sol, "different solutions found\n") # } if(!is.null(object$history.boot.restart)) { n.sol<-length(unique(object$history.boot.restart[,"psi"])) cat(" Bootstrap restarting on", nrow(object$history.boot.restart), "samples; ", n.sol, "different solution(s)\n") } cat("\n") print(summary(x$modelStruct), sigma = x$sigma, reEstimates = x$coef$random,...) # verbose = verbose, ...) 
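# The block below rebuilds the table of fixed effects: coefficients are grouped into
# purely linear terms, left-slope terms (the segmented variable and its interactions),
# slope-difference terms (U and namesGZ$nomiUx) and breakpoint terms (G0 and namesGZ$nomiG).
# Estimates of the non-G terms are taken from lme.fit.noG, t- and p-values are recomputed
# (optionally with a user-supplied .vcov), and the groups are printed separated by the
# '-- leftS:', '-- diffS:' and '-- break:' labels.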
cat("Fixed effects:\n ") # fixF <- x$call$fixed # if (inherits(fixF, "formula") || is.call(fixF)) { # cat(deparse(x$call$fixed), "\n") # } else { # cat(deparse(lapply(fixF, function(el) as.name(deparse(el)))), "\n") # } nomeZ <- object$namesGZ$nameZ nomiZ <- c(paste(nomeZ,":",sep=""), paste(":",nomeZ,sep="")) nomiZ <- unlist(sapply(nomiZ, function(.x) grep(.x, names(fixef(object$lme.fit)), value=TRUE))) nomiZ<-c(nomeZ, nomiZ) nomiDiffSlope <- c("U", object$namesGZ$nomiUx) nomiPsi <-c("G0", object$namesGZ$nomiG) nomiTutti <- names(fixef(object$lme.fit)) nomiLin <- setdiff(nomiTutti, c(nomiZ, nomiDiffSlope, nomiPsi)) nominoG<-setdiff(nomiTutti, nomiPsi) a<-summary(x)$tTable # a[nomiPsi,2:5]<-NA # coefAll<- a[,"Value"] # coefAll[nominoG] a[nominoG,"Value"] <- fixef(object$lme.fit.noG)[nominoG] a[,"t-value"]<-a[,"Value"]/a[,"Std.Error"] a[,"p-value"]<-2*pt(-abs(a[,"t-value"]), df=a[,"DF"]) a[,4]<- round(a[,4], digits=3) id.leftS <- nomiZ %in% rownames(a) nomiOrd<- if(any(id.leftS)) c(nomiLin, nomiZ, nomiDiffSlope, nomiPsi ) else c(nomiLin, nomiDiffSlope, nomiPsi ) a<- a[nomiOrd,] a["U", 5]<- NA #pValue per U a["G0",4:5]<-NA if(!is.null(.vcov)){ a[, "Std.Error"]<- sqrt(diag(.vcov))[nomiOrd] a[,"t-value"]<-a[,"Value"]/a[,"Std.Error"] a[,"p-value"]<-2*pt(-abs(a[,"t-value"]), df=a[,"DF"]) } #a1<-rbind(a[nomiLin,],0,a[nomiZ,],0, a[nomiDiffSlope,],0, a[nomiPsi,]) a1<-rbind(a[nomiLin,], NA, if(any(id.leftS)) a[nomiZ,] else NA, NA, a[nomiDiffSlope,], NA, a[nomiPsi,]) #rownames(a1)<-c(nomiLin, "----", nomiZ, "----", nomiDiffSlope, "----", nomiPsi ) rownames(a1)<-c(nomiLin, "-- leftS:", nomiZ, "-- diffS:", nomiDiffSlope, "-- break:", nomiPsi ) if(!any(id.leftS)) a1[nomiZ, 1]<-0 a1[,5]<- round(a1[,5],4) #print(a1, na.print="", digits=4) print(a1, na.print="", digits=4, zero.print = "0") cat(" psi.link =", object$call$psi.link, "\n") #print(a1, zero.print="") cat("\nStandardized Within-Group Residuals:\n") print(resd, ...) cat("\nNumber of Observations:", x$dims[["N"]]) cat("\nNumber of Groups: ") Ngrps <- dd$ngrps[1:dd$Q] if((lNgrps <- length(Ngrps)) == 1) { cat(Ngrps, "\n") } else { sNgrps <- 1:lNgrps aux <- rep(names(Ngrps), sNgrps) aux <- split(aux, array(rep(sNgrps, lNgrps), c(lNgrps, lNgrps))[!lower.tri(diag(lNgrps))]) names(Ngrps) <- unlist(lapply(aux, paste, collapse = " %in% ")) cat("\n") print(rev(Ngrps), ...) } # if(!is.null(object$history.boot.restart)) { # n.sol<-length(unique(object$history.boot.restart[,"psi"])) # cat("\nFit based on", nrow(object$history.boot.restart), "boot samples,", n.sol, "different solutions found\n") # } invisible(object) } segmented/R/segmented.Arima.r0000644000176200001440000004043014610436217015621 0ustar liggesuserssegmented.Arima<- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...) 
{ #Richiede control$f.obj that should be a string like "sum(x$residuals^2)" or "x$dev" #----------------- build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } ##===inizio funzione dpmax<-function(x,y,pow=1){ #deriv pmax if(pow==1) -(x>y) #ifelse(x>y, -1, 0) else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) } #----------- # n.Seg<-1 # if(missing(seg.Z) && length(all.vars(o$call$xreg))==1) seg.Z<- as.formula(paste("~", all.vars(o$call$xreg))) # if(missing(psi)){if(length(all.vars(seg.Z))>1) stop("provide psi") else psi<-Inf} # if(length(all.vars(seg.Z))>1 & !is.list(psi)) stop("`psi' should be a list with more than one covariate in `seg.Z'") # if(is.list(psi)){ # if(length(all.vars(seg.Z))!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") # if(any(is.na(match(all.vars(seg.Z),names(psi), nomatch = NA)))) stop("Variables in `seg.Z' and `psi' do not match") # n.Seg <- length(psi) # } # if(length(all.vars(seg.Z))!=n.Seg) stop("A wrong number of terms in `seg.Z' or `psi'") if(missing(seg.Z)) { nomeX<- intersect(paste(obj$call$xreg), names(obj$coef)) if(length(nomeX)==1) seg.Z<- as.formula(paste("~",nomeX)) else stop("please specify 'seg.Z'") #if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } #browser() if("V" %in% sub("V[1-9]*[0-9]","V", c(all.vars(seg.Z), names(coef(obj))))) stop("variable names 'V', 'V1', .. are not allowed") if("U" %in% sub("U[1-9]*[0-9]","U", c(all.vars(seg.Z), names(coef(obj))))) stop("variable names 'U', 'U1', .. are not allowed") if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if(missing(psi)) { if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 #if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if (missing(npsi)) { npsi<-rep(1, n.Seg) names(npsi)<-all.vars(seg.Z) } if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? } else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } fc<- min(max(abs(control$fc),.8),1) min.step<-control$min.step alpha<-control$alpha it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. 
= FALSE) stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K<-control$K h<-control$h # if(h<1) it.max<-it.max+round(it.max/2) name.Z <-all.vars(seg.Z) if(length(name.Z)!=n.Seg) stop("errore strano 1") Z<-sapply(name.Z, function(xx) eval(parse(text=xx))) #e' sempre una matrice if(length(name.Z)!=ncol(Z)) stop("errore strano 2") n<-nrow(Z) n.psi<- length(unlist(psi)) ################# #if(ncol(Z)==1 && length(psi)==1 && n.psi==1 && !any(is.na(psi))) { if(psi==Inf) psi<-median(Z)} ################# if(ncol(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(psi) || is.null(names(psi))) stop("psi should be a *named* list") id.nomiZpsi <- match(colnames(Z), names(psi)) if ((ncol(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[,i], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[,i])+ diff(range(Z[,i]))*(1:K)/(K+1))} } } else { for(i in 1:length(psi)) { if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[,i], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[,i])+ diff(range(Z[,i]))*(1:K)/(K+1))} } } ###### se ci sono fixed.psi id.psi.fixed <- FALSE if(!is.null(fixed.psi)){ id.psi.fixed <- TRUE if(is.numeric(fixed.psi) && n.Seg==1) { fixed.psi<-list(fixed.psi) names(fixed.psi)<-all.vars(seg.Z) } if(is.list(fixed.psi)) { if(!(names(fixed.psi) %in% all.vars(seg.Z))) stop("names(fixed.psi) is not a subset of variables in 'seg.Z' ") } else { stop(" 'fixed.psi' has to be a named list ") } fixed.psi<-lapply(fixed.psi, sort) Zfixed<-matrix(unlist(mapply(function(x,y)rep(x,y),Z[names(fixed.psi)], sapply(fixed.psi, length), SIMPLIFY = TRUE)), nrow=n) n.fixed.psi<-sapply(fixed.psi, length) rip.nomi <- rep( names(fixed.psi), n.fixed.psi) rip.numeri <- unlist(lapply(n.fixed.psi, function(.x) 1:.x)) colnames(Zfixed) <- paste("U", rip.numeri,".fixed.",rip.nomi, sep="") PSI <- matrix(unlist(fixed.psi), ncol=ncol(Zfixed), nrow=n, byrow = TRUE) fixedU<-(Zfixed-PSI)*(Zfixed>PSI) XREG<-cbind(XREG, fixedU) } initial.psi<-psi a <- sapply(psi, length) #per evitare che durante il processo iterativo i psi non siano ordinati id.psi.group <- rep(1:length(a), times = a) #identificativo di apparteneza alla variabile Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n,byrow = TRUE) #negli altri metodi Z 
e' una lista per cui la linea di sopra diventa #Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) colnames(Z) <- nomiZ.vett <- rep(nome, times = a) #SERVE??? si perche' Z e' senza colnames psi <- unlist(psi) #se psi e' numerico, la seguente linea restituisce i valori ordinati all'interno della variabile.. psi<-unlist(tapply(psi,id.psi.group,sort)) k <- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) #controllo se psi e' ammissibile.. c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") #ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(xxx) {1:xxx}))) ripetizioni <- as.vector(unlist(tapply(id.psi.group, id.psi.group, function(x) 1:length(x) ))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ.vett, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ.vett, sep = ".") nnomi <- c(nomiU, nomiV) XREG<-eval(obj$call$xreg) if(!is.null(XREG)){ #se ci sono factor? nomiXREG<-setdiff(names(obj$coef),c("intercept", paste("ar",1:100,sep=""), paste("ma",1:100,sep=""), paste("sma",1:100,sep=""), paste("sar",1:100,sep=""))) XREG<-matrix(XREG, ncol=length(nomiXREG)) colnames(XREG)<-nomiXREG #if((""%in%colnames(XREG)) || (" "%in%colnames(XREG))) stop("all columns in the matrix 'xreg' of 'obj' should be named.. ") if(length(nomiXREG) != ncol(XREG)) stop("ncol(XREG) does not match names of regression coefficients") } mio.init<-mio.init.noV<-NULL X<-NULL call.ok <- update(obj, xreg = X, init=mio.init, evaluate=FALSE) #ho messo X, piuttosto che cbind(XREG,U,V) call.noV <- update(obj, xreg = cbind(XREG,U), init=mio.init.noV, evaluate=FALSE) #, data = mfExt) #objF <- update(obj0, formula = Fo, data = KK) # call.noV <- update(obj, formula = Fo.noV, evaluate=FALSE, data = mfExt) #objF <- update(obj0, formula = Fo, data = KK) if (it.max == 0) { U<-(Z-PSI)*(Z>PSI) colnames(U)<-nomiU obj1 <- eval(call.noV) #, envir=mfExt) return(obj1) } #obj1 <- eval(call.ok, envir=mfExt) initial <- psi obj0 <- obj dev0<- -obj$loglik if(is.na(dev0)) dev0<-10 list.obj <- list(obj) nomiOK<-nomiU if(is.null(alpha)) alpha<- max(.05, 1/nrow(PSI)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) opz<-list(toll=toll,h=h,stop.if.error=stop.if.error,dev0=dev0,visual=visual,it.max=it.max, nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc, seed=control$seed, tol.opt=control$tol.opt, pLin=length(obj$coef), min.n=control$min.n) #pLin sono i termini lineari gia' nel modello (interc, ar, termini in xreg..) opz$call.ok<-call.ok opz$call.noV<-call.noV opz$nomiU<-nomiU opz$nomiV<-nomiV if(n.boot<=0){ obj<- seg.Ar.fit(obj, XREG, Z, PSI, opz) } else { obj<- seg.Ar.fit.boot(obj, XREG, Z, PSI, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) #jt, nonParam seed <- obj$seed } if(!is.list(obj)){ warning("No breakpoint estimated", call. 
= FALSE) return(obj0) } it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V id.warn<-obj$id.warn id.psi.group<-obj$id.psi.group nomiU<-nomiOK<-obj$nomiOK #sarebbe nomiU #-- nomiVxb<-sub("U","psi", nomiOK) #nomiVxb<-paste("psi",sapply(strsplit(nomiOK,"U"), function(x){x[2]}), sep="") nomiFINALI<-unique(sub("U[1-9]*[0-9].", "", nomiOK)) #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) #nomi delle variabili con breakpoint stimati! #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. = FALSE) #########========================= SE PSI FIXED psi.list<-vector("list", length=length(unique(name.Z))) names(psi.list)<-unique(name.Z) names(psi)<- nomiZ.vett for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } #if(any(table(rowSums(V))<=1)) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close") for(jj in colnames(V)) { VV<-V[, which(colnames(V)==jj), drop=FALSE] sumV<-abs(rowSums(VV)) if( #(any(diff(sumV)>=2)|| #se ci sono due breakpoints uguali any(table(sumV)<=1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") } rangeZ<-obj$rangeZ obj<-obj$obj k<-length(psi) all.coef<-obj$coef #coef(obj) names(all.coef)<-c(names(obj0$coef), nomiU, nomiVxb) beta.c<- all.coef[nomiU] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) nnomi <- c(nomiU, nomiVxb) XREG.ok<-cbind(XREG, U, Vxb) colnames(XREG.ok)[((ncol(XREG.ok)-length(nnomi)+1):ncol(XREG.ok))]<- nnomi #se fixed.psi if(id.psi.fixed){ XREG.ok<-cbind(XREG.ok, fixedU) } objF <- update(obj0, xreg = XREG.ok, evaluate=TRUE) # #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: # length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) if(any(is.na(objF$coef)) && stop.if.error){ stop("at least one coef estimate is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } names.coef <- names(coef(objF)) #names(obj$coef)<- names.coef# all.coef ha gia' i nomi.. objF$coef[names.coef]<-all.coef[names.coef] objF$residuals<- obj$residuals objF$loglik<-obj$loglik objF$sigma2 <-obj$sigma2 objF$aic <- obj$aic + 2*k if(any(is.na(objF$coef))){ stop("some estimate is NA: premature stopping with a large number of breakpoints?", call. = FALSE) } Cov<-objF$var.coef vv<- Cov[nomiVxb, nomiVxb, drop=FALSE] ris.psi<-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(diag(vv)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. 
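# Build 'a.ok': for each variable listed in seg.Z, the number of breakpoints actually
# retained (0 when no breakpoint survived for that variable), so that the vector of
# starting values 'initial.psi' can be aligned with the estimated psi reported below.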
a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } # initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial objF$Z <- Z objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn ###########################PSI FIXED objF$indexU<-build.all.psi(psi.list, fixed.psi) objF$psi[,"Initial"]<-NA if(n.boot>0) objF$seed<-seed class(objF) <- c("segmented", class(obj0)) list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] # warning("'segmented.Arima' is at a preliminary stage. Estimates are OK, but the '*.segmented' methods are not expected to work", # call.=FALSE) return(list.obj) } #end function segmented/R/segmented.numeric.R0000644000176200001440000002746214613736243016211 0ustar liggesusers`segmented.numeric` <- function(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, adjX=FALSE, weights=NULL, ...) { #sparse=FALSE, build.all.psi<-function(psi, fixed.psi){ all.names.psi<-union(names(psi),names(fixed.psi)) all.psi<-vector("list", length=length(all.names.psi)) names(all.psi)<- all.names.psi for(i in names(all.psi)) { if(!is.null(psi[[i]])){ psi[[i]]<-sort(psi[[i]]) names(psi[[i]])<-paste("U",1:length(psi[[i]]),".",i,sep="") } if(!is.null(fixed.psi[[i]])){ fixed.psi[[i]]<-sort(fixed.psi[[i]]) names(fixed.psi[[i]])<- paste("U",1:length(fixed.psi[[i]]),".fixed.",i,sep="") } all.psi[[i]]<-sort(c(psi[[i]],fixed.psi[[i]])) } return(all.psi) } ##===inizio funzione if(!(is.vector(obj) || is.ts(obj))) stop(" 'obj' should be a numerical/ts vector ") #if(!is.vector(obj)) stop(" 'obj' should be a numerical vector ") y <- obj n <- length(y) #browser() if(missing(seg.Z)) { if(is.ts(obj)){ Tsp<-tsp(obj) x<-seq(Tsp[1], Tsp[2], length=length(y) ) min.x<- min(x) name.Z <- "Time" if(is.null(adjX)) { adjX<- if(min.x>=1000) TRUE else FALSE } if(adjX) x<- x - min.x } else { x<-1:n/n name.Z <- "index" adjX<-FALSE } } else { x<-eval(parse(text=all.vars(seg.Z))) name.Z <- all.vars(seg.Z) adjX= FALSE } #browser() if(!missing(seg.Z) && length(all.vars(seg.Z))>1) stop(" multiple seg.Z not allowed here: use 'segmented.(g)lm or segreg") Fo0<-as.formula(paste(deparse(substitute(obj)), " ~ ", name.Z, sep="")) #Fo0<-as.formula(paste(deparse(substitute(obj, env = parent.frame())), " ~ ", name.Z, sep="")) #qui mantiene il nome 'obj' #browser() y.only.vector <- TRUE alpha<-control$alpha if(is.null(alpha)) alpha<- max(.05, 1/length(y)) if(length(alpha)==1) alpha<-c(alpha, 1-alpha) #browser() if(missing(psi)){ if(missing(npsi)) npsi<-1 #stop(" psi or npsi have to be provided ") #psi <- seq(min(x), max(x), l=npsi+2)[-c(1, npsi+2)] #psi[[i]]<-(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1)) qx <- quantile(x, probs=c(alpha, 1-alpha), names = FALSE) psi <- seq(qx[1], qx[2], l=npsi+2)[-c(1, npsi+2)] } else { npsi<-length(psi) } a<- npsi initial.psi<-psi Z <- matrix(x, ncol=npsi, nrow=n, byrow = FALSE) XREG <- 
cbind(1,x) PSI<-matrix(psi, ncol=a, nrow=n, byrow = TRUE) nomiU<-paste("U", 1:a, ".", name.Z,sep="") nomiV<-paste("V", 1:a, ".", name.Z,sep="") colnames(Z)<-nomiZ<-rep(name.Z, a) id.psi.group <- rep(1:length(a), times = a) orig.call<-NULL dev0<- n*var(y) #sum(mylm(x.lin, y, ww)$residuals^2*ww) rangeZ <- apply(Z, 2, range) if(is.null(weights)) { id.weights <- FALSE weights<-rep(1,n) } else { id.weights <- TRUE } fc<- min(max(abs(control$fc),.8),1) min.step<-control$min.step it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) visual <- control$visual stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } break.boot=control$break.boot n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visual <- control$visual visualBoot<-FALSE if(visual && n.boot>0) {visual<-FALSE; visualBoot<-TRUE} # if(n.boot>0){ # if(!is.null(control$seed)) { # set.seed(control$seed) # employed.Random.seed<-control$seed # } else { # employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) # set.seed(employed.Random.seed) # } # if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} # # if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") # } last <- control$last K<-control$K h<-control$h #============================================ # ATTENZIONE devi costruire il mf con i pesi e la orig call? #============================================ invXtX=NULL Xty<-NULL nomiOK<-nomiU #browser() #for(.i in unique(colnames(Z))) XREG[,.i]<- XREG[,.i] - min(XREG[,.i]) opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, fc=fc, id.weights=id.weights, seed=control$seed, min.n=control$min.n) if(n.boot<=0){ #obj<- if(sparse) seg.num.spar.fit(y, XREG, Z, PSI, weights, opz) else seg.num.fit(y, XREG, Z, PSI, weights, opz) obj<- seg.num.fit(y, XREG, Z, PSI, weights, opz) } else { obj<-seg.num.fit.boot(y, XREG, Z, PSI, weights, opz, n.boot=n.boot, size.boot=size.boot, random=random, break.boot=break.boot) #, sparse=sparse) #jt, nonParam seed <- obj$seed } if(!is.list(obj)){ warning("No breakpoint estimated", call. = FALSE) return(y) } #browser() #if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE) id.psi.group<-obj$id.psi.group nomiOK<-obj$nomiOK #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) #nomi delle variabili con breakpoint stimati! #nomiFINALI<-sub("U[1-9].", "", nomiOK) #nomi originali delle variabili con breakpoint stimati! nomiFINALI<- unique(sub("U[1-9]*[0-9].", "", nomiOK)) #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. 
= FALSE) it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V id.warn<-obj$id.warn rangeZ<-obj$rangeZ idU <- obj$idU idV <- obj$idV obj<-obj$obj k<-length(psi) beta.c<-coef(obj)[idU]# [paste("U", 1:ncol(U), sep = "")] #psi.values[[length(psi.values) + 1]] <- psi #non c'e' bisogno! Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) #forma.nomiU <-function(xx,yy)paste("U",1:xx, ".", yy, sep="") #forma.nomiVxb <-function(xx,yy)paste("psi",1:xx, ".", yy, sep="") #nomiU <- unlist(mapply(forma.nomiU, length.psi, name.Z)) #invece di un ciclo #paste("U",1:length.psi[i], ".", name.Z[i]) #nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, name.Z)) #nomiU <- unlist(mapply(forma.nomiU, length.psi, nomiFINALI)) #invece di un ciclo #paste("U",1:length.psi[i], ".", name.Z[i]) #nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, nomiFINALI)) nomiVxb <- sub("U","psi", nomiU) #########========================= SE PSI FIXED psi.list<-vector("list", length=length(unique(nomiZ))) names(psi.list)<-unique(nomiZ) #names(psi)<-nomiZ #se e' una procedure automatica nomiZ puo essere piu lungo dei breakpoints "rimasti" names(psi)<-rep(nomiFINALI, length.psi) for(i in names(psi.list)){ psi.list[[i]]<-psi[names(psi)==i] } #browser() #mf<- model.frame(update.formula(Fo0, .~ x)) mf<- data.frame(y,x) names(mf)<-all.vars(Fo0) for(i in 1:ncol(U)) { #mfExt[nomiU[i]]<- mf[nomiU[i]]<-U[,i] #mfExt[nomiVxb[i]]<- mf[nomiVxb[i]]<-Vxb[,i] } nnomi <- c(nomiU, nomiVxb) #Fo <- update.formula(Fo0, as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) #se c'e' un "y[-1]", la seguente linea modifica il nome in "y".. Fo <- update.formula(Fo0, as.formula(paste(paste(all.vars(Fo0)[1]),"~.+", paste(nnomi, collapse = "+")))) #mf <- eval(mf, parent.frame()) #forse NON serve.. mf c'e'.. objF <-lm(Fo, data=mf, weights=weights) #browser() isNAcoef<-any(is.na(objF$coefficients)) if(isNAcoef){ if(stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi),"\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } else { warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE) Fo <- update.formula(Fo0, as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) #objF <- update(obj0, formula = Fo, evaluate=TRUE, data = mf) objF <-lm(Fo, weights=weights, data=mf) names(psi)<-nomiVxb objF$psi<-psi return(objF) } } #browser() objF$coefficients[names(objF$coefficients)] <- obj$coefficients #sostituisce tutti i coeff objF$residuals<- as.numeric(obj$residuals) objF$fitted.values<- y- as.numeric(obj$residuals) #as.numeric(obj$fitted.values) #y-obj$residuals Cov <- vcov(objF) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) #if(length(initial)!=length(psi)) initial<-rep(NA,length(psi)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. ris.psi<-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(vv) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... 
Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". Allora costruisco a.ok a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial #psi <- cbind(initial, psi, sqrt(vv)) #rownames(psi) <- colnames(Cov)[id] #browser() objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call<- update(objF, Fo0, evaluate=FALSE) objF$indexU<-build.all.psi(psi.list, fixed.psi) objF$psi[,"Initial"]<-NA if(model) objF$model <- mf #objF$mframe <- data.frame(as.list(KK)) if(n.boot>0) objF$seed<-seed class(objF) <- c("segmented", "lm") return(objF) } segmented/R/fixef.segmented.lme.r0000644000176200001440000000032114415477002016440 0ustar liggesusersfixef.segmented.lme<-fixed.effects.segmented.lme<-function(object,...) { b.all<-object$lme.fit$coefficients$fixed b.noG<-object$lme.fit.noG$coefficients$fixed b.all[names(b.noG)]<- b.noG b.all } segmented/R/aapc.r0000644000176200001440000000752414427415666013544 0ustar liggesusersaapc<-function(ogg, parm, exp.it=FALSE, conf.level=0.95, wrong.se=TRUE, .vcov=NULL, .coef=NULL, ...){ blockdiag <- function(...) { args <- list(...) nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } COV <- if(is.null(.vcov)) vcov(ogg,...) else .vcov estcoef<- if(is.null(.coef)) coef(ogg) else .coef if(missing(parm)) { nomeZ<- ogg$nameUV$Z # if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(nomeZ)) } else { if(! all(parm %in% ogg$nameUV$Z)) {stop("invalid parm")} else {nomeZ<-parm} } #for(i in 1:length(nomeZ)) { term<-nomeZ[1] nomi.psi<- grep(paste("\\.",term,sep=""), ogg$nameUV$V, value=TRUE) nomi.dslope<- grep(paste("\\.",term,sep=""), ogg$nameUV$U,value=TRUE) null.left<-TRUE if(term %in% names(estcoef)) { nomi.dslope<-c(term, nomi.dslope) null.left<-FALSE } a<- min(ogg$rangeZ[,parm])# min(x)-1 #se discreto b<- max(ogg$rangeZ[,parm]) est.slope <- slope(ogg, parm, .vcov=.vcov, .coef=.coef)[[1]][,1] est.psi <- ogg$psi[nomi.psi,2] est.w<- diff(c(a,est.psi,b))/(b-a) #drop(B%*%c(a,est.psi,b)) k<- length(est.psi)#n.changepoints A<-matrix(0,k+1,k+1) A[row(A)>=col(A)]<-1 B<-diff(diag(k+2),diff=1)/(b-a) mu<-drop(crossprod(est.w,est.slope)) xsi<-c(crossprod(est.w,A),crossprod(est.slope,B)) #browser() if(is.null(ogg$constr)){ cof <- estcoef[nomi.dslope] if(!term%in%names(estcoef)) Xfit<-Xfit[,-1,drop=FALSE] } else { #idU.i <- match(nomeU.i, names(estcoef)) #cosa srevono le slope? o le diffSlope? 
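# Constrained fit ('ogg$constr' not NULL): the coefficients of the segmented variable are
# back-transformed through the matrix invA.RList[[.]] of the corresponding term, so that
# 'cof' is expressed, presumably, as the left slope followed by the slope differences
# U1., U2., ... used in the delta-method computations below.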
cof<- drop(ogg$constr$invA.RList[[match(parm, ogg$nameUV$Z)]]%*%estcoef[nomi.dslope]) names(cof)<-c(parm, paste("U",1:(length(cof)-1),".",parm,sep="" )) } v.DeltaPsi<-COV[c(nomi.dslope,nomi.psi),c(nomi.dslope,nomi.psi)] if(!is.null(ogg$constr)){ B <- ogg$constr$invA.RList[[match(parm, ogg$nameUV$Z)]] B <- do.call(blockdiag, list(B, diag(length(est.psi)))) v.DeltaPsi <- B %*% v.DeltaPsi %*% t(B) } rownames(v.DeltaPsi) <- colnames(v.DeltaPsi) <- c(names(cof),nomi.psi) v.delta <- v.DeltaPsi[names(cof),names(cof)] VC<-v.DeltaPsi[nomi.psi, names(cof)] v.psi<-as.matrix(COV[nomi.psi,nomi.psi]) # if(null.left) v.delta<-rbind(0,cbind(0,v.delta)) #v.delta<-vcov(ogg)[2:4,2:4] #questa e' la var cov della left slope e le altre diffSlope #v.psi<-vcov(ogg)[5:6,5:6] #questa e' la var-cov dei psi VV<-blockdiag(v.delta,diag(1)*0,v.psi,diag(1)*0) id.cov1<- 1:length(est.slope) id.cov2<- seq.int((length(est.slope)+2), length.out=length(est.psi)) if(null.left && is.null(ogg$constr)) { # VC<-cbind(0,VC) #column relevant to the "x" term (missing) VV<- rbind(0,cbind(0,VV)) } VV[id.cov2,id.cov1]<-VC VV[id.cov1,id.cov2]<-t(VC) #VV[5:6,1:3]<-vcov(os)[5:6,2:4] #VV[1:3,5:6]<-vcov(os)[2:4,5:6] se.mu<-sqrt(drop(xsi%*%VV%*%xsi)) z<-abs(qnorm((1-conf.level)/2)) r<-c(Est=mu, St.Err=se.mu, mu+c(-z,z)*se.mu) cin <- paste("CI", "(", conf.level * 100, "%", ")", c(".l", ".u"), sep = "") names(r)<-c("Est.","St.Err",cin) if(wrong.se){ if(null.left && is.null(ogg$constr)) v.delta<-rbind(0,cbind(0, v.delta)) se.mu.wrong<- sqrt(drop(t(est.w)%*%A%*%v.delta%*%t(A)%*%est.w)) attr(r,"wrong.se")<- se.mu.wrong } if(exp.it) r<- exp(r[-2])-1 r } segmented/R/step.num.fit.r0000644000176200001440000002721214726071237015160 0ustar liggesusersstep.num.fit<-function(y, x.lin, Xtrue, PSI, ww, opz, return.all.sol=FALSE){ #---------------------- #---------------------- search.minWO<-function(h, psi, psi.old, X, y, w, id.fix.psi=NULL) { psi.ok<- psi*h + psi.old*(1-h) psi.ok[id.fix.psi]<- psi.old[id.fix.psi] PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylmWO(cbind(X, U1), y, w), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(.lm.fit(cbind(X, U1), y), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #r<-sum(obj1$residuals^2 * w) L1 } search.min<-function(h, psi, psi.old, X, y, w, id.fix.psi=NULL) { psi.ok<- psi*h + psi.old*(1-h) psi.ok[id.fix.psi]<- psi.old[id.fix.psi] PSI <- matrix(psi.ok, n, ncol = length(psi.ok), byrow=TRUE) U1 <- (Xtrue>PSI) #(Z - PSI) * (Z > PSI) #if (pow[1] != 1) U1 <- U1^pow[1] obj1 <- try(mylm(cbind(X, U1), y, w), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(cbind(X, U1), y, w), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(.lm.fit(cbind(X, U1), y), silent = TRUE) L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else obj1$L0 #r<-sum(obj1$residuals^2 * w) L1 } ### ----- isZero <- function(v) sapply(v, function(.x) identical(.x,0)) ###------ toMatrix<-function(.x, ki){ # ripete ogni .x[,j] ki[j] volte if(ncol(.x)!=length(ki)) stop("It should be ncol(.x)==length(ki)") if(all(ki==1)) return(.x) M<-vector("list", length=length(ki)) for(j in 1:length(ki)) M[[j]]<-replicate(ki[[j]], cbind(.x[,j]), simplify=TRUE) do.call(cbind, M) } ### ----- isZero <- function(v) sapply(v, function(.x) identical(.x,0)) ###------ mylmWO <-function(x,y,w=1){ #,w=1,offs=0 in 
step.st.fit() non ci sovrebbero essere w e offs x1<-x*sqrt(w) #y<-y-offs y1<-y*sqrt(w) #o<-.lm.fit(y=y,x=x) #b<-o$coefficients #fit<- o$fitted.values #r<-o$residuals b<-drop(solve(crossprod(x1),crossprod(x1,y1))) #.lm.fit(x=x1, y=y1) #lm.wfit(x, y, w) #CHI E' PIU' VELOCE? #b<-solve(crossprod(x),crossprod(x,y)) #browser() #fit<- drop(tcrossprod(x,t(b))) fit<- drop(x%*%b) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, L0=sum(w*r^2), df.residual=length(y)-length(b)) o } mylm <-function(x,y,w=1){ #,w=1,offs=0 in step.st.fit() non ci sovrebbero essere w e offs #o<-.lm.fit(y=y,x=x) #b<-o$coefficients #fit<- o$fitted.values #r<-o$residuals b<-drop(solve(crossprod(x),crossprod(x,y))) #.lm.fit(x=x1, y=y1) #lm.wfit(x, y, w) #CHI E' PIU' VELOCE? #browser() #fit<- drop(tcrossprod(x,t(b))) fit<- drop(x%*%b) #fit<- drop(tcrossprod(b,x)) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, L0=sum(r^2), df.residual=length(y)-length(b)) o } #----------- if(var(ww)<=0){ fitter<-function(x, y, w) .lm.fit(x=x, y=y) #list(coefficients=drop(solve(crossprod(x), crossprod(x, y)))) mylmOK <- mylm search.minOK <- search.min } else { fitter<-function(x, y, w) .lm.fit(x=sqrt(w)*x, y=sqrt(w)*y) mylmOK <- mylmWO search.minOK <- search.minWO } ##---------- #----------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #------------ tol<-opz$toll display<-opz$display it.max<-opz$it.max dev0<-opz$dev0 useExp.k<-opz$useExp.k min.step<- opz$min.step #=.0001 conv.psi<-opz$conv.psi #=FALSE alpha<-opz$alpha limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha[1], alpha[2])) #limZ <- apply(Xtrue, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha)) fix.npsi<-opz$fix.npsi agg<-opz$agg h<-opz$h npsii<-opz$npsii npsi<- sum(npsii) #opz$npsi P<-length(npsii) #P<-opz$P digits <- opz$digits rangeZ <- opz$rangeZ # pos.vec <- 1:npsi # pos <- vector("list", P) # ind <- 0 pos<- tapply(1:npsi, rep(1:P, npsii), list) i <- 0 agg <- rep(agg, npsi) # direz <- matrix(NA, it.max, npsi) # conv <- rep(FALSE, npsi) # ind.conv <- NULL n<-length(y) plin<-ncol(x.lin) epsilon<-10 k.values<-dev.values<- NULL psi.values <-list() psi.values[[length(psi.values) + 1]] <- NA dev.values[length(dev.values) + 1] <- opz$dev0 #modello senza psi if(it.max==0){ obj <- lm.wfit(x = cbind(x.lin, Xtrue>PSI), y = y, w = ww) L1 <- sum(obj$residuals^2 * ww) obj$epsilon <- epsilon idZ<-(plin+1):(plin+ncol(PSI)) b<- obj$coef[idZ] obj <- list(obj = obj, psi = PSI[1,], psi.values = psi.values, rangeZ = rangeZ, beta.c=b, epsilon = epsilon, SumSquares.no.gap = L1, id.warn = TRUE) return(obj) } #PSI0<- matrix(psi0, n, npsi, byrow = TRUE) #XREG <- cbind(x.lin, Xtrue>PSI) #obj0 <- try(mylm(XREG, y), silent = TRUE) L0<- mylmOK(cbind(x.lin, Xtrue>PSI), y, ww)$L0 # valore con psi iniziale n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi0<-PSI[1,] psi.values[[length(psi.values) + 1]] <- psi0 #psi iniziali #============================================== if (display) { unlpsi<- unlist(psi0) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",0), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L0, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%5.0f", NA), " n.psi = ",formatC(Lp,digits=0,format="f"), " ini.psi = ",paste(formatC(unlpsi[1:min(5,Lp)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) 
sep=""), "\n") } id.warn <- FALSE low <- apply(Xtrue, 2, min) up <- apply(Xtrue, 2, max) L1<-L0+10 tolOp<-if(is.null(opz$tol.opt)) seq(.001, .Machine$double.eps^0.25, l=it.max) else rep(opz$tol.opt, it.max) idZ<-(plin+1):(plin+ncol(PSI)) idW<-(plin+ncol(PSI)+1): ( plin+2*ncol(PSI)) #============================================== while (abs(epsilon) > tol) { i <- i + 1 #if(i==1) browser() xx <- Xtrue[,cumsum(npsii),drop=FALSE] for (p in 1:P) { psis <- sort(psi0[pos[[p]]]) gruppi <- cut(xx[,p], breaks = c(low[p] - 0.1, psis, up[p]), labels = FALSE) if(any(is.na(gruppi))) stop(paste("too many breaks for step term #", p, "?"), call.=TRUE) points <- c(low[p], psis, up[p]) right <- c(low[p], points[2:(npsii[p] + 1)] + agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[3:(npsii[p] + 2)] - points[2:(npsii[p] + 1)]), NA) left <- c(NA, points[2:(npsii[p] + 1)] - agg[pos[[p]]][order(psi0[pos[[p]]])] * (points[2:(npsii[p] + 1)] - points[1:npsii[p]]), up[p]) for (j in 1:(npsii[p] + 1)) { xx.j <- xx[,p][gruppi == j] xx[,p][gruppi == j] <- right[j] + (xx.j - points[j]) * ((left[j + 1] - right[j])/(points[j + 1] - points[j])) } } XX<-toMatrix(xx, npsii) PSI<- matrix(psi0, n, npsi, byrow = TRUE) W <- (1/(2 * abs(XX - PSI))) Z <- (XX * W + 1/2) XREG <- cbind(x.lin, Z, W) obj <- fitter(y = y, x = XREG, w=ww) #b <- obj$coef[(2:(sum(k) + 1))] #g <- obj$coef[((sum(k) + 2):(2 * sum(k) + 1))] b<- obj$coef[idZ] g<- obj$coef[idW] #if(any(is.na(c(b, g)))){ if(any(isZero(c(b, g)))) { if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } psi1 <- -g/b psi1<- psi0+ opz$h*(psi1-psi0) psi1<- adj.psi(psi1, limZ) #limZ rangeZ?? psi1<-unlist(tapply(psi1, opz$id.psi.group, sort), use.names =FALSE) #if(i==2) browser() #la f e' chiaramente a gradino per cui meglio dividere.. a0<-optimize(search.min, c(0,.5), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, tol=tolOp[i]) a1<-optimize(search.min, c(.5,1), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, tol=tolOp[i]) a<-if(a0$objective<=a1$objective) a0 else a1 #M<-1 #while(L1>L0){ # a<-optimize(search.min, c(0,M), psi=psi1, psi.old=psi0, X=x.lin, y=y, w=ww, offs=offs) # L1<- a$objective # M<-M*.3 #} if(a$objectivePSI1) #obj1 <- try(mylm(XREG1, y, ww, offs), silent = TRUE) #if (class(obj1)[1] == "try-error") obj1 <- try(lm.wfit(XREG1, y, ww, offs), silent = TRUE) delta<- psi1-psi0 if (display) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) unlpsi<- unlist(psi1) Lp<-length(unlpsi) cat(paste("iter = ", sprintf("%2.0f",i), #" dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " dev = ", sprintf("%1.5f", as.numeric(strsplit(format(L1, scientific=TRUE), "e")[[1]][1])), " k = ", sprintf("%2.3f", use.k), " n.psi = ",formatC(Lp,digits=0,format="f"), " est.psi = ",paste(formatC(unlpsi,digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- if(conv.psi) max(abs((psi1 -psi0)/psi0)) else (L0 - L1)/(abs(L0) + 0.1) L0<-L1 k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi1 dev.values[length(dev.values) + 1] <- L0 if (i >= it.max) { id.warn <- TRUE break } psi0<-psi1 } #end while_it psi1 <-unlist(tapply(psi1, opz$id.psi.group, sort)) PSI<- matrix(psi1, n, npsi, byrow = TRUE) U <- 1*(Xtrue>PSI) #ATTENZIONE .. Assume che obj sia stato stimato sempre! 
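# Return value: the final least-squares fit, the sorted breakpoint estimates together
# with their history across iterations, the objective value (SumSquares.no.gap), the
# coefficients of the step indicators (beta.c), the number of iterations, the
# convergence measure/warning flag and the indicator matrix U = I(x > psi).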
obj<-list(obj=obj, psi=psi1, psi.values=psi.values, rangeZ=rangeZ, SumSquares.no.gap=L1, beta.c=b, it=i, epsilon=epsilon, id.warn=id.warn, U=U) return(obj) } #end jump.fit segmented/R/summary.segmented.R0000644000176200001440000001250214726320375016231 0ustar liggesusers`summary.segmented` <- function(object, short=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, ...){ if(is.null(object$psi)) object<-object[[length(object)]] #i seguenti per calcolare aa,bb,cc funzionano per lm e glm, da verificare con arima.... # nome<-rownames(object$psi) # nome<-as.character(parse("",text=nome)) # aa<-grep("U",names(coef(object)[!is.na(coef(object))])) # bb<-unlist(sapply(nome,function(x){grep(x,names(coef(object)[!is.na(coef(object))]))},simplify=FALSE,USE.NAMES=FALSE)) # cc<-intersect(aa,bb) #indices of diff-slope parameters # iV<- -grep("psi.",names(coef(object)[!is.na(coef(object))]))#indices of all but the Vs if(!is.null(.vcov)) var.diff<-FALSE if(var.diff && length(object$nameUV$Z)>1) { var.diff<-FALSE warning(" 'var.diff' set to FALSE with multiple segmented variables", call.=FALSE) } nomiU <- object$nameUV$U nomiV <- object$nameUV$V .coef <- coef(object) if(is.null(.coef)) .coef<- object$coef if(is.null(.coef)) stop(" No coefficients in the fit object?") idU<-match(nomiU,names(.coef[!is.na(.coef)])) idV<-match(nomiV,names(.coef[!is.na(.coef)])) beta.c<- .coef[nomiU] #per metodo default.. ma serve???? #browser() if(all(is.na(object[["psi"]][,"St.Err"]))) { if(inherits(object, "lm")){ R <- chol2inv(object$qr$qr) if(!inherits(object, "glm")){ s2 <- sum(object$weights*object$residuals^2)/object$df.residual se.psi <- sqrt(diag(R)*s2)[idV] } else { s2<- if(object$fam$fam%in%c("poisson","binomial")) 1 else object$deviance/object$df.residual se.psi <- sqrt(diag(R)*s2)[idV] } object[["psi"]][,"St.Err"] <- se.psi } } if("segmented.default" == as.character(object$call)[1]){ summ <- c(summary(object, ...), object["psi"]) summ[c("it","epsilon")]<-object[c("it","epsilon")] #v<-try(vcov(object), silent=TRUE) #if(class(v)!="try-error") v<-sqrt(diag(v)) return(summ) } if("lm"%in%class(object) && !"glm"%in%class(object)){ summ <- c(summary.lm(object, ...), object["psi"]) summ$Ttable<-summ$coefficients if(var.diff){ #modifica gli SE Qr <- object$qr p <- object$rank #n.parametri stimati p1 <- 1L:p inv.XtX <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) X <- qr.X(Qr,FALSE) attr(X, "assign") <- NULL K<-length(unique(object$id.group)) #n.gruppi (=n.psi+1) dev.new<-tapply(object$residuals, object$id.group, function(.x){sum(.x^2)}) summ$df.new<-tapply(object$residuals, object$id.group, function(.x){(length(.x)-eval(parse(text=p.df)))}) if(any(summ$df.new<=0)) stop("nonpositive df when computig the group-specific variances.. reduce 'p.df'?", call. = FALSE) summ$sigma.new<-sqrt(dev.new/summ$df.new) sigma.i<-rowSums(model.matrix(~0+factor(object$id.group))%*%diag(summ$sigma.new)) var.b<-inv.XtX%*%crossprod(X*sigma.i)%*%inv.XtX #sqrt(rowSums((X %*% V) * X)) dimnames(var.b)<-dimnames(summ$cov.unscaled) summ$cov.var.diff<-var.b summ$Ttable[,2]<-sqrt(diag(var.b)) summ$Ttable[,3]<-summ$Ttable[,1]/summ$Ttable[,2] summ$Ttable[,4]<- 2 * pt(abs(summ$Ttable[,3]),df=object$df.residual, lower.tail = FALSE) dimnames(summ$Ttable) <- list(names(object$coefficients)[Qr$pivot[p1]], c("Estimate", "Std. 
Error", "t value", "Pr(>|t|)")) } if(!is.null(.vcov)){ summ$Ttable[,2]<-sqrt(diag(.vcov)) summ$Ttable[,3]<-summ$Ttable[,1]/summ$Ttable[,2] summ$Ttable[,4]<- 2 * pt(abs(summ$Ttable[,3]),df=object$df.residual, lower.tail = FALSE) #dimnames(summ$Ttable) <- list(names(object$coefficients)[Qr$pivot[p1]], c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) } summ$Ttable[idU,4]<-NA summ$Ttable<-summ$Ttable[-idV,] summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) summ$var.diff<-var.diff summ$short<-short class(summ) <- c("summary.segmented", "summary.lm") return(summ) } #if("glm"%in%class(object)){ if(inherits(object, "glm")){ summ <- c(summary.glm(object, ...), object["psi"]) summ$Ttable<-summ$coefficients[-idV,] summ$Ttable[idU,4]<-NA summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) summ$short<-short class(summ) <- c("summary.segmented", "summary.glm") return(summ) } if("Arima"%in%class(object)){ #da controllare coeff<-object$coef v<-sqrt(diag(object$var.coef)) Ttable<-cbind(coeff[-idV],v[-idV],coeff[-idV]/v[-idV]) colnames(Ttable)<-c("Estimate","Std. Error","t value") object$Ttable<-Ttable object$short<-short summ<-object summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$n.boot<-length(na.omit(object$psi.history$all.ss)) class(summ) <- c("summary.segmented", "summary.Arima") return(summ) } } segmented/R/step.lm.fit.boot.r0000644000176200001440000002355114757620650015740 0ustar liggesusersstep.lm.fit.boot <- function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- #--------------------------------------------- adj.psi <- function(psii, LIM) { pmin(pmax(LIM[1, ], psii), LIM[2, ]) } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]] psi.values<-lista[[2]] if(any(is.na(psi.values[[1]]))) {#se la 1 componente e' NA (fino alla versione 2.0-3 era cosi'... perche' in dev.values c'erano # anche i valori relativi al modello senza psi.. ) dev.values<-dev.values[-1] #remove the 1st one referring to model without psi psi.values<-psi.values[-1] } dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- #obj<- jump.fit(y, XREG=x.lin, Z=Xtrue, PSI, w=ww, offs, opz, return.all.sol=FALSE) #-------------- if(is.null(opz$seed)){ mY <- mean(y) sepDec<-if(options()$OutDec==".") "\\." 
else "\\," vv <- strsplit(paste(strsplit(paste(mY), sepDec)[[1]], collapse=""),"")[[1]] vv<-vv[vv!="0"] vv=na.omit(vv[1:5]) seed <-eval(parse(text=paste(vv, collapse=""))) if(is.null(seed)) seed <- 1 set.seed(seed) } else { if(is.na(opz$seed)) { seed <-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(seed) } else { seed <-opz$seed set.seed(opz$seed) } } #browser() visualBoot<-opz$display opz$display<-FALSE #opz.boot<-opz #opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-0 n<-length(y) opz0 <- opz opz0$agg <- .2 alpha<-opz$alpha #limZ <- apply(Z, 2, quantile, names = FALSE, probs = alpha) limZ <- if(is.null(opz$limZ)) apply(Z, 2, quantile, names=FALSE, probs=alpha) else opz$limZ rangeZ <- apply(Z, 2, range) #serve sempre o0<-try(suppressWarnings(step.lm.fit(y, XREG, Z, PSI, w, offs, opz0, return.all.sol=FALSE)), silent=TRUE) #browser() if(!is.list(o0)) { #qui riproviamo con opz (che ha agg=.05) e che puo' convergere e comunque ha "return.all.sol=TRUE" o0<- suppressWarnings(step.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) if(length(o0)==2) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE) o0<-try(suppressWarnings(step.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),options()$OutDec)[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z count.random<-0 agg.values<-seq(.2,.05,l=n.boot) ###INIZIO BOOT alpha<-.1 corr=1.2 #browser() n.boot.rev<- 3 #3 o 4? for(k in seq(n.boot)){ ##se gli *ultimi* n.boot.rev valori di ss sono uguali, cambia i psi... diff.selected.ss <- rev(diff(na.omit(all.selected.ss))) if(length(diff.selected.ss)>=(n.boot.rev-1) && all(round(diff.selected.ss[1:(n.boot.rev-1)],6)==0)){ #browser() qpsi <- sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) qpsi.cor <- sapply(1:ncol(Z),function(i)mean((corr*est.psi0[i])>=Z[,i])) qpsi <- ifelse(abs(qpsi-.5)<=.2, qpsi.cor, alpha) alpha<-1-alpha corr<-1/corr est.psi0 <- sapply(1:ncol(Z),function(i)quantile(Z[,i], probs=qpsi[i],names=FALSE)) est.psi0 <- adj.psi(est.psi0, limZ) #est.psi0<- jitter(est.psi0, amount=min(diff(est.psi0))) #est.psi0<-est.psi0*.8 #qpsi<-sapply(1:ncol(Z),function(i)mean(est.psi0[i]>=Z[,i])) ##qpsi<-ifelse(abs(qpsi-.5)<.1, alpha, qpsi) ## credo si possa eliminare.. ##alpha<-1-alpha ## credo si possa eliminare.. 
#est.psi0 <-sapply(1:ncol(Z),function(i)quantile(Z[,i],probs=1-qpsi[i],names=FALSE))
##est.psi0<- jitter(est.psi0, amount=min(diff(est.psi0)))
}
########################### 25/7/24 #####
est.psi0 <- unlist(tapply(est.psi0, opz$id.psi.group, sort))
#########################################
PSI <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE)
if(jt) Z<-apply(Z.orig,2,jitter)
if(nonParam){
id<-sample(n, size=size.boot, replace=TRUE)
o.boot<-try(suppressWarnings(step.lm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz)), silent=TRUE)
} else {
yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE)
o.boot<-try(suppressWarnings(step.lm.fit(yy, XREG, Z.orig, PSI, w, offs, opz)), silent=TRUE) #use the weights vector 'w' here
}
if(is.list(o.boot)){
all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi
} else {
#browser()
est.psi.boot<-apply(limZ,2,function(r)runif(1,r[1],r[2]))
est.psi.boot<- unlist(tapply(est.psi.boot, opz$id.psi.group, sort))
}
PSI <- matrix(est.psi.boot, n, ncol = length(est.psi.boot), byrow=TRUE)
#opz$h<-max(opz$h*.9, .2)
opz$it.max<-opz$it.max+1
opz$agg<-agg.values[k]
opz$Nboot <- k
o <-try(suppressWarnings(step.lm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE)
if(!is.list(o) && random){
est.psi0<-apply(limZ,2,function(r)runif(1,r[1],r[2]))
PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE)
o <-try(suppressWarnings(step.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE)
count.random<-count.random+1
}
#if the model has been estimated, check whether the solution is better..
#browser()
if(is.list(o)){
if(!"coefficients"%in%names(o$obj)) o<-suppressWarnings(try(extract.psi(o), silent=TRUE))
#if(class(o)!="try-error"){
if(!inherits(o, "try-error")){
all.est.psi[k,]<-o$psi
all.ss[k]<-o$SumSquares.no.gap
if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o
est.psi0<-o0$psi
all.selected.psi[k,] <- est.psi0
all.selected.ss[k]<-o0$SumSquares.no.gap
}
}
if (visualBoot) {
flush.console()
# spp <- if (it < 10) " " else NULL
# cat(paste("iter = ", spp, it,
# " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #was format="fg"
#n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1])
unlpsi<- unlist(est.psi0)
Lp<-length(unlpsi)
cat(paste("boot sample = ", sprintf("%2.0f",k),
" opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #was format="fg"
" n.psi = ",formatC(Lp, digits=0,format="f"),
" est.psi = ",paste(formatC(unlpsi[1:min(Lp,5)],digits=3,format="f"), collapse=" "), #sprintf('%.2f',x)
sep=""), "\n")
}
#count the equal ss values.. so the loop can stop earlier..
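## Early exit: if the last 'break.boot' accepted objective values are identical (up to 6 decimal
## places), further bootstrap samples are unlikely to improve the current optimum, so the loop
## stops before completing all n.boot iterations.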
asss<-na.omit(all.selected.ss)
if(length(asss)>break.boot){
if(all(rev(round(diff(asss),6))[1:(break.boot-1)]==0)) break
}
} #end n.boot
all.selected.psi<-rbind(est.psi00,all.selected.psi)
all.selected.ss<-c(ss00, all.selected.ss)
#SS.ok<-min(all.selected.ss)
#id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05)
#psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean)
#est.psi0<-psi.mean
# #the model should be re-estimated with psi.mean
# PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0))
# o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE)
ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss)
if(is.null(o0$obj)){
PSI1 <- matrix(est.psi0, n, ncol = length(est.psi0), byrow=TRUE)
o0 <- try(step.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE)
warning("The final fit can be unreliable (possibly misspecified segmented relationship)", call.=FALSE, immediate.=TRUE)
}
if(!is.list(o0)) return(0)
o0$boot.restart<-ris
o0$seed <- seed
#rm(.Random.seed, envir=globalenv())
return(o0)
}
segmented/NEWS0000644000176200001440000007423614757630107012742 0ustar liggesusers****************************
* *
* Changes in segmented *
* *
****************************
=============== version 2.1-4 (2025-02-26) ===============
* Small changes in the selection of the starting values for psi and when printing the estimation process
* Bugs fixed: in segmented.lme() arguments 'seed' and 'break.boot' were not managed correctly. Thanks to Athanasios Protopapas for reporting.
=============== version 2.1-3 (2024-10-25) ===============
* New argument 'level' in slope() to return 'individual' left/right slopes for "segmented.lme" fits (thanks to Otto Monge for the input)
* Bug fixed in segmented.lme(): with noisy data, the changepoint predictions were not consistent with the fixed effect estimate (thanks to E. (Lisa) Levelt for reporting).
=============== version 2.1-2 (2024-09-05) ===============
* predict.segmented() and predict.stepmented() now account for the possible offset term in the model (thanks to Pauline Scherdel for her input); 'link' in plot.stepmented() now defaults to TRUE.
* Bugs fixed: segreg() and stepreg() did not return the possible offset term in the object fit (although computations correctly accounted for it).
=============== version 2.1-1 (2024-07-26) ===============
* Bugs fixed: intercept() did not work for segmented.lme fits (thanks to Kelly Aho for reporting); segreg() and stepreg() returned a wrong null deviance when an offset term was included; segmented.* occasionally returned unsorted breakpoints (thanks to Christian Wirtz for reporting).
=============== version 2.1-0 (2024-05-14) ===============
* New functions stepreg() to fit stepmented regression via the formula interface (like segreg()), and predict.stepmented() to compute predictions for 'stepmented' objects; predict.segmented() gains argument 'na.action'.
* A change in the procedure to outdistance the possible close estimated breakpoints (thanks to Marc Laurencelle for the input) by segmented.
* Some changes in the object returned by selgmented() to have the same names regardless of the final output (thanks to Marc Laurencelle for the input); argument 'const' added in plot.stepmented.
* Bug fix: segmented.lm did not manage covariates having large values (tens of thousands). This bug was incidentally introduced in version 2.0-4; thanks to Brian Binder for reporting.
model.matrix.segmented() did not work on fits with n large (n>=1e+05, say); thanks to A. Priulla for the input.
segmented.(g)lm did not print coefficients correctly when an interaction term with the segmented covariate was included (thanks to Annalise LaPlume for reporting); predict.segmented() did not work if the supplied dataframe had missing values (thanks to Marc Laurencelle for reporting)
=============== version 2.0-4 (2024-04-19) ===============
* Some improvements in the fitter functions for segmented and stepmented fits to gain efficiency
* A small change in the segmented.* methods: the possible estimated breakpoints at the boundary or too close to each other are slightly outdistanced to avoid puzzling results (null or very large st.errs). See 'min.nj' in seg.control()
* Bug fix: confint.stepmented() did not work if the supplied fit had variable names including dots (thanks to Matti Lehtonen for reporting); segmented crashed when options(OutDec=",") was set (thanks to Andras Kosztolanyi for reporting); selgmented() was ignoring return.fit=FALSE when 0 breakpoints were selected (thanks to Marc Laurencelle for reporting); vcov.stepmented() did not work correctly for stepmented glm fits.
=============== version 2.0-3 (2024-02-16) ===============
* Some improvements in selgmented() to select the number of breakpoints
* Dataset globTempAnom included in the package
* In plot.segmented() argument 'term' can be a vector to draw multiple fitted segmented relationships on the same plot
* In points.segmented() argument 'term' can be numeric
=============== version 2.0-2 (2024-01-23) ===============
* In seg(), 'est' and 'psi' can be a list to set group specific constraints (when 'by' is specified)
* Bug fix:
- predict.segmented() did not work when the supplied dataset had 1 row (thanks to Felipe Mattioni for reporting), and when the object fit included a factor, possibly coming from an interaction term such as seg(x, by=g) (thanks to Conor McAloon and Dave Armstrong for reporting)
- segreg() did not work with constraints when 'by' was specified in seg() (thanks to Signe Marie Jensen for reporting)
- segmented.numeric() and stepmented.numeric() were ignoring their 'weights' argument.
=============== version 2.0-1 (2023-12-19) ===============
* Change of values for argument 'break.type' in pscore.test(): now these are "break" or "jump" (were 1 or 2)
* New arguments in plot.stepmented(), especially 'conf.level' to portray pointwise confidence intervals of the step regression function.
* Bug fix: predict.segmented() did not work if the supplied dataframe included variables not in the model (thanks to A. Priulla and N. D'angelo for reporting); segreg() was not reporting the intercept name.
=============== version 2.0-0 (2023-11-27) ===============
* New class "stepmented" introduced, with some relevant methods. The functions stepmented.* allow fitting regression models (lm and glm) with step-function effects (and additional linear terms).
* segreg() introduced for segmented modelling. This function allows specifying segmented (G)LM's via a proper formula including both linear and multiple segmented terms wherein some slopes can be constrained to be zero.
* Some improvements in segmented.lme() wherein arguments 'nq' and 'adjust' have been removed.
* Some changes in seg.control(): arguments 'gap', 'last', 'stop.if.error', and 'powers' removed; 'quant' now defaults to FALSE.
-- 'alpha' accepts 2 values to fix (via the quantile probability values) the covariate range within which the breakpoint should stay (thanks to Clement Laroche for his input). -- h=0 is now allowed to keep fixed the breakpoints by also setting n.boot=0 (as an alternative to 'it.max=0') * new function model.matrix.segmented() * segmented.default() works with objects "svyglm" (thanks to Huifeng Jin for his input) * predict.segmented() has been rewritten and is stabler. * changed some default values in plot.segmented() (for lines and residuals). =============== version 1.6-4 (2023-04-14) =============== * A (relevant) enhancement in selgmented() which now can detect a large number of breakpoints not evenly distributed in the covariate range (see argument G). Also, selgmented() now works with Kmax=1 (thanks to Marc Laurencelle for his input) * plot.segmented.lme() now can draw all the segmented profiles on the same panel (see argument 'n.plot') * Bug fix: in slope() when managing "lme" fits; in pscore.test() when managing glm fits (thanks to Yuki Mori for reporting) =============== version 1.6-3 (2023-03-20) =============== * selgmented was not always returning information about the number of selected breakpoints (thanks to Alba Serrat for reporting) * slope() did not work on segmented.lme fits. (thanks to Luiz F. Silva Magnago for reporting; the error was introduced accidentally by version 1.6-2) =============== version 1.6-2 (2022-12-09) =============== * slope() now accepts also a list of two segmented fits to compare the corresponding slope estimates (thanks to Florencia Camus for the input) * Substantial improvement in selgmented() when selecting a large number (and locations) of breakpoints =============== version 1.6-1 (2022-11-08) =============== * New argument 'const' in points.segmented(). * A small correction in plot.segmented.lme() when xscale=-1 and/or yscale=-1 (i.e. for subject-specific x- or y-scales) * (Minor) bug fixed: in plot.segmented() 'const' was ignored when conf.level>0; in selgmented() type='davies' did not work and 'AIC' was always printed, even if type="bic" was set. =============== version 1.6-0 (2022-05-30) =============== * New function segmented.lme (and related methods) introduced to fit segmented mixed models, i.e. segmented model with random effects changepoints. * 'alpha' in seg.control() defaults to NULL, i.e. max(0.05, 1/n). =============== version 1.5-0 (2022-04-11) =============== * New method function segmented.numeric() to allow short call such as 'segmented(y)' (providing the same results of segmented(lm(y~x),~x). Such 'short' call also works for 'ts' objects. * Bug fix: functions segmented.* (with boot restarting) were ignoring the 'alpha' argument of seg.control(). =============== version 1.4-1 (2022-03-24) =============== * Improvement in the estimating algorithm for segmented.glm * Minor changes: option 'aic' introduced in selgmented(); 'alpha' in seg.control is set to 0.05 (it was 0.02); 'shift' in lines.segmented() now defaults to FALSE (was TRUE); a simple correction for plot.segmented(.., rev.sgn=TRUE); predict.segmented now also works for lme fits (thanks to Ting-Fang Lee for his input). 
=============== version 1.4-0 (2022-01-28) =============== * New function pwr.seg() for power analysis in segmented regression (thanks to Bastian Kückelhaus for his input) * An improvement in the estimating algorithm (only for lm models) to gain efficiency * Bugs fixed: pscore.test() returned an error with covariates taking large values (tens of thousands) leading to collinearity in the design matrix; (thanks to Michael Laviolette for reporting). A minor change in summary.segmented() which reports now "n.boot" information =============== version 1.3-4 (2021-04-22) =============== * selgmented() now returns information on the selection of the no.of breakpoints. It also gains argument 'msg'. * Bug fix in plot.segmented(): it did not work correctly with rev.sgn=TRUE and it did not handle correctly multiple colors for displaying the conf.intervals when shade=FALSE. =============== version 1.3-3 (2021-03-08) =============== * Two (internal) changes in the boot restarting algorithm: i) option 'break.boot' introduced in seg.control() to save computational time; ii) an improvement in the algorithm expected to better escape local solutions (thanks to Daniele Cuntrera for his input). * Fixed bugs: broken.line() didn't work (a silly bug, thanks to Kirstin Deuss for reporting); confint.segmented() and plot.segmented() did not handle correctly the Arima fits. * Some further code examples in ?segmented. =============== version 1.3-2 (2021-02-09) =============== * New arguments '.vcov' and '.coef' in the utility functions (slope(), plot.segmented(), confint.segmented(),..) in order to account for fit objects returned by segmented.default() (Thanks to Yi Hu for his input). * new argument 'smoos' in plot.segmented() to draw smoothed scatterplot when 'res=TRUE'. =============== version 1.3-1 (2020-12-10) =============== * segmented.default() now accepts "lme" objects (thanks to Hannah Watson for her input) * Bug fix: pscore.test() crashed in large datasets (thanks to John Grego for reporting); segmented.lm() did not work when the scale of x was defined on very large range, e.g. 1e8 to 2e8 (thanks to Elio Campitelli for reporting) =============== version 1.3-0 (2020-10-23) =============== * New function selgmented() to select the number of breakpoints (via BIC or hypothesis testing). * New argument 'fixed.psi' in segmented.* methods to fix some of breakpoints in the fitted model. * The 'seed' argument in seg.control() defaults to 12345 to guarantee reproducibility. =============== version 1.2-0 (2020-06-23) =============== * Changes in segmented.default to allow estimation of segmented censReg models (thanks to Georgina Kaltenecker for her input). * coef.segmented() introduced to extract regression coefficients and breakpoints. * A small improvement in segmented.* methods: if fix.npsi=TRUE, the returned number of breakpoints is always equal to the initial one. * npsi can be missing in the segmented.* methods, and one breakpoint is estimated for all variables specified in seg.Z. * New argument 'fc' in seg.control() to manage situations when the estimated breakpoints come to the boundary. * New argument in plot.segmented() to draw also the trends before the breakpoints. * Bug fix: plot.segmented() did not recycle arguments 'col', 'lwd' and 'lty'. 
=============== version 1.1-0 =============== * Changes in segmented.default to allow estimation of betareg models (thanks to Malcolm Baptie and Jochen Wilhelm for their input) =============== version 1.0-0 =============== * New implementation of the estimating algorithm. Now segmented is much stabler, and dependence on starting values (that occurred in some 'difficult' datasets) is greatly reduced and apparently cancelled. * New arguments: - 'npsi' in segmented.* methods to specify the number of breakpoints (and not the values) to be estimated. - 'is' in vcov.segmented() to compute covariance matrix based on the idea of induced smoothing. This leads to higher (and more trustworthy) standard errors for the breakpoint. - 'isV' in broken.line() and plot.segmented() to allow smooth "transition" in the standard errors values for the fitted values at the breakpoint. - 'conv.psi', 'alpha', 'fix.npsi', 'min.step' and 'tol' (the last one replacing 'toll') in seg.control() (where arguments 'last', 'stop.if.error', and 'gap' will be removed in the next releases) - 'n.break' in pscore.test(). - .vcov in broken.line() and plot.segmented() to pass directly the full covariance matrix of estimates. plot.segmented() accepts 'col.shade', see ?plot.segmented * Minor changes: better display of the iterative steps (if display=TRUE in seg.control()) and general improvement in draw.history(). =============== version 0.5-4.0 =============== * confint.segmented() now computes breakpoint confidence intervals via the (smoothed) Score or Gradient statistic, see new argument 'method' in confint.segmented(). * If segmented does not converge (since the estimated psi leaves only 1 datum on its left/right) the last psi estimate is printed as a message. * Argument keep.class added in segmented.default() * Argument 'rug' defaults to '!add' in plot.segmented() * confint.segmented() now returns a matrix (rather than a list) * Bug fixed: plot.segmented() did not handle appropriately arguments cex.lab and cex.axis. segmented methods did not terminate appropriately when automatic selection of number of breakpoints was performed (i.e. 'stop.if.error=FALSE'). davies.test() and segmented.glm() required 'seg.Z' even if there was just one covariate in the starting model and it could be missing (thanks to Lein E. Pardo for reporting that). Some minor bug fixes (related to argument 'data' and one-sided alternative) in pscore.test (thanks to Karista Hudelson for reporting). =============== version 0.5-3.0 =============== * aapc() introduced. The function computes the 'average annual percent change' to summarize piecewise linear trends (thanks to Yuchen Qin for his input). * plot.segmented() now accepts arguments 'cex.axis' and 'cex.lab' (thanks to Matthew Birk for his input). * Bug fixed: segmented.Arima didn't work for arima fits including a seasonal component (thanks to Claudio Agostinelli for reportig the bug). =============== version 0.5-2.2 =============== * When there is a single covariate in the starting (g)lm, seg.Z can be missing when calling the segmented methods. * Bug fixed: plot.segmented(.., link=FALSE) did not work correctly (sometimes it returned an error) for glm fits with multiple breakpoints. Weights were not handled appropriately by segmented.lm. =============== version 0.5-2.1 =============== * pscore.test() now works also for "glm" fits * plot.segmented() now plots the partial residuals as "component + working residuals" (rather than Pearson residuals, relevant only for glm fits). 
* segmented.default() now is expected to work for fits obtained by MASS::rlm(). =============== version 0.5-2.0 =============== * pscore.test() introduced. The function tests for a breakpoint using a (pseudo) score statistic which is more powerful than davies.test(), especially when the breakpoint is expected to be in the middle of the covariate range and the signal-to-noise ratio is high. * Argument 'digits' added in seg.control() to fix the number of digits of the breakpoint estimate during the iterative estimation algorithm. * Bug fixed: conf.level>0 in plot.segmented() did not work for objects returned by segmented.default(). =============== version 0.5-1.5 (not on CRAN) =============== * Arguments 'gap' and 'show.gap' removed in intercept() and in plot.segmented(). (they are meaningless, as segmented() always returns joined piecewise lines, i.e. with no gaps). * slope() and broken.line() (and then plot.segmented() which uses them) did not work for objects returned by segmented.default() (Thanks to Marcos Krull for reporting). =============== version 0.5-1.4 =============== * segmented.Arima() should be slightly faster, as starting values are passed in arima() (via 'init') throughout the iterative process. * plot.segmented() is expected to work for objects returned by segmented.Arima. * print.summary.segmented() does not print anymore the t-values for the gap coefficients (this information is meaningless as the gap coeffs are always set to zero in the returned model). * Bug fixed: intercept() ignored argument 'rev.sgn'; points.segmented() missed argument 'transf'. =============== version 0.5-1.3 (not on CRAN) =============== * plot.segmented() gains argument 'transf' to plot 'transf(values)' rather 'values' on the current plot. * print.summary.segmented() now uses round() rather than signif() when displaying the breakpoint estimate. * Bug fixed: psi=NA was not working in the segmented.* methods; this bug was incidentally introduced in the last version (thanks to Bertrand Sudre for first reporting that). =============== version 0.5-1.2 =============== * For 1 breakpoint models, 'psi' argument can be missing (default) when calling the segmented methods. * Bug fixed: lines.segmented() did not plot the dots when the fit object included multiple breakpoints and the argument 'shift' was set to FALSE (thanks to Jan Bull for reporting). There were some troubles with variable names including dots (thanks to Melanie Zoelck which first reported this bug). =============== version 0.5-1.1 =============== * segmented.default now accepts 'gee' fits (Thanks to John Boulanger for his input) * Minor change: argument 'col.dens' changed to 'dens.col' in plot.segmented() ('col.dens' made ineffective 'col') * Minor change: error/warning messages introduced in davies.test() if k<10; print.segmented slightly changed in displaying the estimated breakpoints. * Bug fixed: segmented did not terminate appropriately the algorithm with automatic selection of breakpoints concerning more than one variable (thanks to Ali Hashemi for reporting). =============== version 0.5-1.0 =============== * segmented.Arima() introduced. Now it is possible to estimate segmented relationships in "Arima" fits (although the summarizing and plotting methods do not work..) * plot.segmented() gains arguments 'dens.rug' and 'col.dens' to display in the plot (on the x axis) also the smoothed density of the segmented covariate. * Bug fixed: segmented.lm did not work if it.max=0 (but segmented.glm did), thanks to Eric Nussbaumer for reporting. 
segmented.lm and segmented.glm did not work if the starting linear model included weights (this bug was introduced incidentally since version 0.4-0.1; thanks to Michael Rutter for reporting).
segmented.lm and segmented.glm did not check inadmissible breakpoints appropriately (thanks to Erica Tennenhouse for reporting).
segmented.lm and segmented.glm did not handle correctly variable names equal to function names.
davies.test() did not work with 'segmented' objects (to test for an additional breakpoint).
points.segmented() missed the argument 'rev.sgn'.
=============== version 0.5-0.0 ===============
* segmented.default() introduced. Now it is possible to estimate segmented relationships in arbitrary regression models (besides lm and glm) where specific methods do not exist (e.g. cox or quantile regression models).
=============== version 0.4-0.1 (not on CRAN) ===============
* segmented.lm() and segmented.glm() did not work if the starting model included additional "variables", such as 'threshold' in 'subset=age0.
* The breakpoint starting values when automatic selection is performed are now specified as equally spaced values (optionally as quantiles). See argument 'quant' in seg.control()
* added 'Authors@R' entry in the DESCRIPTION file
=============== version 0.2-9.1 ===============
* Some bugs fixed: segmented.lm() and segmented.glm() did not finish correctly when no breakpoint was found; now segmented.lm() and segmented.glm() take care of flat relationships; plot.segmented() did not compute the partial residuals correctly for segmented glm fits.
=============== version 0.2-9.0 ===============
* Bootstrap restarting implemented to deal with problems coming from flat segmented relationships. segmented now is less sensitive to starting values supplied for 'psi'.
* At convergence segmented now constrains the gap coefficients to be exactly zero. This is the default and it can be altered by the 'gap' argument in seg.control().
* plot.segmented() has been re-written. It gains argument `res' for plotting partial residuals along with the fitted piecewise lines, and now it produces nicer (and typically smaller) plots.
* Some bugs fixed: davies.test() did not work correctly for deterministic data (thanks to Glenn Roberts for finding the error). davies.test() also returns the `process', i.e. the different values of the evaluation points and corresponding test statistic.
=============== version 0.2-8.4 ===============
* Some bugs fixed: segmented.glm() fitted a simple "lm" (and not "glm") (the error was introduced incidentally from 0.2-8.3, thanks to Veronique Storme for finding the error); broken.line() was not working for models without intercept and a null left slope; intercept() was not working correctly with multiple segmented variables.
=============== version 0.2-8.3 ===============
* Some minor bugs fixed: segmented.lm() and segmented.glm() did not find the offset variable in the dataframe where the initial (g)lm was called for; segmented.lm() and segmented.glm() sometimes returned an error when the automated algorithm was used (thanks to Paul Cohen for finding the error).
=============== version 0.2-8.2 ===============
* Some minor bugs fixed (segmented.lm() and segmented.glm() *always* included the left slope in the estimation process, although the number of parameters was correct in the returned final fit.
confint.segmented() did not order the estimated breakpoints for the variable having rev.sgn=TRUE; intercept() missed the (currently meaningless) argument var.diff (thanks to Eric Fuchs for pointing out that). ) =============== version 0.2-8.1 =============== * Some minor bugs fixed (segmented.lm() and segmented.glm() were not working correctly with dataframe subset or when the starting linear model included several intercepts (e.g., see the example about data("plant"); thanks to Nicola Ferrari for finding the error). davies.test() did not work when the variable name of its argument `seg.Z' included reserved words, e.g. `seg.Z~dist'; thanks to Thom White for finding the error). =============== version 0.2-8 =============== * intercept() added. It computes the intercepts of the regression lines for each segment of the fitted segmented relationship. * plot.segmented() now accepts a vector `col' argument to draw the fitted piecewise linear relationships with different colors. * Some minor bugs fixed (summary.segmented were not working correctly). =============== version 0.2-7.3 =============== * argument APC added to the slope() function to compute the `annual percent change'. * Some minor bugs fixed (confint and slope were not working correctly when the estimated breakpoints were returned in non-increasing order; offset was ignored in segmented.lm and segmented.glm; broken.line() was not working correctly (and its argument gap was unimplemented), thanks to M. Rennie for pointing out that; summary.segmented() was not working for models with no linear term, i.e. fitted via segmented(lm(y~0),..)). =============== version 0.2-7.2 =============== * segmented.lm and segmented.glm now accept objects with formulas y~., Thanks to G. Ferrara for finding the error. * Some bugs fixed (slope and confint were using the normal (rather than the t-distribution) to compute the CIs in gaussian models). =============== version 0.2-7.1 =============== * segmented.lm and segmented.glm now accept objects without 'explicit' formulas, namely returned by lm(my_fo,..) (and glm(my_fo,..)) where my_fo was defined earlier. Thanks to Y. Iwasaki for finding the error. =============== version 0.2-7 =============== * A sort of automatic procedure for breakpoint estimation is implemented. See argument stop.if.error in seg.control(). * davies.test() now accepts a one-sided formula (~x) rather than character ("x") to mean the segmented variable to be tested. davies.test also gains the arguments `beta0' and `dispersion'. * Some bugs fixed. =============== version 0.2-6 =============== * vcov.segmented() added. * option var.diff for robust covariance matrix has been added in summary.segmented(), print.summary.segmented(), slope(), and confint(). * Some bugs fixed. 
segmented/data/0000755000176200001440000000000014616156562013152 5ustar liggesuserssegmented/data/plant.rda0000644000176200001440000000122714415477004014754 0ustar liggesusers‹íV=hSQ¾ùQl@쬛ÔÁÕA‡rX¨¢Í`;dðQ“¢¤Iš¤-M©4m£‹ÐM¨«Ý„.‚t‡º(èÐÅø yiš”æÇ.Šƒ4Þ{Ï9·ôênA/Üwî¹÷;?ß9÷ñ^¸o¨'4BøEÐçþ€\ýòáAÑ!å¡TÜMd…œŠ:=&ç¼X¨N®ù¼ëÛÚbä ¼ÒjÖûW:sÞeXWê@ÞÅô€÷tþ%õ¸êƒéåňòeOÂÆ9= Ü«PÉ©ƒiØ\SÀ—P é¨i³l©g×wØ&û:Ùo“_F½ Ê“e‹òbÙ$ÿMÊï+Åaû¡‡ñÏç7j¼…âÃþ<ªC‰ò+‘=󫮦`ý+{<ˆ7çÉ<ìø-ª¿áMy²½ÑéÜ–[ •Z†ªß „ß$þ\ß ñó–ÔxdúåQ™ócù‘â|"¿Ÿ‰ï |ØO…pUâÇõa^†át?š:Ì)¨ucŒÎ}µûÍýãºr=¹Ÿ\?öÓ¢:œuŸXòý±òä÷ÇqŸjý7à áÜÁº8ñ›:/'qMçã¤~ ž~ˆ¸ÌîgŸ¡>#¹ŒûWÐÏäE”SpŸeîú›>v3GP¿ûñ³×QæÏã~¾qù'˜ïå7WD}~ñó?Ñÿà~!ˆz¡å½³ˆcyìÏòö×é_­ñ—Ÿ £êýѬƒ7dúþîÜ÷q8ˆÆ3ru\g‡»ðÕ{ËA³¼dÿ ÇÝ [Ÿ1w8›LËÕ® O¸£Q;˜oŠÁìíÑ(#GÒÉñšÚV€Ð-7ëvÇÒÒ™ #œìæ ê.úgå£Ýn/a6b÷‹œ"T segmented/data/stagnant.rda0000644000176200001440000000054014415477004015452 0ustar liggesusers‹ r‰0âŠàb```b`adb`b2Y˜€# 'æ(.ILÏKÌ+a``«c`àb™ý/g‚À,û–ª‡îëDª÷_`8½oGàëV¹púX¡ý¯‡úc4aôþ'hô7Û‚Å×¹la´ý³40€ÓWª4¾¶ÿÌV`ÿdjÕCûׯ`§ß¯[§aîúVî£íï@Ý3ªæOû7g@à¬ý°7‚ìŸ@ùOÁÎ+´ µ掷PñÛ@Õ@-pú%H{àûW kõÛ„ò¡ôþ mKö„ŠÃÜ µo?,œ/CìÛl¬üþ»ûöÃìŠÃýý¢Ïj¾ý9H|ØCÍA‹oÖ¼ÄÜÔb C’(À‚Œ0F%ºòäœÄb˜r˜ WJbI¢^ZÐ$4åœEùåz0xA64‰ÿÿÿ?Rÿ@Úò±’segmented/data/globTempAnom.rda0000644000176200001440000000166014563636117016231 0ustar liggesusers‹eÖYPVuÇñË0¾c3y]·z¡7Þ4ýµÌ¥´‚°°(}UДM4Ì,Û4\hÑÊ\h±55±Ì£ ¹‚ˆ(¢InÍå4ãmô=ç<¿3Óë™ùpø/Ïó_Nîˆü!‰ü„çyé^Fzš—žÉ¯^æø¼‘ƒò¼Œ4 ÷x^Þ}§—MÎ+,)^ZVBÿû© ÚïEçe ÅÃxÙxÃ0áqŒÀ‰Q1xOa,Æái<ƒg‘ƒ\<‡<ŒÇóxù˜€ñ ð2^ÁDLB“1SQˆ"LÃt¼Š˜‰b°¾¬R”¡³PÙ˜ƒ×0ó0 P‰…x‹ðãM¼…%XŠe¨Âr¼wð.ÞÃûX•øÕX…ÕXƒµ¨Á‡øã¬Ãz|ŠÏð96à lÄ&lÆÔâK|…¯ñ ¶â[|‡ï± Á™nÇìÄØ…ÝØƒ±õ؇Ÿð3ö〃8„4â0ŽàWÅ1Ç œDšq §Ñ‚VœA΢çÐó¸€N\D.á2~Ãtãw\Å5\Ç ÜäîÞÜ_ÿHCuÁ€;þÞDvù–Ž„¿o]ð¬÷ÔçôT=Pïïp‡ãr‹>~kPÊùÓ?Ñ=ª®ewßüÌž÷k>~£½…irýƒA·òÚx<ÍãåÕ¸ªO}+Æ?nyš¢8·5*»:›·æ¯x¯·ÅùmáBü³•Aâ…~‹µ«^ñ»­]û ùhÝŠ×~¨]óÙiqj?lý•WóR»ò] ÂzªüS–Gëo·y]°}è²õ¨ÜO¶ß†?¿µß:­Sñg‚´ ÕñxZßQ;o•Õ®ùê…\ü?ÿùýbÝAqìŽ;ÐUÒ¶òºÙðØ—€:ÚFÍîµB¥º©‹ê¬¦[ÕKd‹š.–jN«jÞõйºTî®”k^±ku=­´Ûmë½þXÏ/µxкsegmented/NAMESPACE0000644000176200001440000000706014615160720013451 0ustar liggesusersimport(splines) import(MASS) import(nlme) importFrom("grDevices", "grey","adjustcolor","colorRampPalette") importFrom("graphics", "abline", "axis", "box", "lines", "matlines", "matplot", "par", "plot", "points", "polygon", "segments","smoothScatter", "legend","mtext", "text", "rect", "arrows") importFrom("stats", "spline", "approx", "as.formula", "coef", "contrasts", "family", "fitted", "formula", "gaussian", "glm.control", "glm.fit", "is.empty.model", "lm.wfit", "median", "model.frame", "model.matrix", "model.offset", "model.response", "model.weights", "na.omit", "pnorm", "predict", "printCoefmat", "pt", "qnorm", "qt", "quantile", "resid", "residuals", "runif", "summary.glm", "summary.lm", "update", "update.formula", "vcov", "weights", "dnorm", "lm", "lm.fit", "splinefun", "complete.cases","sd","qchisq","pchisq","BIC","get_all_vars", "optimize", "terms","is.ts", "tsp", "AIC",".lm.fit","var", "asOneSidedFormula", "logLik","plogis", ".getXlevels", "cov2cor", "glm", "uniroot","delete.response","weighted.mean","reformulate","sigma") importFrom("utils", "flush.console", "capture.output") export(segreg, seg, segmented, segmented.default, segmented.lm, segmented.glm, segmented.Arima, segmented.numeric, segmented.lme, print.segmented.lme, summary.segmented.lme, vcov.segmented.lme, confint.segmented.lme, logLik.segmented.lme, fixef.segmented.lme, fitted.segmented.lme, plot.segmented.lme, broken.line ,confint.segmented,davies.test,pscore.test,draw.history,aapc, intercept,lines.segmented,plot.segmented,print.segmented, coef.segmented, seg.control,seg.lm.fit,seg.glm.fit,seg.lm.fit.boot,seg.glm.fit.boot, 
seg.def.fit,seg.def.fit.boot, seg.Ar.fit,seg.Ar.fit.boot, seg.num.fit, seg.num.fit.boot, slope, summary.segmented, print.summary.segmented, vcov.segmented, predict.segmented, points.segmented, aapc, selgmented, pwr.seg, model.matrix.segmented) export(stepreg, stepmented, stepmented.lm, stepmented.glm, stepmented.ts, stepmented.numeric, step.lm.fit, step.glm.fit, step.ts.fit, step.num.fit, step.lm.fit.boot, step.glm.fit.boot, step.ts.fit.boot, step.num.fit.boot, summary.stepmented, print.summary.stepmented, print.stepmented, plot.stepmented, vcov.stepmented, model.matrix.stepmented, predict.stepmented) S3method(segmented,default) S3method(segmented,lm) S3method(segmented,glm) S3method(segmented,Arima) S3method(segmented,numeric) S3method(segmented,lme) S3method(plot,segmented) S3method(print,segmented) S3method(summary,segmented) S3method(print, summary.segmented) S3method(lines,segmented) S3method(confint,segmented) S3method(vcov,segmented) S3method(predict,segmented) S3method(points,segmented) S3method(coef,segmented) S3method(model.matrix,segmented) S3method(print, segmented.lme) S3method(summary, segmented.lme) S3method(vcov, segmented.lme) S3method(confint, segmented.lme) S3method(logLik, segmented.lme) S3method(fixef, segmented.lme) S3method(fitted, segmented.lme) S3method(plot, segmented.lme) #S3method(stepmented,default) S3method(stepmented,lm) S3method(stepmented,glm) S3method(stepmented,ts) S3method(stepmented,numeric) #S3method(stepmented,Arima) S3method(plot,stepmented) S3method(print,stepmented) S3method(summary,stepmented) S3method(print, summary.stepmented) S3method(confint,stepmented) S3method(lines,stepmented) S3method(model.matrix,stepmented) S3method(predict,stepmented)segmented/inst/0000755000176200001440000000000014516147210013203 5ustar liggesuserssegmented/inst/CITATION0000644000176200001440000000765114515671312014355 0ustar liggesuserscitHeader("To cite segmented in publications use one or more of the following papers:") bibentry(bibtype="Article", title = "A heuristic, iterative algorithm for change-point detection in abrupt change models", author = c(as.person("Salvatore Fasola"), as.person("Vito M.R. Muggeo"), as.person("Helmut Kuchenhoff")), journal = "Computational Statistics", year = "2018", volume = "33", number = "2", pages = "997-1015", textVersion = paste("Fasola S, Muggeo VMR, Kuchenhoff K. (2018).", "A heuristic, iterative algorithm for change-point detection in abrupt change models.", "Computational Statistics, 33, 997-1015.") ) bibentry(bibtype="Article", title = "Estimating regression models with unknown break-points.", author = "Vito M.R. Muggeo", journal = "Statistics in Medicine", year = "2003", volume = "22", pages = "3055--3071", textVersion = paste("Muggeo VMR (2003).", "Estimating regression models with unknown break-points.", "Statistics in Medicine, 22, 3055-3071.") ) bibentry(bibtype="Article", title = "segmented: an R Package to Fit Regression Models with Broken-Line Relationships.", author = "Vito M.R. Muggeo", journal = "R News", year = "2008", volume = "8", number = "1", pages = "20--25", url = "https://cran.r-project.org/doc/Rnews/", textVersion = paste("Muggeo VMR (2008).", "segmented: an R Package to Fit Regression Models with Broken-Line Relationships.", "R News, 8/1, 20-25.", "URL https://cran.r-project.org/doc/Rnews/.") ) bibentry(bibtype="Article", title = "Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling.", author = "Vito M.R. 
Muggeo", journal = "J of Statistical Computation and Simulation", year = "2016", volume = "86", pages = "3059-3067", textVersion = paste("Muggeo VMR (2016).", "Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling.", "J of Statistical Computation and Simulation, 86, 3059-3067.") ) bibentry(bibtype="Article", title = "Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach.", author = "Vito M.R. Muggeo", journal = "Australian & New Zealand Journal of Statistics", year = "2017", volume = "59", pages = "311-322", textVersion = paste("Muggeo VMR (2017).", "Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach.", "Australian \\& New Zealand Journal of Statistics, 59, 311-322.") ) bibentry(bibtype="Article", title = "Segmented mixed models with random changepoints: a maximum likelihood approach with application to treatment for depression study", author = c(as.person("Vito M.R. Muggeo"), as.person("David C. Atkins"), as.person("Robert J Gallop"), as.person("Sona Dimidjian")), journal = "Statistical Modelling", year = "2014", volume = "14", pages = "293-313", textVersion = paste("Muggeo VMR, Atkinhs DC, Gallopp RJ, Dimidjian S (2014).", "Segmented mixed models with random changepoints: a maximum likelihood approach with application to treatment for depression study", "Statistical Modelling, 14, 293-313.") ) segmented/man/0000755000176200001440000000000014614673073013013 5ustar liggesuserssegmented/man/step.lm.fit.Rd0000644000176200001440000001102414523203467015436 0ustar liggesusers\name{step.lm.fit} \alias{step.lm.fit} \alias{step.glm.fit} \alias{step.num.fit} \alias{step.ts.fit} \alias{step.lm.fit.boot} \alias{step.glm.fit.boot} \alias{step.num.fit.boot} \alias{step.ts.fit.boot} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fitter Functions for stepmented Linear Models } \description{ \code{step.lm.fit} is called by \code{stepmented.lm} to fit stepmented linear (gaussian) models. Likewise, \code{step.glm.fit} is called by \code{stepmented.glm} to fit generalized stepmented linear models. %, and \code{step.def.fit} is called by \code{stepmented.default} to fit %stepmented relationships in general regression models (e.g., quantile regression and Cox regression). The \code{step.*.fit.boot} functions are employed to perform bootstrap restarting. These functions should usually not be used directly by the user. } \usage{ step.lm.fit(y, x.lin, Xtrue, PSI, ww, offs, opz, return.all.sol=FALSE) step.lm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) step.glm.fit(y, x.lin, Xtrue, PSI, ww, offs, opz, return.all.sol=FALSE) step.glm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) %step.def.fit(obj, Z, PSI, mfExt, opz, return.all.sol=FALSE) %step.def.fit.boot(obj, Z, PSI, mfExt, opz, n.boot=10, size.boot=NULL, % jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) %step.Ar.fit(obj, XREG, Z, PSI, opz, return.all.sol=FALSE) %step.Ar.fit.boot(obj, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL, jt=FALSE, % nonParam=TRUE, random=FALSE, break.boot=n.boot) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{ vector of observations of length \code{n}. } \item{x.lin, XREG}{ design matrix for standard linear terms. 
}
\item{Xtrue, Z}{
appropriate matrix including the stepmented variables whose breakpoints have to be estimated.
}
\item{PSI}{
appropriate matrix including the starting values of the breakpoints to be estimated.
}
\item{ww,w}{
possible weights vector.
}
\item{offs}{
possible offset vector.
}
\item{opz}{
a list including information useful for model fitting.
}
\item{n.boot}{
the number of bootstrap samples employed in the bootstrap restart algorithm.
}
\item{break.boot}{
Integer, less than \code{n.boot}. If \code{break.boot} consecutive bootstrap samples lead to the same objective function, the algorithm stops without performing all \code{n.boot} 'trials'. This can save computational time considerably.
}
\item{size.boot}{
the size of the bootstrap resamples. If \code{NULL} (default), it is taken equal to the sample size. Values smaller than the sample size are expected to increase perturbation in the bootstrap resamples.
}
\item{jt}{
logical. If \code{TRUE} the values of the stepmented variable(s) are jittered before fitting the model to the bootstrap resamples.
}
\item{nonParam}{
if \code{TRUE} nonparametric bootstrap (i.e. case-resampling) is used, otherwise residual-based.
}
\item{random}{
if \code{TRUE}, when the algorithm fails to obtain a solution, random values are used as candidate values.
}
\item{return.all.sol}{
if \code{TRUE}, when the algorithm fails to obtain a solution, the values visited by the algorithm with corresponding deviances are returned.
}
% \item{obj}{
% the starting regression model where the stepmented relationships have to be added.
%}
% \item{mfExt}{
% the model frame.
%}
}
\details{
The functions iteratively call \code{lm.wfit} (or \code{glm.fit}) with a proper design matrix depending on \code{XREG}, \code{Z} and \code{PSI}. \code{step.lm.fit.boot} (and \code{step.glm.fit.boot}) implements the bootstrap restarting idea discussed in Wood (2001).
}
\value{
A list of fit information.
}
\references{
Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244.
}
\author{
Vito Muggeo
}
\note{
These functions should usually not be used directly by the user.
}
%% ~Make other sections like Warning with \section{Warning }{....} ~
\seealso{
\code{\link{stepmented.lm}} or \code{\link{stepmented.glm}}
}
\examples{
##See ?stepmented
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{regression}
\keyword{nonlinear }
segmented/man/segmented.Rd0000644000176200001440000004405014757276606015261 0ustar liggesusers\name{segmented}
\alias{segmented}
\alias{segmented.lm}
\alias{segmented.glm}
\alias{segmented.default}
\alias{segmented.Arima}
\alias{segmented.numeric}
%\alias{print.segmented}
%\alias{summary.segmented}
%\alias{print.summary.segmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Segmented relationships in regression models
}
\description{
Fits regression models with segmented relationships between the response and one or more explanatory variables. Break-point estimates are provided.
}
\usage{
segmented(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, ...)
\method{segmented}{default}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...)
\method{segmented}{lm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...)
\method{segmented}{glm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...) \method{segmented}{Arima}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...) \method{segmented}{numeric}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, adjX=FALSE, weights=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ standard `linear' model of class "lm", "glm" or "Arima", or potentially any regression fit may be supplied since version 0.5-0 (see 'Details'). \code{obj} can include any covariate understood to have a linear (i.e. no break-points) effect on the response. If \code{obj} also includes the segmented covariate specified in \code{seg.Z}, then all the slopes of the fitted segmented relationship will be estimated. On the other hand, if \code{obj} misses the segmented variable, then the 1st (the leftmost) slope is assumed to be zero. Since version 1.5.0, \code{obj} can be a simple numeric or \code{ts} object but with only a single segmented variable (\code{segmented.numeric}) see examples below.} \item{seg.Z}{ the segmented variable(s), i.e. the continuous covariate(s) understood to have a piecewise-linear relationship with response. It is a formula with no response variable, such as \code{seg.Z=~x} or \code{seg.Z=~x1+x2}. It can be missing when \code{obj} includes only one covariate which is taken as segmented variable. Currently, formulas involving functions, such as \code{seg.Z=~log(x1)}, or selection operators, such as \code{seg.Z=~d[,"x1"]} or \code{seg.Z=~d$x1}, are \emph{not} allowed. Also, variable names formed by \verb{U} or \verb{V} only (with or without numbers) are not permitted.} \item{psi}{ starting values for the breakpoints to be estimated. If there is a single segmented variable specified in \code{seg.Z}, \code{psi} is a numeric vector, and it can be missing when 1 breakpoint has to be estimated (and the median of the segmented variable is used as a starting value). If \code{seg.Z} includes several covariates, \code{psi} has be specified as a \emph{named} list of vectors whose names have to match the variables in the \code{seg.Z} argument. Each vector of such list includes starting values for the break-point(s) for the corresponding variable in \code{seg.Z}. A \code{NA} value means that `\code{K}' quantiles (or equally spaced values) are used as starting values; \code{K} is fixed via the \code{\link{seg.control}} auxiliary function. See \code{npsi} as an alternative to specify just the number of breakpoints. } \item{npsi}{ A named vector or list meaning the \emph{number} (and not locations) of breakpoints to be estimated. The starting values will be internally computed via the quantiles or equally spaced values, as specified in argument \code{quant} in \code{\link{seg.control}}. \code{npsi} can be missing and \code{npsi=1} is assumed for all variables specified in \code{seg.Z}. If \code{psi} is provided, \code{npsi} is ignored. } \item{fixed.psi}{An optional named list meaning the breakpoints to be kept fixed during the estimation procedure. The names should be a subset of (or even the same) variables specified in \code{seg.Z}. If there is a single variable in \code{seg.Z}, a simple numeric vector can be specified. Note that, in addition to the values specified here, \code{segmented} will estimate additional breakpoints. 
To keep fixed all breakpoints (to be specified in \code{psi}) use \code{it.max=0} in \code{\link{seg.control}} } \item{control}{ a list of parameters for controlling the fitting process. See the documentation for \code{\link{seg.control}} for details. } \item{model}{logical value indicating if the model.frame should be returned.} \item{keep.class}{logical value indicating if the final fit returned by \code{segmented.default} should keep the class '\code{segmented}' (along with the class of the original fit \code{obj}). Ignored by the segmented methods. } \item{\dots}{ optional arguments (to be ignored safely). Notice specific arguments relevant to the original call (via \code{lm} or \code{glm} for instance), such as \code{weights} or \code{offet}, have to be included in the starting model \code{obj} } \item{adjX}{if \code{obj} is a \code{ts}, the segmented variable (if not specified in \code{seg.Z}) is computed by taking information from the time series (e.g., years starting from 2000, say). If \code{adjX=TRUE}, the segmented variable is shifted such that its min equals zero. Default is using the unshifted values, but if there are several breakpoints to be estimated , it is strongly suggested to set \code{adjX=TRUE}. } \item{weights}{the weights if \code{obj} is a vector or a ts object, otherwise the weights should be specified in the starting fit \code{obj}.} } \details{ Given a linear regression model usually of class "lm" or "glm" (or even a simple numeric/ts vector), segmented tries to estimate a new regression model having broken-line relationships with the variables specified in \code{seg.Z}. A segmented (or broken-line) relationship is defined by the slope parameters and the break-points where the linear relation changes. The number of breakpoints of each segmented relationship is fixed via the \code{psi} (or \code{npsi}) argument, where initial values for the break-points (or simply their number via \code{npsi}) must be specified. The model is estimated simultaneously yielding point estimates and relevant approximate standard errors of all the model parameters, including the break-points. Since version 0.2-9.0 \code{segmented} implements the bootstrap restarting algorithm described in Wood (2001). The bootstrap restarting is expected to escape the local optima of the objective function when the segmented relationship is flat and the log likelihood can have multiple local optima. Since version 0.5-0.0 the default method \code{segmented.default} has been added to estimate segmented relationships in general (besides "lm" and "glm" fits) regression models, such as Cox regression or quantile regression (for a single percentile). The objective function to be minimized is the (minus) value extracted by the \code{logLik} function or it may be passed on via the \code{fn.obj} argument in \code{seg.control}. See example below. While the default method is expected to work with any regression fit (where the usual \code{coef()}, \code{update()}, and \code{logLik()} returns appropriate results), it is not recommended for "lm" or "glm" fits (as \code{segmented.default} is slower than the specific methods \code{segmented.lm} and \code{segmented.glm}), although final results are the same. However the object returned by \code{segmented.default} is \emph{not} of class "segmented", as currently the segmented methods are not guaranteed to work for `generic' (i.e., besides "lm" and "glm") regression fits. The user could try each "segmented" method on the returned object by calling it explicitly (e.g. 
via \code{plot.segmented()} or \code{confint.segmented()} wherein the regression coefficients and relevant covariance matrix have to be specified, see \code{.coef} and \code{.vcov} in \code{plot.segmented()}, \code{confint.segmented()}, \code{slope()}).
}
\value{
segmented returns an object of class "segmented" which inherits from the class of \code{obj}, for instance "lm" or "glm".
\cr
An object of class "segmented" is a list containing the components of the original object \code{obj} with, in addition, the following:
\item{psi}{estimated break-points (sorted) and relevant (approximate) standard errors}
\item{it}{number of iterations employed}
\item{epsilon}{difference in the objective function when the algorithm stops}
\item{model}{the model frame}
\item{psi.history}{a list or a vector including the breakpoint estimates at each step}
\item{seed}{the integer vector containing the seed just before the bootstrap resampling. Returned only if bootstrap restart is employed}
\item{..}{Other components are not of direct interest of the user}
}
\section{ Warning }{
At convergence, if the estimated breakpoints are too close to each other or at the boundaries, the parameter point estimate could be returned, but without finite standard errors. To avoid that, \code{segmented} revises the final breakpoint estimates so that at least \code{min.nj} observations fall within each interval of the segmented covariate. A warning message is printed if such adjustment is made. See \code{min.nj} in \code{\link{seg.control}}.
}
%It is well-known that the log-likelihood function for the
%break-point may be not concave, especially
%for poor clear-cut kink-relationships. In these circumstances the initial guess
% for the break-point, i.e. the \code{psi} argument, must be provided with care. For instance visual
%inspection of a, possibly smoothed, scatter-plot is usually a good way to obtain some idea on breakpoint location.
%However bootstrap restarting, implemented since version 0.2-9.0, is relatively more robust to starting values specified
%in \code{psi}. Alternatively an automatic procedure may be implemented by specifying \code{psi=NA} and
%\code{fix.npsi=FALSE} in \code{\link{seg.control}}: experience suggests to increase the number of iterations
%via \code{it.max} in \code{seg.control()}. This automatic procedure, however, is expected to overestimate
%the number of breakpoints.
%}
\note{
\enumerate{
\item The algorithm will start if the \code{it.max} argument returned by \code{seg.control} is greater than zero. If \code{it.max=0} \code{segmented} will estimate a new linear model with break-point(s) fixed at the values reported in \code{psi}. Alternatively, it is also possible to set \code{h=0} in \code{seg.control()}. In this case, bootstrap restarting is unnecessary; then, to have breakpoints at \code{mypsi}, type \cr
\code{segmented(.., psi=mypsi, control=seg.control(h=0, n.boot=0, it.max=1))}
\item In the returned fit object, `U.' is put before the name of the segmented variable to mean the difference-in-slopes coefficient.
\item Methods specific to the class \code{"segmented"} are \itemize{ \item \code{\link{print.segmented}} \item \code{\link{summary.segmented}} \item \code{\link{print.summary.segmented}} \item \code{\link{plot.segmented}} \item \code{\link{lines.segmented}} \item \code{\link{confint.segmented}} \item \code{\link{vcov.segmented}} \item \code{\link{predict.segmented}} \item \code{\link{points.segmented}} \item \code{\link{coef.segmented}} } Others are inherited from the class \code{"lm"} or \code{"glm"} depending on the class of \code{obj}. } } \references{ Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points. \emph{Statistics in Medicine} \bold{22}, 3055--3071. Muggeo, V.M.R. (2008) Segmented: an R package to fit regression models with broken-line relationships. \emph{R News} \bold{8/1}, 20--25. } \author{ Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} } \seealso{ \code{\link{segmented.glm}} for segmented GLM and \code{\link{segreg}} to fit the models via a formula interface. \code{\link{segmented.lme}} fits random changepoints (segmented mixed) models. } \examples{ set.seed(12) xx<-1:100 zz<-runif(100) yy<-2+1.5*pmax(xx-35,0)-1.5*pmax(xx-70,0)+15*pmax(zz-.5,0)+rnorm(100,0,2) dati<-data.frame(x=xx,y=yy,z=zz) out.lm<-lm(y~x,data=dati) #the simplest example: the starting model includes just 1 covariate #.. and 1 breakpoint has to be estimated for that o<-segmented(out.lm) #1 breakpoint for x #the single segmented variable is not in the starting model, and thus.. #... you need to specify it via seg.Z, but no starting value for psi o<-segmented(out.lm, seg.Z=~z) #note the leftmost slope is constrained to be zero (since out.lm does not include z) #2 segmented variables, 1 breakpoint each (again no need to specify npsi or psi) o<-segmented(out.lm,seg.Z=~z+x) #1 segmented variable, but 2 breakpoints: you have to specify starting values (vector) for psi: o<-segmented(out.lm,seg.Z=~x,psi=c(30,60), control=seg.control(display=FALSE)) #.. or you can specify just the *number* of breakpoints #o<-segmented(out.lm,seg.Z=~x, npsi=2, control=seg.control(display=FALSE)) slope(o) #the slopes of the segmented relationship #2 segmented variables: starting values requested via a named list out.lm<-lm(y~z,data=dati) o1<-update(o,seg.Z=~x+z,psi=list(x=c(30,60),z=.3)) #..or by specifying just the *number* of breakpoints #o1<-update(o,seg.Z=~x+z, npsi=c(x=2,z=1)) #the default method leads to the same results (but it is slower) #o1<-segmented.default(out.lm,seg.Z=~x+z,psi=list(x=c(30,60),z=.3)) #o1<-segmented.default(out.lm,seg.Z=~x+z,psi=list(x=c(30,60),z=.3), # control=seg.control(fn.obj="sum(x$residuals^2)")) #automatic procedure to estimate breakpoints in the covariate x (starting from K quantiles) # Hint: increases number of iterations. Notice: bootstrap restart is not allowed! # However see ?selgmented for a better approach #o<-segmented.lm(out.lm,seg.Z=~x+z,psi=list(x=NA,z=.3), # control=seg.control(fix.npsi=FALSE, n.boot=0, tol=1e-7, it.max = 50, K=5, display=TRUE)) #assess the progress of the breakpoint estimates throughout the iterations \dontrun{ par(mfrow=c(1,2)) draw.history(o, "x") draw.history(o, "z") } #try to increase the number of iterations and re-assess the #convergence diagnostics # A simple segmented model with continuous responses and no linear covariates # No need to fit the starting lm model: segmented(yy, npsi=2) #NOTE: subsetting the vector works ( segmented(yy[-1],..) 
) #only a single segmented covariate is allowed in seg.Z, and if seg.Z is unspecified, # the segmented variable is taken as 1:n/n # An example using the Arima method: \dontrun{ n<-50 idt <-1:n #the time index mu<-50-idt +1.5*pmax(idt-30,0) set.seed(6969) y<-mu+arima.sim(list(ar=.5),n)*3.5 o<-arima(y, c(1,0,0), xreg=idt) os1<-segmented(o, ~idt, control=seg.control(display=TRUE)) #note using the .coef argument is mandatory! slope(os1, .coef=os1$coef) plot(y) plot(os1, add=TRUE, .coef=os1$coef, col=2) } ################################################################ ################################################################ ######Four examples using the default method: ################################################################ ################################################################ ################################################################ #==> 1. Cox regression with a segmented relationship ################################################################ \dontrun{ library(survival) data(stanford2) o<-coxph(Surv(time, status)~age, data=stanford2) os<-segmented(o, ~age, psi=40) #estimate the breakpoint in the age effect summary(os) #actually it means summary.coxph(os) plot(os) #it does not work plot.segmented(os) #call explicitly plot.segmented() to plot the fitted piecewise lines ################################################################ # ==> 2. Linear mixed model via the nlme package ################################################################ dati$g<-gl(10,10) #the cluster 'id' variable library(nlme) o<-lme(y~x+z, random=~1|g, data=dati) os<-segmented.default(o, ~x+z, npsi=list(x=2, z=1)) #summarizing results (note the '.coef' argument) slope(os, .coef=fixef(os)) plot.segmented(os, "x", .coef=fixef(os), conf.level=.95) confint.segmented(os, "x", .coef=fixef(os)) dd<-data.frame(x=c(20,50),z=c(.2,.6), g=1:2) predict.segmented(os, newdata=dd, .coef=fixef(os)) ################################################################ # ==> 3. segmented quantile regression via the quantreg package ################################################################ library(quantreg) data(Mammals) y<-with(Mammals, log(speed)) x<-with(Mammals, log(weight)) o<-rq(y~x, tau=.9) os<-segmented.default(o, ~x) #it does NOT work. It cannot compute the vcov matrix.. #Let's define the vcov.rq function.. (I don't know if it is the best option..) vcov.rq<-function(x,...) { V<-summary(x,cov=TRUE,se="nid",...)$cov rownames(V)<-colnames(V)<-names(x$coef) V} os<-segmented.default(o, ~x) #now it does work plot.segmented(os, res=TRUE, col=2, conf.level=.95) ################################################################ # ==> 4. segmented regression with the svyglm() (survey package) ################################################################ library(survey) data(api) dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) o<-svyglm(api00~ell, design=dstrat) #specify as a string the objective function to be minimized. It can be obtained via svyvar() fn.x<- 'as.numeric(svyvar(resid(x, "pearson"), x$survey.design, na.rm = TRUE))' os<-segmented.default(o, ~ell, control=seg.control(fn.obj=fn.x, display=TRUE)) slope(os) plot.segmented(os, res=TRUE, conf.level=.9, shade=TRUE) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
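A minimal runnable sketch of the option described in the Note above, i.e. keeping the breakpoints fixed at user-supplied values via \code{seg.control(h=0, n.boot=0, it.max=1)}. The data-generating step and the object names (\code{dati0}, \code{o0}, \code{of}) are purely illustrative, and \code{c(30,70)} are arbitrary fixed values, not estimates.

#keep the breakpoints fixed at c(30,70) instead of estimating them
set.seed(12)
dati0 <- data.frame(x=1:100)
dati0$y <- 2 + 1.5*pmax(dati0$x-35,0) - 1.5*pmax(dati0$x-70,0) + rnorm(100,0,2)
o0 <- lm(y~x, data=dati0)
of <- segmented(o0, seg.Z=~x, psi=c(30,70),
        control=seg.control(h=0, n.boot=0, it.max=1))
slope(of)   #left/right slopes given the fixed breakpoints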
\keyword{regression} \keyword{nonlinear } segmented/man/vcov.segmented.lme.Rd0000644000176200001440000000462214415476766017022 0ustar liggesusers\name{vcov.segmented.lme} \alias{vcov.segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Variance-Covariance Matrix for a Fitted Segmented Mixed Model} \description{ Returns the variance-covariance matrix of the parameters (including breakpoints) of a fitted segmented mixed model object.} \usage{ \method{vcov}{segmented.lme}(object, B=0, ret.b=FALSE, ...) %is??? } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted model object of class "segmented.lme", returned by \code{segmented.lme} method.} \item{B}{number of bootstrap replicates, if a bootstrap-based covariance matrix is requested.} \item{ret.b}{logical. If \code{FALSE} the full covariance matrix (for the fixed effect estimates) based on \code{B} case-resampling bootstrap samples is returned; otherwise a list with information on the bootstrap sampling distributions.} \item{...}{optional arguments, i.e. \code{seed} and \code{it.max.b}, used when implementing the bootstrap.} % \item{is}{logical. If \code{TRUE}, the \emph{asymptotic} covariance matrix based on the idea of induced smoothing is returned. If \code{is%=TRUE}, \code{var.diff=FALSE} is set. % } } \details{ The returned covariance matrix is based on an approximation of the nonlinear segmented term. Therefore covariances corresponding to breakpoints are reliable only in large samples and/or clear cut segmented relationships. If \code{B>0} is set, case resampling bootstrap (on the outermost nesting level) is carried out. Moreover, if \code{ret.b=TRUE}, the bootstrap distributions are returned, rather than the covariance matrix. %If \code{is=TRUE}, the returned covariance matrix depends on the design matrix having the term \eqn{I(x>\psi)}{I(x>psi)} replaced by its smooth %counterpart. } \value{ The full matrix of the estimated covariances of the fixed effects estimates, including the breakpoint. } %\references{} \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it}} \section{Warning }{ All the functions for segmented mixed models (*.segmented.lme) are still at an experimental stage } \seealso{\code{\link{summary.segmented.lme}}} \examples{ ##continues example from segmented.lme() # vcov(os) # vcov(os, B=50) # vcov(os, B=50, ret.b=TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} segmented/man/fitted.segmented.lme.Rd0000644000176200001440000000323214415476774017317 0ustar liggesusers\name{fitted.segmented.lme} \alias{fitted.segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fitted values for segmented mixed fits } \description{ Computes fitted values at different levels of nesting for segmented mixed objects } \usage{ \method{fitted}{segmented.lme}(object, level = 1, sort=TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ Object of class \code{"segmented.lme"} } \item{level}{ the level to be considered. Currently only levels 0 or 1 are allowed. } \item{sort}{ If \code{TRUE}, the fitted values are sorted by the names of the 'id' levels. } \item{\dots}{ Ignored } } \details{ Currently it works only if \code{level=1} } \value{ A numeric object including the fitted values at the specified level of nesting. 
%% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito Muggeo } %\note{ %% ~~further notes~~ %} \section{Warning }{ All the functions for segmented mixed models (*.segmented.lme) are still at an experimental stage } \seealso{ \code{\link{summary.segmented.lme}} } %\examples{ %} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/plot.segmented.Rd0000644000176200001440000002075114620341125016222 0ustar liggesusers\name{plot.segmented} \alias{plot.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Plot method for segmented objects } \description{ Takes a fitted \code{segmented} object returned by \code{segmented()} and plots (or adds) the fitted broken-line relationship for the selected segmented term. } \usage{ \method{plot}{segmented}(x, term, add=FALSE, res=FALSE, conf.level=0, interc=TRUE, link=TRUE, res.col=grey(.15, alpha = .4), rev.sgn=FALSE, const=NULL, shade=FALSE, rug=!add, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, .coef=NULL, prev.trend=FALSE, smoos=NULL, hide.zeros=FALSE, leg="topleft", psi.lines=FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a fitted \code{segmented} object. } \item{term}{Numerical or character to indicate the segmented variable having the piece-wise relationship to be plotted. If there is a single segmented variable in the fitted model \code{x}, \code{term} can be omitted. If vector, multiple segmented lines will be drawn on the same plot.} \item{add}{ when \code{TRUE} the fitted lines are added to the current device.} \item{res}{ when \code{TRUE} the fitted lines are plotted along with corresponding partial residuals. See Details.} \item{conf.level}{ If greater than zero, it means the confidence level at which the pointwise confidence itervals have to be plotted.} \item{interc}{ If \code{TRUE} the computed segmented components include the model intercept (if it exists).} \item{link}{ when \code{TRUE} (default), the fitted lines are plotted on the link scale, otherwise they are tranformed on the response scale before plotting. Ignored for linear segmented fits. } \item{res.col}{when \code{res=TRUE} it means the color of the points representing the partial residuals.} \item{rev.sgn}{ when \code{TRUE} it is assumed that current \code{term} is `minus' the actual segmented variable, therefore the sign is reversed before plotting. This is useful when a null-constraint has been set on the last slope.} \item{const}{ constant to add to each fitted segmented relationship (on the scale of the linear predictor) before plotting. If \code{const=NULL} and the fit includes a segmented interaction term (obtained via \code{seg(..,by)} in the formula), the group-specific intercept is included. } \item{shade}{if \code{TRUE} and \code{conf.level>0} it produces shaded regions (in grey color) for the pointwise confidence intervals embracing the fitted segmented line. } \item{rug}{when \code{TRUE} the covariate values are displayed as a rug plot at the foot of the plot. 
Default is to \code{!add}.} \item{dens.rug}{when \code{TRUE} then smooth covariate distribution is plotted on the x-axis.} \item{dens.col}{if \code{dens.rug=TRUE}, it means the colour to be used to plot the density.} % \item{show.gap}{ when \code{FALSE} the (possible) gaps between the fitted lines at the estimated breakpoints % are hidden. When bootstrap restarting has been employed (default in \code{segmented}), \code{show.gap} is meaningless % as the gap coefficients are always set to zero in the fitted model.} \item{transf}{ A possible function to convert the fitted values before plotting. It is only effective if \code{res=FALSE}. If \code{res=TRUE} any transformation is ignored.} \item{isV}{logical value (to be passed to \code{\link{broken.line}}). Ignored if \code{conf.level=0}} \item{is}{logical value (to be passed to \code{\link{broken.line}}) indicating if the covariance matrix based on the induced smoothing should be used. Ignored if \code{conf.level=0}} \item{var.diff}{logical value to be passed to \code{\link{summary.segmented}} to compute dthe standard errors of fitted values (if \code{conf.level>0}).} \item{p.df}{ degrees of freedom when \code{var.diff=TRUE}, see \code{\link{summary.segmented}}} \item{.vcov}{ The \emph{full} covariance matrix of estimates to be used when \code{conf.level>0}. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by the function \code{vcov.segmented}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef()}.} \item{prev.trend}{logical. If \code{TRUE} dashed lines corresponding to the `previous' trends (i.e. the trends if the breakpoints would not have occurred) are also drawn.} \item{smoos}{ logical, indicating if the residuals (provided that \code{res=TRUE}) will be drawn using a \emph{smoothed} scatterplot. If \code{NULL} (default) the smoothed scatterplot will be employed when the number of observation is larger than 10000. } \item{hide.zeros}{ logical, indicating if the residuals (provided that \code{res=TRUE}) corresponding to the covariate zero values should be deleted. Useful when the fit includes an interaction term in the formula, such as \code{seg(.., by=..)}, and the zeros in covariates indicate units in other groups. } \item{leg}{ If the plot refers to segmented relationships in groups, i.e. \code{term} has been specified as a vector, a legend is placed at the specified \code{leg} position. Put \code{NA} not to draw the legend. } \item{psi.lines}{ if \code{TRUE} vertical lines corresponding to the estimated breakpoints are also drawn. Ignored if \code{term} is not a vector. } \item{\dots}{ other graphics parameters to pass to plotting commands: `col', `lwd' and `lty' (that can be vectors and are recycled if necessary, see the example below) for the fitted piecewise lines; `ylab', `xlab', `main', `sub', `cex.axis', `cex.lab', `xlim' and `ylim' when a new plot is produced (i.e. when \code{add=FALSE}); `pch' and `cex' for the partial residuals (when \code{res=TRUE}, \code{res.col} is for the color); \code{col.shade} for the shaded regions (provided that \code{shade=TRUE} and \code{conf.level>0}). } } \details{ Produces (or adds to the current device) the fitted segmented relationship between the response and the selected \code{term}. If the fitted model includes just a single `segmented' variable, \code{term} may be omitted. 
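A brief sketch of the two usages just described (plotting with \code{term} omitted, then adding the fitted line to the current device); the simulated data and object names are illustrative only.

set.seed(1)
x <- 1:100
y <- 2 + 1.5*pmax(x-40,0) + rnorm(100, 0, 2)
os <- segmented(lm(y~x))      #one segmented covariate: seg.Z and psi omitted
plot(os, res=TRUE)            #'term' may be omitted: a single segmented variable
plot(os, term="x", add=TRUE, col=2, lwd=2)  #add the fitted line to the current device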
\cr The partial residuals are computed as `fitted + residuals', where `fitted' are the fitted values of the segmented relationship relevant to the covariate specified in \code{term}. Notice that for GLMs the residuals are the response residuals if \code{link=FALSE} and the working residuals if \code{link=TRUE}.\cr %weighted by the IWLS weights [fino alla versione 0.5-2.0 i workRes were weighted by the IWLS weights] } \value{ None. } %\references{ } \author{ Vito M. R. Muggeo } \note{ For models with offset, partial residuals on the response scale are not defined. Thus \code{plot.segmented} does not work when \code{link=FALSE}, \code{res=TRUE}, and the fitted model includes an offset.\cr When \code{term} is a vector and multiple segmented relationships are being drawn on the same plot, \code{col} and \code{res.col} can be vectors. Also \code{pch}, \code{cex}, \code{lty}, and \code{lwd} can be vectors, if specified. } % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{segmented}} to fit the model, \code{\link{lines.segmented}} to add the estimated breakpoints on the current plot. \code{\link{points.segmented}} to add the joinpoints of the segmented relationship. \code{\link{predict.segmented}} to compute standard errors and confidence intervals for predictions from a "segmented" fit. } \examples{ set.seed(1234) z<-runif(100) y<-rpois(100,exp(2+1.8*pmax(z-.6,0))) o<-glm(y~z,family=poisson) o.seg<-segmented(o) #single segmented covariate and one breakpoint: 'seg.Z' and 'psi' can be omitted par(mfrow=c(1,2)) plot(o.seg, conf.level=0.95, shade=TRUE) points(o.seg, link=TRUE, col=2) ## new plot plot(z,y) ## add the fitted lines using different colors and styles.. plot(o.seg,add=TRUE,link=FALSE,lwd=2,col=2:3, lty=c(1,3)) lines(o.seg,col=2,pch=19,bottom=FALSE,lwd=2) #for the CI for the breakpoint points(o.seg,col=4, link=FALSE) ## using the options 'is', 'isV', 'shade' and 'col.shade'. par(mfrow=c(1,2)) plot(o.seg, conf.level=.9, is=TRUE, isV=TRUE, col=1, shade = TRUE, col.shade=2) plot(o.seg, conf.level=.9, is=TRUE, isV=FALSE, col=2, shade = TRUE, res=TRUE, res.col=4, pch=3) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } \keyword{ hplot }segmented/man/predict.stepmented.Rd0000644000176200001440000000647714667540455017126 0ustar liggesusers\name{predict.stepmented} \alias{predict.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Predict method for stepmented model fits } \description{ Returns predictions and optionally associated quantities (standard errors or confidence intervals) from a fitted stepmented model object. } \usage{ \method{predict}{stepmented}(object, newdata, se.fit=FALSE, interval=c("none","confidence", "prediction"), type = c("link", "response"), na.action=na.omit, level=0.95, .coef=NULL, .vcov=NULL, apprx.fit=c("none","cdf"), apprx.se=c("cdf","none"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ a fitted stepmented model coming from \code{stepmented.lm} or \code{stepmented.glm}. } \item{newdata}{ An optional data frame in which to look for variables with which to predict. If omitted, the fitted values are used. } \item{se.fit}{ Logical. Should the standard errors be returned? } \item{interval}{ Which interval? See \code{\link{predict.lm}} } \item{type}{ Predictions on the link or response scale? Only if \code{object} is a stepmented glm. 
} \item{na.action}{ How to deal with missing data, \emph{if} \code{newdata} include them. } \item{level}{ The confidence level. } \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef()}.} \item{.vcov}{ The estimate covariance matrix. If unspecified (i.e. \code{NULL}), it is computed internally by \code{vcov.stepmented()}.} \item{apprx.fit}{ The approximation of the \eqn{(x>\hat\psi)} used to compute the predictions/fitted values of the piece-wise relationships. } \item{apprx.se}{ The same abovementioned approximation to compute the standard error.} \item{\dots}{ further arguments, for instance \code{k} to be passed to \code{\link{vcov.stepmented}}. } } \details{ Basically \code{predict.stepmented} builds the right design matrix accounting for breakpoint and passes it to \code{predict.lm} or \code{predict.glm} depending on the actual model fit \code{object}. } \value{ \code{predict.stepmented} produces a vector of predictions with possibly associated standard errors or confidence intervals. See \code{predict.lm}, \code{predict.glm}, or \code{\link{predict.segmented}}. } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito Muggeo } %\note{ %If \code{type="terms"}, \code{predict.stepmented} returns predictions for each component of the stepmented term. %Namely if `my.x' is the stepmented variable, predictions for `my.x', `U1.my.x' and `psi1.my.x' are returned. These are %meaningless individually, however their sum provides the predictions for the stepmented term. %} \section{Warning }{ For stepmented glm fits with offset, \code{predict.stepmented} returns the fitted values \emph{including} the offset. } \seealso{ \code{\link{stepreg}}, \code{\link{stepmented}}, \code{\link{plot.stepmented}}, \code{\link{predict.lm}}, \code{\link{predict.glm}} } \examples{ n=10 x=seq(-3,3,l=n) set.seed(1515) y <- (x<0)*x/2 + 1 + rnorm(x,sd=0.15) segm <- segmented(lm(y ~ x), ~ x, psi=0.5) predict(segm,se.fit = TRUE)$se.fit } % \dontrun{..} % KEYWORDS - R documentation directory. \keyword{models} \keyword{regression} segmented/man/vcov.segmented.Rd0000644000176200001440000000377014515416162016232 0ustar liggesusers\name{vcov.segmented} \alias{vcov.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Variance-Covariance Matrix for a Fitted Segmented Model } \description{ Returns the variance-covariance matrix of the parameters (including breakpoints) of a fitted segmented model object.} \usage{ \method{vcov}{segmented}(object, var.diff = FALSE, is = FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted model object of class "segmented", returned by any \code{segmented} method or \code{segreg}.} \item{var.diff}{logical. If \code{var.diff=TRUE} and there is a single segmented variable, the covariance matrix is computed using a sandwich-type formula. See Details in \code{\link{summary.segmented}}.} \item{is}{logical. If \code{TRUE}, the \emph{asymptotic} covariance matrix based on the idea of induced smoothing is returned. If \code{is=TRUE}, \code{var.diff=FALSE} is set. \code{is=TRUE} only works with segmented (g)lm fits.} \item{\dots}{additional arguments. } } \details{ The returned covariance matrix is based on an approximation of the nonlinear segmented term. Therefore covariances corresponding to breakpoints are reliable only in large samples and/or clear cut segmented relationships. 
If \code{is=TRUE}, the returned covariance matrix depends on the design matrix having the term \eqn{I(x>\psi)}{I(x>psi)} replaced by its smooth counterpart. } \value{ The full matrix of the estimated covariances between the parameter estimates, including the breakpoints. } %\references{} \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it}} \note{\code{var.diff=TRUE} works when there is a single segmented variable.} \seealso{\code{\link{summary.segmented}}} \examples{ ##continues example from summary.segmented() # vcov(oseg) # vcov(oseg, var.diff=TRUE) # vcov(oseg, is=TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} segmented/man/segmented-package.Rd0000644000176200001440000001242714757630101016645 0ustar liggesusers\name{segmented-package} \alias{segmented-package} %\alias{segmented} \docType{package} \title{ Segmented Relationships in Regression Models with Breakpoints / Changepoints Estimation (with Possibly Random Effects) } \description{ Estimation and inference of regression models with piecewise linear relationships, also known as segmented regression models, with a number of break-points fixed or to be `selected'. Random effects changepoints are also allowed since version 1.6-0, and since version 2.0-0 it is also possible to fit regression models with piecewise constant (or `stepmented') relationships. } \details{ \tabular{ll}{ Package: \tab segmented\cr Type: \tab Package\cr Version: \tab 2.1-4\cr Date: \tab 2025-02-26\cr License: \tab GPL\cr } Package \code{segmented} aims to estimate linear and generalized linear models (and virtually any regression model) having one or more segmented or stepmented relationships in the linear predictor. Estimates of the slopes and breakpoints are provided along with standard errors. The package includes testing/estimating functions and methods to print, summarize and plot the results. The algorithms used by \code{segmented} are \emph{not} grid-search. They are iterative procedures (Muggeo, 2003; Fasola et al., 2018) that need starting values \emph{only} for the breakpoint parameters and therefore they are quite efficient even with several breakpoints to be estimated. Moreover, since version 0.2-9.0, \code{segmented} implements bootstrap restarting (Wood, 2001) to make the algorithms less sensitive to the starting values (which can also be omitted by the user). Since version 0.5-0.0 a default method \code{segmented.default} has been added. It may be employed to include segmented relationships in \emph{general} regression models where specific methods do not exist. Examples include quantile, Cox, and lme regressions where the random effects do not refer to the breakpoints; see \code{segmented.lme} to include random changepoints. \code{\link{segmented.default}} includes some examples. Since version 1.0-0 the estimating algorithm has been slightly modified and it appears to be much more stable (in examples with noisy segmented relationships and flat log likelihoods) than previous versions. Hypothesis testing (about the existence of the breakpoint) and confidence intervals are performed via appropriate methods and functions. A tentative approach to deal with an unknown number of breakpoints is also provided, see option \code{fix.npsi} in \code{\link{seg.control}}. Also, since version 1.3-0, the \code{\link{selgmented}} function has been introduced to select the number of breakpoints via the BIC or sequential hypothesis testing.
Since version 1.6-0, estimation of segmented mixed models has been introduced, see \code{\link{segmented.lme}} and related function. Since version 2.0-0, it is possible to fit segmented relationships with constraints on the slopes, see \code{\link{segreg}}. Finally, since 2.0-0, it is possible to fit (G)LM wherein one or more covariates have a stepmented (i.e. a step-function like) relationship, see \code{\link{stepmented}}. } \author{ Vito M.R. Muggeo } \references{ Muggeo V.M.R., Atkins D.C., Gallop R.J., Dimidjian S. (2014) Segmented mixed models with random changepoints: a maximum likelihood approach with application to treatment for depression study. \emph{Statistical Modelling}, \bold{14}, 293-313. Muggeo, V.M.R. (2017) Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach. \emph{Australian & New Zealand Journal of Statistics}, \bold{59}, 311--322. Fasola S, Muggeo V.M.R., Kuchenhoff, H. (2018) A heuristic, iterative algorithm for change-point detection in abrupt change models, \emph{Computational Statistics}, \bold{2}, 997--1015. Muggeo, V.M.R. (2016) Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling. \emph{J of Statistical Computation and Simulation} \bold{86}, 3059--3067. Davies, R.B. (1987) Hypothesis testing when a nuisance parameter is present only under the alternative. \emph{Biometrika} \bold{74}, 33--43. Seber, G.A.F. and Wild, C.J. (1989) \emph{Nonlinear Regression}. Wiley, New York. Bacon D.W., Watts D.G. (1971) Estimating the transistion between two intersecting straight lines. \emph{Biometrika} \bold{58}: 525 -- 534. Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points. \emph{Statistics in Medicine} \bold{22}, 3055--3071. Muggeo, V.M.R. (2008) Segmented: an R package to fit regression models with broken-line relationships. \emph{R News} \bold{8/1}, 20--25. Muggeo, V.M.R., Adelfio, G. (2011) Efficient change point detection in genomic sequences of continuous measurements. \emph{Bioinformatics} \bold{27}, 161--166. Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244. Muggeo, V.M.R. (2010) Comment on `Estimating average annual per cent change in trend analysis' by Clegg et al., Statistics in Medicine; 28, 3670-3682. \emph{Statistics in Medicine}, \bold{29}, 1958--1960. } \keyword{ regression } \keyword{ nonlinear } segmented/man/aapc.Rd0000644000176200001440000000634214415476772014221 0ustar liggesusers\name{aapc} \alias{aapc} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Average annual per cent change in segmented trend analysis } \description{ Computes the average annual per cent change to summarize piecewise linear relationships in segmented regression models. } \usage{ aapc(ogg, parm, exp.it = FALSE, conf.level = 0.95, wrong.se = TRUE, .vcov=NULL, .coef=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ogg}{ the fitted model returned by \code{segmented}. } \item{parm}{ the \emph{single} segmented variable of interest. It can be missing if the model includes a single segmented covariate. If missing and \code{ogg} includes several segmented variables, the first one is considered.} \item{exp.it}{logical. 
If \code{TRUE}, the per cent change is computed, namely \eqn{\exp(\hat\mu)-1}{exp(mu)-1} where \eqn{\mu=\sum_j \beta_jw_j}{mu=\sum j bjwj}, see `Details'.} \item{conf.level}{the confidence level desidered.} \item{wrong.se}{logical, if \code{TRUE}, the `wrong'' standard error (as discussed in Clegg et al. (2009)) ignoring uncertainty in the breakpoint estimate is returned as an attribute \code{"wrong.se"}.} \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(ogg,...)}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(ogg)}.} \item{...}{further arguments to be passed on to \code{vcov.segmented()}, such as \code{var.diff} or \code{is}.} } \details{ To summarize the fitted piecewise linear relationship, Clegg et al. (2009) proposed the 'average annual per cent change' (AAPC) computed as the sum of the slopes (\eqn{\beta_j}{beta_j}) weighted by corresponding covariate sub-interval width (\eqn{w_j}{w_j}), namely \eqn{\mu=\sum_j \beta_jw_j}{mu=sum_j beta_j w_j}. Since the weights are the breakpoint differences, the standard error of the AAPC should account for uncertainty in the breakpoint estimate, as discussed in Muggeo (2010) and implemented by \code{aapc()}. } \value{ \code{aapc} returns a numeric vector including point estimate, standard error and confidence interval for the AAPC relevant to variable specified in \code{parm}. } \references{ Clegg LX, Hankey BF, Tiwari R, Feuer EJ, Edwards BK (2009) Estimating average annual per cent change in trend analysis. \emph{Statistics in Medicine}, \bold{28}; 3670-3682. Muggeo, V.M.R. (2010) Comment on `Estimating average annual per cent change in trend analysis' by Clegg et al., Statistics in Medicine; 28, 3670-3682. \emph{Statistics in Medicine}, \bold{29}, 1958--1960. } \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} } \note{ \code{exp.it=TRUE} would be appropriate only if the response variable is the log of (any) counts. } %% ~Make other sections like Warning with \section{Warning }{....} ~ %%\seealso{ %% ~~objects to See Also as \code{\link{help}}, ~~~ %%} \examples{ set.seed(12) x<-1:20 y<-2-.5*x+.7*pmax(x-9,0)-.8*pmax(x-15,0)+rnorm(20)*.3 o<-lm(y~x) os<-segmented(o, psi=c(5,12)) aapc(os) } \keyword{ regression } segmented/man/vcov.stepmented.Rd0000644000176200001440000000667014607214142016425 0ustar liggesusers\name{vcov.stepmented} \alias{vcov.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Variance-Covariance Matrix for a Fitted Stepmented Model } \description{ Returns the variance-covariance matrix of the parameters estimates (including breakpoints) of a fitted stepmented model object. } \usage{ \method{vcov}{stepmented}(object, k=NULL, zero.cor=TRUE, type=c("cdf", "none", "abs"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ a fitted model object of class "stepmented", returned by any \code{stepmented} method } \item{k}{ The power of \code{n} for the smooth approximation. Simulation evidence suggests \code{k} in \eqn{[-1, -1/2]}; with \eqn{k=-1/2} providing somewhat 'conservative' standard errors especially at small sample sizes. In general, the larger \eqn{k}, the smaller \eqn{n^{-k}}, and the smaller the jumpoint standard error. } \item{zero.cor}{ If \code{TRUE}, the covariances between the jumpoints and the remaining linear coefficients are set to zero (as theory states). 
} \item{type}{How the covariance matrix should be computed. If \code{"none"}, the usual asymptotic covariance matrix for the linear coefficients only (under homoskedasticity and assuming known the jumpoints) is returned; if \code{"cdf"}, the standard normal cdf is used to approximate the indicator function (see details); \code{"abs"} is yet another approximation (currently unimplemented). } \item{\dots}{ additional arguments. } } \details{ The full covariance matrix is based on the smooth approximation \deqn{I(x>\psi)\approx \Phi((x-\psi)/n^{k})} via the sandwich formula using the empirical information matrix and assuming \eqn{x \in [0,1]}. \eqn{\Phi(\cdot)} is the standard Normal cdf, and \eqn{k} is the argument \code{k}. When \code{k=NULL} (default), it is computed via \deqn{k=-(0.6 + 0.5 \ \log(snr)/\sqrt snr - (|\hat\psi-0.5|/n)^{1/2})} where \eqn{snr} is the signal-to-noise ratio corresponding to the estimated changepoint \eqn{\hat\psi} (in the range (0,1)). The above formula comes from extensive simulation studies under different scenarios: Seo and Linton (2007) discuss using the normal cdf to smooth out the indicator function by suggesting \eqn{\log(n)/n^{1/2}} as bandwidth; we found such suggestion does not perform well in practice. } \value{ The full matrix of the estimated covariances between the parameter estimates, including the breakpoints. } \references{ Seo MH, Linton O (2007) A smoothed least squares estimator for threshold regression models, J of Econometrics, 141: 704-735 } \author{ Vito Muggeo } \note{ If the fit \code{object} has been called by \code{stepmented(.., var.psi=TRUE)}, then \code{vcov.stepmented} will return \code{object$vcov}, unless the power \code{k} differs from \code{-2/3}. } \section{Warning }{The function, including the value of \eqn{k}, must be considered at preliminary stage. Currently the value of \eqn{k} appears to overestimate slightly the true \eqn{\hat\psi} variability.} \seealso{ \code{\link{stepmented}} } \examples{ ##see ?stepmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/stagnant.Rd0000644000176200001440000000161014415476774015127 0ustar liggesusers\name{stagnant} \alias{stagnant} \docType{data} \title{Stagnant band height data} \description{ The \code{stagnant} data frame has 28 rows and 2 columns. } \usage{data(stagnant)} \format{ A data frame with 28 observations on the following 2 variables. \describe{ \item{\code{x}}{log of flow rate in g/cm sec.} \item{\code{y}}{log of band height in cm} } } \details{ Bacon and Watts report that such data were obtained by R.A. Cook during his investigation of the behaviour of stagnant surface layer height in a controlled flow of water. } \source{ Bacon D.W., Watts D.G. (1971) Estimating the transistion between two intersecting straight lines. \emph{Biometrika} \bold{58}: 525 -- 534. Originally from the PhD thesis by R.A. Cook } %\references{ % PhD thesis by R.A. 
Cook %} \examples{ data(stagnant) ## plot(stagnant) } \keyword{datasets} segmented/man/print.segmented.lme.Rd0000644000176200001440000000203314415476766017173 0ustar liggesusers\name{print.segmented.lme} \alias{print.segmented.lme} \alias{fixef.segmented.lme} \alias{logLik.segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Print method for the segmented.lme class } \description{ Printing and extracting the most important features of a segmented mixed model. } \usage{ \method{print}{segmented.lme}(x, digits = max(3, getOption("digits") - 3), ...) \method{fixef}{segmented.lme}(object, ...) \method{logLik}{segmented.lme}(object, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ object of class \code{segmented.lme} } \item{digits}{ number of digits to be printed } \item{object}{object of class \code{segmented} } %\item{include.psi}{logical. If \code{TRUE}, the breakpoints are returned along with the regression coefficients} \item{\dots}{ arguments passed to other functions } } \author{ Vito M.R. Muggeo } \seealso{ \code{\link{segmented.lme}}, \code{\link{summary.segmented.lme}} } \keyword{ models } segmented/man/seg.Rd0000644000176200001440000001070314757616474014073 0ustar liggesusers\name{seg} \alias{seg} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Specifying a segmented/stepmented term in the segreg/stepreg formula } \description{ Function used to define a segmented (stepmented) term within the segreg (stepreg) formula. The function simply passes relevant information to proper fitter functions. } \usage{ seg(x, npsi = 1, psi = NA, est = NA, R = NA, fixed.psi = NULL, by = NULL, f.x = I) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ The segmented/stepmented (numeric) covariate } \item{npsi}{ The number of breakpoints/jumpoints to estimate. Default to \code{npsi=1}. If \code{by} has been specified and \code{npsi} is a scalar, the same \code{npsi} applies to all categories of the factor \code{by}; otherwise it can be vector, wherein the entries represent the number of breakpoints of the segmented relationships within the categories of \code{by}. The \code{npsi} starting values are computed according the specification of \code{quant} in \code{\link{seg.control}}. } \item{psi}{ Numerical vector indicating possible starting value(s) for the breakpoint(s). When provided, \code{psi} overwrites \code{npsi}. If \code{by} has been specified and \code{psi} is a vector, the same \code{psi} applies to all categories of the factor \code{by}. Otherwise \code{psi} can be a list, wherein the components represent the starting values of the segmented/stepmented relationships within the categories of \code{by}. } \item{est}{ Possible vector (of length equal to \code{npsi+1}) of 1's and 0's to indicate whether the slopes have to be estimated or fixed to zero. \code{NA}, the default, means all ones, namely every slope is estimated. Consecutive zeroes are not allowed. If it is a vector and \code{by} has been specified, the same \code{est} applies to all categories of the factor \code{by}; otherwise \code{est} can be a list, wherein the components represent the slope constraints of the segmented relationships within the categories of \code{by}. } \item{R}{ Matrix to constrain the slopes. If provided, it overwrites the matrix (which is built internally) coming from the specification of \code{est}. 
If matrix and \code{by} has been specified, the same \code{R} applies to all categories of the factor \code{by}; otherwise \code{R} can be a list, wherein the components represent the slope constraints of the segmented relationships within the categories of \code{by}. } \item{fixed.psi}{ Possible \emph{fixed} breakpoint values to be accounted for \emph{in addition} to those being estimated. If \code{by} has been specified and \code{fixed.psi} is a vector, the same \code{fixed.psi} applies to all categories of the factor \code{by}. Otherwise \code{fixed.psi} can be a list, wherein the components refer to the segmented relationship within the levels of \code{by}. \code{slope} and \code{plot.segmented} will account for them correctly. } \item{by}{ A possible \emph{factor} meaning an interaction with the segmented term \code{x}. Hence, if specified, a different segmented relationship is fitted within each category of \code{by}. } \item{f.x}{ an optional function meaning a function to apply to the covariate before fitting. } } \details{ The function is used within \code{\link{segreg}} and \code{stepreg} to 'build' information about the segmented relationships to fit. Note that, when a grouping variable has been specified in \code{by}, \code{npsi} can be specified as a vector, and \code{est}, if specified, should be a list. } \value{ The function simply returns the covariate with added attributes relevant to segmented term } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito Muggeo } \note{ If any value is provided in \code{fix.psi}, the corresponding slope difference coefficient will be labelled by \code{*.fixed.*}. The \code{\link{slope}} function will compute the 'right' slopes also accounting for the fixed breakpoints. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{segreg}} } \examples{ ##see ?segreg } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/down.Rd0000644000176200001440000000203514415476774014261 0ustar liggesusers\name{down} \alias{down} \docType{data} \title{ Down syndrome in babies} \description{ The \code{down} data frame has 30 rows and 3 columns. Variable \code{cases} means the number of babies with Down syndrome out of total number of births \code{births} for mothers with mean age \code{age}. } \usage{data(down)} \format{ A data frame with 30 observations on the following 3 variables. \describe{ \item{\code{age}}{the mothers' mean age.} \item{\code{births}}{count of total births.} \item{\code{cases}}{count of babies with Down syndrome.} } } %\details{ % ~~ If necessary, more details than the description above ~~ %} \source{ Davison, A.C. and Hinkley, D. V. (1997) \emph{Bootstrap Methods and their Application}. Cambridge University Press. } \references{ Geyer, C. J. (1991) Constrained maximum likelihood exemplified by isotonic convex logistic regression. \emph{Journal of the American Statistical Association} \bold{86}, 717--724. 
} \examples{ data(down) } \keyword{datasets} segmented/man/seg.lm.fit.Rd0000644000176200001440000001260314610171053015235 0ustar liggesusers\name{seg.lm.fit} \alias{seg.lm.fit} \alias{seg.glm.fit} \alias{seg.def.fit} \alias{seg.Ar.fit} \alias{seg.num.fit} \alias{seg.lm.fit.boot} \alias{seg.glm.fit.boot} \alias{seg.def.fit.boot} \alias{seg.Ar.fit.boot} \alias{seg.num.fit.boot} \alias{segConstr.lm.fit} \alias{segConstr.glm.fit} \alias{segConstr.lm.fit.boot} \alias{segConstr.glm.fit.boot} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fitter Functions for Segmented Linear Models } \description{ \code{seg.lm.fit} is called by \code{segmented.lm} to fit segmented linear (gaussian) models. Likewise, \code{seg.glm.fit} is called by \code{segmented.glm} to fit generalized segmented linear models, and \code{seg.def.fit} is called by \code{segmented.default} to fit segmented relationships in general regression models (e.g., quantile regression and Cox regression). \code{seg.lm.fit.boot}, \code{seg.glm.fit.boot}, and \code{seg.def.fit.boot} are employed to perform bootstrap restart. The functions \code{segConstr.*} are called by \code{segreg()} when some contraints are set on the slopes of the segmented relationships. These functions should usually not be used directly by the user. } \usage{ seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE) seg.lm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) seg.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE) seg.glm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) seg.def.fit(obj, Z, PSI, mfExt, opz, return.all.sol=FALSE) seg.def.fit.boot(obj, Z, PSI, mfExt, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) seg.Ar.fit(obj, XREG, Z, PSI, opz, return.all.sol=FALSE) seg.Ar.fit.boot(obj, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) seg.num.fit(y, XREG, Z, PSI, w, opz, return.all.sol=FALSE) seg.num.fit.boot(y, XREG, Z, PSI, w, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) segConstr.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol = FALSE) segConstr.lm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) segConstr.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol = FALSE) segConstr.glm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE, break.boot=n.boot) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{ vector of observations of length \code{n}. } \item{XREG}{ design matrix for standard linear terms. } \item{Z}{ appropriate matrix including the segmented variables whose breakpoints have to be estimated. } \item{PSI}{ appropriate matrix including the starting values of the breakpoints to be estimated. } \item{w}{ possibe weights vector. } \item{offs}{ possibe offset vector. } \item{opz}{ a list including information useful for model fitting. } \item{n.boot}{ the number of bootstrap samples employed in the bootstrap restart algorithm. } \item{break.boot}{ Integer, less than \code{n.boot}. If \code{break.boot} consecutive bootstrap samples lead to the same objective function, the algorithm stops without performing all \code{n.boot} 'trials'. 
This can save computational time considerably. } \item{size.boot}{ the size of the bootstrap resamples. If \code{NULL} (default), it is taken equal to the sample size. values smaller than the sample size are expected to increase perturbation in the bootstrap resamples. } \item{jt}{ logical. If \code{TRUE} the values of the segmented variable(s) are jittered before fitting the model to the bootstrap resamples. } \item{nonParam}{ if \code{TRUE} nonparametric bootstrap (i.e. case-resampling) is used, otherwise residual-based. } \item{random}{ if \code{TRUE}, when the algorithm fails to obtain a solution, random values are used as candidate values. } \item{return.all.sol}{ if \code{TRUE}, when the algorithm fails to obtain a solution, the values visited by the algorithm with corresponding deviances are returned. } \item{obj}{ the starting regression model where the segmented relationships have to be added. } \item{mfExt}{ the model frame. } } \details{ The functions call iteratively \code{lm.wfit} (or \code{glm.fit}) with proper design matrix depending on \code{XREG}, \code{Z} and \code{PSI}. \code{seg.lm.fit.boot} (and \code{seg.glm.fit.boot}) implements the bootstrap restarting idea discussed in Wood (2001). } \value{ A list of fit information. } \references{ Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244. } \author{ Vito Muggeo } \note{ These functions should usually not be used directly by the user. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{segmented.lm}}, \code{\link{segmented.glm}} } \examples{ ##See ?segmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} \keyword{nonlinear } segmented/man/confint.segmented.Rd0000644000176200001440000001063714614162047016715 0ustar liggesusers\name{confint.segmented} \alias{confint.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Confidence intervals for breakpoints} \description{ Computes confidence intervals for the breakpoints in a fitted `segmented' model. } \usage{ \method{confint}{segmented}(object, parm, level=0.95, method=c("delta", "score", "gradient"), rev.sgn=FALSE, var.diff=FALSE, is=FALSE, digits=max(4, getOption("digits") - 1), .coef=NULL, .vcov=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted \code{segmented} object. } \item{parm}{the segmented variable of interest. If missing the first segmented variable in \code{object} is considered. } \item{level}{the confidence level required, default to 0.95.} \item{method}{which confidence interval should be computed. One of \code{"delta"}, \code{"score"}, or \code{"gradient"}. Can be abbreviated.} \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}; recycled otherwise. when \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the sign is reversed before printing. This is useful when a null-constraint has been set on the last slope.} \item{var.diff}{logical. If \code{method="delta"}, and there is a single segmented variable, \code{var.diff=TRUE} leads to standard errors based on sandwich-type formula of the covariance matrix. See Details in \code{\link{summary.segmented}}.} \item{is}{logical. 
If \code{method="delta"}, \code{is=TRUE} means that the full covariance matrix is computed via \code{vcov(.., is=TRUE)}} \item{digits}{controls the number of digits to print when returning the output. } \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(object)}.} \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(object)}.} \item{\dots}{additional parameters referring to Score-based confidence intervals, such as \code{"h"}, \code{"d.h"}, \code{"bw"}, \code{"msgWarn"}, and \code{"n.values"} specifying the number of points used to profile the Score (or Gradient) statistic.} } \details{ \code{confint.segmented} computes confidence limits for the breakpoints. Currently there are three options, see argument \code{method}. \code{method="delta"} uses the standard error coming from the Delta method for the ratio of two random variables. This value is an approximation (slightly) better than the one reported in the `psi' component of the list returned by any \code{segmented} method. The resulting confidence intervals are based on the asymptotic Normal distribution of the breakpoint estimator which is reliable just for clear-cut kink relationships. See Details in \code{\link{segmented}}. \cr \code{method="score"} or \code{method="gradient"} compute the confidence interval via profiling the Score or the Gradient statistics smoothed out by the induced smoothing paradigm, as discussed in the reference below. } \value{ A matrix including point estimate and confidence limits of the breakpoint(s) for the segmented variable possibly specified in \code{parm}. } \references{ Muggeo, V.M.R. (2017) Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach. \emph{Australian & New Zealand Journal of Statistics} \bold{59}, 311--322. } \author{ Vito M.R. Muggeo } \note{ Currently \code{method="score"} or \code{method="gradient"} only works for segmented \emph{linear} model. For segmented \emph{generalized linear} model, currently only \code{method="delta"} is available. } % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{segmented}} and \code{\link{lines.segmented}} to plot the estimated breakpoints with corresponding confidence intervals. } \examples{ set.seed(10) x<-1:100 z<-runif(100) y<-2+1.5*pmax(x-35,0)-1.5*pmax(x-70,0)+10*pmax(z-.5,0)+rnorm(100,0,2) out.lm<-lm(y~x) o<-segmented(out.lm,seg.Z=~x+z,psi=list(x=c(30,60),z=.4)) confint(o) #delta CI for the 1st variable confint(o, "x", method="score") #also method="g" } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/model.matrix.segmented.Rd0000644000176200001440000000222014515673056017654 0ustar liggesusers\name{model.matrix.segmented} \alias{model.matrix.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Design matrix for segmented fits } \description{ This function builds the model matrix for \code{segmented} fits. } \usage{ \method{model.matrix}{segmented}(object, ...) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{object}{ A segmented fit } \item{\dots}{ additional arguments } } \details{ \code{model.matrix.segmented} } \value{ The design matrix for a segmented regression model with the specified formula and data } \author{ Vito Muggeo } %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ See Also as \code{\link{model.matrix}} } %\examples{ % %} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/pscore.test.rd0000644000176200001440000001343614533572607015623 0ustar liggesusers\name{pscore.test} \alias{pscore.test} \title{ Testing for existence of one breakpoint} \description{ Given a (generalized) linear model, the (pseudo) Score statistic tests for the existence of one breakpoint. } \usage{ pscore.test(obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), values=NULL, dispersion=NULL, df.t=NULL, more.break=FALSE, n.break=1, only.term=FALSE, break.type=c("break","jump")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ a fitted model typically returned by \code{glm} or \code{lm}. Even an object returned by \code{segmented} can be set. Offset and weights are allowed.} \item{seg.Z}{ a formula with no response variable, such as \code{seg.Z=~x1}, indicating the (continuous) segmented variable being tested. Only a single variable may be tested and an error is printed when \code{seg.Z} includes two or more terms. \code{seg.Z} can be omitted if i)\code{obj} is a segmented fit with a single segmented covariate (and that variable is taken), or ii)if it is a "lm" or "glm" fit with a single covariate (and that variable is taken).} \item{k}{ optional. Number of points (equi-spaced from the min to max) used to compute the pseudo Score statistic. See Details. } \item{alternative}{ a character string specifying the alternative hypothesis (relevant to the slope difference parameter). } \item{values}{ optional. The evaluation points where the Score test is computed. See Details for default values.} \item{dispersion}{ optional. the dispersion parameter for the family to be used to compute the test statistic. When \code{NULL} (the default), it is inferred from \code{obj}. Namely it is taken as \code{1} for the Binomial and Poisson families, and otherwise estimated by the residual Chi-squared statistic in the model \code{obj} (calculated from cases with non-zero weights divided by the residual degrees of freedom).} \item{df.t}{ optional. The degress-of-freedom used to compute the p-value. When \code{NULL}, the df extracted from \code{obj} are used.} \item{more.break}{ optional, logical. If \code{obj} is a 'segmented' fit, \code{more.break=FALSE} tests for the actual breakpoint for the variable 'seg.Z', while \code{more.break=TRUE} tests for an \emph{additional} breakpoint(s) for the variable 'seg.Z'. Ignored when \code{obj} is not a segmented fit.} \item{n.break}{optional. Number of breakpoints postuled under the alternative hypothesis.} \item{only.term}{logical. If \code{TRUE}, only the pseudo covariate(s) relevant to the testing for the breakpoint is returned, and no test is computed.} \item{break.type}{The kind of breakpoint being tested. 
\code{"break"} is for piecewise-linear relationships, \code{"jump"} means piecewise-constant, i.e. a step-function, relationships.} } \details{ \code{pscore.test} tests for a non-zero difference-in-slope parameter of a segmented relationship. Namely, the null hypothesis is \eqn{H_0:\beta=0}{H_0:beta=0}, where \eqn{\beta}{beta} is the difference-in-slopes, i.e. the coefficient of the segmented function \eqn{\beta(x-\psi)_+}{beta*(x-psi)_+}. The hypothesis of interest \eqn{\beta=0}{beta=0} means no breakpoint. Simulation studies have shown that such Score test is more powerful than the Davies test (see reference) when the alternative hypothesis is `one changepoint'. If there are two or more breakpoints (for instance, a sinusoidal-like relationships), \code{pscore.test} can have lower power, and \code{\link{davies.test}} can perform better. The \code{dispersion} value, if unspecified, is taken from \code{obj}. If \code{obj} represents the fit under the null hypothesis (no changepoint), the dispersion parameter estimate will be usually larger, leading to a (potentially severe) loss of power. The \code{k} evaluation points are \code{k} equally spaced values in the range of the segmented covariate. \code{k} should not be small. Specific values can be set via \code{values}, although I have found no important difference due to number and location of the evaluation points, thus default is \code{k=10} equally-spaced points. However, when the possible breakpoint is believed to lie into a specified narrower range, the user can specify \code{k} values in that range leading to higher power in detecting it, i.e. typically lower p-value. If \code{obj} is a (segmented) \emph{lm} object, the returned p-value comes from the t-distribution with appropriate degrees of freedom. Otherwise, namely if \code{obj} is a (segmented) \emph{glm} object, the p-value is computed wrt the Normal distribution. } \value{ A list with class '\code{htest}' containing the following components: \item{method}{title (character)} \item{data.name}{the regression model and the segmented variable being tested} \item{statistic }{the empirical value of the statistic} \item{parameter }{number of evaluation points} \item{p.value }{the p-value} \item{process}{the alternative hypothesis set} } \references{ Muggeo, V.M.R. (2016) Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling. \emph{J of Statistical Computation and Simulation}, \bold{86}, 3059--3067. } \author{ Vito M.R. Muggeo } \seealso{See also \code{\link{davies.test}}. } \examples{ \dontrun{ set.seed(20) z<-runif(100) x<-rnorm(100,2) y<-2+10*pmax(z-.5,0)+rnorm(100,0,3) o<-lm(y~z+x) #testing for one changepoint #use the simple null fit pscore.test(o,~z) #compare with davies.test(o,~z).. #use the segmented fit os<-segmented(o, ~z) pscore.test(os,~z) #smaller p-value, as it uses the dispersion under the alternative (from 'os') #test for the 2nd breakpoint in the variable z pscore.test(os,~z, more.break=TRUE) } } \keyword{ htest } segmented/man/model.matrix.stepmented.Rd0000644000176200001440000000325314610170476020052 0ustar liggesusers\name{model.matrix.stepmented} \alias{model.matrix.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Design matrix for stepmented fits } \description{ This function builds the model matrix for \code{stepmented} fits. } \usage{ \method{model.matrix}{stepmented}(object, type=c("cdf","abs","none"), k=NULL, ...) 
} %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ A stepmented fit } \item{k}{ The (negative) exponent of the sample size to approximate the absolute value; see \code{\link{vcov.stepmented}} for details. } \item{type}{ The approximation for the indicator function/absolute value. If \code{"none"}, the simple matrix with the original indicator values is returned. \code{type='abs'} is not yet allowed. } \item{\dots}{ additional arguments } } \details{ If \code{type="none"}, \code{model.matrix.stepmented} return the design matrix including the indicator function values and ignoring the psi terms. } \value{ The design matrix for a stepmented regression model with the specified formula and data } \author{ Vito Muggeo } %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ See Also as \code{\link{model.matrix}}, \code{\link{vcov.stepmented}} } %\examples{ % %} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/slope.Rd0000644000176200001440000001206614672261456014433 0ustar liggesusers\name{slope} \alias{slope} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Slope estimates from segmented/stepmented relationships } \description{ Computes the slopes of each `segmented' (or even `stepmented') relationship in the fitted model. } \usage{ slope(ogg, parm, conf.level = 0.95, rev.sgn=FALSE, APC=FALSE, .vcov=NULL, .coef=NULL, use.t=NULL, by=NULL, interc=TRUE, level=0, ..., digits = max(4, getOption("digits") - 2)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ogg}{ an object of class "segmented" or "segmented.lme", returned by any \code{segmented} method or a list of two segmented fits to compare the estimates of corresponding slopes.} \item{parm}{ the segmented variable whose slopes have to be computed. If missing all the segmented variables are considered. } \item{conf.level}{ the confidence level required. } \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}, but it is recycled otherwise. When \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the sign is reversed before printing. This is useful when a null-constraint has been set on the last slope.} \item{APC}{logical. If \code{APC=TRUE} the `annual percent changes', i.e. \eqn{100\times(\exp(\beta)-1)}{100*(exp(b)-1)}, are computed for each interval (\eqn{\beta}{b} is the slope). Only point estimates and confidence intervals are returned.} \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(ogg)}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(ogg)}.} \item{use.t}{Which quantiles should be used to compute the confidence intervals? If \code{NULL} (default) the \eqn{t}{t} distribution is used only for objects obtained by \code{segmented.lm}. } \item{by}{Only for \code{segmented.lme} objects. It is a named list indicating covariate names and corresponding values affecting the fitted segmented relationship. 
For instance, \code{by=list(group="2",z2=.2)}, provided that the model has been fitted by specifying \code{group} and \code{z2} in \code{x.diff} (or as interaction with the segmented variable). Note that if the provided variables or values are irrelevant for changing the slopes, a warning message is printed. } \item{interc}{logical, only for \code{'stepmented'} fits. If \code{TRUE}, the mean levels also account for the intercept; otherwise the first level is assumed to be zero. } \item{level}{Numeric, only for \code{'segmented.lme'} fits. If \code{0}, fixed effects left/right slopes are returned, otherwise the subject-specific values (with no confidence intervals). } \item{...}{ Further arguments to be passed on to \code{vcov.segmented}, such as \code{var.diff} and \code{is}. See Details in \code{\link{vcov.segmented}} and \code{\link{summary.segmented}}. } \item{digits}{controls the number of digits in the returned output.} } \details{ To fit broken-line relationships, \code{segmented} uses a parameterization whose coefficients are not the slopes. Therefore given an object \code{"segmented"}, \code{slope} computes point estimates, standard errors, t-values and confidence intervals of the slopes of each segmented relationship in the fitted model. } \value{ \code{slope} returns a list of matrices. Each matrix represents a segmented relationship and its number of rows equals the number of segments, while five columns summarize the results. } \references{ Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points. \emph{Statistics in Medicine} \bold{22}, 3055--3071. } \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} } \note{The returned summary is based on the limiting Gaussian distribution for the model parameters involved in the computations. Sometimes, even with large sample sizes such approximations are questionable (e.g., with small difference-in-slope parameters) and the results returned by \code{slope} might be unreliable. Therefore it is the responsibility of the user to gauge the applicability of such asymptotic approximations. In any case, the t values should not be used for testing purposes; they should be used just as guidelines to assess the estimate uncertainty. } \seealso{See also \code{\link{davies.test}} and \code{\link{pscore.test}} to test for a nonzero difference-in-slope parameter. } \examples{ set.seed(16) x<-1:100 y<-2+1.5*pmax(x-35,0)-1.5*pmax(x-70,0)+rnorm(100,0,3) out<-glm(y~1) out.seg<-segmented(out,seg.Z=~x,psi=list(x=c(20,80))) ## the slopes of the three segments.... slope(out.seg) rm(x,y,out,out.seg) # ## a heteroscedastic example.. set.seed(123) n<-100 x<-1:n/n y<- -x+1.5*pmax(x-.5,0)+rnorm(n,0,1)*ifelse(x<=.5,.4,.1) o<-lm(y~x) oseg<-segmented(o,seg.Z=~x,psi=.6) slope(oseg) slope(oseg,var.diff=TRUE) #better CI } \keyword{ regression } \keyword{ htest } segmented/man/pwr.seg.Rd0000644000176200001440000001122114540253210014650 0ustar liggesusers\name{pwr.seg} \alias{pwr.seg} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Power Analysis in segmented regression } \description{ Given the appropriate input values, the function computes the power (sample size) corresponding to the specified sample size (power). If a segmented fit object is provided, the power is computed taking the parameter estimates as input values.
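For instance, \code{pwr.seg(pow=.7, psi=.5, d=1.5, s=.5)} returns the sample size needed to attain 70\% power for the specified breakpoint, slope difference and response standard deviation, while \code{pwr.seg(os)}, where \code{os} is a segmented fit, computes the power at the corresponding parameter estimates (see the Examples below).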
} \usage{ pwr.seg(oseg, pow, n, z = "1:n/n", psi, d, s, n.range = c(10,300), X = NULL, break.type=c("break","jump"), alpha = 0.01, round.n = TRUE, alternative = c("two.sided", "greater", "less"), msg = TRUE, ci.pow=0) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{oseg}{ The fitted segmented object. If provided, the power is computed at the model parameter estimates, and all the remaining arguments but \code{alternative} and \code{alpha} are ignored. } \item{pow}{ The desired power level. If provided \code{n} has to be missing } \item{n}{ The fixed sample size. If provided \code{pow} has to be missing } \item{z}{ The covariate understood to have a segmented effect. Default is \code{"1:n/n"}, i.e. equispaced values in (0,1). More generally a string indicating the quantile function having \code{p} and possible other numerical values as arguments. For istance \code{"qunif(p,0,1)"}, \code{"qnorm(p,2,5)"}, or \code{"qexp(p)"}. \code{"qunif(p,1,n)"} can be also specified, but attention should be paid to guarantee \code{psi} within the covariate range. Finally, it could be also a numerical vector meaning the actual covariate, but \code{pow} has to be missing. Namely if the covariate is supplied (and n is known), only the relevant power can be estimated. } \item{psi}{ The breakpoint value within the covariate range } \item{d}{ The slope difference } \item{s}{ The response standard deviation } \item{n.range}{ When \code{pow} is provided and the relevant sample size estimate has to be returned, the function evaluates 50 sample sizes equally spaced in \code{n.range}. However the function can also compute, via spline interpolation, sample sizes outside the specified range. } \item{X}{ The design matrix including additional linear variables in the regression equation. Default to \code{NULL} which means intercept and linear term for the segmented covariate. } \item{break.type}{ Type of breakpoint. \code{break.type='break'} means piecewise linear (segmented), \code{break.type='jump'} refers to piecewise constant. } \item{alpha}{ The type-I error probability. Default to 0.01. } \item{round.n}{ logical. If \code{TRUE} the (possible) returned sample size value is rounded. } \item{alternative}{ a character string specifying the alternative hypothesis, must be one of "two.sided", "greater" or "less". Note, this refers to the sign of the slope difference. } \item{msg}{ logical. If \code{TRUE} the output is returned along with a simple message, otherwise only the values are returned } \item{ci.pow}{ Numerical. If \code{oseg} has been supplied, \code{ci.pow} replicates are drawn to build a 95\% confidence interval for the power. } } \details{ The function exploits the sampling distribution of the pseudo Score statistic under the alternative hypothesis of one breakpoint. } \value{ The computed power \emph{or} sample size, with or without message (depending on \code{msg}) } \references{ D'Angelo N, Muggeo V.M.R. (2021) Power analysis in segmented regression, working paper \cr https://www.researchgate.net/publication/355885747. Muggeo, V.M.R. (2016) Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling. \emph{J of Statistical Computation and Simulation}, \bold{86}, 3059--3067. 
} \author{ Nicoletta D'Angelo and Vito Muggeo } \note{ Currently the function assumes just 1 breakpoint in one covariate } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{pscore.test}} } \examples{ ## pwr.seg(pow=.7, psi=.5, d=1.5, s=.5) #returns the sample size ## pwr.seg(n=219, psi=.5, d=1.5, s=.5) #returns the power ## pwr.seg(n=20,z="qnorm(p, 2,5)", psi=3, d=.5, s=2) #the covariate is N(2,5) ## pwr.seg(n=20,z="qexp(p)", psi=.1, d=.5, s=.1) #the covariate is Exp(1) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/plant.Rd0000644000176200001440000000260014554204246014411 0ustar liggesusers\name{plant} \alias{plant} \docType{data} \title{ Plan organ dataset} \description{ The \code{plant} data frame has 103 rows and 3 columns. } \usage{data(plant)} \format{ A data frame with 103 observations on the following 3 variables: \describe{ \item{\code{y}}{measurements of the plant organ.} \item{\code{time}}{times where measurements took place.} \item{\code{group}}{three attributes of the plant organ, \code{RKV}, \code{RKW}, \code{RWC}.} } } \details{ Three attributes of a plant organ measured over time where biological reasoning indicates likelihood of multiple breakpoints. The data are scaled to the maximum value for each attribute and all attributes are measured at each time. } \source{ The data have been kindly provided by Dr Zongjian Yang at School of Land, Crop and Food Sciences, The University of Queensland, Brisbane, Australia. } %\references{ % ~~ possibly secondary sources and usages ~~ %} \examples{ \dontrun{ data(plant) lattice::xyplot(y~time,groups=group,auto.key=list(space="right"), data=plant) o<-segreg(y~ 0+group+seg(time, by=group, npsi=2), data=plant) summary(o) par(mfrow=c(1,2)) plot(y~time, data=plant) plot(o, term=1:3, add=TRUE, leg=NA, psi.lines=TRUE) #add the lines to the current plot plot(o, term=1:3, col=3:5, res.col=3:5, res=TRUE, leg="bottomright") } } \keyword{datasets} segmented/man/selgmented.Rd0000644000176200001440000002372514650720424015433 0ustar liggesusers\name{selgmented} \alias{selgmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Selecting the number of breakpoints in segmented regression } \description{ This function selects (and estimates) the number of breakpoints of the segmented relationship according to the BIC/AIC criterion or sequential hypothesis testing. } \usage{ selgmented(olm, seg.Z, Kmax=2, type = c("score", "bic", "davies", "aic"), alpha = 0.05, control = seg.control(), refit = FALSE, stop.if = 5, return.fit = TRUE, bonferroni = FALSE, msg = TRUE, plot.ic = FALSE, th = NULL, G = 1, check.dslope = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{olm}{ A starting \code{lm} or \code{glm} object, or a simple numerical vector meaning the response variable. } \item{seg.Z}{ A one-side formula for the segmented variable. Only one term can be included, and it can be omitted if \code{olm} is a (g)lm fit including just one numeric covariate. Also it might be omitted, and will be taken as 1,2..., if \code{olm} includes a single numeric variable. } \item{Kmax}{ The maximum number of breakpoints being tested. 
If \code{type='bic'} or \code{type='aic'}, any integer value can be specified; otherwise at most \code{Kmax=2} breakpoints can be tested via the Score or Davies statistics. } \item{type}{ Which criterion should be used? Options \code{"score"} and \code{"davies"} allow to carry out sequential hypothesis testing with no more than 2 breakpoints (\code{Kmax=2}). Alternatively, the number of breakpoints can be selected via the BIC (or AIC) with virtually no upper bound for \code{Kmax}. } \item{alpha}{ The fixed type I error probability when sequential hypothesis testing is carried out (i.e. \code{type='score'} or \code{'davies'}). It is also used when \code{type='bic'} (or \code{type='aic'}) and \code{check.dslope=TRUE} to remove the breakpoints based on the slope difference t-value. } \item{control}{ See \code{\link{seg.control}}. } \item{refit}{ If \code{TRUE}, the final selected model is re-fitted using arguments in \code{control}, typically with bootstrap restarting. Set \code{refit=FALSE} to speed up computation (possibly accepting near-optimal estimates). It is always \code{TRUE} if \code{type='score'} or \code{type='davies'}. } \item{stop.if}{ An integer. If, when trying models with an increasing (when \code{G=1}) or decreasing (when \code{G>1}) number of breakpoints, \code{stop.if} consecutive fits exhibit higher AIC/BIC values, the search is interrupted. Set a large number, larger than \code{Kmax} say, if you want to assess the fits for all breakpoints \code{0, 1, 2, ..., Kmax}. Ignored if \code{type='score'} or \code{type='davies'}. } \item{return.fit}{ If \code{TRUE}, the fitted model (with the number of breakpoints selected according to \code{type}) is returned. } \item{bonferroni}{ If \code{TRUE}, the Bonferroni correction is employed, i.e. \code{alpha/Kmax} (rather than \code{alpha}) is always taken as threshold value to reject or not. If \code{FALSE}, \code{alpha} is used in the second level of hypothesis testing. It is also effective when \code{type="bic"} (or \code{'aic'}) and \code{check.dslope=TRUE}, see Details. } \item{msg}{ If \code{FALSE} the final fit is returned silently with the selected number of breakpoints, otherwise the messages about the selection procedure (i.e. the BIC values), and possible warnings are also printed. } \item{plot.ic}{ If \code{TRUE} the information criterion values with respect to the number of breakpoints are plotted. Ignored if \code{type='score'} or \code{type='davies'} or \code{G>1}. Note that if \code{check.dslope=TRUE}, the final number of breakpoints could differ from the one selected by the BIC/AIC leading to an inconsistent plot of the information criterion, see Note below. } \item{th}{ When a large number of breakpoints is being tested, it could happen that 2 estimated breakpoints are too close to each other, and only one can be retained. Thus if the difference between two breakpoints is less than or equal to \code{th}, one (the first) breakpoint is removed. Of course, \code{th} depends on the \code{x} scale: Integers, like 5 or 10, are appropriate if the covariate is the observation index. Default (\code{NULL}) means \code{th=diff(range(x))/100}. Set \code{th=0} if you are willing to consider even breakpoints very close to each other. Ignored if \code{type='score'} or \code{type='davies'}. } \item{G}{ Number of sub-intervals to consider to search for the breakpoints when \code{type='bic'} or \code{'aic'}. See Details. } \item{check.dslope}{ Logical. Effective only if \code{type='bic'} or \code{'aic'}.
After the optimal number of breakpoints has been selected (via AIC/BIC), should the \eqn{t}{t} values of the slope differences be checked? If \code{TRUE}, the breakpoints corresponding to slope differences with 'low' \eqn{t}{t} values will be removed. Note the model is re-fitted at each removal and a new check is performed. Simulation evidence suggests that such a strategy leads to better results. See Details. } % \item{a}{ %An additional tuning parameter for the BIC. \eqn{a=1}{a=1} provides the classical BIC. %} } \details{ The function properly uses the functions \code{segmented}, \code{pscore.test} or \code{davies.test} to select the 'optimal' number of breakpoints \code{0,1,...,Kmax}. If \code{type='bic'} or \code{'aic'}, the procedure stops if the last \code{stop.if} fits have increasing values of the information criterion. Moreover, a breakpoint is removed if too close to another one, namely if the difference between two consecutive estimates is less than \code{th}. Finally, if \code{check.dslope=TRUE}, breakpoints whose corresponding slope difference estimate is `small' (i.e. \eqn{p}-value larger than \code{alpha} or \code{alpha/Kmax}) are also removed. When \eqn{G>1}{G>1} the dataset is split into \eqn{G}{G} groups, and the search is carried out separately within each group. This approach is fruitful when there are many breakpoints not evenly spaced in the covariate range and/or concentrated in some sub-intervals. \code{G=3} or \code{4} is recommended based on simulation evidence. Note \code{Kmax} is always tacitly reduced in order to have at least 1 residual df in the model with \code{Kmax} changepoints. Namely, if \eqn{n=20}, the maximal segmented model has \code{2*(Kmax + 1)} parameters, and therefore the largest \code{Kmax} allowed is 8. When \code{type='score'} or \code{'davies'}, the function also returns the 'overall p-value' coming from combining the single p-values using the Fisher method. The pooled p-value, however, does not affect the final result which depends on the single p-values only. } \note{ If \code{check.dslope=TRUE}, there is no guarantee that the final model has the lowest AIC/BIC. Namely the model with the best A/BIC could have `non-significant' slope differences which will be removed (along with the corresponding breakpoints) in the final model. Hence the possible plot (obtained via \code{plot.ic=TRUE}) could be misleading. See Example 1 below. } \value{ The returned object depends on argument \code{return.fit}. If \code{FALSE}, the returned object is a list with some information on the compared models (i.e. the BIC values), otherwise a classical \code{'segmented'} object (see \code{\link{segmented}} for details) with the component \code{selection.psi} including the A/BIC values and\cr - if \code{refit=TRUE}, \code{psi.no.refit} that represents the breakpoint values before the last fit (with boot restarting)\cr - if \code{G>1}, \code{cutvalues} including the cutoff values used to split the data. %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } \references{ Muggeo V (2020) Selecting number of breakpoints in segmented regression: implementation in the R package segmented https://www.researchgate.net/publication/343737604 } \author{ Vito M. R.
Muggeo } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{segmented}}, \code{\link{pscore.test}}, \code{\link{davies.test}} } \examples{ set.seed(12) xx<-1:100 zz<-runif(100) yy<-2+1.5*pmax(xx-35,0)-1.5*pmax(xx-70,0)+15*pmax(zz-.5,0)+rnorm(100,0,2) dati<-data.frame(x=xx,y=yy,z=zz) out.lm<-lm(y~x,data=dati) os <-selgmented(out.lm) #selection (Kmax=2) via the Score test (default) os <-selgmented(out.lm, type="bic", Kmax=3) #BIC-based selection \dontrun{ ######################################## #Example 1: selecting a large number of breakpoints b <- c(-1,rep(c(1.5,-1.5),l=15)) psi <- seq(.1,.9,l=15) n <- 2000 x <- 1:n/n X <- cbind(x, outer(x,psi,function(x,y)pmax(x-y,0))) mu <- drop(tcrossprod(X,t(b))) set.seed(113) y<- mu + rnorm(n)*.022 par(mfrow=c(1,2)) #select number of breakpoints via the BIC (and plot it) o<-selgmented(y, Kmax=20, type='bic', plot.ic=TRUE, check.dslope = FALSE) plot(o, res=TRUE, col=2, lwd=3) points(o) # select via the BIC + check on the slope differences (default) o1 <-selgmented(y, Kmax=20, type='bic', plot.ic=TRUE) #check.dslope = TRUE by default #note the plot of BIC is misleading.. But the number of psi is correct plot(o1, add=TRUE, col=3) points(o1, col=3, pch=3) ################################################## #Example 2: a large number of breakpoints not evenly spaced. b <- c(-1,rep(c(2,-2),l=10)) psi <- seq(.5,.9,l=10) n <- 2000 x <- 1:n/n X <- cbind(x, outer(x,psi,function(x,y)pmax(x-y,0))) mu <- drop(tcrossprod(X,t(b))) y<- mu + rnorm(n)*.02 #run selgmented with G>1. G=3 or 4 recommended. #note G=1 does not return the right number of breaks o1 <-selgmented(y, type="bic", Kmax=20, G=4) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/print.segmented.Rd0000644000176200001440000000174014415476772016420 0ustar liggesusers\name{print.segmented} \alias{print.segmented} \alias{coef.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Print method for the segmented class } \description{ Printing the most important features and coefficients (including the breakpoints) of a segmented model. } \usage{ \method{print}{segmented}(x, digits = max(3, getOption("digits") - 3), ...) \method{coef}{segmented}(object, include.psi=FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ object of class \code{segmented} } \item{digits}{ number of digits to be printed } \item{object}{object of class \code{segmented} } \item{include.psi}{logical. If \code{TRUE}, the breakpoints are returned along with the regression coefficients} \item{\dots}{ arguments passed to other functions } } \author{ Vito M.R. Muggeo } \seealso{ \code{\link{summary.segmented}}, \code{\link{print.summary.segmented}} } \keyword{ models } segmented/man/segmented.lme.Rd0000644000176200001440000002571214624050707016032 0ustar liggesusers\name{segmented.lme} \alias{segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Segmented relationships in linear mixed models } \description{ Fits linear mixed models with a segmented relationship between the response and a numeric covariate. Random effects are allowed in each model parameter, including the breakpoint. 
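A typical call is \code{segmented.lme(o, ~x, random=list(id=pdDiag(~1+x+U+G0)))}, where \code{o} is a preliminary \code{lme} fit, \code{x} is the segmented variable and \code{id} is the grouping factor; see the \code{random} argument and the Examples below.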
} \usage{ \method{segmented}{lme}(obj, seg.Z, psi, npsi = 1, fixed.psi = NULL, control = seg.control(), model = TRUE, z.psi = ~1, x.diff = ~1, random = NULL, random.noG = NULL, start.pd = NULL, psi.link = c("identity", "logit"), start = NULL, data, fixed.parms = NULL,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ A 'lme' fit returned by \code{lme} or simply its call. See example below. This represents the linear mixed model where the segmented relationship is added. } \item{seg.Z}{ A one-sided formula indicating the segmented variable, i.e. the quantitative variable having a segmented relationship with the response. In longitudinal studies typically it is the time.} \item{psi}{ An optional starting value for the breakpoint. If missing a starting value is obtained via the nadir estimate of a quadratic fit. When provided it may be a single numeric value or a vector of length equal to the number of clusters (i.e. subjects). } \item{z.psi}{ Optional. A one-sided formula meaning the covariates in the sub-regression model for the changepoint parameter. Default to \code{~1}. } \item{x.diff}{ Optional. A one-sided formula meaning the covariates in the sub-regression model for the difference-in-slopes parameter. Default to \code{~1} for no covariate for the difference-in-slope parameter. } \item{npsi}{ Ignored. Currently only \code{npsi=1} is allowed. } \item{fixed.psi}{ Ignored. } \item{control}{ A list returned by \code{\link{seg.control}}, in particular \code{display}, \code{n.boot} for the bootstrap restarting. } \item{model}{ Ignored. } \item{random}{ A list, as the one supplied in \code{random} of \code{lme()} including the random effects. Default to \code{NULL}, meaning that the same random effect structure of the initial lme fit supplied in \code{obj} should be used. When specified, this list could include the variables '\code{G0}' and '\code{U}'. \code{G0} means random effects in the breakpoints and \code{U} means random effects in the slope-difference parameter. Assuming \code{id} is the the cluster variable and \code{x} the segmented variable, some examples are \code{random = list(id = pdDiag(~1 + x + U))} #ind. random eff. (changepoint fixed) \code{random = list(id = pdDiag(~1 + x + U + G0))} #ind. random eff. (in the changepoint too) \code{random = list(id=pdBlocked(list(pdSymm(~1+x), pdSymm(~U+G0-1))))} #block diagonal } \item{random.noG}{ Ignored. } \item{start.pd}{ An optional starting value for the variances of the random effects. It should be coherent with the specification in \code{random}. } \item{psi.link}{ The link function used to specify the sub-regression model for the breakpoint \eqn{psi}. The identity (default) assumes \deqn{\psi_i=\eta_i} while the logit link is \deqn{\psi_i=(m+M*exp(\eta_i))/(1+exp(\eta_i))} where \eqn{m} and \eqn{M} are the observed minimum and maximum of the segmented variable in \code{seg.Z}. In each case the `linear predictor' is \eqn{\eta_i=\kappa_0+z_i^T\kappa_1+k_i}, where \eqn{z^T} includes the covariates specified in \code{z.psi} and the \eqn{k_i}s are the changepoint random effects included by means of \code{G0} in the \code{random} argument. } % \item{nq}{ %Integer value to assess if the estimated breakpoint for the subject i can be considered 'reliable' or not. For each subject, \eqn{\hat\psi} is not considered reliable if %\deqn{\hat{\psi}\le z_{(1+nq)} \; \mathrm{ and } \; \hat{\psi}\ge z_{(n_i-nq)} } %where \eqn{z_{(\cdot)}} means the sorted values of the segmented variable. 
Note \code{nq} just affects the logical vector \code{attr(psi.i,"is.break")} where \code{psi.i} is the component of the model fit including the subject-specific breakpoints. Default is \code{nq=0}. %} % \item{adjust}{ %A numerical (0 or 1) value. If \code{adjust=1} at convergence and for each subject \code{i} the estimated breakpoint assessed as %unreliable (see argument \code{nq}) is moved to the maximum of the covariate range (for the subject i). Thus for the subjects with %'unreliable' breakpoint, the corresponding fitted profile will be truly linear. %} \item{start}{ An optional list including the \emph{starting values} for the difference-in-slopes parameter, delta0 and delta, and the changepoint parameter, kappa and kappa0. When provided, 'kappa0' overwrites 'psi'. If provided, the components 'delta' and 'kappa' should be \emph{named} vectors with length and names matching length and names in \code{x.diff} and \code{z.psi} respectively. The component \code{delta0} can be a scalar or a vector with length equal to the number of clusters (subjects). } \item{data}{ the dataframe where the variables are stored. If missing, the dataframe of the \code{"lme"} fit \code{obj} is assumed. } \item{fixed.parms}{ An optional \emph{named} vector representing the coefficients \emph{of the changepoint} to be maintained \emph{fixed} during the estimation process. Allowed names are "G0" or any variable (in the dataframe) supposed to affect the location of breakpoints. For instance \code{fixed.parms=c(G0=.3)} implies a fixed value for the changepoint. Notice if you use the same variable in \code{fixed.parms} and in \code{z.psi}, for instance \code{fixed.parms=c(x2=.3)} and \code{z.psi=~x2}, a warning is printed and the coefficient "G.x2" is estimated to maximize the log likelihood \emph{given} that fixed value. As an example, suppose the unconstrained estimated coefficient for x2, say, in \code{z.psi} is 0.5; if in a new call both \code{fixed.parms=c(x2=.4)} and \code{z.psi=~x2} are included, the estimate of "G.x2" will be (approximately) 0.1. Essentially, if you really want to fix the parameters in \code{fixed.parms}, then do not include the same covariates in \code{z.psi}. } \item{...}{ Ignored } % \item{tol}{ %the tolerance value to declare convergence %} % \item{it.max}{ %the maximum number of iterations allowed %} % \item{display}{ %logical. If \code{TRUE} the objective function value is printed at each iteration %} } \details{ The function fits segmented mixed regression models, i.e. segmented models with random effects also in the slope-difference and change-point parameters. } \value{ A list of class \code{segmented.lme} with several components. The most relevant are \item{lme.fit }{The fitted lme object at convergence} \item{lme.fit.noG }{The fitted lme object at convergence assuming known the breakpoints} \item{psi.i}{The subject/cluster-specific change points (fixed + random). It includes 2 attributes: \code{attr(,"ni")} for the number of measurements in each 'cluster', and \code{attr(,"is.break")} a vector of logicals indicating if the breakpoint for each subject i can be reliable (\code{TRUE}) or not (\code{FALSE}). Here 'reliable' simply means within the covariate range (for subject i). See also argument \code{nq}.} \item{fixed.eta.psi}{The fixed-effect linear predictor for the change points regression equation. 
These values will different among 'clusters' only if at least one covariate has been specified in \code{z.psi}.} \item{fixed.eta.delta}{The fixed-effect linear predictor of the slope difference regression equation. These values will different among 'clusters' only if at least one covariate has been specified in \code{x.diff}.} } \references{ Muggeo V., Atkins D.C., Gallop R.J., Dimidjian S. (2014) Segmented mixed models with random changepoints: a maximum likelihood approach with application to treatment for depression study. Statistical Modelling, 14, 293-313. Muggeo V. (2016) Segmented mixed models with random changepoints in R. Working paper available on RG. doi: 10.13140/RG.2.1.4180.8402 } \author{ Vito M.R. Muggeo \email{vito.muggeo@unipa.it} } \note{ Currently only one breakpoint (with or without random effects) can be estimated. If \code{fit} is the segmented.lme fit, use \code{VarCorr(fit$lme.fit)} to extract the random effect covariance matrix. } \section{Warning }{ The function deals with estimation with a \emph{single} breakpoint only. %All the functions for segmented mixed models (*.segmented.lme) work pretty well, but you should are still at an experimental stage } \seealso{ \code{\link{plot.segmented.lme}} for the plotting method and \code{\link{segmented.default}} (example 2) for segmented models with no random effects in breakpoints or slope difference. } \examples{ \dontrun{ library(nlme) data(Cefamandole) Cefamandole$lTime <-log(Cefamandole$Time) Cefamandole$lconc <-log(Cefamandole$conc) o<-lme(lconc ~ lTime, random=~1|Subject, data=Cefamandole) os<-segmented.lme(o, ~lTime, random=list(Subject=pdDiag(~1+lTime+U+G0)), control=seg.control(n.boot=0, display=TRUE)) slope(os) #################################################### # covariate effect on the changepoint and slope diff #let's assume a new subject-specific covariates.. set.seed(69) Cefamandole$z <- rep(runif(6), rep(14,6)) Cefamandole$group <- gl(2,42,labels=c('a','b')) #Here 'group' affects the slopes and 'z' affects the changepoint o1 <-lme(lconc ~ lTime*group, random=~1|Subject, data=Cefamandole) os1 <- segmented(o1, ~lTime, x.diff=~group, z.psi=~z, random=list(Subject=pdDiag(~1+lTime+U+G0))) slope(os1, by=list(group="a")) #the slope estimates in group="a" (baseline level) slope(os1, by=list(group="b")) #the slope estimates in group="b" ################################################### # A somewhat "complicated" example: # i) strong heterogeneity in the changepoints # ii) No changepoint for the Subject #7 (added) d<-Cefamandole d$x<- d$lTime d$x[d$Subject==1]<- d$lTime[d$Subject==1]+3 d$x[d$Subject==5]<- d$lTime[d$Subject==5]+5 d$x[d$Subject==3]<- d$lTime[d$Subject==3]-5 d<-rbind(d, d[71:76,]) d$Subject <- factor(d$Subject, levels=c(levels(d$Subject),"7")) d$Subject[85:90] <- rep("7",6) o<-lme(lconc ~ x, random=~1|Subject, data=d) os2<-segmented.lme(o, ~x, random=list(Subject=pdDiag(~1+x+U+G0)), control=seg.control(n.boot=5, display=TRUE)) #plots with common x- and y- scales (to note heterogeneity in the changepoints) plot(os2, n.plot = c(3,3)) os2$psi.i attr(os2$psi.i, "is.break") #it is FALSE for Subject #7 #plots with subject-specific scales plot(os2, n.plot = c(3,3), xscale=-1, yscale = -1) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. 
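% Note: if 'os' is the fitted segmented.lme object, the random-effect covariance matrix
% can be extracted via VarCorr(os$lme.fit), as mentioned in the Note above.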
% For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/davies.test.Rd0000644000176200001440000001450114415476770015540 0ustar liggesusers\name{davies.test} \alias{davies.test} \title{ Testing for a change in the slope } \description{ Given a generalized linear model, the Davies' test can be employed to test for a non-constant regression parameter in the linear predictor. } \usage{ davies.test(obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), type=c("lrt","wald"), values=NULL, dispersion=NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ a fitted model typically returned by \code{glm} or \code{lm}. Even an object returned by \code{segmented} can be set (e.g. if interest lies in testing for an additional breakpoint).} \item{seg.Z}{ a formula with no response variable, such as \code{seg.Z=~x1}, indicating the (continuous) segmented variable being tested. Only a single variable may be tested and an error is printed when \code{seg.Z} includes two or more terms. \code{seg.Z} can be omitted if i)\code{obj} is a segmented fit with a single segmented covariate (and that variable is taken), or ii)if it is a "lm" or "glm" fit with a single covariate (and that variable is taken)} \item{k}{ number of points where the test should be evaluated. See Details. } \item{alternative}{ a character string specifying the alternative hypothesis (relevant to the slope difference parameter). } \item{type}{ the test statistic to be used (only for GLM, default to lrt). Ignored if \code{obj} is a simple linear model.} \item{values}{ optional. The evaluation points where the Davies approximation is computed. See Details for default values.} \item{dispersion}{ the dispersion parameter for the family to be used to compute the test statistic. When \code{NULL} (the default), it is inferred from \code{obj}. Namely it is taken as \code{1} for the Binomial and Poisson families, and otherwise estimated by the residual Chi-squared statistic (calculated from cases with non-zero weights) divided by the residual degrees of freedom. } } \details{ \code{davies.test} tests for a non-zero difference-in-slope parameter of a segmented relationship. Namely, the null hypothesis is \eqn{H_0:\beta=0}{H_0:beta=0}, where \eqn{\beta}{beta} is the difference-in-slopes, i.e. the coefficient of the segmented function \eqn{\beta(x-\psi)_+}{beta*(x-psi)_+}. The hypothesis of interest \eqn{\beta=0}{beta=0} means no breakpoint. Roughtly speaking, the procedure computes \code{k} `naive' (i.e. assuming fixed and known the breakpoint) test statistics for the difference-in-slope, seeks the `best' value and corresponding naive p-value (according to the alternative hypothesis), and then corrects the selected (minimum) p-value by means of the \code{k} values of the test statistic. If \code{obj} is a LM, the Davies (2002) test is implemented. This approach works even for small samples. If \code{obj} represents a GLM fit, relevant methods are described in Davies (1987), and the Wald or the Likelihood ratio test statistics can be used, see argument \code{type}. This is an asymptotic test. The \code{k} evaluation points are \code{k} equally spaced values between the second and the second-last values of the variable reported in \code{seg.Z}. \code{k} should not be small; I find no important difference for \code{k} larger than 10, so default is \code{k=10}. 
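For instance, with \code{o} and \code{z} as in the Examples below, \code{davies.test(o, ~z, values=seq(.3,.7,l=10))} evaluates the test statistic at ten points between 0.3 and 0.7 only.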
} \value{ A list with class '\code{htest}' containing the following components: \item{method}{title (character)} \item{data.name}{the regression model and the segmented variable being tested} \item{statistic }{the point within the range of the covariate in \code{seg.Z} at which the maximum (or the minimum if \code{alternative="less"}) occurs} \item{parameter }{number of evaluation points} \item{p.value }{the adjusted p-value} \item{process}{a two-column matrix including the evaluation points and corresponding values of the test statistic} } \references{ Davies, R.B. (1987) Hypothesis testing when a nuisance parameter is present only under the alternative. \emph{Biometrika} \bold{74}, 33--43. Davies, R.B. (2002) Hypothesis testing when a nuisance parameter is present only under the alternative: linear model case. \emph{Biometrika} \bold{89}, 484--489. } \author{ Vito M.R. Muggeo } \note{ Strictly speaking, the Davies test is not confined to the segmented regression; the procedure can be applied when a nuisance parameter vanishes under the null hypothesis. The test is slightly conservative, as the computed p-value is actually an upper bound. Results should change slightly with respect to previous versions where the evaluation points were computed as \code{k} equally spaced values between the second and the second last observed values of the segmented variable. } \section{Warning }{ The Davies test is \emph{not} aimed at obtaining the estimate of the breakpoint. The Davies test is based on \code{k} evaluation points, thus the value returned in the \code{statistic} component (and printed as "'best' at") is the best among the \code{k} points, and typically it will differ from the maximum likelihood estimate returned by \code{segmented}. Use \code{\link{segmented}} if you are interested in the point estimate. To test for a breakpoint in \emph{linear} models with small samples, it is suggested to use \code{davies.test()} with objects of class "lm". If \code{obj} is a \code{"glm"} object with gaussian family, \code{davies.test()} will use an approximate test resulting in smaller p-values when the sample is small. However if the sample size is large (n>300), the exact Davies (2002) upper bound cannot be computed (as it relies on \code{gamma()} function) and the \emph{approximate} upper bound of Davies (1987) is returned. } %%\section{Warning }{Currently \code{davies.test} does not work if the fitted model \code{ogg} %% does not include the segmented variable \code{term} being tested.} \seealso{See also \code{\link{pscore.test}} which is more powerful, especially when the signal-to-noise ratio is low. } \examples{ \dontrun{ set.seed(20) z<-runif(100) x<-rnorm(100,2) y<-2+10*pmax(z-.5,0)+rnorm(100,0,3) o<-lm(y~z+x) davies.test(o,~z) davies.test(o,~x) o<-glm(y~z+x) davies.test(o,~z) #it works but the p-value is too small.. } } \keyword{ htest } segmented/man/summary.segmented.lme.Rd0000644000176200001440000000525114415476766017541 0ustar liggesusers\name{summary.segmented.lme} \alias{summary.segmented.lme} %\alias{print.summary.segmented} \title{ Summarizing model fits for segmented mixed-effects regression } \description{ summary method for class \code{segmented.lme}. } \usage{ \method{summary}{segmented.lme}(object, .vcov=NULL, digits = max(3, getOption("digits") - 3), ...) %\method{print}{summary.segmented}(x, short=x$short, var.diff=x$var.diff, % digits = max(3, getOption("digits") - 3), % signif.stars = getOption("show.signif.stars"),...) } %- maybe also 'usage' for other objects documented here. 
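% Typical use, continuing the example in ?segmented.lme where 'os' is the fitted object:
% summary(os)
% summary(os, digits=4)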
\arguments{ \item{object}{ Object of class "segmented.lme". } \item{.vcov}{ Optional. The full covariance matrix for the parameter estimates. If provided, standard errors are computed (and displayed) according to this matrix.} % \item{x}{a \code{summary.segmented} object produced by \code{summary.segmented()}.} \item{digits}{controls number of digits printed in output.} % \item{signif.stars}{logical, should stars be printed on summary tables of coefficients?} \item{\dots}{ further arguments. } } \details{ %If \code{short=TRUE} only coefficients of the segmented relationships are printed. The function summarizes and prints the most relevant information on the segmented mixed fit. The output is similar to that returned by \code{print.summary.lme} } \value{ A list (similar to one returned by \code{segmented.lm}) with estimates of the variance components, and point estimates, standard errors, DF, t-value and p-value for the fixed effects. p-values for the variables \code{U} and \code{G0} are omitted as pointless. % \item{Ttable }{estimates and standard errors of the model parameters. This is similar % to the matrix \code{coefficients} returned by \code{summary.lm} or \code{summary.glm}, % but without the rows corresponding to the breakpoints. Even the p-values relevant to the % difference-in-slope parameters have been replaced by NA, since they are meaningless in % this case, see \code{\link{davies.test}}.} % \item{cov.var.diff}{if \code{var.diff=TRUE}, the covaraince matrix accounting for heteroscedastic errors.} % \item{sigma.new}{if \code{var.diff=TRUE}, the square root of the estimated error variances in each interval.} % \item{df.new}{if \code{var.diff=TRUE}, the residual degrees of freedom in each interval.} } %\references{ ~put references to the literature/web site here ~ } \author{ Vito M.R. Muggeo } \seealso{ \code{\link{print.segmented.lme}} } \examples{ ##continues example from segmented.lme() # summary(os) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/broken.line.Rd0000644000176200001440000000747314515672222015516 0ustar liggesusers\name{broken.line} \alias{broken.line} \title{ Fitted values for segmented relationships} \description{ Given a segmented model (typically returned by a \code{segmented} method), \code{broken.line} computes the fitted values (and relevant standard errors) for the specified `segmented' relationship. } \usage{ broken.line(ogg, term = NULL, link = TRUE, interc=TRUE, se.fit=TRUE, isV=FALSE, .vcov=NULL, .coef=NULL, ...) } \arguments{ \item{ogg}{ A fitted object of class segmented (returned by any \code{segmented} method). } \item{term}{ Three options. i) A named list (whose name should be one of the segmented covariates in the model \code{ogg}) including the covariate values for which segmented predictions should be computed; ii) a character meaning the name of any segmented covariate in the model (and predictions corresponding to the observed covariate values are returned); iii) It can be \code{NULL} if the model includes a single segmented covariate (and predictions corresponding to the observed covariate values are returned).} \item{link}{ Should the predictions be computed on the scale of the link function if \code{ogg} is a segmented glm fit? Default to \code{TRUE}. } \item{interc}{ Should the model intercept be added? 
(provided it exists).} \item{se.fit}{ If \code{TRUE} also standard errors for predictions are returned.} \item{isV}{ A pair of logicals indicating if the segmented terms \eqn{(x-\psi)_+}{(x-\psi)_+} and \eqn{I(x>\psi)}{I(x>\psi)} in the model matrix should be replaced by their smoothed counterparts when computing the standard errors. If a single logical is provided, it is applied to both terms.} \item{.vcov}{ Optional. The \emph{full} covariance matrix of estimates. If \code{NULL} (and \code{se.fit=TRUE}), the matrix is computed internally via \code{vcov.segmented()}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(ogg)}.} \item{...}{ Additional arguments to be passed on to \code{vcov.segmented()} when computing the standard errors for the predictions, namely \code{is}, \code{var.diff}, \code{p.df}. See \code{\link{summary.segmented}} and \code{\link{vcov.segmented}}.} } \details{ If \code{term=NULL} or \code{term} is a valid segmented covariate name, predictions for that segmented variable are the relevant fitted values from the model. If \code{term} is a (correctly named) list with numerical values, predictions corresponding to such specified values are computed. If \code{link=FALSE} and \code{ogg} inherits from the class "glm", predictions and possible standard errors are returned on the response scale. The standard errors come from the Delta method. Argument \code{link} is ignored when \code{ogg} does not inherit from the class "glm". } \value{ A list having one component (if \code{se.fit=FALSE}) or two components (if \code{se.fit=TRUE}), representing the predictions and standard errors for the segmented covariate values. } %\references{ ~put references to the literature/web site here ~ } \author{ Vito M. R. Muggeo } \note{ This function was written before \code{predict.segmented} (which is more general) was available. } % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{segmented}}, \code{\link{predict.segmented}}, \code{\link{plot.segmented}}, \code{\link{vcov.segmented}}} \examples{ set.seed(1234) z<-runif(100) y<-rpois(100,exp(2+1.8*pmax(z-.6,0))) o<-glm(y~z,family=poisson) o.seg<-segmented(o,seg.Z=~z) \dontrun{plot(z,y)} \dontrun{points(z,broken.line(o.seg,link=FALSE)$fit,col=2)} #ok, but use plot.segmented()! } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/intercept.Rd0000644000176200001440000000601414415476774015300 0ustar liggesusers\name{intercept} \alias{intercept} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Intercept estimates from segmented relationships } \description{ Computes the intercepts of each `segmented' relationship in the fitted model. } \usage{ intercept(ogg, parm, rev.sgn = FALSE, var.diff=FALSE, .vcov=NULL, .coef=NULL, digits = max(4, getOption("digits") - 2),...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ogg}{ an object of class "segmented", returned by any \code{segmented} method. } \item{parm}{ the segmented variable whose intercepts have to be computed. If missing all the segmented variables in the model are considered. } % \item{gap}{ % logical. should the intercepts account for the (possible) gaps? %} \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}, but it is recycled otherwise.
When \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the order is reversed before printing. This is useful when a null-constraint has been set on the last slope. } \item{var.diff}{Currently ignored as only point estimates are computed. %logical. If \code{var.diff=TRUE} and there is a single segmented variable, the computed standard errors % are based on a sandwich-type formula of the covariance matrix. See Details in \code{\link{summary.segmented}}. } \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(ogg)}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(ogg)}.} \item{digits}{controls the number of digits in the returned output.} \item{...}{ Further arguments to be passed on to \code{vcov.segmented}, such as \code{var.diff} and \code{is}. See Details in \code{\link{vcov.segmented}}.} } \details{ A broken-line relationship means that a regression equation exists in the intervals `\eqn{min(x)}{min(x)} to \eqn{\psi_1}{psi1}', `\eqn{\psi_1}{psi1} to \eqn{\psi_2}{psi2}', and so on. \code{intercept} computes point estimates of the intercepts of the different regression equations for each segmented relationship in the fitted model. } \value{ \code{intercept} returns a list of one-column matrices. Each matrix represents a segmented relationship. } %\references{ %% ~put references to the literature/web site here ~ %} \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it}} %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ See also \code{\link{slope}} to compute the slopes of the different regression equations for each segmented relationship in the fitted model. } \examples{ ## see ?slope \dontrun{ intercept(out.seg) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/plot.segmented.lme.Rd0000644000176200001440000001527414727042257017002 0ustar liggesusers\name{plot.segmented.lme} \alias{plot.segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Plot method for segmented mixed objects } \description{ Takes a fitted \code{segmented.lme} object returned by \code{segmented.lme()} and plots (or adds) the fitted broken-line relationship for the segmented term. } \usage{ \method{plot}{segmented.lme}(x, level=1, id = NULL, res = TRUE, pop = FALSE, yscale = 1, xscale = 1, n.plot, pos.leg = "topright", vline = FALSE, lines = TRUE, by=NULL, add=FALSE, conf.level=0, withI=TRUE, vcov.=NULL, shade=FALSE, drop.var=NULL, text.leg=NULL, id.name=TRUE, ci.psi.pop=-1, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ Object of class \code{"segmented.lme"} } \item{level}{ An integer giving the level of grouping to be used when computing the segmented relationship(s). \code{level=0} means depending on fixed effects estimates only (such estimates are also called, to some extent, 'population' or 'marginal' estimates), otherwise the segmented lines will also depend on the random effects predictions. } \item{id}{ A scalar or vector meaning which subjects' profiles have to be drawn. If \code{NULL} (default) all profiles are drawn. Ignored if \code{level=0}. } \item{res}{ If \code{TRUE}, the data points are also drawn. Ignored if \code{level=0}.
} \item{pop}{ if \code{TRUE}, the fitted segmented relationship based on fixed effects only is also portrayed. Ignored if \code{level=0}. } \item{yscale}{ If \code{>= 0}, a common y-scale is used for all 'subjects' (panels); otherwise the y-scale will depend on the actual (observed and fitted) values for each 'subject'. } \item{xscale}{ If \code{>= 0}, a common x-scale is used for all 'subjects' (panels); otherwise the x-scale will depend on the actual observed values of the segmented covariate for each 'subject'. } \item{n.plot}{ a vector to be passed to \code{par(mfrow=c(..))} for plotting multiple panels (should be consistent with \code{length(id)}). If missing, it is computed automatically depending on \code{length(id)}. Type \code{n.plot=1} to draw all the segmented profiles on the same panel. } \item{pos.leg}{ a character ('topright', 'topleft',...) meaning the location of the legend. Set \code{NULL} for no legend. } \item{vline}{ logical, if \code{TRUE} a vertical dashed segment is added to emphasize the breakpoint location. } \item{lines}{ logical, if \code{FALSE} points, rather than lines, are used to portray the segmented relationships. } \item{by}{ A named list indicating covariate names and corresponding values affecting the fitted segmented relationship. For instance: \code{by=list(sex="male",z=.2)}, provided that the variables \code{sex} and \code{z} affect the segmented relationship. Effective only if \code{level=0}. } \item{add}{ If \code{TRUE} the (fixed-effect) fitted segmented relationship is added to the current plot. Effective only if \code{level=0}. } \item{conf.level}{ The confidence level for pointwise confidence intervals. Effective only if \code{level=0}. } \item{withI}{ If \code{TRUE}, the level 0 segmented relationship is computed with the model intercept. Effective only if \code{level=0}. } \item{vcov.}{ The fixed effects covariance matrix. If \code{NULL}, it is computed by \code{vcov.segmented.lme()}. Effective only if \code{level=0}. } \item{shade}{ If \code{TRUE} (and \code{conf.level>0}) the area within the pointwise CIs is shaded. Effective only if \code{level=0}. } \item{drop.var}{ Possible coefficient names to be removed before computing the segmented relationship (e.g., the group-specific intercept). } \item{text.leg}{ If specified (and \code{pos.leg} has been also specified), it is the legend text. Effective only if \code{level=0}. } \item{id.name}{ If \code{pos.leg} is different from \code{NULL}, \code{id.name=TRUE} will portray the cluster variable name along with the value. Namely \code{id.name=TRUE} leads to 'country = italy' on each panel, while \code{id.name=FALSE} to 'italy'. } \item{ci.psi.pop}{ Should the point estimate and CI for the population breakpoint be added? If \code{ci.psi.pop>0} both the point estimate and CI (at level \code{ci.psi.pop}) are added; if \code{ci.psi.pop=0} only the point estimate. } \item{\dots}{ additional arguments, such as \code{ylab},\code{xlab}, \code{ylim} and \code{xlim}; \code{l.col,l.lwd,l.lty} (for the fitted individual lines - can be vectors and will be recycled); \code{p.col, p.lwd, p.lty} for the population line (if \code{pop=TRUE}); \code{col, cex, pch} for the data points (if \code{res=TRUE}); \code{t.col} for the legend color, if \code{pos.leg} is not \code{NULL}. If \code{level=0} and \code{conf.level>0}, \code{lty} and \code{lwd} can be vectors.
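% For instance, continuing the examples in ?segmented.lme where 'os' is the fitted object,
% plot(os, level=0, conf.level=.95, shade=TRUE) draws the fixed-effect (population) fit
% with shaded pointwise confidence intervals.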
} } \details{ The function plots the 'subject'-specific segmented profiles for the 'subjects' specificed in \code{id} or, if \code{level=0}, the fitted segmented relationship based on fixed effects only. The number of panels to drawn is actually the minimum between \code{length(id)} and \code{prod(n.plot)}, but if \code{n.plot=c(1,1)} (or also simply \code{n.plot=1}), the `individual' profiles will be pictured on the same panel. } \value{ A single or multiple (depending on \code{level} and \code{id}) plot showing the fitted segmented profiles. %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito Muggeo } \note{ If \code{by} is specified (and \code{level=0} is set), a legend is also added in the plot reporting covariate(s) name and value affecting the segmented relationship. Set \code{pos.leg=TRUE} to have no legend. On the other hand, use \code{text.leg} to add legend reporting the covariate baseline values. } \section{Warning }{ All the functions for segmented mixed models (*.segmented.lme) are still at an experimental stage } \seealso{ \code{\link{segmented.lme}} } \examples{ \dontrun{ #continues example from segmented.lme plot(os, yscale=-1) #different y-scales plot(os2, n.plot=1, l.col=2:6, l.lwd=2) #all segmented profiles on the same plot } } \keyword{ regression } \keyword{ nonlinear } \concept{changepoint} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/confint.stepmented.Rd0000644000176200001440000001021114610170135017067 0ustar liggesusers\name{confint.stepmented} \alias{confint.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Confidence intervals for jumpoints in stepmented regression} \description{ Computes confidence intervals for the changepoints (or jumpoints) in a fitted `stepmented' model. } \usage{ \method{confint}{stepmented}(object, parm, level=0.95, method=c("delta", "score", "gradient"), %var.diff=FALSE, round=TRUE, cheb=FALSE, digits=max(4, getOption("digits") - 1), .coef=NULL, .vcov=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted \code{stepmented} object. } \item{parm}{the stepmented variable of interest. If missing the first stepmented variable in \code{object} is considered. } \item{level}{the confidence level required, default to 0.95.} \item{method}{which confidence interval should be computed. One of \code{"delta"}, \code{"score"}, or \code{"gradient"}. Can be abbreviated. Currently only \code{"delta"} allowed.} \item{round}{logical. Should the values (estimates and lower/upper limits) rounded to the smallest observed value?} \item{cheb}{logical. If \code{TRUE}, the confidence limits are computed using the Chebyshev inequality which yields conservative confidence intervals but it is 'robust' to the non-normality of the changepoint sampling distribution. } %\item{var.diff}{logical. If \code{method="delta"}, and there is a single segmented variable, \code{var.diff=TRUE} leads to standard errors based on sandwich-type formula of the covariance matrix. 
See Details in \code{\link{summary.segmented}}.} %\item{is}{logical. If \code{method="delta"}, \code{is=TRUE} means that the full covariance matrix is computed via %\code{vcov(.., is=TRUE)}} \item{digits}{controls the number of digits to print when returning the output. } \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(object)}.} \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(object)}.} \item{\dots}{additional arguments passed to \code{vcov.stepmented}, namely \code{k}. %referring to Score-based confidence intervals, such as \code{"h"}, \code{"d.h"}, \code{"bw"}, \code{"msgWarn"}, and \code{"n%.values"} specifying the number of points used to profile the Score (or Gradient) statistic. } } \details{ \code{confint.stepmented} computes confidence limits for the changepoints. Currently the only option is \code{'delta'}, i.e. to compute the approximate covariance matrix via a smoothing approximation (see \code{\link{vcov.stepmented}}) and to build the limits using the standard Normal quantiles. Note that, the limits are rounded to the lowest observed value, thus the resulting confidence interval might not be symmetric if the stepmented covariate has not equispaced values. } \value{ A matrix including point estimate and confidence limits of the breakpoint(s) for the stepmented variable possibly specified in \code{parm}. } %\references{ %Muggeo, V.M.R. (2017) Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach. %\emph{Australian & New Zealand Journal of Statistics} \bold{59}, 311--322. %} \author{ Vito M.R. Muggeo } \note{ Currently only method='delta' is allowed. %\code{method="score"} or \code{method="gradient"} only works for segmented \emph{linear} model. For segmented \emph{generalized linear} model, % currently only \code{method="delta"} is available. } % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{stepmented}} and \code{\link{lines.segmented}} to plot the estimated breakpoints with corresponding confidence intervals. } \examples{ set.seed(10) x<-1:100 z<-runif(100) y<-2+2.5*(x>45)-1.5*(x>70)+z+rnorm(100) o<-stepmented(y, npsi=2) confint(o) #round=TRUE is default confint(o, round=FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/lines.stepmented.Rd0000644000176200001440000000573014523360321016554 0ustar liggesusers\name{lines.stepmented} \alias{lines.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Bars for interval estimate of the breakpoints } \description{ Draws bars relevant to breakpoint estimates (point estimate and confidence limits) on the current device } \usage{ \method{lines}{stepmented}(x, term, bottom = TRUE, shift=FALSE, conf.level = 0.95, k = 50, pch = 18, .vcov=NULL, .coef=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ an object of class \code{stepmented}. } \item{term}{ the stepmented variable of the breakpoints being drawn. It may be unspecified when there is a single stepmented variable.} \item{bottom}{ logical, indicating if the bars should be plotted at the bottom (\code{TRUE}) or at the top (\code{FALSE}).} \item{shift}{ logical, indicating if the bars should be `shifted' on the y-axis before plotting. 
Useful for multiple breakpoints with overlapped confidence intervals.} \item{conf.level}{ the confidence level of the confidence intervals for the breakpoints. } \item{k}{ a positive integer regulating the vertical position of the drawn bars. See Details. } \item{pch}{ either an integer specifying a symbol or a single character to be used in plotting the point estimates of the breakpoints. See \code{\link{points}}. } \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(x)}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(x)}.} \item{\dots}{ further arguments passed to \code{\link{arrows}}, for instance `col' that can be a vector. } } \details{ \code{lines.stepmented} simply draws on the current device the point estimates and relevant confidence limits of the estimated breakpoints from a "stepmented" object. The y coordinates where the bars are drawn is computed as \code{usr[3]+h} if \code{bottom=TRUE} or \code{usr[4]-h} when \code{bottom=FALSE}, where \code{h=(usr[4]-usr[3])/abs(k)} and \code{usr} are the extremes of the user coordinates of the plotting region. Therefore for larger values of \code{k} the bars are plotted on the edges. } %\value{ % ~Describe the value returned % If it is a LIST, use % \item{comp1 }{Description of 'comp1'} % \item{comp2 }{Description of 'comp2'} % ... %} %\references{ ~put references to the literature/web site here ~ } %\author{ ~~who you are~~ } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{plot.stepmented}} to plot the fitted stepmented lines} %\code{\link{points.segmented}} to add the fitted joinpoints. } \examples{ ## See ?plot.stepmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/stepmented.Rd0000644000176200001440000003355614757641006015465 0ustar liggesusers\name{stepmented} \alias{stepmented} \alias{stepmented.lm} \alias{stepmented.glm} \alias{stepmented.ts} \alias{stepmented.numeric} %\alias{stepmented.default} %\alias{stepmented.Arima} %\alias{print.stepmented} %\alias{summary.stepmented} %\alias{print.summary.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ stepmented relationships in regression models } \description{ Fits regression models with stepmented (i.e. piecewise-constant) relationships between the response and one or more explanatory variables. Break-point estimates are provided. } \usage{ stepmented(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), keep.class=FALSE, var.psi=FALSE, ...) %\method{stepmented}{default}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...) \method{stepmented}{lm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), keep.class=FALSE, var.psi=FALSE, ...) \method{stepmented}{glm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), keep.class=FALSE, var.psi=FALSE, ...) 
\method{stepmented}{numeric}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), keep.class=FALSE, var.psi=FALSE, ..., pertV=0, centerX=FALSE, adjX=NULL, weights=NULL) \method{stepmented}{ts}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), keep.class=FALSE, var.psi=FALSE, ..., pertV=0, centerX=FALSE, adjX=NULL) %\method{stepmented}{Arima}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), % model = TRUE, keep.class=FALSE, ...) } %- ------------------------------>>>>> "Arima" \arguments{ \item{obj}{A standard `linear' regression model of class "lm" or "glm". Alternatively, a simple "ts" object or a simple data vector may be supplied. } \item{seg.Z}{ the stepmented variables(s), i.e. the numeric covariate(s) understood to have a piecewise-constant relationship with response. It is a formula with no response variable, such as \code{seg.Z=~x} or \code{seg.Z=~x1+x2}. Currently, formulas involving functions, such as \code{seg.Z=~log(x1)}, or selection operators, such as \code{seg.Z=~d[,"x1"]} or \code{seg.Z=~d$x1}, are \emph{not} allowed. Also, variable names formed by \verb{U} or \verb{V} only (with or without numbers ) are not permitted. If missing, the index variable \code{id=1,2,..,n} is used. For \code{stepmented.ts}, \code{seg.Z} is usually unspecified as the (time) covariate is obtained by the \code{ts} object itself. } \item{psi}{ starting values for the breakpoints to be estimated. If there is a single stepmented variable specified in \code{seg.Z}, \code{psi} can be a numeric vector, and it can be missing when 1 breakpoint has to be estimated (and the median of the stepmented variable is used as a starting value). If \code{seg.Z} includes several covariates, \code{psi} has to be specified as a \emph{named} list of vectors whose names have to match the variables in the \code{seg.Z} argument. Each vector of such list includes starting values for the break-point(s) for the corresponding variable in \code{seg.Z}. A \code{NA} value means that `\code{K}' quantiles (or equally spaced values) are used as starting values; \code{K} is fixed via the \code{\link{seg.control}} auxiliary function. } \item{npsi}{A named vector or list meaning the \emph{number} (and not locations) of breakpoints to be estimated. The starting values will be internally computed via the quantiles or equally spaced values, as specified in argument \code{quant} in \code{\link{seg.control}}. \code{npsi} can be missing and \code{npsi=1} is assumed for all variables specified in \code{seg.Z}. If \code{psi} is provided, \code{npsi} is ignored.} \item{fixed.psi}{An optional named list including the breakpoint values to be kept fixed during the estimation procedure. The names should be a subset of (or even the same) variables specified in \code{seg.Z}. If there is a single variable in \code{seg.Z}, a simple numeric vector can be specified. Note that, in addition to the values specified here, \code{stepmented} will estimate additional breakpoints. To keep fixed all breakpoints (to be specified in \code{psi}) use \code{it.max=0} in \code{\link{seg.control}} } \item{control}{ a list of parameters for controlling the fitting process. See the documentation for \code{\link{seg.control}} for details. } %\item{model}{logical value indicating if the model.frame should be returned.} \item{keep.class}{logical value indicating if the final fit returned by \code{stepmented.default} should keep the class '\code{stepmented}' (along with the class of the original fit \code{obj}). 
Ignored by the stepmented methods. } \item{\dots}{ optional arguments (to be ignored safely). Notice that specific arguments relevant to the original call (via \code{lm} or \code{glm} for instance), such as \code{weights} or \code{offset}, have to be included in the starting model \code{obj}. } \item{pertV}{ Only for \code{stepmented.ts} and \code{stepmented.numeric}. } \item{centerX}{ Only for \code{stepmented.ts} and \code{stepmented.numeric}. If \code{TRUE}, the covariate is centered before fitting. } \item{adjX}{ Only for \code{stepmented.ts} and \code{stepmented.numeric}. If the response vector leads to a covariate with large values (such as years for ts objects), \code{adjX=TRUE} will shift the covariate to have a zero origin. Default is \code{NULL}, which means \code{TRUE} if the minimum of the covariate is 1000 or larger. } \item{var.psi}{ logical. If \code{TRUE}, the estimated covariance matrix is also computed via \code{\link{vcov.stepmented}}, and thus the breakpoint standard errors are also included in the \code{psi} component of the returned object. Default is \code{FALSE}, as computing the estimated covariance matrix is somewhat time-consuming when the sample size is large. } \item{weights}{ possible weights to include in the estimation process (only for \code{stepmented.numeric}). } %\item{only.mean}{ % logical (only for \code{stepmented.numeric}). If \code{FALSE}, changepoints will be estimated even for the dispersion (variance) model. The number of changepoints to estimate in the two sub-models can be specified via \code{npsi} which can be a vector wherein % \code{npsi[1]} refers to the mean model and \code{npsi[2]} to the variance model. If \code{npsi} is scalar, the same number of changepoints is estimated in the two submodels. % } } \details{ Given a linear regression model (usually of class "lm" or "glm"), \code{stepmented} tries to estimate a new regression model having piecewise-constant (i.e. step-function-like) relationships with the variables specified in \code{seg.Z}. A \emph{stepmented} relationship is defined by the mean level parameters and the break-points where the mean level changes. The number of breakpoints of each stepmented relationship depends on the \code{psi} argument, where initial values for the break-points must be specified. The model is estimated simultaneously yielding point estimates and relevant approximate standard errors of all the model parameters, including the break-points. \code{stepmented} implements the algorithm described in Fasola et al. (2018) along with bootstrap restarting (Wood, 2001) to escape local optima. The procedure turns out to be particularly appealing and efficient when there are two or more covariates exhibiting different change points to be estimated. See also section `Note' below. } \value{ The returned object is of class "stepmented" which inherits from the class "lm" or "glm" depending on the class of \code{obj}. When \code{only.mean=FALSE}, it is a list having two 'stepmented' fits (for the mean and for the dispersion submodels).
\cr An object of class "stepmented" is a list containing the components of the original object \code{obj} with, in addition, the following components: \item{psi}{estimated break-points and relevant (approximate) standard errors (on the continuum)} \item{psi.rounded}{the rounded estimated break-points (see Note, below)} \item{it}{number of iterations employed} \item{epsilon}{difference in the objective function when the algorithm stops} \item{model}{the model frame} \item{psi.history}{a list or a vector including the breakpoint estimates at each step} \item{seed}{the integer vector containing the seed just before the bootstrap resampling. Returned only if bootstrap restart is employed} \item{..}{Other components are not of direct interest to the user} } %\section{ Warning }{ %It is well-known that the log-likelihood function for the %break-point may be not concave, especially %for poor clear-cut kink-relationships. In these circumstances, the initial guess % for the break-point, i.e. the \code{psi} argument, must be provided with care. % For instance visual %inspection of a, possibly smoothed, scatter-plot is usually a good way to obtain some idea on breakpoint location. %However bootstrap restarting, implemented since version 0.2-9.0, is relatively more robust to starting values specified %in \code{psi}. Alternatively an automatic procedure may be implemented by specifying \code{psi=NA} and \code{fix.npsi=FALSE} in %\code{\link{seg.control}}: experience suggests to increase the number of iterations via \code{it.max} in \code{seg.control()}. This automatic procedure, however, is expected to overestimate the number of breakpoints. %} \note{ Assuming a single changepoint \eqn{\psi} for the covariate \eqn{x}, the underlying fitted stepmented relationship is \eqn{\beta_0+\beta_1 \ I(x>\psi)}, namely the fitted value (on the linear predictor scale) is \eqn{\beta_0} if \eqn{x\le \psi}, and \eqn{\beta_0+\beta_1} when \eqn{x > \psi}. While the point estimate \eqn{\hat\psi} returned (in the \code{psi} component of the fit object) is a unique real number, there actually exist infinitely many solutions in the range \eqn{[a, b)} where the extremes are the two closest \emph{observed} covariate values such that \eqn{a \le \hat\psi < b}; the component \code{psi.rounded} reports the corresponding rounded, i.e. \emph{observed}, values delimiting such intervals. } \examples{ n=100 x<-1:n/n mu<- 2+ 1*(x>.6) y<- mu + rnorm(n)*.8 #fitting via regression model os <-stepmented(lm(y~1),~x) y<-ts(y) os1<- stepmented(y) #the 'ts' method os2<- stepmented(y, npsi=2) #plot(y) #plot(os1, add=TRUE) #plot(os2, add=TRUE, col=3:5) ### Example with (poisson) GLM y<- rpois(n,exp(mu)) o<-stepmented(glm(y~1,family=poisson)) plot(o, res=TRUE) \dontrun{ ## Example using the (well-known) Nile dataset data(Nile) plot(Nile) os<- stepmented(Nile) plot(os, add=TRUE) ### Example with (binary) GLM (example from the package stepR) set.seed(1234) y <- rbinom(200, 1, rep(c(0.1, 0.7, 0.3, 0.9), each=50)) o<-stepmented(glm(y~1,family=binomial), npsi=3) plot(o, res=TRUE) ### Two stepmented covariates (with 1 and 2 breakpoints); z has also an additional linear effect n=100 x<-1:n/n z<-runif(n,2,5) mu<- 2+ 1*(x>.6)-2*(z>3)+3*(z>4)+z y<- mu + rnorm(n)*.8 os <-stepmented(lm(y~z),~x+z, npsi=c(x=1,z=2)) os summary(os) ## see ?plot.stepmented } } %# An example using the Arima method: %\dontrun{ %n<-50 %idt <-1:n #the time index % %mu<-50-idt +1.5*pmax(idt-30,0) %set.seed(6969) %y<-mu+arima.sim(list(ar=.5),n)*3.5 % %o<-arima(y, c(1,0,0), xreg=idt) %os1<-stepmented(o, ~idt, control=seg.control(display=TRUE)) % %#note using the .coef argument is mandatory!
%slope(os1, .coef=os1$coef) %plot(y) %plot(os1, add=TRUE, .coef=os1$coef, col=2) %} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} \keyword{nonlinear } segmented/man/points.segmented.Rd0000644000176200001440000000541714424413732016570 0ustar liggesusers\name{points.segmented} \alias{points.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Points method for segmented objects } \description{ Takes a fitted \code{segmented} object returned by \code{segmented()} and adds on the current plot the joinpoints of the fitted broken-line relationships. } \usage{ \method{points}{segmented}(x, term, interc = TRUE, link = TRUE, rev.sgn=FALSE, transf=I, .vcov=NULL, .coef=NULL, const=0, v=TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ an object of class \code{segmented}. } \item{term}{ the segmented variable of interest. It may be unspecified when there is a single segmented variable. } \item{interc}{ If \code{TRUE} the computed joinpoints include the model intercept (if it exists). } \item{link}{ when \code{TRUE} (default), the fitted joinpoints are plotted on the link scale } \item{rev.sgn}{ when \code{TRUE}, the fitted joinpoints are plotted on the `minus' scale of the current \code{term} variable. This is useful when a null-constraint has been set on the last slope. } \item{transf}{ A possible function to convert the fitted values before plotting. } \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov()}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(x)}.} \item{const}{ A constant to be added (on the y-scale) to the values before transforming and plotting.} \item{v}{logical. If \code{TRUE}, vertical lines at the breakpoints are also drawn.} \item{\dots}{ other graphics parameters to pass on to \code{points()} and \code{segments()} (if \code{v=TRUE}). } } \details{ We call 'joinpoint' the plane point having as coordinates the breakpoints (on the x scale) and the fitted values of the segmented relationship at that breakpoints (on the y scale). \code{points.segmented()} simply adds the fitted joinpoints on the current plot. This could be useful to emphasize the changes of the piecewise linear relationship. } %\value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... %} %\references{ %% ~put references to the literature/web site here ~ %} %\author{ %% ~~who you are~~ %} %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{plot.segmented}} to plot the fitted segmented lines. } \examples{ \dontrun{ #see examples in ?plot.segmented } } \keyword{ nonlinear } \keyword{ regression }% __ONLY ONE__ keyword per line segmented/man/seg.control.Rd0000644000176200001440000002363414630275405015542 0ustar liggesusers\name{seg.control} \alias{seg.control} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Auxiliary for controlling segmented/stepmented model fitting } \description{ Auxiliary function as user interface for 'segmented' and 'stepmented' fitting. Typically only used when calling any 'segmented' or 'stepmented' method. 
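For instance, a minimal illustrative sketch (with placeholder simulated data, not taken from the package examples) of how the control settings are typically passed to a fitting call is: \preformatted{
## pass the control settings to a segmented() call (illustrative only)
set.seed(1)
x <- 1:100
y <- 2 + 1.5 * pmax(x - 40, 0) + rnorm(100, 0, 5)
os <- segmented(lm(y ~ x), seg.Z = ~x,
                control = seg.control(n.boot = 50, display = TRUE))
}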
} \usage{ seg.control(n.boot=10, display = FALSE, tol = 1e-05, it.max = 30, fix.npsi=TRUE, K = 10, quant = FALSE, maxit.glm = NULL, h = 1.25, break.boot=5, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=TRUE, seed=NULL, fn.obj=NULL, digits=NULL, alpha = NULL, fc=.95, check.next=TRUE, tol.opt=NULL, fit.psi0=NULL, eta=NULL, min.nj=2) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{n.boot}{ number of bootstrap samples used in the bootstrap restarting algorithm. If 0 the standard algorithm, i.e. without bootstrap restart, is used. Default to 10 that appears to be sufficient in most of problems. However when multiple breakpoints have to be estimated it is suggested to increase \code{n.boot}, e.g. \code{n.boot=50}, and even \code{break.boot}.} \item{display}{ logical indicating if the value of the objective function should be printed along with current breakpoint estimates at each iteration or at each bootstrap resample (but no more than 5 breakpoints are printed). If bootstrap restarting is employed, the values of objective and breakpoint estimates should not change at the last runs.} \item{tol}{ positive convergence tolerance. } \item{it.max}{ integer giving the maximal number of iterations. } \item{fix.npsi}{logical (it replaces previous argument \code{stop.if.error}) If \code{TRUE} (default) the \emph{number} (and not location) of breakpoints is held fixed throughout iterations. Otherwise a sort of `automatic' breakpoint selection is carried out, provided that several starting values are supplied for the breakpoints, see argument \code{psi} in \code{\link{segmented.lm}} or \code{\link{segmented.glm}}. The idea, relying on removing the `non-admissible' breakpoint estimates at each iteration, is discussed in Muggeo and Adelfio (2011) and it is not compatible with the bootstrap restart algorithm. \code{fix.npsi=FALSE}, indeed, should be considered as a preliminary and tentative approach to deal with an unknown number of breakpoints. Alternatively, see \code{\link{selgmented}}.} \item{K}{ the number of quantiles (or equally-spaced values) to supply as starting values for the breakpoints when the \code{psi} argument of \code{segmented} is set to \code{NA}. \code{K} is ignored when \code{psi} is different from \code{NA}. } \item{quant}{logical, indicating how the starting values should be selected. If \code{FALSE} equally-spaced values are used, otherwise the quantiles. Ignored when \code{psi} is different from \code{NA}.} \item{maxit.glm}{ integer giving the maximum number of inner IWLS iterations (see details). If \code{NULL}, the number is low in the first iterations and then increases as th eprocess goes on. Ignored for segmented lm fits } \item{h}{ positive factor modifying the increments in breakpoint updates during the estimation process (see details). } \item{break.boot}{ Integer, less than \code{n.boot}. If \code{break.boot} consecutive bootstrap samples lead to the same objective function during the estimation process, the algorithm stops without performing all \code{n.boot} 'trials'. This can save computational time considerably. Default is \code{5} for the \code{segmented} and \code{5+2} for the \code{stepmented} functions. However if the number of changepoints is large, \code{break.boot} should be increased, even 10 or 15. } \item{size.boot}{the size of the bootstrap samples. If \code{NULL}, it is taken equal to the actual sample size. If the sample is very large, the idea is to run bootstrap restarting using smaller bootstrap samples.} \item{jt}{logical. 
If \code{TRUE} the values of the segmented variable(s) are jittered before fitting the model to the bootstrap resamples.} \item{nonParam}{ if \code{TRUE} nonparametric bootstrap (i.e. case-resampling) is used, otherwise residual-based. Currently working only for LM fits. It is not clear what residuals should be used for GLMs.} \item{random}{ if \code{TRUE}, when the algorithm fails to obtain a solution, random values are employed to obtain candidate values. } \item{seed}{ The seed to be passed on to \code{set.seed()} when \code{n.boot>0}. If \code{NULL}, a seed depending on the response values is generated and used. Otherwise it can be a numerical value or, if \code{NA}, a random value is generated. Fixing the seed can be useful to replicate \emph{exactly} the results when the bootstrap restart algorithm is employed. Whichever choice, the segmented fit includes the component \code{seed} representing the value saved just before the bootstrap resampling. Re-use it if you want to replicate the bootstrap restarting algorithm with the \emph{same} re-samples. } \item{fn.obj}{ A \emph{character string} to be used (optionally) only when \code{segmented.default} is used. It represents the function (with argument \code{'x'}) to be applied to the fit object to extract the objective function to be \emph{minimized}. Thus for \code{"lm"} fits (although unnecessary) it should be \code{fn.obj="sum(x$residuals^2)"}, for \code{"coxph"} fits it should be \code{fn.obj="-x$loglik[2]"}. If \code{NULL} the `minus log likelihood' extracted from the object, namely \code{"-logLik(x)"}, is used. See \code{\link{segmented.default}}. } \item{digits}{optional. If specified it means the desidered number of decimal points of the breakpoint to be used during the iterative algorithm. } % \item{conv.psi}{optional. Should convergence of iterative procedure to be assessed on changes of breakpoint estimates or changes in the %objective? Default to FALSE. % } \item{alpha}{optional numerical values. The breakpoints are estimated within the quantiles \code{alpha[1]} and \code{alpha[2]} of the relevant covariate. If a single value is provided, it is assumed \code{alpha} and \code{1-alpha}. Defaults to \code{NULL} which means \code{alpha=max(.05, 1/n)}. Note: Providing \code{alpha=c(mean(x<=a),mean(x<=b))} means to constrain the breakpoint estimates within \eqn{[a,b]}{[a,b]}. } % \item{min.step}{optional. The minimum step size to break the iterative algorithm. Default to 0.0001.} % \item{stop.if.error}{ same than \code{fix.npsi}. \emph{This argument will be removed in next releases}, %and replaced by % \code{fix.npsi}. % If provided, and different from \code{NULL}, it overwrites \code{fix.npsi} % } \item{fc}{A proportionality factor (\eqn{\le 1}{<= 1}) to adjust the breakpoint estimates \emph{if} these come close to the boundary or too close each other. For instance, if \code{psi} turns up close to the maximum, it will be changed to \code{psi*fc} or to \code{psi/fc} if close to the minimum. This is useful to get finite point estimate and standard errors for each slope paramete. } \item{check.next}{logical, effective only for stepmented fit. If \code{TRUE} the solutions next to the current one are also investigated. } \item{tol.opt}{Numerical value to be passed to \code{tol} in \code{\link{optimize}}. } \item{fit.psi0}{Possible list including preliminary values. } \item{eta}{Only for segmented/stepmented fits: starting values to be passed to \code{etastart} in \code{\link{glm.fit}}. 
} \item{min.nj}{How many observations (at least) should be in the covariate intervals induced by the breakpoints?} } \details{ Fitting a `segmented' GLM model is attained via fitting iteratively standard GLMs. The number of (outer) iterations is governed by \code{it.max}, while the (maximum) number of (inner) iterations to fit the GLM at each fixed value of psi is fixed via \code{maxit.glm}. Usually three-four inner iterations may be sufficient. When the starting value for the breakpoints is set to \code{NA} for any segmented variable specified in \code{seg.Z}, \code{K} values (quantiles or equally-spaced) are selected as starting values for the breakpoints. %In this case, it may be useful to set also \code{fix.npsi=FALSE} to automate the procedure, see Muggeo and Adelfio (2011). %The maximum number of iterations (\code{it.max}) should be also increased when the `automatic' procedure is used. % If \code{last=TRUE}, the object resulting from \code{segmented.lm} (or \code{segmented.glm}) is a % list of fitted GLM; the i-th model is the segmented model with the values of the breakpoints at the i-th %iteration. Since version 0.2-9.0 \code{segmented} implements the bootstrap restarting algorithm described in Wood (2001). The bootstrap restarting is expected to escape the local optima of the objective function when the segmented relationship is noisy and the loglikelihood can be flat. Notice bootstrap restart runs \code{n.boot} iterations regardless of \code{tol} that only affects convergence within the inner loop. } \value{ A list with the arguments as components. } \references{ Muggeo, V.M.R., Adelfio, G. (2011) Efficient change point detection in genomic sequences of continuous measurements. \emph{Bioinformatics} \bold{27}, 161--166. Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244. } \author{ Vito Muggeo } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ #decrease the maximum number inner iterations and display the #evolution of the (outer) iterations #seg.control(display = TRUE, maxit.glm=4) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/globTempAnom.Rd0000644000176200001440000000203014563642202015652 0ustar liggesusers\name{globTempAnom} \alias{globTempAnom} \docType{data} \title{ Global temperature anomalies 1850-2023} \description{ The \code{globTempAnom} data frame includes the global surface temperature anomalies from 1850 to 2023. } \usage{data(globTempAnom)} \format{ The included variables are (clearly). \describe{ \item{\code{Year}}{the calendar year.} \item{\code{Anomaly}}{the temperature anomalies computed as differences of the annual (average) measurement with respect to the 20th century average (1901-2000). } } } \details{ Data refer to averages measurements referring to land and ocean surface of Northern and Southern hemisphere. } \source{ https://www.ncei.noaa.gov/access/monitoring/global-temperature-anomalies/anomalies } \references{ There are several references using such dataset, e.g. Cahill, N., Rahmstorf, S., and Parnell, A. C. (2015). Change points of global temperature. \emph{Environmental Research Letters}, 10: 1-6. 
} \examples{ data(globTempAnom) } \keyword{datasets} segmented/man/summary.stepmented.Rd0000644000176200001440000001235414606213143017140 0ustar liggesusers\name{summary.stepmented} \alias{summary.stepmented} \alias{print.summary.stepmented} \alias{print.stepmented} \title{ Summarizing model fits for stepmented regression } \description{ summary/print method for class \code{stepmented}. } \usage{ \method{summary}{stepmented}(object, short = FALSE, var.diff = FALSE, p.df="p", .vcov=NULL, ...) \method{print}{summary.stepmented}(x, short=x$short, var.diff=x$var.diff, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"),...) \method{print}{stepmented}(x, digits = max(3, getOption("digits") - 3), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object, x}{ Object of class "stepmented" or a \code{summary.stepmented} object produced by \code{summary.stepmented()}. } \item{short}{ logical indicating if the `short' summary should be printed. } \item{var.diff}{ logical indicating if different error variances should be computed in each interval of the stepmented variable, see Details. If \code{.vcov} is provided, \code{var.diff} is set to \code{FALSE}. } \item{p.df}{ A character as a function of \code{'p'} (number of parameters) and \code{'K'} (number of groups or segments) affecting computations of the group-specific variance (and the standard errors) if \code{var.diff=TRUE}, see Details.} \item{.vcov}{ Optional. The full covariance matrix for the parameter estimates. If provided, standard errors are computed (and displayed) according to this matrix.} %\item{x}{a \code{summary.stepmented} object produced by \code{summary.stepmented()}.} \item{digits}{controls number of digits printed in output.} \item{signif.stars}{logical, should stars be printed on summary tables of coefficients?} \item{\dots}{ further arguments, notably \code{type} to be passed to \code{vcov.stepmented} to compute the standard errors. See \code{\link{vcov.stepmented}}. } } \details{ If \code{short=TRUE} only coefficients of the stepmented relationships are printed. If \code{var.diff=TRUE} and there is only one stepmented variable, different error variances are computed in the intervals defined by the estimated breakpoints of the stepmented variable. For the jth interval with \eqn{n_j}{nj} observations, the error variance is estimated via \eqn{RSS_j/(n_j-p)}{RSSj/(nj-p)}, where \eqn{RSS_j} is the residual sum of squares in interval j, and \eqn{p}{p} is the number of model parameters. This number to be subtracted from \eqn{n_j}{nj} can be changed via argument \code{p.df}. For instance \code{p.df="0"} uses \eqn{RSS_j/(n_j)}{RSSj/(nj)}, and \code{p.df="p/K"} leads to \eqn{RSS_j/(n_j-p/K)}{RSSj/(nj-p/K)}, where \eqn{K}{K} is the number of groups (segments), and \eqn{p/K}{p/K} can be interpreted as the average number of model parameter in that group. Note \code{var.diff=TRUE} only affects the estimates covariance matrix. It does \emph{not} affect the parameter estimates, neither the log likelihood and relevant measures, such as AIC or BIC. In other words, \code{var.diff=TRUE} just provides 'alternative' standard errors, probably appropriate when the error variances are different before/after the estimated breakpoints. Also \eqn{p-values}{p-values} are computed using the t-distribution with 'naive' degrees of freedom (as reported in \code{object$df.residual}). 
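As a rough illustration of the \code{p.df} options only (this is not the code used internally; \code{os} denotes a generic stepmented fit with a single stepmented variable \code{x} whose model frame and \code{psi} matrix, assumed to have an \code{"Est."} column, have been stored): \preformatted{
## interval-specific error variances under the different p.df choices
g   <- cut(os$model$x, c(-Inf, os$psi[, "Est."], Inf)) # intervals induced by the jumpoints
RSS <- tapply(resid(os)^2, g, sum)                     # interval-specific residual sums of squares
nj  <- table(g)                                        # interval-specific sample sizes
p   <- length(coef(os)); K <- nlevels(g)
RSS/(nj - p)        # p.df="p" (default)
RSS/nj              # p.df="0"
RSS/(nj - p/K)      # p.df="p/K"
}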
If \code{var.diff=TRUE} the variance-covariance matrix of the estimates is computed via the sandwich formula, \deqn{(X^TX)^{-1}X^TVX(X^TX)^{-1}}{(X'X)^{-1}X'VX(X'X)^{-1}} where V is the diagonal matrix including the different group-specific error variance estimates. Standard errors are the square root of the main diagonal of this matrix. } \value{ A list (similar to the one returned by \code{stepmented.lm} or \code{stepmented.glm}) with additional components: \item{psi }{estimated break-points and relevant (approximate) standard errors} \item{Ttable }{estimates and standard errors of the model parameters. This is similar to the matrix \code{coefficients} returned by \code{summary.lm} or \code{summary.glm}, but without the rows corresponding to the breakpoints. Even the p-values relevant to the difference-in-slope parameters have been replaced by NA, since they are meaningless in this case, see \code{\link{davies.test}}.} \item{cov.var.diff}{if \code{var.diff=TRUE}, the covariance matrix accounting for heteroscedastic errors.} \item{sigma.new}{if \code{var.diff=TRUE}, the square root of the estimated error variances in each interval.} \item{df.new}{if \code{var.diff=TRUE}, the residual degrees of freedom in each interval.} } %\references{ ~put references to the literature/web site here ~ } \author{ Vito M.R. Muggeo } \section{Warning }{If \code{type} is not specified in \code{...} (which means \code{type="standard"}), no standard error will be computed (and returned) for the jumpoint. } \seealso{ \code{\link{pscore.test}}} \examples{ ##continues example from stepmented() # summary(stepmented.model,short=TRUE) ## a heteroscedastic example.. # set.seed(123) # n<-100 # x<-1:n/n # y<- -x+1.5*pmax(x-.5,0)+rnorm(n,0,1)*ifelse(x<=.5,.4,.1) # o<-lm(y~x) # oseg<-stepmented(o,seg.Z=~x,psi=.6) # summary(oseg,var.diff=TRUE)$sigma.new } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/segreg.Rd0000644000176200001440000001315614757574553014563 0ustar liggesusers\name{segreg} \alias{segreg} \alias{stepreg} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fitting segmented/stepmented regression } \description{ \code{segreg} (\code{stepreg}) fits (generalized) linear segmented (stepmented) regression via a symbolic description of the linear predictor. This is an alternative but equivalent function, introduced in version 2.0-0 (segreg) and 2.1-0 (stepreg), to \code{segmented.(g)lm} or \code{stepmented.(g)lm}. } \usage{ segreg(formula, data, subset, weights, na.action, family = lm, control = seg.control(), transf = NULL, contrasts = NULL, model = TRUE, x = FALSE, var.psi = TRUE, ...) stepreg(formula, data, subset, weights, na.action, family = lm, control = seg.control(), transf = NULL, contrasts = NULL, model = TRUE, x = FALSE, var.psi = FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{formula}{ A standard model formula also including one or more 'segmented'/'stepmented' terms via the function \code{\link{seg}} } \item{data}{ The possible dataframe where the variables are stored } \item{subset}{ Possible subset, as in \code{\link{lm}} or \code{\link{glm}} } \item{weights}{ Possible weight vector, see \code{weights} in \code{\link{lm}} or \code{\link{glm}} } \item{na.action}{ a function which indicates what happens when the data contain NA values. See \code{na.action} in \code{\link{lm}} or \code{\link{glm}}.
} \item{family}{ The family specification, similar to \code{family} in \code{\link{glm}}. Defaults to \code{'lm'} for segmented/stepmented linear models. } \item{control}{ See \code{\link{seg.control}} } \item{transf}{ an optional character string (with "y" as argument) indicating a function to apply to the response variable before fitting } \item{contrasts}{ see \code{contrasts} in \code{\link{glm}} } \item{model}{ If \code{TRUE}, the model frame is returned. } \item{x}{ If \code{TRUE}, the model matrix is returned. } \item{var.psi}{ logical, indicating whether the standard errors for the breakpoint estimates should be returned in the fit object. If \code{FALSE}, the standard errors will be computed by \code{\link{vcov.segmented}} or \code{\link{summary.segmented}}. Setting \code{var.psi=FALSE} could speed up model estimation for very large datasets. Defaults to \code{TRUE} for \code{segreg} and \code{FALSE} for \code{stepreg}. } \item{\dots}{ Ignored } } \details{ The function allows fitting segmented/stepmented (G)LM regression models using a formula interface. Results will be the same as those coming from the traditional \code{segmented.lm} and \code{segmented.glm} (or \code{stepmented.lm} or \code{stepmented.glm}), but there are some additional facilities: i) it is possible to estimate straightforwardly the segmented/stepmented relationships in each level of a categorical variable, see argument \code{by} in \code{\link{seg}}; ii) it is possible to constrain some slopes of the segmented relationship, see argument \code{est} or \code{R} in \code{\link{seg}}. See \code{\link{segmented}} and \code{\link{stepmented}} for some details on the fit objects. } \value{ An object of class "segmented" (or "stepmented") which inherits from the class "lm" or "glm" depending on \code{family} specification. See \code{\link{segmented.lm}}. } \references{ Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points. Statistics in Medicine 22, 3055-3071. } \author{ Vito Muggeo } \note{ When the formula includes even a single segmented term with constraints (specified via the argument \code{est} in \code{seg()}), the relevant coefficients returned do not represent the slope differences as in \code{segmented.lm} or \code{segmented.glm}. The values depend on the constraints and are not usually interpretable. Use \code{\link{slope}} to recover the actual slopes of the segmented relationships. } \section{Warning }{ Currently for fits returned by \code{segreg}, \code{\link{confint.segmented}} only works if \code{method="delta"}. Constraints on the mean levels (possibly via argument 'est' of \code{seg}) are not yet allowed when calling \code{stepreg}.
} \seealso{ \code{\link{seg}}, \code{\link{segmented}}, \code{\link{stepmented}} } \examples{ ########################### #An example using segreg() ########################### set.seed(10) x<-1:100 z<-runif(100) w<-runif(100,-10,-5) y<-2+1.5*pmax(x-35,0)-1.5*pmax(x-70,0)+10*pmax(z-.5,0)+rnorm(100,0,2) ##the traditional approach out.lm<-lm(y~x+z+w) o<-segmented(out.lm, seg.Z=~x+z, psi=list(x=c(30,60),z=.4)) o1<-segreg(y ~ w+seg(x,npsi=2)+seg(z)) all.equal(fitted(o), fitted(o1)) #put some constraints on the slopes o2<-segreg(y ~ w+seg(x,npsi=2, est=c(0,1,0))+seg(z)) o3<-segreg(y ~ w+seg(x,npsi=2, est=c(0,1,0))+seg(z, est=c(0,1))) slope(o2) slope(o3) ##see ?plant for an additional example ########################### #An example using stepreg() ########################### ### Two stepmented covariates (with 1 and 2 breakpoints) n=100 x<-1:n/n z<-runif(n,2,5) w<-rnorm(n) mu<- 2+ 1*(x>.6)-2*(z>3)+3*(z>4) y<- mu + rnorm(n)*.8 os <-stepreg(y~seg(x)+seg(z,2)+w) #also includes 'w' as a possible linear term os summary(os) plot(os, "z", col=2:4) #plot the effect of z } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. segmented/man/plot.stepmented.Rd0000644000176200001440000001272514666315154016436 0ustar liggesusers\name{plot.stepmented} \alias{plot.stepmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Plot method for stepmented objects } \description{ Takes a fitted \code{stepmented} object returned by \code{stepmented()} and plots (or adds) the fitted piecewise constant lines for the selected stepmented term. } \usage{ \method{plot}{stepmented}(x, term, add = FALSE, res = TRUE, conf.level=0, interc = TRUE, add.fx = FALSE, psi.lines = TRUE, link=TRUE, const=NULL, res.col=grey(.15, alpha = .4), surf=FALSE, zero.cor=TRUE, heurs=TRUE, shade=FALSE, se.type=c("cdf","abs","none"), k=NULL, .vcov=NULL, leg="topleft", ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a fitted \code{stepmented} object. } \item{term}{ the stepmented variable having the piece-wise constant relationship to be plotted. If there is a single stepmented variable in the fitted model \code{x}, \code{term} can be omitted. } \item{add}{ when \code{TRUE} the fitted lines are added to the current device. } \item{res}{ when \code{TRUE} the fitted lines are plotted along with corresponding partial residuals. } \item{conf.level}{ the confidence level for the pointwise confidence intervals for the expected values. } \item{interc}{ if \code{TRUE} the computed components include the model intercept (if it exists). } \item{add.fx}{ logical. If TRUE and the object fit also includes an additional term for the same stepmented variable, the plot also portrays such `additional' term. } \item{psi.lines}{ if \code{TRUE} vertical lines corresponding to the estimated changepoints are also drawn } \item{link}{ if \code{FALSE} the fitted lines (and possibily the residuals) are reported on the response scale. Ignored if the fit object \code{x} is not a glm-like fit. } \item{const}{ constant to add to each fitted segmented relationship (on the scale of the linear predictor) before plotting. 
If \code{const=NULL} and the fit includes a segmented interaction term (obtained via \code{seg(..,by)} in the formula), the group-specific intercept is included. } \item{res.col}{when \code{res=TRUE}, the color of the points representing the partial residuals.} \item{surf}{ if the fit object \code{x} includes 2 stepmented covariates (x1 and x2, say) with relevant estimated breakpoints, \code{surf=TRUE} will draw on the plane x1-x2 the areas split according to the estimated breakpoints with corresponding estimated means superimposed. } \item{zero.cor}{ see \code{zero.cor} in \code{\link{vcov.stepmented}}; effective only if \code{conf.level>0}. } \item{heurs}{ logical; if \code{TRUE}, heuristic (usually somewhat conservative) confidence intervals are computed and plotted; effective only if \code{conf.level>0}. } \item{shade}{ if \code{TRUE} the pointwise confidence intervals are portrayed via a shaded area; effective only if \code{conf.level>0}. } \item{se.type}{ which standard errors should be computed? See \code{type} in \code{\link{vcov.stepmented}}; effective only if \code{conf.level>0}. } \item{k}{ The value to be passed to \code{vcov.stepmented} to compute the standard errors. } \item{.vcov}{ The estimated variance-covariance matrix; if \code{NULL}, it is computed internally by \code{\link{vcov.stepmented}}. } \item{leg}{ If the plot refers to stepmented relationships in groups, i.e. \code{term} has been specified as a vector, a legend is placed at the specified \code{leg} position. Set \code{NA} to draw no legend. } \item{\dots}{ other graphics parameters to pass to plotting commands: `col', `lwd' and `lty' (that can be vectors and are recycled if necessary, see the example below) for the fitted piecewise constant lines; `ylab', `xlab', `main', `sub', `cex.axis', `cex.lab', `xlim' and `ylim' when a new plot is produced (i.e. when \code{add=FALSE}); `pch' and `cex' for the partial residuals (when \code{res=TRUE}, \code{res.col} is for the color).% \code{col.shade} for the shaded regions (provided that \code{shade=TRUE} and \code{conf.level>0}). } } \details{ Produces (or adds to the current device) the fitted step-function-like relationship between the response and the selected \code{term}. If the fitted model includes just a single `stepmented' variable, \code{term} may be omitted. If \code{surf=TRUE} and \code{res=TRUE}, the point widths are proportional to the partial residual values. } \value{ None. } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito M. R. Muggeo } \note{ Implementation of confidence intervals for the conditional means in stepmented regression is under development; \code{conf.level>0} should be used with care, especially with multiple jumpoints. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{stepmented}} } \examples{ #Following code in stepmented.. \dontrun{ par(mfrow=c(1,3)) plot(os,"x") plot(os,"z") plot(os,"z", add.fx=TRUE, psi.lines=FALSE ) lines(os, "z") #display the 'surface' par(mfrow=c(1,3)) plot(os, surf=TRUE, col=1, res.col=2) plot(os, surf=TRUE, lty=2) plot(x,z) plot(os, surf=TRUE, add=TRUE, col=4, res=FALSE) } } \keyword{ regression } \keyword{ nonlinear } \keyword{ hplot } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line.
segmented/man/summary.segmented.Rd0000644000176200001440000001165614415476772016770 0ustar liggesusers\name{summary.segmented} \alias{summary.segmented} \alias{print.summary.segmented} \title{ Summarizing model fits for segmented regression } \description{ summary method for class \code{segmented}. } \usage{ \method{summary}{segmented}(object, short = FALSE, var.diff = FALSE, p.df="p", .vcov=NULL, ...) \method{print}{summary.segmented}(x, short=x$short, var.diff=x$var.diff, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"),...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ Object of class "segmented". } \item{short}{ logical indicating if the `short' summary should be printed. } \item{var.diff}{ logical indicating if different error variances should be computed in each interval of the segmented variable, see Details. If \code{.vcov} is provided, \code{var.diff} is set to \code{FALSE}. } \item{p.df}{ A character as a function of \code{'p'} (number of parameters) and \code{'K'} (number of groups or segments) affecting computations of the group-specific variance (and the standard errors) if \code{var.diff=TRUE}, see Details.} \item{.vcov}{ Optional. The full covariance matrix for the parameter estimates. If provided, standard errors are computed (and displayed) according to this matrix.} \item{x}{a \code{summary.segmented} object produced by \code{summary.segmented()}.} \item{digits}{controls number of digits printed in output.} \item{signif.stars}{logical, should stars be printed on summary tables of coefficients?} \item{\dots}{ further arguments. } } \details{ If \code{short=TRUE} only coefficients of the segmented relationships are printed. If \code{var.diff=TRUE} and there is only one segmented variable, different error variances are computed in the intervals defined by the estimated breakpoints of the segmented variable. For the jth interval with \eqn{n_j}{nj} observations, the error variance is estimated via \eqn{RSS_j/(n_j-p)}{RSSj/(nj-p)}, where \eqn{RSS_j} is the residual sum of squares in interval j, and \eqn{p}{p} is the number of model parameters. This number to be subtracted from \eqn{n_j}{nj} can be changed via argument \code{p.df}. For instance \code{p.df="0"} uses \eqn{RSS_j/(n_j)}{RSSj/(nj)}, and \code{p.df="p/K"} leads to \eqn{RSS_j/(n_j-p/K)}{RSSj/(nj-p/K)}, where \eqn{K}{K} is the number of groups (segments), and \eqn{p/K}{p/K} can be interpreted as the average number of model parameter in that group. Note \code{var.diff=TRUE} only affects the estimates covariance matrix. It does \emph{not} affect the parameter estimates, neither the log likelihood and relevant measures, such as AIC or BIC. In other words, \code{var.diff=TRUE} just provides 'alternative' standard errors, probably appropriate when the error variances are different before/after the estimated breakpoints. Also \eqn{p-values}{p-values} are computed using the t-distribution with 'naive' degrees of freedom (as reported in \code{object$df.residual}). If \code{var.diff=TRUE} the variance-covariance matrix of the estimates is computed via the sandwich formula, \deqn{(X^TX)^{-1}X^TVX(X^TX)^{-1}}{(X'X)^{-1}X'VX(X'X)^{-1}} where V is the diagonal matrix including the different group-specific error variance estimates. Standard errors are the square root of the main diagonal of this matrix. 
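As a rough sketch of this computation (not the code used internally; \code{oseg} denotes a generic segmented fit with a single segmented variable \code{x} whose model frame and \code{psi} matrix, assumed to have an \code{"Est."} column, have been stored): \preformatted{
X   <- model.matrix(oseg)                                  # working design matrix
g   <- cut(oseg$model$x, c(-Inf, oseg$psi[, "Est."], Inf)) # groups defined by the breakpoints
s2  <- tapply(resid(oseg)^2, g, sum)/(table(g) - ncol(X))  # group-specific variances RSSj/(nj-p)
s2i <- as.numeric(s2)[as.integer(g)]                       # observation-wise variances, i.e. diag(V)
A   <- solve(crossprod(X))                                 # (X'X)^{-1}
B   <- crossprod(X * s2i, X)                               # X'VX
V.rob <- tcrossprod(crossprod(A, B), A)                    # (X'X)^{-1} X'VX (X'X)^{-1}
sqrt(diag(V.rob))                                          # 'alternative' standard errors
}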
} \value{ A list (similar to one returned by \code{segmented.lm} or \code{segmented.glm}) with additional components: \item{psi }{estimated break-points and relevant (approximate) standard errors} \item{Ttable }{estimates and standard errors of the model parameters. This is similar to the matrix \code{coefficients} returned by \code{summary.lm} or \code{summary.glm}, but without the rows corresponding to the breakpoints. Even the p-values relevant to the difference-in-slope parameters have been replaced by NA, since they are meaningless in this case, see \code{\link{davies.test}}.} \item{gap}{estimated coefficients, standard errors and t-values for the `gap' variables} \item{cov.var.diff}{if \code{var.diff=TRUE}, the covaraince matrix accounting for heteroscedastic errors.} \item{sigma.new}{if \code{var.diff=TRUE}, the square root of the estimated error variances in each interval.} \item{df.new}{if \code{var.diff=TRUE}, the residual degrees of freedom in each interval.} } %\references{ ~put references to the literature/web site here ~ } \author{ Vito M.R. Muggeo } \seealso{ \code{\link{print.segmented}}, \code{\link{davies.test}} } \examples{ ##continues example from segmented() # summary(segmented.model,short=TRUE) ## an heteroscedastic example.. # set.seed(123) # n<-100 # x<-1:n/n # y<- -x+1.5*pmax(x-.5,0)+rnorm(n,0,1)*ifelse(x<=.5,.4,.1) # o<-lm(y~x) # oseg<-segmented(o,seg.Z=~x,psi=.6) # summary(oseg,var.diff=TRUE)$sigma.new } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/draw.history.Rd0000644000176200001440000000333514605760537015745 0ustar liggesusers\name{draw.history} \alias{draw.history} %- Also NEED an '\alias' for EACH other topic documented here. \title{ History for the breakpoint estimates } \description{ Displays breakpoint iteration values for segmented fits. } \usage{ draw.history(obj, term, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ a segmented fit returned by any "segmented" method. } \item{term}{ a character to mean the `segmented' variable whose breakpoint values throughout iterations have to be displayed. } \item{\dots}{ graphic parameters to be passed to \code{matplot()}. } } \details{ For a given \code{term} in a segmented fit, \code{draw.history()} produces two plots. On the left panel it displays the different breakpoint values obtained during the estimating process, since the starting values up to the final ones, while on the right panel the objective values at different iterations. When bootstrap restarting is employed, \code{draw.history()} produces two plots, the values of objective function and the number of distinct solutions against the bootstrap replicates. } \value{ None. } %\references{ } \author{ Vito M.R. Muggeo } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ data(stagnant) os<-segmented(lm(y~x,data=stagnant),seg.Z=~x,psi=-.8) # draw.history(os) #diagnostics with boot restarting os<-segmented(lm(y~x,data=stagnant),seg.Z=~x,psi=-.8, control=seg.control(n.boot=0)) # draw.history(os) #diagnostics without boot restarting } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{ regression } \keyword{ nonlinear } segmented/man/lines.segmented.Rd0000644000176200001440000000634514523360336016370 0ustar liggesusers\name{lines.segmented} \alias{lines.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Bars for interval estimate of the breakpoints } \description{ Draws bars relevant to breakpoint estimates (point estimate and confidence limits) on the current device } \usage{ \method{lines}{segmented}(x, term, bottom = TRUE, shift=FALSE, conf.level = 0.95, k = 50, pch = 18, rev.sgn = FALSE, .vcov=NULL, .coef=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ an object of class \code{segmented}. } \item{term}{ the segmented variable of the breakpoints being drawn. It may be unspecified when there is a single segmented variable.} \item{bottom}{ logical, indicating if the bars should be plotted at the bottom (\code{TRUE}) or at the top (\code{FALSE}).} \item{shift}{ logical, indicating if the bars should be `shifted' on the y-axis before plotting. Useful for multiple breakpoints with overlapped confidence intervals.} \item{conf.level}{ the confidence level of the confidence intervals for the breakpoints. } \item{k}{ a positive integer regulating the vertical position of the drawn bars. See Details. } \item{pch}{ either an integer specifying a symbol or a single character to be used in plotting the point estimates of the breakpoints. See \code{\link{points}}. } \item{rev.sgn}{ should the signs of the breakpoint estimates be changed before plotting? see Details. } \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(x)}.} \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is computed internally by \code{coef(x)}.} \item{\dots}{ further arguments passed to \code{\link{arrows}}, for instance `col' that can be a vector. } } \details{ \code{lines.segmented} simply draws on the current device the point estimates and relevant confidence limits of the estimated breakpoints from a "segmented" object. The y coordinate where the bars are drawn is computed as \code{usr[3]+h} if \code{bottom=TRUE} or \code{usr[4]-h} when \code{bottom=FALSE}, where \code{h=(usr[4]-usr[3])/abs(k)} and \code{usr} are the extremes of the user coordinates of the plotting region. Therefore for larger values of \code{k} the bars are plotted on the edges. The argument \code{rev.sgn} allows to change the sign of the breakpoints before plotting. This may be useful when a null-right-slope constraint is set. } %\value{ % ~Describe the value returned % If it is a LIST, use % \item{comp1 }{Description of 'comp1'} % \item{comp2 }{Description of 'comp2'} % ... %} %\references{ ~put references to the literature/web site here ~ } %\author{ ~~who you are~~ } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{plot.segmented}} to plot the fitted segmented lines, and \code{\link{points.segmented}} to add the fitted joinpoints. } \examples{ ## See ?plot.segmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/confint.segmented.lme.Rd0000644000176200001440000000442514415476766017506 0ustar liggesusers\name{confint.segmented.lme} \alias{confint.segmented.lme} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Confidence intervals in segmented mixed models }
\description{
Computes confidence intervals for all regression parameters, including the breakpoint, in a fitted
`segmented mixed' model.
}
\usage{
\method{confint}{segmented.lme}(object, parm, level = 0.95, obj.boot, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{object}{ A fit object returned by \code{\link{segmented.lme}}. }
  \item{parm}{ A vector of numbers indicating which parameters should be considered. If missing,
  all parameters are considered. }
  \item{level}{ The confidence level. }
  \item{obj.boot}{ An optional list including the bootstrap distributions of the regression
  coefficients. Such a list is returned by \code{vcov.segmented.lme(.., ret.b=TRUE)}. }
  \item{\dots}{ if \code{obj.boot} is missing and bootstrap CIs are requested, additional optional
  arguments, such as \code{B}, \code{seed}, and \code{it.max.b}, to be used in the computation of
  the bootstrap distributions. }
}
\details{
If \code{obj.boot} is provided or \code{...} includes the argument \code{B>0}, confidence intervals
are computed by exploiting the bootstrap distributions.
}
\value{
A matrix (or a list of matrices if bootstrap CIs are requested) including the confidence intervals
for the model parameters.
}
% \references{
% %% ~put references to the literature/web site here ~
% }
\author{ Vito Muggeo }
%\note{
%% ~~further notes~~
%}
\section{Warning }{
All the functions for segmented mixed models (*.segmented.lme) are still at an experimental stage.
}
\seealso{
\code{\link{vcov.segmented.lme}}
}
\examples{
\dontrun{
confint(os)        #asymptotic CI
confint(os, B=50)  #boot CIs

#it is possible to obtain the boot distribution beforehand
ob <-vcov(os, B=50, ret.b=TRUE)
confint(os, obj.boot=ob)  #boot CI
}
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory (show via RShowDoc("KEYWORDS")):
% \keyword{ ~kwd1 }
% \keyword{ ~kwd2 }
% Use only one keyword per line.
% For non-standard keywords, use \concept instead of \keyword:
% \concept{ ~cpt1 }
% \concept{ ~cpt2 }
% Use only one concept per line.
segmented/man/predict.segmented.Rd0000644000176200001440000000563114667540472016713 0ustar liggesusers\name{predict.segmented}
\alias{predict.segmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Predict method for segmented model fits }
\description{
Returns predictions and optionally associated quantities (standard errors or confidence intervals)
from a fitted segmented model object.
}
\usage{
\method{predict}{segmented}(object, newdata, se.fit=FALSE, interval=c("none","confidence", "prediction"),
    type = c("link", "response"), na.action=na.omit, level=0.95, .coef=NULL, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{object}{ a fitted segmented model returned by \code{segmented.(g)lm} or \code{segreg}. }
  \item{newdata}{ An optional data frame in which to look for variables with which to predict.
  If omitted, the fitted values are used. }
  \item{se.fit}{ Logical. Should the standard errors be returned? }
  \item{interval}{ Which type of interval? See \code{\link{predict.lm}}. }
  \item{type}{ Predictions on the link or response scale? Only relevant if \code{object} is a
  segmented glm fit. }
  \item{na.action}{ How to deal with missing data, \emph{if} \code{newdata} includes them. }
  \item{level}{ The confidence level. }
  \item{.coef}{ The regression parameter estimates. If unspecified (i.e. \code{NULL}), it is
  computed internally by \code{coef()}.}
  \item{\dots}{ further arguments.
}
}
\details{
\code{predict.segmented} builds the appropriate design matrix, accounting for the breakpoints, and
passes it to \code{predict.lm} or \code{predict.glm} depending on the actual model fit \code{object}.
}
\value{
\code{predict.segmented} produces a vector of predictions, possibly with associated standard errors
or confidence intervals. See \code{predict.lm} or \code{predict.glm}.
}
%\references{
%% ~put references to the literature/web site here ~
%}
\author{ Vito Muggeo }
%\note{
%If \code{type="terms"}, \code{predict.segmented} returns predictions for each component of the segmented term.
%Namely if `my.x' is the segmented variable, predictions for `my.x', `U1.my.x' and `psi1.my.x' are returned. These are
%meaningless individually, however their sum provides the predictions for the segmented term.
%}
\section{Warning }{
For segmented glm fits with offset, \code{predict.segmented} returns the fitted values \emph{including} the offset.
}
\seealso{
\code{\link{segreg}}, \code{\link{segmented}}, \code{\link{plot.segmented}},
\code{\link{broken.line}}, \code{\link{predict.lm}}, \code{\link{predict.glm}}
}
\examples{
n=10
x=seq(-3,3,l=n)
set.seed(1515)
y <- (x<0)*x/2 + 1 + rnorm(x,sd=0.15)
segm <- segmented(lm(y ~ x), ~ x, psi=0.5)
predict(segm, se.fit = TRUE)$se.fit

# 'wrong' (smaller) standard errors, obtained by treating the breakpoint as known
olm<-lm(y~x+pmax(x-segm$psi[,2],0))
predict(olm, se.fit = TRUE)$se.fit
}
% \dontrun{..}
% KEYWORDS - R documentation directory.
\keyword{models}
\keyword{regression}
segmented/DESCRIPTION0000644000176200001440000000310414760330372013736 0ustar liggesusersPackage: segmented
Type: Package
Title: Regression Models with Break-Points / Change-Points Estimation
        (with Possibly Random Effects)
Version: 2.1-4
Date: 2025-02-26
Authors@R: c(person(given = c("Vito","M.","R."), family = "Muggeo", role = c("aut", "cre"),
        email = "vito.muggeo@unipa.it", comment=c(ORCID="0000-0002-3386-4054")))
Maintainer: Vito M. R. Muggeo <vito.muggeo@unipa.it>
Description: Fitting regression models where, in addition to possible linear terms, one or more
        covariates have segmented (i.e., broken-line or piece-wise linear) or stepmented
        (i.e. piece-wise constant) effects. Multiple breakpoints for the same variable are
        allowed. The estimation method is discussed in Muggeo (2003, ) and illustrated in
        Muggeo (2008, ). An approach for hypothesis testing is presented in Muggeo (2016, ),
        and interval estimation for the breakpoint is discussed in Muggeo (2017, ).
        Segmented mixed models, i.e. random effects in the change point, are discussed in
        Muggeo (2014, ). Estimation of piecewise-constant relationships and changepoints
        (mean-shift models) is discussed in Fasola et al. (2018, ).
Depends: R (>= 3.5.0), MASS, nlme
License: GPL
NeedsCompilation: no
Packaged: 2025-02-26 16:34:01 UTC; vito
Author: Vito M. R. Muggeo [aut, cre] (<https://orcid.org/0000-0002-3386-4054>)
RoxygenNote: 7.3.1
Repository: CRAN
Date/Publication: 2025-02-28 12:50:02 UTC
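To make the workflow summarised in the Description concrete, the following is a minimal,
hypothetical sketch, not taken from the package sources: the simulated data, the true breakpoint
at 0.6, the starting guess psi = 0.5 and the object names are illustrative assumptions; the
functions called are the exported ones documented in the manual pages above.

library(segmented)

set.seed(1)
n <- 100
x <- 1:n/n
y <- 2 - x + 1.5*pmax(x - 0.6, 0) + rnorm(n, 0, 0.1)   # one true breakpoint at x = 0.6

olm  <- lm(y ~ x)                              # ordinary linear fit
oseg <- segmented(olm, seg.Z = ~x, psi = 0.5)  # broken-line fit; psi is the starting guess

summary(oseg)    # parameter estimates, including the breakpoint 'psi'
slope(oseg)      # slopes of the two segments
confint(oseg)    # confidence interval for the breakpoint
predict(oseg, newdata = data.frame(x = c(0.3, 0.9)))   # predictions via predict.segmented

plot(x, y)               # raw data
plot(oseg, add = TRUE)   # overlay the fitted segmented line
lines(oseg)              # bar with point estimate and CI of the breakpoint (lines.segmented)
points(oseg)             # mark the fitted joinpoint (points.segmented)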