segmented/0000755000176200001440000000000013573767742012252 5ustar liggesuserssegmented/NAMESPACE0000644000176200001440000000325713475757752013501 0ustar liggesusersimport(splines) importFrom("grDevices", "grey","adjustcolor") importFrom("graphics", "abline", "axis", "box", "lines", "matlines", "matplot", "par", "plot", "points", "polygon", "segments") importFrom("stats", "approx", "as.formula", "coef", "contrasts", "family", "fitted", "formula", "gaussian", "glm.control", "glm.fit", "is.empty.model", "lm.wfit", "median", "model.frame", "model.matrix", "model.offset", "model.response", "model.weights", "na.omit", "pnorm", "predict", "printCoefmat", "pt", "qnorm", "qt", "quantile", "resid", "residuals", "runif", "summary.glm", "summary.lm", "update", "update.formula", "vcov", "weights", "dnorm", "lm", "lm.fit", "splinefun", "complete.cases","sd","qchisq","pchisq") importFrom("utils", "flush.console") export(segmented, segmented.default, segmented.lm, segmented.glm, segmented.Arima, broken.line ,confint.segmented,davies.test,pscore.test,draw.history,aapc, intercept,lines.segmented,plot.segmented,print.segmented, seg.control,seg.lm.fit,seg.glm.fit,seg.lm.fit.boot,seg.glm.fit.boot, seg.def.fit,seg.def.fit.boot, seg.Ar.fit,seg.Ar.fit.boot, slope, summary.segmented,print.summary.segmented,vcov.segmented, predict.segmented, points.segmented, aapc) S3method(segmented,default) S3method(segmented,lm) S3method(segmented,glm) S3method(segmented,Arima) S3method(plot,segmented) S3method(print,segmented) S3method(summary,segmented) S3method(print, summary.segmented) S3method(lines,segmented) S3method(confint,segmented) S3method(vcov,segmented) S3method(predict,segmented) S3method(points,segmented) segmented/data/0000755000176200001440000000000013572471520013144 5ustar liggesuserssegmented/data/down.R0000644000176200001440000000105612766105416014242 0ustar liggesusers"down"<- data.frame(age = c(17.0, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 
31.5, 32.5, 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5, 44.5, 45.5, 47.0), births = c(13555, 13675, 18752, 22005, 23896, 24667, 24807, 23986, 22860, 21450, 19202, 17450, 15685, 13954, 11987, 10983, 9825, 8483, 7448, 6628, 5780, 4834, 3961, 2952, 2276, 1589, 1018, 596, 327, 249), cases = c(16, 15, 16, 22, 16, 12, 17, 22, 15, 14, 27, 14, 9, 12, 12, 18, 13, 11, 23, 13, 17, 15, 30, 31, 33, 20, 16, 22, 11, 7)) segmented/data/stagnant.R0000644000176200001440000000060712766105416015113 0ustar liggesusers"stagnant"<- data.frame(x = c(-0.8, 0.01, -0.25, -0.25, -0.12, -0.12, -0.94, 0.11, 0.11, -0.63, -0.63, -1.39, -1.39, 0.7, 0.7, 0.34, 1.19, 0.59, 0.85, 0.85, 0.99, 0.99, 0.25, -1.08, -1.08, 0.44, 0.34, 0.25), y=c(0.9, 0.51, 0.65, 0.67, 0.6, 0.59, 0.92, 0.43, 0.43, 0.81, 0.83, 1.12, 1.12, -0.13, -0.14, 0.25, -0.65, -0.01, -0.3, -0.33, -0.46, -0.43, 0.33, 0.99, 1.03, 0.13, 0.24, 0.3)) segmented/data/stagnant.rda0000644000176200001440000000054013465321572015454 0ustar liggesusers r0b```b`fef`b2Y# '(.ILK+a``c`b/g,D_`8oGVpXc4a'h7ۂ׹la40W4V`djC`߯[aV@3O7g@7@O+ 掷P@@-p%H{W k mK o?,/Cl쁊j9H|CAoּb CA ]yrNb1L9L+%$Q/hr΢r=  segmented/data/down.rda0000644000176200001440000000062113465321572014604 0ustar liggesusersuMKABCDt3K׮Q^7LC}:t.: *,;G7gfYNfjFe9RCNŜC\+1Ym>k=C-GN)8 g`a8`FA!ǐqr9DN!iEC_oKTkOԚ?M2D6TBJ>O1{(P7;iTwM^_Ȅ'~!>=`?eUׇy_Pa:VPuk?+דc?M '|ycϔL0b] H^WOԝGK/~9 +?9~.~,Y=72ws==|) _}(GtNj= :!:zS;"3wcV|2n)jk7CrJc2lL#9 M"3g䨓\k͌ٝp300), the exact Davies (2002) upper bound cannot be computed (as it relies on \code{gamma()} function) and the \emph{approximate} upper bound of Davies (1987) is returned. } %%\section{Warning }{Currently \code{davies.test} does not work if the fitted model \code{ogg} %% does not include the segmented variable \code{term} being tested.} \seealso{See also \code{\link{pscore.test}} which is more powerful, especially when the signal-to-noise ratio is low. 
} \examples{ \dontrun{ set.seed(20) z<-runif(100) x<-rnorm(100,2) y<-2+10*pmax(z-.5,0)+rnorm(100,0,3) o<-lm(y~z+x) davies.test(o,~z) davies.test(o,~x) o<-glm(y~z+x) davies.test(o,~z) #it works but the p-value is too small.. } } \keyword{ htest } segmented/man/seg.control.Rd0000644000176200001440000002041213573461132015530 0ustar liggesusers\name{seg.control} \alias{seg.control} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Auxiliary for controlling segmented model fitting } \description{ Auxiliary function as user interface for 'segmented' fitting. Typically only used when calling any 'segmented' method (\code{segmented.lm}, \code{segmented.glm}, \code{segmented.Arima} or \code{segmented.default}). } \usage{ seg.control(n.boot=10, display = FALSE, tol = 1e-05, it.max = 30, fix.npsi=TRUE, K = 10, quant = TRUE, maxit.glm = 25, h = 1, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=TRUE, seed=NULL, fn.obj=NULL, digits=NULL, conv.psi=FALSE, alpha=.02, min.step=.0001, powers=c(1,1), last = TRUE, stop.if.error = NULL, gap=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{n.boot}{ number of bootstrap samples used in the bootstrap restarting algorithm. If 0 the standard algorithm, i.e. without bootstrap restart, is used. Default to 10 that appears to be sufficient in most of problems. However when multiple breakpoints have to be estimated it is suggested to increase \code{n.boot}, e.g. \code{n.boot=50}.} \item{display}{ logical indicating if the value of the objective function should be printed (along with current breakpoint estimates) at each iteration or at each bootstrap resample. If bootstrap restarting is employed, the values of objective and breakpoint estimates should not change at the last runs.} \item{tol}{ positive convergence tolerance. } \item{it.max}{ integer giving the maximal number of iterations. 
} \item{fix.npsi}{logical (it replaces previous argument \code{stop.if.error}) If \code{TRUE} (default) the \emph{number} (and not location) of breakpoints is held fixed throughout iterations. Otherwise a sort of `automatic' breakpoint selection is carried out, provided that several starting values are supplied for the breakpoints, see argument \code{psi} in \code{\link{segmented.lm}} or \code{\link{segmented.glm}}. The idea, relying on removing the `non-admissible' breakpoint estimates at each iteration, is discussed in Muggeo and Adelfio (2011) and it is not compatible with the bootstrap restart algorithm. \code{fix.npsi=FALSE}, indeed, should be considered as a preliminary and tentative approach to deal with an unknown number of breakpoints.} \item{K}{ the number of quantiles (or equally-spaced values) to supply as starting values for the breakpoints when the \code{psi} argument of \code{segmented} is set to \code{NA}. \code{K} is ignored when \code{psi} is different from \code{NA}. } \item{quant}{logical, indicating how the starting values should be selected. If \code{FALSE} equally-spaced values are used, otherwise the quantiles. Ignored when \code{psi} is different from \code{NA}.} \item{maxit.glm}{ integer giving the maximum number of inner IWLS iterations (see details). } \item{h}{ positive factor modifying the increments in breakpoint updates during the estimation process (see details). } \item{size.boot}{the size of the bootstrap samples. If \code{NULL}, it is taken equal to the actual sample size.} \item{jt}{logical. If \code{TRUE} the values of the segmented variable(s) are jittered before fitting the model to the bootstrap resamples.} \item{nonParam}{ if \code{TRUE} nonparametric bootstrap (i.e. case-resampling) is used, otherwise residual-based. Currently working only for LM fits. 
It is not clear what residuals should be used for GLMs.} \item{random}{ if \code{TRUE}, when the algorithm fails to obtain a solution, random values are employed to obtain candidate values. } \item{seed}{ The seed to be passed on to \code{set.seed()} when \code{n.boot>0}. Setting the seed can be useful to replicate the results when the bootstrap restart algorithm is employed. In fact a segmented fit includes \code{seed} representing the integer vector saved just before the bootstrap resampling. Re-use it if you want to replicate the bootstrap restarting algorithm with the \emph{same} samples. } \item{fn.obj}{ A \emph{character string} to be used (optionally) only when \code{segmented.default} is used. It represents the function (with argument \code{'x'}) to be applied to the fit object to extract the objective function to be \emph{minimized}. Thus for \code{"lm"} fits (although unnecessary) it should be \code{fn.obj="sum(x$residuals^2)"}, for \code{"coxph"} fits it should be \code{fn.obj="-x$loglik[2]"}. If \code{NULL} the `minus log likelihood' extracted from the object, namely \code{"-logLik(x)"}, is used. See \code{\link{segmented.default}}. } \item{digits}{optional. If specified it means the desidered number of decimal points of the breakpoint to be used during the iterative algorithm.} \item{conv.psi}{optional. Should convergence of iterative procedure to be assessed on changes of breakpoint estimates or changes in the objective? Default to FALSE.} \item{alpha}{optional numerical value. The breakpoint is estimated within the quantiles \code{alpha} and \code{1-alpha} of the relevant covariate.} \item{min.step}{optional. The minimum step size to break the iterative algorithm. Default to 0.0001.} \item{powers}{ The powers of the pseudo covariates employed by the algorithm. These can be altered during the iterative process to stabilize the estimation procedure. Usually of no interest for the user. \emph{This argument will be removed in next releases}. 
} \item{last}{ logical indicating if output should include only the last fitted model. \emph{This argument will be removed in next releases}} \item{stop.if.error}{ same than \code{fix.npsi}. \emph{This argument will be removed in next releases}, and replaced by \code{fix.npsi}. If provided, and different from \code{NULL}, it overwrites \code{fix.npsi}} \item{gap}{logical, if \code{FALSE} the gap coefficients are \emph{always} constrained to zero at the convergence. \emph{This argument will be removed in next releases}.} } \details{ Fitting a `segmented' GLM model is attained via fitting iteratively standard GLMs. The number of (outer) iterations is governed by \code{it.max}, while the (maximum) number of (inner) iterations to fit the GLM at each fixed value of psi is fixed via \code{maxit.glm}. Usually three-four inner iterations may be sufficient. When the starting value for the breakpoints is set to \code{NA} for any segmented variable specified in \code{seg.Z}, \code{K} values (quantiles or equally-spaced) are selected as starting values for the breakpoints. In this case, it may be useful to set also \code{fix.npsi=FALSE} to automate the procedure, see Muggeo and Adelfio (2011). The maximum number of iterations (\code{it.max}) should be also increased when the `automatic' procedure is used. If \code{last=TRUE}, the object resulting from \code{segmented.lm} (or \code{segmented.glm}) is a list of fitted GLM; the i-th model is the segmented model with the values of the breakpoints at the i-th iteration. Since version 0.2-9.0 \code{segmented} implements the bootstrap restarting algorithm described in Wood (2001). The bootstrap restarting is expected to escape the local optima of the objective function when the segmented relationship is flat. Notice bootstrap restart runs \code{n.boot} iterations regardless of \code{tol} that only affects convergence within the inner loop. } \value{ A list with the arguments as components. } \references{ Muggeo, V.M.R., Adelfio, G. 
(2011) Efficient change point detection in genomic sequences of continuous measurements. \emph{Bioinformatics} \bold{27}, 161--166. Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244. } \author{ Vito Muggeo } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ #decrease the maximum number inner iterations and display the #evolution of the (outer) iterations seg.control(display = TRUE, maxit.glm=4) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/plot.segmented.Rd0000644000176200001440000001563213476152534016240 0ustar liggesusers\name{plot.segmented} \alias{plot.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Plot method for segmented objects } \description{ Takes a fitted \code{segmented} object returned by \code{segmented()} and plots (or adds) the fitted broken-line for the selected segmented term. } \usage{ \method{plot}{segmented}(x, term, add=FALSE, res=FALSE, conf.level=0, interc=TRUE, link=TRUE, res.col=1, rev.sgn=FALSE, const=0, shade=FALSE, rug=!add, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a fitted \code{segmented} object. } \item{term}{ the segmented variable having the piece-wise relationship to be plotted. If there is a single segmented variable in the fitted model \code{x}, \code{term} can be omitted.} \item{add}{ when \code{TRUE} the fitted lines are added to the current device.} \item{res}{ when \code{TRUE} the fitted lines are plotted along with corresponding partial residuals. 
See Details.} \item{conf.level}{ If greater than zero, it means the confidence level at which the pointwise confidence itervals have to be plotted.} \item{interc}{ If \code{TRUE} the computed segmented components include the model intercept (if it exists).} \item{link}{ when \code{TRUE} (default), the fitted lines are plotted on the link scale, otherwise they are tranformed on the response scale before plotting. Ignored for linear segmented fits. } \item{res.col}{when \code{res=TRUE} it means the color of the points representing the partial residuals.} \item{rev.sgn}{ when \code{TRUE} it is assumed that current \code{term} is `minus' the actual segmented variable, therefore the sign is reversed before plotting. This is useful when a null-constraint has been set on the last slope.} \item{const}{ constant to add to each fitted segmented relationship (on the scale of the linear predictor) before plotting.} \item{shade}{if \code{TRUE} and \code{conf.level>0} it produces shaded regions (in grey color) for the pointwise confidence intervals embracing the fitted segmented line. } \item{rug}{when \code{TRUE} the covariate values are displayed as a rug plot at the foot of the plot. Default is to \code{!add}.} \item{dens.rug}{when \code{TRUE} then smooth covariate distribution is plotted on the x-axis.} \item{dens.col}{if \code{dens.rug=TRUE}, it means the colour to be used to plot the density.} % \item{show.gap}{ when \code{FALSE} the (possible) gaps between the fitted lines at the estimated breakpoints % are hidden. When bootstrap restarting has been employed (default in \code{segmented}), \code{show.gap} is meaningless % as the gap coefficients are always set to zero in the fitted model.} \item{transf}{ A possible function to convert the fitted values before plotting. 
It is only effective if the fitted values refer to a linear or a generalized linear model (on the link scale) \emph{and} \code{res=FALSE}.} \item{isV}{logical value (to be passed to \code{\link{broken.line}}). Ignored if \code{conf.level=0}} \item{is}{logical value (to be passed to \code{\link{broken.line}}) indicating if the covariance matrix based on the induced smoothing should be used. Ignored if \code{conf.level=0}} \item{var.diff}{logical value to be passed to \code{\link{summary.segmented}} to compute dthe standard errors of fitted values (if \code{conf.level>0}).} \item{p.df}{ degrees of freedom when \code{var.diff=TRUE}, see \code{\link{summary.segmented}}} \item{.vcov}{ The \emph{full} covariance matrix of estimates to be used when \code{conf.level>0}. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov.segmented}.} \item{\dots}{ other graphics parameters to pass to plotting commands: `col', `lwd' and `lty' (that can be vectors, see the example below) for the fitted piecewise lines; `ylab', `xlab', `main', `sub', `cex.axis', `cex.lab', `xlim' and `ylim' when a new plot is produced (i.e. when \code{add=FALSE}); `pch' and `cex' for the partial residuals (when \code{res=TRUE}); \code{col.shade} for the shaded regions (provided that \code{shade=TRUE} and \code{conf.level>0}). } } \details{ Produces (or adds to the current device) the fitted segmented relationship between the response and the selected \code{term}. If the fitted model includes just a single `segmented' variable, \code{term} may be omitted. %Due to the parameterization of the segmented terms, sometimes %the fitted lines may not appear to join at the estimated breakpoints. If this is the case, the apparent %`gap' would indicate some lack-of-fit. However, since version 0.2-9.0, the gap coefficients are set to zero by default %(see argument \code{gap} in in \code{\link{seg.control}}). 
The partial residuals are computed as `fitted + residuals', where `fitted' are the fitted values of the segmented relationship relevant to the covariate specified in \code{term}. Notice that for GLMs the residuals are the response residuals if \code{link=FALSE} and the working residuals if \code{link=TRUE}. %weighted by the IWLS weights [fino alla versione 0.5-2.0 i workRes were weighted by the IWLS weights] } \value{ None. } %\references{ } \author{ Vito M. R. Muggeo } \note{ For models with offset, partial residuals on the response scale are not defined. Thus \code{plot.segmented} does not work when \code{link=FALSE}, \code{res=TRUE}, and the fitted model includes an offset.} % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{segmented}} to fit the model, \code{\link{lines.segmented}} to add the estimated breakpoints on the current plot. \code{\link{points.segmented}} to add the joinpoints of the segmented relationship. \code{\link{predict.segmented}} to compute standard errors and confidence intervals for predictions from a "segmented" fit. } \examples{ set.seed(1234) z<-runif(100) y<-rpois(100,exp(2+1.8*pmax(z-.6,0))) o<-glm(y~z,family=poisson) o.seg<-segmented(o, ~z) #single segmented covariate and one breakpoint:'psi' can be omitted par(mfrow=c(2,1)) plot(o.seg, conf.level=0.95, shade=TRUE) points(o.seg, link=TRUE, col=2) ## new plot plot(z,y) ## add the fitted lines using different colors and styles.. plot(o.seg,add=TRUE,link=FALSE,lwd=2,col=2:3, lty=c(1,3)) lines(o.seg,col=2,pch=19,bottom=FALSE,lwd=2) #for the CI for the breakpoint points(o.seg,col=4, link=FALSE) ## using the options 'is', 'isV', 'shade' and 'col.shade'. par(mfrow=c(1,2)) plot(o.seg, conf.level=.9, is=TRUE, isV=TRUE, col=1, shade = TRUE, col.shade=2) plot(o.seg, conf.level=.9, is=TRUE, isV=FALSE, col=2, shade = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{ regression } \keyword{ nonlinear } \keyword{ hplot }segmented/man/confint.segmented.Rd0000644000176200001440000001013413500410154016671 0ustar liggesusers\name{confint.segmented} \alias{confint.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Confidence intervals for breakpoints} \description{ Computes confidence intervals for the breakpoints in a fitted `segmented' model. } \usage{ \method{confint}{segmented}(object, parm, level=0.95, method=c("delta", "score", "gradient"), rev.sgn=FALSE, var.diff=FALSE, is=FALSE, digits=max(4, getOption("digits") - 1), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted \code{segmented} object. } \item{parm}{the segmented variable of interest. If missing the first segmented variable in \code{object} is considered. } \item{level}{the confidence level required, default to 0.95.} \item{method}{which confidence interval should be computed. One of \code{"delta"}, \code{"score"}, or \code{"gradient"}. Can be abbreviated.} \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}; recycled otherwise. when \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the sign is reversed before printing. This is useful when a null-constraint has been set on the last slope.} \item{var.diff}{logical. If \code{method="delta"}, and there is a single segmented variable, \code{var.diff=TRUE} leads to standard errors based on sandwich-type formula of the covariance matrix. See Details in \code{\link{summary.segmented}}.} \item{is}{logical. If \code{method="delta"}, \code{is=TRUE} means that the full covariance matrix is computed via \code{vcov(.., is=TRUE)}} \item{digits}{controls the number of digits to print when returning the output. 
} \item{\dots}{additional parameters referring to Score-based confidence intervals, such as \code{"h"}, \code{"d.h"}, \code{"bw"}, \code{"msgWarn"}, and \code{"n.values"} specifying the number of points used to profile the Score (or Gradient) statistic.} } \details{ \code{confint.segmented} computes confidence limits for the breakpoints. Currently there are three options, see argument \code{method}. \code{method="delta"} uses the standard error coming from the Delta method for the ratio of two random variables. This value is an approximation (slightly) better than the one reported in the `psi' component of the list returned by any \code{segmented} method. The resulting confidence intervals are based on the asymptotic Normal distribution of the breakpoint estimator which is reliable just for clear-cut kink relationships. See Details in \code{\link{segmented}}. \cr \code{method="score"} or \code{method="gradient"} compute the confidence interval via profiling the Score or the Gradient statistics smoothed out by the induced smoothing paradigm, as discussed in the reference below. } \value{ A matrix including point estimate and confidence limits of the breakpoint(s) for the segmented variable possibly specified in \code{parm}. } \references{ Muggeo, V.M.R. (2017) Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach. \emph{Australian & New Zealand Journal of Statistics} \bold{59}, 311--322. } \author{ Vito M.R. Muggeo } \note{ Currently \code{method="score"} or \code{method="gradient"} only works for segmented \emph{linear} model. For segmented \emph{generalized linear} model, currently only \code{method="delta"} is available. } % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{segmented}} and \code{\link{lines.segmented}} to plot the estimated breakpoints with corresponding confidence intervals. 
} \examples{ set.seed(10) x<-1:100 z<-runif(100) y<-2+1.5*pmax(x-35,0)-1.5*pmax(x-70,0)+10*pmax(z-.5,0)+rnorm(100,0,2) out.lm<-lm(y~x) o<-segmented(out.lm,seg.Z=~x+z,psi=list(x=c(30,60),z=.4)) confint(o) #delta CI for the 1st variable confint(o, "x", method="score") confint(o, "z", method="g") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/predict.segmented.Rd0000644000176200001440000000443313471247140016702 0ustar liggesusers\name{predict.segmented} \alias{predict.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Predict method for segmented model fits } \description{ Returns predictions and optionally associated quantities (standard errors or confidence intervals) from a fitted segmented model object. } \usage{ \method{predict}{segmented}(object, newdata, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ a fitted segmented model coming from \code{segmented.lm} or \code{segmented.glm}. } \item{newdata}{ An optional data frame in which to look for variables with which to predict. If omitted, the fitted values are used. } \item{\dots}{ further arguments passed to \code{predict.lm} or \code{predict.glm}. Usually these are \code{se.fit}, or \code{interval} or \code{type}. } } \details{ Basically \code{predict.segmented} builds the right design matrix accounting for breakpoint and passes it to \code{predict.lm} or \code{predict.glm} depending on the actual model fit \code{object}. } \value{ \code{predict.segmented} produces a vector of predictions with possibly associated standard errors or confidence intervals. See \code{predict.lm} or \code{predict.glm}. } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Vito Muggeo } \note{ If \code{type="terms"}, \code{predict.segmented} returns predictions for each component of the segmented term. 
Namely if `my.x' is the segmented variable, predictions for `my.x', `U1.my.x' and `psi1.my.x' are returned. These are meaningless individually, however their sum provides the predictions for the segmented term. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{segmented}}, \code{\link{plot.segmented}}, \code{\link{broken.line}}, \code{\link{predict.lm}}, \code{\link{predict.glm}} } \examples{ n=10 x=seq(-3,3,l=n) set.seed(1515) y <- (x<0)*x/2 + 1 + rnorm(x,sd=0.15) segm <- segmented(lm(y ~ x), ~ x, psi=0.5) predict(segm,se.fit = TRUE)$se.fit #wrong (smaller) st.errors (assuming known the breakpoint) olm<-lm(y~x+pmax(x-segm$psi[,2],0)) predict(olm,se.fit = TRUE)$se.fit } % \dontrun{..} % KEYWORDS - R documentation directory. \keyword{models} \keyword{regression} segmented/man/slope.Rd0000644000176200001440000000727513500155044014421 0ustar liggesusers\name{slope} \alias{slope} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Slope estimates from segmented relationships } \description{ Computes the slopes of each `segmented' relationship in the fitted model. } \usage{ slope(ogg, parm, conf.level = 0.95, rev.sgn=FALSE, APC=FALSE, .vcov=NULL, ..., digits = max(4, getOption("digits") - 2)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ogg}{ an object of class "segmented", returned by any \code{segmented} method. } \item{parm}{ the segmented variable whose slopes have to be computed. If missing all the segmented variables are considered. } \item{conf.level}{ the confidence level required. } \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}, but it is recycled otherwise. When \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the sign is reversed before printing. This is useful when a null-constraint has been set on the last slope.} \item{APC}{logical. 
If \code{APC=TRUE} the `annual percent changes', i.e. \eqn{100\times(\exp(\beta)-1)}{100*(exp(b)-1)}, are computed for each interval (\eqn{\beta}{b} is the slope). Only point estimates and confidence intervals are returned.} \item{.vcov}{ The \emph{full} covariance matrix of estimates. If unspecified (i.e. \code{NULL}), the covariance matrix is computed internally by \code{vcov(ogg)}.} \item{...}{ Further arguments to be passed on to \code{vcov.segmented}, such as \code{var.diff} and \code{is}. See Details in \code{\link{vcov.segmented}} and \code{\link{summary.segmented}}.} \item{digits}{controls number of digits in the returned output.} } \details{ To fit broken-line relationships, \code{segmented} uses a parameterization whose coefficients are not the slopes. Therefore given an object \code{"segmented"}, \code{slope} computes point estimates, standard errors, t-values and confidence intervals of the slopes of each segmented relationship in the fitted model. } \value{ \code{slope} returns a list of matrices. Each matrix represents a segmented relationship and its number of rows equal to the number of segments, while five columns summarize the results. } \references{ Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points. \emph{Statistics in Medicine} \bold{22}, 3055--3071. } \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} } \note{The returned summary is based on limiting Gaussian distribution for the model parameters involved in the computations. Sometimes, even with large sample sizes such approximations are questionable (e.g., with small difference-in-slope parameters) and the results returned by \code{slope} might be unreliable. Therefore is responsability of the user to gauge the applicability of such asymptotic approximations. Anyway, the t values may be not assumed for testing purposes and they should be used just as guidelines to assess the estimate uncertainty. 
} \seealso{See also \code{\link{davies.test}} and \code{\link{pscore.test}} to test for a nonzero difference-in-slope parameter. } \examples{ set.seed(16) x<-1:100 y<-2+1.5*pmax(x-35,0)-1.5*pmax(x-70,0)+rnorm(100,0,3) out<-glm(y~1) out.seg<-segmented(out,seg.Z=~x,psi=list(x=c(20,80))) ## the slopes of the three segments.... slope(out.seg) rm(x,y,out,out.seg) # ## an heteroscedastic example.. set.seed(123) n<-100 x<-1:n/n y<- -x+1.5*pmax(x-.5,0)+rnorm(n,0,1)*ifelse(x<=.5,.4,.1) o<-lm(y~x) oseg<-segmented(o,seg.Z=~x,psi=.6) slope(oseg) slope(oseg,var.diff=TRUE) #better CI } \keyword{ regression } \keyword{ htest } segmented/man/lines.segmented.Rd0000644000176200001440000000564112766105420016364 0ustar liggesusers\name{lines.segmented} \alias{lines.segmented} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Bars for interval estimate of the breakpoints } \description{ Draws bars relevant to breakpoint estimates (point estimate and confidence limits) on the current device } \usage{ \method{lines}{segmented}(x, term, bottom = TRUE, shift=TRUE, conf.level = 0.95, k = 50, pch = 18, rev.sgn = FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ an object of class \code{segmented}. } \item{term}{ the segmented variable of the breakpoints being drawn. It may be unspecified when there is a single segmented variable.} \item{bottom}{ logical, indicating if the bars should be plotted at the bottom (\code{TRUE}) or at the top (\code{FALSE}).} \item{shift}{ logical, indicating if the bars should be `shifted' on the y-axis before plotting. Useful for multiple breakpoints with overlapped confidence intervals.} \item{conf.level}{ the confidence level of the confidence intervals for the breakpoints. } \item{k}{ a positive integer regulating the vertical position of the drawn bars. See Details. 
} \item{pch}{ either an integer specifying a symbol or a single character to be used in plotting the point estimates of the breakpoints. See \code{\link{points}}. } \item{rev.sgn}{ should the signs of the breakpoint estimates be changed before plotting? see Details. } \item{\dots}{ further arguments passed to \code{\link{segments}}, for instance `col' that can be a vector. } } \details{ \code{lines.segmented} simply draws on the current device the point estimates and relevant confidence limits of the estimated breakpoints from a "segmented" object. The y coordinate where the bars are drawn is computed as \code{usr[3]+h} if \code{bottom=TRUE} or \code{usr[4]-h} when \code{bottom=FALSE}, where \code{h=(usr[4]-usr[3])/abs(k)} and \code{usr} are the extremes of the user coordinates of the plotting region. Therefore for larger values of \code{k} the bars are plotted on the edges. The argument \code{rev.sgn} allows to change the sign of the breakpoints before plotting. This may be useful when a null-right-slope constraint is set. } %\value{ % ~Describe the value returned % If it is a LIST, use % \item{comp1 }{Description of 'comp1'} % \item{comp2 }{Description of 'comp2'} % ... %} %\references{ ~put references to the literature/web site here ~ } %\author{ ~~who you are~~ } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{plot.segmented}} to plot the fitted segmented lines, and \code{\link{points.segmented}} to add the fitted joinpoints. } \examples{ ## See ?plot.segmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } \keyword{ nonlinear } segmented/man/vcov.segmented.Rd0000644000176200001440000000360413467355150016232 0ustar liggesusers\name{vcov.segmented} \alias{vcov.segmented} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Variance-Covariance Matrix for a Fitted Segmented Model} \description{ Returns the variance-covariance matrix of the parameters (including breakpoints) of a fitted segmented model object.} \usage{ \method{vcov}{segmented}(object, var.diff = FALSE, is = FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a fitted model object of class "segmented", returned by any \code{segmented} method.} \item{var.diff}{logical. If \code{var.diff=TRUE} and there is a single segmented variable, the covariance matrix is computed using a sandwich-type formula. See Details in \code{\link{summary.segmented}}.} \item{is}{logical. If \code{TRUE}, the \emph{asymptotic} covariance matrix based on the idea of induced smoothing is returned. If \code{is=TRUE}, \code{var.diff=FALSE} is set. } \item{\dots}{additional arguments. } } \details{ The returned covariance matrix is based on an approximation of the nonlinear segmented term. Therefore covariances corresponding to breakpoints are reliable only in large samples and/or clear cut segmented relationships. If \code{is=TRUE}, the returned covariance matrix depends on the design matrix having the term \eqn{I(x>\psi)}{I(x>psi)} replaced by its smooth counterpart. } \value{ The full matrix of the estimated covariances between the parameter estimates, including the breakpoints. } %\references{} \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it}} \note{\code{var.diff=TRUE} works when there is a single segmented variable.} \seealso{\code{\link{summary.segmented}}} \examples{ ##continues example from summary.segmented() # vcov(oseg) # vcov(oseg, var.diff=TRUE) # vcov(oseg, is=TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} segmented/man/intercept.Rd0000644000176200001440000000473213473327062015301 0ustar liggesusers\name{intercept} \alias{intercept} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Intercept estimates from segmented relationships } \description{ Computes the intercepts of each `segmented' relationship in the fitted model. } \usage{ intercept(ogg, parm, rev.sgn = FALSE, var.diff=FALSE, digits = max(4, getOption("digits") - 2)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ogg}{ an object of class "segmented", returned by any \code{segmented} method. } \item{parm}{ the segmented variable whose intercepts have to be computed. If missing all the segmented variables in the model are considered. } % \item{gap}{ % logical. should the intercepts account for the (possible) gaps? %} \item{rev.sgn}{vector of logicals. The length should be equal to the length of \code{parm}, but it is recycled otherwise. When \code{TRUE} it is assumed that the current \code{parm} is `minus' the actual segmented variable, therefore the order is reversed before printing. This is useful when a null-constraint has been set on the last slope. } \item{var.diff}{Currently ignored as only point estimates are computed. %logical. If \code{var.diff=TRUE} and there is a single segmented variable, the computed standard errors % are based on a sandwich-type formula of the covariance matrix. See Details in \code{\link{summary.segmented}}. } \item{digits}{controls number of digits in the returned output.} } \details{ A broken-line relationship means that a regression equation exists in the intervals `\eqn{min(x)}{min(x)} to \eqn{\psi_1}{psi1}', `\eqn{\psi_1}{psi1} to \eqn{\psi_2}{psi2}', and so on. \code{intercept} computes point estimates of the intercepts of the different regression equations for each segmented relationship in the fitted model. } \value{ \code{intercept} returns a list of one-column matrices. Each matrix represents a segmented relationship. } %\references{ %% ~put references to the literature/web site here ~ %} \author{Vito M. R. 
Muggeo, \email{vito.muggeo@unipa.it}}
%\note{
%%  ~~further notes~~
%}

%% ~Make other sections like Warning with \section{Warning }{....} ~

\seealso{
See also \code{\link{slope}} to compute the slopes of the different regression equations
for each segmented relationship in the fitted model.
}
\examples{
## see ?slope
\dontrun{
intercept(out.seg)
}
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ regression }
segmented/man/print.segmented.Rd0000644000176200001440000000127012766105424016404 0ustar liggesusers\name{print.segmented}
\alias{print.segmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Print method for the segmented class }
\description{
Printing the most important features of a segmented model.
}
\usage{
\method{print}{segmented}(x, digits = max(3, getOption("digits") - 3), ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{x}{ object of class \code{segmented} }
  \item{digits}{ number of digits to be printed }
  \item{\dots}{ arguments passed to other functions }
}
\author{ Vito M.R. Muggeo }
\seealso{ \code{\link{summary.segmented}}, \code{\link{print.summary.segmented}} }
\keyword{ models }
segmented/man/summary.segmented.Rd0000644000176200001440000001165613475512760016755 0ustar liggesusers\name{summary.segmented}
\alias{summary.segmented}
\alias{print.summary.segmented}
\title{ Summarizing model fits for segmented regression }
\description{
summary method for class \code{segmented}.
}
\usage{
\method{summary}{segmented}(object, short = FALSE, var.diff = FALSE, p.df="p", .vcov=NULL, ...)

\method{print}{summary.segmented}(x, short=x$short, var.diff=x$var.diff,
    digits = max(3, getOption("digits") - 3),
    signif.stars = getOption("show.signif.stars"),...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{object}{ Object of class "segmented". }
  \item{short}{ logical indicating if the `short' summary should be printed.
} \item{var.diff}{ logical indicating if different error variances should be computed in each interval of the segmented variable, see Details. If \code{.vcov} is provided, \code{var.diff} is set to \code{FALSE}. } \item{p.df}{ A character as a function of \code{'p'} (number of parameters) and \code{'K'} (number of groups or segments) affecting computations of the group-specific variance (and the standard errors) if \code{var.diff=TRUE}, see Details.} \item{.vcov}{ Optional. The full covariance matrix for the parameter estimates. If provided, standard errors are computed (and displayed) according to this matrix.} \item{x}{a \code{summary.segmented} object produced by \code{summary.segmented()}.} \item{digits}{controls number of digits printed in output.} \item{signif.stars}{logical, should stars be printed on summary tables of coefficients?} \item{\dots}{ further arguments. } } \details{ If \code{short=TRUE} only coefficients of the segmented relationships are printed. If \code{var.diff=TRUE} and there is only one segmented variable, different error variances are computed in the intervals defined by the estimated breakpoints of the segmented variable. For the jth interval with \eqn{n_j}{nj} observations, the error variance is estimated via \eqn{RSS_j/(n_j-p)}{RSSj/(nj-p)}, where \eqn{RSS_j} is the residual sum of squares in interval j, and \eqn{p}{p} is the number of model parameters. This number to be subtracted from \eqn{n_j}{nj} can be changed via argument \code{p.df}. For instance \code{p.df="0"} uses \eqn{RSS_j/(n_j)}{RSSj/(nj)}, and \code{p.df="p/K"} leads to \eqn{RSS_j/(n_j-p/K)}{RSSj/(nj-p/K)}, where \eqn{K}{K} is the number of groups (segments), and \eqn{p/K}{p/K} can be interpreted as the average number of model parameter in that group. Note \code{var.diff=TRUE} only affects the estimates covariance matrix. It does \emph{not} affect the parameter estimates, neither the log likelihood and relevant measures, such as AIC or BIC. 
In other words, \code{var.diff=TRUE} just provides 'alternative' standard errors, probably appropriate
when the error variances are different before/after the estimated breakpoints. Also \eqn{p-values}{p-values}
are computed using the t-distribution with 'naive' degrees of freedom (as reported in \code{object$df.residual}).

If \code{var.diff=TRUE} the variance-covariance matrix of the estimates is computed via the
sandwich formula,
\deqn{(X^TX)^{-1}X^TVX(X^TX)^{-1}}{(X'X)^{-1}X'VX(X'X)^{-1}}
where V is the diagonal matrix including the different group-specific error variance estimates.
Standard errors are the square root of the main diagonal of this matrix.
}
\value{
A list (similar to one returned by \code{segmented.lm} or \code{segmented.glm}) with additional components:
  \item{psi }{estimated break-points and relevant (approximate) standard errors}
  \item{Ttable }{estimates and standard errors of the model parameters. This is similar
    to the matrix \code{coefficients} returned by \code{summary.lm} or \code{summary.glm},
    but without the rows corresponding to the breakpoints. Even the p-values
    relevant to the difference-in-slope parameters have been replaced by NA, since they are
    meaningless in this case, see \code{\link{davies.test}}.}
  \item{gap}{estimated coefficients, standard errors and t-values for the `gap' variables}
  \item{cov.var.diff}{if \code{var.diff=TRUE}, the covariance matrix accounting for heteroscedastic errors.}
  \item{sigma.new}{if \code{var.diff=TRUE}, the square root of the estimated error variances in each interval.}
  \item{df.new}{if \code{var.diff=TRUE}, the residual degrees of freedom in each interval.}
}
%\references{ ~put references to the literature/web site here ~ }
\author{ Vito M.R. Muggeo }
\seealso{ \code{\link{print.segmented}}, \code{\link{davies.test}} }
\examples{
##continues example from segmented()
# summary(segmented.model,short=TRUE)

## a heteroscedastic example..
# set.seed(123) # n<-100 # x<-1:n/n # y<- -x+1.5*pmax(x-.5,0)+rnorm(n,0,1)*ifelse(x<=.5,.4,.1) # o<-lm(y~x) # oseg<-segmented(o,seg.Z=~x,psi=.6) # summary(oseg,var.diff=TRUE)$sigma.new } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ regression } segmented/man/seg.lm.fit.Rd0000644000176200001440000001016312766105424015245 0ustar liggesusers\name{seg.lm.fit} \alias{seg.lm.fit} \alias{seg.glm.fit} \alias{seg.def.fit} \alias{seg.Ar.fit} \alias{seg.lm.fit.boot} \alias{seg.glm.fit.boot} \alias{seg.def.fit.boot} \alias{seg.Ar.fit.boot} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fitter Functions for Segmented Linear Models } \description{ \code{seg.lm.fit} is called by \code{segmented.lm} to fit segmented linear (gaussian) models. Likewise, \code{seg.glm.fit} is called by \code{segmented.glm} to fit generalized segmented linear models, and \code{seg.def.fit} is called by \code{segmented.default} to fit segmented relationships in general regression models (e.g., quantile regression and Cox regression). \code{seg.lm.fit.boot}, \code{seg.glm.fit.boot}, and \code{seg.def.fit.boot} are employed to perform bootstrap restart. These functions should usually not be used directly by the user. 
}
\usage{
seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)

seg.lm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10,
    size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE)

seg.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)

seg.glm.fit.boot(y, XREG, Z, PSI, w, offs, opz, n.boot=10,
    size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE)

seg.def.fit(obj, Z, PSI, mfExt, opz, return.all.sol=FALSE)

seg.def.fit.boot(obj, Z, PSI, mfExt, opz, n.boot=10, size.boot=NULL,
    jt=FALSE, nonParam=TRUE, random=FALSE)

seg.Ar.fit(obj, XREG, Z, PSI, opz, return.all.sol=FALSE)

seg.Ar.fit.boot(obj, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL,
    jt=FALSE, nonParam=TRUE, random=FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{y}{ vector of observations of length \code{n}. }
  \item{XREG}{ design matrix for standard linear terms. }
  \item{Z}{ appropriate matrix including the segmented variables whose breakpoints have to be estimated. }
  \item{PSI}{ appropriate matrix including the starting values of the breakpoints to be estimated. }
  \item{w}{ possible weights vector. }
  \item{offs}{ possible offset vector. }
  \item{opz}{ a list including information useful for model fitting. }
  \item{n.boot}{ the number of bootstrap samples employed in the bootstrap restart algorithm. }
  \item{size.boot}{ the size of the bootstrap resamples. If \code{NULL} (default), it is taken equal to the sample size.
  Values smaller than the sample size are expected to increase perturbation in the bootstrap resamples. }
  \item{jt}{ logical. If \code{TRUE} the values of the segmented variable(s) are jittered before fitting the model to the
  bootstrap resamples. }
  \item{nonParam}{ if \code{TRUE} nonparametric bootstrap (i.e. case-resampling) is used, otherwise residual-based. }
  \item{random}{ if \code{TRUE}, when the algorithm fails to obtain a solution, random values are used as candidate values.
} \item{return.all.sol}{ if \code{TRUE}, when the algorithm fails to obtain a solution, the values visited by the algorithm with corresponding deviances are returned. } \item{obj}{ the starting regression model where the segmented relationships have to be added. } \item{mfExt}{ the model frame. } } \details{ The functions call iteratively \code{lm.wfit} (or \code{glm.fit}) with proper design matrix depending on \code{XREG}, \code{Z} and \code{PSI}. \code{seg.lm.fit.boot} (and \code{seg.glm.fit.boot}) implements the bootstrap restarting idea discussed in Wood (2001). } \value{ A list of fit information. } \references{ Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244. } \author{ Vito Muggeo } \note{ These functions should usually not be used directly by the user. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{segmented.lm}}, \code{\link{segmented.glm}} } \examples{ ##See ?segmented } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{regression} \keyword{nonlinear } segmented/man/down.Rd0000644000176200001440000000203512766105420014241 0ustar liggesusers\name{down} \alias{down} \docType{data} \title{ Down syndrome in babies} \description{ The \code{down} data frame has 30 rows and 3 columns. Variable \code{cases} means the number of babies with Down syndrome out of total number of births \code{births} for mothers with mean age \code{age}. } \usage{data(down)} \format{ A data frame with 30 observations on the following 3 variables. \describe{ \item{\code{age}}{the mothers' mean age.} \item{\code{births}}{count of total births.} \item{\code{cases}}{count of babies with Down syndrome.} } } %\details{ % ~~ If necessary, more details than the description above ~~ %} \source{ Davison, A.C. and Hinkley, D. V. (1997) \emph{Bootstrap Methods and their Application}. 
Cambridge University Press. } \references{ Geyer, C. J. (1991) Constrained maximum likelihood exemplified by isotonic convex logistic regression. \emph{Journal of the American Statistical Association} \bold{86}, 717--724. } \examples{ data(down) } \keyword{datasets} segmented/man/segmented-package.Rd0000644000176200001440000000745513572237424016657 0ustar liggesusers\name{segmented-package} \alias{segmented-package} %\alias{segmented} \docType{package} \title{ Segmented relationships in regression models with breakpoints / changepoints estimation } \description{ Estimation and Inference of Regression Models with piecewise linear relationships having a fixed number of break-points. The estimation method is described in Muggeo (2003) . } \details{ \tabular{ll}{ Package: \tab segmented\cr Type: \tab Package\cr Version: \tab 1.1-0\cr Date: \tab 2019-12-10\cr License: \tab GPL\cr } Package \code{segmented} is aimed to estimate linear and generalized linear models (and virtually any regression model) having one or more segmented relationships in the linear predictor. Estimates of the slopes and breakpoints are provided along with standard errors. The package includes testing/estimating functions and methods to print, summarize and plot the results. \cr The algorithm used by \code{segmented} is \emph{not} grid-search. It is an iterative procedure (Muggeo, 2003) that needs starting values \emph{only} for the breakpoint parameters and therefore it is quite efficient even with several breakpoints to be estimated. Moreover since version 0.2-9.0, \code{segmented} implements the bootstrap restarting (Wood, 2001) to make the algorithm less sensitive to starting values. \cr Since version 0.5-0.0 a default method \code{segmented.default} has been added. It may be employed to include segmented relationships in \emph{general} regression models where specific methods do not exist. Examples include quantile and Cox regressions. 
See examples in \code{\link{segmented.default}}.\cr

Since version 1.0-0 the estimating algorithm has been slightly modified and it appears to be much stabler
(in examples with noisy segmented relationships and flat log likelihoods)
than previous versions.\cr

Hypothesis testing (about the existence of the breakpoint)
and confidence intervals are performed via appropriate methods and functions.

A tentative approach to deal with unknown number of breakpoints is also provided, see option \code{fix.npsi} in \code{\link{seg.control}}.
}
\author{
Vito M.R. Muggeo
}
\references{
Muggeo, V.M.R. (2017) Interval estimation for the breakpoint in segmented regression: a smoothed
score-based approach. \emph{Australian & New Zealand Journal of Statistics} \bold{59}, 311--322.

Muggeo, V.M.R. (2016) Testing with a nuisance parameter present only under the alternative:
a score-based approach with application to segmented modelling.
\emph{J of Statistical Computation and Simulation} \bold{86}, 3059--3067.

Davies, R.B. (1987) Hypothesis testing when a nuisance parameter is present only under the alternative.
\emph{Biometrika} \bold{74}, 33--43.

Seber, G.A.F. and Wild, C.J. (1989) \emph{Nonlinear Regression}. Wiley, New York.

Bacon D.W., Watts D.G. (1971) Estimating the transition between two intersecting straight lines.
\emph{Biometrika} \bold{58}: 525 -- 534.

Muggeo, V.M.R. (2003) Estimating regression models with unknown break-points.
\emph{Statistics in Medicine} \bold{22}, 3055--3071.

Muggeo, V.M.R. (2008) Segmented: an R package to fit regression models with broken-line relationships.
\emph{R News} \bold{8/1}, 20--25.

Muggeo, V.M.R., Adelfio, G. (2011) Efficient change point detection in genomic sequences of
continuous measurements. \emph{Bioinformatics} \bold{27}, 161--166.

Wood, S. N. (2001) Minimizing model fitting objectives that contain spurious local minima
by bootstrap restarting. \emph{Biometrics} \bold{57}, 240--244.

Muggeo, V.M.R.
(2010) Comment on `Estimating average annual per cent change in trend analysis' by Clegg et al., Statistics in Medicine; 28, 3670-3682. \emph{Statistics in Medicine}, \bold{29}, 1958--1960. } \keyword{ regression } \keyword{ nonlinear } segmented/man/stagnant.Rd0000644000176200001440000000161012766105426015115 0ustar liggesusers\name{stagnant} \alias{stagnant} \docType{data} \title{Stagnant band height data} \description{ The \code{stagnant} data frame has 28 rows and 2 columns. } \usage{data(stagnant)} \format{ A data frame with 28 observations on the following 2 variables. \describe{ \item{\code{x}}{log of flow rate in g/cm sec.} \item{\code{y}}{log of band height in cm} } } \details{ Bacon and Watts report that such data were obtained by R.A. Cook during his investigation of the behaviour of stagnant surface layer height in a controlled flow of water. } \source{ Bacon D.W., Watts D.G. (1971) Estimating the transistion between two intersecting straight lines. \emph{Biometrika} \bold{58}: 525 -- 534. Originally from the PhD thesis by R.A. Cook } %\references{ % PhD thesis by R.A. Cook %} \examples{ data(stagnant) ## plot(stagnant) } \keyword{datasets} segmented/man/pscore.test.rd0000644000176200001440000001214213475504426015612 0ustar liggesusers\name{pscore.test} \alias{pscore.test} \title{ Testing for existence of one breakpoint} \description{ Given a (generalized) linear model, the (pseudo) Score statistic tests for the existence of one breakpoint. } \usage{ pscore.test(obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), values=NULL, dispersion=NULL, df.t=NULL, more.break=FALSE, n.break=1) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ a fitted model typically returned by \code{glm} or \code{lm}. Even an object returned by \code{segmented} can be set. 
Offset and weights are allowed.}
  \item{seg.Z}{ a formula with no response variable, such as \code{seg.Z=~x1}, indicating the
  (continuous) segmented variable being tested. Only a single variable may be tested and an error is printed
  when \code{seg.Z} includes two or more terms. \code{seg.Z} can be omitted if i) \code{obj} is a segmented fit with a single segmented covariate (and that variable is taken), or ii) if it is a "lm" or "glm" fit with a single covariate (and that variable is taken).}
  \item{k}{ optional. Number of points used to compute the pseudo Score statistic. See Details. }
  \item{alternative}{ a character string specifying the alternative hypothesis. }
  \item{values}{ optional. The evaluation points where the Score test is computed. See Details for default values.}
  \item{dispersion}{ optional. The dispersion parameter for the family to be used to compute the test statistic.
  When \code{NULL} (the default), it is inferred from \code{obj}. Namely it is taken as \code{1} for the
Binomial and Poisson families, and otherwise estimated by the residual Chi-squared statistic in the model \code{obj} (calculated from cases with non-zero weights divided by the residual degrees of freedom).}
 \item{df.t}{ optional. The degrees of freedom used to compute the p-value. When \code{NULL}, the df extracted from \code{obj} are used.}
 \item{more.break}{ optional, logical. If \code{obj} is a 'segmented' fit, \code{more.break=FALSE} tests for the actual breakpoint for the variable 'seg.Z', while \code{more.break=TRUE} tests for an \emph{additional} breakpoint(s) for the variable 'seg.Z'. Ignored when \code{obj} is not a segmented fit.}
 \item{n.break}{optional. Number of breakpoints postulated under the alternative hypothesis.}
}
\details{
\code{pscore.test} tests for a non-zero difference-in-slope parameter of a segmented
relationship. Namely, the null hypothesis is \eqn{H_0:\beta=0}{H_0:beta=0}, where \eqn{\beta}{beta} is the difference-in-slopes, i.e.
the coefficient of the segmented function \eqn{\beta(x-\psi)_+}{beta*(x-psi)_+}. The hypothesis of interest \eqn{\beta=0}{beta=0} means no breakpoint. Simulation studies have shown that such Score test is more powerful than the Davies test (see reference) when the alternative hypothesis is `one changepoint'. If there are two or more breakpoints (for instance, a sinusoidal-like relationships), \code{pscore.test} can have lower power, and \code{\link{davies.test}} can perform better. The \code{dispersion} value, if unspecified, is taken from \code{obj}. If \code{obj} represents the fit under the null hypothesis (no changepoint), the dispersion parameter estimate will be usually larger, leading to a (potentially severe) loss of power. The \code{k} evaluation points are \code{k} equally spaced values in the range of the segmented covariate. \code{k} should not be small. Specific values can be set via \code{values}. However I have found no important difference due to number and location of the evaluation points, thus default is \code{k=10} equally-spaced points. If \code{obj} is a (segmented) \emph{lm} object, the returned p-value comes from the t-distribution with appropriate degrees of freedom. Otherwise, namely if \code{obj} is a (segmented) \emph{glm} object, the Normal distribution is used. } \value{ A list with class '\code{htest}' containing the following components: \item{method}{title (character)} \item{data.name}{the regression model and the segmented variable being tested} \item{statistic }{the empirical value of the statistic} \item{parameter }{number of evaluation points} \item{p.value }{the p-value} \item{process}{the alternative hypothesis set} } \references{ Muggeo, V.M.R. (2016) Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling. \emph{J of Statistical Computation and Simulation}, \bold{86}, 3059--3067. } \author{ Vito M.R. 
Muggeo
}
\seealso{See also \code{\link{davies.test}}. }
\examples{
\dontrun{
set.seed(20)
z<-runif(100)
x<-rnorm(100,2)
y<-2+10*pmax(z-.5,0)+rnorm(100,0,3)
o<-lm(y~z+x)

#testing for one changepoint
#use the simple null fit
pscore.test(o,~z) #compare with davies.test(o,~z)..

#use the segmented fit
os<-segmented(o, ~z)
pscore.test(os,~z) #smaller p-value, as it uses the dispersion under the alternative (from 'os')

#test for the 2nd breakpoint in the variable z
pscore.test(os,~z, more.break=TRUE)
}
}
\keyword{ htest }
segmented/man/aapc.Rd0000644000176200001440000000562313476144564014213 0ustar liggesusers\name{aapc}
\alias{aapc}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Average annual per cent change in segmented trend analysis }
\description{
Computes the average annual per cent change to summarize piecewise linear relationships in segmented regression models.
}
\usage{
aapc(ogg, parm, exp.it = FALSE, conf.level = 0.95, wrong.se = TRUE, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{ogg}{ the fitted model returned by \code{segmented}. }
  \item{parm}{ the \emph{single} segmented variable of interest. It can be missing if the model includes a single segmented covariate. If missing and \code{ogg} includes several segmented variables, the first one is considered.}
  \item{exp.it}{logical. If \code{TRUE}, the per cent change is computed, namely \eqn{\exp(\hat\mu)-1}{exp(mu)-1} where \eqn{\mu=\sum_j \beta_jw_j}{mu=\sum j bjwj}, see `Details'.}
  \item{conf.level}{the confidence level desired.}
  \item{wrong.se}{logical, if \code{TRUE}, the `wrong' standard error (as discussed in Clegg et al. (2009)) ignoring uncertainty in the breakpoint estimate is returned as an attribute \code{"wrong.se"}.}
  \item{...}{further arguments to be passed on to \code{vcov.segmented()}, such as \code{var.diff} or \code{is}.}
}
\details{
To summarize the fitted piecewise linear relationship, Clegg et al.
(2009) proposed the 'average annual per cent change' (AAPC) computed as the sum of the slopes (\eqn{\beta_j}{beta_j}) weighted by corresponding covariate sub-interval width (\eqn{w_j}{w_j}), namely \eqn{\mu=\sum_j \beta_jw_j}{mu=sum_j beta_j w_j}. Since the weights are the breakpoint differences, the standard error of the AAPC should account for uncertainty in the breakpoint estimate, as discussed in Muggeo (2010) and implemented by \code{aapc()}. } \value{ \code{aapc} returns a numeric vector including point estimate, standard error and confidence interval for the AAPC relevant to variable specified in \code{parm}. } \references{ Clegg LX, Hankey BF, Tiwari R, Feuer EJ, Edwards BK (2009) Estimating average annual per cent change in trend analysis. \emph{Statistics in Medicine}, \bold{28}; 3670-3682 Muggeo, V.M.R. (2010) Comment on `Estimating average annual per cent change in trend analysis' by Clegg et al., Statistics in Medicine; 28, 3670-3682. \emph{Statistics in Medicine}, \bold{29}, 1958--1960. } \author{Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} } \note{ \code{exp.it=TRUE} would be appropriate only if the response variable is the log of (any) counts. } %% ~Make other sections like Warning with \section{Warning }{....} ~ %%\seealso{ %% ~~objects to See Also as \code{\link{help}}, ~~~ %%} \examples{ set.seed(12) x<-1:20 y<-2-.5*x+.7*pmax(x-9,0)-.8*pmax(x-15,0)+rnorm(20)*.3 o<-lm(y~x) os<-segmented(o, psi=c(5,12)) aapc(os) } \keyword{ regression } segmented/man/broken.line.Rd0000644000176200001440000000715213500407744015506 0ustar liggesusers\name{broken.line} \alias{broken.line} \title{ Fitted values for segmented relationships} \description{ Given a segmented model (typically returned by a \code{segmented} method), \code{broken.line} computes the fitted values (and relevant standard errors) for each `segmented' relationship. } \usage{ broken.line(ogg, term = NULL, link = TRUE, interc=TRUE, se.fit=TRUE, isV=FALSE, .vcov=NULL, ...) 
} \arguments{ \item{ogg}{ A fitted object of class segmented (returned by any \code{segmented} method). } \item{term}{ Three options. i) A named list (whose name should be one of the segmented covariates in the model \code{ogg}) including the covariate values for which segmented predictions should be computed; ii) a character meaning the name of any segmented covariate in the model, and predictions corresponding to the observed covariate values are returned; iii) It can be \code{NULL} if the model includes a single segmented covariate, and predictions corresponding to the observed covariate values are returned; } \item{link}{ Should the predictions be computed on the scale of the link function? Default to \code{TRUE}. } \item{interc}{ Should the model intercept be added? (provided it exists).} \item{se.fit}{ If \code{TRUE} also standard errors for predictions are returned.} \item{isV}{ A couple of logicals indicating if the segmented terms \eqn{(x-\psi)_+}{(x-\psi)_+} and \eqn{I(x>\psi)}{I(x>\psi)} in the model matrix should be replaced by their smoothed counterparts. If a single logical is provided, it is applied to both terms.} \item{.vcov}{ Optional. The \emph{full} covariance matrix of estimates. If \code{NULL} (and \code{se.fit=TRUE}), the matrix is computed internally via \code{vcov.segmented()}.} \item{...}{ Additional arguments to be passed on to \code{vcov.segmented()} when computing the standard errors for the predictions, namely \code{is}, \code{var.diff}, \code{p.df}. See \code{\link{summary.segmented}} and \code{\link{vcov.segmented}}.} } \details{ If \code{term=NULL} or \code{term} is a valid segmented covariate name, predictions for each segmented variable are the relevant fitted values from the model. If \code{term} is a (correctly named) list with numerical values, predictions corresponding to such specified values are computed. 
If \code{link=FALSE} and \code{ogg} inherits from the class "glm", predictions and possible standard errors
are returned on the response scale. The standard errors come from the Delta method.
Argument \code{link} is ignored when \code{ogg} does not inherit from the class "glm".
}
\value{
A list having one component (if \code{se.fit=FALSE}) or two components (if \code{se.fit=TRUE})
representing predictions and standard errors for the segmented covariate values.
}
%\references{ ~put references to the literature/web site here ~ }
\author{ Vito M. R. Muggeo }
%\note{
%This function will be probably removed in the next versions. See \code{predict.segmented} instead.
%}
% ~Make other sections like Warning with \section{Warning }{....} ~
%}
\seealso{ \code{\link{segmented}}, \code{\link{predict.segmented}}, \code{\link{plot.segmented}}, \code{\link{vcov.segmented}}}
\examples{
set.seed(1234)
z<-runif(100)
y<-rpois(100,exp(2+1.8*pmax(z-.6,0)))
o<-glm(y~z,family=poisson)
o.seg<-segmented(o,seg.Z=~z,psi=.5)
\dontrun{plot(z,y)}
\dontrun{points(z,broken.line(o.seg,link=FALSE)$fit,col=2)} #ok, but use plot.segmented()!
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ regression }
\keyword{ nonlinear }
segmented/man/points.segmented.Rd0000644000176200001440000000432713056000740016560 0ustar liggesusers\name{points.segmented}
\alias{points.segmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Points method for segmented objects
}
\description{
Takes a fitted \code{segmented} object returned by \code{segmented()} and adds
on the current plot the joinpoints of the fitted broken-line relationships.
}
\usage{
\method{points}{segmented}(x, term, interc = TRUE, link = TRUE, rev.sgn=FALSE,
  transf=I, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{x}{
an object of class \code{segmented}.
}
  \item{term}{
the segmented variable of interest.
It may be unspecified when there is a single segmented variable. } \item{interc}{ If \code{TRUE} the computed joinpoints include the model intercept (if it exists). } \item{link}{ when \code{TRUE} (default), the fitted joinpoints are plotted on the link scale } \item{rev.sgn}{ when \code{TRUE}, the fitted joinpoints are plotted on the `minus' scale of the current \code{term} variable. This is useful when a null-constraint has been set on the last slope. } \item{transf}{ A possible function to convert the fitted values before plotting. } \item{\dots}{ other graphics parameters to pass on to \code{points()} function. } } \details{ We call 'joinpoint' the plane point having as coordinates the breakpoint (on the x scale) and the fitted value of the segmented relationship at that breakpoint (on the y scale). \code{points.segmented()} simply adds the fitted joinpoints on the current plot. This could be useful to emphasize the changes of the piecewise linear relationship. } %\value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... %} %\references{ %% ~put references to the literature/web site here ~ %} %\author{ %% ~~who you are~~ %} %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{plot.segmented}} to plot the fitted segmented lines. } \examples{ \dontrun{ #see examples in ?plot.segmented } } \keyword{ nonlinear } \keyword{ regression }% __ONLY ONE__ keyword per line segmented/man/plant.Rd0000644000176200001440000000226612766105422014420 0ustar liggesusers\name{plant} \alias{plant} \docType{data} \title{ Plan organ dataset} \description{ The \code{plant} data frame has 103 rows and 3 columns. 
} \usage{data(plant)} \format{ A data frame with 103 observations on the following 3 variables: \describe{ \item{\code{y}}{measurements of the plant organ.} \item{\code{time}}{times where measurements took place.} \item{\code{group}}{three attributes of the plant organ, \code{RKV}, \code{RKW}, \code{RWC}.} } } \details{ Three attributes of a plant organ measured over time where biological reasoning indicates likelihood of multiple breakpoints. The data are scaled to the maximum value for each attribute and all attributes are measured at each time. } \source{ The data have been kindly provided by Dr Zongjian Yang at School of Land, Crop and Food Sciences, The University of Queensland, Brisbane, Australia. } %\references{ % ~~ possibly secondary sources and usages ~~ %} \examples{ \dontrun{ data(plant) attach(plant) %lattice::xyplot(y~time,groups=group,pch=19,col=2:4,auto.key=list(space="right")) lattice::xyplot(y~time,groups=group,auto.key=list(space="right")) } } \keyword{datasets} segmented/DESCRIPTION0000644000176200001440000000203613573767742013761 0ustar liggesusersPackage: segmented Type: Package Title: Regression Models with Break-Points / Change-Points Estimation Version: 1.1-0 Date: 2019-12-10 Authors@R: c(person(given = c("Vito","M.","R."), family = "Muggeo", role = c("aut", "cre"), email = "vito.muggeo@unipa.it")) Author: Vito M. R. Muggeo [aut, cre] Maintainer: Vito M. R. Muggeo Description: Given a regression model, segmented `updates' it by adding one or more segmented (i.e., piece-wise linear) relationships. Several variables with multiple breakpoints are allowed. The estimation method is discussed in Muggeo (2003, ) and illustrated in Muggeo (2008, ). An approach for hypothesis testing is presented in Muggeo (2016, ), and interval estimation for the breakpoint is discussed in Muggeo (2017, ). 
License: GPL NeedsCompilation: no Packaged: 2019-12-10 15:31:45 UTC; vito Repository: CRAN Date/Publication: 2019-12-10 19:20:02 UTC segmented/NEWS0000644000176200001440000004407613572500700012736 0ustar liggesusers**************************** * * * Changes in segmented * * * **************************** =============== version 1.1-0 =============== * Changes in segmented.default to allow estimation of betareg models (thanks to Malcolm Baptie and Jochen Wilhelm for their input) =============== version 1.0-0 =============== * New implementation of the estimating algorithm. Now segmented is much stabler, and dependence on starting values (that occurred in some 'difficult' datasets) is greatly reduced and apparently cancelled. * New arguments: - 'npsi' in segmented.* methods to specify the number of breakpoints (and not the values) to be estimated. - 'is' in vcov.segmented() to compute covariance matrix based on the idea of induced smoothing. This leads to higher (and more trustworthy) standard errors for the breakpoint. - 'isV' in broken.line() and plot.segmented() to allow smooth "transition" in the standard errors values for the fitted values at the breakpoint. - 'conv.psi', 'alpha', 'fix.npsi', 'min.step' and 'tol' (the last one replacing 'toll') in seg.control() (where arguments 'last', 'stop.if.error', and 'gap' will be removed in the next releases) - 'n.break' in pscore.test(). - .vcov in broken.line() and plot.segmented() to pass directly the full covariance matrix of estimates. plot.segmented() accepts 'col.shade', see ?plot.segmented * Minor changes: better display of the iterative steps (if display=TRUE in seg.control()) and general improvement in draw.history(). =============== version 0.5-4.0 =============== * confint.segmented() now computes breakpoint confidence intervals via the (smoothed) Score or Gradient statistic, see new argument 'method' in confint.segmented(). 
* if segmented does not converge (since the estimated psi leaves only 1 datum on its left/right) the last psi estimate is printed as a message. * argument keep.class added in segmented.default() * argument 'rug' defaults to '!add' in plot.segmented() * confint.segmented now returns a matrix (rather than a list) * bug fixed: plot.segmented() did not handle appropriately arguments cex.lab and cex.axis. segmented methods did not terminate appropriately when automatic selection of number of breakpoints was performed (i.e. 'stop.if.error=FALSE'). davies.test() and segmented.glm() required 'seg.Z' even if there was just one covariate in the starting model and it could be missing (thanks to Lein E. Pardo for reporting that). Some minor bug fixes (related to argument 'data' and one-sided alternative) in pscore.test (thanks to Karista Hudelson for reporting). =============== version 0.5-3.0 =============== * aapc() introduced. The function computes the 'average annual percent change' to summarize piecewise linear trends (thanks to Yuchen Qin for his input). * plot.segmented() now accepts arguments 'cex.axis' and 'cex.lab' (thanks to Matthew Birk for his input). * bug fixed: segmented.Arima didn't work for arima fits including a seasonal component (thanks to Claudio Agostinelli for reportig the bug). =============== version 0.5-2.2 =============== * when there is a single covariate in the starting (g)lm, seg.Z can be missing when calling the segmented methods. * bug fixed: plot.segmented(.., link=FALSE) did not work correctly (sometimes it returned an error) for glm fits with multiple breakpoints. Weights were not handled appropriately by segmented.lm. =============== version 0.5-2.1 =============== * pscore.test() now works also for "glm" fits * plot.segmented() now plots the partial residuals as "component + working residuals" (rather than Pearson residuals, relevant only for glm fits). * segmented.default() now is expected to work for fits obtained by MASS::rlm(). 
=============== version 0.5-2.0 =============== * pscore.test() introduced. The function tests for a breakpoint using a (pseudo) score statistic which is more powerful than davies.test(), especially when the breakpoint is expected to be in the middle of the covariate range and the signal-to-noise ratio is high. * argument 'digits' added in seg.control() to fix the number of digits of the breakpoint estimate during the iterative estimation algorithm. * bug fixed: conf.level>0 in plot.segmented() did not work for objects returned by segmented.default(). =============== version 0.5-1.5 (not on CRAN) =============== * arguments 'gap' and 'show.gap' removed in intercept() and in plot.segmented(). (they are meaningless, as segmented() always returns joined piecewise lines, i.e. with no gaps). * slope() and broken.line() (and then plot.segmented() which uses them) did not work for objects returned by segmented.default() (Thanks to Marcos Krull for reporting). =============== version 0.5-1.4 =============== * segmented.Arima() should be slightly faster, as starting values are passed in arima() (via 'init') throughout the iterative process. * plot.segmented() is expected to work for objects returned by segmented.Arima. * print.summary.segmented() does not print anymore the t-values for the gap coefficients (this information is meaningless as the gap coeffs are always set to zero in the returned model). * Bug fixed: intercept() ignored argument 'rev.sgn'; points.segmented() missed argument 'transf'. =============== version 0.5-1.3 (not on CRAN) =============== * plot.segmented() gains argument 'transf' to plot 'transf(values)' rather 'values' on the current plot. * print.summary.segmented() now uses round() rather than signif() when displaying the breakpoint estimate. * Bug fixed: psi=NA was not working in the segmented.* methods; this bug was incidentally introduced in the last version (thanks to Bertrand Sudre for first reporting that). 
=============== version 0.5-1.2 =============== * For 1 breakpoint models, 'psi' argument can be missing (default) when calling the segmented methods. * Bug fixed: lines.segmented() did not plot the dots when the fit object included multiple breakpoints and the argument 'shift' was set to FALSE (thanks to Jan Bull for reporting). There were some troubles with variable names including dots (thanks to Melanie Zoelck which first reported this bug). =============== version 0.5-1.1 =============== * segmented.default now accepts 'gee' fits (Thanks to John Boulanger for his input) * Minor change: argument 'col.dens' changed to 'dens.col' in plot.segmented() ('col.dens' made ineffective 'col') * Minor change: error/warning messages introduced in davies.test() if k<10; print.segmented slightly changed in displaying the estimated breakpoints. * Bug fixed: segmented did not terminate appropriately the algorithm with automatic selection of breakpoints concerning more than one variable (thanks to Ali Hashemi for reporting). =============== version 0.5-1.0 =============== * segmented.Arima() introduced. Now it is possible to estimate segmented relationships in "Arima" fits (although the summarizing and plotting methods do not work..) * plot.segmented() gains arguments 'dens.rug' and 'col.dens' to display in the plot (on the x axis) also the smoothed density of the segmented covariate. * Bug fixed: segmented.lm did not work if it.max=0 (but segmented.glm did), thanks to Eric Nussbaumer for reporting. segmented.lm and segmented.glm did work if the starting linear model included weights (this bug was introduced incidentally since version 0.4-0.1; thanks to Michael Rutter for reporting). segmented.lm and segmented.glm did not check appropriately inadmissible breakpoints (thanks to Erica Tennenhouse for reporting). segmented.lm and segmented.glm did not handle correctly variable names equal to function names. 
davies.test() did not work with 'segmented' objects (to test for an additional breakpoint).
=============== version 0.2-8.4 =============== * Some bugs fixed: segmented.glm() fitted a simple "lm" (and not "glm") (the error was introduced incidentally from 0.2-8.3, thanks to Veronique Storme for finding the error); broken.line() was not working for models without intercept and a null left slope; intercept() was not working correctly with multiple segmented variables. =============== version 0.2-8.3 =============== * Some minor bugs fixed: segmented.lm() and segmented.glm() did not find the offset variable in the dataframe where the initial (g)lm was called for; segmented.lm() and segmented.glm() sometimes returned an error when the automated algorithm was used (thanks to Paul Cohen for finding the error). =============== version 0.2-8.2 =============== * Some minor bugs fixed (segmented.lm() and segmented.glm() *alway* included the left slope in the estimation process, although the number of parameters was correct in the returned final fit. confint.segmented() did not order the estimated breakpoints for the variable having rev.sgn=TRUE; intercept() missed the (currently meaningless) argument var.diff (thanks to Eric Fuchs for pointing out that). ) =============== version 0.2-8.1 =============== * Some minor bugs fixed (segmented.lm() and segmented.glm() were not working correctly with dataframe subset or when the starting linear model included several intercepts (e.g., see the example about data("plant"); thanks to Nicola Ferrari for finding the error). davies.test() did not work when the variable name of its argument `seg.Z' included reserved words, e.g. `seg.Z~dist'; thanks to Thom White for finding the error). =============== version 0.2-8 =============== * intercept() added. It computes the intercepts of the regression lines for each segment of the fitted segmented relationship. * plot.segmented() now accepts a vector `col' argument to draw the fitted piecewise linear relationships with different colors. 
* Some minor bugs fixed (summary.segmented() was not working correctly).
segmented/R/0000755000176200001440000000000013572471543012441 5ustar liggesuserssegmented/R/vcov.segmented.R0000644000176200001440000000473713475757520015532 0ustar liggesusers#vc<-function(obj){ # invXtX<-chol2inv(qr.R(obj$qr)) #(XtX)^{-1} # V<-vcov.segmented(obj,is=TRUE) # s2<- if(inherits(obj, "glm")) summary.glm(obj)$dispersion else summary.lm(obj)$sigma^2 # s2*V%*% invXtX %*% V #} vcov.segmented<-function(object, var.diff=FALSE, is=FALSE, ...){ if(is && inherits(object, "Arima")) { warning("is=TRUE ignored with Arima fits", call.=FALSE) is<-FALSE } if(is){ if(var.diff) warning("option 'var.diff=TRUE' ignored with 'is=TRUE' ", call.=FALSE) X<-model.matrix(object) #qr.X(object$qr) piu efficiente? nomiZ<- object$nameUV$Z nomiV<- object$nameUV$V nomiU<- object$nameUV$U for(i in 1:length(nomiV)){ nomeU<-nomiU[i] nomeV<-nomiV[i] nomepsi<-strsplit(nomeV,"\\.")[[1]][1] #solo "psi1" o "psi2",.. e' meglio estrarre il "psi1" perche' il nome della variabile puo' contenere un punto.. nomeZ<-gsub(paste(nomepsi,".",sep=""),"",nomeV) #estrae il nome della variabile.. Z<-X[,nomeZ] est.psi<- object$psi[nomeV,"Est."] se.psi<- object$psi[nomeV,"St.Err"] X[,nomeV]<- (-object$coefficients[nomeU])*pnorm((Z-est.psi)/se.psi) } s2<- if(inherits(object, "glm")) summary.glm(object)$dispersion else summary.lm(object)$sigma^2 w<-object$weights if(is.null(w)) w<-1 v<-s2*solve(crossprod(X*sqrt(w))) return(v) } else { if(inherits(object, "Arima")){ v<-object$var.coef return(v) } if(inherits(object, "glm")){ if(var.diff) warning("option 'var.diff=TRUE' ignored with 'glm' objects", call.=FALSE) so <- summary.glm(object, correlation = FALSE, ...) 
v<-so$dispersion * so$cov.unscaled return(v) } if(inherits(object, "lm")){ if(var.diff){ if(length(object$nameUV$Z)>1) { var.diff<-FALSE warning("var.diff set to FALSE with multiple segmented variables", call.=FALSE) } v<-summary.segmented(object, var.diff=TRUE, correlation = FALSE, ...)$cov.var.diff } else { so<-summary.segmented(object, var.diff=FALSE, correlation = FALSE, ...) v<-so$sigma^2 * so$cov.unscaled #object$cov.unscaled.is } return(v) } else { #in tutti gli altri casi.. if(class(object)[1]=="segmented") class(object)<-class(object)[-1] v<-vcov(object) #paste("vcov.",class(object),sep="") return(v) } } #end else is } #end fn segmented/R/slope.R0000644000176200001440000001453713573454507013722 0ustar liggesusers`slope` <- function(ogg, parm, conf.level=0.95, rev.sgn=FALSE, APC=FALSE, .vcov=NULL,..., digits = max(4, getOption("digits") - 2)){ #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") #riscostruisce il nome con il "." (che era stato scomposto da strsplit()) nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- # if(!"segmented"%in%class(ogg)) stop("A 'segmented' model is requested") #commentato il 28/05/19. vcov() fa gia' questo controllo.. # if(var.diff && length(ogg$nameUV$Z)>1) { # var.diff<-FALSE # warning("var.diff set to FALSE with multiple segmented variables", call.=FALSE) # } #se e' un "newsegmented" # if(!is.null(ogg$R.slope)) { # covv<-old.coef.var(ogg) # ogg$coefficients<- covv$b # covv<- covv$cov # ogg$psi<-old.psi(ogg) # ogg$nameUV<-old.nomi(ogg) # } else { covv<-try(vcov(ogg,...), silent=TRUE) # } covv <- if(is.null(.vcov)) vcov(ogg, ...) 
else .vcov if(!all(dim(covv)==c(length(coef(ogg)), length(coef(ogg))))) stop("Incorrect dimension of cov matrix", call. = FALSE) nomepsi<-rownames(ogg$psi) #OK nomeU<-ogg$nameUV$U nomeZ<-ogg$nameUV$Z if(missing(parm)) { nomeZ<- ogg$nameUV$Z if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(nomeZ)) } else { if(! all(parm %in% ogg$nameUV$Z)) {stop("invalid parm")} else {nomeZ<-parm} } if(length(rev.sgn)!=length(nomeZ)) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) nomi<-names(coef(ogg)) nomi<-nomi[-match(nomepsi,nomi)] #escludi i coef delle V index<-vector(mode = "list", length = length(nomeZ)) for(i in 1:length(nomeZ)) { #---> DA RIMUOVERE E SOSTITUIRE CON QUELLI DI SUBITO DOPO? # #id.cof.U<-grep(paste("\\.",nomeZ[i],"$",sep=""), nomi, value=FALSE) # #psii<-ogg$psi[grep(paste("\\.",nomeZ[i],"$",sep=""), rownames(ogg$psi), value=FALSE),2] # #id.cof.U<- match(grep(nomeZ[i], ogg$nameUV$U, value=TRUE), nomi) # #psii<-ogg$psi[grep(nomeZ[i], ogg$nameUV$V, value=TRUE),2] # #il paste con "$" (paste("\\.",nomeZ[i],"$",sep="")) e' utile perche' serve a distinguere variabili con nomi simili (ad es., "x" e "xx") # #Comunque nella versione dopo la 0.3-1.0 ho (FINALMENTE) risolto mettendo f.U # id.cof.U<- f.U(ogg$nameUV$U, nomeZ[i]) # #id.cof.U e' la posizione nel vettore ogg$nameUV$U; la seguente corregge per eventuali variabili che ci sono prima (ad es., interc) # id.cof.U<- id.cof.U + (match(ogg$nameUV$U[1], nomi)-1) # psii<- ogg$psi[f.U(ogg$nameUV$V, nomeZ[i]) , "Est."] # id.cof.U <- id.cof.U[order(psii)] #---> #questi funzionano anche con oggetti da segreg nomiPsi<-grep(paste(".", nomeZ[i], sep="") , ogg$nameUV$V, value=TRUE) psii<- ogg$psi[nomiPsi , "Est."] nomiU<-grep(paste(".", nomeZ[i], sep="") , ogg$nameUV$U, value=TRUE) #cof<-coef(ogg)[nomiU] id.cof.U<- match(nomiU, names(coef(ogg))) #prima era names(ogg$coefficients) index[[i]]<-c(match(nomeZ[i],nomi), id.cof.U) } Ris<-list() #digits <- max(3, getOption("digits") - 3) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) 
# transf=c("x","1") # if( (length(transf)!=2) || !(length(transf)==1 && transf=="APC")) stop("'error in transf'") # if(transf=="APC") transf<-c("100*(exp(x)-1)", "100*exp(x)") # my.f<-function(x)eval(parse(text=transf[1])) # my.f.deriv<-function(x)eval(parse(text=transf[2])) #browser() for(i in 1:length(index)){ ind<-as.numeric(na.omit(unlist(index[[i]]))) M<-matrix(1,length(ind),length(ind)) M[row(M) y) else -pow * ((x - y) * (x > y))^(pow - 1) } if (is.null(control$fn.obj)) fn.obj <- "-as.numeric(logLik(x))" else fn.obj <- control$fn.obj if (missing(seg.Z)) { if (length(all.vars(formula(obj))) == 2) seg.Z <- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg <- length(all.vars(seg.Z)) id.npsi <- FALSE if (missing(psi)) { if (n.Seg == 1) { if (missing(npsi)) npsi <- 1 npsi <- lapply(npsi, function(.x) .x) if (length(npsi) != length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi) <- all.vars(seg.Z) } else { if (missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call. = FALSE) if (length(npsi) != n.Seg) stop(" 'npsi' and seg.Z should have the same length") if (!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi <- lapply(npsi, function(.x) rep(NA, .x)) id.npsi <- TRUE } else { if (n.Seg == 1) { if (!is.list(psi)) { psi <- list(psi) names(psi) <- all.vars(seg.Z) } } else { if (!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if (n.Seg != length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if (!all(names(psi) %in% all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } min.step <- control$min.step alpha <- control$alpha it.max <- old.it.max <- control$it.max digits <- control$digits toll <- control$toll if (toll < 0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. 
= FALSE) visual <- control$visual stop.if.error <- fix.npsi <- control$fix.npsi n.boot <- control$n.boot size.boot <- control$size.boot gap <- control$gap random <- control$random pow <- control$pow conv.psi <- control$conv.psi visualBoot <- FALSE if (n.boot > 0) { if (!is.null(control$seed)) { set.seed(control$seed) employed.Random.seed <- control$seed } else { employed.Random.seed <- eval(parse(text = paste(sample(0:9, size = 6), collapse = ""))) set.seed(employed.Random.seed) } if (visual) { visual <- FALSE visualBoot <- TRUE } if (!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") } last <- control$last K <- control$K h <- control$h orig.call <- Call <- mf <- obj$call orig.call$formula <- mf$formula <- formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") if (class(mf$formula) == "name" && !"~" %in% paste(mf$formula)) mf$formula <- eval(mf$formula) #mf$formula <- update.formula(mf$formula, paste(seg.Z, collapse = ".+")) mf$formula <- update.formula1(mf$formula, paste(seg.Z, collapse = ".+"), opt=2) mfExt <- mf if (!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset) || !is.null(obj$call$id)) { mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste(c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset), all.vars(obj$call$id)), collapse = "+"))) } mf <- eval(mf, parent.frame()) n <- nrow(mf) nomiOff <- setdiff(all.vars(formula(obj)), names(mf)) if (length(nomiOff) >= 1) mfExt$formula <- update.formula(mfExt$formula, paste(".~.+", paste(nomiOff, collapse = "+"), sep = "")) nomiTUTTI <- all.vars(mfExt$formula) nomiNO <- NULL for (i in nomiTUTTI) { r <- try(eval(parse(text = i), parent.frame()), silent = TRUE) if (class(r) != "try-error" && length(r) == 1 && !is.function(r)) nomiNO[[length(nomiNO) + 1]] <- i } if (!is.null(nomiNO)) 
mfExt$formula <- update.formula(mfExt$formula, paste(".~.-", paste(nomiNO, collapse = "-"), sep = "")) mfExt <- eval(mfExt, parent.frame()) if (inherits(obj, "coxph")) { is.Surv <- NA rm(is.Surv) for (i in 1:ncol(mfExt)) { if (is.Surv(mfExt[, i])) aa <- mfExt[, i][, 1:ncol(mfExt[, i])] } mfExt <- cbind(aa, mfExt) } id.seg <- match(all.vars(seg.Z), names(mfExt)) name.Z <- names(mfExt)[id.seg] Z <- mfExt[, id.seg, drop = FALSE] n.psi <- length(unlist(psi)) if (ncol(Z) == 1 && is.vector(psi) && (is.numeric(psi) || is.na(psi))) { psi <- list(as.numeric(psi)) names(psi) <- name.Z } if (!is.list(psi) || is.null(names(psi))) stop("psi should be a *named* list") id.nomiZpsi <- match(colnames(Z), names(psi)) if ((ncol(Z) != length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] if (id.npsi) { for (i in 1:length(psi)) { K <- length(psi[[i]]) if (any(is.na(psi[[i]]))) psi[[i]] <- if (control$quant) { quantile(Z[, i], prob = seq(0, 1, l = K + 2)[-c(1, K + 2)], names = FALSE) } else { (min(Z[, i]) + diff(range(Z[, i])) * (1:K)/(K + 1)) } } } else { for (i in 1:length(psi)) { if (any(is.na(psi[[i]]))) psi[[i]] <- if (control$quant) { quantile(Z[, i], prob = seq(0, 1, l = K + 2)[-c(1, K + 2)], names = FALSE) } else { (min(Z[, i]) + diff(range(Z[, i])) * (1:K)/(K + 1)) } } } initial.psi <- psi a <- sapply(psi, length) id.psi.group <- rep(1:length(a), times = a) Z <- matrix(unlist(mapply(function(x, y) rep(x, y), Z, a, SIMPLIFY = TRUE)), nrow = n) colnames(Z) <- nomiZ.vett <- rep(nome, times = a) psi <- unlist(psi) psi <- unlist(tapply(psi, id.psi.group, sort)) k <- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) c1 <- apply((Z <= PSI), 2, all) c2 <- apply((Z >= PSI), 2, all) if (sum(c1 + c2) != 0 || is.na(sum(c1 + c2))) stop("starting psi out of the admissible range") ripetizioni <- as.vector(unlist(tapply(id.psi.group, id.psi.group, function(x) 1:length(x)))) nomiU <- paste("U", ripetizioni, 
sep = "") nomiU <- paste(nomiU, nomiZ.vett, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ.vett, sep = ".") nnomi <- c(nomiU, nomiV) U <- (Z - PSI) * (Z > PSI) if (pow[1] != 1) U <- U^pow[1] colnames(U) <- nomiU V <- -(Z > PSI) for (i in 1:k) { mfExt[nomiU[i]] <- U[, i] mfExt[nomiV[i]] <- V[, i] } Fo <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(nnomi, collapse = "+"))), opt=1) Fo.noV <- update.formula1(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))), opt=1) call.ok <- update(obj, Fo, evaluate = FALSE, data = mfExt) call.noV <- update(obj, Fo.noV, evaluate = FALSE, data = mfExt) if (it.max == 0) { if (!is.null(call.noV[["subset"]])) call.noV[["subset"]] <- NULL obj1 <- eval(call.noV, envir = mfExt) return(obj1) } initial <- psi obj0 <- obj dev0 <- eval(parse(text = fn.obj), list(x = obj)) if (length(dev0) <= 0) stop("error in the objective to be minimized, see 'fn.obj' in ?seg.control") if (length(dev0) > 1) stop("the objective to be minimized is not scalar, see 'fn.obj' in ?seg.control") if (is.na(dev0)) dev0 <- 10 list.obj <- list(obj) nomiOK <- nomiU opz <- list(toll = toll, h = h, stop.if.error = stop.if.error, dev0 = dev0, visual = visual, it.max = it.max, nomiOK = nomiOK, id.psi.group = id.psi.group, gap = gap, visualBoot = visualBoot, pow = pow, digits = digits, conv.psi = conv.psi, alpha = alpha, fix.npsi = fix.npsi, min.step = min.step) opz$call.ok <- call.ok opz$call.noV <- call.noV opz$formula.orig <- formula(obj) opz$nomiU <- nomiU opz$nomiV <- nomiV opz$fn.obj <- fn.obj opz <- c(opz, ...) if (n.boot <= 0) { obj <- seg.def.fit(obj, Z, PSI, mfExt, opz) } else { obj <- seg.def.fit.boot(obj, Z, PSI, mfExt, opz, n.boot = n.boot, size.boot = size.boot, random = random) } if (!is.list(obj)) { warning("No breakpoint estimated", call. 
= FALSE) return(obj0) } if (!is.null(obj$obj$df.residual) && !is.na(obj$obj$df.residual)) { if (obj$obj$df.residual == 0) warning("no residual degrees of freedom (other warnings expected)", call. = FALSE) } id.psi.group <- obj$id.psi.group nomiU <- nomiOK <- obj$nomiOK nomiVxb <- sub("U", "psi", nomiOK) nomiFINALI <- unique(sub("U[1-9]*[0-9].", "", nomiOK)) nomiSenzaPSI <- setdiff(name.Z, nomiFINALI) if (length(nomiSenzaPSI) >= 1) warning("no breakpoints found for: ", paste(nomiSenzaPSI, " "), call. = FALSE) it <- obj$it psi <- obj$psi psi.values <- if (n.boot <= 0) obj$psi.values else obj$boot.restart U <- obj$U V <- obj$V id.warn <- obj$id.warn for (jj in colnames(V)) { VV <- V[, which(colnames(V) == jj), drop = FALSE] sumV <- abs(rowSums(VV)) if (any(table(sumV) <= 1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") } rangeZ <- obj$rangeZ mfExt <- obj$mfExt names(mfExt)[match(obj$nomiV, names(mfExt))] <- nomiVxb R <- obj$R R.noV <- obj$R.noV r <- obj$r obj <- obj$obj k <- length(psi) #browser() #coef(obj) ha gia i nomi corretti... 
#all.coef <- coef(obj) #names(all.coef) <- c(names(coef(obj0)), nomiU, nomiVxb) #beta.c <- all.coef[nomiU] beta.c<-coef(obj)[nomiU] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) nnomi <- c(nomiU, nomiVxb) for (i in 1:ncol(U)) { mfExt[nomiU[i]] <- mf[nomiU[i]] <- U[, i] mfExt[nomiVxb[i]] <- mf[nomiVxb[i]] <- Vxb[, i] } Fo <- update.formula1(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+"))), opt=1) objF <- update(obj0, Fo, evaluate = FALSE, data = mfExt) if (!is.null(objF[["subset"]])) objF[["subset"]] <- NULL if (is.null(opz$constr)) opz$constr <- 0 if ((opz$constr %in% 1:2) && class(obj0) == "rq") { objF$method <- "fnc" objF$R <- quote(R) objF$r <- quote(r) } objF <- eval(objF, envir = mfExt) objF$offset <- obj0$offset isNAcoef <- any(is.na(coef(objF))) if (isNAcoef) { if (stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi), "\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } else { warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE) Fo <- update.formula1(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))), opt=1) objF <- if ((opz$constr %in% 1:2) && class(obj0) == "rq") { update(obj0, formula = Fo, R = R.noV, r = r, method = "fnc", evaluate = TRUE, data = mfExt) } else { update(obj0, Fo, evaluate = TRUE, data = mfExt) } names(psi) <- nomiVxb objF$psi <- psi return(objF) } } #4/12/19: modifica fatta per consentire betareg.. Attenzione #semplicemente controlla se la componente "coef*" e' una lista o no.. #COSA succede con geese models? 
if(!is.list(objF[[grep("coef", names(objF), value = TRUE)]])){ names.coef <- names(coef(objF)) names(obj[[grep("coef", names(obj), value = TRUE)]]) <- names(objF[[grep("coef", names(objF), value = TRUE)]]) objF[[grep("coef", names(objF), value = TRUE)]][names.coef] <- coef(obj)[names.coef] } else { #names.coef <- names(objF[[grep("coef", names(objF), value = TRUE)]][[1]]) names(obj[[grep("coef", names(obj), value = TRUE)]][[1]]) <- names(objF[[grep("coef", names(objF), value = TRUE)]][[1]]) objF[[grep("coef", names(objF), value = TRUE)]][[1]] <- obj[[grep("coef", names(obj), value = TRUE)]][[1]] objF[[grep("coef", names(objF), value = TRUE)]][[2]] <- obj[[grep("coef", names(obj), value = TRUE)]][[2]] } if (!is.null(objF$pseudo.r.squared)) objF$pseudo.r.squared <- obj$pseudo.r.squared if (!is.null(objF$geese$beta)) objF$geese$beta <- obj$coefficients #oppure objF$coefficients? if (!is.null(objF$geese$gamma)) objF$geese$gamma <- obj$geese$gamma if (!is.null(objF$geese$alpha)) objF$geese$alpha <- obj$geese$alpha if (!is.null(objF$fitted.values)) objF$fitted.values <- obj$fitted.values if (!is.null(objF$residuals)) objF$residuals <- obj$residuals if (!is.null(objF$linear.predictors)) objF$linear.predictors <- obj$linear.predictors if (!is.null(objF$deviance)) objF$deviance <- obj$deviance if (!is.null(objF$weights)) objF$weights <- obj$weights if (!is.null(objF$aic)) objF$aic <- obj$aic + 2 * k if (!is.null(objF$loglik)) objF$loglik <- obj$loglik if (!is.null(objF$rho)) objF$rho <- obj$rho if (!is.null(objF$dual)) objF$dual <- obj$dual if (!is.null(objF$penalized.deviance)) objF$penalized.deviance <- obj$penalized.deviance if (!is.null(objF$ModifiedScores)) objF$ModifiedScores <- c(obj$ModifiedScores, rep(0, k)) Cov <- try(vcov(objF), silent = TRUE) if(inherits(Cov, "try-error")){ #if (class(Cov) == "try-error") { warning("cannot compute the covariance matrix", call. 
= FALSE) vv <- NA } else { vv <- Cov[nomiVxb, nomiVxb, drop=FALSE] } ris.psi <- matrix(NA, length(psi), 3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[, 2] <- psi ris.psi[, 3] <- sqrt(diag(vv)) a <- tapply(id.psi.group, id.psi.group, length) a.ok <- NULL for (j in name.Z) { if (j %in% nomiFINALI) { a.ok[length(a.ok) + 1] <- a[1] a <- a[-1] } else { a.ok[length(a.ok) + 1] <- 0 } } initial <- unlist(mapply(function(x, y) { if (is.na(x)[1]) rep(x, y) else x }, initial.psi[nomiFINALI], a.ok[a.ok != 0], SIMPLIFY = TRUE)) if (opz$stop.if.error) ris.psi[, 1] <- initial objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) objF$id.group <- if (length(name.Z) <= 1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call <- orig.call if (model) objF$model <- mf if (n.boot > 0) objF$seed <- employed.Random.seed if (keep.class) class(objF) <- c("segmented", class(obj0)) list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] warning("The returned fit is ok, but not of class 'segmented'. If interested, call explicitly the segmented methods (plot.segmented, confint.segmented,..)", call. 
= FALSE) return(list.obj) } segmented/R/seg.control.R0000644000176200001440000000132413501716276015016 0ustar liggesusers`seg.control` <- function(n.boot=10, display=FALSE, tol=1e-5, it.max=30, fix.npsi=TRUE, K=10, quant=TRUE, maxit.glm=25, h=1, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=TRUE, seed=NULL, fn.obj=NULL, digits=NULL, conv.psi=FALSE, alpha=.02, min.step=.0001, powers=c(1,1), last=TRUE, stop.if.error=NULL, gap=FALSE){ list(toll=tol,it.max=it.max,visual=display,stop.if.error=stop.if.error, K=K,last=last,maxit.glm=maxit.glm,h=h,n.boot=n.boot, size.boot=size.boot, gap=gap, jt=jt, nonParam=nonParam, random=random, pow=powers, seed=seed, quant=quant, fn.obj=fn.obj, digits=digits, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step)} segmented/R/seg.Ar.fit.r0000644000176200001440000003110713501711504014510 0ustar liggesusersseg.Ar.fit<-function(obj, XREG, Z, PSI, opz, return.all.sol=FALSE){ #----------------- useExp.k=TRUE #----------------- est.k<-function(x1,y1,L0){ ax<-log(x1) .x<-cbind(1,ax,ax^2) b<-drop(solve(crossprod(.x),crossprod(.x,y1))) const<-b[1]-L0 DD<-sqrt(b[2]^2-4*const*b[3]) kk<-exp((-b[2]+ DD) /(2*b[3])) return(round(kk)) # ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0 # a<-uniroot(ff, c(log(x[4]), 3.4)) } #----------------- dpmax<-function(x,y,pow=1){ #deriv pmax if(pow==1) -(x>y) #ifelse(x>y, -1, 0) else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) } #----------- in.psi<-function(LIM, PSI, ret.id=TRUE){ #check if psi is inside the range a<-PSI[1,]<=LIM[1,] b<-PSI[1,]>=LIM[2,] is.ok<- !a & !b #TRUE se psi e' OK if(ret.id) return(is.ok) isOK<- all(is.ok) && all(!is.na(is.ok)) isOK} #------------ far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE) { #id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI)) #pmax((Z - PSI), 0)^pow[1] colnames(U)<-nomiU 
if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(try(eval(call.noV.noinit), silent=TRUE)) if(class(obj0)[1]=="try-error") stop("The first fit with U variables does not work..", call.=FALSE)#obj0 <- suppressWarnings(eval(call.noV.noinit)) ##a volte con i valori iniziali arima() )non converge!! Quindi provo senza init L0<- -obj0$loglik n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- opz$dev0 #del modello iniziale (senza psi) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi #psi iniziali #============================================== if (visual) { cat(paste("iter = ", sprintf("%2.0f",0), " llik = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), -L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.0f", NA), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " ini.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #============================================== id.warn <- FALSE while (abs(epsilon) > toll) { it<-it+1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if(n.psi1!=n.psi0){ U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1] if(pow[1]!=1) U<-U^pow[1] obj0 <- suppressWarnings(try(eval(call.noV), silent=TRUE)) if(class(obj0)[1]=="try-error") obj0 <- suppressWarnings(eval(call.noV.noinit)) ##a volte con i valori iniziali arima() )non converge!! Quindi provo senza init L0<- -obj0$loglik } V <- dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0) X <- cbind(XREG, U, V) rownames(X) <- NULL colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = "")) obj <- suppressWarnings(try(eval(call.ok), silent=TRUE)) if(class(obj)[1]=="try-error") obj <- suppressWarnings(eval(call.ok.noinit)) ##a volte con i valori iniziali arima() )non converge!! 
Quindi provo senza init beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")] if(any(is.na(c(beta.c, gamma.c)))){ if(fix.npsi) { if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE) } else { id.coef.ok<-!is.na(gamma.c) psi<-psi[id.coef.ok] if(length(psi)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } gamma.c<-gamma.c[id.coef.ok] beta.c<-beta.c[id.coef.ok] Z<-Z[, id.coef.ok, drop=FALSE] rangeZ <- rangeZ[,id.coef.ok, drop=FALSE] limZ <- limZ[,id.coef.ok, drop=FALSE] nomiOK<-nomiOK[id.coef.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.coef.ok] names(psi)<-id.psi.group } } psi.old<-psi psi <- psi.old + gamma.c/beta.c if(!is.null(digits)) psi<-round(psi, digits) PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) #--modello con il nuovo psi U<-(Z-PSI)*(Z>PSI) #in seg.(g)lm.fit l'ho chiamata U.. if(pow[1]!=1) U<-U^pow[1] #call.noV$init<-quote(coef(...........)) obj1 <- suppressWarnings(try(eval(call.noV), silent=TRUE)) if(class(obj1)[1]=="try-error") obj1 <- suppressWarnings(eval(call.noV.noinit)) L1<- if(class(obj1)[1]=="try-error") L0+10 else -obj1$loglik use.k<-k<-1 L1.k<-NULL L1.k[length(L1.k)+1]<-L1 while(L1>L0){ k<-k+1 use.k <- if(useExp.k) 2^(k-1) else k # if(k>=4){ # xx<-1:k # use.k<-est.k(xx, -L1.k[1:k],-L0) # } psi <- psi.old + (gamma.c/beta.c)/(use.k*h) #psi <- psi.old[id.psi.ok] + (gamma.c[id.psi.ok]/beta.c[id.psi.ok])/(use.k*h) if(!is.null(digits)) psi<-round(psi, digits) PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) #qui o si aggiusta psi per farlo rientrare nei limiti, o si elimina, oppure se obj1 sotto non funziona semplicemente continua.. 
U<-(Z-PSI)*(Z>PSI) if(pow[1]!=1) U<-U^pow[1] obj1 <- suppressWarnings(try(eval(call.noV), silent=TRUE)) L1<- if(class(obj1)[1]=="try-error") L0+10 else -obj1$loglik L1.k[length(L1.k)+1]<-L1 if(1/(use.k*h)= it.max) { id.warn <- TRUE break } #Mi sa che non servono i controlli.. soprattutto se non ha fatto step-halving #check if i psi ottenuti sono nel range o abbastanza lontani id.psi.far <-far.psi(Z, PSI, id.psi.group, TRUE) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if(!all(id.psi.ok)){ if(fix.npsi){ psi<-adj.psi(psi, limZ) #within range!!! id.psi.far<-far.psi(Z, PSI, id.psi.group, TRUE) psi<-psi*ifelse(id.psi.far,1,.9) PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) } else { Z<-Z[, id.psi.ok, drop=FALSE] PSI<-PSI[, id.psi.ok, drop=FALSE] rangeZ <- rangeZ[,id.psi.ok,drop=FALSE] limZ <- limZ[,id.psi.ok,drop=FALSE] nomiOK<-nomiOK[id.psi.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.psi.ok] psi.old<- psi.old[id.psi.ok] psi<- psi[id.psi.ok] names(psi)<-id.psi.group if(ncol(PSI)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } } } } #end while_it ##============================================================================= if(id.warn) warning(paste("max number of iterations (", it,") attained",sep=""), call. = FALSE) attr( psi.values, "dev") <- dev.values attr( psi.values, "k")<- k.values #ordina i breakpoints.. psi<-unlist(tapply(psi, id.psi.group, sort)) names(psi)<-id.psi.group names.coef<-names(coef(obj)) #names(obj$coefficients) #obj e' quello vecchio che include U1,.. V1,... PSI.old<-PSI PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) #U e V possono essere cambiati (rimozione/ordinamento psi.. 
) per cui si deve ricalcolare il tutto, altrimenti sarebbe uguale a U1 e obj1 if(sd(PSI-PSI.old)>0){ U <- (Z-PSI)*(Z>PSI) colnames(U)<-paste("U", 1:ncol(U), sep = "") V <- -(Z>PSI) colnames(V)<-paste("V", 1:ncol(V), sep = "") #X <- cbind(XREG, U, V) #rownames(X) <- NULL obj <- suppressWarnings(try(eval(call.noV), silent=TRUE)) L1<- -obj$loglik } else { obj<-obj1 } obj$coef<-c(obj$coef, rep(0,ncol(V))) names(obj$coef)<-names.coef obj$epsilon <- epsilon obj$it <- it obj<-list(obj=obj,it=it,psi=psi, psi.values=psi.values, U=U,V=V,rangeZ=rangeZ, epsilon=epsilon,nomiOK=nomiOK, SumSquares.no.gap=L1, id.psi.group=id.psi.group,id.warn=id.warn) #inserire id.psi.ok? return(obj) } # if(return.all.sol) { # obj.noV <- suppressWarnings(eval(call.noV)) #, envir=mfExt # #mio.init.noV<-obj.noV$coef # #mio.init.noV<- c(0,obj.noV$coef[-1]) # dev.new1 <- -obj.noV$loglik # #dev.new1 <- sum(mylm(x = cbind(XREG, U), y = y, w = w, offs = offs)$residuals^2) # } # # beta.c<-coef(obj)[nomiU] # gamma.c<-coef(obj)[nomiV] segmented/R/seg.def.fit.boot.r0000644000176200001440000001341713476173376015676 0ustar liggesusersseg.def.fit.boot<-function(obj, Z, PSI, mfExt, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di extract.psi<-function(lista){ #serve per estrarre il miglior psi.. 
dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- visualBoot<-opz$visualBoot opz.boot<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-1 n<-nrow(mfExt) o0<-try(seg.def.fit(obj, Z, PSI, mfExt, opz), silent=TRUE) rangeZ <- apply(Z, 2, range) #serve sempre if(!is.list(o0)) { o0<- seg.def.fit(obj, Z, PSI, mfExt, opz, return.all.sol=TRUE) o0<-extract.psi(o0) if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(seg.def.fit(obj, Z, PSI1, mfExt, opz1), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n # na<- ,,apply(...,2,function(x)mean(is.na(x))) Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 for(k in seq(n.boot)){ PSI <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(seg.def.fit(obj, 
Z[id,,drop=FALSE], PSI[id,,drop=FALSE], mfExt[id,,drop=FALSE], opz.boot), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) ##----> o.boot<-try(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz.boot), silent=TRUE) #in realta' la risposta dovrebbe essere "yy" da cambiare in mfExt o.boot<- try(seg.def.fit(obj, Z.orig, PSI, mfExt, opz.boot), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) } PSI <- matrix(rep(est.psi.boot, rep(nrow(Z), length(est.psi.boot))), ncol = length(est.psi.boot)) opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o <- try(seg.def.fit(obj, Z.orig, PSI, mfExt, opz, return.all.sol=TRUE), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o <- try(seg.def.fit(obj, Z, PSI1, mfExt, opz1), silent=TRUE) count.random<-count.random+1 } if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.min.f = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, 
all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0 <- try(seg.def.fit(obj, Z, PSI1, mfExt, opz1), silent=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris return(o0) }segmented/R/plot.segmented.R0000644000176200001440000003112113501664736015512 0ustar liggesusersplot.segmented<-function (x, term, add = FALSE, res = FALSE, conf.level = 0, interc=TRUE, link = TRUE, res.col = 1, rev.sgn = FALSE, const = 0, shade=FALSE, rug=!add, dens.rug=FALSE, dens.col = grey(0.8), transf=I, isV=FALSE, is=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL,...){ #funzione plot.segmented che consente di disegnare anche i pointwise CI f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-------------- enl.range<-function(..., enlarge=TRUE){ #modifica il min dei valori in ... r<-range(...) if(enlarge) r[1]<-if(sign(r[1])>0) r[1]*.9 else r[1]*1.1 r } #-------------- #se l'oggetto e' segmented.Arima il nome dell'eventuale interc va sostituito.. 
# if((all(class(x)==c("segmented", "Arima")))) names(x$coef)<-gsub("intercept", "(Intercept)", names(coef(x))) if(all(c("segmented", "Arima") %in% class(x))) names(x$coef)<-gsub("intercept", "(Intercept)", names(coef(x))) #-------------- linkinv <- !link if (inherits(x, what = "glm", which = FALSE) && linkinv && !is.null(x$offset) && res) stop("residuals with offset on the response scale?") if(conf.level< 0 || conf.level>.9999) stop("meaningless 'conf.level'") if ((inherits(x, what = "glm", which = FALSE) && linkinv) || res) { if(!(identical(transf, I) || identical(transf, "I"))) {transf<-I; warning("'transf' set to I with 'res=TRUE' and/or 'link=FALSE'.")} } # show.gap<-FALSE if (missing(term)) { if (length(x$nameUV$Z) > 1) { stop("please, specify `term'") } else { term <- x$nameUV$Z } } else { dterm<- deparse(substitute(term)) if(dterm %in% x$nameUV$Z) term<-dterm if (! isTRUE(term %in% x$nameUV$Z)) stop("invalid `term'") } opz <- list(...) col.shade<-if(!is.null(opz$col.shade)) adjustcolor(opz$col.shade, .15) else "gray" cols<- if("col"%in% names(opz)) opz$col else 1 lwds<- if("lwd"%in% names(opz)) opz$lwd else 1 ltys<- if("lty"%in% names(opz)) opz$lty else 1 cexs<- if("cex"%in% names(opz)) opz$cex else 1 pchs<- if("pch"%in% names(opz)) opz$pch else 1 ylabs<- if("ylab"%in% names(opz)) opz$ylab else paste("Effect of ", term, sep = " ") xlabs<- if("xlab"%in% names(opz)) opz$xlab else term # #a <- intercept(x, term, gap = show.gap)[[1]][, "Est."] a <- intercept(x, term, digits=20)[[1]][, "Est."] #Poiche' intercept() restituisce quantita' che includono sempre l'intercetta del modello, questa va eliminata se interc=FALSE if(!interc && ("(Intercept)" %in% names(coef(x)))) a<- a-coef(x)["(Intercept)"] b <- slope(x, term, digits=20)[[1]][, "Est."] #id <- grep(paste("\\.", term, "$", sep = ""), rownames(x$psi), value = FALSE) #confondeva "psi1.x","psi1.neg.x" id <- f.U(rownames(x$psi), term) est.psi <- x$psi[id, "Est."] K <- length(est.psi) val <- sort(c(est.psi, 
x$rangeZ[, term])) #---------aggiunta per gli IC rangeCI<-NULL n<-length(x$residuals) #fitted.values - Arima non ha "fitted.values", ma ha "residuals".. tipo<- if(inherits(x, what = "glm", which = FALSE) && link) "link" else "response" vall<-sort(c(seq(min(val), max(val), l=150), est.psi)) #ciValues<-predict.segmented(x, newdata=vall, se.fit=TRUE, type=tipo, level=conf.level) vall.list<-list(vall) names(vall.list)<-term if(conf.level>0) { #k.alpha<-if(inherits(x, what = c("glm","Arima"), which = FALSE)) abs(qnorm((1-conf.level)/2)) else abs(qt((1-conf.level)/2, x$df.residual)) #cambiato nella 0.5-2.0: k.alpha<- if(all(c("segmented","lm") %in% class(x))) abs(qt((1-conf.level)/2, x$df.residual)) else abs(qnorm((1-conf.level)/2)) ciValues<-broken.line(x, vall.list, link=link, interc=interc, se.fit=TRUE, isV=isV, is=is, var.diff=var.diff, p.df=p.df, .vcov=.vcov) ciValues<-cbind(ciValues$fit, ciValues$fit- k.alpha*ciValues$se.fit, ciValues$fit + k.alpha*ciValues$se.fit) #---> transf... ciValues<-apply(ciValues, 2, transf) rangeCI<-range(ciValues) #ciValues e' una matrice di length(val)x3. 
Le 3 colonne: stime, inf, sup #polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = "gray", border=NA) } #--------- a.ok <- c(a[1], a) b.ok <- c(b[1], b) y.val <- a.ok + b.ok * val + const a.ok1 <- c(a, a[length(a)]) b.ok1 <- c(b, b[length(b)]) y.val <- y.val1 <- a.ok1 + b.ok1 * val + const s <- 1:(length(val) - 1) # xvalues <- if(all(class(x)==c("segmented", "Arima"))) x$Z[,1] else x$model[, term] xvalues <- if(all(c("segmented", "Arima") %in% class(x))) x$Z[,1] else x$model[, term] if (rev.sgn) { val <- -val xvalues <- -xvalues } m <- cbind(val[s], y.val1[s], val[s + 1], y.val[s + 1]) #values where to compute predictions (useful only if res=TRUE) if(res){ new.d<-data.frame(ifelse(rep(rev.sgn, length(xvalues)),-xvalues, xvalues)) names(new.d)<-term fit0 <- broken.line(x, new.d, link = link, interc=interc, se.fit=FALSE)$fit } #------------------------------------------------------------------------------- if (inherits(x, what = "glm", which = FALSE) && linkinv) { #se GLM con link=FALSE (ovvero linkinv=TRUE) fit <- if (res) #predict.segmented(x, ifelse(rep(rev.sgn, length(xvalues)),-xvalues,xvalues), type=tipo) + resid(x, "response") + const #broken.line(x, term, gap = show.gap, link = link) + resid(x, "response") + const fit0 + resid(x, "response") + const else x$family$linkinv(c(y.val, y.val1)) # xout <- sort(c(seq(val[1], val[length(val)], l = 150), val[-c(1, length(val))])) xout <- sort(c(seq(val[1], val[length(val)], l = 150), val[-c(1, length(val))],val[-c(1, length(val))]*1.0001)) l <- suppressWarnings(approx(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), xout = xout)) val[length(val)]<-max(l$x) #aggiunto 11/09/17 id.group <- cut(l$x, val, FALSE, TRUE) yhat <- l$y xhat <- l$x m[, c(2, 4)] <- x$family$linkinv(m[, c(2, 4)]) if (!add) { plot(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), type = "n", xlab = xlabs, ylab = ylabs, main = opz$main, sub = opz$sub, cex.axis = opz$cex.axis, cex.lab = opz$cex.lab, xlim = opz$xlim, ylim = 
if(is.null(opz$ylim)) enl.range(fit, rangeCI, enlarge=dens.rug) else opz$ylim ) if(dens.rug){ density <- density( xvalues ) # the height of the densityity curve max.density <- max(density$y) # Get the boundaries of the plot to # put the density polygon at the x-line plot_coordinates <- par("usr") # get the "length" and range of the y-axis y.scale <- plot_coordinates[4] - plot_coordinates[3] # transform the y-coordinates of the density # to the lower 10% of the plotting panel density$y <- (0.1 * y.scale / max.density) * density$y + plot_coordinates[3] ## plot the polygon polygon( density$x , density$y , border = F , col = dens.col) box() } if(rug) { segments(xvalues, rep(par()$usr[3],length(xvalues)), xvalues, rep(par()$usr[3],length(xvalues))+ abs(diff(par()$usr[3:4]))/40)} } if(conf.level>0){ if(rev.sgn) vall<- -vall if(shade) { polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = col.shade, border=NA) } else { matlines(vall, ciValues[,-1], type="l", lty=2, col=cols)} } if (res) points(xvalues, fit, cex = cexs, pch = pchs, col = res.col) yhat <- x$family$linkinv(yhat) if (length(cols) == 1) cols <- rep(cols, max(id.group)) if (length(lwds) == 1) lwds <- rep(lwds, max(id.group)) if (length(ltys) == 1) ltys <- rep(ltys, max(id.group)) for (i in 1:max(id.group)) { lines(xhat[id.group == i], yhat[id.group == i], col = cols[i], lwd = lwds[i], lty = ltys[i]) } #------------------------------------------------------------------------------- } else { #se LM o "GLM con link=TRUE (ovvero linkinv=FALSE)" ##---> transf!!! 
y.val<- do.call(transf, list(y.val)) y.val1<-do.call(transf, list(y.val1)) r <- cbind(val, y.val) r1 <- cbind(val, y.val1) rr <- rbind(r, r1) fit <- c(y.val, y.val1) if (res) { ress <- if (inherits(x, what = "glm", which = FALSE)) residuals(x, "working") #* sqrt(x$weights) mgcv::gam() usa " ..*sqrt(x$weights)/mean(sqrt(x$weights))" else resid(x) #if(!is.null(x$offset)) ress<- ress - x$offset #fit <- broken.line(x, term, gap = show.gap, link = link, interc = TRUE) + ress + const #fit <- predict.segmented(x, ifelse(rep(rev.sgn, length(xvalues)),-xvalues,xvalues), type=tipo) + ress + const fit <- fit0 + ress + const } if (!add) plot(rr, type = "n", xlab = xlabs, ylab = ylabs, main = opz$main, sub = opz$sub, xlim = opz$xlim, cex.axis = opz$cex.axis, cex.lab = opz$cex.lab, #ylim = if(is.null(opz$ylim)) enl.range(fit, rangeCI, enlarge=dens.rug) else opz$ylim) ylim = if(is.null(opz$ylim)) enl.range(fit, rangeCI, do.call(transf, list(m[, c(2,4)])), enlarge=dens.rug) else opz$ylim) if(dens.rug){ density <- density( xvalues ) # the height of the densityity curve max.density <- max(density$y) # Get the boundaries of the plot to # put the density polygon at the x-line plot_coordinates <- par("usr") # get the "length" and range of the y-axis y.scale <- plot_coordinates[4] - plot_coordinates[3] # transform the y-coordinates of the density # to the lower 10% of the plotting panel density$y <- (0.1 * y.scale / max.density) * density$y + plot_coordinates[3] ## plot the polygon polygon( density$x , density$y , border = F , col = dens.col) box() } if(rug) {segments(xvalues, rep(par()$usr[3],length(xvalues)), xvalues, rep(par()$usr[3],length(xvalues))+ abs(diff(par()$usr[3:4]))/40)} if(conf.level>0) { if(rev.sgn) vall<- -vall if(shade) polygon(c(vall, rev(vall)), c(ciValues[,2],rev(ciValues[,3])), col = col.shade, border=NA) else matlines(vall, ciValues[,-1], type="l", lty=2, col=cols) } if (res) points(xvalues, fit, cex = cexs, pch = pchs, col = res.col) #aggiunto 06/2019 perche' 
sotto disegnava linee (e non curve) # segments(m[, 1], do.call(transf, list(m[, 2])), m[, 3], do.call(transf, list(m[, 4])), # col = cols, lwd = lwds, lty = ltys) xout <- sort(c(seq(val[1], val[length(val)], l = 150), val[-c(1, length(val))],val[-c(1, length(val))]*1.0001)) l <- suppressWarnings(approx(as.vector(m[, c(1, 3)]), as.vector(m[, c(2, 4)]), xout = xout)) val[length(val)]<-max(l$x) #aggiunto 11/09/17 id.group <- cut(l$x, val, FALSE, TRUE) xhat <- l$x yhat <- l$y # browser() yhat <- do.call(transf, list(yhat)) #transf(yhat) if (length(cols) == 1) cols <- rep(cols, max(id.group)) if (length(lwds) == 1) lwds <- rep(lwds, max(id.group)) if (length(ltys) == 1) ltys <- rep(ltys, max(id.group)) for (i in 1:max(id.group)) { lines(xhat[id.group == i], yhat[id.group == i], col = cols[i], lwd = lwds[i], lty = ltys[i]) } } invisible(NULL) } segmented/R/points.segmented.r0000644000176200001440000000264212766105434016113 0ustar liggesuserspoints.segmented<-function(x, term, interc=TRUE, link=TRUE, rev.sgn=FALSE, transf=I, ...){ #-------------- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #------------- if(missing(term)){ if(length(x$nameUV$Z)>1 ) {stop("please, specify `term'")} else {term<-x$nameUV$Z} } opz<-list(...) 
nameV<- x$nameUV$V[f.U(x$nameUV$V, term)] psii<- x$psi[nameV, "Est."] d<-data.frame(a=psii) names(d)<-term opz$y<-broken.line(x,d, se.fit=FALSE, interc=interc, link=link)[[1]] if(rev.sgn) psii<- -psii opz$x<- psii if(is.null(opz$cex)) opz$cex<-1.5 if(is.null(opz$lwd)) opz$lwd<-2 opz$y<-do.call(transf, list(opz$y)) do.call(points, opz) invisible(NULL) } segmented/R/seg.lm.fit.boot.r0000644000176200001440000001622513476173422015540 0ustar liggesusersseg.lm.fit.boot <- function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di #---------------------------------- # sum.of.squares<-function(obj.seg){ # #computes the "correct" SumOfSquares from a segmented" fit # b<-obj.seg$obj$coef # X<-qr.X(obj.seg$obj$qr) #X<-model.matrix(obj.seg) # X<-X[,!is.na(b)] # b<-b[!is.na(b)] # rev.b<-rev(b) # rev.b[1:length(obj.seg$psi)]<-0 # b<-rev(rev.b) # new.fitted<-drop(X%*%b) # new.res<- obj.seg$obj$residuals + obj.seg$obj$fitted - new.fitted # ss<-sum(new.res^2) # ss # } #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. 
dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- visualBoot<-opz$visualBoot opz.boot<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-1 n<-length(y) o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=FALSE)), silent=TRUE) rangeZ <- apply(Z, 2, range) #serve sempre if(!is.list(o0)) { o0<- suppressWarnings(seg.lm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE)) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(NA, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n # na<- ,,apply(...,2,function(x)mean(is.na(x))) Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 for(k in seq(n.boot)){ PSI <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) if(jt) 
Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(suppressWarnings(seg.lm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz.boot)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(suppressWarnings(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz.boot)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) } PSI <- matrix(rep(est.psi.boot, rep(nrow(Z), length(est.psi.boot))), ncol = length(est.psi.boot)) opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o<-try(suppressWarnings(seg.lm.fit(y, XREG, Z.orig, PSI, w, offs, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o<-try(suppressWarnings(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1)), silent=TRUE) count.random<-count.random+1 } #se il modello e' stato stimato controlla se la soluzione e' migliore.. 
if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) SS.ok<-min(all.selected.ss) id.accept<- ((abs(all.ss-SS.ok)/SS.ok )<= 0.05) psi.mean<-apply(all.est.psi[id.accept,,drop=FALSE], 2, mean) # est.psi0<-psi.mean # #devi ristimare il modello con psi.mean # PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) # o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(seg.lm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris return(o0) }segmented/R/seg.glm.fit.boot.r0000644000176200001440000001402713476173360015706 0ustar liggesusersseg.glm.fit.boot<-function(y, XREG, Z, PSI, w, offs, opz, n.boot=10, size.boot=NULL, 
jt=FALSE, nonParam=TRUE, random=FALSE){ #random: if TRUE, when the algorithm fails in minimizing f(y), random numbers are used as final estimates. # If the algorithm fails in minimizing f(y*), the final estimates (to be used as starting values with # the original responses y) *always* are replaced by random numbers (regardless of the random argument) #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore del primo tentativo #show.history() se c'e' stato boot restart potrebbe produrre un grafico 2x1 di "dev vs it" and "no.of distinct vs it" #-------- extract.psi<-function(lista){ #serve per estrarre il miglior psi.. dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(dev.no.gap=dev.ok, psi=psi.ok) r } #------------- if(!nonParam){ nonParam<-TRUE warning("`nonParam' set to TRUE for segmented glm..", call.=FALSE) } visualBoot<-opz$visualBoot opz.boot<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz1<-opz opz1$it.max <-1 n<-length(y) o0<-try(seg.glm.fit(y, XREG, Z, PSI, w, offs, opz), silent=TRUE) rangeZ <- apply(Z, 2, range) #serve sempre if(!is.list(o0)) { o0<- seg.glm.fit(y, XREG, Z, PSI, w, offs, opz, return.all.sol=TRUE) o0<-extract.psi(o0) if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$dev.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("semiparametric boot requires reasonable fitted values. 
try a different psi or use nonparam boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(seg.glm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) ss00<-o0$dev.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z # if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 for(k in seq(n.boot)){ PSI <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) o.boot<-try(seg.glm.fit(y[id], XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], w[id], offs[id], opz), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0),size=n, replace=TRUE) o.boot<-try(seg.glm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) } PSI <- matrix(rep(est.psi.boot, rep(nrow(Z), length(est.psi.boot))), ncol = length(est.psi.boot)) opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o<-try(seg.glm.fit(y, XREG, Z.orig, PSI, w, offs, opz), silent=TRUE) if(!is.list(o) && random){ est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o<-try(seg.glm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) count.random<-count.random+1 } if(is.list(o)){ if(!"coefficients"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$dev.no.gap if(o$dev.no.gap<=ifelse(is.list(o0), o0$dev.no.gap, 
10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$dev.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), o0$dev.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } } #end n.boot all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(seg.glm.fit(y, XREG, Z, PSI1, w, offs, opz1), silent=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris return(o0) }segmented/R/draw.history.R0000644000176200001440000001124113476233332015213 0ustar liggesusersdraw.history<-function(obj,term,...){ #show.history() se c'e' stato boot restart potrebbe produrre un grafico 2x1 di "dev vs it" and "no.of distinct vs it" #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- if(missing(term)){ if(length(obj$nameUV$Z)>1 ) {stop("please, specify `term'")} else {term<-obj$nameUV$Z} } opz<-list(...) 
range.ok<-obj$rangeZ[,term] id.ok<- f.U(rownames(obj$psi), term) est.psi<-obj$psi[id.ok,"Est."] if(is.null(opz$ylim)) opz$ylim<-range.ok if(is.null(opz$col)) opz$col<-1 if(is.null(opz$pch)) opz$pch<-1:length(est.psi) if(is.null(opz$xlab)) opz$xlab<-"iterations" if(is.null(opz$ylab)) opz$ylab<-paste("breakpoint ","(",term,")",sep="") if(is.null(opz$type)) opz$type<-"o" opz$xaxt<-"n" if(is.null(obj$seed)) { #NO boot if(all(diff(sapply(obj$psi.history, length)[-1])==0)){ #non-autom (elemento [1] e' NA) A<-t(matrix(unlist(obj$psi.history)[-1],nrow=nrow(obj$psi),byrow=FALSE)) colnames(A)<-rownames(obj$psi) opz$x<-0:(nrow(A)-1) opz$y<-A[,id.ok] par(mfrow=c(1,2)) do.call(matplot, opz) #matplot(0:(nrow(A)-2), A[-1,id.ok],type="o",pch=1:length(est.psi),col=1, # xlab=, ylab=, # ylim=range.ok, xaxt="n",...) axis(1,at=0:(nrow(A)-1),cex.axis=.7) abline(h=est.psi,lty=3,col=opz$col) plot(0:(nrow(A)-1), attr(obj$psi.history,"dev")[-1], ylab="deviance", xlab="iterations", type="o", xaxt="n") axis(1,at=0:(nrow(A)-1),cex.axis=.7) abline(h = min(attr(obj$psi.history,"dev")),lty=3,col=opz$col) } else { #automatic psihist<-obj$psi.history[-1] id.iter<-rep(1:length(psihist), times=sapply(psihist, length)) psi.history<-unlist(psihist) nomi<-unlist(sapply(psihist, names)) d<-data.frame(iter=id.iter, psi=psi.history, nomi=nomi) #associa i nomi delle componenti di $psi.history (che sono indici 1,2,..) con i nomi della variabile term ii<-unique(names(obj$psi.history[[length(obj$psi.history)]])[id.ok]) if(length(ii)>1) stop("some error in the names?..") with(d[d$nomi==ii,], plot(iter, psi, xlab=opz$xlab, ylab=opz$ylab, xaxt="n",...)) axis(1,at=unique(d$iter),cex.axis=.7) #se vuoi proprio associare le stime tra le diverse iterazioni #(per poi unire nel grafico i punti con le linee. 
Ovviamente alcune linee saranno interrotte) # for(i in 1:length(obj$psi.history)) { # a<-obj$psi.history[[i]] # for(j in 1:length(est.psi)){ # psij<-est.psi[j] #a<- ..names match # r[i,j]<-a[which.min(abs(a-psij))] # a<-setdiff(a, r[i,j]) } } else { #se boot par(mfrow=c(1,2)) plot(obj$psi.history$all.selected.ss, type="b", xlab="bootstrap replicates", ylab="RSS (selected values)", xaxt="n", pch=20) axis(1,at=1:length(obj$psi.history$all.selected.ss),cex.axis=.7) #unicita' delle soluzioni if(is.vector(obj$psi.history$all.selected.psi)){ psi.matr<-m<-matrix(obj$psi.history$all.selected.psi, ncol=1) } else { psi.matr<-m<-obj$psi.history$all.selected.psi[,id.ok,drop=FALSE] } for(i in 1:nrow(m)) m[i,]<-apply(psi.matr[1:i,,drop=FALSE],2,function(xx)length(unique(xx))) m<-t(t(m)+.1*(0:(ncol(m)-1))) matplot(1:nrow(m),m, pch=1:ncol(m), type="b", col=1:ncol(m), ylab="no. of distinct solutions",xlab="bootstrap replicates", xaxt="n") axis(1,at=1:nrow(m),cex.axis=.7) } } #end_fn segmented/R/lines.segmented.R0000644000176200001440000000203613464776550015657 0ustar liggesuserslines.segmented<-function(x, term, bottom=TRUE, shift=TRUE, conf.level=0.95, k=50, pch=18, rev.sgn=FALSE,...){ if(missing(term)){ if(length(x$nameUV$Z)>1 ) {stop("please, specify `term'")} else {term<-x$nameUV$Z} } ss<-list(...) metodo<- if(!is.null(ss$method)) ss$method else "delta" colore<- if(is.null(ss$col)) 1 else ss$col usr <- par("usr") h<-(usr[4]-usr[3])/abs(k) y<- if(bottom) usr[3]+h else usr[4]-h m<- confint.segmented(object=x,parm=term,level=conf.level,rev.sgn=rev.sgn,digits=15,method=metodo) #m<-r[[term]] #FORSE non e' necessaria #if(rev.sgn) m<- -m #ma invece serve il seguente (se length(psi)=1 e rev.sgn=T): m<-matrix(m,ncol=3) if(nrow(m)>1) m<-m[order(m[,1]),] est.psi<-m[,1] lower.psi<-m[,2] upper.psi<-m[,3] if(length(est.psi)>1) { y<- if(shift) y+seq(-h/2,h/2,length=length(est.psi)) else rep(y,length(est.psi)) } segments(lower.psi, y, upper.psi, y, ...) 
points(est.psi,y,type="p",pch=pch,col=colore) } segmented/R/segmented.R0000644000176200001440000000021513476226436014537 0ustar liggesusers`segmented` <- function(obj, seg.Z, psi, npsi, control=seg.control(), model=TRUE, ...){ UseMethod("segmented") } segmented/R/print.summary.segmented.R0000644000176200001440000000656213474241566017400 0ustar liggesusers`print.summary.segmented` <- function(x, short = x$short, var.diff = x$var.diff, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"),...){ cat("\n\t***Regression Model with Segmented Relationship(s)***\n\n") cat( "Call: \n" ) print( x$call ) cat("\nEstimated Break-Point(s):\n ") est.psi<-x$psi[,-1,drop=FALSE] rownames(est.psi)<-rownames(x$psi) print(round(est.psi,3)) #era "signif(,4)" # cat("\nt value for the gap-variable(s) V: ",x$gap[,3],"\n") #if(any(abs(x$gap[,3])>1.96)) cat(" Warning:", sum(abs(x$gap[,3])>1.96),"gap coefficient(s) significant at 0.05 level\n") if(short){ cat("\nDifference-in-slopes parameter(s):\n") #print(x$Ttable[(nrow(x$Ttable)-nrow(x$psi)+1):nrow(x$Ttable),])} nome<-rownames(x$psi) #nome<-as.character(parse("",text=nome)) #aa<-grep("U",rownames(x$Ttable)) #bb<-unlist(sapply(nome,function(xx){grep(xx,rownames(x$Ttable))},simplify=FALSE,USE.NAMES=FALSE)) #cc<-intersect(aa,bb) #indices of diff-slope parameters nomiU<-rownames(x$gap) #idU<-match(nomiU,rownames(x$Ttable)) print(x$Ttable[nomiU,]) } else {cat("\nMeaningful coefficients of the linear terms:\n") if(is.null(dim(x$Ttable))){ print(x$Ttable) #printCoefmat(matrix(x$Ttable,nrow=1,ncol=4,dimnames=list(" ",names(x$Ttable))),has.Pvalue=FALSE) } else { printCoefmat(x$Ttable, digits = digits, signif.stars = signif.stars,na.print = "NA", ...) 
} } if("summary.lm"%in%class(x)){ #for lm if(var.diff){ for(i in 1:length(x$sigma.new)){ cat("\nResidual standard error ",i,":", format(signif(x$sigma.new[i], digits)), "on", x$df.new[i], "degrees of freedom")} cat("\n") } else { cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", x$df[2], "degrees of freedom\n")} if (!is.null(x$fstatistic)) { cat("Multiple R-Squared:", formatC(x$r.squared, digits = digits)) cat(", Adjusted R-squared:", formatC(x$adj.r.squared, digits = digits), "\n")} } if("summary.glm"%in%class(x)){ #for glm cat("(Dispersion parameter for ", x$family$family, " family taken to be ", format(x$dispersion), ")\n\n", apply(cbind(paste(format.default(c("Null", "Residual"), width = 8, flag = ""), "deviance:"), format(unlist(x[c("null.deviance", "deviance")]), digits = max(5, digits + 1)), " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 1, paste, collapse = " "), "AIC: ", format(x$aic, digits = max(4, digits + 1)), "\n", sep = "") } if("summary.Arima"%in%class(x)){#for Arima cm <- x$call$method if (is.null(cm) || cm != "CSS") cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", log likelihood = ", format(round(x$loglik, 2)), ", aic = ", format(round(x$aic, 2)), "\n", sep = "") else cat("\nsigma^2 estimated as ", format(x$sigma2, digits = digits), ", part log likelihood = ", format(round(x$loglik, 2)), "\n", sep = "") } invisible(x) cat("\nConvergence",if(x$conv.warn) "*not*" else NULL , "attained in",x$it,"iter. (rel. 
change",paste(signif(x$epsilon,5),")\n",sep="")) } segmented/R/seg.lm.fit.r0000644000176200001440000003011113501711404014547 0ustar liggesusersseg.lm.fit<-function(y,XREG,Z,PSI,w,offs,opz,return.all.sol=FALSE){ useExp.k=TRUE #----------------- est.k<-function(x1,y1,L0){ ax<-log(x1) .x<-cbind(1,ax,ax^2) b<-drop(solve(crossprod(.x),crossprod(.x,y1))) const<-b[1]-L0 DD<-sqrt(b[2]^2-4*const*b[3]) kk<-exp((-b[2]+ DD) /(2*b[3])) return(round(kk)) # ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0 # a<-uniroot(ff, c(log(x[4]), 3.4)) } #----------------- dpmax<-function(x,y,pow=1){ #deriv pmax if(pow==1) -(x>y) #ifelse(x>y, -1, 0) else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) } #----------- mylm<-function(x,y,w,offs=rep(0,length(y))){ x1<-x*sqrt(w) y<-y-offs y1<-y*sqrt(w) b<-drop(solve(crossprod(x1),crossprod(x1,y1))) fit<-drop(tcrossprod(x,t(b))) r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r, df.residual=length(y)-length(b)) o } #----------- mylmADD<-function(invXtX, X, v, Xty, y){ #v: new column to be added vtv<-sum(v^2) Xtv<-crossprod(X,v) #-colSums(X[v!=0,,drop=FALSE]) #oppure -.colSums(X[v!=0,,drop=FALSE],n,p) m<-invXtX %*% Xtv d<-drop(1/(vtv- t(Xtv) %*% m)) r<- -d*m invF <- invXtX + d*tcrossprod(m) newINV<- rbind(cbind(invF, r), c(t(r), d)) b<-crossprod(newINV, c(Xty, sum(v*y))) fit<- tcrossprod(cbind(X,v), t(b)) #cbind(X,v) %*% b r<-y-fit o<-list(coefficients=b,fitted.values=fit,residuals=r) o } #----------- in.psi<-function(LIM, PSI, ret.id=TRUE){ #check if psi is inside the range a<-PSI[1,]<=LIM[1,] b<-PSI[1,]>=LIM[2,] is.ok<- !a & !b #TRUE se psi e' OK if(ret.id) return(is.ok) isOK<- all(is.ok) && all(!is.na(is.ok)) isOK} #------------ far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE) { #id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI)) #pmax((Z - PSI), 0)^pow[1] 
if(pow[1]!=1) U<-U^pow[1] obj0 <- mylm(cbind(XREG, U), y, w, offs) #lm.wfit(cbind(XREG, U), y, w, offs) #se 1 psi, si puo' usare la funz efficiente.. L0<- sum(obj0$residuals^2*w) n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1]) dev.values[length(dev.values) + 1] <- opz$dev0 #del modello iniziale (senza psi) dev.values[length(dev.values) + 1] <- L0 #modello con psi iniziali psi.values[[length(psi.values) + 1]] <- psi #psi iniziali #============================================== if (visual) { cat(paste("iter = ", sprintf("%2.0f",0), " dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.0f", NA), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " ini.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } #============================================== id.warn <- FALSE while (abs(epsilon) > toll) { it<-it+1 n.psi0 <- n.psi1 n.psi1 <- ncol(Z) if(n.psi1!=n.psi0){ U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1] if(pow[1]!=1) U<-U^pow[1] obj0 <- mylm(cbind(XREG, U), y, w, offs)#lm.wfit(cbind(XREG, U), y, w, offs) #se 1 psi, si puo' usare la funz efficiente.. L0<- sum(obj0$residuals^2*w) } V <- dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0) X <- cbind(XREG, U, V) rownames(X) <- NULL colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = "")) obj <- lm.wfit(x = X, y = y, w = w, offset = offs) #mylm(X, y, w, offs) # beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")] if(any(is.na(c(beta.c, gamma.c)))){ if(fix.npsi) { if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. 
too many breakpoints being estimated?", call.=FALSE) } else { id.coef.ok<-!is.na(gamma.c) psi<-psi[id.coef.ok] if(length(psi)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } gamma.c<-gamma.c[id.coef.ok] beta.c<-beta.c[id.coef.ok] Z<-Z[, id.coef.ok, drop=FALSE] rangeZ <- rangeZ[,id.coef.ok, drop=FALSE] limZ <- limZ[,id.coef.ok, drop=FALSE] nomiOK<-nomiOK[id.coef.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.coef.ok] names(psi)<-id.psi.group } } psi.old<-psi psi <- psi.old + gamma.c/beta.c if(!is.null(digits)) psi<-round(psi, digits) PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) #--modello con il nuovo psi U1<-(Z-PSI)*(Z>PSI) if(pow[1]!=1) U1<-U1^pow[1] obj1 <- try(mylm(cbind(XREG, U1), y, w, offs), silent = TRUE) #lm.wfit(cbind(XREG, pmax(Z-PSI,0)), y, w, offs) L1<- if(class(obj1)[1]=="try-error") L0+10 else sum(obj1$residuals^2*w) use.k<-k<-1 L1.k<-NULL L1.k[length(L1.k)+1]<-L1 while(L1>L0){ #ATTENZIONE: i gamma.c e beta.c vengono dal modello, ma poi dopo il modello (linee 152-167) viene fatto un controllo che puo' eliminare break e ridurre le colonne di Z. #Per cui puo' risultare ncol(PSI)>ncol(Z). Quindi o non si fanno i controlli ( potrebbe essere perche' tanto c'e' il try(..)) oppure semplicemente # si prendono le stime corrispondenti alle colonne "ok". psi <- psi.old[id.psi.ok] + (gamma.c[id.psi.ok]/beta.c[id.psi.ok])/(use.k*h) k<-k+1 use.k <- if(useExp.k) 2^(k-1) else k # if(k>=4){ # xx<-1:k # use.k<-est.k(xx, -L1.k[1:k],-L0) # } psi <- psi.old + (gamma.c/beta.c)/(use.k*h) #psi <- psi.old[id.psi.ok] + (gamma.c[id.psi.ok]/beta.c[id.psi.ok])/(use.k*h) if(!is.null(digits)) psi<-round(psi, digits) PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi)) #qui o si aggiusta psi per farlo rientrare nei limiti, o si elimina, oppure se obj1 sotto non funziona semplicemente continua.. 
U1<-(Z-PSI)*(Z>PSI) if(pow[1]!=1) U1<-U1^pow[1] obj1 <- try(mylm(cbind(XREG, U1), y, w, offs), silent=TRUE) #lm.wfit(cbind(X,U1), y, w, offs) L1<- if(class(obj1)[1]=="try-error") L0+10 else sum(obj1$residuals^2*w) L1.k[length(L1.k)+1]<-L1 if(1/(use.k*h)16 || psi<20) browser() # if(it==5) browser() if (visual) { flush.console() # spp <- if (it < 10) " " else NULL # cat(paste("iter = ", spp, it, # " dev = ",sprintf('%8.5f',L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("iter = ", sprintf("%2.0f",it), " dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L1), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " k = ", sprintf("%2.0f", k), " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } epsilon <- if(conv.psi) max(abs((psi -psi.old)/psi.old)) else (L0 - L1)/(abs(L0) + 0.1) L0<-L1 U <-U1 k.values[length(k.values)+1]<-use.k psi.values[[length(psi.values) + 1]] <- psi dev.values[length(dev.values) + 1] <- L0 if (it >= it.max) { id.warn <- TRUE break } #Mi sa che non servono i controlli.. soprattutto se non ha fatto step-halving #check if i psi ottenuti sono nel range o abbastanza lontani id.psi.far <-far.psi(Z, PSI, id.psi.group, TRUE) id.psi.in <- in.psi(limZ, PSI, TRUE) id.psi.ok <- id.psi.in & id.psi.far if(!all(id.psi.ok)){ if(fix.npsi){ psi<-adj.psi(psi, limZ) #within range!!! 
id.psi.far<-far.psi(Z, PSI, id.psi.group, TRUE) psi<-psi*ifelse(id.psi.far,1,.9) PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) } else { Z<-Z[, id.psi.ok, drop=FALSE] PSI<-PSI[, id.psi.ok, drop=FALSE] rangeZ <- rangeZ[,id.psi.ok,drop=FALSE] limZ <- limZ[,id.psi.ok,drop=FALSE] nomiOK<-nomiOK[id.psi.ok] #salva i nomi delle U per i psi ammissibili id.psi.group<-id.psi.group[id.psi.ok] psi.old<- psi.old[id.psi.ok] psi<- psi[id.psi.ok] names(psi)<-id.psi.group if(ncol(PSI)<=0) { warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE) return(0) } } } } #end while_it ##============================================================================= if(id.warn) warning(paste("max number of iterations (", it,") attained",sep=""), call. = FALSE) attr( psi.values, "dev") <- dev.values attr( psi.values, "k")<- k.values #ordina i breakpoints.. psi<-unlist(tapply(psi, id.psi.group, sort)) names(psi)<-id.psi.group names.coef<-names(obj$coefficients) #obj e' quello vecchio che include U1,.. V1,... PSI.old<-PSI PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi)) #U e V possono essere cambiati (rimozione/ordinamento psi.. ) per cui si deve ricalcolare il tutto, altrimenti sarebbe uguale a U1 e obj1 if(sd(PSI-PSI.old)>0){ U <- (Z-PSI)*(Z>PSI) colnames(U)<-paste("U", 1:ncol(U), sep = "") V <- -(Z>PSI) colnames(V)<-paste("V", 1:ncol(V), sep = "") # X <- cbind(XREG, U, V) # rownames(X) <- NULL obj <- lm.wfit(x = cbind(XREG, U), y = y, w = w, offset = offs) L1<-sum(obj$residuals^2*w) } else { obj<-obj1 } obj$coefficients<-c(obj$coefficients, rep(0,ncol(V))) names(obj$coefficients)<-names.coef obj$epsilon <- epsilon obj$it <- it obj<-list(obj=obj,it=it,psi=psi, psi.values=psi.values, U=U,V=V,rangeZ=rangeZ, epsilon=epsilon,nomiOK=nomiOK, SumSquares.no.gap=L1, id.psi.group=id.psi.group,id.warn=id.warn) #inserire id.psi.ok? 
return(obj) } segmented/R/broken.line.r0000644000176200001440000001526513477726406015050 0ustar liggesusersbroken.line<-function(ogg, term=NULL, link=TRUE, interc=TRUE, se.fit=TRUE, isV=FALSE, .vcov=NULL, ...){ #ogg: l'oggetto segmented #term: una lista *nominata* con i valori rispetto a cui calcolare i fitted # OPPURE una stringa per indicare la variabile segmented OPPURE NULL (se c'e' solo una variabile) #is: 2 valori T/F per indicare se le variabili U e V nella matrice X, andrebbero sostituite con le versioni ind-smooth prima di calcolare var(X\hat\beta) #...: argomenti da passare a vcov.segmented(): per esempio var.diff, is, p.df if(length(isV)==1) isV<-c(FALSE,isV) dummy.matrix<-NULL dummy.matrix<-function(x.values, x.name, obj.seg, psi.est=TRUE, isV=FALSE){ #given the segmented fit 'obj.seg' and a segmented variable x.name with corresponding values x.values, #this function simply returns a matrix with columns (x, (x-psi)_+, -b*I(x>psi)) #or ((x-psi)_+, -b*I(x>psi)) if obj.seg does not include the coef for the linear "x" f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. 
it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } if(length(isV)==1) isV<-c(FALSE,isV) n<-length(x.values) #le seguenti righe selezionavano (ERRONEAMENTE) sia "U1.x" sia "U1.neg.x" (se "x" e "neg.x" erano segmented covariates) #nameU<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$U, value = TRUE) #nameV<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$V, value = TRUE) nameU<-obj.seg$nameUV$U[f.U(obj.seg$nameUV$U,x.name)] nameV<-obj.seg$nameUV$V[f.U(obj.seg$nameUV$V,x.name)] diffSlope<-coef(obj.seg)[nameU] est.psi<-obj.seg$psi[nameV, "Est."] se.psi<-obj.seg$psi[nameV, "St.Err"] if(any(is.na(se.psi))) stop("The St.Err. of psi is NA", call. = FALSE) k<-length(est.psi) PSI <- matrix(rep(est.psi, rep(n, k)), ncol = k) SE.PSI <- matrix(rep(se.psi, rep(n, k)), ncol = k) newZ<-matrix(x.values, nrow=n,ncol=k, byrow = FALSE) dummy1<-if(isV[1]) (newZ-PSI)*pnorm((newZ-PSI)/SE.PSI) else (newZ-PSI)*(newZ>PSI) #pmax(newZ-PSI,0) if(psi.est){ V<-if(isV[2]) -pnorm((newZ-PSI)/SE.PSI) else -(newZ>PSI) #ifelse(newZ>PSI,-1,0) dummy2<- if(k==1) V*diffSlope else V%*%diag(diffSlope) #t(diffSlope*t(-I(newZ>PSI))) newd<-cbind(x.values,dummy1,dummy2) colnames(newd)<-c(x.name,nameU, nameV) } else { newd<-cbind(x.values,dummy1) colnames(newd)<-c(x.name,nameU) } if(!x.name%in%names(coef(obj.seg))) newd<-newd[,-1,drop=FALSE] return(newd) } #-------------- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. 
it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #------------- if(se.fit) { if(is.null(.vcov)) .vcov<-vcov.segmented(ogg, ...) if(!all(dim(.vcov)==c(length(ogg$coef), length(ogg$coef)))) stop("Incorrect dimension of cov matrix", call. = FALSE) } xvalues<-term nomeV <- ogg$nameUV$V nomeU <- ogg$nameUV$U nomeZ <- ogg$nameUV$Z n.seg<-length(nomeZ) if(is.null(xvalues)){ if(n.seg>1) stop("there are multiple segmented covariates. Please specify one.") xvalues<-ogg$model[nomeZ] } if(is.character(xvalues)){ if(!xvalues %in% nomeZ) stop("'xvalues' is not a segmented covariate") xvalues<-ogg$model[xvalues] } nomeOK<-names(xvalues) if(length(nomeOK)>1) stop("Please specify one variable") if(!nomeOK %in% nomeZ) stop("'names(xvalues)' is not a segmented covariate") #if(n.seg>1 && !is.list(x.values)) stop("with multiple segmented covariates, please specify a named dataframe") #x.values<-data.frame(x.values) #names(x.values)<-nomeZ nomi <- names(coef(ogg)) nomiSenzaV <- nomiSenzaU <- nomi nomiSenzaU[match(nomeU, nomi)] <- "" nomiSenzaV[match(nomeV, nomi)] <- "" index <- vector(mode = "list", length = length(nomeZ)) for (i in 1:n.seg) { index[[i]] <- c(match(nomeZ[i], nomi), f.U(ogg$nameUV$U, nomeZ[i]) + (match(ogg$nameUV$U[1], nomi)-1), f.U(ogg$nameUV$V, nomeZ[i]) + (match(ogg$nameUV$V[1], nomi)-1)) #grep(paste("\\.", nomeZ[i], "$", sep = ""), nomiSenzaV, value = FALSE), #grep(paste("\\.", nomeZ[i], "$", sep = ""), nomiSenzaU, value = FALSE)) } ste.fit<-fit <- vector(mode = "list", length = length(nomeZ)) for (i in 1:n.seg) { x.name <- nomeZ[i] Xfit<-dummy.matrix(unlist(xvalues), x.name, ogg, isV=FALSE) if(se.fit) X<-dummy.matrix(unlist(xvalues), x.name, ogg, isV=isV)#<--NB: xvalues non 
varia con i!!! perche' farlo calcolare comunque? ind <- as.numeric(na.omit(unlist(index[[i]]))) if(interc && "(Intercept)"%in%nomi) { ind<- c(match("(Intercept)",nomi),ind) Xfit<-cbind(1,Xfit) if(se.fit) X<-cbind(1,X) } cof <- coef(ogg)[ind] fit[[i]]<-drop(Xfit%*%cof) if(se.fit) ste.fit[[i]] <- sqrt(rowSums((X %*% .vcov[ind,ind]) * X)) #sqrt(diag(X%*%Var%*%t(X))) #ste.fit[[i]] <- if(!se.fit) 10 else sqrt(rowSums((X %*% vcov.segmented(ogg,...)[ind,ind]) * X)) #sqrt(diag(X%*%Var%*%t(X))) } names(fit)<- names(ste.fit)<- nomeZ r<-list(fit=fit[[nomeOK]], se.fit=ste.fit[[nomeOK]]) if (inherits(ogg, what = "glm", FALSE) && !link){ if(se.fit) r[[2]] <- ogg$family$mu.eta(r[[1]])*r[[2]] r[[1]] <- ogg$family$linkinv(r[[1]]) } if(!se.fit) r<-r[1] #metti r[[1]] se vuoi fare restituire un vettore return(r) } segmented/R/confint.segmented.R0000644000176200001440000011762113476432172016204 0ustar liggesusers`confint.segmented` <- function(object, parm, level=0.95, method=c("delta", "score", "gradient"), rev.sgn=FALSE, var.diff=FALSE, is=FALSE, digits=max(4, getOption("digits") - 1), ...){ #...: argomenti da passare solo a confintSegIS. Questi sono "h", "d.h", "bw" (bw="(1/n)^(1/2)"), nvalues, msgWarn o useSeg. method<-match.arg(method) cls<-class(object) if(length(cls)==1) cls<-c(cls, cls) if(method%in%c("score", "gradient") && !all(cls[1:2]==c("segmented","lm"))) stop("Score- or Gradient-based CI only work with segmented lm models") #======================================================================================================= #========== metodo Delta #======================================================================================================= confintSegDelta<- function(object, parm, level=0.95, rev.sgn=FALSE, var.diff=FALSE, is=FALSE, ...){ #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. 
it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- # if(!"segmented"%in%class(object)) stop("A segmented model is needed") if(var.diff && length(object$nameUV$Z)>1) { var.diff<-FALSE warning(" 'var.diff' set to FALSE with multiple segmented variables", call.=FALSE) } #nomi delle variabili segmented: if(missing(parm)) { nomeZ<- object$nameUV$Z if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(nomeZ)) } else { if(! all(parm %in% object$nameUV$Z)) {stop("invalid 'parm' name", call.=FALSE)} else {nomeZ<-parm} } if(length(nomeZ)>1) { warning("There are multiple segmented terms. The first is taken", call.=FALSE, immediate. = TRUE) nomeZ<-nomeZ[1] } if(length(rev.sgn)!=length(nomeZ)) rev.sgn<-rep(rev.sgn, length.out=length(nomeZ)) rr<-list() z<-if("lm"%in%class(object)) abs(qt((1-level)/2,df=object$df.residual)) else abs(qnorm((1-level)/2)) for(i in 1:length(nomeZ)){ #per ogni variabile segmented `parm' (tutte o selezionata).. #nomi.U<-grep(paste("\\.",nomeZ[i],"$",sep=""),object$nameUV$U,value=TRUE) #nomi.V<-grep(paste("\\.",nomeZ[i],"$",sep=""),object$nameUV$V,value=TRUE) nomi.U<- object$nameUV$U[f.U(object$nameUV$U, nomeZ[i])] nomi.V<- object$nameUV$V[f.U(object$nameUV$V, nomeZ[i])] m<-matrix(,length(nomi.U),3) colnames(m)<-c("Est.",paste("CI","(",level*100,"%",")",c(".low",".up"),sep="")) for(j in 1:length(nomi.U)){ #per ogni psi della stessa variabile segmented.. 
sel<-c(nomi.V[j],nomi.U[j]) V<-vcov(object,var.diff=var.diff, is=is, ...)[sel,sel] #questa e' vcov di (psi,U) b<-coef(object)[sel[2]] #diff-Slope th<-c(b,1) orig.coef<-drop(diag(th)%*%coef(object)[sel]) #sono i (gamma,beta) th*coef(ogg)[sel] gammma<-orig.coef[1] est.psi<-object$psi[sel[1],2] V<-diag(th)%*%V%*%diag(th) #2x2 vcov() di gamma e beta se.psi<-sqrt((V[1,1]+V[2,2]*(gammma/b)^2-2*V[1,2]*(gammma/b))/b^2) r<-c(est.psi, est.psi-z*se.psi, est.psi+z*se.psi) if(rev.sgn[i]) r<-c(-r[1],rev(-r[2:3])) m[j,]<-r } #end loop j (ogni psi della stessa variabile segmented) #CONTROLLA QUESTO:..sarebbe piu' bello m<-m[order(m[,1]),,drop=FALSE] rownames(m)<-nomi.V #if(nrow(m)==1) rownames(m)<-"" else m<-m[order(m[,1]),] if(rev.sgn[i]) { #m<-m[nrow(m):1,] rownames(m)<-rev(rownames(m)) } rr[[length(rr)+1]]<- m #signif(m,digits) } #end loop i (ogni variabile segmented) names(rr)<-nomeZ return(rr[[1]]) } #end_function #======================================================================================================= #========== metodo Score #======================================================================================================= confintSegIS<-function(obj, parm, d.h=1.5, h=2.5, conf.level=level, ...){ #wrapper per ci.IS().. #d.h: incremento di h.. #se h o d.h sono negativi, tutto il range #========================================================================== #========================================================================== #========================================================================== ci.IS <- function(obj.seg, nomeZ, nomeUj, stat = c("score", "gradient"), transf=FALSE, h = -1, sigma, conf.level = 0.95, use.z = FALSE, is = TRUE, fit.is = TRUE, var.is=TRUE, bw=NULL, smooth = 0, msgWarn = FALSE, n.values = 50, altro = FALSE, cadj = FALSE, plot = FALSE, add=FALSE, agg=FALSE, raw=FALSE, useSeg=FALSE) { #smooth: se 0, i valori decrescenti dello IS score vengono eliminati; porta ad una curva U troppo ripida e quindi IC troppo stretti.. 
# se 2, B-spline con vincoli di monot e di "passaggio da est.psi" #useSeg, se TRUE (e se smooth>0) viene applicato segmented per selezionare solo i rami con pendenza negativa # dovrebbe essere usato con smooth>0 e se h=-1 (all.range=TRUE) #transf: funziona solo con grad #obj.seg: oggetto restituito da segmented #h: costante per definire il range dei valori di riferimento. Should be >1. # Se NULL viene considerato l'intervallo 'est.psi +/- se*(zalpha*1.5) dove zalpha ? il quantile che dipende da conf.level # Se qualche negativo, viene considerato il range della x dal quantile 0.02 a quello 0.98. # Se >0 il range e' est.psi +/- h* zalpha * se.psi # sigma se mancante viene assunta la stima presa dall'oggetto obj.seg.. # use.z: se TRUE i quantili della z, otherwise la t_{n-p} # stat: which statistic use # agg if TRUE, and plot=TRUE and est.psi!= dalla radice che annulla lo IS score, allora l'IC ? shiftato.. # is, fit.is, var.is: logical, induced smoothing? # plot: la linea nera e' lo score originale (if raw=TRUE) # la linea rossa e' lo score IS # le linea verde e' lo IS score con i pezzi decrescenti eliminati # se useSeg=T aggiunge una linea segmented.. # # # conf.level: confidence levels can be vector # fit.is: i fitted del modello nullo provengono da un modello in cui (x-psi)_+ ? # sostituito dall'approx smooth? # bw: the bandwidth in the kernel.. If NULL the SE(\hat\psi) is used, otherwise use a string, something like "1/n" or "sqrt(1/n)" # cadj: se TRUE l'approx di Ca.... 
che fa riferimentimento ad una Normale # #========================================================================== #========================================================================== #========================================================================== u.psiX <- function(psi, sigma, x, y, XREG = NULL, scale = FALSE, est.psi = NULL, interc = FALSE, pow = c(1, 1), lag = 0, robust = FALSE, GS = FALSE, is = FALSE, se.psi, var.is = TRUE, which.return = 3, fit.is = FALSE, altro = FALSE, cadj = FALSE, transf=FALSE) { # Restituisce score e/o var, e/o score stand. (vedi 'which.return') Inoltre se robust=TRUE calcola la # var robusta est.psi: o NULL oppure uno scalare con attributi 'b' e 'fitted' se lag>0 allora la # variabile V viene modificata nell'intorno di psi. Valori di pow diversi da uno sono ignorati quando # lag>0 pow: due potenze dei termini (x-psi)_+ e I(x>psi) se GS=TRUE calcola la statistica GS. # richiede 'est.psi', e 'scale' ? ignorato which.return. 3 means the scaled score, 1= the unscaled # score, 2=the sqrt(variance) (see the last row) # is: se TRUE lo smoothing indotto al num # var.is: se TRUE lo smooth indotto viene usato anche per il denom (ovvero per la var dello score) # U.is: se TRUE (provided that is=TRUE) the design matrix includes (x-psi)*pnorm((x-psi)/se) rather than pmax(x-psi,0) #altro: se TRUE (and fit.is=TRUE), U.psi = (x-psi)*pnorm((x-psi)/se) + h*dnorm((x-psi)/h) #-------------------------------------------- varUpsi.fn <- function(X, sigma = 1, r = NULL) { #X: the design matrix. The 1st column corresponds to psi #r: the residual vector. If NULL the usual model-based (rather than robust) variance is returned. 
INF<- if(length(sigma)==1) # (sigma^2)*crossprod(X) else crossprod(X,diag(sigma^2))%*%X INF <- crossprod(X)/(sigma^2) if (is.null(r)) { vv <- INF[1, 1] - (INF[1, -1] %*% solve(INF[-1, -1], INF[-1, 1])) } else { u <- X * r/(sigma^2) V <- crossprod(u) #nrow(X)*var(u) I22 <- solve(INF[-1, -1]) vv <- V[1, 1] - INF[1, -1] %*% I22 %*% V[1, -1] - V[1, -1] %*% I22 %*% INF[-1, 1] + INF[1, -1] %*% I22 %*% V[-1, -1] %*% I22 %*% INF[-1, 1] } return(vv) } # f.f<-function(x,psi,l=0){ x1<-1*I(x>psi) id<-which(x1>=1)[1] id.change <- # max(1,(id-l)):min(length(x),(id+l)) val<-((1/(2*l+1))*( 1:(2*l+1)))[1:length(id.change)] # #if(length(id.change)!=length(val)) return x1[id.change]<-val x1<- -x1 x1 } dpmax <- function(x, y, pow = 1) { # derivata prima di pmax; se pow=1 ? -I(x>psi) if (pow == 1) -(x > y) else -pow * (x>y)*(x - y)^(pow - 1) #ifelse(x > y, -1, 0) else -pow * pmax(x - y, 0)^(pow - 1) } if (cadj && which.return != 3) stop("cadj=TRUE can return only the studentized score") if (is && missing(se.psi)) stop("is=TRUE needs se.psi") if (interc) XREG <- cbind(rep(1, length(y)), XREG) if(fit.is) { XX<- if(altro) cbind((x-psi)*pnorm((x - psi)/se.psi)+se.psi*dnorm((x-psi)/se.psi), XREG) else cbind((x-psi)*pnorm((x-psi)/se.psi), XREG) o <- lm.fit(x = XX, y = y) #o <- lm.fit(x = cbind(XREG, (x - psi) * pnorm((x - psi)/se.psi)), y = y) } else { .U<-(x > psi)*(x-psi) if(pow[1]!=1) .U<-.U^pow[1] XX<- cbind(.U, XREG) #cbind(pmax(x - psi, 0)^pow[1], XREG) o <- lm.fit(x = XX, y = y) #o <- lm.fit(x = cbind(XREG, pmax(x - psi, 0)), y = y) #o<-lm(y~0+XREG+pmax(x-psi,0)) } #b <- o$coef[length(o$coef)] b <- o$coef[1] mu <- o$fitted.values n <- length(mu) # if (cadj) sigma <- sqrt(sum(o$residuals^2)/(n - sum(!is.na(o$coef)) - 1)) # V <- if (lag == 0) dpmax(x, psi, pow = pow[2]) else f.f(x, psi, lag) #V <- rowMeans(sapply(x, function(xx){-I(x>xx)})) V<-NULL #serve per il check.. 
if (GS) { if (is.null(est.psi)) stop("'GS=TRUE' needs 'est.psi'") gs <- b * (sum((y - mu) * V)/(sigma^2)) * (est.psi - psi) gs <- sqrt(pmax(gs, 0)) * sign(est.psi - psi) return(gs) } if(is){ r<- -b*sum(((y-mu)*pnorm((x - psi)/se.psi)))/sigma^2 XX<- if(var.is) cbind(-b*pnorm((x - psi)/se.psi), XX) else cbind(-b*I(x > psi), XX) } else { r<- -b*sum((y-mu)*I(x > psi))/sigma^2 XX<- cbind(-b*I(x > psi), XX) } #XX <- if (is) cbind(-b * pnorm((x - psi)/se.psi), (x - psi)*pnorm((x - psi)/se.psi), XREG) else cbind(b * V, pmax(x - psi, 0)^pow[1], XREG) #r <- drop(crossprod(XX, y - mu))/sigma^2 #if (is && altro) r[1] <- r[1] + (b^2) * se.psi * sum(dnorm((x - psi)/se.psi))/sigma^2 #if (!var.is) XX <- cbind(b * V, pmax(x - psi, 0)^pow[1], XREG) if (scale) { if (!is.null(est.psi)) { # questo e' se devi usare l'inf osservata. Cmq visto che dipende da est.psi e non psi, se scale=TRUE # sarebbe inutile calcolarla ogni volta.. mu <- attr(est.psi, "fitted") est.b <- attr(est.psi, "b") est.psi <- as.numeric(est.psi) #V <- if (lag == 0) dpmax(x, est.psi, pow = pow[2]) else f.f(x, est.psi, lag) #V <- rowMeans(sapply(x, function(xx){-I(x>xx)})) #XX <- cbind(est.b * V, pmax(x - psi, 0)^pow[1], XREG) if(is){ XX<- if(var.is) cbind(-est.b*pnorm((x - est.psi)/se.psi), XX[,-1]) else cbind(-est.b*I(x > est.psi), XX[,-1]) } else { XX<- cbind(-est.b*I(x > est.psi), XX[,-1]) } } # INF<- if(length(sigma)==1) (sigma^2)*crossprod(XX) else crossprod(XX,diag(sigma^2))%*%XX # v.Upsi<-INF[1,1]-(INF[1,-1] %*% solve(INF[-1,-1],INF[-1,1])) rr <- if (robust) (y - mu) else NULL v.Upsi <- try(varUpsi.fn(XX, sigma, r = rr), silent = TRUE) if (!is.numeric(v.Upsi)) return(NA) if (v.Upsi <= 0) return(NA) # r<-r[1]/sqrt(v.Upsi) } names(r) <- NULL #r <- c(r[1], v.Upsi, r[1]/sqrt(max(v.Upsi, 0))) r <- c(r, v.Upsi, r/sqrt(max(v.Upsi, 0))) r <- r[which.return] if (cadj) r <- sign(r) * sqrt((r^2) * (1 - (3 - (r^2))/(2 * n))) r } # per disegnare devi vettorizzare u.psiXV <- Vectorize(u.psiX, vectorize.args = "psi", 
USE.NAMES = FALSE) #========================================================================== gs.fn <- function(x, y, estpsi, sigma2, psivalue, pow = c(1,1), adj = 1, is = FALSE, sepsi, XREG = NULL, fit.is = FALSE, altro = FALSE, transf=FALSE) { # calcola la statist gradiente #x,y i dati; estpsi la stima di psi #a: la costante per lisciare I(x>psi)-> aI(x>psi)^{a-1} (ignorata se is=TRUE) # # is: se TRUE calcola la GS usando lo score 'naturally smoothed' #adj. Se 0 non fa alcuna modifica e cosi' potrebbe risultare non-positiva. Se 1 e 2 vedi i codici all'interno logitDeriv<-function(kappa) exp(kappa)*diff(intv)/((1+exp(kappa))^2) logit<-function(psi) log((psi-min(intv))/(max(intv)-psi)) logitInv<-function(kappa) (min(intv)+max(intv)*exp(kappa))/(1+exp(kappa)) intv<-quantile(x, probs=c(.02,.98),names=FALSE) if (is && missing(sepsi)) stop("SE(psi) is requested when is=TRUE") k <- length(psivalue) r <- vector(length = k) for (i in 1:k) { psii <- psivalue[i] #prima dell'aggiunta di altro..' # if (fit.is) { # X <- cbind(1, x, (x - psii) * pnorm((x - psii)/sepsi), XREG) # } else { # X <- cbind(1, x, pmax(x - psii, 0), XREG) # } if(fit.is) { X<- if(altro) cbind(1,x, (x-psii)*pnorm((x - psii)/sepsi)+sepsi*dnorm((x-psii)/sepsi), XREG) else cbind(1,x,(x-psii)*pnorm((x-psii)/sepsi), XREG) } else { .U<- (x-psii)*(x>psii) if(pow[1]!=1) .U <- .U^pow[1] X<- cbind(1, x, .U, XREG) #X<- cbind(1,x,pmax(x - psii, 0)^pow[1], XREG) } o <- lm.fit(y = y, x = X) b <- o$coef[3] if (is) { v <- pnorm((x - psii)/sepsi) } else { v <- if (pow[2] == 1) I(x > psii) else pow[2] * pmax(x - psii, 0)^(pow[2] - 1) } if(transf) v<-v * logitDeriv(logit(psii)) r[i] <- -(b/sigma2) * sum((y - o$fitted) * v) r[i] <- if(!transf) r[i]*(estpsi - psii) else r[i]*(logit(estpsi) - logit(psii)) if (altro && fit.is) r[i] <- r[i] + (estpsi - psii) * ((b * sepsi * sum(dnorm((x - psii)/sepsi))) * (b/sigma2)) } if (adj > 0) { r<- if (adj == 1) pmax(r, 0) else abs(r) } if(transf) psivalue<-logit(psivalue) 
segni<-if(transf) sign(logit(estpsi) - psivalue) else sign(estpsi - psivalue) #plot(psivalue, r, type="o") r <- cbind(psi = psivalue, gs.Chi = r, gs.Norm = sqrt(r) * segni ) r } #========================================================================== monotSmooth <- function(xx, yy, hat.psi, k = 20, w = 0) { # xx: esplicativa yy: yy la risposta hat.psi: la stima del psi k: se ? uno scalare allora il rango # della base, altrimenti i nodi.. w: l'esponente per costruire il vettore dei pesi (per dare pi? peso # 'localmente') #------------------- bspline <- function(x, ndx, xlr = NULL, knots, deg = 3, deriv = 0) { # x: vettore di dati xlr: il vettore di c(xl,xr) ndx: n.intervalli in cui dividere il range deg: il # grado della spline #require(splines) if (missing(knots)) { if (is.null(xlr)) { xl <- min(x) - 0.01 * diff(range(x)) xr <- max(x) + 0.01 * diff(range(x)) } else { if (length(xlr) != 2) stop("quando fornito, xlr deve avere due componenti") xl <- xlr[1] xr <- xlr[2] } dx <- (xr - xl)/ndx knots <- seq(xl - deg * dx, xr + deg * dx, by = dx) } B <- splineDesign(knots, x, ord = deg + 1, derivs = rep(deriv, length(x))) # B<-spline.des(knots,x,bdeg+1,0*x) #$design r <- list(B = B, degree = deg, knots = knots) #, dx=dx, nterm=ndx) r #the B-spline base matrix } #end_fn #--------- if (length(k) == 1) r <- bspline(xx, ndx = k) else r <- bspline(xx, knots = k) B <- r$B knots <- r$knots degree <- r$degree D1 <- diff(diag(ncol(B)), diff = 1) d <- drop(solve(crossprod(B), crossprod(B, yy))) # calcola monotone splines. La pen si riferisce solo alle diff dei coef della base!! 
# rx <- range(xx) nterm <- round(nterm) dx <- (rx[2] - rx[1])/nterm knots <- c(rx[1] + dx * # ((-degree):(nterm - 1)), rx[2] + dx * (0:degree)) B0 <- spline.des(knots, c(min(xx), hat.psi, max(xx)), degree + 1)$design P <- tcrossprod(B0[2, ]) * 10^12 e <- rep(1, length(d)) ww <- (1/(abs(xx - hat.psi) + diff(range(xx))/100))^w it <- 0 while (!isTRUE(all.equal(e, rep(0, length(e))))) { v <- 1 * I(diff(d) > 0) E <- (10^12) * crossprod(D1 * sqrt(v)) #t(D1) %*%diag(v)%*%D1 # d.old <- d #a.new M <- crossprod(B * sqrt(ww)) + E + P #t(B)%*% B + E + P d <- drop(solve(M+.001*diag(ncol(M)), crossprod(B, ww * yy))) #d <- drop(solve(M, t(B)%*% yy)) e <- d - d.old it <- it + 1 if (it >= 20) break } #end_while fit <- drop(B %*% d) return(fit) } #========================================================================== miop<-function(x,y,xs=x, ys=y, h=FALSE,v=FALSE, only.lines=FALSE, top=TRUE, right=TRUE, col.h=grey(.6), col.v=col.h,...){ #disegna il calssico plot(x,y,..) e poi aggiunge le proiezioni orizzontali e/o verticali #x, y : vettori per cui disegnare il grafico #xs, ys: punti rispetto a cui disegnare le proiezioni (default a tutti) #h, v: disegnare le linee horizontal and vertical? #top: le linee v riportarle verso l'alto (TRUE) o il basso? #right: le linee horiz riportarle verso destra (TRUE) o sinistra? #only.lines: se TRUE disegna (aggiungendo in un plot *esistente*) solo le "proiezioni" (linee "v" e "h") if(only.lines) h<-v<-TRUE if(!only.lines) plot(x,y,type="l",...) 
# col.h<-col.v<-1:length(xs) if(v){ y0<- if(top) par()$usr[4] else par()$usr[3] segments(xs, y0, xs,ys, col=col.v, lty=3) } if(h){ x0<-if(right) par()$usr[2] else par()$usr[1] segments(xs,ys, x0,ys,col=col.h, lty=3, lwd=1.2) } invisible(NULL) } #========================================================================== f.Left<-function(x,y){ yy<-rev(y) xx<-rev(x) idList<-NULL while(any(diff(yy)<0)){ id<-which(diff(yy)<0)[1] idList[length(idList)+1]<- id+1 yy<-yy[-(id+1)] xx<-xx[-(id+1)] } r<-cbind(xx,yy) r } #========================================================================== f.Right<-function(x,y){ #elimina i valori che violano la monotonic xx<-x yy<-y idList<-NULL while(any(diff(yy)>0)){ id<-which(diff(yy)>0)[1] idList[length(idList)+1]<- id+1 yy<-yy[-(id+1)] xx<-xx[-(id+1)] } r<-cbind(xx,yy) r } #========================================================================== #========================================================================== #========================================================================== stat <- match.arg(stat) if (missing(sigma)) sigma <- summary.lm(obj.seg)$sigma if (cadj) use.z = TRUE zalpha <- if (use.z) -qnorm((1 - conf.level)/2) else -qt((1 - conf.level)/2, df = obj.seg$df.residual) if(!is.numeric(h)) stop(" 'h' should be numeric") if(sign(h)>=0) h<-abs(h[1]) Y <- obj.seg$model[, 1] #la risposta X <- obj.seg$model[, nomeZ] formula.lin<- update.formula(formula(obj.seg), paste(".~.", paste("-",paste(obj.seg$nameUV$V,collapse = "-")))) #remove *all* V variables formula.lin<- update.formula(formula.lin, paste(".~.-", nomeUj)) #formula.lin <- update.formula(formula(obj.seg), paste(".~.", paste("-",paste(c(obj.seg$nameUV$U,obj.seg$nameUV$V),collapse = "-")))) XREG <- model.matrix(formula.lin, data = obj.seg$model) if (ncol(XREG) == 0) XREG <- NULL nomePsij<-sub("U","psi", nomeUj) est.psi <- obj.seg$psi[nomePsij, "Est."] se.psi <- obj.seg$psi[nomePsij, "St.Err"] if (any(h < 0)) { all.range <- TRUE valori <- 
seq(quantile(X,probs=.05, names=FALSE), quantile(X,probs=.95, names=FALSE), l = n.values) } else { all.range <- FALSE valori <- seq(max(quantile(X,probs=.05, names=FALSE), est.psi - h * se.psi), min(quantile(X,probs=.95, names=FALSE), est.psi + h * se.psi), l = n.values) } n <- length(Y) min.X <- min(X) max.X <- max(X) if(!is.null(bw)) se.psi<-eval(parse(text=bw)) if (stat == "score") { U.valori <- u.psiXV(psi = valori, sigma = sigma, x = X, y = Y, XREG = XREG, is = is, se.psi = se.psi, scale = TRUE, pow = c(1, 1), fit.is = fit.is, altro = altro, cadj = cadj, var.is=var.is, transf=transf) statlab<-"Score statistic" if(plot && raw) U.raw <- u.psiXV(valori, sigma, X, Y, XREG, is=FALSE, scale=TRUE, pow = c(1, 1), fit.is=FALSE, altro =altro, cadj = cadj, var.is=FALSE, transf=transf) } else { U.valori <- gs.fn(X, Y, est.psi, sigma^2, valori, is = is, sepsi = se.psi, XREG = XREG, fit.is = fit.is, altro = altro, transf=transf, pow=c(1,1))[, 3] statlab<-"Gradient statistic" if(plot && raw) U.raw <- gs.fn(X, Y, est.psi, sigma^2, valori, is=FALSE, XREG=XREG, fit.is=FALSE, altro=altro, transf=transf)[,3] } if(any(is.na(U.valori))) { #stop("NA in the statistic values") warning("removing NA in the statistic values") valori<-valori[!is.na(U.valori)] U.valori<-U.valori[!is.na(U.valori)] } logit<-function(psi) log((psi-min(intv))/(max(intv)-psi)) logitInv<-function(kappa) (min(intv)+max(intv)*exp(kappa))/(1+exp(kappa)) intv<-quantile(X, probs=c(.02,.98),names=FALSE) if (stat == "gradient" && transf) { est.psi<- logit(est.psi) valori<- logit(valori) x.lab<- "kappa" } if(plot && !add) { x.lab<-"psi" if(raw) { plot(valori, U.raw, xlab=x.lab, ylab=statlab, type="l") points(valori, U.valori, xlab=x.lab, ylab=statlab, type="l", col=2) } else { plot(valori, U.valori, xlab=x.lab, ylab=statlab, type="l", col=2) } abline(h=0, lty=3) segments(est.psi,0, est.psi, -20, lty=2) } if(prod(range(U.valori))>=0) stop("the signs of stat at extremes are not discordant, increase 'h' o set 'h=-1' ") 
if(smooth==0){ #rimuovi i pezzi di U.valori decrescenti.. ####left valoriLeft<-valori[valori<=est.psi] #valori[U.valori>=0] UvaloriLeft<-U.valori[valori<=est.psi] #U.valori[U.valori>=0] vLeft<-f.Left(valoriLeft,UvaloriLeft) #rendi monotona la curva.. valori.ok<-vLeft[,1] Uvalori.ok<-vLeft[,2] f.interpL <- splinefun(Uvalori.ok, valori.ok, method="mono") ####right valoriRight<-valori[valori>=est.psi] #valori[U.valori<0] UvaloriRight<-U.valori[valori>=est.psi] #U.valori[U.valori<0] vRight<-f.Right(valoriRight,UvaloriRight) valori.ok<-vRight[,1] Uvalori.ok<-vRight[,2] f.interpR <- splinefun(Uvalori.ok, valori.ok, method="mono") } else { #if smooth>0 if(useSeg){ oseg<-try(suppressWarnings(segmented(lm(U.valori~valori), ~valori, psi=quantile(valori, c(.25,.75),names=FALSE), control=seg.control(n.boot=0,stop.if.error = F))),silent=TRUE) #seg.lm.fit.boot(U.valori, XREG, Z, PSI, w, offs, opz) if(class(oseg)[1]=="try-error"){ oseg<-try(suppressWarnings(segmented(lm(U.valori~valori), ~valori, psi=quantile(valori, .5,names=FALSE), control=seg.control(n.boot=0))),silent=TRUE) } if(class(oseg)[1]=="segmented"){ if(plot) lines(valori, oseg$fitted, lty=3, lwd=1.5) soglie<-oseg$psi[,2] iid<-cut(valori,c(min(valori)-1000, soglie, max(valori)+1000), labels=FALSE) slopes<-cumsum(oseg$coef[2:(length(oseg$coef)-length(soglie))]) slopes<-rep(slopes,table(iid)) valori<-valori[slopes<=0] U.valori<-U.valori[slopes<=0] } } fr<-monotSmooth(valori,U.valori,est.psi,k=7) fr<- fr -(.2/diff(range(valori))) *(valori-mean(valori)) #add a small negative trend to avoid constant values in U.. 
vLeft<-cbind(valori[valori<=est.psi], fr[valori<=est.psi]) vRight<-cbind(valori[valori>=est.psi], fr[valori>=est.psi]) if(!all.range){ if( (min(valori)> intv[1]) && (fr[1]< max(zalpha))) return("errLeft") if( (max(valori)< intv[2]) && (fr[length(fr)]> min(-zalpha))) return("errRight") } f.interpL<-f.interpR<-splinefun(fr,valori,"m") }#end_if smooth L<-f.interpL(zalpha) U<-f.interpR(-zalpha) #browser() #il valore che annulla lo IS score puo' essere differente dalla stima di segmented # quindi salviamo questo "delta": gli IC potrebbero essere aggiustati con IC+delta delta<- est.psi-f.interpL(0) #if(abs((f.interpL(0)-f.interpR(0))/f.interpR(0))>.001) if(plot){ if(!agg) delta<-0 #if(raw) plot(valori, U.raw, xlab="psi", ylab=statlab, type="l") else plot(valori, U.valori, xlab="psi", ylab=statlab, type="n") lines(vLeft, col=3); lines(vRight, col=3) vv<-seq(0,zalpha*1.2,l=50) lines(f.interpL(vv)+delta,vv, col=grey(.8, alpha=.6), lwd=4) vv<-seq(0,-zalpha*1.2,l=50) lines(f.interpR(vv)+delta,vv, col=grey(.8, alpha=.6), lwd=4) points(est.psi, 0, pch=19) miop(c(L,U)+delta,c(zalpha,-zalpha),only.lines=TRUE,top=FALSE, right=FALSE) } if (stat == "gradient" && transf) { L<-logitInv(L) U<-logitInv(U) } L<- pmax(L, quantile(X,probs=.02)) U<- pmin(U,quantile(X,probs=.98)) #r<-cbind(lower=L,upper=U) #rownames(r) <- paste(conf.level) #attr(r, "delta")<-delta r<-c(est.psi, L, U) return(r) } #end fn #-------------------------------------------------------------------------- #========================================================================== #========================================================================== #========================================================================== if(!all(class(obj) == c("segmented","lm"))) stop("A segmented lm object is requested") if(missing(parm)){ nomeZ<- parm<- obj$nameUV$Z } else { if(!all(parm %in% obj$nameUV$Z)) stop("invalid 'parm' ") nomeZ<-parm } if(length(parm)>1) { warning("There are multiple segmented terms. 
The first is taken", call.=FALSE, immediate. = TRUE) nomeZ<-parm[1] } nomiU.term<-grep(nomeZ, obj$nameUV$U, value=TRUE) #termini U per la *stessa* variabile.. #npsi.term<- length(nomiU.term) #no. di breakpoints for the same variable. ra<-matrix(NA, length(nomiU.term), 3) rownames(ra)<- nomiU.term for(U.j in nomiU.term){ if(any(c(d.h, h)<0)) { ra[U.j,]<-ci.IS(obj, nomeZ, U.j, h=-1, conf.level=level, ...) } d.h<-min(max(d.h, 1.5),10) a<-"start" it<-0 while(is.character(a)){ a<- try(ci.IS(obj, nomeZ, U.j, h=h, conf.level=level, ...), silent=TRUE) h<-h*d.h it<-it+1 #cat(it,"\n") if(it>=20) break } ra[U.j,]<-a } colnames(ra)<-c("Est.",paste("CI","(",level*100,"%",")",c(".low",".up"),sep="")) rownames(ra)<-sub("U","psi", nomiU.term) ra } #end fn confintSegIS #======================================================================================================= #========== inizio funzione #======================================================================================================= if(method=="delta"){ r<-confintSegDelta(object, parm, level, rev.sgn, var.diff, is, ...) } else { r<-confintSegIS(object, parm, stat=method, conf.level=level, ...) } r<-signif(r,digits) return(r) } segmented/R/summary.segmented.R0000644000176200001440000001074213477725602016241 0ustar liggesusers`summary.segmented` <- function(object, short=FALSE, var.diff=FALSE, p.df="p", .vcov=NULL, ...){ if(is.null(object$psi)) object<-object[[length(object)]] #i seguenti per calcolare aa,bb,cc funzionano per lm e glm, da verificare con arima.... 
# nome<-rownames(object$psi) # nome<-as.character(parse("",text=nome)) # aa<-grep("U",names(coef(object)[!is.na(coef(object))])) # bb<-unlist(sapply(nome,function(x){grep(x,names(coef(object)[!is.na(coef(object))]))},simplify=FALSE,USE.NAMES=FALSE)) # cc<-intersect(aa,bb) #indices of diff-slope parameters # iV<- -grep("psi.",names(coef(object)[!is.na(coef(object))]))#indices of all but the Vs if(!is.null(.vcov)) var.diff<-FALSE if(var.diff && length(object$nameUV$Z)>1) { var.diff<-FALSE warning(" 'var.diff' set to FALSE with multiple segmented variables", call.=FALSE) } nomiU<-object$nameUV$U nomiV<-object$nameUV$V idU<-match(nomiU,names(coef(object)[!is.na(coef(object))])) idV<-match(nomiV,names(coef(object)[!is.na(coef(object))])) beta.c<- coef(object)[nomiU] #per metodo default.. ma serve???? if("segmented.default" == as.character(object$call)[1]){ summ <- c(summary(object, ...), object["psi"]) summ[c("it","epsilon")]<-object[c("it","epsilon")] #v<-try(vcov(object), silent=TRUE) #if(class(v)!="try-error") v<-sqrt(diag(v)) return(summ) } if("lm"%in%class(object) && !"glm"%in%class(object)){ summ <- c(summary.lm(object, ...), object["psi"]) summ$Ttable<-summ$coefficients if(var.diff){ #modifica gli SE Qr <- object$qr p <- object$rank #n.parametri stimati p1 <- 1L:p inv.XtX <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) X <- qr.X(Qr,FALSE) attr(X, "assign") <- NULL K<-length(unique(object$id.group)) #n.gruppi (=n.psi+1) dev.new<-tapply(object$residuals, object$id.group, function(.x){sum(.x^2)}) summ$df.new<-tapply(object$residuals, object$id.group, function(.x){(length(.x)-eval(parse(text=p.df)))}) if(any(summ$df.new<=0)) stop("nonpositive df when computig the group-specific variances.. reduce 'p.df'?", call. 
= FALSE) summ$sigma.new<-sqrt(dev.new/summ$df.new) sigma.i<-rowSums(model.matrix(~0+factor(object$id.group))%*%diag(summ$sigma.new)) var.b<-inv.XtX%*%crossprod(X*sigma.i)%*%inv.XtX #sqrt(rowSums((X %*% V) * X)) dimnames(var.b)<-dimnames(summ$cov.unscaled) summ$cov.var.diff<-var.b summ$Ttable[,2]<-sqrt(diag(var.b)) summ$Ttable[,3]<-summ$Ttable[,1]/summ$Ttable[,2] summ$Ttable[,4]<- 2 * pt(abs(summ$Ttable[,3]),df=object$df.residual, lower.tail = FALSE) dimnames(summ$Ttable) <- list(names(object$coefficients)[Qr$pivot[p1]], c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) } if(!is.null(.vcov)){ summ$Ttable[,2]<-sqrt(diag(.vcov)) summ$Ttable[,3]<-summ$Ttable[,1]/summ$Ttable[,2] summ$Ttable[,4]<- 2 * pt(abs(summ$Ttable[,3]),df=object$df.residual, lower.tail = FALSE) #dimnames(summ$Ttable) <- list(names(object$coefficients)[Qr$pivot[p1]], c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) } summ$Ttable[idU,4]<-NA summ$Ttable<-summ$Ttable[-idV,] summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$var.diff<-var.diff summ$short<-short class(summ) <- c("summary.segmented", "summary.lm") return(summ) } #if("glm"%in%class(object)){ if(inherits(object, "glm")){ summ <- c(summary.glm(object, ...), object["psi"]) summ$Ttable<-summ$coefficients[-idV,] summ$Ttable[idU,4]<-NA summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] summ$short<-short class(summ) <- c("summary.segmented", "summary.glm") return(summ)} if("Arima"%in%class(object)){ #da controllare coeff<-object$coef v<-sqrt(diag(object$var.coef)) Ttable<-cbind(coeff[-idV],v[-idV],coeff[-idV]/v[-idV]) colnames(Ttable)<-c("Estimate","Std. 
Error","t value") object$Ttable<-Ttable object$short<-short summ<-object summ[c("it","epsilon","conv.warn")]<-object[c("it","epsilon","id.warn")] class(summ) <- c("summary.segmented", "summary.Arima") return(summ)} } segmented/R/pscore.test.R0000644000176200001440000005124013475757136015046 0ustar liggesusers`pscore.test`<- function(obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), values=NULL, dispersion=NULL, df.t=NULL, more.break=FALSE, n.break=1) { #------------------------------------------------------------------------------- test.Sc2<-function(y, z, xreg, sigma=NULL, values=NULL, fn="pmax(x-p,0)", df.t="Inf", alternative, w=NULL, offs=NULL, nbreaks=1, ties.ok=FALSE){ #xreg: la matrice del disegno del modello nullo. Se mancante viene assunta solo l'intercetta. #Attenzione che se invXtX e xx vengono entrambe fornite, non viene fatto alcun controllo #invXtX: {X'X}^{-1}. if missing it is computed from xreg #sigma: the sd. If missing it is computed from data (under the *null* model) #values: the values with respect to ones to compute the average term. If NULL 10 values from min(z) to max(z) are taken. 
if(!is.null(offs)) y<-y-offs n<-length(y) if(missing(xreg)) xreg<-cbind(rep(1,n)) id.ok<-complete.cases(cbind(y,z,xreg)) y<-y[id.ok] z<-z[id.ok] xreg<-xreg[id.ok,,drop=FALSE] n<-length(y) k=ncol(xreg) #per un modello ~1+x if(is.null(values)) values<-seq(min(z), max(z), length=10) n1<-length(values) PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi if(is.matrix(z)) { X1<-matrix(z[,1], nrow=n, ncol=n1, byrow=FALSE) X2<-matrix(z[,2], nrow=n, ncol=n1, byrow=FALSE) X<-eval(parse(text=fn), list(x=X1, y=X2, p=PSI)) #X<-pmax(X1-X2,0) pmaxMedio<-rowMeans(X) } else { X1<-matrix(z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z if(length(fn)<=1){ X<-eval(parse(text=fn), list(x=X1, p=PSI)) #X<-pmax(X1-PSI,0) pmaxMedio <- rowMeans(X) if(nbreaks>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } } else { pmaxMedio<-matrix(NA,n,length(fn)) #list.X<-vector("list", length=length(fn)) for(j in 1:length(fn)){ #list.X[[j]]<-eval(parse(text=fn[j]), list(x=X1, p=PSI)) X<-eval(parse(text=fn[[j]]), list(x=X1, p=PSI)) pmaxMedio[,j]<-rowMeans(X) } } } if(is.null(w)) w<-1 invXtX<-solve(crossprod(sqrt(w)*xreg)) IA<-diag(n) - xreg%*%tcrossprod(invXtX, xreg*w) #I-hat matrix sc<-t(pmaxMedio*w) %*% IA %*% y v.s<- t(pmaxMedio*w) %*% crossprod(t(IA)/sqrt(w))%*%(w*pmaxMedio) ris<-if(nbreaks==1) drop(sc/(sigma*sqrt(v.s))) else drop(crossprod(sc,solve(v.s,sc)))/(sigma^2) #if(length(fn)<=1 && cadj) ris<- sign(ris)*sqrt((ris^2)*(1-(3-(ris^2))/(2*n))) #passa alla F.. 
df.t<-eval(parse(text=df.t)) p2<- if(nbreaks==1) 2*pt(abs(ris), df=df.t, lower.tail=FALSE) else pchisq(ris, df=nbreaks, lower.tail=FALSE)#pf((ris/nbreaks)/(sigma^2), df1=nbreaks, df2=df.t, lower.tail =FALSE)# pvalue<-switch(alternative, less = pt(ris, df=df.t, lower.tail =TRUE) , greater = pt(ris, df=df.t, lower.tail =FALSE) , two.sided = p2) #pvalue<- 2*pt(abs(ris), df=df.t, lower.tail =FALSE) r<-c(ris, pvalue)#, pmaxMedio) r #return(pmaxMedio) } #------------------------------------------------------------------------------- scGLM<-function(y, z, xreg, family, values = NULL, size=1, weights.var, fn="pmax(x-p,0)", alternative=alternative){ #score test for GLM #size (only if family=binomial()) #weights.var: weights to be used for variance computations. If missing the weights come from the null fit output<-match.arg(output) n<-length(y) if(missing(xreg)) xreg<-cbind(rep(1,n)) id.ok<-complete.cases(cbind(y,z,xreg)) y<-y[id.ok] z<-z[id.ok] xreg<-xreg[id.ok,,drop=FALSE] n<-length(y) if(family$family=="poisson") size=1 if(length(size)==1) size<-rep(size,n) yN<-y/size k=ncol(xreg) #per un modello ~1+x if(is.null(values)) values<-seq(min(z), max(z), length=10) n1<-length(values) PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi X1<-matrix(z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z X<-eval(parse(text=fn), list(x=X1, p=PSI)) #X<-pmax(X1-X2,0) pmaxMedio<-rowMeans(X) o<-glm.fit(yN, x=xreg, weights=size, family=family) r<-y-(o$fitted*size) sc<-drop(crossprod(r, pmaxMedio)) # if(output=="unst.score") return(drop(sc)) p <- o$rank Qr <- o$qr COV <- chol2inv(Qr$qr[1:p, 1:p, drop = FALSE]) #vcov(glm(y~x, family=poisson)) A<-xreg%*%COV%*%crossprod(xreg, diag(o$weights)) h<- drop(tcrossprod(pmaxMedio, diag(n)- A)) if(missing(weights.var)) weights.var<-o$weights v.s<- drop(crossprod(h*sqrt(weights.var))) #t(h)%*%diag(exp(lp))%*%h ris<-if(length(fn)<=1) sc/sqrt(v.s) else drop(crossprod(sc,solve(v.s,sc))) # if(output=="score") 
return(drop(ris)) pvalue<- switch(alternative, less = pnorm(ris, lower.tail =TRUE) , greater = pnorm(ris, lower.tail =FALSE) , two.sided = 2*pnorm(abs(ris), lower.tail =FALSE) ) # pvalue<- if(length(fn)<=1) 2*pnorm(abs(ris), lower.tail =FALSE) else pchisq(ris,df=length(fn), lower.tail =FALSE) # NB: se calcoli ris<-drop(t(sc)%*%solve(v.s,sc))/(length(fn)*sigma^2) devi usare pf(ris,df1=length(fn),df2=df.t, lower.tail =FALSE) return(c(ris, pvalue)) } #---------------------------------------------------- if(!inherits(obj, "lm")) stop("A '(g)lm', or 'segmented-(g)lm' model is requested") fn="pmax(x-p,0)" ties.ok=FALSE if(missing(seg.Z)){ if(inherits(obj, "segmented") && length(obj$nameUV$Z)==1) seg.Z<- as.formula(paste("~", obj$nameUV$Z )) if(!inherits(obj, "segmented") && length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) } else { if(class(seg.Z)!="formula") stop("'seg.Z' should be an one-sided formula") } if(any(c("$","[") %in% all.names(seg.Z))) stop(" '$' or '[' not allowed in 'seg.Z' ") name.Z <- all.vars(seg.Z) if(length(name.Z)>1) stop("Only one variable can be specified in 'seg.Z' ") nomiU.term<-grep(name.Z, obj$nameUV$U, value=TRUE) #termini U per relativi alla variabile nomeZ #se length(nomiU.term)==0 la variabile in seg.Z non e' nel modello (si sta assumendo che la left slope ==0) if(length(nomiU.term)==0 && more.break) warning(paste("variable", name.Z, "has no breakpoint.. 'more.break=TRUE' ignored"), call.=FALSE) #browser() if(k<=1) stop("k>1 requested! k>=10 is recommended") if(k<10) warnings("k>=10 is recommended") alternative <- match.arg(alternative) if(!n.break%in%1:2) stop(" 'n.break' should be 1 or 2", call. 
= FALSE) if(n.break==2) alternative<-"two.sided" isGLM<-"glm"%in%class(obj) if(isGLM){ if (is.null(dispersion)) dispersion <- summary.glm(obj)$dispersion if(inherits(obj, "segmented")){ if(more.break && !name.Z %in% obj$nameUV$Z) stop(" 'more.break' is meaningful only if at least 1 breakpoint has been estimated") Call<-mf<-obj$orig.call #del GLM formulaSeg <-formula(obj) #contiene le variabili U e le psi formulaNull<- update.formula(formulaSeg, paste("~.-",paste(obj$nameUV$V, collapse="-"))) #rimuovi le variabili "psi.." #se length(nomiU.term)==0 la variabile in seg.Z non e' nel modello (si sta assumendo che la left slope ==0) if(!more.break && length(nomiU.term)>0){ if(length(nomiU.term)>1) stop(" 'more.break=FALSE' does not work with multiple breakpoints referring to the same variable specified in seg.Z", call. = FALSE) formulaNull <-update.formula(formulaNull,paste("~.-",paste(nomiU.term, collapse="-"))) #non contiene U del termine di interesse, MA contiene eventuali altri termini U } mf$formula<-formulaNull mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #se il modello inziale non contiene seg.Z.. 
if(!is.null(obj$orig.call$offset) || !is.null(obj$orig.call$weights) || !is.null(obj$orig.call$subset)){ mf$formula <- update.formula(mf$formula, paste(".~.+", paste(c(all.vars(obj$orig.call$offset), all.vars(obj$orig.call$weights), all.vars(obj$orig.call$subset)), collapse = "+"))) } m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) mf <- eval(mf, parent.frame()) mt <- attr(mf, "terms") #interc<-attr(mt,"intercept") y <- model.response(mf, "any") X0<- if (!is.empty.model(mt)) model.matrix(mt, mf) Z<-X0[ ,match(name.Z, colnames(X0))] n<-length(Z) if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) n1<-length(values) X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. length(fn)<=1; fn="pmax(x-p,0)" definita sopra.. pmaxMedio <-as.matrix(rowMeans(X)) if(n.break>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } #necessario salvare pmaxMedio in mf??? 
mf$pmaxMedio<-pmaxMedio Call$formula<- formulaNull Call$data<-quote(mf) obj0<-eval(Call) # pos<-1 # assign("mf", mf, envir=as.environment(pos)) # r<-as.numeric(as.matrix(add1(obj0, ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) ws <- sqrt(obj0$weights[obj0$weights>0]) res<-obj0$residuals[obj0$weights>0] zw <- ws * res A <- qr.resid(obj0$qr, ws * pmaxMedio[obj0$weights>0,]) u<-t(A)%*% zw v<-crossprod(A) r<-if(n.break==1) u/sqrt(v*dispersion) else t(u)%*% solve(v) %*%u/dispersion #r<- (colSums(as.matrix(A * zw))/sqrt(colSums(as.matrix(A * A)))/sqrt(dispersion)) p2<- if(n.break==1) 2*pnorm(abs(r), lower.tail=FALSE) else pchisq(r, df=n.break, lower.tail=FALSE) pvalue<- switch(alternative, less = pnorm(r, lower.tail =TRUE) , greater = pnorm(r, lower.tail =FALSE) , two.sided = p2) r<-c(r, pvalue) # ================fine se e' GLM+segmented. } else { #=================Se e' GLM (non segmented) Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") formulaNull <- formula(obj) mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #aggiunto 12/03/18 (non trovava la variable weights perche' era salvata in mf come "(weights)") if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mf$formula <-update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+"))) } mf <- eval(mf, parent.frame()) mt <- attr(mf, "terms") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf) n <- nrow(XREG) Z<- XREG[,match(name.Z, colnames(XREG))] if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) n1<-length(values) 
PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. length(fn)<=1 pmaxMedio<-as.matrix(rowMeans(X)) if(n.break>1){ XX<-sapply(1:length(values), function(.x) X[,-(1:.x), drop=FALSE]) XX<-do.call("cbind", XX) if(ties.ok) XX<-cbind(X, XX) pmaxMedio2 <- rowMeans(XX) pmaxMedio <- cbind(pmaxMedio, pmaxMedio2) } #r<-as.numeric(as.matrix(add1(update(obj, data=mf), ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) #Call$formula<- formulaNull #Call$data<-quote(mf) #obj0<-eval(Call) ws <- sqrt(obj$weights[obj$weights>0]) res<-obj$residuals[obj$weights>0] zw <- ws * res A <- qr.resid(obj$qr, ws * pmaxMedio[obj$weights>0,]) u<-t(A)%*% zw v<-crossprod(A) r<-if(n.break==1) u/sqrt(v*dispersion) else t(u)%*% solve(v) %*%u/dispersion #r<- (colSums(as.matrix(A * zw))/sqrt(colSums(as.matrix(A * A)))/sqrt(dispersion)) p2<- if(n.break==1) 2*pnorm(abs(r), lower.tail=FALSE) else pchisq(r, df=n.break, lower.tail=FALSE) pvalue<- switch(alternative, less = pnorm(r, lower.tail =TRUE) , greater = pnorm(r, lower.tail =FALSE) , two.sided = p2) r<-c(r, pvalue) #fine se e' un GLM } } else { ##============================== Se e' un LM.. 
if(is.null(dispersion)) dispersion<- summary(obj)$sigma^2 if(is.null(df.t)) df.t <- obj$df.residual #df.ok<- if(!is.null(df.t)) df.t else obj$df.residual #============ se e' LM+segmented if(inherits(obj, "segmented")){ if(more.break && !name.Z %in% obj$nameUV$Z) stop(" stop 'more.break' is meaningful only if at least 1 breakpoint has been estimated", call.=FALSE ) Call<-mf<-obj$orig.call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #formulaOrig<-formula(obj) if(!is.null(obj$orig.call$offset) || !is.null(obj$orig.call$weights) || !is.null(obj$orig.call$subset)){ mf$formula <- update.formula(mf$formula, paste(".~.+", paste(c(all.vars(obj$orig.call$offset), all.vars(obj$orig.call$weights), all.vars(obj$orig.call$subset)), collapse = "+"))) } mf$formula<-update.formula(mf$formula,paste("~.-",paste(obj$nameUV$V, collapse="-"))) #rimuovi le variabili "psi.." if(!more.break) { if(length(nomiU.term)>1) stop(" 'more.break=FALSE' does not work with multiple breakpoints referring to the same variable specified in seg.Z", call. = FALSE) #ovvero il test funziona per un solo breakpoint.. mf$formula<-update.formula(mf$formula,paste("~.-",paste(nomiU.term, collapse="-"))) #rimuovi il termine U in questione, cioe' solo per una variabile #altre variabili "U" relative a piu' variabili devono rimanere.. 
} formulaNull <- formula(mf) ############### #PERCHE' NON estrarre direttamente la model.matrix(obj) #X <-model.matrix(obj) #X <- X[, !(colnames(X) %in% obj$nameUV$V), drop=FALSE] #perche' poi se la variabile NON e' nel modello (perche' left slope=0) non so come recuperarla if(more.break) { mf$data<-quote(model.frame(obj)) mf<-eval(mf) } else { mf <- eval(mf, parent.frame()) } #for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) y <- model.response(mf, "any") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) mt <- attr(mf, "terms") #interc<-attr(mt,"intercept") X0<- if (!is.empty.model(mt)) model.matrix(mt, mf) Z<-X0[ ,match(name.Z, colnames(X0))] n<-length(Z) if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) # n1<-length(values) # X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z # PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) # X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. length(fn)<=1; fn="pmax(x-p,0)" definita sopra.. # mf$pmaxMedio<- pmaxMedio <-rowMeans(X) r<-test.Sc2(y=y, z=Z, xreg=X0, sigma=sqrt(dispersion), values=values, fn=fn, df.t=df.t, alternative=alternative, w=weights, offs=offset, nbreaks=n.break, ties.ok=FALSE) #fine se e' LM+segmented. 
} else { #=================Se e' LM (non segmented) Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") formulaNull <- formula(obj) mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) #aggiunto 12/03/18 (non trovava la variable weights perche' era salvata in mf come "(weights)") if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mf$formula <-update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+"))) } mf <- eval(mf, parent.frame()) y <- model.response(mf, "any") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) mt <- attr(mf, "terms") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf) n <- nrow(XREG) Z<- XREG[,match(name.Z, colnames(XREG))] if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] if(is.null(values)) values<-seq(min(Z), max(Z), length=k) #values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) #n1<-length(values) #PSI<-matrix(values, nrow=n, ncol=n1, byrow=TRUE) #(era X2) matrice di valori di psi #X1<-matrix(Z, nrow=n, ncol=n1, byrow=FALSE) #matrice della variabile Z #X<-eval(parse(text=fn), list(x=X1, p=PSI)) # fn t.c. 
length(fn)<=1 #pmaxMedio<-rowMeans(X) r<-test.Sc2(y=y, z=Z, xreg=XREG, sigma=sqrt(dispersion), values=values, fn=fn, df.t=df.t, alternative=alternative, w=weights, offs=offset, nbreaks=n.break, ties.ok=FALSE) #r<-as.numeric(as.matrix(add1(update(obj, data=mf), ~.+pmaxMedio, scale=dispersion, test="Rao"))[2,c("scaled Rao sc.", "Pr(>Chi)")]) } } if(is.null(obj$family$family)) { famiglia<-"gaussian" legame<-"identity" } else { famiglia<-obj$family$family legame<-obj$family$link } out <- list(method = "Score test for one/two changes in the slope", data.name=paste("formula =", as.expression(formulaNull), "\nbreakpoint for variable =", name.Z, "\nmodel =",famiglia,", link =", legame ,", method =", obj$call[[1]]), statistic = c(`observed value` = r[1]), parameter = c(n.points = length(values)), p.value = r[2], #alternative = paste(alternative, " (",n.break ,"breakpoint ) ") alternative = paste(alternative," (",n.break ,if(n.break==1) " breakpoint) " else " breakpoints) ", sep="")) class(out) <- "htest" return(out) } segmented/R/segmented.glm.R0000644000176200001440000005352113501720024015302 0ustar liggesusers`segmented.glm` <- function(obj, seg.Z, psi, npsi, control = seg.control(), model = TRUE, keep.class=FALSE, ...) 
{ # n.Seg<-1 # if(missing(seg.Z) && length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) # if(missing(psi)){if(length(all.vars(seg.Z))>1) stop("provide psi") else psi<-Inf} # if(length(all.vars(seg.Z))>1 & !is.list(psi)) stop("`psi' should be a list with more than one covariate in `seg.Z'") # if(is.list(psi)){ # if(length(all.vars(seg.Z))!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") # if(any(is.na(match(all.vars(seg.Z),names(psi), nomatch = NA)))) stop("Variables in `seg.Z' and `psi' do not match") # n.Seg <- length(psi) # } # if(length(all.vars(seg.Z))!=n.Seg) stop("A wrong number of terms in `seg.Z' or `psi'") if(missing(seg.Z)) { if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if(missing(psi)) { if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? 
} else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } maxit.glm <- control$maxit.glm it.max <- old.it.max<- control$it.max min.step<-control$min.step alpha<-control$alpha digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) visual <- control$visual stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visualBoot<-FALSE if(n.boot>0){ if(!is.null(control$seed)) { set.seed(control$seed) employed.Random.seed<-control$seed } else { employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(employed.Random.seed) } if(visual) {visual<-FALSE; visualBoot<-TRUE}#warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") } last <- control$last K<-control$K h<-min(abs(control$h),1) if(h<1) it.max<-it.max+round(it.max/2) # if(!stop.if.error) objInitial<-obj #------------------------------- # #una migliore soluzione......... 
# objframe <- update(obj, model = TRUE, x = TRUE, y = TRUE) # y <- objframe$y # a <- model.matrix(seg.Z, data = eval(obj$call$data)) # a <- subset(a, select = colnames(a)[-1]) orig.call<-Call<-mf<-obj$call orig.call$formula<-mf$formula<-formula(obj) #per consentire lm(y~.) m <- match(c("formula", "data", "subset", "weights", "na.action","etastart","mustart","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") #non so a che serva la seguente linea.. if(class(mf$formula)=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula) #orig.call$formula<-update.formula(orig.call$formula, paste("~.-",all.vars(seg.Z))) #utile per plotting # nomeRispo<-strsplit(paste(formula(obj))[2],"/")[[1]] #eventuali doppi nomi separati da "/" (tipo "y/n" per GLM binom) #la linea sotto aggiunge nel mf anche la variabile offs.. # if(length(all.vars(formula(obj)))>1){ # id.rispo<-1 # if(length(nomeRispo)>=2) id.rispo<-1:2 # #questo serve quando formula(obj) ha solo l'intercept # agg<-if(length(all.vars(formula(obj))[-id.rispo])==0) "" else "+" # mf$formula<-update.formula(mf$formula,paste(paste(seg.Z,collapse=".+"),agg,paste(all.vars(formula(obj))[-id.rispo],collapse="+"))) # } else { # mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) # } mfExt<- mf mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) # mfExt$formula<- update.formula(mfExt$formula,paste(paste(seg.Z,collapse=".+"),"+",paste(all.vars(formula(obj)),collapse="+"))) # mfExt$formula<- if(!is.null(obj$call$data)) # update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),"-",obj$call$data,sep="")) # else update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),sep="")) #----------- if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){ mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), 
all.vars(obj$call$subset)), collapse = "+") )) } mf <- eval(mf, parent.frame()) n<-nrow(mf) #La linea sotto serve per inserire in mfExt le eventuali variabili contenute nella formula con offset(..) # o anche variabili che rientrano in espressioni (ad es., y/n o I(y*n)) nomiOff<-setdiff(all.vars(formula(obj)), names(mf)) if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep="")) #ago 2014 c'e' la questione di variabili aggiuntive... nomiTUTTI<-all.vars(mfExt$formula) #comprende anche altri nomi (ad es., threshold) "variabili" nomiNO<-NULL #dovrebbe contenere for(i in nomiTUTTI){ r<-try(eval(parse(text=i), parent.frame()), silent=TRUE) if(class(r)!="try-error" && length(r)==1 && !is.function(r)) nomiNO[[length(nomiNO)+1]]<-i } #nomiNO dovrebbe contenere i nomi delle "altre variabili" (come th in subset=x=2) mf[nomeRispo[1]]<-weights*y id.duplic<-match(all.vars(formula(obj)),all.vars(seg.Z),nomatch=0)>0 if(any(id.duplic)) { #new.mf<-mf[,id.duplic,drop=FALSE] new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE] new.XREGseg<-data.matrix(new.mf) XREG<-cbind(XREG,new.XREGseg) } n.psi<- length(unlist(psi)) id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG) XREGseg<-XREG[,id.n.Seg,drop=FALSE] #XREG<-XREG[,-id.n.Seg,drop=FALSE] #XREG<-model.matrix(obj0) non va bene perche' non elimina gli eventuali mancanti in seg.Z.. 
#Due soluzioni #XREG<-XREG[,colnames(model.matrix(obj)),drop=FALSE] #XREG<-XREG[,match(c("(Intercept)",all.vars(formula(obj))[-1]),colnames(XREG),nomatch =0),drop=FALSE] XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE] XREG<-XREG[,unique(colnames(XREG)), drop=FALSE] ################# #if(ncol(XREGseg)==1 && length(psi)==1 && n.psi==1 && !any(is.na(psi))) { if(psi==Inf) psi<-median(XREGseg)} ################# n <- nrow(XREG) #Z <- list(); for (i in colnames(XREGseg)) Z[[length(Z) + 1]] <- XREGseg[, i] Z<-lapply(apply(XREGseg,2,list),unlist) #prende anche i nomi! name.Z <- names(Z) <- colnames(XREGseg) if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("Z and psi have to be *named* list") id.nomiZpsi <- match(names(Z), names(psi)) if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") #dd <- match(names(Z), names(psi)) nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))} } } else { for(i in 1:length(psi)) { if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))} } } initial.psi<-psi a <- sapply(psi, length)#b <- rep(1:length(a), times = a) id.psi.group <- rep(1:length(a), times = a) #identificativo di apparteneza alla variabile #Znew <- list() #for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], a[i]) #Z <- matrix(unlist(Znew), nrow = n) Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) psi <- unlist(psi) psi<-unlist(tapply(psi,id.psi.group,sort)) k 
<- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) colnames(Z) <- nomiZ <- rep(nome, times = a) ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(xxx) {1:xxx}))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ, sep = ".") #forse non serve crearsi l'ambiente KK, usa mf.. #obj <- update(obj, formula = Fo, data = mf) #if (model.frame) obj$model <- mf #controlla che model.frame() funzioni sull'oggetto restituito # KK <- new.env() # for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), objframe$model[[i]], envir = KK) if (it.max == 0) { #mf<-cbind(mf, mfExt) U <- (Z>PSI)*(Z-PSI) #pmax((Z - PSI), 0) colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") nomiU <- paste("U", colnames(U), sep = "") #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) #e' necessario il for? puoi usare colnames(U)<-nomiU;mf[nomiU]<-U for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) #obj <- update(obj, formula = Fo, data = KK) obj <- update(obj, formula = Fo, data = mfExt, evaluate=FALSE) if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL obj<-eval(obj, envir=mfExt) if (model) obj$model <-mf #obj$model <- data.frame(as.list(KK)) names(psi)<-paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".") obj$psi <- psi return(obj) } #XREG <- model.matrix(obj) creata sopra #o <- model.offset(objframe) #w <- model.weights(objframe) if (is.null(weights)) weights <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) fam <- family(obj) initial <- psi obj0 <- obj dev0<-obj$dev list.obj <- list(obj) # psi.values <- NULL nomiOK<-nomiU opz<-list(toll=toll,h=h,stop.if.error=stop.if.error,dev0=dev0,visual=visual,it.max=it.max,nomiOK=nomiOK, fam=fam, eta0=obj$linear.predictors, maxit.glm=maxit.glm, id.psi.group=id.psi.group, gap=gap, conv.psi=conv.psi, 
alpha=alpha, fix.npsi=fix.npsi, min.step=min.step, pow=pow, visualBoot=visualBoot, digits=digits) if(n.boot<=0){ obj<-seg.glm.fit(y,XREG,Z,PSI,weights,offs,opz) } else { obj<-seg.glm.fit.boot(y, XREG, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random) #jt, nonParam } if(!is.list(obj)){ warning("No breakpoint estimated", call. = FALSE) return(obj0) } id.psi.group<-obj$id.psi.group nomiOK<-obj$nomiOK #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) #nomi delle variabili con breakpoint stimati! nomiFINALI<-unique(sub("U[1-9]*[0-9].", "", nomiOK)) #nomi originali delle variabili con breakpoint stimati! #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. = FALSE) it<-obj$it psi<-obj$psi k<-length(psi) psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V # #commentati il 28/5 solo per imitare segmented.lm # for(jj in colnames(V)) { # VV<-V[, which(colnames(V)==jj),drop=FALSE] # sumV<-abs(rowSums(VV)) # # if( (any(diff(sumV)>=2) #se ci sono due breakpoints equivalenti # # || any(table(sumV)<=1))) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") # if(any(table(sumV)<=1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") # } rangeZ<-obj$rangeZ obj<-obj$obj k<- length(psi) beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) #psi.values[[length(psi.values) + 1]] <- psi #in LM e' commentata.. id.warn <- FALSE if (n.boot<=0 && it > it.max) { #it >= (it.max+1) warning("max number of iterations attained", call. 
= FALSE) id.warn <- TRUE } Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) forma.nomiU<-function(xx,yy)paste("U",1:xx, ".", yy, sep="") forma.nomiVxb<-function(xx,yy)paste("psi",1:xx, ".", yy, sep="") nomiU <- unlist(mapply(forma.nomiU, length.psi, nomiFINALI)) #invece di un ciclo #paste("U",1:length.psi[i], ".", name.Z[i]) nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, nomiFINALI)) #se nomiOK sopra contiene gia' le U1.x,ecc... perche' non fare?nomiVxb<-sub("U","psi", nomiOK) #mf<-cbind(mf, mfExt) for(i in 1:ncol(U)) { mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i] mfExt[nomiVxb[i]]<-mf[nomiVxb[i]]<-Vxb[,i] } # for (i in 1:ncol(U)) { # assign(nomiU[i], U[, i], envir = KK) # assign(nomiVxb[i], Vxb[, i], envir = KK) # } nnomi <- c(nomiU, nomiVxb) Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+")))) #la seguente linea si potrebbe rimuovere perche' in mfExt c'e' gia' tutto.. if(is.matrix(y)&& (fam$family=="binomial" || fam$family=="quasibinomial")){ mfExt<-cbind(mfExt[[1]], mfExt[,-1]) } objF <- update(obj0, formula = Fo, data = mfExt, evaluate=FALSE) if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL objF<-eval(objF, envir=mfExt) #C'e' un problema..controlla obj (ha due "(Intercepts)" - bhu.. al 27/03/14 non mi sembra! #Puo' capitare che psi sia ai margini e ci sono 1 o 2 osservazioni in qualche intervallo. Oppure ce ne # sono di piu' ma hanno gli stessi valori di x objF$offset<- obj0$offset isNAcoef<-any(is.na(objF$coefficients)) if(isNAcoef){ if(stop.if.error) { cat("breakpoint estimate(s):", as.vector(psi),"\n") stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE)} else { warning("some estimate is NA: too many breakpoints? 
'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE) Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+")))) objF <- update(obj0, formula = Fo, evaluate=TRUE, data = mfExt) names(psi)<-nomiVxb objF$psi<-psi return(objF) } } #aggiornare qui i weights???? (piuttosto che sotto) #------>>> #------>>> #------>>> if(!gap){ names.coef<-names(objF$coefficients) names(obj$coefficients)[match(c(paste("U",1:k, sep=""), paste("V",1:k, sep="")), names(coef(obj)))]<- nnomi objF$coefficients[names.coef]<-obj$coefficients[names.coef] #sostituisce gli 0 objF$fitted.values<-obj$fitted.values objF$linear.predictors<-obj$linear.predictors objF$residuals<-obj$residuals objF$deviance<-obj$deviance objF$aic<-obj$aic + 2*ncol(Z) #k objF$weights<-obj$weights } Cov <- vcov(objF) id <- match(nomiVxb, names(coef(objF))) vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id]) #if(length(initial)!=length(psi)) initial<-rep(NA,length(psi)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. ris.psi<-matrix(,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(vv) #NB "a" deve essere un vettore che si appatta con "initial.psi" per ottnetere "initial" sotto... Se una variabile alla fine risulta # senza breakpoint questo non avviene e ci sono problemi nella formazione di "initial". 
Allora costruisco a.ok a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } # initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- (it - 1) objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn objF$orig.call<-orig.call if (model) objF$model <- mf #objF$mframe <- data.frame(as.list(KK)) if(n.boot>0) objF$seed<-employed.Random.seed class(objF) <- c("segmented", class(obj0)) list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] return(list.obj) } segmented/R/predict.segmented.r0000644000176200001440000001074513501727630016230 0ustar liggesuserspredict.segmented<-function(object, newdata, ...){ #rev: 30/10/2013: it seems to work correctly, even with the minus variable (null right slope..) #rev: 14/4/2014 now it works like predict.lm/glm #BUT problems if type="terms" (in realta' funziona, il problema e' che # restituisce una colonna per "x", "U.x", "psi.x".. (Eventualmente si dovrebbero sommare..) #if(!is.null(object$orig.call$offset)) stop("predict.segmented can not handle argument 'offset'. 
Include it in formula!") vS<-function(obj){ X<-model.matrix(obj) nomiZ<- obj$nameUV$Z nomiV<- obj$nameUV$V for(i in 1:length(nomiZ)){ nomeZ<-nomiZ[i] nomeV<-nomiV[i] Z<-X[,nomeZ] est.psi<- obj$psi[nomeV,"Est."] se.psi<- obj$psi[nomeV,"St.Err"] X[,nomeV]<-X[,nomeV]*pnorm((Z-est.psi)/se.psi) } s2<-summary.lm(obj)$sigma^2 s2*solve(crossprod(X)) } #se gli passo isV=TRUE cosa cambia in predict?? dummy.matrix<-function(x.values, x.name, obj.seg, psi.est=TRUE, isV=FALSE){ #given the segmented fit 'obj.seg' and a segmented variable x.name with corresponding values x.values, #this function simply returns a matrix with columns (x, (x-psi)_+, -b*I(x>psi)) #or ((x-psi)_+, -b*I(x>psi)) if obj.seg does not include the coef for the linear "x" f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } if(length(isV)==1) isV<-c(FALSE,isV) n<-length(x.values) #le seguenti righe selezionavano (ERRONEAMENTE) sia "U1.x" sia "U1.neg.x" (se "x" e "neg.x" erano segmented covariates) #nameU<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$U, value = TRUE) #nameV<- grep(paste("\\.",x.name,"$", sep=""), obj.seg$nameUV$V, value = TRUE) nameU<-obj.seg$nameUV$U[f.U(obj.seg$nameUV$U,x.name)] nameV<-obj.seg$nameUV$V[f.U(obj.seg$nameUV$V,x.name)] diffSlope<-coef(obj.seg)[nameU] est.psi<-obj.seg$psi[nameV,"Est."] se.psi<-obj.seg$psi[nameV, "St.Err"] k<-length(est.psi) PSI <- matrix(rep(est.psi, rep(n, k)), ncol = k) SE.PSI <- matrix(rep(se.psi, rep(n, k)), ncol = k) newZ<-matrix(x.values, nrow=n,ncol=k, byrow = FALSE) dummy1<-if(isV[1]) (newZ-PSI)*pnorm((newZ-PSI)/SE.PSI) 
else (newZ-PSI)*(newZ>PSI) #pmax(newZ-PSI,0) if(psi.est){ V<-if(isV[2]) -pnorm((newZ-PSI)/SE.PSI) else -(newZ>PSI) #ifelse(newZ>PSI,-1,0) dummy2<- if(k==1) V*diffSlope else V%*%diag(diffSlope) #t(diffSlope*t(-I(newZ>PSI))) newd<-cbind(x.values,dummy1,dummy2) colnames(newd)<-c(x.name,nameU, nameV) } else { newd<-cbind(x.values,dummy1) colnames(newd)<-c(x.name,nameU) } if(!x.name%in%names(coef(obj.seg))) newd<-newd[,-1,drop=FALSE] return(newd) } #-------------------------------------------------------------- if(missing(newdata)){ newd.ok<-model.frame(object) } else { #devi trasformare la variabili segmented attraverso dummy.matrix() nameU<-object$nameUV$U nameV<-object$nameUV$V nameZ<-object$nameUV$Z n<-nrow(newdata) r<-NULL for(i in 1:length(nameZ)){ x.values<-newdata[[nameZ[i]]] DM<-dummy.matrix(x.values, nameZ[i], object) r[[i]]<-DM } newd.ok<-data.frame(matrix(unlist(r), nrow=n, byrow = FALSE)) names(newd.ok)<- unlist(sapply(r, colnames)) idZ<-match(nameZ, names( newdata)) newdata<-cbind(newdata[,-idZ, drop=FALSE], newd.ok) # newdata<-subset(newdata, select=-idZ) #newdata<-cbind(newdata, newd.ok) #e' una ripetizione (refuso?) comunque controlla } class(object)<-class(object)[-1] f<-predict(object, newdata=newdata, ...) #f<-if(inherits(object, what = "glm", which = FALSE)) predict.glm(object, newdata=newd.ok, ...) else predict.lm(object, newdata=newd.ok, ...) return(f) #sommare se "terms"? } segmented/R/seg.Ar.fit.boot.r0000644000176200001440000001360313476173372015473 0ustar liggesusersseg.Ar.fit.boot<-function(obj, XREG, Z, PSI, opz, n.boot=10, size.boot=NULL, jt=FALSE, nonParam=TRUE, random=FALSE){ #random se TRUE prende valori random quando e' errore: comunque devi modificare qualcosa (magari con it.max) # per fare restituire la dev in corrispondenza del punto psi-random #nonParm. se TRUE implemneta il case resampling. Quello semiparam dipende dal non-errore di extract.psi<-function(lista){ #serve per estrarre il miglior psi.. 
dev.values<-lista[[1]][-1] #remove the 1st one referring to model without psi psi.values<-lista[[2]][-1] #remove the 1st one (NA) dev.ok<-min(dev.values) id.dev.ok<-which.min(dev.values) if(is.list(psi.values)) psi.values<-matrix(unlist(psi.values), nrow=length(dev.values), byrow=TRUE) if(!is.matrix(psi.values)) psi.values<-matrix(psi.values) psi.ok<-psi.values[id.dev.ok,] r<-list(SumSquares.no.gap=dev.ok, psi=psi.ok) r } #------------- visualBoot<-opz$visualBoot opz.boot<-opz opz1<-opz opz.boot$pow=c(1,1) #c(1.1,1.2) opz.boot$it.max<-20 opz1$it.max <-1 n<-nrow(Z) o0<-try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, PSI, opz)), silent=TRUE) rangeZ <- apply(Z, 2, range) #serve sempre if(!is.list(o0)) { o0<- seg.Ar.fit(obj, XREG, Z, PSI, opz, return.all.sol=TRUE) o0<-extract.psi(o0) ss00<-opz$dev0 if(!nonParam) {warning("using nonparametric boot");nonParam<-TRUE} } if(is.list(o0)){ est.psi00<-est.psi0<-o0$psi ss00<-o0$SumSquares.no.gap if(!nonParam) fitted.ok<-fitted(o0) } else { if(!nonParam) stop("the first fit failed and I cannot extract fitted values for the semipar boot") if(random) { est.psi00<-est.psi0<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0<-try(suppressWarnings(seg.Ar.fit(obj, Z, PSI1, opz1)), silent=TRUE) ss00<-o0$SumSquares.no.gap } else { est.psi00<-est.psi0<-apply(PSI,2,mean) ss00<-opz$dev0 } } n.intDev0<-nchar(strsplit(as.character(ss00),"\\.")[[1]][1]) all.est.psi.boot<-all.selected.psi<-all.est.psi<-matrix(, nrow=n.boot, ncol=length(est.psi0)) all.ss<-all.selected.ss<-rep(NA, n.boot) if(is.null(size.boot)) size.boot<-n Z.orig<-Z #if(visualBoot) cat(0, " ", formatC(opz$dev0, 3, format = "f"),"", "(No breakpoint(s))", "\n") count.random<-0 for(k in seq(n.boot)){ PSI <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) if(jt) Z<-apply(Z.orig,2,jitter) if(nonParam){ id<-sample(n, size=size.boot, replace=TRUE) 
o.boot<-try(suppressWarnings(seg.Ar.fit(obj, XREG[id,,drop=FALSE], Z[id,,drop=FALSE], PSI[id,,drop=FALSE], opz.boot)), silent=TRUE) } else { yy<-fitted.ok+sample(residuals(o0), size=n, replace=TRUE) ##----> o.boot<-try(seg.lm.fit(yy, XREG, Z.orig, PSI, weights, offs, opz.boot), silent=TRUE) #in realta' la risposta dovrebbe essere "yy" da cambiare in mfExt o.boot<- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z.orig, PSI, opz.boot)), silent=TRUE) } if(is.list(o.boot)){ all.est.psi.boot[k,]<-est.psi.boot<-o.boot$psi } else { est.psi.boot<-apply(rangeZ,2,function(r)runif(1,r[1],r[2])) } PSI <- matrix(rep(est.psi.boot, rep(nrow(Z), length(est.psi.boot))), ncol = length(est.psi.boot)) #opz$h<-max(opz$h*.9, .2) opz$it.max<-opz$it.max+1 o <- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z.orig, PSI, opz, return.all.sol=TRUE)), silent=TRUE) if(!is.list(o) && random){ est.psi0<-apply(rangeZ,2,function(r) runif(1,r[1],r[2])) PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o <- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, PSI1, opz1)), silent=TRUE) count.random<-count.random+1 } if(is.list(o)){ if(!"coef"%in%names(o$obj)) o<-extract.psi(o) all.est.psi[k,]<-o$psi all.ss[k]<-o$SumSquares.no.gap if(o$SumSquares.no.gap<=ifelse(is.list(o0), o0$SumSquares.no.gap, 10^12)) o0<-o est.psi0<-o0$psi all.selected.psi[k,] <- est.psi0 all.selected.ss[k]<-o0$SumSquares.no.gap #min(c(o$SumSquares.no.gap, o0$SumSquares.no.gap)) } if (visualBoot) { flush.console() #n.intDev0<-nchar(strsplit(as.character(dev.values[2]),"\\.")[[1]][1]) cat(paste("boot sample = ", sprintf("%2.0f",k), " opt.llik = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), -o0$SumSquares.no.gap), #formatC(L1,width=8, digits=5,format="f"), #era format="fg" " n.psi = ",formatC(length(unlist(est.psi0)),digits=0,format="f"), " est.psi = ",paste(formatC(unlist(est.psi0),digits=3,format="f"), collapse=" "), #sprintf('%.2f',x) sep=""), "\n") } } #end n.boot 
all.selected.psi<-rbind(est.psi00,all.selected.psi) all.selected.ss<-c(ss00, all.selected.ss) ris<-list(all.selected.psi=drop(all.selected.psi),all.selected.ss=all.selected.ss, all.psi=all.est.psi, all.ss=all.ss) if(is.null(o0$obj)){ PSI1 <- matrix(rep(est.psi0, rep(nrow(Z), length(est.psi0))), ncol = length(est.psi0)) o0 <- try(suppressWarnings(seg.Ar.fit(obj, XREG, Z, PSI1, opz1)), silent=TRUE) } if(!is.list(o0)) return(0) o0$boot.restart<-ris return(o0) }segmented/R/intercept.r0000644000176200001440000000716013205262246014614 0ustar liggesusersintercept<-function (ogg, parm, rev.sgn = FALSE, var.diff = FALSE, digits = max(4, getOption("digits") - 2)){ #corregge in caso di no model intercept -- CHE VOLEVO DIRE?? #forse che adesso funziona se nel modello non c'e' l'interc. #-- f.U<-function(nomiU, term=NULL){ #trasforma i nomi dei coeff U (o V) nei nomi delle variabili corrispondenti #and if 'term' is provided (i.e. it differs from NULL) the index of nomiU matching term are returned k<-length(nomiU) nomiUsenzaU<-strsplit(nomiU, "\\.") nomiU.ok<-vector(length=k) for(i in 1:k){ nomi.i<-nomiUsenzaU[[i]][-1] if(length(nomi.i)>1) nomi.i<-paste(nomi.i,collapse=".") nomiU.ok[i]<-nomi.i } if(!is.null(term)) nomiU.ok<-(1:k)[nomiU.ok%in%term] return(nomiU.ok) } #-- #if (!"segmented" %in% class(ogg)) stop("A segmented model is needed") if (var.diff && length(ogg$nameUV$Z) > 1) { var.diff <- FALSE warning("var.diff set to FALSE with multiple segmented variables", call. 
= FALSE) } nomepsi <- rownames(ogg$psi) nomeU <- ogg$nameUV[[1]] nomeZ <- ogg$nameUV[[3]] if (missing(parm)) { nomeZ <- ogg$nameUV[[3]] if (length(rev.sgn) == 1) rev.sgn <- rep(rev.sgn, length(nomeZ)) } else { if (!all(parm %in% ogg$nameUV[[3]])) { stop("invalid parm") } else { nomeZ <- parm } } if (length(rev.sgn) != length(nomeZ)) rev.sgn <- rep(rev.sgn, length.out = length(nomeZ)) nomi <- names(coef(ogg)) nomi <- nomi[-match(nomepsi, nomi)] Allpsi <- index <- vector(mode = "list", length = length(nomeZ)) # gapCoef<-summary.segmented(ogg)$gap ##eliminato 10/11/15 Ris <- list() rev.sgn <- rep(rev.sgn, length.out = length(nomeZ)) if("(Intercept)"%in%names(coef(ogg))){ alpha0 <- alpha00 <- coef(ogg)["(Intercept)"]} else {alpha0 <- alpha00 <-0} #per ogni variabile segmented... for (i in 1:length(nomeZ)) { # id.cof.U <- grep(paste("\\.", nomeZ[i], "$", sep = ""), nomi, value = FALSE) # psii <- ogg$psi[grep(paste("\\.", nomeZ[i], "$", sep = ""), rownames(ogg$psi), value = FALSE), 2] id.cof.U <- f.U(ogg$nameUV$U, nomeZ[i]) + (match(ogg$nameUV$U[1], nomi)-1) psii<- ogg$psi[f.U(ogg$nameUV$V, nomeZ[i]) , "Est."] Allpsi[[i]] <- sort(psii, decreasing = FALSE) id.cof.U <- id.cof.U[order(psii)] index[[i]] <- id.cof.U alpha0<-if("(Intercept)"%in%names(coef(ogg))) coef(ogg)["(Intercept)"] else 0 ind <- as.numeric(na.omit(unlist(index[[i]]))) cof <- coef(ogg)[ind] alpha <- vector(length = length(ind)) #gapCoef.i<-gapCoef[grep(paste("\\.",nomeZ[i],"$",sep=""), rownames(gapCoef), value = FALSE),"Est."] # gapCoef.i<-gapCoef[f.U(rownames(gapCoef), nomeZ[i]) ,"Est."] ###eliminato 10/11/15 for (j in 1:length(cof)) { alpha[j] <- alpha0 - Allpsi[[i]][j] * cof[j] # if(gap) alpha[j] <- alpha[j] - gapCoef.i[j] ###eliminato 10/11/15 alpha0 <- alpha[j] } #if(gap) alpha<-alpha -gapCoef[grep(paste("\\.",nomeZ[i],"$",sep=""), rownames(gapCoef), value = FALSE),"Est."] cof.out <- c(alpha00, alpha) if(rev.sgn[i]) cof.out <- cof.out[length(cof.out):1] ris <- matrix(cof.out) dimnames(ris) <- 
list(paste("intercept", 1:nrow(ris), sep = ""), "Est.") Ris[[nomeZ[i]]] <- signif(ris, digits) } Ris } segmented/R/aapc.r0000644000176200001440000000553613474243164013536 0ustar liggesusersaapc<-function(ogg, parm, exp.it=FALSE, conf.level=0.95, wrong.se=TRUE, ...){ blockdiag <- function(...) { args <- list(...) nc <- sapply(args,ncol) cumnc <- cumsum(nc) ## nr <- sapply(args,nrow) ## NR <- sum(nr) NC <- sum(nc) rowfun <- function(m,zbefore,zafter) { cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m, matrix(0,ncol=zafter,nrow=nrow(m))) } ret <- rowfun(args[[1]],0,NC-ncol(args[[1]])) for (i in 2:length(args)) { ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i])) } ret } if(missing(parm)) { nomeZ<- ogg$nameUV$Z # if(length(rev.sgn)==1) rev.sgn<-rep(rev.sgn,length(nomeZ)) } else { if(! all(parm %in% ogg$nameUV$Z)) {stop("invalid parm")} else {nomeZ<-parm} } #for(i in 1:length(nomeZ)) { term<-nomeZ[1] nomi.psi<- grep(paste("\\.",term,sep=""), ogg$nameUV$V, value=TRUE) nomi.slope<- grep(paste("\\.",term,sep=""), ogg$nameUV$U,value=TRUE) null.left<-TRUE if(term %in% names(coef(ogg))) { nomi.slope<-c(term, nomi.slope) null.left<-FALSE } a<- min(ogg$rangeZ[,parm])# min(x)-1 #se discreto b<- max(ogg$rangeZ[,parm]) est.slope <- slope(ogg, parm)[[1]][,1] est.psi <- ogg$psi[nomi.psi,2] est.w<- diff(c(a,est.psi,b))/(b-a) #drop(B%*%c(a,est.psi,b)) k<- length(est.psi)#n.changepoints A<-matrix(0,k+1,k+1) A[row(A)>=col(A)]<-1 B<-diff(diag(k+2),diff=1)/(b-a) mu<-drop(crossprod(est.w,est.slope)) xsi<-c(crossprod(est.w,A),crossprod(est.slope,B)) COV <- vcov(ogg,...) 
v.delta<-COV[nomi.slope,nomi.slope] # if(null.left) v.delta<-rbind(0,cbind(0,v.delta)) #v.delta<-vcov(ogg)[2:4,2:4] #questa e' la var cov della left slope e le altre diffSlope #v.psi<-vcov(ogg)[5:6,5:6] #questa e' la var-cov dei psi v.psi<-as.matrix(COV[nomi.psi,nomi.psi]) VC<-COV[nomi.psi, nomi.slope] VV<-blockdiag(v.delta,diag(1)*0,v.psi,diag(1)*0) id.cov1<- 1:length(est.slope) id.cov2<- seq.int((length(est.slope)+2), length.out=length(est.psi)) if(null.left) { VC<-cbind(0,VC) #column relevant to the "x" term (missing) VV<- rbind(0,cbind(0,VV)) } VV[id.cov2,id.cov1]<-VC VV[id.cov1,id.cov2]<-t(VC) #VV[5:6,1:3]<-vcov(os)[5:6,2:4] #VV[1:3,5:6]<-vcov(os)[2:4,5:6] se.mu<-sqrt(drop(xsi%*%VV%*%xsi)) z<-abs(qnorm((1-conf.level)/2)) r<-c(Est=mu, St.Err=se.mu, mu+c(-z,z)*se.mu) cin <- paste("CI", "(", conf.level * 100, "%", ")", c(".l", ".u"), sep = "") names(r)<-c("Est.","St.Err",cin) if(wrong.se){ if(null.left) v.delta<-rbind(0,cbind(0, v.delta)) se.mu.wrong<- sqrt(drop(t(est.w)%*%A%*%v.delta%*%t(A)%*%est.w)) attr(r,"wrong.se")<- se.mu.wrong } if(exp.it) r<- exp(r[-2])-1 r } segmented/R/segmented.Arima.r0000644000176200001440000003202213501720474015616 0ustar liggesuserssegmented.Arima<- function(obj, seg.Z, psi, npsi, control = seg.control(), model = TRUE, keep.class=FALSE, ...) 
{ #Richiede control$f.obj that should be a string like "sum(x$residuals^2)" or "x$dev" #----------------- dpmax<-function(x,y,pow=1){ #deriv pmax if(pow==1) -(x>y) #ifelse(x>y, -1, 0) else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1) } #----------- # n.Seg<-1 # if(missing(seg.Z) && length(all.vars(o$call$xreg))==1) seg.Z<- as.formula(paste("~", all.vars(o$call$xreg))) # if(missing(psi)){if(length(all.vars(seg.Z))>1) stop("provide psi") else psi<-Inf} # if(length(all.vars(seg.Z))>1 & !is.list(psi)) stop("`psi' should be a list with more than one covariate in `seg.Z'") # if(is.list(psi)){ # if(length(all.vars(seg.Z))!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") # if(any(is.na(match(all.vars(seg.Z),names(psi), nomatch = NA)))) stop("Variables in `seg.Z' and `psi' do not match") # n.Seg <- length(psi) # } # if(length(all.vars(seg.Z))!=n.Seg) stop("A wrong number of terms in `seg.Z' or `psi'") if(missing(seg.Z)) { if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) else stop("please specify 'seg.Z'") } n.Seg<-length(all.vars(seg.Z)) id.npsi<-FALSE if(missing(psi)) { if(n.Seg==1){ if(missing(npsi)) npsi<-1 npsi<-lapply(npsi, function(.x).x) if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match") names(npsi)<-all.vars(seg.Z) } else {#se n.Seg>1 if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE) if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length") if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match") } psi<-lapply(npsi, function(.x) rep(NA,.x)) id.npsi<-TRUE ##id.npsi<-FALSE #e' stato fornito npsi? 
} else { if(n.Seg==1){ if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)} } else {#se n.Seg>1 if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list") if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'") if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match") } } min.step<-control$min.step alpha<-control$alpha it.max <- old.it.max<- control$it.max digits<-control$digits toll <- control$toll if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE) visual <- control$visual stop.if.error<-control$stop.if.error fix.npsi<-fix.npsi<-control$fix.npsi if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used.. warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..") } else { stop.if.error<-fix.npsi } n.boot<-control$n.boot size.boot<-control$size.boot gap<-control$gap random<-control$random pow<-control$pow conv.psi<-control$conv.psi visualBoot<-FALSE if(n.boot>0){ if(!is.null(control$seed)) { set.seed(control$seed) employed.Random.seed<-control$seed } else { employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse=""))) set.seed(employed.Random.seed) } if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)} if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints") } last <- control$last K<-control$K h<-control$h # if(h<1) it.max<-it.max+round(it.max/2) name.Z <-all.vars(seg.Z) if(length(name.Z)!=n.Seg) stop("errore strano 1") Z<-sapply(name.Z, function(xx) eval(parse(text=xx))) #e' sempre una matrice if(length(name.Z)!=ncol(Z)) stop("errore strano 2") n<-nrow(Z) n.psi<- length(unlist(psi)) ################# #if(ncol(Z)==1 && length(psi)==1 && n.psi==1 && !any(is.na(psi))) { if(psi==Inf) psi<-median(Z)} ################# if(ncol(Z)==1 && is.vector(psi) && 
(is.numeric(psi)||is.na(psi))){ psi <- list(as.numeric(psi)) names(psi)<-name.Z } if (!is.list(psi) || is.null(names(psi))) stop("psi should be a *named* list") id.nomiZpsi <- match(colnames(Z), names(psi)) if ((ncol(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match") nome <- names(psi)[id.nomiZpsi] psi <- psi[nome] if(id.npsi){ for(i in 1:length(psi)) { K<-length(psi[[i]]) if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[,i], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[,i])+ diff(range(Z[,i]))*(1:K)/(K+1))} } } else { for(i in 1:length(psi)) { if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[,i], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[,i])+ diff(range(Z[,i]))*(1:K)/(K+1))} } } initial.psi<-psi a <- sapply(psi, length) #per evitare che durante il processo iterativo i psi non siano ordinati id.psi.group <- rep(1:length(a), times = a) #identificativo di apparteneza alla variabile Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n,byrow = TRUE) #negli altri metodi Z e' una lista per cui la linea di sopra diventa #Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n) colnames(Z) <- nomiZ.vett <- rep(nome, times = a) #SERVE??? si perche' Z e' senza colnames psi <- unlist(psi) #se psi e' numerico, la seguente linea restituisce i valori ordinati all'interno della variabile.. psi<-unlist(tapply(psi,id.psi.group,sort)) k <- ncol(Z) PSI <- matrix(rep(psi, rep(n, k)), ncol = k) #controllo se psi e' ammissibile.. 
c1 <- apply((Z <= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo <) c2 <- apply((Z >= PSI), 2, all) #dovrebbero essere tutti FALSE (prima era solo >) if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range") #ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(xxx) {1:xxx}))) ripetizioni <- as.vector(unlist(tapply(id.psi.group, id.psi.group, function(x) 1:length(x) ))) nomiU <- paste("U", ripetizioni, sep = "") nomiU <- paste(nomiU, nomiZ.vett, sep = ".") nomiV <- paste("V", ripetizioni, sep = "") nomiV <- paste(nomiV, nomiZ.vett, sep = ".") nnomi <- c(nomiU, nomiV) XREG<-eval(obj$call$xreg) if(!is.null(XREG)){ #se ci sono factor? nomiXREG<-setdiff(names(obj$coef),c("intercept", paste("ar",1:100,sep=""), paste("ma",1:100,sep=""), paste("sma",1:100,sep=""), paste("sar",1:100,sep=""))) XREG<-matrix(XREG, ncol=length(nomiXREG)) colnames(XREG)<-nomiXREG #if((""%in%colnames(XREG)) || (" "%in%colnames(XREG))) stop("all columns in the matrix 'xreg' of 'obj' should be named.. 
") if(length(nomiXREG) != ncol(XREG)) stop("ncol(XREG) does not match names of regression coefficients") } mio.init<-mio.init.noV<-NULL X<-NULL call.ok <- update(obj, xreg = X, init=mio.init, evaluate=FALSE) #ho messo X, piuttosto che cbind(XREG,U,V) call.noV <- update(obj, xreg = cbind(XREG,U), init=mio.init.noV, evaluate=FALSE) #, data = mfExt) #objF <- update(obj0, formula = Fo, data = KK) # call.noV <- update(obj, formula = Fo.noV, evaluate=FALSE, data = mfExt) #objF <- update(obj0, formula = Fo, data = KK) if (it.max == 0) { U<-(Z-PSI)*(Z>PSI) colnames(U)<-nomiU obj1 <- eval(call.noV) #, envir=mfExt) return(obj1) } #obj1 <- eval(call.ok, envir=mfExt) initial <- psi obj0 <- obj dev0<- -obj$loglik if(is.na(dev0)) dev0<-10 list.obj <- list(obj) nomiOK<-nomiU opz<-list(toll=toll,h=h,stop.if.error=stop.if.error,dev0=dev0,visual=visual,it.max=it.max, nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step) opz$call.ok<-call.ok opz$call.noV<-call.noV opz$nomiU<-nomiU opz$nomiV<-nomiV if(n.boot<=0){ obj<-seg.Ar.fit(obj, XREG, Z, PSI, opz) } else { obj<-seg.Ar.fit.boot(obj, XREG, Z, PSI, opz, n.boot=n.boot, size.boot=size.boot, random=random) #jt, nonParam } if(!is.list(obj)){ warning("No breakpoint estimated", call. = FALSE) return(obj0) } id.warn<-obj$id.warn id.psi.group<-obj$id.psi.group nomiU<-nomiOK<-obj$nomiOK #sarebbe nomiU #-- nomiVxb<-sub("U","psi", nomiOK) #nomiVxb<-paste("psi",sapply(strsplit(nomiOK,"U"), function(x){x[2]}), sep="") nomiFINALI<-unique(sub("U[1-9]*[0-9].", "", nomiOK)) #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) #nomi delle variabili con breakpoint stimati! #se e' stata usata una proc automatica "nomiFINALI" sara' differente da "name.Z" nomiSenzaPSI<-setdiff(name.Z,nomiFINALI) if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. 
= FALSE) #-- it<-obj$it psi<-obj$psi psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart U<-obj$U V<-obj$V # return(obj) #if(any(table(rowSums(V))<=1)) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close") for(jj in colnames(V)) { VV<-V[, which(colnames(V)==jj), drop=FALSE] sumV<-abs(rowSums(VV)) if( #(any(diff(sumV)>=2)|| #se ci sono due breakpoints uguali any(table(sumV)<=1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other") } rangeZ<-obj$rangeZ obj<-obj$obj k<-length(psi) all.coef<-coef(obj) names(all.coef)<-c(names(obj0$coef), nomiU, nomiVxb) beta.c<- all.coef[nomiU] Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) nnomi <- c(nomiU, nomiVxb) XREG.ok<-cbind(XREG, U, Vxb) colnames(XREG.ok)[((ncol(XREG.ok)-length(nnomi)+1):ncol(XREG.ok))]<- nnomi objF <- update(obj0, xreg = XREG.ok, evaluate=TRUE) # #se usi una procedura automatica devi cambiare ripetizioni, nomiU e nomiV, e quindi: # length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length) if(any(is.na(objF$coef)) && stop.if.error){ stop("at least one coef estimate is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE) } names.coef <- names(coef(objF)) #names(obj$coef)<- names.coef# all.coef ha gia' i nomi.. objF$coef[names.coef]<-all.coef[names.coef] objF$residuals<- obj$residuals objF$loglik<-obj$loglik objF$sigma2 <-obj$sigma2 objF$aic <- obj$aic + 2*k if(any(is.na(objF$coef))){ stop("some estimate is NA: premature stopping with a large number of breakpoints?", call. = FALSE) } Cov<-objF$var.coef vv<- Cov[nomiVxb, nomiVxb, drop=FALSE] ris.psi<-matrix(NA,length(psi),3) colnames(ris.psi) <- c("Initial", "Est.", "St.Err") rownames(ris.psi) <- nomiVxb ris.psi[,2]<-psi ris.psi[,3]<-sqrt(diag(vv)) a<-tapply(id.psi.group, id.psi.group, length) #ho sovrascritto "a" di sopra, ma non dovrebbe servire.. 
a.ok<-NULL for(j in name.Z){ if(j %in% nomiFINALI) { a.ok[length(a.ok)+1]<-a[1] a<-a[-1] } else { a.ok[length(a.ok)+1]<-0 } #ifelse(name.Z %in% nomiFINALI,1,0) } # initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE)) initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE)) if(stop.if.error) ris.psi[,1]<-initial objF$Z <- Z objF$rangeZ <- rangeZ objF$psi.history <- psi.values objF$psi <- ris.psi objF$it <- it objF$epsilon <- obj$epsilon objF$call <- match.call() objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V)) objF$id.psi.group <- id.psi.group objF$id.warn <- id.warn if(n.boot>0) objF$seed<-employed.Random.seed class(objF) <- c("segmented", class(obj0)) list.obj[[length(list.obj) + 1]] <- objF class(list.obj) <- "segmented" if (last) list.obj <- list.obj[[length(list.obj)]] # warning("'segmented.Arima' is at a preliminary stage. Estimates are OK, but the '*.segmented' methods are not expected to work", # call.=FALSE) return(list.obj) } #end function segmented/R/davies.test.r0000644000176200001440000002744313467272072015066 0ustar liggesusers#se n=1000: value out of range in 'gammafn' #warning se "lm" con "glm"??? `davies.test` <- function (obj, seg.Z, k = 10, alternative = c("two.sided", "less", "greater"), type=c("lrt","wald"), values=NULL, dispersion=NULL) { # extract.t.value.U<-function(x){ # #estrae il t-value dell'ultimo coeff in un oggetto restituito da lm.fit # #non serve... 
in realta' viene usata extract.t.value.U.glm() # #x<-x$obj # R<-qr.R(x$qr) # p<-ncol(R) # n<-length(x$fitted.values) # invR<-backsolve(R,diag(p)) # hat.sigma2<-sum(x$residuals^2)/(n-p) # #solve(crossprod(qr.X(x$qr))) # V<-tcrossprod(invR)*hat.sigma2 # tt<-x$coefficients[p]/sqrt(V[p,p]) # tt} #------------------------------------------------------------------------------- daviesLM<-function(y, z, xreg, weights, offs, values, k, alternative){ #Davies test with sigma unknown #-------------- #> gammaA<-function(x){ # x^(x-.5)*exp(-x)*sqrt(2*pi)*(1+1/(12*x)+1/(288*x^2)-139/(51840*x^3) -571/(2488320*x^4)) # } #exp(lgamma()) fn="pmax(x-p,0)" y<-y-offs n<-length(y) n1<-length(values) RIS<-matrix(,n1,2) X.psi<-matrix(,n,length(fn)) df.res<- n - ncol(xreg) - length(fn) for(i in 1:n1){ for(j in 1:length(fn)) X.psi[,j]<-eval(parse(text=fn[[j]]), list(x=z, p=values[i])) xx1.new<-cbind(X.psi,xreg) #lrt #mu1.new<-xx1.new%*%solve(crossprod(xx1.new), crossprod(xx1.new,y)) #rss1<-sum((y-mu1.new)^2) #sigma2<-if(missing(sigma)) rss1/(n-ncol(xx1.new)) else sigma^2 #RIS[i]<-((rss0-rss1)/ncol(X.psi))/sigma2 #Wald invXtX1<-try(solve(crossprod(sqrt(weights)*xx1.new)), silent=TRUE) if(class(invXtX1)!="try-error"){ hat.b<-drop(invXtX1%*%crossprod(weights*xx1.new,y)) mu1.new<-xx1.new%*%hat.b devE<-sum((weights*(y-mu1.new)^2)) hat.sigma<- sqrt(devE/df.res) RIS[i,1]<-hat.b[1]/(hat.sigma*sqrt(invXtX1[1, 1])) Z<-hat.b[1]/(sqrt(invXtX1[1, 1])) D2<- Z^2 + devE RIS[i,2]<-Z^2/D2 #beta } } valori<-values[!is.na(RIS[,1])] RIS<- RIS[!is.na(RIS[,1]),] V<-sum(abs(diff(asin(RIS[,2]^.5)))) onesided <- TRUE if (alternative == "less") { M <- min(RIS[,1]) best<-valori[which.min(RIS[,1])] p.naiv <- pt(M, df=df.res, lower.tail = TRUE) } else if (alternative == "greater") { M <- max(RIS[,1]) best<-valori[which.max(RIS[,1])] p.naiv <- pt(M, df=df.res, lower.tail = FALSE) } else { M <- max(abs(RIS[,1])) best<-valori[which.max(abs(RIS[,1]))] p.naiv <- pt(M, df=df.res, lower.tail = FALSE) onesided <- FALSE } 
u<-M^2/((n-ncol(xx1.new))+ M^2) approxx<-V*(((1-u)^((df.res-1)/2))*gamma(df.res/2+.5))/(2*gamma(df.res/2)*pi^.5) p.adj <- p.naiv + approxx p.adj <- ifelse(onesided, 1, 2) * p.adj p.adj<-list(p.adj=p.adj, valori=valori, ris.valori=RIS[,1], best=best) return(p.adj) # M<-max(abs(RIS[,1])) # u<-M^2/((n-ncol(xx1.new))+ M^2) # approxx<-V*(((1-u)^((df.res-1)/2))*gamma(df.res/2+.5))/(2*gamma(df.res/2)*pi^.5) # p.naiv<-pt(-abs(M), df=df.res) #naive p-value # p.adj<-2*(p.naiv+approxx) #adjusted p-value (upper bound) # p.adj<-min(p.adj, 1) # p.adj<-list(p.adj=p.adj, valori=values, ris.valori=RIS[,1], approxx=approxx, p.naiv=p.naiv) # return(p.adj) } #-------------------------------- daviesGLM<-function(y, z, xreg, weights, offs, values=NULL, k, list.glm, alternative){ #Davies test for GLM (via LRT or Wald) est.dispGLM<-function(object){ df.r <- object$df.residual dispersion <- if(object$family$family%in%c("poisson","binomial")) 1 else object$dev/df.r dispersion } extract.t.value.U.glm<-function(object,dispersion,isGLM=TRUE){ #estrae il t-value dell'ultimo coeff in un oggetto restituito da lm.wfit/glm.fit est.disp <- FALSE df.r <- object$df.residual if (is.null(dispersion)) dispersion <- if(isGLM&&(object$family$family%in%c("poisson","binomial"))) 1 else if (df.r > 0) { est.disp <- TRUE if (any(object$weights == 0)) warning("observations with zero weight not used for calculating dispersion") sum((object$weights * object$residuals^2)[object$weights > 0])/df.r } else { est.disp <- TRUE NaN } dispersion<-max(c(dispersion, 1e-10)) p <- object$rank p1 <- 1L:p Qr <- object$qr coef.p <- object$coefficients[Qr$pivot[p1]] covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p)) covmat <- dispersion * covmat.unscaled tvalue <- coef.p[1]/sqrt(covmat[1,1]) #<0.4.0-0 era coef.p[p]/sqrt(covmat[p,p]) tvalue }#end extract.t.value.U.glm #-------------- fn<-"pmax(x-p,0)" dev0<-list.glm$dev0 eta0<-list.glm$eta0 
family=list.glm$family type<-list.glm$type dispersion<-list.glm$dispersion n<-length(y) r<-length(fn) n1<-length(values) RIS<-rep(NA, n1) X.psi<-matrix(,n,length(fn)) for(i in 1:n1){ for(j in 1:length(fn)) X.psi[,j]<-eval(parse(text=fn[[j]]), list(x=z, p=values[i])) xreg1<-cbind(X.psi,xreg) o1<-glm.fit(x = xreg1, y = y, weights = weights, offset = offs, family=family, etastart=eta0) dev<-o1$dev if (is.list(o1) && ncol(xreg1)==o1$rank) { RIS[i]<- if(type=="lrt") sqrt((dev0-dev)/est.dispGLM(o1))*sign(o1$coef[1]) else extract.t.value.U.glm(o1,dispersion) } } valori<-values[!is.na(RIS)] ris.valori<-RIS[!is.na(RIS)] V<-sum(abs(diff(ris.valori))) #-----Questo e' se il test di riferimento e' una \chi^2_r. (Dovresti considerare il LRT non segnato) #V<-sum(abs(diff(sqrt(RIS))))#nota sqrt #M<- max(RIS) #approxx<-(V*(M^((r-1)/2))*exp(-M/2)*2^(-r/2))/gamma(r/2) #p.naiv<-1-pchisq(M,df=r) #naive p-value #p.adj<-min(p.naiv+approxx,1) #adjusted p-value (upper bound) onesided <- TRUE if (alternative == "less") { M <- min(ris.valori) best<-valori[which.min(ris.valori)] p.naiv <- pnorm(M, lower.tail = TRUE) } else if (alternative == "greater") { M <- max(ris.valori) best<-valori[which.max(ris.valori)] p.naiv <- pnorm(M, lower.tail = FALSE) } else { M <- max(abs(ris.valori)) best<-valori[which.max(abs(ris.valori))] p.naiv <- pnorm(M, lower.tail = FALSE) onesided <- FALSE } approxx<-V*exp(-(M^2)/2)/sqrt(8*pi) p.adj <- p.naiv + approxx p.adj <- ifelse(onesided, 1, 2) * p.adj p.adj<-list(p.adj=p.adj, valori=valori, ris.valori=ris.valori, best=best) return(p.adj) } #------------------------------------------------------------------------------- if(!inherits(obj, "lm")) stop("A 'lm', 'glm', or 'segmented' model is requested") if(missing(seg.Z)){ if(inherits(obj, "segmented") && length(obj$nameUV$Z)==1) seg.Z<- as.formula(paste("~", obj$nameUV$Z )) if(!inherits(obj, "segmented") && length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2])) } else { 
if(class(seg.Z)!="formula") stop("'seg.Z' should be an one-sided formula") } if(k<=1) stop("k>1 requested! k>=10 is recommended") if(k<10) warnings("k>=10 is recommended") alternative <- match.arg(alternative) type <- match.arg(type) #if(length(all.vars(seg.Z))>1) warning("multiple segmented variables ignored in 'seg.Z'",call.=FALSE) name.Z <- all.vars(seg.Z) if(length(name.Z)>1) stop("Only a single segmented variable can be specified in 'seg.Z' ") isGLM<-"glm"%in%class(obj) Call<-mf<-obj$call mf$formula<-formula(obj) m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+")) formulaOrig<-formula(obj) if(class(obj)[1]=="segmented"){ if(!is.null(eval(obj$call$obj)$call$data)) mf$data <- eval(obj$call$obj)$call$data mf$formula<-update.formula(mf$formula,paste("~.-",paste(obj$nameUV$V, collapse="-"))) for(i in 1:length(obj$nameUV$U)) assign(obj$nameUV$U[i], obj$model[,obj$nameUV$U[i]], envir=parent.frame()) formulaOrig<-update.formula(formulaOrig, paste("~.-",paste(obj$nameUV$V, collapse="-"))) } mf <- eval(mf, parent.frame()) weights <- as.vector(model.weights(mf)) offs <- as.vector(model.offset(mf)) if(!is.null(Call$weights)){ #"(weights)"%in%names(mf) names(mf)[which(names(mf)=="(weights)")]<-all.vars(Call$weights) #as.character(Call$weights) #aggiungere??? 
# mf["(weights)"]<-weights } mt <- attr(mf, "terms") interc<-attr(mt,"intercept") y <- model.response(mf, "any") XREG <- if (!is.empty.model(mt)) model.matrix(mt, mf, obj$contrasts) n <- nrow(XREG) if (is.null(weights)) weights <- rep(1, n) if (is.null(offs)) offs <- rep(0, n) name.Z <- all.vars(seg.Z) Z<-XREG[,match(name.Z, colnames(XREG))] if(!name.Z %in% names(coef(obj))) XREG<-XREG[,-match(name.Z, colnames(XREG)),drop=FALSE] list.glm<-list(dev0=obj$dev, eta0=obj$linear.predictor, family=family(obj), type=type, dispersion=dispersion) if(is.null(values)) values<-seq(sort(Z)[2], sort(Z)[(n - 1)], length = k) #values<-seq(min(z), max(z), length=k+2) #values<-values[-c(1,length(values))] if(class(obj)=="lm" || identical(class(obj),c("segmented","lm")) ) { if(n<=300) { rr<-daviesLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, alternative=alternative) } else { list.glm$family<-gaussian() list.glm$type<-"wald" rr<-daviesGLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, list.glm=list.glm, alternative=alternative) } } if(identical(class(obj),c("glm","lm")) || identical(class(obj),c("segmented","glm","lm"))) rr<-daviesGLM(y=y, z=Z, xreg=XREG, weights=weights, offs=offs, values=values, k=k, list.glm=list.glm, alternative=alternative) best<-rr$best p.adj<-rr$p.adj valori<-rr$valori ris.valori<-rr$ris.valori if(is.null(obj$family$family)) { famiglia<-"gaussian" legame<-"identity"} else { famiglia<-obj$family$family legame<-obj$family$link } out <- list(method = "Davies' test for a change in the slope", # data.name=paste("Model = ",famiglia,", link =", legame, # "\nformula =", as.expression(formulaOrig), # "\nsegmented variable =", name.Z), data.name=paste("formula =", as.expression(formulaOrig), ", method =", obj$call[[1]] , "\nmodel =",famiglia,", link =", legame, if(isGLM) paste(", statist =", type) else NULL , "\nsegmented variable =", name.Z), statistic = c("'best' at" = best), parameter = c(n.points = length(valori)), 
#' segmented.lm: fit a segmented (broken-line) relationship within a fitted
#' linear model.
#'
#' Arguments:
#'   obj        a fitted 'lm' object.
#'   seg.Z      one-sided formula naming the segmented variable(s); when
#'              missing and the model has exactly one covariate, that
#'              covariate is used.
#'   psi        starting values for the breakpoints: a numeric vector (one
#'              segmented variable) or a named list; NA entries are replaced
#'              by quantile- or equispaced starting values.
#'   npsi       number of breakpoints per segmented variable (used only when
#'              'psi' is missing).
#'   control    list of fitting settings from seg.control().
#'   model      if TRUE, the model frame is stored in the returned object.
#'   keep.class accepted for interface compatibility; not referenced in the
#'              visible body (TODO confirm against upstream).
#'
#' Returns: an object of class c("segmented", "lm") (or a list of such
#' objects when control$last is FALSE) with components psi, psi.history,
#' rangeZ, nameUV, etc.; the original 'lm' object when no breakpoint is
#' estimated; or, with it.max == 0, the model refitted at the fixed psi.
`segmented.lm` <-
function(obj, seg.Z, psi, npsi, control = seg.control(), model = TRUE, keep.class=FALSE, ...) {
    # --- derive seg.Z when not supplied (single-covariate models only) ---
    if(missing(seg.Z)) {
        if(length(all.vars(formula(obj)))==2) seg.Z<- as.formula(paste("~", all.vars(formula(obj))[2]))
        else stop("please specify 'seg.Z'")
    }
    n.Seg<-length(all.vars(seg.Z))
    id.npsi<-FALSE
    #browser()
    # --- normalize psi/npsi into a named list 'psi', one entry per segmented
    #     variable; NA entries mean "choose starting values automatically" ---
    if(missing(psi)) {
        if(n.Seg==1){
            if(missing(npsi)) npsi<-1
            npsi<-lapply(npsi, function(.x).x)
            if(length(npsi)!=length(all.vars(seg.Z))) stop("seg.Z and npsi do not match")
            names(npsi)<-all.vars(seg.Z)
        } else {# if n.Seg>1
            if(missing(npsi)) stop(" with multiple segmented variables in seg.Z, 'psi' or 'npsi' should be supplied", call.=FALSE)
            if(length(npsi)!=n.Seg) stop(" 'npsi' and seg.Z should have the same length")
            if(!all(names(npsi) %in% all.vars(seg.Z))) stop(" names in 'npsi' and 'seg.Z' do not match")
        }
        psi<-lapply(npsi, function(.x) rep(NA,.x))
        id.npsi<-TRUE ##id.npsi<-FALSE # was npsi supplied?
    } else {
        if(n.Seg==1){
            if(!is.list(psi)) {psi<-list(psi);names(psi)<-all.vars(seg.Z)}
        } else {# if n.Seg>1
            if(!is.list(psi)) stop("with multiple terms in `seg.Z', `psi' should be a named list")
            if(n.Seg!=length(psi)) stop("A wrong number of terms in `seg.Z' or `psi'")
            if(!all(names(psi)%in%all.vars(seg.Z))) stop("Names in `seg.Z' and `psi' do not match")
        }
    }
    # --- unpack the control list ---
    min.step<-control$min.step
    alpha<-control$alpha
    it.max <- old.it.max<- control$it.max
    digits<-control$digits
    toll <- control$toll
    if(toll<0) stop("Negative tolerance ('tol' in seg.control()) is meaningless", call. = FALSE)
    visual <- control$visual
    stop.if.error<-control$stop.if.error
    fix.npsi<-fix.npsi<-control$fix.npsi   # NOTE(review): repeated assignment (sic)
    if(!is.null(stop.if.error)) {#if the old "stop.if.error" has been used..
        warning(" Argument 'stop.if.error' is working, but will be removed in the next releases. Please use 'fix.npsi' for the future..")
    } else {
        stop.if.error<-fix.npsi
    }
    n.boot<-control$n.boot
    size.boot<-control$size.boot
    gap<-control$gap
    random<-control$random
    pow<-control$pow
    conv.psi<-control$conv.psi
    visualBoot<-FALSE
    # --- bootstrap-restart setup: fix/record the RNG seed so the fit is
    #     reproducible; display is deferred to the bootstrap loop ---
    if(n.boot>0){
        if(!is.null(control$seed)) {
            set.seed(control$seed)
            employed.Random.seed<-control$seed
        } else {
            # NOTE(review): seed built by pasting 6 random digits through
            # eval(parse(...)); leading zeros collapse -- confirm intended.
            employed.Random.seed<-eval(parse(text=paste(sample(0:9, size=6), collapse="")))
            set.seed(employed.Random.seed)
        }
        if(visual) {visual<-FALSE; visualBoot<-TRUE}# warning("`display' set to FALSE with bootstrap restart", call.=FALSE)}
        #    if(!stop.if.error) stop("Bootstrap restart only with a fixed number of breakpoints")
    }
    last <- control$last
    K<-control$K
    h<-control$h
    #-------------------------------
    # --- rebuild the model.frame call of the original fit, augmented with the
    #     segmented variables ---
    orig.call<-Call<-mf<-obj$call
    orig.call$formula<- mf$formula<-formula(obj) # to allow lm(y~.)
    m <- match(c("formula", "data", "subset", "weights", "na.action","offset"), names(mf), 0L)
    mf <- mf[c(1, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- as.name("model.frame")
    if(class(mf$formula)=="name" && !"~"%in%paste(mf$formula)) mf$formula<-eval(mf$formula)
    #orig.call$formula<-update.formula(orig.call$formula, paste("~.-",all.vars(seg.Z)))
    #
    # Jan 2013: since version 0.2.9-4 the 'if(length(..' test below was removed
    # (incidentally it is unclear why it was there in the first place):
    #    if(length(all.vars(formula(obj)))>1){
    #      mf$formula<-update.formula(mf$formula,paste(paste(seg.Z,collapse=".+"),"+",paste(all.vars(formula(obj))[-1],collapse="+")))
    #    } else {
    #      mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+"))
    #    }
    # Nov 2013: since version 0.3-0.0 an *extended* model frame is also built,
    # containing the "original" terms before any transformation (e.g. x before
    # ns(x) builds its basis). This allows ns(), poly(), bs() terms in the
    # starting model.
    mfExt<- mf
    mf$formula<-update.formula(mf$formula,paste(seg.Z,collapse=".+"))
    #mfExt$formula<- update.formula(mfExt$formula,paste(paste(seg.Z,collapse=".+"),"+",paste(all.vars(formula(obj)),collapse="+")))
    #  mfExt$formula<-  if(!is.null(obj$call$data))
    #      update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),"-",obj$call$data,sep=""))
    #        else update.formula(mf$formula,paste(".~",paste(all.vars(obj$call), collapse="+"),sep=""))
    #-----------
    # browser()
    # keep offset/weights/subset variables available in the extended frame
    if(!is.null(obj$call$offset) || !is.null(obj$call$weights) || !is.null(obj$call$subset)){
        mfExt$formula <- update.formula(mf$formula, paste(".~.+", paste( c(all.vars(obj$call$offset), all.vars(obj$call$weights), all.vars(obj$call$subset)), collapse = "+") ))
    }
    mf <- eval(mf, parent.frame())
    n<-nrow(mf)
    # this serves to add to mfExt any variable appearing in the formula via offset(..)
    nomiOff<-setdiff(all.vars(formula(obj)), names(mf))
    if(length(nomiOff)>=1) mfExt$formula<-update.formula(mfExt$formula,paste(".~.+", paste( nomiOff, collapse="+"), sep=""))
    #----------------------------------------------------
    # Aug 2014: deal with additional non-model "variables" referenced in the
    # formula (e.g. a scalar threshold used inside subset=): scalar, non-function
    # objects found in the caller's frame are collected in nomiNO and excluded.
    nomiTUTTI<-all.vars(mfExt$formula) # also includes other names (e.g., threshold) "variables"
    nomiNO<-NULL
    for(i in nomiTUTTI){
        r<-try(eval(parse(text=i), parent.frame()), silent=TRUE)
        if(class(r)!="try-error" && length(r)==1 && !is.function(r)) nomiNO[[length(nomiNO)+1]]<-i
    }
    # NOTE(review): the span below is corrupted in this extraction. The
    # statements that define 'id.duplic', 'y', 'weights', 'offs', 'XREG' and
    # 'namesXREG0' (all used further down without a visible definition) appear
    # to have been dropped between the comment fragment and 'if(any(id.duplic))'.
    # Recover this region from the upstream source before editing.
    #nomiNO dovrebbe contenere i nomi delle "altre variabili" (come th in subset=x0
    if(any(id.duplic)) {
        #new.mf<-mf[,id.duplic,drop=FALSE]
        new.mf<-mf[,all.vars(formula(obj))[id.duplic],drop=FALSE]
        new.XREGseg<-data.matrix(new.mf)
        XREG<-cbind(XREG,new.XREGseg)
    }
    n.psi<- length(unlist(psi))
    id.n.Seg<-(ncol(XREG)-n.Seg+1):ncol(XREG)
    XREGseg<-XREG[,id.n.Seg,drop=FALSE]
    #XREG<-XREG[,-id.n.Seg,drop=FALSE]
    #XREG<-model.matrix(obj0)  -- not suitable: it does not remove possible missings in seg.Z..
    # two possible solutions:
    #XREG<-XREG[,colnames(model.matrix(obj)),drop=FALSE]
    #XREG<-XREG[,match(c("(Intercept)",all.vars(formula(obj))[-1]),colnames(XREG),nomatch =0),drop=FALSE]
    XREG <- XREG[, match(c("(Intercept)", namesXREG0),colnames(XREG), nomatch = 0), drop = FALSE]
    XREG<-XREG[,unique(colnames(XREG)), drop=FALSE]
    #################
    #if(ncol(XREGseg)==1 && length(psi)==1 && n.psi==1 && !any(is.na(psi))) { if(psi==Inf) psi<-median(XREGseg)}
    #################
    n <- nrow(XREG)
    # --- build the list Z of segmented variables and fill in automatic
    #     starting psi (quantiles or equispaced values) where NA ---
    Z<-lapply(apply(XREGseg,2,list),unlist) # also picks up the names!
    name.Z <- names(Z) <- colnames(XREGseg)
    if(length(Z)==1 && is.vector(psi) && (is.numeric(psi)||is.na(psi))){
        psi <- list(as.numeric(psi))
        names(psi)<-name.Z
    }
    if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || is.null(names(psi))) stop("Z and psi have to be *named* list")
    id.nomiZpsi <- match(names(Z), names(psi))
    if ((length(Z)!=length(psi)) || any(is.na(id.nomiZpsi))) stop("Length or names of Z and psi do not match")
    nome <- names(psi)[id.nomiZpsi]
    psi <- psi[nome]
    if(id.npsi){
        for(i in 1:length(psi)) {
            K<-length(psi[[i]])
            if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))}
        }
    } else {
        for(i in 1:length(psi)) {
            if(any(is.na(psi[[i]]))) psi[[i]]<-if(control$quant) {quantile(Z[[i]], prob= seq(0,1,l=K+2)[-c(1,K+2)], names=FALSE)} else {(min(Z[[i]])+ diff(range(Z[[i]]))*(1:K)/(K+1))}
        }
    }
    initial.psi<-psi
    # browser()
    a <- sapply(psi, length)
    # to avoid the psi becoming unsorted during the iterative process
    id.psi.group <- rep(1:length(a), times = a) # membership id: which variable each psi belongs to
    #
    #Znew <- list()
    #for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], a[i])
    #Z <- matrix(unlist(Znew), nrow = n)
    # expand Z to one column per breakpoint, and PSI to the matching matrix
    Z<-matrix(unlist(mapply(function(x,y)rep(x,y),Z,a,SIMPLIFY = TRUE)),nrow=n)
    psi <- unlist(psi)
    # if psi is numeric, the following line returns the values sorted within each variable..
    psi<-unlist(tapply(psi,id.psi.group,sort))
    k <- ncol(Z)
    PSI <- matrix(rep(psi, rep(n, k)), ncol = k)
    # check whether psi is admissible..
    c1 <- apply((Z <= PSI), 2, all) # should all be FALSE (previously just <)
    c2 <- apply((Z >= PSI), 2, all) # should all be FALSE (previously just >)
    if(sum(c1 + c2) != 0 || is.na(sum(c1 + c2)) ) stop("starting psi out of the admissible range")
    colnames(Z) <- nomiZ <- rep(nome, times = a)
    ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], function(.x) {1:.x})))
    nomiU <- paste("U", ripetizioni, sep = "")
    nomiU <- paste(nomiU, nomiZ, sep = ".")
    nomiV <- paste("V", ripetizioni, sep = "")
    nomiV <- paste(nomiV, nomiZ, sep = ".")
    # perhaps building the KK environment is unnecessary, use mf..
    #obj <- update(obj, formula = Fo, data = mf)
    #if (model.frame) obj$model <- mf
    # check that model.frame() works on the returned object
    #      KK <- new.env()
    #      for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), objframe$model[[i]], envir = KK)
    # --- it.max == 0: no estimation, just refit the model with the broken-line
    #     terms at the supplied (fixed) psi and return ---
    if (it.max == 0) {
        #mf<-cbind(mf, mfExt)
        U <- (Z>PSI)*(Z-PSI) #pmax((Z - PSI), 0)
        colnames(U) <- paste(ripetizioni, nomiZ, sep = ".")
        nomiU <- paste("U", colnames(U), sep = "")
        #for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) # is the for needed? could use colnames(U)<-nomiU;mf[nomiU]<-U
        for(i in 1:ncol(U)) mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i]
        Fo <- update.formula(formula(obj), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))))
        obj <- update(obj, formula = Fo, evaluate=FALSE, data=mfExt) #data = mf,
        if(!is.null(obj[["subset"]])) obj[["subset"]]<-NULL
        obj<-eval(obj, envir=mfExt)
        if (model) obj$model <-mf  #obj$model <- data.frame(as.list(KK))
        psi <- cbind(psi, psi, 0)
        rownames(psi) <- paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".")
        colnames(psi) <- c("Initial", "Est.", "St.Err")
        #names(psi)<-paste(paste("psi", ripetizioni, sep = ""), nomiZ, sep=".")
        obj$psi <- psi
        return(obj)
    }
    #XREG <- model.matrix(obj)  -- created above
    #o <- model.offset(objframe)
    #w <- model.weights(objframe)
    if (is.null(weights)) weights <- rep(1, n)
    if (is.null(offs)) offs <- rep(0, n)
    # --- run the iterative breakpoint estimation (plain or bootstrap restart) ---
    initial <- psi
    obj0 <- obj
    dev0<-sum(obj$residuals^2)
    list.obj <- list(obj)
    #  psi.values <- NULL
    nomiOK<-nomiU
    invXtX<-chol2inv(qr.R(obj$qr)) #(XtX)^{-1}
    Xty<-crossprod(XREG,y)
    opz<-list(toll=toll,h=h, stop.if.error=stop.if.error, dev0=dev0, visual=visual, it.max=it.max, nomiOK=nomiOK, id.psi.group=id.psi.group, gap=gap, visualBoot=visualBoot, pow=pow, digits=digits,invXtX=invXtX, Xty=Xty, conv.psi=conv.psi, alpha=alpha, fix.npsi=fix.npsi, min.step=min.step)
    if(n.boot<=0){
        obj<-seg.lm.fit(y,XREG,Z,PSI,weights,offs,opz)
    } else {
        obj<-seg.lm.fit.boot(y, XREG, Z, PSI, weights, offs, opz, n.boot=n.boot, size.boot=size.boot, random=random) #jt, nonParam
    }
    if(!is.list(obj)){
        warning("No breakpoint estimated", call. = FALSE)
        return(obj0)
    }
    if(obj$obj$df.residual==0) warning("no residual degrees of freedom (other warnings expected)", call.=FALSE)
    # --- unpack the fitter's results; variables may have lost all their
    #     breakpoints when fix.npsi=FALSE ---
    id.psi.group<-obj$id.psi.group
    nomiOK<-obj$nomiOK
    #nomiFINALI<-unique(sapply(strsplit(nomiOK, split="[.]"), function(x)x[2])) # names of the variables with estimated breakpoints!
    #nomiFINALI<-sub("U[1-9].", "", nomiOK) # original names of the variables with estimated breakpoints!
    nomiFINALI<- unique(sub("U[1-9]*[0-9].", "", nomiOK))  # if an automatic procedure was used, "nomiFINALI" will differ from "name.Z"
    nomiSenzaPSI<-setdiff(name.Z,nomiFINALI)
    if(length(nomiSenzaPSI)>=1) warning("no breakpoints found for: ", paste(nomiSenzaPSI," "), call. = FALSE)
    it<-obj$it
    psi<-obj$psi
    psi.values<-if(n.boot<=0) obj$psi.values else obj$boot.restart
    U<-obj$U
    V<-obj$V
    id.warn<-obj$id.warn
    rangeZ<-obj$rangeZ
    obj<-obj$obj
    k<-length(psi)
    beta.c<-coef(obj)[paste("U", 1:ncol(U), sep = "")]
    #psi.values[[length(psi.values) + 1]] <- psi # not needed!
    Vxb <- V %*% diag(beta.c, ncol = length(beta.c))
    # if an automatic procedure is used, ripetizioni, nomiU and nomiV must change, hence:
    length.psi<-tapply(as.numeric(as.character(names(psi))), as.numeric(as.character(names(psi))), length)
    forma.nomiU<-function(xx,yy)paste("U",1:xx, ".", yy, sep="")
    forma.nomiVxb<-function(xx,yy)paste("psi",1:xx, ".", yy, sep="")
    #nomiU <- unlist(mapply(forma.nomiU, length.psi, name.Z)) # instead of a loop #paste("U",1:length.psi[i], ".", name.Z[i])
    #nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, name.Z))
    nomiU <- unlist(mapply(forma.nomiU, length.psi, nomiFINALI)) # instead of a loop #paste("U",1:length.psi[i], ".", name.Z[i])
    nomiVxb <- unlist(mapply(forma.nomiVxb, length.psi, nomiFINALI))
    #mf<-cbind(mf, mfExt) # this created duplications..
    # --- refit the working linear model including U (slope-change) and
    #     psi (Vxb) columns, so standard lm machinery (vcov, ...) applies ---
    for(i in 1:ncol(U)) {
        mfExt[nomiU[i]]<-mf[nomiU[i]]<-U[,i]
        mfExt[nomiVxb[i]]<-mf[nomiVxb[i]]<-Vxb[,i]
    }
    nnomi <- c(nomiU, nomiVxb)
    # browser()
    Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nnomi, collapse = "+"))))
    #objF <- update(obj0, formula = Fo, data = KK)
    objF <- update(obj0, formula = Fo, evaluate=FALSE, data = mfExt)
    # drop subset: with e.g. subset=x>min(x) it would keep removing 1 observation
    if(!is.null(objF[["subset"]])) objF[["subset"]]<-NULL
    objF<-eval(objF, envir=mfExt)
    #
    # 11/10/16 the check was commented out so that an lm object with psi held
    # fixed is also returned..
    #        for(jj in colnames(V)) {
    #            VV<-V[, which(colnames(V)==jj), drop=FALSE]
    #            sumV<-abs(rowSums(VV))
    ##            if( (any(diff(sumV)>=2) # if there are two equal breakpoints
    ##                || any(table(sumV)<=1)) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other")
    ## removed because it makes no sense if the segmented variable is unsorted..
    ## maybe do abs(diff(psi))<=.0001? but clustered..
    #            if(any(table(sumV)<=1) && stop.if.error) stop("only 1 datum in an interval: breakpoint(s) at the boundary or too close each other")
    #        }
    # psi may sit at the margins or very close together (only 1-2 observations
    # in some interval, or more but with tied x-values): then objF$coef may
    # contain NAs..  names(which(is.na(coef(objF))))
    objF$offset<- obj0$offset
    isNAcoef<-any(is.na(objF$coefficients))
    if(isNAcoef){
        if(stop.if.error) {
            cat("breakpoint estimate(s):", as.vector(psi),"\n")
            stop("at least one coef is NA: breakpoint(s) at the boundary? (possibly with many x-values replicated)", call. = FALSE)
        } else {
            warning("some estimate is NA: too many breakpoints? 'var(hat.psi)' cannot be computed \n ..returning a 'lm' model", call. = FALSE)
            Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", paste(nomiU, collapse = "+"))))
            objF <- update(obj0, formula = Fo, evaluate=TRUE, data = mfExt)
            names(psi)<-nomiVxb
            objF$psi<-psi
            return(objF)
        }
    }
    # --- with gap=FALSE, overwrite coefficients/fitted/residuals with the
    #     gap-free working fit (the V coefficients there are exactly 0) ---
    if(!gap){
        names.coef<-names(objF$coefficients)
        # these lines work under the assumption that the U and V are ordered..
        names(obj$coefficients)[match(c(paste("U",1:k, sep=""), paste("V",1:k, sep="")), names(coef(obj)))]<- nnomi
        objF$coefficients[names.coef]<-obj$coefficients[names.coef] # replaces the 0s
        #objF$coefficients<-obj$coefficients
        #names(objF$coefficients)<-names.coef
        objF$fitted.values<-obj$fitted.values
        objF$residuals<-obj$residuals
        #objF$qr<-obj$qr #I do NOT think so..
    }
    # --- breakpoint table: initial values, estimates, standard errors ---
    Cov <- vcov(objF)
    id <- match(nomiVxb, names(coef(objF)))
    vv <- if (length(id) == 1) Cov[id, id] else diag(Cov[id, id])
    #if(length(initial)!=length(psi)) initial<-rep(NA,length(psi))
    #browser()
    a<-tapply(id.psi.group, id.psi.group, length) # "a" above was overwritten, but it should not be needed..
    #browser()
    ris.psi<-matrix(NA,length(psi),3)
    colnames(ris.psi) <- c("Initial", "Est.", "St.Err")
    rownames(ris.psi) <- nomiVxb
    ris.psi[,2]<-psi
    ris.psi[,3]<-sqrt(vv)
    # NB: "a" must be a vector conformable with "initial.psi" to obtain
    # "initial" below. If a variable ends up with no breakpoint this fails and
    # building "initial" breaks; hence construct a.ok.
    a.ok<-NULL
    for(j in name.Z){
        if(j %in% nomiFINALI) {
            a.ok[length(a.ok)+1]<-a[1]
            a<-a[-1]
        } else {
            a.ok[length(a.ok)+1]<-0
        } #ifelse(name.Z %in% nomiFINALI,1,0)
    }
    #initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi, a.ok, SIMPLIFY = TRUE))
    initial<-unlist(mapply(function(x,y){if(is.na(x)[1])rep(x,y) else x }, initial.psi[nomiFINALI], a.ok[a.ok!=0], SIMPLIFY = TRUE))
    if(stop.if.error) ris.psi[,1]<-initial
    #psi <- cbind(initial, psi, sqrt(vv))
    #rownames(psi) <- colnames(Cov)[id]
    # --- assemble the 'segmented' object ---
    objF$rangeZ <- rangeZ
    objF$psi.history <- psi.values
    objF$psi <- ris.psi
    objF$it <- it
    objF$epsilon <- obj$epsilon
    objF$call <- match.call()
    objF$nameUV <- list(U = drop(nomiU), V = rownames(ris.psi), Z = nomiFINALI) #Z = name.Z
    objF$id.group <- if(length(name.Z)<=1) -rowSums(as.matrix(V))
    objF$id.psi.group <- id.psi.group
    objF$id.warn <- id.warn
    objF$orig.call<-orig.call
    if(model) objF$model <- mf  #objF$mframe <- data.frame(as.list(KK))
    #    PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi))
    #    SE.PSI <- matrix(rep(sqrt(vv), rep(nrow(Z), length(psi))), ncol = length(psi))
    #    X.is<-model.matrix(Fo, data=objF$model)
    #    X.is[,nomiVxb]<-pnorm((Z-PSI)/SE.PSI)%*% diag(-beta.c, ncol = length(beta.c))
    #    objF$cov.unscaled.is<-crossprod(X.is)
    #browser()
    if(n.boot>0) objF$seed<-employed.Random.seed
    class(objF) <- c("segmented", class(obj0))
    list.obj[[length(list.obj) + 1]] <- objF
    class(list.obj) <- "segmented"
    if (last) list.obj <- list.obj[[length(list.obj)]]
    return(list.obj)
}
#' seg.glm.fit: iterative estimation of the breakpoints of a segmented GLM.
#'
#' Implements the linearization algorithm (Muggeo-style): at each iteration a
#' glm is fitted on [XREG, U, V], where U = (Z-PSI)*(Z>PSI) carries the
#' slope-change terms and V is the (negative) indicator; psi is updated by
#' psi + gamma/beta, with step-halving when the deviance does not decrease.
#'
#' Arguments:
#'   y, XREG         response and linear-predictor design matrix.
#'   Z, PSI          n x k matrices of segmented variables and current psi.
#'   w, offs         prior weights and offset passed to glm.fit.
#'   opz             list of settings (toll, h, pow, digits, fix.npsi, ...)
#'                   -- assembled by the calling segmented.glm (TODO confirm;
#'                   not visible here).
#'   return.all.sol  if TRUE, return the visited (deviance, psi) paths when NA
#'                   coefficients arise instead of stopping.
#'
#' Returns: a list with the final glm.fit object, estimated psi, U, V, psi
#' history, the gap-free deviance (dev.no.gap), and bookkeeping ids; or 0 when
#' all breakpoints get discarded.
#'
#' NOTE(review): this copy is damaged by the extraction -- two spans were
#' dropped mid-stream (flagged inline). Restore them from the upstream source
#' before making any functional change.
seg.glm.fit<-function(y,XREG,Z,PSI,w,offs,opz,return.all.sol=FALSE){
#-------------------------
    useExp.k=TRUE
#-----------------
    # est.k: given step sizes x1 and objective values y1, fit a quadratic in
    # log(x1) and solve for the step k reaching the target level L0.
    est.k<-function(x1,y1,L0){
        ax<-log(x1)
        .x<-cbind(1,ax,ax^2)
        b<-drop(solve(crossprod(.x),crossprod(.x,y1)))
        const<-b[1]-L0
        DD<-sqrt(b[2]^2-4*const*b[3])
        kk<-exp((-b[2]+ DD) /(2*b[3]))
        return(round(kk))
        #  ff<-function(xx) b[1]+b[2]*xx + b[3]*xx^2+ L0
        #  a<-uniroot(ff, c(log(x[4]), 3.4))
    }
#-----------------
    # dpmax: derivative of pmax(x - y, 0)^pow with respect to y.
    dpmax<-function(x,y,pow=1){
        #deriv pmax
        if(pow==1) -(x>y) #ifelse(x>y, -1, 0)
        else -pow*((x-y)*(x>y))^(pow-1)#-pow*pmax(x-y,0)^(pow-1)
    }
#--------------------
    # in.psi: check that each psi lies strictly inside its allowed limits.
    in.psi<-function(LIM, PSI, ret.id=TRUE){
        #check if psi is inside the range
        a<-PSI[1,]<=LIM[1,]
        b<-PSI[1,]>=LIM[2,]
        is.ok<- !a & !b # TRUE if psi is OK
        if(ret.id) return(is.ok)
        isOK<- all(is.ok) && all(!is.na(is.ok))
        isOK}
#------------
    # far.psi: check that adjacent psi of the same variable are separated by
    # at least 2 observations (cf. the intact sibling in seg.def.fit).
    far.psi<-function(Z, PSI, id.psi.group, ret.id=TRUE) {
        #id.far.ok<-sapply(unique(id.psi.group), function(.x) (table(rowSums(((Z>PSI)[,id.psi.group==.x,drop=FALSE])))>=2)[-1]) #[-1] esclude lo zero, xPSI)[,id.psi.group==.x,drop=FALSE]))+1)>=2)[-1]) #[-1] esclude lo zero, xPSI)) #pmax((Z - PSI), 0)^pow[1]
        # NOTE(review): extraction gap. The body of far.psi, an adj.psi helper,
        # and the whole initialisation section must have been lost above: the
        # code resumes mid-stream below and uses fam, maxit.glm, eta0, pow, n,
        # h, digits, toll, it, epsilon, limZ, rangeZ, nomiOK, fix.npsi, visual,
        # dev.values, psi.values, U, psi ... with no visible definition.
        # Restore from the upstream source; brace balance is broken here too.
        if(pow[1]!=1) U<-U^pow[1]
        obj0 <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0))
        eta0<- obj0$linear.predictors
        L0<- obj0$dev
        n.intDev0<-nchar(strsplit(as.character(L0),"\\.")[[1]][1])
        dev.values[length(dev.values) + 1] <- opz$dev0 # of the initial model (without psi)
        dev.values[length(dev.values) + 1] <- L0 # model with the initial psi
        psi.values[[length(psi.values) + 1]] <- psi # initial psi
        #==============================================
        if (visual) {
            cat(paste("iter = ", sprintf("%2.0f",0),
                " dev = ", sprintf(paste("%", n.intDev0+6, ".5f",sep=""), L0),
                " k = ", sprintf("%2.0f", NA),
                " n.psi = ",formatC(length(unlist(psi)),digits=0,format="f"),
                " ini.psi = ",paste(formatC(unlist(psi),digits=3,format="f"), collapse=" "),
                sep=""), "\n")
        }
        #==============================================
        id.warn <- FALSE
        # --- main loop: iterate the linearization until |epsilon| <= toll ---
        while (abs(epsilon) > toll) {
            it<-it+1
            n.psi0 <- n.psi1
            n.psi1 <- ncol(Z)
            # if breakpoints were dropped in the previous iteration, refresh
            # the gap-free fit used as the comparison deviance L0
            if(n.psi1!=n.psi0){
                U <- ((Z-PSI)*(Z>PSI)) #pmax((Z - PSI), 0)^pow[1]
                if(pow[1]!=1) U<-U^pow[1]
                obj0 <- suppressWarnings(glm.fit(x = cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0))
                eta0<-obj0$linear.predictors
                # NOTE(review): 'L0< - obj0$dev' parses as the comparison
                # L0 < (-obj0$dev), NOT an assignment. Possibly an extraction
                # artifact of 'L0<- obj0$dev' -- confirm against upstream.
                L0< - obj0$dev
            }
            # working fit including the V (indicator) columns
            V <- dpmax(Z,PSI,pow=pow[2])# ifelse((Z > PSI), -1, 0)
            X <- cbind(XREG, U, V)
            rownames(X) <- NULL
            colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", 1:ncol(U), sep = ""), paste("V", 1:ncol(V), sep = ""))
            obj <- suppressWarnings(glm.fit(X, y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0))
            eta0<-obj$linear.predictors
            beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")]
            gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")]
            # NA coefficients mean collinear/boundary breakpoints: either stop
            # (fix.npsi) or silently drop the offending psi
            if(any(is.na(c(beta.c, gamma.c)))){
                if(fix.npsi) {
                    #stop("Estimates of beta or gamma are NA. Probably too many breakpoints being estimated.", call.=FALSE)
                    if(return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call.=FALSE)
                } else {
                    id.coef.ok<-!is.na(gamma.c)
                    psi<-psi[id.coef.ok]
                    if(length(psi)<=0) {
                        warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE)
                        return(0)
                    }
                    gamma.c<-gamma.c[id.coef.ok]
                    beta.c<-beta.c[id.coef.ok]
                    Z<-Z[, id.coef.ok, drop=FALSE]
                    rangeZ <- rangeZ[,id.coef.ok, drop=FALSE]
                    limZ <- limZ[,id.coef.ok, drop=FALSE]
                    nomiOK<-nomiOK[id.coef.ok] # keep the U names for the admissible psi
                    id.psi.group<-id.psi.group[id.coef.ok]
                    names(psi)<-id.psi.group
                }
            }
            # psi update: psi_new = psi_old + gamma/beta
            psi.old<-psi
            psi <- psi.old + gamma.c/beta.c
            if(!is.null(digits)) psi<-round(psi, digits)
            PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi))
            #-- model with the new psi
            U1<-(Z-PSI)*(Z>PSI)
            if(pow[1]!=1) U1<-U1^pow[1]
            obj1 <- try(suppressWarnings(glm.fit(cbind(XREG, U1), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE)
            L1<- if(class(obj1)[1]=="try-error") L0+10 else obj1$dev
            # step-halving: shrink the update (exponentially if useExp.k) until
            # the deviance no longer increases
            use.k<-k<-1
            L1.k<-NULL
            L1.k[length(L1.k)+1]<-L1
            while(L1>L0){
                k<-k+1
                use.k <- if(useExp.k) 2^(k-1) else k
                psi <- psi.old + (gamma.c/beta.c)/(use.k*h)
                if(!is.null(digits)) psi<-round(psi, digits)
                PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi))
                U1<-(Z-PSI)*(Z>PSI)
                if(pow[1]!=1) U1<-U1^pow[1]
                obj1 <- try(suppressWarnings(glm.fit(cbind(XREG, U1), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE)
                L1<- if(class(obj1)[1]=="try-error") L0+10 else obj1$dev
                L1.k[length(L1.k)+1]<-L1
                # NOTE(review): extraction gap. Between 'if(1/(use.k*h)' and
                # '= it.max' the minimum-step guard, the end of this inner
                # loop, the 'visual' progress output, the epsilon/L0/k.values/
                # psi.values/dev.values updates and the head of the
                # 'if(it >= it.max)' test were lost. Restore from upstream.
                if(1/(use.k*h)= it.max) {
                    id.warn <- TRUE
                    break
                }
                # I suspect these checks are unnecessary, especially if no step-halving occurred
                # check whether the obtained psi are in range and far enough apart
                id.psi.far <-far.psi(Z, PSI, id.psi.group, TRUE)
                id.psi.in <- in.psi(limZ, PSI, TRUE)
                id.psi.ok <- id.psi.in & id.psi.far
                if(!all(id.psi.ok)){
                    if(fix.npsi){
                        psi<-adj.psi(psi, limZ) #within range!!!
                        id.psi.far<-far.psi(Z, PSI, id.psi.group, TRUE)
                        psi<-psi*ifelse(id.psi.far,1,.9)
                        PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi))
                    } else {
                        # drop the inadmissible breakpoints and shrink every
                        # related structure accordingly
                        Z<-Z[, id.psi.ok, drop=FALSE]
                        PSI<-PSI[, id.psi.ok, drop=FALSE]
                        rangeZ <- rangeZ[,id.psi.ok,drop=FALSE]
                        limZ <- limZ[,id.psi.ok,drop=FALSE]
                        nomiOK<-nomiOK[id.psi.ok] # keep the U names for the admissible psi
                        id.psi.group<-id.psi.group[id.psi.ok]
                        psi.old<- psi.old[id.psi.ok]
                        psi<- psi[id.psi.ok]
                        names(psi)<-id.psi.group
                        if(ncol(PSI)<=0) {
                            warning(paste("All breakpoints have been removed after",it,"iterations.. returning 0"), call. = FALSE)
                            return(0)
                        }
                    }
                }
        } #end while_it
##=============================================================================
        if(id.warn) warning(paste("max number of iterations (", it,") attained",sep=""), call. = FALSE)
        attr( psi.values, "dev") <- dev.values
        attr( psi.values, "k")<- k.values
        # sort the breakpoints..
        psi<-unlist(tapply(psi, id.psi.group, sort))
        names(psi)<-id.psi.group
        names.coef<-names(obj$coefficients) # obj is the old one which includes U1,.. V1,...
        PSI.old<-PSI
        PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi))
        # U and V may have changed (removal/sorting of psi..) so everything must
        # be recomputed, otherwise it would equal U1 and obj1
        if(sd(PSI-PSI.old)>0){
            U <- (Z-PSI)*(Z>PSI)
            colnames(U)<-paste("U", 1:ncol(U), sep = "")
            V <- -(Z>PSI)
            colnames(V)<-paste("V", 1:ncol(V), sep = "")
            #    X <- cbind(XREG, U, V)
            #    rownames(X) <- NULL
            obj <- try(suppressWarnings(glm.fit(cbind(XREG, U), y = y, offset = offs, weights = w, family = fam, control = glm.control(maxit = maxit.glm), etastart = eta0)), silent = TRUE)
            L1<- obj$dev
        } else {
            obj<-obj1
        }
        # pad the coefficient vector with exact zeros for the V terms
        obj$coefficients<-c(obj$coefficients, rep(0,ncol(V)))
        names(obj$coefficients)<-names.coef
        obj$epsilon <- epsilon
        obj$it <- it
        obj<-list(obj=obj,it=it,psi=psi, psi.values=psi.values, U=U,V=V,rangeZ=rangeZ, epsilon=epsilon,nomiOK=nomiOK, dev.no.gap=L1, id.psi.group=id.psi.group,id.warn=id.warn) # add id.psi.ok?
        return(obj)
}
#' seg.def.fit: iterative breakpoint estimation for the default 'segmented'
#' method (arbitrary fitters, e.g. rq).
#'
#' Same linearization scheme as seg.lm.fit/seg.glm.fit, but the working model
#' is refitted by evaluating the caller-prepared calls 'opz$call.ok' (with V
#' terms) and 'opz$call.noV' (without) in the extended model frame 'mfExt',
#' and the objective to minimise is extracted via the expression 'opz$fn.obj'
#' evaluated with the fit bound to 'x'.
#'
#' Arguments:
#'   obj             the original fitted object (only its class and coef names
#'                   are used here).
#'   Z, PSI          n x k matrices of segmented variables / current psi.
#'   mfExt           extended model frame; U/V columns are written into it.
#'   opz             settings list (toll, h, pow, digits, alpha, fix.npsi,
#'                   nomiU, nomiV, call.ok, call.noV, fn.obj, constr, ...).
#'   return.all.sol  if TRUE, return the visited (objective, psi) paths when
#'                   NA coefficients arise instead of stopping.
#'
#' Returns: a list with the final fit, psi, U, V, psi history,
#' SumSquares.no.gap, and bookkeeping ids (plus R/R.noV/r when the 'rq'
#' constrained variant is active); or 0 when all breakpoints are discarded.
seg.def.fit<-function (obj, Z, PSI, mfExt, opz, return.all.sol = FALSE)
{
    # use exponentially growing divisors (2^(k-1)) during step-halving
    useExp.k = TRUE
    # est.k: quadratic-in-log(x) interpolation of the objective to guess the
    # step factor reaching level L0 (not called in the visible body).
    est.k <- function(x1, y1, L0) {
        ax <- log(x1)
        .x <- cbind(1, ax, ax^2)
        b <- drop(solve(crossprod(.x), crossprod(.x, y1)))
        const <- b[1] - L0
        DD <- sqrt(b[2]^2 - 4 * const * b[3])
        kk <- exp((-b[2] + DD)/(2 * b[3]))
        return(round(kk))
    }
    # dpmax: derivative of pmax(x - y, 0)^pow with respect to y.
    dpmax <- function(x, y, pow = 1) {
        if (pow == 1) -(x > y) else -pow * ((x - y) * (x > y))^(pow - 1)
    }
    # in.psi: TRUE for each psi strictly inside its limits LIM.
    in.psi <- function(LIM, PSI, ret.id = TRUE) {
        a <- PSI[1, ] <= LIM[1, ]
        b <- PSI[1, ] >= LIM[2, ]
        is.ok <- !a & !b
        if (ret.id) return(is.ok)
        isOK <- all(is.ok) && all(!is.na(is.ok))
        isOK
    }
    # far.psi: per variable, require >= 2 observations in every interval
    # beyond the first (tabulate counts points between adjacent psi).
    far.psi <- function(Z, PSI, id.psi.group, ret.id = TRUE) {
        id.far.ok <- sapply(unique(id.psi.group), function(.x) (tabulate(rowSums(((Z > PSI)[, id.psi.group == .x, drop = FALSE])) + 1) >= 2)[-1])
        id.far.ok <- unlist(id.far.ok)
        if (ret.id) return(id.far.ok) else return(all(id.far.ok))
    }
    # adj.psi: clamp psi into the limits LIM.
    adj.psi <- function(psii, LIM) {
        pmin(pmax(LIM[1, ], psii), LIM[2, ])
    }
    # fn.costr: build the constraint matrix for rq's "fnc" method (monotone
    # slope constraints), optionally including left slope and intercept rows.
    fn.costr <- function(n.psi, isLeft = 1, isInterc = 1) {
        IU <- -diag(n.psi)
        sumU <- diag(n.psi)
        sumU[row(sumU) > col(sumU)] <- 1
        if (isLeft) {
            sumU <- cbind(1, sumU)
            IU <- diag(c(1, -rep(1, n.psi)))
        }
        A <- rbind(IU, sumU)
        if (isInterc) {
            A <- rbind(0, A)
            A <- cbind(c(1, rep(0, nrow(A) - 1)), A)
        }
        A <- cbind(A, matrix(0, nrow(A), n.psi))
        A
    }
    # --- sanity checks and initialisation ---
    vincoli <- FALSE
    c1 <- apply((Z <= PSI), 2, all)
    c2 <- apply((Z >= PSI), 2, all)
    if (sum(c1 + c2) != 0 || is.na(sum(c1 + c2))) stop("psi out of the range")
    n <- nrow(Z)
    min.step <- opz$min.step
    rangeZ <- apply(Z, 2, range)
    alpha <- opz$alpha
    # admissible psi limits: the (alpha, 1-alpha) quantiles of each Z column
    limZ <- apply(Z, 2, quantile, names = FALSE, probs = c(alpha, 1 - alpha))
    digits <- opz$digits
    pow <- opz$pow
    nomiOK <- opz$nomiOK
    toll <- opz$toll
    h <- opz$h
    conv.psi <- opz$conv.psi
    gap <- opz$gap
    stop.if.error <- opz$stop.if.error
    fix.npsi <- opz$fix.npsi
    dev.new <- opz$dev0
    visual <- opz$visual
    id.psi.group <- opz$id.psi.group
    it.max <- old.it.max <- opz$it.max
    psi <- PSI[1, ]
    names(psi) <- id.psi.group
    epsilon <- 10
    dev.values <- psi.values <- NULL
    it <- 0
    epsilon <- 10
    k.values <- dev.values <- NULL
    psi.values <- list()
    psi.values[[length(psi.values) + 1]] <- NA
    nomiU <- opz$nomiU
    nomiV <- opz$nomiV
    call.ok <- opz$call.ok
    call.noV <- opz$call.noV
    toll <- opz$toll
    if (!in.psi(limZ, PSI, FALSE)) stop("starting psi out of the range", call. = FALSE)
    if (!far.psi(Z, PSI, id.psi.group, FALSE)) stop("psi values too close each other. Please change (decreases number of) starting values", call. = FALSE)
    n.psi1 <- ncol(Z)
    # --- constrained quantile regression: switch rq to method "fnc" and wire
    #     the constraint matrices into the prepared calls ---
    if (is.null(opz$constr)) opz$constr <- 0
    # NOTE(review): 'class(obj) == "rq"' compares only the first class element
    # elementwise; inherits(obj, "rq") would be the robust test.
    if ((opz$constr %in% 1:2) && class(obj) == "rq") {
        vincoli <- TRUE
        call.ok$method <- "fnc"
        call.ok$R <- quote(R)
        call.ok$r <- quote(r)
        call.noV$method <- "fnc"
        call.noV$R <- quote(R.noV)
        call.noV$r <- quote(r)
    }
    # fn.obj is R code (text) extracting the objective from a fit 'x'
    # (evaluated via eval(parse(...)) below)
    fn.obj <- opz$fn.obj
    # --- initial gap-free fit at the starting psi ---
    U <- ((Z - PSI) * (Z > PSI))
    colnames(U) <- nomiU
    if (pow[1] != 1) U <- U^pow[1]
    obj0 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE))
    if ("try-error" %in% class(obj0)) stop("The first fit with U variables does not work..", call. = FALSE)
    L0 <- eval(parse(text = fn.obj), list(x = obj0))
    n.intDev0 <- nchar(strsplit(as.character(L0), "\\.")[[1]][1])
    dev.values[length(dev.values) + 1] <- opz$dev0
    dev.values[length(dev.values) + 1] <- L0
    psi.values[[length(psi.values) + 1]] <- psi
    if (visual) {
        cat(paste("iter = ", sprintf("%2.0f", 0), " min.f = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L0), " k = ", sprintf("%2.0f", NA), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " ini.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n")
    }
    id.warn <- FALSE
    # --- main loop: refit, update psi by gamma/beta, step-halve, prune ---
    while (abs(epsilon) > toll) {
        it <- it + 1
        n.psi0 <- n.psi1
        n.psi1 <- ncol(Z)
        # breakpoints were dropped last iteration: refresh the reference L0
        if (n.psi1 != n.psi0) {
            U <- ((Z - PSI) * (Z > PSI))
            if (pow[1] != 1) U <- U^pow[1]
            obj0 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE))
            L0 <- eval(parse(text = fn.obj), list(x = obj0))
        }
        # working fit with both U and V columns written into mfExt
        V <- dpmax(Z, PSI, pow = pow[2])
        for (i in 1:n.psi1) {
            mfExt[nomiU[i]] <- U[, i]
            mfExt[nomiV[i]] <- V[, i]
        }
        # constraint matrices referenced (quoted) by call.ok/call.noV
        R <- fn.costr(ncol(U), 1, 1)
        R.noV <- R[, -((ncol(R) - 1) + seq_len(ncol(U))), drop = FALSE]
        r <- rep(0, nrow(R))
        obj <- suppressWarnings(eval(call.ok, envir = mfExt))
        beta.c <- coef(obj)[nomiU]
        gamma.c <- coef(obj)[nomiV]
        # NA coefficients: stop (fix.npsi) or drop the offending breakpoints
        if (any(is.na(c(beta.c, gamma.c)))) {
            if (fix.npsi) {
                if (return.all.sol) return(list(dev.values, psi.values)) else stop("breakpoint estimate too close or at the boundary causing NA estimates.. too many breakpoints being estimated?", call. = FALSE)
            } else {
                id.coef.ok <- !is.na(gamma.c)
                psi <- psi[id.coef.ok]
                gamma.c <- gamma.c[id.coef.ok]
                beta.c <- beta.c[id.coef.ok]
                Z <- Z[, id.coef.ok, drop = FALSE]
                rangeZ <- rangeZ[, id.coef.ok, drop = FALSE]
                limZ <- limZ[, id.coef.ok, drop = FALSE]
                nomiOK <- nomiOK[id.coef.ok]
                id.psi.group <- id.psi.group[id.coef.ok]
                names(psi) <- id.psi.group
            }
        }
        # psi update: psi_new = psi_old + gamma/beta
        psi.old <- psi
        psi <- psi.old + gamma.c/beta.c
        if (!is.null(digits)) psi <- round(psi, digits)
        PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi))
        U <- (Z - PSI) * (Z > PSI)
        if (pow[1] != 1) U <- U^pow[1]
        for (i in 1:ncol(U)) mfExt[nomiU[i]] <- U[, i]
        obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE))
        # a failed fit is penalised so step-halving kicks in
        L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else eval(parse(text = fn.obj), list(x = obj1))
        # step-halving: divide the update by use.k*h until the objective stops
        # increasing or the step gets smaller than min.step
        use.k <- k <- 1
        L1.k <- NULL
        L1.k[length(L1.k) + 1] <- L1
        while (L1 > L0) {
            k <- k + 1
            use.k <- if (useExp.k) 2^(k - 1) else k
            psi <- psi.old + (gamma.c/beta.c)/(use.k * h)
            if (!is.null(digits)) psi <- round(psi, digits)
            PSI <- matrix(rep(psi, rep(n, length(psi))), ncol = length(psi))
            U <- (Z - PSI) * (Z > PSI)
            if (pow[1] != 1) U <- U^pow[1]
            for (i in 1:ncol(U)) mfExt[nomiU[i]] <- U[, i]
            obj1 <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE))
            L1 <- if (class(obj1)[1] == "try-error") L0 + 10 else eval(parse(text = fn.obj), list(x = obj1))
            L1.k[length(L1.k) + 1] <- L1
            if (1/(use.k * h) < min.step) {
                break
            }
        }
        if (visual) {
            flush.console()
            cat(paste("iter = ", sprintf("%2.0f", it), " min.f = ", sprintf(paste("%", n.intDev0 + 6, ".5f", sep = ""), L1), " k = ", sprintf("%2.0f", k), " n.psi = ", formatC(length(unlist(psi)), digits = 0, format = "f"), " est.psi = ", paste(formatC(unlist(psi), digits = 3, format = "f"), collapse = " "), sep = ""), "\n")
        }
        # convergence criterion: relative psi change or relative objective drop
        epsilon <- if (conv.psi) max(abs((psi - psi.old)/psi.old)) else (L0 - L1)/(abs(L0) + 0.1)
        L0 <- L1
        k.values[length(k.values) + 1] <- use.k
        psi.values[[length(psi.values) + 1]] <- psi
        dev.values[length(dev.values) + 1] <- L0
        if (it >= it.max) {
            id.warn <- TRUE
            break
        }
        # admissibility: psi inside limits and far enough apart; either clamp
        # (fix.npsi) or prune the violating breakpoints
        id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE)
        id.psi.in <- in.psi(limZ, PSI, TRUE)
        id.psi.ok <- id.psi.in & id.psi.far
        if (!all(id.psi.ok)) {
            if (fix.npsi) {
                psi <- adj.psi(psi, limZ)
                id.psi.far <- far.psi(Z, PSI, id.psi.group, TRUE)
                psi <- psi * ifelse(id.psi.far, 1, 0.9)
                PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi))
            } else {
                Z <- Z[, id.psi.ok, drop = FALSE]
                PSI <- PSI[, id.psi.ok, drop = FALSE]
                rangeZ <- rangeZ[, id.psi.ok, drop = FALSE]
                limZ <- limZ[, id.psi.ok, drop = FALSE]
                nomiOK <- nomiOK[id.psi.ok]
                id.psi.group <- id.psi.group[id.psi.ok]
                psi.old <- psi.old[id.psi.ok]
                psi <- psi[id.psi.ok]
                names(psi) <- id.psi.group
                if (ncol(PSI) <= 0) {
                    warning(paste("All breakpoints have been removed after", it, "iterations.. returning 0"), call. = FALSE)
                    return(0)
                }
            }
        }
    }
    # --- wrap up: sort psi, refit if they changed, pad coefs with 0 for V ---
    if (id.warn) warning(paste("max number of iterations (", it, ") attained", sep = ""), call. = FALSE)
    attr(psi.values, "dev") <- dev.values
    attr(psi.values, "k") <- k.values
    psi <- unlist(tapply(psi, id.psi.group, sort))
    names(psi) <- id.psi.group
    names.coef <- names(coef(obj))
    PSI.old <- PSI
    PSI <- matrix(rep(psi, rep(nrow(Z), length(psi))), ncol = length(psi))
    # if sorting/pruning changed PSI, recompute U/V and refit the final model
    if (sd(PSI - PSI.old) > 0) {
        U <- (Z - PSI) * (Z > PSI)
        colnames(U) <- paste("U", 1:ncol(U), sep = "")
        V <- -(Z > PSI)
        colnames(V) <- paste("V", 1:ncol(V), sep = "")
        for (i in 1:n.psi1) {
            mfExt[nomiU[i]] <- U[, i]
            mfExt[nomiV[i]] <- V[, i]
        }
        obj <- suppressWarnings(try(eval(call.noV, envir = mfExt), silent = TRUE))
        L1 <- eval(parse(text = fn.obj), list(x = obj))
    } else {
        obj <- obj1
    }
    # append exact zeros for the V terms to the coefficient component, which
    # may be a plain vector or a list depending on the fitter
    nomeCoef <- grep("coef", names(obj), value = TRUE)
    if (is.list(obj[[nomeCoef]])) {
        obj[[nomeCoef]][[1]] <- c(obj[[nomeCoef]][[1]], rep(0, ncol(V)))
        names(obj[[nomeCoef]][[1]]) <- names.coef[1:length(obj[[nomeCoef]][[1]])]
    } else {
        obj[[nomeCoef]] <- c(obj[[nomeCoef]], rep(0, ncol(V)))
        names(obj[[nomeCoef]]) <- names.coef
    }
    obj$epsilon <- epsilon
    obj$it <- it
    obj <- list(obj = obj, it = it, psi = psi, psi.values = psi.values, U = U, V = V, rangeZ = rangeZ, epsilon = epsilon, nomiOK = nomiOK, SumSquares.no.gap = L1, id.psi.group = id.psi.group, id.warn = id.warn, nomiV = nomiV, nomiU = nomiU, mfExt = mfExt)
    if (vincoli) {
        obj$R <- R
        obj$R.noV <- R.noV
        obj$r <- r
    }
    return(obj)
}
*R/summary.segmented.R 63b88814d6226b4e21248b47f8d31548 *R/vcov.segmented.R 50afc74be51aecd0691631ef53d09af4 *data/down.R 740e771e9e108e424b747d141bedc9d9 *data/down.rda d20d682ba59c89349b3c178d1a64b024 *data/plant.R 3c250b9a087af91dfe4471d97c3af3d0 *data/plant.rda bc0f3b31246f0bc96f896182583dff03 *data/stagnant.R 7ecd7c4c6bc193a1ce4a31ef825f645a *data/stagnant.rda 300f0af2f9cdb8a458ec54e81b8b18e7 *inst/CITATION 1746dc32753440c73c11ccd8a3764db2 *man/aapc.Rd 2c3943a03c507bc67d8262d0aa66a5de *man/broken.line.Rd 5780d5a9c3e6362945e76e328967393f *man/confint.segmented.Rd 9e4fd590c5c2653a99c7cafc5a357406 *man/davies.test.Rd a1fd6bbde564db5be2a9dde1ecedfb2d *man/down.Rd 9599cad4d5ab3427a70dee6448b717a5 *man/draw.history.Rd 102ab54c8a51fa71fef9892646bda600 *man/intercept.Rd 7a543b5123d9c64b5a4da6aba4e1d3ea *man/lines.segmented.Rd 925cd1c6b4a05d3fca632a7f93999d00 *man/plant.Rd b5f507ade96b3dc16fec530c69dbcbe1 *man/plot.segmented.Rd b4da8d785ea0e901bf670dada5dd9a56 *man/points.segmented.Rd 729da2792e9ac01611a35212764e6cb5 *man/predict.segmented.Rd c5ff81f292c40cdc317fe2ddfc99c4cd *man/print.segmented.Rd b78f4115d98c01af50f170341958b25f *man/pscore.test.rd dd5140da01ce7e2f2ad2f50330005417 *man/seg.control.Rd 55f09cc33b69788ffaaad8aad83faa3a *man/seg.lm.fit.Rd 0ebcc83ac8f57bf1429ebfb0a913a9f8 *man/segmented-package.Rd 0a7a9dfbd1eb6ab1d14f301d5dbf3e72 *man/segmented.Rd c8f16e9aca10e740d22165132777f728 *man/slope.Rd 1c8095f4472b4cabf3dc3cae5132260f *man/stagnant.Rd 028642ed745c44283e034aee3e29c0f6 *man/summary.segmented.Rd c060fe758f50408b271cbb8c899e03fe *man/vcov.segmented.Rd segmented/inst/0000755000176200001440000000000013572471525013215 5ustar liggesuserssegmented/inst/CITATION0000644000176200001440000000526113475757056014366 0ustar liggesuserscitHeader("To cite segmented in publications use:") citEntry(entry="Article", title = "Estimating regression models with unknown break-points.", author = personList(as.person("Vito M.R. 
Muggeo")), journal = "Statistics in Medicine", year = "2003", volume = "22", pages = "3055--3071", textVersion = paste("Vito M. R. Muggeo (2003).", "Estimating regression models with unknown break-points.", "Statistics in Medicine, 22, 3055-3071.") ) citEntry(entry="Article", title = "segmented: an R Package to Fit Regression Models with Broken-Line Relationships.", author = personList(as.person("Vito M.R. Muggeo")), journal = "R News", year = "2008", volume = "8", number = "1", pages = "20--25", url = "https://cran.r-project.org/doc/Rnews/", textVersion = paste("Vito M. R. Muggeo (2008).", "segmented: an R Package to Fit Regression Models with Broken-Line Relationships.", "R News, 8/1, 20-25.", "URL https://cran.r-project.org/doc/Rnews/.") ) citEntry(entry="Article", title = "Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling.", author = personList(as.person("Vito M.R. Muggeo")), journal = "J of Statistical Computation and Simulation", year = "2016", volume = "86", pages = "3059-3067", textVersion = paste("Vito M. R. Muggeo (2016).", "Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling.", "J of Statistical Computation and Simulation, 86, 3059-3067.") ) citEntry(entry="Article", title = "Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach.", author = personList(as.person("Vito M.R. Muggeo")), journal = "Australian & New Zealand Journal of Statistics", year = "2017", volume = "59", pages = "311-322", url = "https://cran.r-project.org/doc/Rnews/", textVersion = paste("Vito M. R. Muggeo (2017).", "Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach.", "Australian & New Zealand Journal of Statistics, 59, 311-322.") )