rms/NAMESPACE

export(annotateAnova,asis,bj,bjplot,bootBCa,bootcov,bootplot,bplot,
       calibrate,cph,catg,combineRelatedPredictors,confplot,contrast,
       coxphFit,cph,cr.setup,datadist,Design,DesignAssign,dxy.cens,
       effective.df,ExProb,fastbw,formatNP,gendata,gIndex,Glm,Gls,
       groupkm,gTrans,Hazard,hazard.ratio.plot,histdensity,"%ia%",
       ie.setup,interactions.containing,legend.nomabbrev,lm.pfit,lrm,
       lrm.fit,lrm.fit.bare,lrtest,lsp,matinv,matrx,modelData,
       Newlabels,Newlevels,nomogram,npsurv,ols,ols.influence,
       oos.loglik,orm,orm.fit,pantext,Penalty.matrix,Penalty.setup,
       pentrace,perimeter,perlcode,plotmathAnova,plot.contrast.rms,
       plot.lrm.partial,plot.xmean.ordinaly,pol,poma,pphsm,
       predab.resample,Predict,predictrms,prModFit,
       probabilityFamilies,psm,rcs,related.predictors,reListclean,
       robcov,Rq,sascode,scored,sensuc,setPb,show.influence,specs,
       strat,survreg.auxinfo,survdiffplot,survest,Survival,survplot,
       survplotp,univarLR,validate,val.prob,val.probg,val.surv,vif,
       which.influence)

useDynLib(rms, .registration=TRUE, .fixes="F_")

import(Hmisc)
importFrom(survival, Surv, is.Surv, survConcordance.fit, coxph.fit,
           coxph.control, survfit, survfitcoxph.fit, survreg.fit,
           survreg.control, survpenal.fit, survfitKM,
           survreg.distributions, agreg.fit, agexact.fit, attrassign,
           untangle.specials)
#importFrom(SparseM, solve, t, as.matrix)
import(SparseM)
importFrom(cluster, ellipsoidhull)
importFrom(digest, digest)
importFrom(MASS, kde2d, bandwidth.nrd, cov.mve)
import(ggplot2)
import(lattice)
#import(gridExtra)   # DOES NOT WORK UNLESS IN Depends IN DESCRIPTION
#importFrom(gridExtra, arrangeGrob)
importFrom(quantreg, rq.wfit, rq.fit, summary.rq)
import(nlme)
importFrom(rpart, rpart, rpart.control, prune)
importFrom(polspline, hare, phare)
import(multcomp)
# importFrom(multcomp, confint.glht, glht) FAILS confint.glht not exported by namespace:multcomp
importFrom(htmlTable, htmlTable, txtRound)
importFrom(htmltools, HTML)
importFrom(grDevices, dev.off, gray, grey, png, rgb, contourLines)
importFrom(graphics, abline, axis, boxplot, hist, legend, lines,
           locator, mtext, pairs, par, plot, plot.new, points,
           segments, strwidth, symbols, text, title, grconvertX,
           grconvertY)
importFrom(methods, existsFunction, getFunction, new)
importFrom(stats, .getXlevels, AIC, approx, as.formula,
           asOneSidedFormula, binomial, coef, complete.cases, confint,
           contrasts, cor, dcauchy, delete.response, density, deviance,
           dnorm, family, fitted, formula, gaussian, glm, glm.control,
           glm.fit, is.empty.model, lm, lm.fit, lm.influence, lm.wfit,
           logLik, lowess, lsfit, make.link, median, model.extract,
           model.frame, model.offset, model.weights, na.fail, na.omit,
           naresid, nlminb, nobs, optim, pcauchy, pchisq, pf, plogis,
           pnorm, predict, pt, qcauchy, qchisq, qlogis, qnorm, qqline,
           qqnorm, qt, quantile, reformulate, reshape, resid,
           residuals, runif, sd, supsmu, terms, uniroot, update, var,
           vcov)
importFrom(utils, capture.output, de, getS3method, getFromNamespace)

S3method(AIC, rms)
S3method(anova, rms)
S3method(calibrate, cph)
S3method(calibrate, default)
S3method(calibrate, psm)
S3method(contrast, rms)
S3method(ExProb, orm)
S3method(Function, cph)
S3method(Function, rms)
S3method(Hazard, psm)
S3method(html, anova.rms)
S3method(html, naprint.delete)
S3method(html, summary.rms)
S3method(html, validate)
S3method(latex, anova.rms)
S3method(latex, bj)
S3method(latex, cph)
S3method(latex, Glm)
S3method(latex, Gls)
S3method(latex, lrm)
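# Added note (not in the package sources): the S3method() registrations
# above and below are what route generic calls to rms-specific methods.
# A minimal dispatch sketch, assuming only simulated data:
#   library(rms)
#   set.seed(1)
#   x <- rnorm(50); y <- rbinom(50, 1, plogis(x))
#   f <- lrm(y ~ x)
#   print(f)   # dispatches to print.lrm via S3method(print, lrm)
#   anova(f)   # dispatches to anova.rms via S3method(anova, rms)
#   vcov(f)    # dispatches to vcov.lrm  via S3method(vcov, lrm)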
S3method(latex, naprint.delete)
S3method(latex, ols)
S3method(latex, orm)
S3method(latex, pphsm)
S3method(latex, psm)
S3method(latex, Rq)
S3method(latex, summary.rms)
S3method(latex, validate)
S3method(lines, residuals.psm.censored.normalized)
S3method(logLik, Gls)
S3method(logLik, ols)
S3method(logLik, rms)
S3method(Mean, cph)
S3method(Mean, lrm)
S3method(Mean, orm)
S3method(Mean, psm)
S3method(Newlabels, rms)
S3method(Newlevels, rms)
S3method(nobs, rms)
S3method(plot, anova.rms)
S3method(plot, calibrate)
S3method(plot, calibrate.default)
S3method(plot, contrast.rms)
S3method(plot, ExProb)
S3method(plot, gIndex)
S3method(plot, lrm.partial)
S3method(plot, nomogram)
S3method(plot, pentrace)
S3method(plot, Predict)
S3method(plot, sensuc)
S3method(plot, summary.rms)
S3method(plot, validate.rpart)
S3method(plot, val.prob)
S3method(plot, val.surv)
S3method(plot, val.survh)
S3method(plot, xmean.ordinaly)
S3method(plotp, Predict)
S3method(ggplot, Predict)
S3method(predict, bj)
S3method(predict, cph)
S3method(predict, Glm)
S3method(predict, Gls)
S3method(predict, lrm)
S3method(predict, ols)
S3method(predict, orm)
S3method(predict, psm)
S3method(predict, Rq)
S3method(print, anova.rms)
S3method(print, bj)
S3method(print, calibrate)
S3method(print, contrast.rms)
S3method(print, cph)
S3method(print, datadist)
S3method(print, fastbw)
S3method(print, gIndex)
S3method(print, Glm)
S3method(print, Gls)
S3method(print, lrm)
S3method(print, lrtest)
S3method(print, nomogram)
S3method(print, ols)
S3method(print, orm)
S3method(print, pentrace)
S3method(print, pphsm)
S3method(print, Predict)
S3method(print, psm)
S3method(print, Rq)
S3method(print, specs.rms)
S3method(print, summary.rms)
S3method(print, summary.survreg2)
S3method(print, survest.psm)
S3method(print, validate)
S3method(print, validate.rpart)
S3method(print, val.prob)
S3method(print, val.survh)
S3method(Quantile, cph)
S3method(Quantile, orm)
S3method(Quantile, psm)
S3method(rbind, Predict)
S3method(residuals, bj)
S3method(residuals, cph)
S3method(residuals, lrm)
S3method(residuals, ols)
S3method(residuals, orm)
S3method(residuals, psm)
S3method(specs, rms)
S3method(summary, rms)
S3method(survest, cph)
S3method(survest, psm)
S3method(survfit, cph)
S3method(survplot, residuals.psm.censored.normalized)
S3method(survplot, rms)
S3method(survplot, npsurv)
S3method(survplotp, npsurv)
S3method(Survival, cph)
S3method(Survival, psm)
S3method(validate, bj)
S3method(validate, cph)
S3method(validate, lrm)
S3method(validate, ols)
S3method(validate, orm)
S3method(validate, psm)
S3method(validate, rpart)
S3method(validate, Rq)
S3method(vcov, cph)
S3method(vcov, Glm)
S3method(vcov, Gls)
S3method(vcov, lrm)
S3method(vcov, ols)
S3method(vcov, orm)
S3method(vcov, pphsm)
S3method(vcov, psm)
S3method(vcov, rms)
S3method("[", rms)
S3method("as.data.frame", rms)

rms/demo/all.R

######################
# Detailed Example 1 #
######################

set.seed(17)   # So can repeat random number sequence
n   <- 500
sex <- factor(sample(c('female','male'), n, rep=TRUE))
age <- rnorm(n, 50, 10)
sys.bp <- rnorm(n, 120, 7)
# Use two population models, one with a systolic
# blood pressure effect and one without
L    <- ifelse(sex=='female', .1*(pmin(age,50)-50), .005*(age-50)^2)
L.bp <- L + .4*(pmax(sys.bp,120)-120)
dz    <- ifelse(runif(n) <= plogis(L),    1, 0)
dz.bp <- ifelse(runif(n) <= plogis(L.bp), 1, 0)
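# Added sketch (not in the original demo): sanity-check the simulated
# outcomes before modeling; prevalences should be moderate under both
# population models
round(c(dz=mean(dz), dz.bp=mean(dz.bp)), 2)
summary(plogis(L))   # distribution of true probabilities, no-bp model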
# Use summary.formula in the Hmisc package to summarize the
# data one predictor at a time
s <- summary(dz.bp ~ age + sex + sys.bp)
options(digits=3)
print(s)
plot(s)

plsmo(age, dz, group=sex, fun=qlogis, ylim=c(-3,3))
plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0)
title('Lowess-smoothed Estimates with True Regression Functions')

dd <- datadist(age, sex, sys.bp)
options(datadist='dd')
# can also do: dd <- datadist(dd, newvar)

f <- lrm(dz ~ rcs(age,5)*sex, x=TRUE, y=TRUE)
f   # x=TRUE, y=TRUE for pentrace
fpred <- Function(f)
fpred
fpred(age=30, sex=levels(sex))
anova(f)

p <- Predict(f, age, sex, conf.int=FALSE)
ggplot(p, rdata=data.frame(age, sex)) +
  geom_line(aes(x=age, y=L, color=sex), linetype='dotted',
            data=data.frame(age, L, sex))
# Specifying rdata to plot.Predict results in sex-specific
# rug plots for age using the Hmisc histSpikeg function, which uses
# ggplot geom_segment.  True regression functions are drawn as
# dotted lines

f.bp <- lrm(dz.bp ~ rcs(age,5)*sex + rcs(sys.bp,5))
p <- Predict(f.bp, age, sys.bp, np=75)
bplot(p)   # same as lfun=levelplot
bplot(p, lfun=contourplot)
bplot(p, lfun=wireframe)

cat('Doing 25 bootstrap repetitions to validate model\n')
validate(f, B=25)   # in practice use 300+

cat('Doing 25 bootstrap reps to check model calibration\n')
cal <- calibrate(f, B=25)   # use 300+
plot(cal)
title('Calibration of Unpenalized Model')

p <- pentrace(f, penalty=c(.009,.009903,.02,.2,.5,1))
f <- update(f, penalty=p$penalty)
f
specs(f, long=TRUE)
edf <- effective.df(f)

p <- Predict(f, age, sex, conf.int=FALSE)
# Plot penalized spline fit + true regression functions
ggplot(p, rdata=llist(age, sex)) +
  geom_line(aes(x=age, y=L, color=sex), linetype='dotted',
            data=data.frame(age, L, sex))

options(digits=3)
s <- summary(f)
s
plot(s)
s <- summary(f, sex='male')
plot(s)

fpred <- Function(f)
fpred
fpred(age=30, sex=levels(sex))
sascode(fpred)

cat('Doing 40 bootstrap reps to validate penalized model\n')
validate(f, B=40)

cat('Doing 40 bootstrap reps to check penalized model calibration\n')
cal <- calibrate(f, B=40)
plot(cal)
title('Calibration of Penalized Model')

nom <- nomogram(f.bp, fun=plogis, funlabel='Prob(dz)',
                fun.at=c(.15,.2,.3,.4,.5,.6,.7,.8,.9,.95,.975))
plot(nom, fun.side=c(1,3,1,3,1,3,1,3,1,3,1))
options(datadist=NULL)

#####################
# Detailed Example 2 #
#####################

# Simulate the data.
n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
treat        <- factor(sample(c('a','b','c'), n, TRUE))
num.diseases <- sample(0:4, n, TRUE)
age          <- rnorm(n, 50, 10)
cholesterol  <- rnorm(n, 200, 25)
weight       <- rnorm(n, 150, 20)
sex          <- factor(sample(c('female','male'), n, TRUE))
label(age)          <- 'Age'   # label is in Hmisc
label(num.diseases) <- 'Number of Comorbid Diseases'
label(cholesterol)  <- 'Total Cholesterol'
label(weight)       <- 'Weight, lbs.'
label(sex)          <- 'Sex'
units(cholesterol)  <- 'mg/dl' # uses units.default in Hmisc

# Specify population model for log odds that Y=1
L <- .1*(num.diseases-2) + .045*(age-50) +
     (log(cholesterol - 10)-5.2)*(-2*(treat=='a') +
     3.5*(treat=='b')+2*(treat=='c'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)
cholesterol[1:3] <- NA   # 3 missings, at random

ddist <- datadist(cholesterol, treat, num.diseases, age, weight, sex)
# Could have used ddist <- datadist(data.frame.name)
options(datadist="ddist")   # defines data dist. to rms
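# Added sketch (not in the original demo): datadist stores the adjustment
# values and default plotting limits that downstream rms functions consult;
# the 'limits' component name is assumed from the datadist documentation
ddist$limits[, c('age', 'cholesterol')]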
cholesterol <- impute(cholesterol)   # see impute in Hmisc package
# impute, describe, and several other basic functions are
# distributed as part of the Hmisc package

fit <- lrm(y ~ treat*log(cholesterol - 10) +
           scored(num.diseases) + rcs(age))

describe(y ~ treat + scored(num.diseases) + rcs(age))
# or use describe(formula(fit)) for all variables used in fit
# describe function (in Hmisc) gets simple statistics on variables
#fit <- robcov(fit)   # Would make all statistics which follow
                      # use a robust covariance matrix
                      # would need x=TRUE, y=TRUE in lrm
specs(fit)                       # Describe the design characteristics
a <- anova(fit)
print(a, which='subscripts')     # print which parameters being tested
plot(anova(fit))                 # Depict Wald statistics graphically
anova(fit, treat, cholesterol)   # Test these 2 by themselves
summary(fit)                     # Estimate effects using default ranges
plot(summary(fit))               # Graphical display of effects with C.L.
summary(fit, treat="b", age=60)  # Specify reference cell and adjustment val
summary(fit, age=c(50,70))       # Estimate effect of increasing age from
                                 # 50 to 70
summary(fit, age=c(50,60,70))    # Increase age from 50 to 70,
                                 # adjust to 60 when estimating
                                 # effects of other factors
# If had not defined datadist, would have to define
# ranges for all var.

# Estimate and test treatment (b-a) effect averaged
# over 3 cholesterols
contrast(fit, list(treat='b', cholesterol=c(150,200,250)),
              list(treat='a', cholesterol=c(150,200,250)),
         type='average')
# Remove type='average' to get 3 separate contrasts for b-a

# Plot effects.  ggplot(fit) plots effects of all predictors
# The ref.zero parameter is helpful for showing effects of
# predictors on a common scale for comparison of strength
ggplot(Predict(fit, ref.zero=TRUE))
ggplot(Predict(fit, age=seq(20,80,length=100), treat, conf.int=FALSE))
# Plots relationship between age and log
# odds, separate curve for each treat, no C.I.
bplot(Predict(fit, age, cholesterol, np=70))
# image plot for age, cholesterol, and
# log odds using default ranges for both variables

p <- Predict(fit, num.diseases, fun=function(x) 1/(1+exp(-x)),
             conf.int=.9)   # or fun=plogis
ggplot(p, ylab="Prob", conf.int=.9, nlevels=5)
# Treat as categorical variable even though numeric
# Plot estimated probabilities instead of log odds
# Again, if no datadist were defined, would have to
# tell plot all limits

logit <- predict(fit, expand.grid(treat="b", num.diseases=1:3,
                 age=c(20,40,60),
                 cholesterol=seq(100,300,length=10)))
# Also see Predict
# logit <- predict(fit, gendata(fit, nobs=12))
# Allows you to interactively specify 12 predictor combinations

# Generate 9 combinations with other variables
# set to defaults, get predicted values
gdat <- gendata(fit, age = c(20,40,60), treat = c('a','b','c'))
gdat
median(cholesterol); median(num.diseases)
logit <- predict(fit, gdat)
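# Added sketch (not in the original demo): Predict() gives the same
# nine combinations with confidence limits and datadist adjustment
# values handled automatically
Predict(fit, age=c(20,40,60), treat=c('a','b','c'))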
# Since age doesn't interact with anything, we can quickly and
# interactively try various transformations of age,
# taking the spline function of age as the gold standard. We are
# seeking a linearizing transformation.  Here age is linear in the
# population so this is not very productive.  Also, if we simplify the
# model the total degrees of freedom will be too small and
# confidence limits too narrow, so this process is at odds with
# correct statistical inference.
ag <- 10:80
logit <- predict(fit, expand.grid(treat="a", num.diseases=0, age=ag,
                 cholesterol=median(cholesterol)),
                 type="terms")[,"age"]
# Also see Predict
# Note: if age interacted with anything, this would be the age
# "main effect" ignoring interaction terms
# Could also use
#   logit <- plot(f, age=ag, ...)$x.xbeta[,2]
# which allows evaluation of the shape for any level
# of interacting factors.  When age does not interact with
# anything, the result from
#   predict(f, ..., type="terms")
# would equal the result from plot if all other terms were ignored
# Could also use
#   logit <- predict(fit, gendata(fit, age=ag, cholesterol=median...))

plot(ag^.5, logit)    # try square root vs. spline transform.
plot(ag^1.5, logit)   # try 1.5 power

# w <- latex(fit)     # invokes latex.lrm, creates fit.tex
# print(w)            # display or print model on screen

# Draw a nomogram for the model fit
plot(nomogram(fit, fun=plogis, funlabel="Prob[Y=1]"))

# Compose S function to evaluate linear predictors from fit
g <- Function(fit)
g(treat='b', cholesterol=260, age=50)
# Leave num.diseases at reference value

# Use the Hmisc dataRep function to summarize sample
# sizes for subjects as cross-classified on 2 key
# predictors
drep <- dataRep(~ roundN(age,10) + num.diseases)
print(drep, long=TRUE)

# Some approaches to making a plot showing how
# predicted values vary with a continuous predictor
# on the x-axis, with two other predictors varying
fit <- lrm(y ~ log(cholesterol - 10) + num.diseases + rcs(age) +
           rcs(weight) + sex)
combos <- gendata(fit, age=10:100,
                  cholesterol=c(170,200,230),
                  weight=c(150,200,250))
# num.diseases, sex not specified -> set to mode
# can also use expand.grid or Predict
combos$pred <- predict(fit, combos)

require(lattice)
xyplot(pred ~ age | cholesterol*weight, data=combos, type='l')
xYplot(pred ~ age | cholesterol, groups=weight,
       data=combos, type='l')   # in Hmisc
xYplot(pred ~ age, groups=interaction(cholesterol,weight),
       data=combos, type='l')

# Can also do this with plot.Predict or ggplot.Predict but a single
# plot may be busy:
ch <- c(170, 200, 230)
p <- Predict(fit, age, cholesterol=ch, weight=150, conf.int=FALSE)
plot(p, ~age | cholesterol)
ggplot(p)
# Here we use plot.Predict to make 9 separate plots, with CLs
p <- Predict(fit, age, cholesterol=c(170,200,230),
             weight=c(150,200,250))
plot(p, ~age | cholesterol*weight)
# Now do the same with ggplot
ggplot(p, groups=FALSE)
options(datadist=NULL)

######################
# Detailed Example 3 #
######################

n <- 2000
set.seed(731)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex  <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4)))
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
t <- -log(runif(n))/h
label(t) <- 'Follow-up Time'
e <- ifelse(t<=cens,1,0)
t <- pmin(t, cens)
units(t) <- "Year"
age.dec <- cut2(age, g=10, levels.mean=TRUE)
dd <- datadist(age, sex, age.dec)
options(datadist='dd')
Srv <- Surv(t,e)

# Fit a model that doesn't assume anything except
# that deciles are adequate representations of age
f <- cph(Srv ~ strat(age.dec)+strat(sex), surv=TRUE)
# surv=TRUE speeds up computations, and confidence limits when
# there are no covariables are still accurate.

# Plot log(-log 3-year survival probability) vs. mean age
# within age deciles and vs. sex
p <- Predict(f, age.dec, sex, time=3, loglog=TRUE)
plot(p)
plot(p, ~ as.numeric(as.character(age.dec)) | sex, ylim=c(-5,-1))
# Show confidence bars instead.  Note some limits are not present
# (infinite)
agen <- as.numeric(as.character(p$age.dec))
xYplot(Cbind(yhat, lower, upper) ~ agen | sex, data=p)

# Fit a model assuming proportional hazards for age and
# absence of age x sex interaction
f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE)
survplot(f, sex, n.risk=TRUE)
# Add ,age=60 after sex to tell survplot use age=60

# Validate measures of model performance using the bootstrap
# First must add data (design matrix and Srv) to fit object
f <- update(f, x=TRUE, y=TRUE)
validate(f, B=10, dxy=TRUE, u=5)   # use t=5 for Dxy (only)
# Use B=300 in practice

# Validate model for accuracy of predicting survival at t=1
# Get Kaplan-Meier estimates by dividing subjects into groups
# of size 200 (for other values of u must put time.inc=u in
# call to cph)
cal <- calibrate(f, B=10, u=1, m=200)   # B=300+ in practice
plot(cal)

# Check proportional hazards assumption for age terms
z <- cox.zph(f, 'identity')
print(z); plot(z)

# Re-fit this model without storing underlying survival
# curves for reference groups, but storing raw data with
# the fit (could also use f <- update(f, surv=FALSE, x=TRUE, y=TRUE))
f <- cph(Srv ~ rcs(age,4)+strat(sex), x=TRUE, y=TRUE)
# Get accurate C.L. for any age
# Note: for evaluating shape of regression, we would not ordinarily
# bother to get 3-year survival probabilities - would just use X * beta
# We do so here to use same scale as nonparametric estimates
f
anova(f)
ages <- seq(20, 80, by=4)   # Evaluate at fewer points.  Default is 100
                            # For exact C.L. formula n=100 -> much memory
p <- Predict(f, age=ages, sex, time=3, loglog=TRUE)
plot(p, ylim=c(-5,-1))
ggplot(p, ylim=c(-5, -1))

# Fit a model assuming proportional hazards for age but
# allowing for general interaction between age and sex
f <- cph(Srv ~ rcs(age,4)*strat(sex), x=TRUE, y=TRUE)
anova(f)
ages <- seq(20, 80, by=6)   # Still fewer points - more parameters in model
# Plot 3-year survival probability (log-log and untransformed)
# vs. age and sex, obtaining accurate confidence limits
plot(Predict(f, age=ages, sex, time=3, loglog=TRUE), ylim=c(-5,-1))
plot(Predict(f, age=ages, sex, time=3))
ggplot(Predict(f, age=ages, sex, time=3))
# Having x=TRUE, y=TRUE in fit also allows computation of influence stats
r <- resid(f, "dfbetas")
which.influence(f)

# Use survest to estimate 3-year survival probability and
# confidence limits for selected subjects
survest(f, expand.grid(age=c(20,40,60), sex=c('Female','Male')),
        times=c(2,4,6), conf.int=.95)

# Create an S function srv that computes fitted
# survival probabilities on demand, for non-interaction model
f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE)
srv <- Survival(f)
# Define functions to compute 3-year estimates as a function of
# the linear predictors (X*Beta)
surv.f <- function(lp) srv(3, lp, stratum="sex=Female")
surv.m <- function(lp) srv(3, lp, stratum="sex=Male")
# Create a function that computes quantiles of survival time
# on demand
quant <- Quantile(f)
# Define functions to compute median survival time
med.f <- function(lp) quant(.5, lp, stratum="sex=Female")
med.m <- function(lp) quant(.5, lp, stratum="sex=Male")
# Draw a nomogram to compute several types of predicted values
plot(nomogram(f, fun=list(surv.m, surv.f, med.m, med.f),
     funlabel=c("S(3 | Male)","S(3 | Female)",
                "Median (Male)","Median (Female)"),
     fun.at=list(c(.8,.9,.95,.98,.99),
                 c(.1,.3,.5,.7,.8,.9,.95,.98),
                 c(8,12), c(1,2,4,8,12))))
options(datadist=NULL)

########################################################
# Simple examples using small datasets for checking    #
# calculations across different systems in which random#
# number generators cannot be synchronized.             #
########################################################

x1 <- 1:20
x2 <- abs(x1-10)
x3 <- factor(rep(0:2, length.out=20))
y  <- c(rep(0:1, 8), 1, 1, 1, 1)
dd <- datadist(x1, x2, x3)
options(datadist='dd')
f <- lrm(y ~ rcs(x1,3) + x2 + x3)
f
specs(f, TRUE)
anova(f)
anova(f, x1, x2)
plot(anova(f))
s <- summary(f)
s
plot(s, log=TRUE)
par(mfrow=c(2,2))
plot(Predict(f))
par(mfrow=c(1,1))
plot(nomogram(f))
g <- Function(f)
g(11, 7, '1')
contrast(f, list(x1=11, x2=7, x3='1'), list(x1=10, x2=6, x3='2'))
fastbw(f)
gendata(f, x1=1:5)
# w <- latex(f)
f <- update(f, x=TRUE, y=TRUE)
which.influence(f)
residuals(f, 'gof')
robcov(f)$var
validate(f, B=10)
cal <- calibrate(f, B=10)
plot(cal)

f <- ols(y ~ rcs(x1,3) + x2 + x3, x=TRUE, y=TRUE)
anova(f)
anova(f, x1, x2)
plot(anova(f))
s <- summary(f)
s
plot(s, log=TRUE)
plot(Predict(f))
plot(nomogram(f))
g <- Function(f)
g(11, 7, '1')
contrast(f, list(x1=11, x2=7, x3='1'), list(x1=10, x2=6, x3='2'))
fastbw(f)
gendata(f, x1=1:5)
# w <- latex(f)
f <- update(f, x=TRUE, y=TRUE)
which.influence(f)
residuals(f, 'dfbetas')
robcov(f)$var
validate(f, B=10)
cal <- calibrate(f, B=10)
plot(cal)

S <- Surv(c(1,4,2,3,5,8,6,7,20,18,19,9,12,10,11,13,16,14,15,17))
survplot(npsurv(S ~ x3))
f <- psm(S ~ rcs(x1,3)+x2+x3, x=TRUE, y=TRUE)
f
# NOTE: LR chi-sq of 39.67 disagrees with that from old survreg
# and old psm (77.65); suspect were also testing sigma=1
for(w in c('survival','hazard'))
  print(survest(f, data.frame(x1=7, x2=3, x3='1'),
        times=c(5,7), conf.int=.95, what=w))
# S-Plus 2000 using old survival package:
# S(t):.925 .684  SE:0.729 0.556  Hazard:0.0734 0.255
plot(Predict(f, x1, time=5))
f$var
set.seed(3)   # robcov(f)$var when score residuals implemented
bootcov(f, B=30)$var
validate(f, B=10)
cal <- calibrate(f, cmethod='KM', u=5, B=10, m=10)
plot(cal)
r <- resid(f)
survplot(r)

f <- cph(S ~ rcs(x1,3)+x2+x3, x=TRUE, y=TRUE, surv=TRUE, time.inc=5)
f
plot(Predict(f, x1, time=5))
robcov(f)$var
bootcov(f, B=10)
validate(f, B=10)
cal <- calibrate(f, cmethod='KM', u=5, B=10, m=10)
survplot(f, x1=c(2,19))
options(datadist=NULL)

rms/demo/00Index

all   Comprehensive Demonstrations of Use of the rms Package

rms/README.md

rms
=====

Regression Modeling Strategies

Current Goals
=============
* Implement estimation and prediction methods for the Bayesian partial
  proportional odds model `blrm` function

Web Sites
=============
* Overall: http://hbiostat.org/R/rms/
* Book: http://hbiostat.org/rms/
* CRAN: http://cran.r-project.org/web/packages/rms/
* Changelog: https://github.com/harrelfe/rms/commits/master/

To Do
=====
* Fix survplot so that explicitly named adjust-to values are still in
  subtitles.  See tests/cph2.s.
* Fix fit.mult.impute to average sigma^2 and then take square root,
  instead of averaging sigma
* Implement user-added distributions in psm - see
  https://github.com/harrelfe/rms/issues/41

rms/man/orm.fit.Rd

\name{orm.fit}
\alias{orm.fit}
\title{Ordinal Regression Model Fitter}
\description{
Fits ordinal cumulative probability models for continuous or ordinal
response variables, efficiently allowing for a large number of
intercepts by capitalizing on the information matrix being sparse.
Five different distribution functions are implemented, with the default
being the logistic (yielding the proportional odds model).  Penalized
estimation will be implemented in the future.  Weights are not
implemented.  The optimization method is Newton-Raphson with
step-halving.  Execution time is linear in the number of intercepts.
}
\usage{
orm.fit(x=NULL, y, family='logistic',
        offset=0., initial, maxit=12L, eps=.005, tol=1e-7, trace=FALSE,
        penalty.matrix=NULL, scale=FALSE)
}
\arguments{
\item{x}{design matrix with no column for an intercept}
\item{y}{response vector, numeric, factor, or character.  The ordering
  of levels is assumed from \code{factor(y)}.}
\item{family}{the distribution family, corresponding to logistic (the
  default), Gaussian, Cauchy, Gumbel maximum (\eqn{exp(-exp(-x))};
  extreme value type I), and Gumbel minimum (\eqn{1-exp(-exp(x))})
  distributions.  These are the cumulative distribution functions
  assumed for \eqn{Prob[Y \ge y | X]}.  The \code{family} argument can
  be an unquoted or a quoted string, e.g. \code{family=loglog} or
  \code{family="loglog"}.  To use a built-in family, the string must be
  one of the following corresponding to the previous list:
  \code{logistic, probit, loglog, cloglog, cauchit}.  The user can also
  provide her own customized family by setting \code{family} to a list
  with elements \code{cumprob, inverse, deriv, deriv2}; see the body of
  \code{orm.fit} for examples.  An additional element, \code{name} must
  be given, which is a character string used to name the family for
  \code{print} and \code{latex}.}
\item{offset}{optional numeric vector containing an offset on the logit
  scale}
\item{initial}{vector of initial parameter estimates, beginning with
  the intercepts.  If \code{initial} is not specified, the function
  computes the overall score \eqn{\chi^2} test for the global null
  hypothesis of no regression.}
\item{maxit}{maximum no. iterations (default=\code{12}).}
\item{eps}{difference in \eqn{-2 log} likelihood for declaring
  convergence.
Default is \code{.005}. If the \eqn{-2 log} likelihood gets worse by eps/10 while the maximum absolute first derivative of \preformatted{-2 log} likelihood is below 1E-9, convergence is still declared. This handles the case where the initial estimates are MLEs, to prevent endless step-halving. } \item{tol}{Singularity criterion. Default is 1e-7} \item{trace}{ set to \code{TRUE} to print -2 log likelihood, step-halving fraction, change in -2 log likelihood, and maximum absolute value of first derivative at each iteration. } \item{penalty.matrix}{ a self-contained ready-to-use penalty matrix - see\code{lrm} } \item{scale}{set to \code{TRUE} to subtract column means and divide by column standard deviations of \code{x} before fitting, and to back-solve for the un-normalized covariance matrix and regression coefficients. This can sometimes make the model converge for very large sample sizes where for example spline or polynomial component variables create scaling problems leading to loss of precision when accumulating sums of squares and crossproducts.} } \value{ a list with the following components: \item{call}{ calling expression } \item{freq}{ table of frequencies for \code{y} in order of increasing \code{y} } \item{yunique}{vector of sorted unique values of \code{y}} \item{stats}{ vector with the following elements: number of observations used in the fit, number of unique \code{y} values, median \code{y} from among the observations used in the fit, maximum absolute value of first derivative of log likelihood, model likelihood ratio chi-square, d.f., P-value, score chi-square and its P-value, Spearman's \eqn{\rho} rank correlation between linear predictor and \code{y}, the Nagelkerke \eqn{R^2} index, the \eqn{g}-index, \eqn{gr} (the \eqn{g}-index on the ratio scale), and \eqn{pdm} (the mean absolute difference between 0.5 and the estimated probability that \eqn{y\geq} the marginal median). When \code{penalty.matrix} is present, the \eqn{\chi^2}{chi-square}, d.f., and P-value are not corrected for the effective d.f. } \item{fail}{ set to \code{TRUE} if convergence failed (and \code{maxit>1}) } \item{coefficients}{ estimated parameters } \item{var}{ estimated variance-covariance matrix (inverse of information matrix). Note that in the case of penalized estimation, \code{var} is not the improved sandwich-type estimator (which \code{lrm} does compute). The only intercept parameter included in the stored object is the middle intercept. } \item{family, trans}{see \code{\link{orm}}} \item{deviance}{ -2 log likelihoods. When an offset variable is present, three deviances are computed: for intercept(s) only, for intercepts+offset, and for intercepts+offset+predictors. When there is no offset variable, the vector contains deviances for the intercept(s)-only model and the model with intercept(s) and predictors. 
}
\item{non.slopes}{number of intercepts in model}
\item{interceptRef}{the index of the middle (median) intercept used in
  computing the linear predictor and \code{var}}
\item{linear.predictors}{the linear predictor using the first intercept}
\item{penalty.matrix}{see above}
\item{info.matrix}{see \code{\link{orm}}}
}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\seealso{
\code{\link{orm}}, \code{\link{lrm}}, \code{\link{glm}},
\code{\link{gIndex}}, \code{\link[SparseM:SparseM.solve]{solve}}
}
\examples{
#Fit an additive logistic model containing numeric predictors age,
#blood.pressure, and sex, assumed to be already properly coded and
#transformed
#
# fit <- orm.fit(cbind(age,blood.pressure,sex), death)
}
\keyword{models}
\keyword{regression}
\concept{logistic regression model}

rms/man/predab.resample.Rd

\name{predab.resample}
\alias{predab.resample}
\title{Predictive Ability using Resampling}
\description{
\code{predab.resample} is a general-purpose function that is used by
functions for specific models.  It computes estimates of optimism of,
and bias-corrected estimates of a vector of indexes of predictive
accuracy, for a model with a specified design matrix, with or without
fast backward step-down of predictors.  If \code{bw=TRUE}, the design
matrix \code{x} must have been created by \code{ols}, \code{lrm}, or
\code{cph}.  If \code{bw=TRUE}, \code{predab.resample} stores as the
\code{kept} attribute a logical matrix encoding which factors were
selected at each repetition.
}
\usage{
predab.resample(fit.orig, fit, measure,
                method=c("boot","crossvalidation",".632","randomization"),
                bw=FALSE, B=50, pr=FALSE, prmodsel=TRUE,
                rule="aic", type="residual", sls=.05, aics=0, tol=1e-12,
                force=NULL, estimates=TRUE,
                non.slopes.in.x=TRUE, kint=1,
                cluster, subset, group=NULL,
                allow.varying.intercepts=FALSE, debug=FALSE, \dots)
}
\arguments{
\item{fit.orig}{
object containing the original full-sample fit, with the \code{x=TRUE}
and \code{y=TRUE} options specified to the model fitting function.
This model should be the FULL model including all candidate variables
ever excluded because of poor associations with the response.
}
\item{fit}{
a function to fit the model, either the original model fit, or a fit in
a sample.  \code{fit} has as arguments \code{x}, \code{y}, \code{iter},
\code{penalty}, \code{penalty.matrix}, \code{xcol}, and other arguments
passed to \code{predab.resample}.  If you don't want \code{iter} as an
argument inside the definition of \code{fit}, add \dots to the end of
its argument list.  \code{iter} is passed to \code{fit} to inform the
function of the sampling repetition number (0=original sample).  If
\code{bw=TRUE}, \code{fit} should allow for the possibility of
selecting no predictors, i.e., it should fit an intercept-only model if
the model has intercept(s).  \code{fit} must return objects \code{coef}
and \code{fail} (\code{fail=TRUE} if \code{fit} failed due to
singularity or non-convergence - these cases are excluded from summary
statistics).  \code{fit} must add design attributes to the returned
object if \code{bw=TRUE}.  The \code{penalty.matrix} parameter is not
used if \code{penalty=0}.  The \code{xcol} vector is a vector of
columns of \code{X} to be used in the current model fit.  For
\code{ols} and \code{psm} it includes a \code{1} for the intercept
position.  \code{xcol} is not defined if \code{iter=0} unless the
initial fit had been from a backward step-down.
\code{xcol} is used to select the correct rows and columns of \code{penalty.matrix} for the current variables selected, for example. } \item{measure}{ a function to compute a vector of indexes of predictive accuracy for a given fit. For \code{method=".632"} or \code{method="crossval"}, it will make the most sense for measure to compute only indexes that are independent of sample size. The measure function should take the following arguments or use \dots: \code{xbeta} (X beta for current fit), \code{y}, \code{evalfit}, \code{fit}, \code{iter}, and \code{fit.orig}. \code{iter} is as in \code{fit}. \code{evalfit} is set to \code{TRUE} by \code{predab.resample} if the fit is being evaluated on the sample used to make the fit, \code{FALSE} otherwise; \code{fit.orig} is the fit object returned by the original fit on the whole sample. Using \code{evalfit} will sometimes save computations. For example, in bootstrapping the area under an ROC curve for a logistic regression model, \code{lrm} already computes the area if the fit is on the training sample. \code{fit.orig} is used to pass computed configuration parameters from the original fit such as quantiles of predicted probabilities that are used as cut points in other samples. The vector created by measure should have \code{names()} associated with it. } \item{method}{ The default is \code{"boot"} for ordinary bootstrapping (Efron, 1983, Eq. 2.10). Use \code{".632"} for Efron's \code{.632} method (Efron, 1983, Section 6 and Eq. 6.10), \code{"crossvalidation"} for grouped cross--validation, \code{"randomization"} for the randomization method. May be abbreviated down to any level, e.g. \code{"b"}, \code{"."}, \code{"cross"}, \code{"rand"}. } \item{bw}{ Set to \code{TRUE} to do fast backward step-down for each training sample. Default is \code{FALSE}. } \item{B}{ Number of repetitions, default=50. For \code{method="crossvalidation"}, this is also the number of groups the original sample is split into. } \item{pr}{ \code{TRUE} to print results for each sample. Default is \code{FALSE}. } \item{prmodsel}{ set to \code{FALSE} to suppress printing of model selection output such as that from \code{\link{fastbw}}.} \item{rule}{ Stopping rule for fastbw, \code{"aic"} or \code{"p"}. Default is \code{"aic"} to use Akaike's information criterion. } \item{type}{ Type of statistic to use in stopping rule for fastbw, \code{"residual"} (the default) or \code{"individual"}. } \item{sls}{ Significance level for stopping in fastbw if \code{rule="p"}. Default is \code{.05}. } \item{aics}{ Stopping criteria for \code{rule="aic"}. Stops deleting factors when chi-square - 2 times d.f. falls below \code{aics}. Default is \code{0}. } \item{tol}{ Tolerance for singularity checking. Is passed to \code{fit} and \code{fastbw}. } \item{force}{see \code{\link{fastbw}}} \item{estimates}{see \code{\link{print.fastbw}}} \item{non.slopes.in.x}{set to \code{FALSE} if the design matrix \code{x} does not have columns for intercepts and these columns are needed} \item{kint}{ For multiple intercept models such as the ordinal logistic model, you may specify which intercept to use as \code{kint}. This affects the linear predictor that is passed to \code{measure}. } \item{cluster}{ Vector containing cluster identifiers. This can be specified only if \code{method="boot"}. If it is present, the bootstrap is done using sampling with replacement from the clusters rather than from the original records. 
If this vector is not the same length as the number of rows in the data matrix used in the fit, an attempt will be made to use \code{naresid} on \code{fit.orig} to conform \code{cluster} to the data. See \code{bootcov} for more about this. } \item{subset}{ specify a vector of positive or negative integers or a logical vector when you want to have the \code{measure} function compute measures of accuracy on a subset of the data. The whole dataset is still used for all model development. For example, you may want to \code{validate} or \code{calibrate} a model by assessing the predictions on females when the fit was based on males and females. When you use \code{cr.setup} to build extra observations for fitting the continuation ratio ordinal logistic model, you can use \code{subset} to specify which \code{cohort} or observations to use for deriving indexes of predictive accuracy. For example, specify \code{subset=cohort=="all"} to validate the model for the first layer of the continuation ratio model (Prob(Y=0)). } \item{group}{ a grouping variable used to stratify the sample upon bootstrapping. This allows one to handle k-sample problems, i.e., each bootstrap sample will be forced to selected the same number of observations from each level of group as the number appearing in the original dataset. } \item{allow.varying.intercepts}{set to \code{TRUE} to not throw an error if the number of intercepts varies from fit to fit} \item{debug}{set to \code{TRUE} to print subscripts of all training and test samples} \item{\dots}{ The user may add other arguments here that are passed to \code{fit} and \code{measure}. }} \value{ a matrix of class \code{"validate"} with rows corresponding to indexes computed by \code{measure}, and the following columns: \item{index.orig}{ indexes in original overall fit } \item{training}{ average indexes in training samples } \item{test}{ average indexes in test samples } \item{optimism}{ average \code{training-test} except for \code{method=".632"} - is .632 times \code{(index.orig - test)} } \item{index.corrected}{ \code{index.orig-optimism} } \item{n}{ number of successful repetitions with the given index non-missing }. Also contains an attribute \code{keepinfo} if \code{measure} returned such an attribute when run on the original fit. } \details{ For \code{method=".632"}, the program stops with an error if every observation is not omitted at least once from a bootstrap sample. Efron's ".632" method was developed for measures that are formulated in terms on per-observation contributions. In general, error measures (e.g., ROC areas) cannot be written in this way, so this function uses a heuristic extension to Efron's formulation in which it is assumed that the average error measure omitting the \code{i}th observation is the same as the average error measure omitting any other observation. Then weights are derived for each bootstrap repetition and weighted averages over the \code{B} repetitions can easily be computed. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \references{ Efron B, Tibshirani R (1997). Improvements on cross-validation: The .632+ bootstrap method. JASA 92:548--560. 
}
\seealso{
\code{\link{rms}}, \code{\link{validate}}, \code{\link{fastbw}},
\code{\link{lrm}}, \code{\link{ols}}, \code{\link{cph}},
\code{\link{bootcov}}, \code{\link{setPb}}
}
\examples{
# See the code for validate.ols for an example of the use of
# predab.resample
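# Added sketch (not from the package sources): outline of the two
# user-supplied functions predab.resample expects.  The names mymeasure
# and myfit are hypothetical; the argument lists follow the contracts
# described under the measure and fit arguments above.
\dontrun{
mymeasure <- function(xbeta, y, evalfit=FALSE, fit, iter, fit.orig, ...) {
  # return a named vector of accuracy indexes computed from the
  # linear predictor xbeta and the response y
  c(MSE = mean((y - plogis(xbeta))^2))
}
myfit <- function(x, y, iter=0, ...) {
  f <- lrm.fit(x, y)
  # must return components coef and fail, per the fit contract
  list(coef=coef(f), fail=f$fail)
}
# predab.resample(fit.orig=f, fit=myfit, measure=mymeasure,
#                 method='boot', B=50)
}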
}
\keyword{models}
\concept{model validation}
\concept{bootstrap}
\concept{predictive accuracy}

rms/man/predict.lrm.Rd

\name{predict.lrm}
\alias{predict.lrm}
\alias{predict.orm}
\alias{Mean.lrm}
\alias{Mean.orm}
\title{Predicted Values for Binary and Ordinal Logistic Models}
\description{
Computes a variety of types of predicted values for fits from
\code{lrm} and \code{orm}, either from the original dataset or for new
observations.  The \code{Mean.lrm} and \code{Mean.orm} functions
produce an R function to compute the predicted mean of a numeric
ordered response variable given the linear predictor, which is assumed
to use the first intercept when it was computed.  The returned function
has two optional arguments if confidence intervals are desired:
\code{conf.int} and the design matrix \code{X}.  When this derived
function is called with nonzero \code{conf.int}, an attribute named
\code{limits} is attached to the estimated mean.  This is a list with
elements \code{lower} and \code{upper} containing normal approximations
for confidence limits using the delta method.
}
\usage{
\method{predict}{lrm}(object, \dots, type=c("lp", "fitted",
  "fitted.ind", "mean", "x", "data.frame", "terms", "cterms",
  "ccterms", "adjto","adjto.data.frame", "model.frame"),
  se.fit=FALSE, codes=FALSE)

\method{predict}{orm}(object, \dots, type=c("lp", "fitted",
  "fitted.ind", "mean", "x", "data.frame", "terms", "cterms",
  "ccterms", "adjto","adjto.data.frame", "model.frame"),
  se.fit=FALSE, codes=FALSE)

\method{Mean}{lrm}(object, codes=FALSE, \dots)
\method{Mean}{orm}(object, codes=FALSE, \dots)
}
\arguments{
\item{object}{a object created by \code{lrm} or \code{orm}}
\item{\dots}{arguments passed to \code{predictrms}, such as \code{kint}
  and \code{newdata} (which is used if you are predicting \code{out of
  data}).  See \code{predictrms} to see how NAs are handled.  Ignored
  for other functions.}
\item{type}{See \code{predict.rms} for \code{"x", "data.frame",
  "terms", "cterms", "ccterms", "adjto", "adjto.data.frame"} and
  \code{"model.frame"}.  \code{type="lp"} is used to get linear
  predictors (using the first intercept by default; specify \code{kint}
  to use others).  \code{type="fitted"} is used to get all the
  probabilities \eqn{Y\geq j}.  \code{type="fitted.ind"} gets all the
  individual probabilities \eqn{Y=j} (not recommended for \code{orm}
  fits).  For an ordinal response variable, \code{type="mean"} computes
  the estimated mean \eqn{Y} by summing values of \eqn{Y} multiplied by
  the estimated \eqn{Prob(Y=j)}.  If \eqn{Y} was a character or
  \code{factor} object, the levels are the character values or factor
  levels, so these must be translatable to numeric, unless
  \code{codes=TRUE}.  See the Hannah and Quigley reference below for
  the method of estimating (and presenting) the mean score.  If you
  specify \code{type="fitted","fitted.ind","mean"} you may not specify
  \code{kint}.}
\item{se.fit}{applies only to \code{type="lp"}, to get standard
  errors.}
\item{codes}{if \code{TRUE}, \code{type="mean"}, \code{Mean.lrm}, and
  \code{Mean.orm} use the integer codes \eqn{1,2,\ldots,k} for the
  \eqn{k}-level response in computing the predicted mean response.}
}
\value{
a vector (\code{type="lp"} with \code{se.fit=FALSE}, or
\code{type="mean"} or only one observation being predicted), a list
(with elements \code{linear.predictors} and \code{se.fit} if
\code{se.fit=TRUE}), a matrix (\code{type="fitted"} or
\code{type="fitted.ind"}), a data frame, or a design matrix.  For
\code{Mean.lrm} and \code{Mean.orm}, the result is an R function.
}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
fh@fharrell.com\cr
For the \code{Quantile} function:\cr
Qi Liu and Shengxin Tu\cr
Department of Biostatistics, Vanderbilt University
}
\references{
Hannah M, Quigley P: Presentation of ordinal regression analysis on the
original scale.  Biometrics 52:771--5; 1996.
}
\seealso{
\code{\link{lrm}}, \code{\link{orm}}, \code{\link{predict.rms}},
\code{\link{naresid}}, \code{\link{contrast.rms}}
}
\examples{
# See help for predict.rms for several binary logistic
# regression examples

# Examples of predictions from ordinal models
set.seed(1)
y <- factor(sample(1:3, 400, TRUE), 1:3, c('good','better','best'))
x1 <- runif(400)
x2 <- runif(400)
f <- lrm(y ~ rcs(x1,4)*x2, x=TRUE)   #x=TRUE needed for se.fit
# Get 0.95 confidence limits for Prob[better or best]
L <- predict(f, se.fit=TRUE)   #omitted kint= so use 1st intercept
plogis(with(L, linear.predictors + 1.96*cbind(-se.fit,se.fit)))
predict(f, type="fitted.ind")[1:10,]   #gets Prob(better) and all others
d <- data.frame(x1=c(.1,.5), x2=c(.5,.15))
predict(f, d, type="fitted")       # Prob(Y>=j) for new observation
predict(f, d, type="fitted.ind")   # Prob(Y=j)
predict(f, d, type='mean', codes=TRUE) # predicts mean(y) using codes 1,2,3
m <- Mean(f, codes=TRUE)
lp <- predict(f, d)
m(lp)
# Can use function m as an argument to Predict or nomogram to
# get predicted means instead of log odds or probabilities
dd <- datadist(x1,x2); options(datadist='dd')
m
plot(Predict(f, x1, fun=m), ylab='Predicted Mean')
# Note: Run f through bootcov with coef.reps=TRUE to get proper confidence
# limits for predicted means from the prop. odds model
options(datadist=NULL)
}
\keyword{models}
\keyword{regression}
\concept{logistic regression model}

rms/man/nomogram.Rd

\name{nomogram}
\alias{nomogram}
\alias{print.nomogram}
\alias{plot.nomogram}
\alias{legend.nomabbrev}
\title{Draw a Nomogram Representing a Regression Fit}
\description{
Draws a partial nomogram that can be used to manually obtain predicted
values from a regression model that was fitted with \code{rms}.  The
nomogram does not have lines representing sums, but it has a reference
line for reading scoring points (default range 0--100).  Once the
reader manually totals the points, the predicted values can be read at
the bottom.  Non-monotonic transformations of continuous variables are
handled (scales wrap around), as are transformations which have flat
sections (tick marks are labeled with ranges).  If interactions are in
the model, one variable is picked as the \dQuote{axis variable}, and
separate axes are constructed for each level of the interacting factors
(preference is given automatically to using any discrete factors to
construct separate axes) and levels of factors which are indirectly
related to interacting factors (see DETAILS).  Thus the nomogram is
designed so that only one axis is actually read for each variable,
since the variable combinations are disjoint.  For categorical
interacting factors, the default is to construct axes for all levels.
The user may specify coordinates of each predictor to label on its axis, or use default values. If a factor interacts with other factors, settings for one or more of the interacting factors may be specified separately (this is mandatory for continuous variables). Optional confidence intervals will be drawn for individual scores as well as for the linear predictor. If more than one confidence level is chosen, multiple levels may be displayed using different colors or gray scales. Functions of the linear predictors may be added to the nomogram. The \code{\link{datadist}} object that was in effect when the model was fit is used to specify the limits of the axis for continuous predictors when the user does not specify tick mark locations in the \code{nomogram} call. \code{print.nomogram} prints axis information stored in an object returned by \code{nomogram}. This is useful in producing tables of point assignments by levels of predictors. It also prints how many linear predictor units there are per point and the number of points per unit change in the linear predictor. \code{legend.nomabbrev} draws legends describing abbreviations used for labeling tick marks for levels of categorical predictors. } \usage{ nomogram(fit, ..., adj.to, lp=TRUE, lp.at=NULL, fun=NULL, fun.at=NULL, fun.lp.at=NULL, funlabel="Predicted Value", interact=NULL, kint=NULL, conf.int=FALSE, conf.lp=c("representative", "all", "none"), est.all=TRUE, posterior.summary=c('mean', 'median', 'mode'), abbrev=FALSE, minlength=4, maxscale=100, nint=10, vnames=c("labels","names"), varname.label=TRUE, varname.label.sep="=", omit=NULL, verbose=FALSE) \method{print}{nomogram}(x, dec=0, \dots) \method{plot}{nomogram}(x, lplabel="Linear Predictor", fun.side, col.conf=c(1, 0.3), conf.space=c(.08,.2), label.every=1, force.label=FALSE, xfrac=.35, cex.axis=.85, cex.var=1, col.grid=NULL, varname.label=TRUE, varname.label.sep="=", ia.space=.7, tck=NA, tcl=-0.25, lmgp=.4, naxes, points.label='Points', total.points.label='Total Points', total.sep.page=FALSE, total.fun, cap.labels=FALSE, ...) legend.nomabbrev(object, which, x, y, ncol=3, \dots) } \arguments{ \item{fit}{ a regression model fit that was created with \code{rms}, and (usually) with \code{options(datadist = "object.name")} in effect. } \item{\dots}{ settings of variables to use in constructing axes. If \code{datadist} was in effect, the default is to use \code{pretty(total range, nint)} for continuous variables, and the class levels for discrete ones. For \code{legend.nomabbrev}, \code{\dots} specifies optional parameters to pass to \code{legend}. Common ones are \code{bty = "n"} to suppress drawing the box. You may want to specify a non-proportionally spaced font (e.g., courier) number if abbreviations are more than one letter long. This will make the abbreviation definitions line up (e.g., specify \code{font = 2}, the default for courier). Ignored for \code{print} and \code{plot}. } \item{adj.to}{ If you didn't define \code{datadist} for all predictors, you will have to define adjustment settings for the undefined ones, e.g. \code{adj.to= list(age = 50, sex = "female")}. } \item{lp}{ Set to \code{FALSE} to suppress creation of an axis for scoring \eqn{X\beta}{X beta}. } \item{lp.at}{ If \code{lp=TRUE}, \code{lp.at} may specify a vector of settings of \eqn{X\beta}{X beta}. Default is to use \code{pretty(range of linear predictors, nint)}. } \item{fun}{ an optional function to transform the linear predictors, and to plot on another axis. 
If more than one transformation is plotted, put them in a list, e.g. \code{list(function(x) x/2, function(x) 2*x)}. Any function values equal to \code{NA} will be ignored. } \item{fun.at}{ function values to label on axis. Default \code{fun} evaluated at \code{lp.at}. If more than one \code{fun} was specified, using a vector for \code{fun.at} will cause all functions to be evaluated at the same argument values. To use different values, specify a list of vectors for \code{fun.at}, with elements corresponding to the different functions (lists of vectors also applies to \code{fun.lp.at} and \code{fun.side}). } \item{fun.lp.at}{ If you want to evaluate one of the functions at a different set of linear predictor values than may have been used in constructing the linear predictor axis, specify a vector or list of vectors of linear predictor values at which to evaluate the function. This is especially useful for discrete functions. The presence of this attribute also does away with the need for \code{nomogram} to compute numerical approximations of the inverse of the function. It also allows the user-supplied function to return \code{factor} objects, which is useful when e.g. a single tick mark position actually represents a range. If the \code{fun.lp.at} parameter is present, the \code{fun.at} vector for that function is ignored. } \item{funlabel}{ label for \code{fun} axis. If more than one function was given but funlabel is of length one, it will be duplicated as needed. If \code{fun} is a list of functions for which you specified names (see the final example below), these names will be used as labels. } \item{interact}{ When a continuous variable interacts with a discrete one, axes are constructed so that the continuous variable moves within the axis, and separate axes represent levels of interacting factors. For interactions between two continuous variables, all but the axis variable must have discrete levels defined in \code{interact}. For discrete interacting factors, you may specify levels to use in constructing the multiple axes. For continuous interacting factors, you must do this. Examples: \code{interact = list(age = seq(10,70,by=10), treat = c("A","B","D"))}. } \item{kint}{ for models such as the ordinal models with multiple intercepts, specifies which one to use in evaluating the linear predictor. Default is to use \code{fit$interceptRef} if it exists, or 1. } \item{conf.int}{ confidence levels to display for each scoring. Default is \code{FALSE} to display no confidence limits. Setting \code{conf.int} to \code{TRUE} is the same as setting it to \code{c(0.7, 0.9)}, with the line segment between the 0.7 and 0.9 levels shaded using gray scale. } \item{conf.lp}{ default is \code{"representative"} to group all linear predictors evaluated into deciles, and to show, for the linear predictor confidence intervals, only the mean linear predictor within the deciles along with the median standard error within the deciles. Set \code{conf.lp = "none"} to suppress confidence limits for the linear predictors, and to \code{"all"} to show all confidence limits. } \item{est.all}{ To plot axes for only the subset of variables named in \code{\dots}, set \code{est.all = FALSE}. Note: This option only works when zero has a special meaning for the variables that are omitted from the graph. } \item{posterior.summary}{when operating on a Bayesian model such as a result of \code{blrm} specifies whether to use posterior mean (default) vs. 
posterior mode/median of parameter values in constructing the nomogram} \item{abbrev}{ Set to \code{TRUE} to use the \code{\link{abbreviate}} function to abbreviate levels of categorical factors, both for labeling tick marks and for axis titles. If you only want to abbreviate certain predictor variables, set \code{abbrev} to a vector of character strings containing their names. } \item{minlength}{ applies if \code{abbrev = TRUE}. Is the minimum abbreviation length passed to the \code{\link{abbreviate}} function. If you set \code{minlength = 1}, the letters of the alphabet are used to label tick marks for categorical predictors, and all letters are drawn no matter how close together they are. For labeling axes (interaction settings), \code{minlength = 1} causes \code{minlength = 4} to be used. } \item{maxscale}{ default maximum point score is 100 } \item{nint}{ number of intervals to label for axes representing continuous variables. See \code{\link{pretty}}. } \item{vnames}{ By default, variable labels are used to label axes. Set \code{vnames = "names"} to instead use variable names. } \item{omit}{ vector of character strings containing names of variables for which to suppress drawing axes. Default is to show all variables. } \item{verbose}{ set to \code{TRUE} to get printed output detailing how tick marks are chosen and labeled for function axes. This is useful in seeing how certain linear predictor values cannot be solved for using inverse linear interpolation on the (requested linear predictor values, function values at these lp values). When this happens you will see \code{NA}s in the verbose output, and the corresponding tick marks will not appear in the nomogram. } \item{x}{an object created by \code{nomogram}, or the x coordinate for a legend} \item{dec}{ number of digits to the right of the decimal point, for rounding point scores in \code{print.nomogram}. Default is to round to the nearest whole number of points. } \item{lplabel}{ label for linear predictor axis. Default is \code{"Linear Predictor"}. } \item{fun.side}{ a vector or list of vectors of \code{side} parameters for the \code{axis} function for labeling function values. Values may be 1 to position a tick mark label below the axis (the default), or 3 for above the axis. If for example an axis has 5 tick mark labels and the second and third will run into each other, specify \code{fun.side=c(1,1,3,1,1)} (assuming only one function is specified as \code{fun}). } \item{col.conf}{ colors corresponding to \code{conf.int}. } \item{conf.space}{ a 2-element vector with the vertical range within which to draw confidence bars, in units of 1=spacing between main bars. Four heights are used within this range (8 for the linear predictor if more than 16 unique values were evaluated), cycling them among separate confidence intervals to reduce overlapping. } \item{label.every}{ Specify \code{label.every = i} to label on every \code{i}th tick mark. } \item{force.label}{ set to \code{TRUE} to force every tick mark intended to be labeled to have a label plotted (whether the labels run into each other or not) } \item{xfrac}{ fraction of horizontal plot to set aside for axis titles } \item{cex.axis}{ character size for tick mark labels } \item{cex.var}{ character size for axis titles (variable names) } \item{col.grid}{ If left unspecified, no vertical reference lines are drawn. 
Specify a vector of length one (to use the same color for both minor and major reference lines) or two (corresponding to the color for the major and minor divisions, respectively) containing colors, to cause vertical reference lines to the top points scale to be drawn. For R, a good choice is \code{col.grid = gray(c(0.8, 0.95))}. } \item{varname.label}{ In constructing axis titles for interactions, the default is to add \code{(interacting.varname = level)} on the right. Specify \code{varname.label = FALSE} to instead use \code{"(level)"}. } \item{varname.label.sep}{ If \code{varname.label = TRUE}, you can change the separator to something other than \code{=} by specifying this parameter. } \item{ia.space}{ When multiple axes are draw for levels of interacting factors, the default is to group combinations related to a main effect. This is done by spacing the axes for the second to last of these within a group only 0.7 (by default) of the way down as compared with normal space of 1 unit. } \item{tck}{ see \code{tck} under \code{\link{par}} } \item{tcl}{length of tick marks in nomogram} \item{lmgp}{ spacing between numeric axis labels and axis (see \code{\link{par}} for \code{mgp}) } \item{naxes}{ maximum number of axes to allow on one plot. If the nomogram requires more than one \dQuote{page}, the \dQuote{Points} axis will be repeated at the top of each page when necessary. } \item{points.label}{ a character string giving the axis label for the points scale } \item{total.points.label}{ a character string giving the axis label for the total points scale } \item{total.sep.page}{ set to \code{TRUE} to force the total points and later axes to be placed on a separate page } \item{total.fun}{ a user-provided function that will be executed before the total points axis is drawn. Default is not to execute a function. This is useful e.g. when \code{total.sep.page = TRUE} and you wish to use \code{locator} to find the coordinates for positioning an abbreviation legend before it's too late and a new page is started (i.e., \code{total.fun = function() print(locator(1))}). } \item{cap.labels}{logical: should the factor labels have their first letter capitalized?} \item{object}{ the result returned from \code{nomogram} } \item{which}{ a character string giving the name of a variable for which to draw a legend with abbreviations of factor levels } \item{y}{ y-coordinate to pass to the \code{legend} function. This is the upper left corner of the legend box. You can omit \code{y} if \code{x} is a list with named elements \code{x} and \code{y}. To use the mouse to locate the legend, specify \code{locator(1)} for \code{x}. For \code{print}, \code{x} is the result of \code{nomogram}. } \item{ncol}{ the number of columns to form in drawing the legend. } } \value{ a list of class \code{"nomogram"} that contains information used in plotting the axes. If you specified \code{abbrev = TRUE}, a list called \code{abbrev} is also returned that gives the abbreviations used for tick mark labels, if any. This list is useful for making legends and is used by \code{legend.nomabbrev} (see the last example). The returned list also has components called \code{total.points}, \code{lp}, and the function axis names. These components have components \code{x} (\code{at} argument vector given to \code{axis}), \code{y} (\code{pos} for \code{axis}), and \code{x.real}, the x-coordinates appearing on tick mark labels. 
An often useful result is stored in the list of data for each axis variable, namely the
exact number of points that correspond to each tick mark on that variable's axis.
}
\details{
A variable is considered to be discrete if it is categorical or ordered or if
\code{\link{datadist}} stored \code{values} for it (meaning it had \code{<11} unique
values). A variable is said to be indirectly related to another variable if the two
are related by some interaction. For example, if a model has variables a, b, c, d, and
the interactions are a:c and c:d, variable d is indirectly related to variable a. The
complete list of variables related to a is c, d. If an axis is made for variable a,
several axes will actually be drawn, one for each combination of c and d specified in
\code{interact}.

Note that with a caliper, it is easy to continually add point scores for individual
predictors, and then to place the caliper on the upper \dQuote{Points} axis (with
extrapolation if needed). Then transfer these points to the \dQuote{Total Points}
axis. In this way, points can be added without writing them down.

Confidence limits for an individual predictor score are really confidence limits for
the entire linear predictor, with other predictors set to adjustment values. If
\code{lp = TRUE}, all confidence bars for all linear predictor values evaluated are
drawn. The extent to which multiple confidence bars of differing widths appear at the
same linear predictor value means that precision depended on how the linear predictor
was arrived at (e.g., a certain value may be realized from a setting of a certain
predictor that was associated with a large standard error on the regression
coefficients for that predictor).

On occasion, you may want to reverse the regression coefficients of a model to make
the \dQuote{points} scales reverse direction. For parametric survival models, which
are stated in terms of increasing regression effects meaning longer survival (the
opposite of a Cox model), just do something like
\code{fit$coefficients <- -fit$coefficients} before invoking \code{nomogram}, and if
you add function axes, negate the function arguments. For the Cox model, you also need
to negate \code{fit$center}. If you omit \code{lp.at}, also negate
\code{fit$linear.predictors}.
}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
\email{fh@fharrell.com}
}
\references{
Banks J: Nomograms. Encyclopedia of Statistical Sciences, Vol 6. Editors: S Kotz and
NL Johnson. New York: Wiley; 1985.

Lubsen J, Pool J, van der Does, E: A practical device for the application of a
diagnostic or prognostic function. Meth. Inform. Med. 17:127--129; 1978.

Wikipedia: Nomogram, \url{https://en.wikipedia.org/wiki/Nomogram}.
}
\seealso{
\code{\link{rms}}, \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}},
\code{\link{plot.summary.rms}}, \code{\link{axis}}, \code{\link{pretty}},
\code{\link{approx}}, \code{\link{latexrms}}, \code{\link{rmsMisc}}
}
\examples{
n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
d <- data.frame(age            = rnorm(n, 50, 10),
                blood.pressure = rnorm(n, 120, 15),
                cholesterol    = rnorm(n, 200, 25),
                sex            = factor(sample(c('female','male'), n,TRUE)))

# Specify population model for log odds that Y=1
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
d <- upData(d,
  L = .4*(sex=='male') + .045*(age-50) +
      (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')),
  y = ifelse(runif(n) < plogis(L), 1, 0))

ddist <- datadist(d); options(datadist='ddist')

f <- lrm(y ~ lsp(age,50) + sex * rcs(cholesterol, 4) + blood.pressure, data=d)

nom <- nomogram(f, fun=function(x)1/(1+exp(-x)),  # or fun=plogis
    fun.at=c(.001,.01,.05,seq(.1,.9,by=.1),.95,.99,.999),
    funlabel="Risk of Death")
#Instead of fun.at, could have specified fun.lp.at=logit of
#sequence above - faster and slightly more accurate
plot(nom, xfrac=.45)
print(nom)
nom <- nomogram(f, age=seq(10,90,by=10))
plot(nom, xfrac=.45)
g <- lrm(y ~ sex + rcs(age, 3) * rcs(cholesterol, 3), data=d)
nom <- nomogram(g, interact=list(age=c(20,40,60)), conf.int=c(.7,.9,.95))
plot(nom, col.conf=c(1,.5,.2), naxes=7)

# Note: sex in d has levels 'female','male', so the comparison below
# must use 'female' for the simulated hazard to depend on sex
w <- upData(d,
            cens = 15 * runif(n),
            h = .02 * exp(.04 * (age - 50) + .8 * (sex == 'female')),
            d.time = -log(runif(n)) / h,
            death = ifelse(d.time <= cens, 1, 0),
            d.time = pmin(d.time, cens))

f <- psm(Surv(d.time,death) ~ sex * age, data=w, dist='lognormal')
med <- Quantile(f)
surv <- Survival(f)  # This would also work if f was from cph
plot(nomogram(f, fun=function(x) med(lp=x), funlabel="Median Survival Time"))
nom <- nomogram(f, fun=list(function(x) surv(3, x),
                            function(x) surv(6, x)),
                funlabel=c("3-Month Survival Probability",
                           "6-Month Survival Probability"))
plot(nom, xfrac=.7)

\dontrun{
nom <- nomogram(fit.with.categorical.predictors, abbrev=TRUE, minlength=1)
nom$x1$points   # print points assigned to each level of x1 for its axis
#Add legend for abbreviations for category levels
abb <- attr(nom, 'info')$abbrev$treatment
legend(locator(1), abb$full, pch=paste(abb$abbrev,collapse=''),
       ncol=2, bty='n')  # this only works for 1-letter abbreviations
#Or use the legend.nomabbrev function:
legend.nomabbrev(nom, 'treatment', locator(1), ncol=2, bty='n')
}

#Make a nomogram with axes predicting probabilities Y>=j for all j=1-3
#in an ordinal logistic model, where Y=0,1,2,3
w <- upData(w, Y = ifelse(y==0, 0, sample(1:3, length(y), TRUE)))
g <- lrm(Y ~ age+rcs(cholesterol,4) * sex, data=w)
fun2 <- function(x) plogis(x-g$coef[1]+g$coef[2])
fun3 <- function(x) plogis(x-g$coef[1]+g$coef[3])
f <- Newlabels(g, c(age='Age in Years'))
#see Design.Misc, which also has Newlevels to change
#labels for levels of categorical variables
g <- nomogram(f, fun=list('Prob Y>=1'=plogis, 'Prob Y>=2'=fun2, 'Prob Y=3'=fun3),
              fun.at=c(.01,.05,seq(.1,.9,by=.1),.95,.99))
plot(g, lmgp=.2, cex.axis=.6)
options(datadist=NULL)
}
\keyword{models}
\keyword{regression}
\keyword{hplot}
rms/man/gIndex.Rd0000644000176200001440000001732313714237251013344 0ustar liggesusers\name{gIndex}
\alias{gIndex}
\alias{print.gIndex}
\alias{plot.gIndex}
\title{Calculate Total and Partial g-indexes for an rms Fit}
\description{
\code{gIndex} computes the total \eqn{g}-index for a model based on the vector of
linear predictors, and the partial \eqn{g}-index for each predictor
in a model. The latter is computed by summing all the terms involving each variable,
weighted by their regression coefficients, then computing Gini's mean difference on
this sum. For example, a regression model having age and sex and age*sex on the right
hand side, with corresponding regression coefficients
\eqn{b_{1}, b_{2}, b_{3}}{b1, b2, b3} will have the \eqn{g}-index for age computed from
Gini's mean difference on the product of age
\eqn{\times (b_{1} + b_{3}w)}{times (b1 + b3*w)} where \eqn{w} is an indicator set to
one for observations with sex not equal to the reference value. When there are
nonlinear terms associated with a predictor, these terms will also be combined.

A \code{print} method is defined, and there is a \code{plot} method for displaying
\eqn{g}-indexes using a dot chart. These functions use \code{Hmisc::GiniMd}.
}
\usage{
gIndex(object, partials=TRUE, type=c('ccterms', 'cterms', 'terms'),
           lplabel=if(length(object$scale) && is.character(object$scale))
           object$scale[1] else 'X*Beta',
           fun, funlabel=if(missing(fun)) character(0) else
           deparse(substitute(fun)),
           postfun=if(length(object$scale)==2) exp else NULL,
           postlabel=if(length(postfun))
           ifelse(missing(postfun),
                  if((length(object$scale) > 1) &&
                     is.character(object$scale)) object$scale[2] else
                     'Anti-log',
                  deparse(substitute(postfun))) else character(0),
           \dots)

\method{print}{gIndex}(x, digits=4, abbrev=FALSE,
vnames=c("names","labels"), \dots)

\method{plot}{gIndex}(x, what=c('pre', 'post'),
xlab=NULL, pch=16, rm.totals=FALSE,
sort=c('descending', 'ascending', 'none'), \dots)
}
\arguments{
\item{object}{result of an \code{rms} fitting function}
\item{partials}{set to \code{FALSE} to suppress computation of partial \eqn{g}s}
\item{type}{defaults to \code{'ccterms'} which causes partial discrimination indexes
to be computed after maximally combining all related main effects and interactions.
This is usually the only way that makes sense when considering partial linear
predictors. Specify \code{type='cterms'} to only combine a main effect with
interactions containing it, not also with other main effects connected through
interactions. Use \code{type='terms'} to separate interactions into their own effects.}
\item{lplabel}{a replacement for default values such as \code{"X*Beta"} or
\code{"log odds"}.}
\item{fun}{an optional function to transform the linear predictors before computing
the total (only) \eqn{g}. When this is present, a new component \code{gtrans} is added
to the attributes of the object resulting from \code{gIndex}.}
\item{funlabel}{a character string label for \code{fun}, otherwise taken from the
function name itself}
\item{postfun}{a function to transform \eqn{g} such as \code{exp} (anti-log), which is
the default for certain models such as the logistic and Cox models}
\item{postlabel}{a label for \code{postfun}}
\item{\dots}{
For \code{gIndex}, passed to \code{predict.rms}. Ignored for \code{print}. Passed to
\code{\link[Hmisc]{dotchart2}} for \code{plot}.
}
\item{x}{
an object created by \code{gIndex} (for \code{print} or \code{plot})
}
\item{digits}{causes rounding to the \code{digits} decimal place}
\item{abbrev}{set to \code{TRUE} to abbreviate labels if \code{vnames="labels"}}
\item{vnames}{set to \code{"labels"} to print predictor labels instead of names}
\item{what}{set to \code{"post"} to plot the transformed \eqn{g}-index if there is one
(e.g., ratio scale)}
\item{xlab}{\eqn{x}-axis label; constructed by default}
\item{pch}{plotting character for point}
\item{rm.totals}{set to \code{TRUE} to remove the total \eqn{g}-index when plotting}
\item{sort}{specifies how to sort predictors by \eqn{g}-index; default is in
descending order going down the dot chart}
}
\details{
For stratification factors in a Cox proportional hazards model, there is no
contribution of variation towards computing a partial \eqn{g} except from terms that
interact with the stratification variable.
}
\value{
\code{gIndex} returns a matrix of class \code{"gIndex"} with auxiliary information
stored as attributes, such as variable labels.

\code{GiniMd} returns a scalar.
}
\references{
David HA (1968): Gini's mean difference rediscovered. Biometrika 55:573--575.
}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
\email{fh@fharrell.com}
}
\seealso{\code{\link{predict.rms}},\code{\link[Hmisc]{GiniMd}}}
\examples{
set.seed(1)
n <- 40
x <- 1:n
w <- factor(sample(c('a','b'), n, TRUE))
u <- factor(sample(c('A','B'), n, TRUE))
y <- .01*x + .2*(w=='b') + .3*(u=='B') + .2*(w=='b' & u=='B') + rnorm(n)/5
dd <- datadist(x,w,u); options(datadist='dd')
f <- ols(y ~ x*w*u, x=TRUE, y=TRUE)
f
anova(f)
z <- list()
for(type in c('terms','cterms','ccterms')) {
  zc <- predict(f, type=type)
  cat('type:', type, '\n')
  print(zc)
  z[[type]] <- zc
}
zc <- z$cterms
GiniMd(zc[, 1])
GiniMd(zc[, 2])
GiniMd(zc[, 3])
GiniMd(f$linear.predictors)
g <- gIndex(f)
g
g['Total',]
gIndex(f, partials=FALSE)
gIndex(f, type='cterms')
gIndex(f, type='terms')

y <- y > .8
f <- lrm(y ~ x * w * u, x=TRUE, y=TRUE)
gIndex(f, fun=plogis, funlabel='Prob[y=1]')

# Manual calculation of combined main effect + interaction effect of
# sex in a 2x2 design with treatments A B, sexes F M,
# model -.1 + .3*(treat=='B') + .5*(sex=='M') + .4*(treat=='B' & sex=='M')

set.seed(1)
X <- expand.grid(treat=c('A','B'), sex=c('F', 'M'))
a <- 3; b <- 7; c <- 13; d <- 5
X <- rbind(X[rep(1, a),], X[rep(2, b),], X[rep(3, c),], X[rep(4, d),])
y <- with(X, -.1 + .3*(treat=='B') + .5*(sex=='M') + .4*(treat=='B' & sex=='M'))
f <- ols(y ~ treat*sex, data=X, x=TRUE)
gIndex(f, type='cterms')
k <- coef(f)
b1 <- k[2]; b2 <- k[3]; b3 <- k[4]
n <- nrow(X)
( (a+b)*c*abs(b2) + (a+b)*d*abs(b2+b3) + c*d*abs(b3))/(n*(n-1)/2 )

# Manual calculation for combined age effect in a model with sex,
# age, and age*sex interaction

a <- 13; b <- 7
sex <- c(rep('female',a), rep('male',b))
agef <- round(runif(a, 20, 30))
agem <- round(runif(b, 20, 40))
age <- c(agef, agem)
y <- (sex=='male') + age/10 - (sex=='male')*age/20
f <- ols(y ~ sex*age, x=TRUE)
f
gIndex(f, type='cterms')
k <- coef(f)
b1 <- k[2]; b2 <- k[3]; b3 <- k[4]
n <- a + b
sp <- function(w, z=w) sum(outer(w, z, function(u, v) abs(u-v)))
( abs(b2)*sp(agef) + abs(b2+b3)*sp(agem) + 2*sp(b2*agef, (b2+b3)*agem) ) / (n*(n-1))
( abs(b2)*GiniMd(agef)*a*(a-1) + abs(b2+b3)*GiniMd(agem)*b*(b-1) +
  2*sp(b2*agef, (b2+b3)*agem) ) / (n*(n-1))

\dontrun{
# Compare partial and total g-indexes over many random fits
plot(NA, NA, xlim=c(0,3), ylim=c(0,3), xlab='Global',
     ylab='x1 (black) x2 (red) x3
(green) x4 (blue)') abline(a=0, b=1, col=gray(.9)) big <- integer(3) n <- 50 # try with n=7 - see lots of exceptions esp. for interacting var for(i in 1:100) { x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) x4 <- runif(n) y <- x1 + x2 + x3 + x4 + 2*runif(n) f <- ols(y ~ x1*x2+x3+x4, x=TRUE) # f <- ols(y ~ x1+x2+x3+x4, x=TRUE) # also try this w <- gIndex(f)[,1] gt <- w['Total'] points(gt, w['x1, x2']) points(gt, w['x3'], col='green') points(gt, w['x4'], col='blue') big[1] <- big[1] + (w['x1, x2'] > gt) big[2] <- big[2] + (w['x3'] > gt) big[3] <- big[3] + (w['x4'] > gt) } print(big) } options(datadist=NULL) } \keyword{predictive accuracy} \keyword{robust} \keyword{univar} rms/man/val.surv.Rd0000644000176200001440000002522613714237251013711 0ustar liggesusers\name{val.surv} \alias{val.surv} \alias{plot.val.surv} \alias{plot.val.survh} \alias{print.val.survh} \title{ Validate Predicted Probabilities Against Observed Survival Times } \description{ The \code{val.surv} function is useful for validating predicted survival probabilities against right-censored failure times. If \code{u} is specified, the hazard regression function \code{hare} in the \code{polspline} package is used to relate predicted survival probability at time \code{u} to observed survival times (and censoring indicators) to estimate the actual survival probability at time \code{u} as a function of the estimated survival probability at that time, \code{est.surv}. If \code{est.surv} is not given, \code{fit} must be specified and the \code{survest} function is used to obtain the predicted values (using \code{newdata} if it is given, or using the stored linear predictor values if not). \code{hare} is given the sole predictor \code{fun(est.surv)} where \code{fun} is given by the user or is inferred from \code{fit}. \code{fun} is the function of predicted survival probabilities that one expects to create a linear relationship with the linear predictors. \code{hare} uses an adaptive procedure to find a linear spline of \code{fun(est.surv)} in a model where the log hazard is a linear spline in time \eqn{t}, and cross-products between the two splines are allowed so as to not assume proportional hazards. Thus \code{hare} assumes that the covariate and time functions are smooth but not much else, if the number of events in the dataset is large enough for obtaining a reliable flexible fit. There are special \code{print} and \code{plot} methods when \code{u} is given. In this case, \code{val.surv} returns an object of class \code{"val.survh"}, otherwise it returns an object of class \code{"val.surv"}. If \code{u} is not specified, \code{val.surv} uses Cox-Snell (1968) residuals on the cumulative probability scale to check on the calibration of a survival model against right-censored failure time data. If the predicted survival probability at time \eqn{t} for a subject having predictors \eqn{X} is \eqn{S(t|X)}, this method is based on the fact that the predicted probability of failure before time \eqn{t}, \eqn{1 - S(t|X)}, when evaluated at the subject's actual survival time \eqn{T}, has a uniform (0,1) distribution. The quantity \eqn{1 - S(T|X)} is right-censored when \eqn{T} is. By getting one minus the Kaplan-Meier estimate of the distribution of \eqn{1 - S(T|X)} and plotting against the 45 degree line we can check for calibration accuracy. A more stringent assessment can be obtained by stratifying this analysis by an important predictor variable. 
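For example, here is a minimal sketch of this check, assuming the true survival
function \eqn{S(t|X) = \exp(-ht)} as simulated in the \code{examples} section below
(\code{h}, \code{t}, and \code{ev} are defined there; in practice the estimated
survival probabilities from a fit would replace the true ones):
\preformatted{
p  <- 1 - exp(-h*t)    # 1 - S(T|X); right-censored whenever T is
km <- survival::survfit(survival::Surv(p, ev) ~ 1)
plot(km$time, 1 - km$surv, type='s')  # estimated distribution of 1 - S(T|X)
abline(0, 1, lty=2)                   # should track the 45-degree line
}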
The theoretical uniform distribution is only an approximation when the survival
probabilities are estimates and not population values.

When \code{censor} is specified to \code{val.surv}, a different validation is done
that is more stringent but that only uses the uncensored failure times. This method is
used for type I censoring when the theoretical censoring times are known for subjects
having uncensored failure times. Let \eqn{T}, \eqn{C}, and \eqn{F} denote respectively
the failure time, censoring time, and cumulative failure time distribution
(\eqn{1 - S}). The expected value of \eqn{F(T | X)} is 0.5 when \eqn{T} represents the
subject's actual failure time. For an uncensored time, the expected value of
\eqn{F(T | T \leq C, X)} is \eqn{0.5 F(C | X)}. A smooth plot of
\eqn{F(T|X) - 0.5 F(C|X)} for uncensored \eqn{T} should be a flat line through
\eqn{y=0} if the model is well calibrated. A smooth plot of \eqn{2F(T|X)/F(C|X)} for
uncensored \eqn{T} should be a flat line through \eqn{y=1.0}. The smooth plot is
obtained by smoothing the (linear predictor, difference or ratio) pairs.
}
\usage{
val.surv(fit, newdata, S, est.surv, censor,
         u, fun, lim, evaluate=100, pred, maxdim=5, ...)

\method{print}{val.survh}(x, ...)

\method{plot}{val.survh}(x, lim, xlab, ylab,
                         riskdist=TRUE, add=FALSE,
                         scat1d.opts=list(nhistSpike=200), ...)

\method{plot}{val.surv}(x, group, g.group=4,
     what=c('difference','ratio'),
     type=c('l','b','p'),
     xlab, ylab, xlim, ylim, datadensity=TRUE, \dots)
}
\arguments{
\item{fit}{a fit object created by \code{cph} or \code{psm}}
\item{newdata}{
a data frame for which \code{val.surv} should obtain predicted survival probabilities.
If omitted, survival estimates are made for all of the subjects used in \code{fit}.
}
\item{S}{a \code{\link[survival]{Surv}} object}
\item{est.surv}{
a vector of estimated survival probabilities corresponding to times in the first
column of \code{S}.
}
\item{censor}{
a vector of censoring times. Only the censoring times for uncensored observations are
used.
}
\item{u}{a single numeric follow-up time}
\item{fun}{a function that transforms survival probabilities into the scale of the
linear predictor. If \code{fit} is given, and represents either a Cox, Weibull, or
exponential fit, \code{fun} is automatically set to log(-log(p)).}
\item{lim}{a 2-vector specifying limits of predicted survival probabilities for
obtaining estimated actual probabilities at time \code{u}. Default for \code{val.surv}
is the limits for predictions from \code{datadist}, which for large \eqn{n} is the
10th smallest and 10th largest predicted survival probability. For
\code{plot.val.survh}, the default for \code{lim} is the range of the combination of
predicted probabilities and calibrated actual probabilities. \code{lim} is used for
both axes of the calibration plot.}
\item{evaluate}{the number of evenly spaced points over the range of predicted
probabilities. This defines the points at which calibrated predictions are obtained
for plotting.}
\item{pred}{a vector of points at which to evaluate predicted probabilities,
overriding \code{lim}}
\item{maxdim}{see \code{\link[polspline]{hare}}}
\item{x}{result of \code{val.surv}}
\item{xlab}{x-axis label.
For \code{plot.val.survh}, defaults for \code{xlab} and \code{ylab} come from \code{u}
and the units of measurement for the raw survival times.}
\item{ylab}{y-axis label}
\item{riskdist}{set to \code{FALSE} to not call \code{scat1d} to draw the distribution
of predicted (uncalibrated) probabilities}
\item{add}{set to \code{TRUE} if adding to an existing plot}
\item{scat1d.opts}{a \code{list} of options to pass to \code{scat1d}. By default, the
option \code{nhistSpike=200} is passed so that a spike histogram is used if the sample
size exceeds 200.}
\item{\dots}{When \code{u} is given to \code{val.surv}, \dots represents optional
arguments to \code{hare}. It can represent arguments to pass to \code{plot} or
\code{lines} for \code{plot.val.survh}. Otherwise, \dots contains optional arguments
for \code{plsmo} or \code{plot}. For \code{print.val.survh}, \dots is ignored.}
\item{group}{
a grouping variable. If numeric this variable is grouped into \code{g.group} quantile
groups (default is quartiles). \code{group}, \code{g.group}, \code{what}, and
\code{type} apply when \code{u} is not given.}
\item{g.group}{
number of quantile groups to use when \code{group} is given and variable is numeric.
}
\item{what}{
the quantity to plot when \code{censor} was in effect. The default is to show the
difference between cumulative probabilities and their expectation given the censoring
time. Set \code{what="ratio"} to show the ratio instead.
}
\item{type}{
Set to the default (\code{"l"}) to plot the trend line only, \code{"b"} to plot both
individual subjects' ratios and trend lines, or \code{"p"} to plot only points.
}
\item{xlim,ylim}{
axis limits for \code{plot.val.surv} when the \code{censor} variable was used.
}
\item{datadensity}{
By default, \code{plot.val.surv} will show the data density on each curve that is
created as a result of \code{censor} being present. Set \code{datadensity=FALSE} to
suppress these tick marks drawn by \code{scat1d}.
}
}
\value{a list of class \code{"val.surv"} or \code{"val.survh"}}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\references{
Cox DR, Snell EJ (1968): A general definition of residuals (with discussion). JRSSB
30:248--275.

Kooperberg C, Stone C, Truong Y (1995): Hazard regression. JASA 90:78--94.

May M, Royston P, Egger M, Justice AC, Sterne JAC (2004): Development and validation
of a prognostic model for survival time data: application to prognosis of HIV positive
patients treated with antiretroviral therapy. Stat in Med 23:2375--2398.

Stallard N (2009): Simple tests for the external validation of mortality prediction
scores. Stat in Med 28:377--388.
}
\seealso{
\code{\link{validate}}, \code{\link{calibrate}}, \code{\link[polspline]{hare}},
\code{\link[Hmisc]{scat1d}}, \code{\link{cph}}, \code{\link{psm}},
\code{\link{groupkm}}
}
\examples{
# Generate failure times from an exponential distribution
set.seed(123)              # so can reproduce results
n <- 1000
age <- 50 + 12*rnorm(n)
sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4)))
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
t <- -log(runif(n))/h
units(t) <- 'Year'
label(t) <- 'Time to Event'
ev <- ifelse(t <= cens, 1, 0)
t <- pmin(t, cens)
S <- Surv(t, ev)

# First validate true model used to generate data

# If hare is available, make a smooth calibration plot for 1-year
# survival probability where we predict 1-year survival using the
# known true population survival probability
# In addition, use groupkm to show that grouping predictions into
# intervals and computing Kaplan-Meier estimates is not as accurate.

if('polspline' \%in\% row.names(installed.packages())) {
  s1 <- exp(-h*1)
  w <- val.surv(est.surv=s1, S=S, u=1, fun=function(p)log(-log(p)))
  plot(w, lim=c(.85,1), scat1d.opts=list(nhistSpike=200, side=1))
  groupkm(s1, S, m=100, u=1, pl=TRUE, add=TRUE)
}

# Now validate the true model using residuals

w <- val.surv(est.surv=exp(-h*t), S=S)
plot(w)
plot(w, group=sex)  # stratify by sex

# Now fit an exponential model and validate
# Note this is not really a validation as we're using the
# training data here
f <- psm(S ~ age + sex, dist='exponential', y=TRUE)
w <- val.surv(f)
plot(w, group=sex)

# We know the censoring time on every subject, so we can
# compare the predicted Pr[T <= observed T | T>c, X] to
# its expectation 0.5 Pr[T <= C | X] where C = censoring time
# We plot a ratio that should equal one
w <- val.surv(f, censor=cens)
plot(w)
plot(w, group=age, g=3)   # stratify by tertile of age
}
\keyword{models}
\keyword{regression}
\keyword{smooth}
\keyword{survival}
\concept{model validation}
\concept{predictive accuracy}
rms/man/ExProb.Rd0000644000176200001440000000663213751567615013337 0ustar liggesusers\name{ExProb}
\alias{ExProb}
\alias{ExProb.orm}
\alias{plot.ExProb}
\title{Function Generator For Exceedance Probabilities}
\description{
For an \code{orm} object, generates a function for computing the estimates of the
function Prob(Y>=y) given one or more values of the linear predictor using the
reference (median) intercept. This function can optionally be evaluated at only a set
of user-specified \code{y} values; otherwise a right-step function is returned. There
is a plot method for plotting the step functions, and if more than one linear
predictor was evaluated, multiple step functions are drawn. \code{ExProb} is
especially useful for \code{\link{nomogram}}.

Optionally a normal approximation for a confidence interval for exceedance
probabilities will be computed using the delta method, if \code{conf.int > 0} is
specified to the function generated from calling \code{ExProb}. In that case, a
\code{"lims"} attribute is included in the result computed by the derived cumulative
probability function.
}
\usage{
ExProb(object, \dots)

\method{ExProb}{orm}(object, codes = FALSE, ...)
\method{plot}{ExProb}(x, \dots, data=NULL,
     xlim=NULL, xlab=x$yname, ylab=expression(Prob(Y>=y)),
     col=par('col'), col.vert='gray85', pch=20, pch.data=21,
     lwd=par('lwd'), lwd.data=lwd, lty.data=2, key=TRUE)
}
\arguments{
\item{object}{a fit object from \code{orm}}
\item{codes}{if \code{TRUE}, \code{ExProb} uses the integer codes
\eqn{1,2,\ldots,k} for the \eqn{k}-level response instead of its original unique
values}
\item{\dots}{ignored for \code{ExProb}. Passed to \code{plot} for \code{plot.ExProb}}
\item{data}{Specify \code{data} if you want to add stratified empirical probabilities
to the graph. If \code{data} is a numeric vector, it is assumed that no groups are
present. Otherwise \code{data} must be a list or data frame where the first variable
is the grouping variable (corresponding to what made the linear predictor vary) and
the second variable is the data vector for the \code{y} variable. The rows of data
should be sorted to be in order of the linear predictor argument.
}
\item{x}{an object created by running the function created by \code{ExProb}}
\item{xlim}{limits for x-axis; default is range of observed \code{y}}
\item{xlab}{x-axis label}
\item{ylab}{y-axis label}
\item{col}{color for horizontal lines and points}
\item{col.vert}{color for vertical discontinuities}
\item{pch}{plotting symbol for predicted curves}
\item{lwd}{line width for predicted curves}
\item{pch.data,lwd.data,lty.data}{plotting parameters for data}
\item{key}{set to \code{FALSE} to suppress key in plot if \code{data} is given}
}
\value{
\code{ExProb} returns an R function. Running the function returns an object of class
\code{"ExProb"}.
}
\author{Frank Harrell and Shengxin Tu}
\seealso{\code{\link{orm}}, \code{\link{Quantile.orm}}}
\examples{
set.seed(1)
x1 <- runif(200)
yvar <- x1 + runif(200)
f <- orm(yvar ~ x1)
d <- ExProb(f)
lp <- predict(f, newdata=data.frame(x1=c(.2,.8)))
w <- d(lp)
s1 <- abs(x1 - .2) < .1
s2 <- abs(x1 - .8) < .1
plot(w, data=data.frame(x1=c(rep(.2, sum(s1)), rep(.8, sum(s2))),
                        yvar=c(yvar[s1], yvar[s2])))

qu <- Quantile(f)
abline(h=c(.1,.5), col='gray80')
abline(v=qu(.5, lp), col='gray80')
abline(v=qu(.9, lp), col='green')
}
rms/man/rms-internal.Rd0000644000176200001440000000060313667163524014544 0ustar liggesusers\name{rms-internal}
\title{Internal rms functions}
\alias{annotateAnova}
\alias{coxphFit}
\alias{lm.pfit}
\alias{ols.influence}
\alias{plotmathAnova}
\alias{probabilityFamilies}
\alias{prType}
\alias{as.data.frame.rms}
\alias{survreg.auxinfo}
\alias{val.probg}
\description{Internal rms functions}
\details{These are not to be called by the user or are undocumented.}
\keyword{internal}
rms/man/setPb.Rd0000644000176200001440000000516313701123070013171 0ustar liggesusers\name{setPb}
\alias{setPb}
\title{Progress Bar for Simulations}
\description{
Depending on prevailing \code{options(showprogress=)} and availability of the
\code{tcltk} package, sets up a progress bar and creates a function for simple
updating of the bar as iterations progress. Setting
\code{options(showprogress=FALSE)} or \code{options(showprogress='none')} results in
no progress being shown. Setting the option to something other than \code{"tk"} or
\code{"none"} results in the console being used to show the current iteration number
and intended number of iterations, the same as if \code{tcltk} is not installed.
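For example, a minimal sketch (the loop body is hypothetical):
\preformatted{
pb <- setPb(1000, type='Bootstrap', every=10)  # report every 10th iteration
for(i in 1:1000) {
  pb(i)
  ## ... computations for iteration i ...
}
}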
It is not recommended that \code{"tk"} be used for simulations requiring fewer than 10
seconds for more than 100 iterations, as the time required to update the pop-up window
will be more than the time required to do the simulations. This problem can be solved
by specifying, for example, \code{every=10} to \code{setPb} or to the function created
by \code{setPb}, or by using \code{options(showevery=10)} before \code{setPb} is
called. If \code{options(showprogress=)} is not specified, progress is shown in the
console with an iteration counter.
}
\usage{
setPb(n, type = c("Monte Carlo Simulation", "Bootstrap",
"Cross-Validation"), label, usetk = TRUE, onlytk=FALSE, every=1)
}
\arguments{
\item{n}{maximum number of iterations}
\item{type}{type of simulation. Used for the progress bar title if \code{tcltk} is
being used.}
\item{label}{used to customize the bar label if present, overriding \code{type}}
\item{usetk}{set to \code{FALSE} to override, acting as though the \code{tcltk}
package were not installed}
\item{onlytk}{set to \code{TRUE} to not write to the console even if \code{tcltk} is
unavailable and \code{showprogress} is not \code{FALSE} or \code{"none"}}
\item{every}{print a message for every \code{every} iterations}
}
\value{a function that should be called by the user once per iteration, specifying the
iteration number as the sole argument}
\author{Frank Harrell}
\seealso{\code{\link[tcltk:tkProgressBar]{tkProgressBar}},
\code{\link[tcltk:tkProgressBar]{setTkProgressBar}}}
\examples{
\dontrun{
options(showprogress=TRUE)   # same as ='tk'
pb <- setPb(1000)
for(i in 1:1000) {
   pb(i)   # pb(i, every=10) to only show for multiples of 10
   # your calculations
}
# Force rms functions that do simulations to not report progress
options(showprogress='none')
# For functions that do simulations to use the console instead of pop-up
# even when tcltk is installed
options(showprogress='console')
pb <- setPb(1000, label='Random Sampling')
}
}
\keyword{utilities}
rms/man/plotp.Predict.Rd0000644000176200001440000001576213714237251014660 0ustar liggesusers\name{plotp.Predict}
\alias{plotp.Predict}
\title{Plot Effects of Variables Estimated by a Regression Model Fit Using plotly}
\description{
Uses \code{plotly} graphics (without using ggplot2) to plot the effect of one or two
predictors on the linear predictor or X beta scale, or on some transformation of that
scale. The first argument specifies the result of the \code{Predict} function. The
predictor is always plotted in its original coding. Hover text shows point estimates,
confidence intervals, and on the leftmost x-point, adjustment variable settings.

If \code{Predict} was run with no variable settings, so that each predictor is varied
one at a time, the result of \code{plotp.Predict} is a list with two elements. The
first, named \code{Continuous}, is a \code{plotly} object containing a single graphic
with all the continuous predictors varying. The second, named \code{Categorical}, is
a \code{plotly} object containing a single graphic with all the categorical predictors
varying. If there are no categorical predictors, the value returned by
\code{plotp.Predict} is a single \code{plotly} object and not a list of objects.

If \code{rdata} is given, a spike histogram is drawn showing the location/density of
data values for the \eqn{x}-axis variable. If there is a superposition variable that
generated separate curves, the data density specific to each class of points is shown.
The histograms are drawn by \code{histSpikeg}.
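For example, a minimal sketch of working with the returned list (assuming \code{fit}
is an existing \code{rms} fit having both continuous and categorical predictors):
\preformatted{
p <- plotp(Predict(fit))
p$Continuous    # plotly graphic with all continuous predictors varying
p$Categorical   # plotly graphic with all categorical predictors varying
}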
To plot effects instead of estimates (e.g., treatment differences as a function of
interacting factors) see \code{contrast.rms} and \code{summary.rms}.

Unlike \code{ggplot.Predict}, \code{plotp.Predict} does not handle \code{groups},
\code{anova}, or \code{perim} arguments.
}
\usage{
\method{plotp}{Predict}(data, subset, xlim, ylim, xlab, ylab,
     rdata=NULL, nlevels=3, vnames=c('labels','names'),
     histSpike.opts=list(frac=function(f) 0.01 +
         0.02 * sqrt(f - 1)/sqrt(max(f, 2) - 1), side=1, nint=100),
     ncols=3, width=800, ...)
}
\arguments{
\item{data}{a data frame created by \code{Predict}}
\item{subset}{a subsetting expression for restricting the rows of \code{data} that
are used in plotting. For example, predictions may have been requested for males and
females but one wants to plot only females.}
\item{xlim}{ignored unless predictors were specified to \code{Predict}. Specifies the
x-axis limits of the single plot produced.}
\item{ylim}{
Range for plotting on response variable axis. Computed by default and includes the
confidence limits.
}
\item{xlab}{
Label for \code{x}-axis when a single plot is made, i.e., when a predictor is
specified to \code{Predict}. Default is one given to \code{asis, rcs}, etc., which may
have been the \code{"label"} attribute of the variable.
}
\item{ylab}{
Label for \code{y}-axis. If \code{fun} is not given, default is \code{"log Odds"} for
\code{lrm}, \code{"log Relative Hazard"} for \code{cph}, name of the response variable
for \code{ols}, \code{T} or \code{log(T)} for \code{psm}, or \code{"X * Beta"}
otherwise. Specify \code{ylab=NULL} to omit \code{y}-axis labels.
}
\item{rdata}{a data frame containing the original raw data on which the regression
model was based, or at least containing the \eqn{x}-axis and grouping variable. If
\code{rdata} is present and contains the needed variables, the original data are added
to the graph in the form of a spike histogram using \code{histSpikeg} in the Hmisc
package.
}
\item{nlevels}{
A non-numeric x-axis variable with \code{nlevels} or fewer unique values will cause a
horizontal dot plot to be drawn instead of an x-y plot.
}
\item{vnames}{applies to the case where multiple plots are produced separately by
predictor. Set to \code{'names'} to use variable names instead of labels for these
small plots.}
\item{histSpike.opts}{a list containing named elements that specify parameters to
\code{\link[Hmisc:scat1d]{histSpikeg}} when \code{rdata} is given. The \code{col}
parameter is usually derived from other plotting information and not specified by the
user.}
\item{ncols}{number of columns of plots to use when plotting multiple continuous
predictors}
\item{width}{width in pixels for \code{plotly} graphics}
\item{\dots}{ignored}
}
\value{a \code{plotly} object or a list containing two elements, each one a
\code{plotly} object}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\references{
Fox J, Hong J (2009): Effect displays in R for multinomial and proportional-odds logit
models: Extensions to the effects package. J Stat Software 32 No. 1.
} \seealso{ \code{\link{Predict}}, \code{\link{rbind.Predict}}, \code{\link{datadist}}, \code{\link{predictrms}}, \code{\link{contrast.rms}}, \code{\link{summary.rms}}, \code{\link{rms}}, \code{\link{rmsMisc}}, \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}}, \code{\link[Hmisc]{histSpikeg}}, \code{\link[Hmisc]{Overview}} } \examples{ \dontrun{ n <- 350 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) + .01 * (blood.pressure - 120) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) p <- plotp(Predict(fit)) p$Continuous p$Categorical # When using Rmarkdown html notebook, best to use # prList(p) to render the two objects plotp(Predict(fit), rdata=llist(blood.pressure, age))$Continuous # spike histogram plot for two of the predictors p <- Predict(fit, name=c('age','cholesterol')) # Make 2 plots plotp(p) p <- Predict(fit, age, sex) plotp(p, rdata=llist(age,sex)) # rdata= allows rug plots (1-dimensional scatterplots) # on each sex's curve, with sex- # specific density of age # If data were in data frame could have used that p <- Predict(fit, age=seq(20,80,length=100), sex='male', fun=plogis) # works if datadist not used plotp(p, ylab='P') # plot predicted probability in place of log odds # Compute predictions for three predictors, with superpositioning or # conditioning on sex, combined into one graph p1 <- Predict(fit, age, sex) p2 <- Predict(fit, cholesterol, sex) p3 <- Predict(fit, blood.pressure, sex) p <- rbind(age=p1, cholesterol=p2, blood.pressure=p3) plotp(p, ncols=2, rdata=llist(age, cholesterol, sex)) } } \keyword{models} \keyword{hplot} \keyword{htest} rms/man/bootcov.Rd0000644000176200001440000004561713751537327013622 0ustar liggesusers\name{bootcov} \alias{bootcov} \alias{bootplot} \alias{bootplot.bootcov} \alias{confplot} \alias{confplot.bootcov} \alias{histdensity} \title{Bootstrap Covariance and Distribution for Regression Coefficients} \description{ \code{bootcov} computes a bootstrap estimate of the covariance matrix for a set of regression coefficients from \code{ols}, \code{lrm}, \code{cph}, \code{psm}, \code{Rq}, and any other fit where \code{x=TRUE, y=TRUE} was used to store the data used in making the original regression fit and where an appropriate \code{fitter} function is provided here. The estimates obtained are not conditional on the design matrix, but are instead unconditional estimates. For small sample sizes, this will make a difference as the unconditional variance estimates are larger. This function will also obtain bootstrap estimates corrected for cluster sampling (intra-cluster correlations) when a "working independence" model was used to fit data which were correlated within clusters. This is done by substituting cluster sampling with replacement for the usual simple sampling with replacement. 
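For example, a minimal sketch of the cluster bootstrap (here \code{y}, \code{x1},
\code{x2}, and \code{subject.id} are hypothetical variables, with \code{subject.id}
identifying the correlated groupings):
\preformatted{
f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE)       # working independence fit
g <- bootcov(f, cluster=subject.id, B=200)  # resample whole clusters
}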
\code{bootcov} has an option (\code{coef.reps}) that causes all of the regression
coefficient estimates from all of the bootstrap re-samples to be saved, facilitating
computation of nonparametric bootstrap confidence limits and plotting of the
distributions of the coefficient estimates (using histograms and kernel smoothing
estimates).

The \code{loglik} option facilitates the calculation of simultaneous confidence
regions from quantities of interest that are functions of the regression coefficients,
using the method of Tibshirani (1996). With Tibshirani's method, one computes the
objective criterion (-2 log likelihood evaluated at the bootstrap estimate of
\eqn{\beta}{beta} but with respect to the original design matrix and response vector)
for the original fit as well as for all of the bootstrap fits. The confidence set of
the regression coefficients is the set of all coefficients that are associated with
objective function values that are less than or equal to, say, the 0.95 quantile of
the vector of \code{B + 1} objective function values. For the coefficients satisfying
this condition, predicted values are computed at a user-specified design matrix
\code{X}, and minima and maxima of these predicted values (over the qualifying
bootstrap repetitions) are computed to derive the final simultaneous confidence band.

The \code{bootplot} function takes the output of \code{bootcov} and either plots a
histogram and kernel density estimate of specified regression coefficients (or linear
combinations of them through the use of a specified design matrix \code{X}), or a
\code{qqnorm} plot of the quantities of interest to check for normality of the maximum
likelihood estimates. \code{bootplot} draws vertical lines at specified quantiles of
the bootstrap distribution, and returns these quantiles for possible printing by the
user. Bootstrap estimates may optionally be transformed by a user-specified function
\code{fun} before plotting.

The \code{confplot} function also uses the output of \code{bootcov} but to compute and
optionally plot nonparametric bootstrap pointwise confidence limits or (by default)
Tibshirani (1996) simultaneous confidence sets. A design matrix must be specified to
allow \code{confplot} to compute quantities of interest such as predicted values
across a range of values or differences in predicted values (plots of effects of
changing one or more predictor variable values).

\code{bootplot} and \code{confplot} are actually generic functions, with the
particular functions \code{bootplot.bootcov} and \code{confplot.bootcov} automatically
invoked for \code{bootcov} objects.

A service function called \code{histdensity} is also provided (for use with
\code{bootplot}). It runs \code{hist} and \code{density} on the same plot, using twice
as many classes as the default for \code{hist}, and 1.5 times the default \code{width}
used by \code{density}.

A comprehensive example demonstrates the use of all of the functions.
}
\usage{
bootcov(fit, cluster, B=200, fitter,
        coef.reps=TRUE, loglik=FALSE,
        pr=FALSE, maxit=15, eps=0.0001, group=NULL, stat=NULL)

bootplot(obj, which=1 : ncol(Coef), X,
         conf.int=c(.9,.95,.99),
         what=c('density', 'qqnorm', 'box'),
         fun=function(x) x, labels., \dots)

confplot(obj, X, against,
         method=c('simultaneous','pointwise'),
         conf.int=0.95, fun=function(x)x,
         add=FALSE, lty.conf=2, \dots)

histdensity(y, xlab, nclass, width, mult.width=1, \dots)
}
\arguments{
\item{fit}{
a fit object containing components \code{x} and \code{y}.
For fits from \code{cph}, the \code{"strata"} attribute of the \code{x} component is
used to obtain the vector of stratum codes.
}
\item{obj}{
an object created by \code{bootcov} with \code{coef.reps=TRUE}.
}
\item{X}{
a design matrix specified to \code{confplot}. See \code{predict.rms} or
\code{contrast.rms}. For \code{bootplot}, \code{X} is optional.
}
\item{y}{
a vector to pass to \code{histdensity}. \code{NA}s are ignored.
}
\item{cluster}{
a variable indicating groupings. \code{cluster} may be any type of vector (factor,
character, integer). Unique values of \code{cluster} indicate possibly correlated
groupings of observations. Note the data used in the fit and stored in \code{fit$x}
and \code{fit$y} may have had observations containing missing values deleted. It is
assumed that if there were any NAs, an \code{naresid} function exists for the class of
\code{fit}. This function restores NAs so that the rows of the design matrix coincide
with \code{cluster}.
}
\item{B}{
number of bootstrap repetitions. Default is 200.
}
\item{fitter}{
the name of a function with arguments \code{(x,y)} that will fit bootstrap samples.
Default is taken from the class of \code{fit} if it is \code{ols}, \code{lrm},
\code{cph}, \code{psm}, \code{Rq}.
}
\item{coef.reps}{
set to \code{TRUE} if you want to store a matrix of all bootstrap regression
coefficient estimates in the returned component \code{boot.Coef}.
}
\item{loglik}{
set to \code{TRUE} to store -2 log likelihoods for each bootstrap model, evaluated
against the original \code{x} and \code{y} data. The default is to do this when
\code{coef.reps} is specified as \code{TRUE}. The use of \code{loglik=TRUE} assumes
that an \code{oos.loglik} method exists for the type of model being analyzed, to
calculate out-of-sample -2 log likelihoods (see \code{rmsMisc}). After the \code{B}
-2 log likelihoods (stored in the element named \code{boot.loglik} in the returned fit
object), the \code{B+1}st element is the -2 log likelihood for the original model fit.
}
\item{pr}{
set to \code{TRUE} to print the current sample number to monitor progress.
}
\item{maxit}{maximum number of iterations, to pass to \code{fitter}}
\item{eps}{argument to pass to various fitters}
\item{group}{
a grouping variable used to stratify the sample upon bootstrapping. This allows one to
handle k-sample problems, i.e., each bootstrap sample will be forced to select the
same number of observations from each level of group as the number appearing in the
original dataset. You may specify both \code{group} and \code{cluster}.
}
\item{stat}{
a single character string specifying the name of a \code{stats} element produced by
the fitting function to save over the bootstrap repetitions. The vector of saved
statistics will be in the \code{boot.stats} part of the list returned by
\code{bootcov}.
}
\item{which}{
one or more integers specifying which regression coefficients to plot for
\code{bootplot}
}
\item{conf.int}{
a vector (for \code{bootplot}, default is \code{c(.9,.95,.99)}) or scalar (for
\code{confplot}, default is \code{.95}) confidence level.
}
\item{what}{
for \code{bootplot}, specifies whether a density plot or a q-q plot is made; for
\code{what='box'}, \code{ggplot2} is used to produce a box plot of all coefficients
over the bootstrap reps
}
\item{fun}{
for \code{bootplot} or \code{confplot} specifies a function used to translate the
quantities of interest before analysis. A common choice is \code{fun=exp} to compute
anti-logs, e.g., odds ratios.
}
\item{labels.}{
a vector of labels for labeling the axes in plots produced by \code{bootplot}.
Default is row names of \code{X} if there are any, or sequential integers.
}
\item{\dots}{
For \code{bootplot} these are optional arguments passed to \code{histdensity}. Also
may be optional arguments passed to \code{plot} by \code{confplot} or optional
arguments passed to \code{hist} from \code{histdensity}, such as \code{xlim} and
\code{breaks}. The argument \code{probability=TRUE} is always passed to \code{hist}.
}
\item{against}{
For \code{confplot}, specifying \code{against} causes a plot to be made (or added to).
The \code{against} variable is associated with rows of \code{X} and is used as the
x-coordinates.
}
\item{method}{
specifies whether \code{"pointwise"} or \code{"simultaneous"} confidence regions are
derived by \code{confplot}. The default is simultaneous.
}
\item{add}{
set to \code{TRUE} to add to an existing plot, for \code{confplot}
}
\item{lty.conf}{
line type for plotting confidence bands in \code{confplot}. Default is 2 for dotted
lines.
}
\item{xlab}{
label for x-axis for \code{histdensity}. Default is \code{label} attribute or argument
name if there is no \code{label}.
}
\item{nclass}{
passed to \code{hist} if present
}
\item{width}{
passed to \code{density} if present
}
\item{mult.width}{
multiplier by which to adjust the default \code{width} passed to \code{density}.
Default is 1.
}
}
\value{
a new fit object with class of the original object and with the element
\code{orig.var} added. \code{orig.var} is the covariance matrix of the original fit.
Also, the original \code{var} component is replaced with the new bootstrap estimates.
The component \code{boot.coef} is also added. This contains the mean bootstrap
estimates of regression coefficients (with a log scale element added if applicable).
\code{boot.Coef} is added if \code{coef.reps=TRUE}. \code{boot.loglik} is added if
\code{loglik=TRUE}. If \code{stat} is specified an additional vector
\code{boot.stats} will be contained in the returned object. \code{B} contains the
number of successfully fitted bootstrap resamples. A component \code{clusterInfo} is
added to contain elements \code{name} and \code{n} holding the name of the
\code{cluster} variable and the number of clusters.

\code{bootplot} returns a vector (or matrix) of the quantities of interest and the
requested quantiles of them. \code{confplot} returns three vectors: \code{fitted},
\code{lower}, and \code{upper}.
}
\section{Side Effects}{
\code{bootcov} prints if \code{pr=TRUE}
}
\details{
If the fit has a scale parameter (e.g., a fit from \code{psm}), the log of the
individual bootstrap scale estimates are added to the vector of parameter estimates
and a column and row for the log scale are added to the new covariance matrix (the old
covariance matrix also has this row and column).

For \code{Rq} fits, the \code{tau}, \code{method}, and \code{hs} arguments are taken
from the original fit.
}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
\email{fh@fharrell.com}\cr
Bill Pikounis\cr
Biometrics Research Department\cr
Merck Research Laboratories\cr
\email{v\_bill\_pikounis@merck.com}
}
\references{
Feng Z, McLerran D, Grizzle J (1996): A comparison of statistical methods for
clustered data analysis with Gaussian error. Stat in Med 15:1793--1806.

Tibshirani R, Knight K (1996): Model search and inference by bootstrap "bumping".
Department of Statistics, University of Toronto.
Technical report available from \cr http://www-stat.stanford.edu/~tibs/. Presented at the Joint Statistical Meetings, Chicago, August 1996. } \seealso{ \code{\link{robcov}}, \code{\link{sample}}, \code{\link{rms}}, \code{\link{lm.fit}}, \code{\link{lrm.fit}}, \code{\link[survival]{survival-internal}}, \code{\link{predab.resample}}, \code{\link{rmsMisc}}, \code{\link{Predict}}, \code{\link{gendata}}, \code{\link{contrast.rms}}, \code{\link{Predict}}, \code{\link{setPb}}, \code{multiwayvcov::cluster.boot} } \examples{ set.seed(191) x <- exp(rnorm(200)) logit <- 1 + x/2 y <- ifelse(runif(200) <= plogis(logit), 1, 0) f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE) g <- bootcov(f, B=50, pr=TRUE) anova(g) # using bootstrap covariance estimates fastbw(g) # using bootstrap covariance estimates beta <- g$boot.Coef[,1] hist(beta, nclass=15) #look at normality of parameter estimates qqnorm(beta) # bootplot would be better than these last two commands # A dataset contains a variable number of observations per subject, # and all observations are laid out in separate rows. The responses # represent whether or not a given segment of the coronary arteries # is occluded. Segments of arteries may not operate independently # in the same patient. We assume a "working independence model" to # get estimates of the coefficients, i.e., that estimates assuming # independence are reasonably efficient. The job is then to get # unbiased estimates of variances and covariances of these estimates. set.seed(2) n.subjects <- 30 ages <- rnorm(n.subjects, 50, 15) sexes <- factor(sample(c('female','male'), n.subjects, TRUE)) logit <- (ages-50)/5 prob <- plogis(logit) # true prob not related to sex id <- sample(1:n.subjects, 300, TRUE) # subjects sampled multiple times table(table(id)) # frequencies of number of obs/subject age <- ages[id] sex <- sexes[id] # In truth, observations within subject are independent: y <- ifelse(runif(300) <= prob[id], 1, 0) f <- lrm(y ~ lsp(age,50)*sex, x=TRUE, y=TRUE) g <- bootcov(f, id, B=50) # usually do B=200 or more diag(g$var)/diag(f$var) # add ,group=w to re-sample from within each level of w anova(g) # cluster-adjusted Wald statistics # fastbw(g) # cluster-adjusted backward elimination plot(Predict(g, age=30:70, sex='female')) # cluster-adjusted confidence bands # Get design effects based on inflation of the variances when compared # with bootstrap estimates which ignore clustering g2 <- bootcov(f, B=50) diag(g$var)/diag(g2$var) # Get design effects based on pooled tests of factors in model anova(g2)[,1] / anova(g)[,1] # Simulate binary data where there is a strong # age x sex interaction with linear age effects # for both sexes, but where not knowing that # we fit a quadratic model. 
Use the bootstrap # to get bootstrap distributions of various # effects, and to get pointwise and simultaneous # confidence limits set.seed(71) n <- 500 age <- rnorm(n, 50, 10) sex <- factor(sample(c('female','male'), n, rep=TRUE)) L <- ifelse(sex=='male', 0, .1*(age-50)) y <- ifelse(runif(n)<=plogis(L), 1, 0) f <- lrm(y ~ sex*pol(age,2), x=TRUE, y=TRUE) b <- bootcov(f, B=50, loglik=TRUE, pr=TRUE) # better: B=500 par(mfrow=c(2,3)) # Assess normality of regression estimates bootplot(b, which=1:6, what='qq') # They appear somewhat non-normal # Plot histograms and estimated densities # for 6 coefficients w <- bootplot(b, which=1:6) # Print bootstrap quantiles w$quantiles # Show box plots for bootstrap reps for all coefficients bootplot(b, what='box') # Estimate regression function for females # for a sequence of ages ages <- seq(25, 75, length=100) label(ages) <- 'Age' # Plot fitted function and pointwise normal- # theory confidence bands par(mfrow=c(1,1)) p <- Predict(f, age=ages, sex='female') plot(p) # Save curve coordinates for later automatic # labeling using labcurve in the Hmisc library curves <- vector('list',8) curves[[1]] <- with(p, list(x=age, y=lower)) curves[[2]] <- with(p, list(x=age, y=upper)) # Add pointwise normal-distribution confidence # bands using unconditional variance-covariance # matrix from the 500 bootstrap reps p <- Predict(b, age=ages, sex='female') curves[[3]] <- with(p, list(x=age, y=lower)) curves[[4]] <- with(p, list(x=age, y=upper)) dframe <- expand.grid(sex='female', age=ages) X <- predict(f, dframe, type='x') # Full design matrix # Add pointwise bootstrap nonparametric # confidence limits p <- confplot(b, X=X, against=ages, method='pointwise', add=TRUE, lty.conf=4) curves[[5]] <- list(x=ages, y=p$lower) curves[[6]] <- list(x=ages, y=p$upper) # Add simultaneous bootstrap confidence band p <- confplot(b, X=X, against=ages, add=TRUE, lty.conf=5) curves[[7]] <- list(x=ages, y=p$lower) curves[[8]] <- list(x=ages, y=p$upper) lab <- c('a','a','b','b','c','c','d','d') labcurve(curves, lab, pl=TRUE) # Now get bootstrap simultaneous confidence set for # female:male odds ratios for a variety of ages dframe <- expand.grid(age=ages, sex=c('female','male')) X <- predict(f, dframe, type='x') # design matrix f.minus.m <- X[1:100,] - X[101:200,] # First 100 rows are for females. 
By subtracting
# design matrices we are able to get Xf*Beta - Xm*Beta
# = (Xf - Xm)*Beta
confplot(b, X=f.minus.m, against=ages,
         method='pointwise', ylab='F:M Log Odds Ratio')
confplot(b, X=f.minus.m, against=ages, lty.conf=3, add=TRUE)

# contrast.rms makes it easier to compute the design matrix for use
# in bootstrapping contrasts:

f.minus.m <- contrast(f, list(sex='female',age=ages),
                         list(sex='male',  age=ages))$X
confplot(b, X=f.minus.m)

# For a quadratic binary logistic regression model use bootstrap
# bumping to estimate coefficients under a monotonicity constraint
set.seed(177)
n <- 400
x <- runif(n)
logit <- 3*(x^2-1)
y <- rbinom(n, size=1, prob=plogis(logit))
f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE)
k <- coef(f)
k
vertex <- -k[2]/(2*k[3])
vertex

# Outside [0,1] so fit satisfies monotonicity constraint within
# x in [0,1], i.e., original fit is the constrained MLE

g <- bootcov(f, B=50, coef.reps=TRUE, loglik=TRUE)
bootcoef <- g$boot.Coef    # 50x3 matrix (B rows, 3 coefficients)
vertex <- -bootcoef[,2]/(2*bootcoef[,3])
table(cut2(vertex, c(0,1)))
mono <- !(vertex >= 0 & vertex <= 1)
mean(mono)    # estimate of Prob{monotonicity in [0,1]}

var(bootcoef)          # var-cov matrix for unconstrained estimates
var(bootcoef[mono,])   # for constrained estimates

# Find second-best vector of coefficient estimates, i.e., best
# from among bootstrap estimates
g$boot.Coef[order(g$boot.loglik[-length(g$boot.loglik)])[1],]
# Note closeness to MLE

\dontrun{
# Get the bootstrap distribution of the difference in two ROC areas for
# two binary logistic models fitted on the same dataset. This analysis
# does not adjust for the bias in ROC area (C-index) due to overfitting.
# The same random number seed is used in two runs to enforce pairing.

set.seed(17)
x1 <- rnorm(100)
x2 <- rnorm(100)
y <- sample(0:1, 100, TRUE)
f <- lrm(y ~ x1, x=TRUE, y=TRUE)
g <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE)
set.seed(3)
f <- bootcov(f, stat='C')
set.seed(3)
g <- bootcov(g, stat='C')
dif <- g$boot.stats - f$boot.stats
hist(dif)
quantile(dif, c(.025,.25,.5,.75,.975))
# Compute a z-test statistic. Note that comparing ROC areas is far less
# powerful than likelihood or Brier score-based methods
z <- (g$stats['C'] - f$stats['C'])/sd(dif)
names(z) <- NULL
c(z=z, P=2*pnorm(-abs(z)))
}
}
\keyword{models}
\keyword{regression}
\keyword{htest}
\keyword{methods}
\keyword{hplot}
\concept{bootstrap}
\concept{sampling}
rms/man/bj.Rd0000644000176200001440000002563713714237251012520 0ustar liggesusers\name{bj}
\alias{bj}
\alias{bj.fit}
\alias{residuals.bj}
\alias{print.bj}
\alias{validate.bj}
\alias{bjplot}
\title{
Buckley-James Multiple Regression Model
}
\description{
\code{bj} fits the Buckley-James distribution-free least squares multiple regression
model to a possibly right-censored response variable. This model reduces to ordinary
least squares if there is no censoring. By default, model fitting is done after
taking logs of the response variable. \code{bj} uses the \code{rms} class for
automatic \code{anova}, \code{fastbw}, \code{validate}, \code{Function},
\code{nomogram}, \code{summary}, \code{plot}, \code{bootcov}, and other functions.
The \code{bootcov} function may be worth using with \code{bj} fits, as the properties
of the Buckley-James covariance matrix estimator are not fully known for strange
censoring patterns.

For the \code{print} method, format of output is controlled by the user previously
running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the
default), \code{"latex"}, or \code{"html"}.
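For example, a minimal sketch (assuming \code{f} is an existing \code{bj} fit):
\preformatted{
options(prType='html')   # or 'latex'; default is 'plain'
f                        # the print method now renders HTML output
}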
The \code{residuals.bj} function exists mainly to compute residuals and to censor them (i.e., return them as \code{Surv} objects) just as the original failure time variable was censored. These residuals are useful for checking whether the model also satisfies certain distributional assumptions. To get these residuals, the fit must have specified \code{y=TRUE}. The \code{bjplot} function is a special plotting function for objects created by \code{bj} with \code{x=TRUE, y=TRUE} in effect. It produces three scatterplots for every covariate in the model: the first plots the original situation, where censored data are distinguished from non-censored data by a different plotting symbol. In the second plot, called a renovated plot, vertical lines show how censored data were changed by the procedure, and the third is equal to the second, but without vertical lines. Imputed data are again distinguished from the non-censored by a different symbol. The \code{validate} method for \code{bj} validates the Somers' \code{Dxy} rank correlation between predicted and observed responses, accounting for censoring. The primary fitting function for \code{bj} is \code{bj.fit}, which does not allow missing data and expects a full design matrix as input. } \usage{ bj(formula, data=environment(formula), subset, na.action=na.delete, link="log", control, method='fit', x=FALSE, y=FALSE, time.inc) \method{print}{bj}(x, digits=4, long=FALSE, coefs=TRUE, title="Buckley-James Censored Data Regression", \dots) \method{residuals}{bj}(object, type=c("censored","censored.normalized"),\dots) bjplot(fit, which=1:dim(X)[[2]]) \method{validate}{bj}(fit, method="boot", B=40, bw=FALSE,rule="aic",type="residual",sls=.05,aics=0, force=NULL, estimates=TRUE, pr=FALSE, tol=1e-7, rel.tolerance=1e-3, maxiter=15, \dots) bj.fit(x, y, control) } \arguments{ \item{formula}{ an S statistical model formula. Interactions up to third order are supported. The left hand side must be a \code{Surv} object. } \item{data,subset,na.action}{the usual statistical model fitting arguments} \item{fit}{ a fit created by \code{bj}, required for all functions except \code{bj}. } \item{x}{ a design matrix with or without a first column of ones, to pass to \code{bj.fit}. All models will have an intercept. For \code{print.bj}, \code{x} is the result of \code{bj}. For \code{bj}, set \code{x=TRUE} to include the design matrix in the fit object. } \item{y}{ a \code{Surv} object to pass to \code{bj.fit} as the two-column response variable. Only right censoring is allowed, and there need not be any censoring. For \code{bj}, set \code{y} to \code{TRUE} to include the two-column response matrix, with the event/censoring indicator in the second column. The first column will be transformed according to \code{link}, and depending on \code{na.action}, rows with missing data in the predictors or the response will be deleted. } \item{link}{ set to, for example, \code{"log"} (the default) to model the log of the response, or \code{"identity"} to model the untransformed response.
} \item{control}{ a list containing any or all of the following components: \code{iter.max} (maximum number of iterations allowed, default is 20), \code{eps} (convergence criterion: convergence is assumed when the ratio of sum of squared errors from one iteration to the next is between 1-\code{eps} and 1+\code{eps}), \code{trace} (set to \code{TRUE} to monitor iterations), \code{tol} (matrix singularity criterion, default is 1e-7), and \code{max.cycle} (in case of nonconvergence the program looks for a cycle that repeats itself, default is 30). } \item{method}{ set to \code{"model.frame"} or \code{"model.matrix"} to return one of those objects rather than the model fit. } \item{time.inc}{ setting for default time spacing. Default is 30 if time variable has \code{units="Day"}, 1 otherwise, unless maximum follow-up time \eqn{< 1}. Then max time/10 is used as \code{time.inc}. If \code{time.inc} is not given and max time/default \code{time.inc} is \eqn{> 25}, \code{time.inc} is increased. } \item{digits}{ number of significant digits to print if not 4. } \item{long}{ set to \code{TRUE} to print the correlation matrix for parameter estimates } \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{object}{the result of \code{bj}} \item{type}{ type of residual desired. Default is censored unnormalized residuals, defined as link(Y) - linear.predictors, where the link function was usually the log function. You can specify \code{type="censored.normalized"} to divide the residuals by the estimate of \code{sigma}. } \item{which}{ vector of integers or character strings naming elements of the design matrix (the names of the original predictors if they entered the model linearly); \code{bjplot} makes plots only for the variables listed in \code{which} (names or numbers). } \item{B,bw,rule,sls,aics,force,estimates,pr,tol,rel.tolerance,maxiter}{see \code{\link{predab.resample}}} \item{\dots}{ ignored for \code{print}; passed through to \code{predab.resample} for \code{validate} } } \value{ \code{bj} returns a fit object with similar information to what \code{survreg}, \code{psm}, \code{cph} would store as well as what \code{rms} stores and \code{units} and \code{time.inc}. \code{residuals.bj} returns a \code{Surv} object. One of the components of the \code{fit} object produced by \code{bj} (and \code{bj.fit}) is a vector called \code{stats} which contains the following named elements: \code{"Obs", "Events", "d.f.","error d.f.","sigma","g"}. Here \code{sigma} is the estimate of the residual standard deviation. \code{g} is the \eqn{g}-index. If the link function is \code{"log"}, the \eqn{g}-index on the anti-log scale is also returned as \code{gr}. } \details{ The program implements the algorithm as described in the original article by Buckley & James. Also, we have used the original Buckley & James prescription for computing the variance/covariance estimator. This is based on non-censored observations only and does not have any theoretical justification, but has been shown in simulation studies to behave well. Our experience confirms this view. Convergence is rather slow with this method, so you may want to increase the number of iterations. Our experience shows that often, in particular with high censoring, 100 iterations is not too many.
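For example, specifying \code{control=list(iter.max=100)} in the \code{bj} call raises the iteration limit from its default of 20 to 100.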
Sometimes the method will not converge, but will instead enter a loop of repeating values (this is due to the discrete nature of the Kaplan-Meier estimator and usually happens with small sample sizes). The program will look for such a loop and return the average betas. It will also issue a warning message and give the size of the cycle (usually less than 6). } \author{ Janez Stare\cr Department of Biomedical Informatics\cr Ljubljana University\cr Ljubljana, Slovenia\cr \email{janez.stare@mf.uni-lj.si} Harald Heinzl\cr Department of Medical Computer Sciences\cr Vienna University\cr Vienna, Austria\cr \email{harald.heinzl@akh-wien.ac.at} Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr \email{fh@fharrell.com} } \references{ Buckley JJ, James IR. Linear regression with censored data. Biometrika 1979; 66:429--36. Miller RG, Halpern J. Regression with censored data. Biometrika 1982; 69:521--31. James IR, Smith PJ. Consistency results for linear regression with censored data. Ann Statist 1984; 12:590--600. Lai TL, Ying Z. Large sample theory of a modified Buckley-James estimator for regression analysis with censored data. Ann Statist 1991; 19:1370--402. Hillis SL. Residual plots for the censored data linear regression model. Stat in Med 1995; 14:2023--2036. Jin Z, Lin DY, Ying Z. On least-squares regression with censored data. Biometrika 2006; 93:147--161. } \seealso{ \code{\link{rms}}, \code{\link{psm}}, \code{\link{survreg}}, \code{\link{cph}}, \code{\link[survival]{Surv}}, \code{\link[Hmisc]{na.delete}}, \code{\link[Hmisc]{na.detail.response}}, \code{\link{datadist}}, \code{\link[Hmisc]{rcorr.cens}}, \code{\link{GiniMd}}, \code{\link{prModFit}}, \code{\link{dxy.cens}} } \examples{ suppressWarnings(RNGversion("3.5.0")) set.seed(1) ftime <- 10*rexp(200) stroke <- ifelse(ftime > 10, 0, 1) ftime <- pmin(ftime, 10) units(ftime) <- "Month" age <- rnorm(200, 70, 10) hospital <- factor(sample(c('a','b'),200,TRUE)) dd <- datadist(age, hospital) options(datadist="dd") # Prior to rms 6.0 and R 4.0 the following worked with 5 knots f <- bj(Surv(ftime, stroke) ~ rcs(age,3) + hospital, x=TRUE, y=TRUE) # add link="identity" to use a censored normal regression model instead # of a lognormal one anova(f) fastbw(f) validate(f, B=15) plot(Predict(f, age, hospital)) # needs datadist since no explicit age,hosp. coef(f) # look at regression coefficients coef(psm(Surv(ftime, stroke) ~ rcs(age,3) + hospital, dist='lognormal')) # compare with coefficients from likelihood-based # log-normal regression model # use dist='gau' not under R r <- resid(f, 'censored.normalized') survplot(npsurv(r ~ 1), conf='none') # plot Kaplan-Meier estimate of # survival function of standardized residuals survplot(npsurv(r ~ cut2(age, g=2)), conf='none') # may desire both strata to be n(0,1) options(datadist=NULL) } \keyword{models} \keyword{survival} rms/man/datadist.Rd0000644000176200001440000001557213714237251013725 0ustar liggesusers\name{datadist} \alias{datadist} \alias{print.datadist} \title{ Distribution Summaries for Predictor Variables } \description{ For a given set of variables or a data frame, determines summaries of variables for effect and plotting ranges, values to adjust to, and overall ranges for \code{Predict}, \code{plot.Predict}, \code{ggplot.Predict}, \code{summary.rms}, \code{survplot}, and \code{nomogram.rms}.
If \code{datadist} is called before a model fit and the resulting object pointed to with \code{options(datadist="name")}, the data characteristics will be stored with the fit by \code{Design()}, so that later predictions and summaries of the fit will not need to access the original data used in the fit. Alternatively, you can specify the values for each variable in the model when using these functions, or specify the values of some of them and let the functions look up the remainder (of, say, adjustment levels) from an object created by \code{datadist}. The best method is probably to run \code{datadist} once before any models are fitted, storing the distribution summaries for all potential variables. Adjustment values are \code{0} for binary variables, the most frequent category (or optionally the first category level) for categorical (\code{factor}) variables, the middle level for \code{ordered factor} variables, and medians for continuous variables. See descriptions of \code{q.display} and \code{q.effect} for how display and effect ranges are chosen for continuous variables. } \usage{ datadist(\dots, data, q.display, q.effect=c(0.25, 0.75), adjto.cat=c('mode','first'), n.unique=10) \method{print}{datadist}(x, \dots) # options(datadist="dd") # used by summary, plot, survplot, sometimes predict # For dd substitute the name of the result of datadist } \arguments{ \item{...}{ a list of variable names, separated by commas, a single data frame, or a fit with \code{Design} information. The first element in this list may also be an object created by an earlier call to \code{datadist}; then the later variables are added to this \code{datadist} object. For a fit object, the variables named in the fit are retrieved from the active data frame or from the location pointed to by \code{data=frame number} or \code{data="data frame name"}. For \code{print}, this argument is ignored. } \item{data}{ a data frame or a search position. If \code{data} is a search position, it is assumed that a data frame is attached in that position, and all its variables are used. If you specify both individual variables in \code{\dots} and \code{data}, the two sets of variables are combined. Unless the first argument is a fit object, \code{data} must be an integer. } \item{q.display}{ set of two quantiles for computing the range of continuous variables to use in displaying regression relationships. Defaults are \eqn{q} and \eqn{1-q}, where \eqn{q=10/max(n,200)}, and \eqn{n} is the number of non-missing observations. Thus for \eqn{n<200}, the .05 and .95 quantiles are used. For \eqn{n\geq 200}, the \eqn{10^{th}} smallest and \eqn{10^{th}} largest values are used. If you specify \code{q.display}, those quantiles are used whether or not \eqn{n<200}. } \item{q.effect}{ set of two quantiles for computing the range of continuous variables to use in estimating regression effects. Defaults are c(.25,.75), which yields inter-quartile-range odds ratios, etc. } \item{adjto.cat}{ default is \code{"mode"}, indicating that the modal (most frequent) category for categorical (factor) variables is the adjust-to setting. Specify \code{"first"} to use the first level of factor variables as the adjustment values. In the case of many levels having the maximum frequency, the first such level is used for \code{"mode"}. } \item{n.unique}{ variables having \code{n.unique} or fewer unique values are considered to be discrete variables in that their unique values are stored in the \code{values} list.
This will affect how functions such as \code{nomogram.Design} determine whether variables are discrete or not. } \item{x}{result of \code{datadist}} } \value{ a list of class \code{"datadist"} with the following components \item{limits}{ a \eqn{7 \times k} matrix, where \eqn{k} is the number of variables. The 7 rows correspond to the low value for estimating the effect of the variable, the value to adjust the variable to when examining other variables, the high value for effect, low value for displaying the variable, the high value for displaying it, and the overall lowest and highest values. } \item{values}{ a named list, with one vector of unique values for each numeric variable having no more than \code{n.unique} unique values }} \details{ For categorical variables, the 7 limits are set to character strings (factors) which correspond to \code{c(NA,adjto.level,NA,1,k,1,k)}, where \code{k} is the number of levels. For ordered variables with numeric levels, the limits are set to \code{c(L,M,H,L,H,L,H)}, where \code{L} is the lowest level, \code{M} is the middle level, and \code{H} is the highest level. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link[Hmisc]{describe}}, \code{\link{Predict}}, \code{\link{summary.rms}} } \examples{ \dontrun{ d <- datadist(data=1) # use all variables in search pos. 1 d <- datadist(x1, x2, x3) page(d) # if your options(pager) leaves up a pop-up # window, this is a useful guide in analyses d <- datadist(data=2) # all variables in search pos. 2 d <- datadist(data=my.data.frame) d <- datadist(my.data.frame) # same as previous. Run for all potential vars. d <- datadist(x2, x3, data=my.data.frame) # combine variables d <- datadist(x2, x3, q.effect=c(.1,.9), q.display=c(0,1)) # uses inter-decile range odds ratios, # total range of variables for regression function plots d <- datadist(d, z) # add a new variable to an existing datadist options(datadist="d") #often a good idea, to store info with fit f <- ols(y ~ x1*x2*x3) options(datadist=NULL) #default at start of session f <- ols(y ~ x1*x2) d <- datadist(f) #info not stored in `f' d$limits["Adjust to","x1"] <- .5 #reset adjustment level to .5 options(datadist="d") f <- lrm(y ~ x1*x2, data=mydata) d <- datadist(f, data=mydata) options(datadist="d") f <- lrm(y ~ x1*x2) #datadist not used - specify all values for summary(f, x1=c(200,500,800), x2=c(1,3,5)) # obtaining predictions plot(Predict(f, x1=200:800, x2=3)) # or ggplot() # Change reference value to get a relative odds plot for a logistic model d$limits$age[2] <- 30 # make 30 the reference value for age # Could also do: d$limits["Adjust to","age"] <- 30 fit <- update(fit) # make new reference value take effect plot(Predict(fit, age, ref.zero=TRUE, fun=exp), ylab='Age=x:Age=30 Odds Ratio') # or ggplot() } } \keyword{models} \keyword{nonparametric} \keyword{regression} rms/man/contrast.Rd0000644000176200001440000004415013725476127013770 0ustar liggesusers\name{contrast.rms} \alias{contrast} \alias{contrast.rms} \alias{print.contrast.rms} \title{General Contrasts of Regression Coefficients} \description{ This function computes one or more contrasts of the estimated regression coefficients in a fit from one of the functions in rms, along with standard errors, confidence limits, t or Z statistics, and P-values.
General contrasts are handled by obtaining the design matrix for two sets of predictor settings (\code{a}, \code{b}) and subtracting the corresponding rows of the two design matrices to obtain a new contrast design matrix for testing the \code{a} - \code{b} differences. This allows for quite general contrasts (e.g., estimated differences in means between a 30 year old female and a 40 year old male). This can also be used to obtain a series of contrasts in the presence of interactions (e.g., female:male log odds ratios for several ages when the model contains age by sex interaction). Another use of \code{contrast} is to obtain center-weighted (Type III test) and subject-weighted (Type II test) estimates in a model containing treatment by center interactions. For the latter case, you can specify \code{type="average"} and an optional \code{weights} vector to average the within-center treatment contrasts. The design contrast matrix computed by \code{contrast.rms} can be used by other functions. When the model was fitted by a Bayesian function such as \code{blrm}, highest posterior density intervals for contrasts are computed instead, along with the posterior probability that the contrast is positive. \code{posterior.summary} specifies whether posterior mean/median/mode is to be used for contrast point estimates. \code{contrast.rms} also allows one to specify four settings to contrast, yielding contrasts that are double differences - the difference between the first two settings (\code{a} - \code{b}) and the last two (\code{a2} - \code{b2}). This allows assessment of interactions. If \code{usebootcoef=TRUE}, the fit was run through \code{bootcov}, and \code{conf.type="individual"}, the confidence intervals are bootstrap nonparametric percentile confidence intervals, basic bootstrap, or BCa intervals, obtained on contrasts evaluated on all bootstrap samples. By omitting the \code{b} argument, \code{contrast} can be used to obtain an average or weighted average of a series of predicted values, along with a confidence interval for this average. This can be useful for "unconditioning" on one of the predictors (see the next to last example). Specifying \code{type="joint"}, and specifying at least as many contrasts as needed to span the space of a complex test, one can make multiple degree of freedom tests flexibly and simply. Redundant contrasts will be ignored in the joint test. See the examples below. These include an example of an "incomplete interaction test" involving only two of three levels of a categorical variable (the test also tests the main effect). When more than one contrast is computed, the list created by \code{contrast.rms} is suitable for plotting (with error bars or bands) with \code{xYplot} or \code{Dotplot} (see the last example before the \code{type="joint"} examples). When \code{fit} is the result of a Bayesian model fit and \code{fun} is specified, \code{contrast.rms} operates altogether differently. \code{a} and \code{b} must both be specified and \code{a2, b2} not specified. \code{fun} is evaluated on the estimates separately on \code{a} and \code{b} and the subtraction is deferred. So even in the absence of interactions, when \code{fun} is nonlinear, the settings of factors (predictors) will not cancel out and estimates of differences will be covariate-specific (unless there are no covariates in the model besides the one being varied to get from \code{a} to \code{b}).
} \usage{ contrast(fit, \dots) \method{contrast}{rms}(fit, a, b, a2, b2, ycut=NULL, cnames=NULL, fun=NULL, funint=TRUE, type=c("individual", "average", "joint"), conf.type=c("individual","simultaneous"), usebootcoef=TRUE, boot.type=c("percentile","bca","basic"), posterior.summary=c('mean', 'median', 'mode'), weights="equal", conf.int=0.95, tol=1e-7, expand=TRUE, \dots) \method{print}{contrast.rms}(x, X=FALSE, fun=function(u)u, jointonly=FALSE, prob=0.95, \dots) } \arguments{ \item{fit}{ a fit of class \code{"rms"} } \item{a}{ a list containing settings for all predictors that you do not wish to set to default (adjust-to) values. Usually you will specify two variables in this list, one set to a constant and one to a sequence of values, to obtain contrasts for the sequence of values of an interacting factor. The \code{gendata} function will generate the necessary combinations and default values for unspecified predictors, depending on the \code{expand} argument. } \item{b}{ another list that generates the same number of observations as \code{a}, unless one of the two lists generates only one observation. In that case, the design matrix generated from the shorter list will have its rows replicated so that the contrasts assess several differences against the one set of predictor values. This is useful for comparing multiple treatments with control, for example. If \code{b} is missing, the design matrix generated from \code{a} is analyzed alone. } \item{a2}{an optional third list of settings of predictors} \item{b2}{an optional fourth list of settings of predictors. Mandatory if \code{a2} is given.} \item{ycut}{used if the fit is a constrained partial proportional odds model, to specify the single value or vector of values (corresponding to the multiple contrasts) of the response variable to use in forming contrasts. When there is non-proportional odds, odds ratios will vary over levels of the response variable. When there are multiple contrasts and only one value is given for \code{ycut}, that value will be propagated to all contrasts. To show the effect of non-proportional odds, let \code{ycut} vary.} \item{cnames}{ vector of character strings naming the contrasts when \code{type!="average"}. Usually \code{cnames} is not necessary as \code{contrast.rms} tries to name the contrasts by examining which predictors are varying consistently in the two lists. \code{cnames} will be needed when you contrast "non-comparable" settings, e.g., you compare \code{list(treat="drug", age=c(20,30))} with \code{list(treat="placebo", age=c(40,50))} } \item{fun}{a function to evaluate on the linear predictor for each of \code{a} and \code{b}. Applies to Bayesian model fits. Also, a function to transform the contrast, SE, and lower and upper confidence limits before printing. For example, specify \code{fun=exp} to anti-log them for logistic models.} \item{type}{ set \code{type="average"} to average the individual contrasts (e.g., to obtain a Type II or III contrast). Set \code{type="joint"} to jointly test all non-redundant contrasts with a multiple degree of freedom test and no averaging. } \item{conf.type}{ The default type of confidence interval computed for a given individual (1 d.f.) contrast is a pointwise confidence interval. Set \code{conf.type="simultaneous"} to use the \code{multcomp} package's \code{glht} and \code{confint} functions to compute confidence intervals with simultaneous (family-wise) coverage, thus adjusting for multiple comparisons.
Note that individual P-values are not adjusted for multiplicity. } \item{usebootcoef}{ If \code{fit} was the result of \code{bootcov} but you want to use the bootstrap covariance matrix instead of the nonparametric percentile, basic, or BCa method for confidence intervals (which uses all the bootstrap coefficients), specify \code{usebootcoef=FALSE}.} \item{boot.type}{set to \code{'bca'} to compute BCa confidence limits or \code{'basic'} to use the basic bootstrap. The default is to compute percentile intervals.} \item{posterior.summary}{By default the posterior mean is used. Specify \code{posterior.summary='median'} to instead use the posterior median and likewise \code{posterior.summary='mode'}. Unlike other functions, \code{contrast.rms} does not default to \code{'mode'} because point estimates come from contrasts and not the original model coefficients point estimates.} \item{weights}{ a numeric vector, used when \code{type="average"}, to obtain weighted contrasts } \item{conf.int}{ confidence level for confidence intervals for the contrasts (HPD interval probability for Bayesian analyses) } \item{tol}{tolerance for \code{qr} function for determining which contrasts are redundant, and for inverting the covariance matrix involved in a joint test} \item{expand}{set to \code{FALSE} to have \code{gendata} not generate all possible combinations of predictor settings. This is useful when getting contrasts over irregular predictor settings.} \item{\dots}{passed to \code{print} for main output. A useful thing to pass is \code{digits=4}.} \item{x}{result of \code{contrast}} \item{X}{ set \code{X=TRUE} to print design matrix used in computing the contrasts (or the average contrast) } \item{funint}{set to \code{FALSE} if \code{fun} is not a function such as the result of \code{Mean}, \code{Quantile}, or \code{ExProb} that contains an \code{intercepts} argument} \item{jointonly}{set to \code{TRUE} to omit printing of individual contrasts} \item{prob}{highest posterior density interval probability when the fit was Bayesian and \code{fun} was specified to \code{contrast.rms}} } \value{ a list of class \code{"contrast.rms"} containing the elements \code{Contrast}, \code{SE}, \code{Z}, \code{var}, \code{df.residual}, \code{Lower}, \code{Upper}, \code{Pvalue}, \code{X}, \code{cnames}, \code{redundant}, which denote the contrast estimates, standard errors, Z or t-statistics, variance matrix, residual degrees of freedom (this is \code{NULL} if the model was not \code{ols}), lower and upper confidence limits, 2-sided P-value, design matrix, contrast names (or \code{NULL}), and a logical vector denoting which contrasts are redundant with the other contrasts. If there are any redundant contrasts, when the results of \code{contrast} are printed, an asterisk is printed at the start of the corresponding lines. The object also contains \code{ctype} indicating what method was used for computing confidence intervals.
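For example, \code{contrast(f, list(sex='female', age=ages), list(sex='male', age=ages))$X} extracts a contrast design matrix that can be passed as the \code{X} argument of \code{confplot} when bootstrapping contrasts.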
} \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University School of Medicine\cr fh@fharrell.com } \seealso{ \code{\link{Predict}}, \code{\link{gendata}}, \code{\link{bootcov}}, \code{\link{summary.rms}}, \code{\link{anova.rms}}, } \examples{ set.seed(1) age <- rnorm(200,40,12) sex <- factor(sample(c('female','male'),200,TRUE)) logit <- (sex=='male') + (age-40)/5 y <- ifelse(runif(200) <= plogis(logit), 1, 0) f <- lrm(y ~ pol(age,2)*sex) anova(f) # Compare a 30 year old female to a 40 year old male # (with or without age x sex interaction in the model) contrast(f, list(sex='female', age=30), list(sex='male', age=40)) # Test for interaction between age and sex, duplicating anova contrast(f, list(sex='female', age=30), list(sex='male', age=30), list(sex='female', age=c(40,50)), list(sex='male', age=c(40,50)), type='joint') # Duplicate overall sex effect in anova with 3 d.f. contrast(f, list(sex='female', age=c(30,40,50)), list(sex='male', age=c(30,40,50)), type='joint') # For a model containing two treatments, centers, and treatment # x center interaction, get 0.95 confidence intervals separately # by center center <- factor(sample(letters[1 : 8], 500, TRUE)) treat <- factor(sample(c('a','b'), 500, TRUE)) y <- 8*(treat == 'b') + rnorm(500, 100, 20) f <- ols(y ~ treat*center) lc <- levels(center) contrast(f, list(treat='b', center=lc), list(treat='a', center=lc)) # Get 'Type III' contrast: average b - a treatment effect over # centers, weighting centers equally (which is almost always # an unreasonable thing to do) contrast(f, list(treat='b', center=lc), list(treat='a', center=lc), type='average') # Get 'Type II' contrast, weighting centers by the number of # subjects per center. Print the design contrast matrix used. k <- contrast(f, list(treat='b', center=lc), list(treat='a', center=lc), type='average', weights=table(center)) print(k, X=TRUE) # Note: If other variables had interacted with either treat # or center, we may want to list settings for these variables # inside the list()'s, so as to not use default settings # For a 4-treatment study, get all comparisons with treatment 'a' treat <- factor(sample(c('a','b','c','d'), 500, TRUE)) y <- 8*(treat == 'b') + rnorm(500, 100, 20) dd <- datadist(treat, center); options(datadist='dd') f <- ols(y ~ treat*center) lt <- levels(treat) contrast(f, list(treat=lt[-1]), list(treat=lt[ 1]), cnames=paste(lt[-1], lt[1], sep=':'), conf.int=1 - .05 / 3) # Compare each treatment with average of all others for(i in 1 : length(lt)) { cat('Comparing with', lt[i], '\n\n') print(contrast(f, list(treat=lt[-i]), list(treat=lt[ i]), type='average')) } options(datadist=NULL) # Six ways to get the same thing, for a variable that # appears linearly in a model and does not interact with # any other variables. We estimate the change in y per # unit change in a predictor x1. Methods 4, 5 also # provide confidence limits. Method 6 computes nonparametric # bootstrap confidence limits. Methods 2-6 can work # for models that are nonlinear or non-additive in x1. # For that case more care is needed in choice of settings # for x1 and the variables that interact with x1. 
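# (Aside, not one of the six methods below: a minimal sketch of the
# a2/b2 arguments described earlier, which form a double difference
# for assessing an interaction.  f2 is a hypothetical fit containing
# a treat x sex interaction; the estimate is the female treatment
# effect minus the male treatment effect.)
\dontrun{
contrast(f2, list(treat='b', sex='female'), list(treat='a', sex='female'),
             list(treat='b', sex='male'),   list(treat='a', sex='male'))
}
# The six methods follow.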
\dontrun{ coef(fit)['x1'] # method 1 diff(predict(fit, gendata(x1=c(0,1)))) # method 2 g <- Function(fit) # method 3 g(x1=1) - g(x1=0) summary(fit, x1=c(0,1)) # method 4 k <- contrast(fit, list(x1=1), list(x1=0)) # method 5 print(k, X=TRUE) fit <- update(fit, x=TRUE, y=TRUE) # method 6 b <- bootcov(fit, B=500) contrast(b, list(x1=1), list(x1=0)) # In a model containing age, race, and sex, # compute an estimate of the mean response for a # 50 year old male, averaged over the races using # observed frequencies for the races as weights f <- ols(y ~ age + race + sex) contrast(f, list(age=50, sex='male', race=levels(race)), type='average', weights=table(race)) # For a Bayesian model get the highest posterior interval for the # difference in two nonlinear functions of predicted values # Start with the mean from a proportional odds model g <- blrm(y ~ x) M <- Mean(g) contrast(g, list(x=1), list(x=0), fun=M) # For the median we have to make sure that contrast can pass the # per-posterior-draw vector of intercepts through qu <- Quantile(g) med <- function(lp, intercepts) qu(0.5, lp, intercepts=intercepts) contrast(g, list(x=1), list(x=0), fun=med) } # Plot the treatment effect (drug - placebo) as a function of age # and sex in a model in which age nonlinearly interacts with treatment # for females only set.seed(1) n <- 800 treat <- factor(sample(c('drug','placebo'), n,TRUE)) sex <- factor(sample(c('female','male'), n,TRUE)) age <- rnorm(n, 50, 10) y <- .05*age + (sex=='female')*(treat=='drug')*.05*abs(age-50) + rnorm(n) f <- ols(y ~ rcs(age,4)*treat*sex) d <- datadist(age, treat, sex); options(datadist='d') # show separate estimates by treatment and sex ggplot(Predict(f, age, treat, sex='female')) ggplot(Predict(f, age, treat, sex='male')) ages <- seq(35,65,by=5); sexes <- c('female','male') w <- contrast(f, list(treat='drug', age=ages, sex=sexes), list(treat='placebo', age=ages, sex=sexes)) # add conf.type="simultaneous" to adjust for having done 14 contrasts xYplot(Cbind(Contrast, Lower, Upper) ~ age | sex, data=w, ylab='Drug - Placebo') w <- as.data.frame(w[c('age','sex','Contrast','Lower','Upper')]) ggplot(w, aes(x=age, y=Contrast)) + geom_point() + facet_grid(sex ~ .)
+ geom_errorbar(aes(ymin=Lower, ymax=Upper), width=0) xYplot(Cbind(Contrast, Lower, Upper) ~ age, groups=sex, data=w, ylab='Drug - Placebo', method='alt bars') options(datadist=NULL) # Examples of type='joint' contrast tests set.seed(1) x1 <- rnorm(100) x2 <- factor(sample(c('a','b','c'), 100, TRUE)) dd <- datadist(x1, x2); options(datadist='dd') y <- x1 + (x2=='b') + rnorm(100) # First replicate a test statistic from anova() f <- ols(y ~ x2) anova(f) contrast(f, list(x2=c('b','c')), list(x2='a'), type='joint') # Repeat with a redundancy; compare a vs b, a vs c, b vs c contrast(f, list(x2=c('a','a','b')), list(x2=c('b','c','c')), type='joint') # Get a test of association of a continuous predictor with y # First assume linearity, then cubic f <- lrm(y>0 ~ x1 + x2) anova(f) contrast(f, list(x1=1), list(x1=0), type='joint') # a minimum set of contrasts xs <- seq(-2, 2, length=20) contrast(f, list(x1=0), list(x1=xs), type='joint') # All contrasts were redundant except for the first, because of # linearity assumption f <- lrm(y>0 ~ pol(x1,3) + x2) anova(f) contrast(f, list(x1=0), list(x1=xs), type='joint') print(contrast(f, list(x1=0), list(x1=xs), type='joint'), jointonly=TRUE) # All contrasts were redundant except for the first 3, because of # cubic regression assumption # Now do something that is difficult to do without cryptic contrast # matrix operations: Allow each of the three x2 groups to have a different # shape for the x1 effect where x1 is quadratic. Test whether there is # a difference in mean levels of y for x2='b' vs. 'c' or whether # the shape or slope of x1 is different between x2='b' and x2='c' regardless # of how they differ when x2='a'. In other words, test whether the mean # response differs between group b and c at any value of x1. # This is a 3 d.f. test (intercept, linear, quadratic effects) and is # a better approach than subsetting the data to remove x2='a' then # fitting a simpler model, as it uses a better estimate of sigma from # all the data. f <- ols(y ~ pol(x1,2) * x2) anova(f) contrast(f, list(x1=xs, x2='b'), list(x1=xs, x2='c'), type='joint') # Note: If using a spline fit, there should be at least one value of # x1 between any two knots and beyond the outer knots. options(datadist=NULL) } \keyword{htest} \keyword{models} \keyword{regression} rms/man/print.cph.Rd0000644000176200001440000000253013020543251014015 0ustar liggesusers\name{print.cph} \alias{print.cph} \title{Print cph Results} \description{ Formatted printing of an object of class \code{cph}. Prints strata frequencies, parameter estimates, standard errors, z-statistics, numbers of missing values, etc. Format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. } \usage{ \method{print}{cph}(x, digits=4, table=TRUE, conf.int=FALSE, coefs=TRUE, title='Cox Proportional Hazards Model', \dots) } \arguments{ \item{x}{fit object} \item{digits}{number of digits to right of decimal place to print} \item{conf.int}{ set to e.g. .95 to print 0.95 confidence intervals on simple hazard ratios (which are usually meaningless as one-unit changes are seldom relevant and most models contain multiple terms per predictor) } \item{table}{ set to \code{FALSE} to suppress event frequency statistics } \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. 
Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{\dots}{arguments passed to \code{prModFit}} } \seealso{ \code{\link[survival]{coxph}}, \code{\link{prModFit}} } \keyword{print} rms/man/summary.rms.Rd0000644000176200001440000003173513715305277014431 0ustar liggesusers\name{summary.rms} \alias{summary.rms} \alias{print.summary.rms} \alias{latex.summary.rms} \alias{html.summary.rms} \alias{plot.summary.rms} \title{Summary of Effects in Model} \description{ \code{summary.rms} forms a summary of the effects of each factor. When \code{summary} is used to estimate odds or hazard ratios for continuous variables, it allows the levels of interacting factors to be easily set, as well as allowing the user to choose the interval for the effect. This method of estimating effects allows for nonlinearity in the predictor. Factors requiring multiple parameters are handled, as \code{summary} obtains predicted values at the needed points and takes differences. By default, inter-quartile range effects (odds ratios, hazards ratios, etc.) are printed for continuous factors, and all comparisons with the reference level are made for categorical factors. \code{print.summary.rms} prints the results, \code{latex.summary.rms} and \code{html.summary.rms} typeset the results, and \code{plot.summary.rms} plots shaded confidence bars to display the results graphically. The longest confidence bar on each page is labeled with confidence levels (unless this bar has been ignored due to \code{clip}). By default, the following confidence levels are all shown: .9, .95, and .99, using blue of different transparencies. The \code{plot} method currently ignores bootstrap and Bayesian highest posterior density intervals but approximates intervals based on standard errors. The \code{html} method is for use with R Markdown using html. The \code{print} method will call the \code{latex} or \code{html} method if \code{options(prType=)} is set to \code{"latex"} or \code{"html"}. For \code{"latex"} printing through \code{print()}, the LaTeX table environment is turned off. If \code{usebootcoef=TRUE} and the fit was run through \code{bootcov}, the confidence intervals are bootstrap nonparametric percentile confidence intervals, basic bootstrap, or BCa intervals, obtained on contrasts evaluated on all bootstrap samples. If \code{options(grType='plotly')} is in effect and the \code{plotly} package is installed, \code{plotly} is used instead of base graphics to draw the point estimates and confidence limits when the \code{plot} method for \code{summary} is called. Colors and other graphical arguments to \code{plot.summary} are ignored in this case. Various special effects are implemented such as only drawing 0.95 confidence limits by default but including a legend that allows the other CLs to be activated. Hovering over point estimates shows adjustment values if there are any. \code{nbar} is not implemented for \code{plotly}.
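For example, running \code{options(grType='plotly')} before \code{plot(summary(fit))} produces the interactive version of the plot just described.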
} \usage{ \method{summary}{rms}(object, \dots, ycut=NULL, est.all=TRUE, antilog, conf.int=.95, abbrev=FALSE, vnames=c("names","labels"), conf.type=c('individual','simultaneous'), usebootcoef=TRUE, boot.type=c("percentile","bca","basic"), posterior.summary=c('mean', 'median', 'mode'), verbose=FALSE) \method{print}{summary.rms}(x, \dots, table.env=FALSE) \method{latex}{summary.rms}(object, title, table.env=TRUE, \dots) \method{html}{summary.rms}(object, digits=4, dec=NULL, \dots) \method{plot}{summary.rms}(x, at, log=FALSE, q=c(0.9, 0.95, 0.99), xlim, nbar, cex=1, nint=10, cex.main=1, clip=c(-1e30,1e30), main, col=rgb(red=.1,green=.1,blue=.8,alpha=c(.1,.4,.7)), col.points=rgb(red=.1,green=.1,blue=.8,alpha=1), pch=17, lwd=if(length(q) == 1) 3 else 2 : (length(q) + 1), digits=4, \dots) } \arguments{ \item{object}{ a \code{rms} fit object. Either \code{options(datadist)} should have been set before the fit, or \code{datadist()} and \code{options(datadist)} run before \code{summary}. For \code{latex}, it is the result of \code{summary}. } \item{\dots}{ For \code{summary}, omit list of variables to estimate effects for all predictors. Use a list of variables of the form \code{age, sex} to estimate using default ranges. Specify \code{age=50} for example to adjust age to 50 when testing other factors (this will only matter for factors that interact with age). Specify e.g. \code{age=c(40,60)} to estimate the effect of increasing age from 40 to 60. Specify \code{age=c(40,50,60)} to let age range from 40 to 60 and be adjusted to 50 when testing other interacting factors. For category factors, a single value specifies the reference cell and the adjustment value. For example, if \code{treat} has levels \code{"a", "b"} and \code{"c"} and \code{treat="b"} is given to \code{summary}, treatment \code{a} will be compared to \code{b} and \code{c} will be compared to \code{b}. Treatment \code{b} will be used when estimating the effect of other factors. Category variables can have category labels listed (in quotes), or an unquoted number that is a legal level, if all levels are numeric. You need only use the first few letters of each variable name - enough for unique identification. For variables not defined with \code{datadist}, you must specify 3 values, none of which are \code{NA}. Also represents other arguments to pass to \code{latex}; these are ignored for \code{print} and \code{plot}. } \item{ycut}{must be specified if the fit is a partial proportional odds model. Specifies the single value of the response variable used to estimate ycut-specific regression effects, e.g., odds ratios} \item{est.all}{ Set to \code{FALSE} to only estimate effects of variables listed. Default is \code{TRUE}. } \item{antilog}{ Set to \code{FALSE} to suppress printing of anti-logged effects. Default is \code{TRUE} if the model was fitted by \code{lrm} or \code{cph}. Antilogged effects will be odds ratios for logistic models and hazard ratios for proportional hazards models. } \item{conf.int}{ Defaults to \code{.95} for \code{95\%} confidence intervals of effects.} \item{abbrev}{ Set to \code{TRUE} to use the \code{abbreviate} function to shorten factor levels for categorical variables in the model.} \item{vnames}{ Set to \code{"labels"} to use variable labels to label effects. Default is \code{"names"} to use variable names.} \item{conf.type}{ The default type of confidence interval computed for a given individual (1 d.f.) contrast is a pointwise confidence interval.
Set \code{conf.type="simultaneous"} to use the \code{multcomp} package's \code{glht} and \code{confint} functions to compute confidence intervals with simultaneous (family-wise) coverage, thus adjusting for multiple comparisons. Contrasts are simultaneous only over groups of intervals computed together. } \item{usebootcoef}{ If \code{fit} was the result of \code{bootcov} but you want to use the bootstrap covariance matrix instead of the nonparametric percentile, basic, or BCa methods for confidence intervals (which uses all the bootstrap coefficients), specify \code{usebootcoef=FALSE}.} \item{boot.type}{set to \code{'bca'} to compute BCa confidence limits or to \code{'basic'} to use the basic bootstrap. The default is to compute percentile intervals.} \item{posterior.summary}{set to \code{'mode'} or \code{'median'} to use the posterior mode/median instead of the mean for point estimates of contrasts} \item{verbose}{set to \code{TRUE} when \code{conf.type='simultaneous'} to get output describing scope of simultaneous adjustments} \item{x}{result of \code{summary}} \item{title}{ \code{title} to pass to \code{latex}. Default is name of fit object passed to \code{summary} prefixed with \code{"summary"}.} \item{table.env}{see \code{\link[Hmisc]{latex}}} \item{digits,dec}{for \code{html.summary.rms}; \code{digits} is the number of significant digits for printing for effects, standard errors, and confidence limits. It is ignored if \code{dec} is given. The statistics are rounded to \code{dec} digits to the right of the decimal point if \code{dec} is given. \code{digits} is also the number of significant digits to format numeric hover text and labels for \code{plotly}.} \item{at}{ vector of coordinates at which to put tick mark labels on the main axis. If \code{log=TRUE}, \code{at} should be in anti-log units. } \item{log}{ Set to \code{TRUE} to plot on \eqn{X\beta}{X beta} scale but labeled with anti-logs. } \item{q}{scalar or vector of confidence coefficients to depict} \item{xlim}{ X-axis limits for \code{plot} in units of the linear predictors (log scale if \code{log=TRUE}). If \code{at} is specified and \code{xlim} is omitted, \code{xlim} is derived from the range of \code{at}. } \item{nbar}{ Sets up plot to leave room for \code{nbar} horizontal bars. Default is the number of non-interaction factors in the model. Set \code{nbar} to a larger value to keep too much surrounding space from appearing around horizontal bars. If \code{nbar} is smaller than the number of bars, the plot is divided into multiple pages with up to \code{nbar} bars on each page. } \item{cex}{\code{cex} parameter for factor labels.} \item{nint}{Number of tick mark numbers for \code{pretty}.} \item{cex.main}{\code{cex} parameter for main title. Set to \code{0} to suppress the title.} \item{clip}{ confidence limits outside the interval \code{c(clip[1], clip[2])} will be ignored, and \code{clip} will also be respected when computing \code{xlim} when \code{xlim} is not specified. \code{clip} should be in the units of \code{fun(x)}. If \code{log=TRUE}, \code{clip} should be in \eqn{X\beta}{X beta} units. } \item{main}{ main title. Default is inferred from the model and value of \code{log}, e.g., \code{"log Odds Ratio"}. } \item{col}{vector of colors, one per value of \code{q}} \item{col.points}{color for point estimates} \item{pch}{symbol for point estimates.
Default is solid triangle.} \item{lwd}{line width for confidence intervals, corresponding to \code{q}} } \value{ For \code{summary.rms}, a matrix of class \code{summary.rms} with rows corresponding to factors in the model and columns containing the low and high values for the effects, the range for the effects, the effect point estimates (difference in predicted values for high and low factor values), the standard error of this effect estimate, and the lower and upper confidence limits. If \code{fit$scale.pred} has a second level, two rows appear for each factor, the second corresponding to anti--logged effects. Non--categorical factors are stored first, and effects for any categorical factors are stored at the end of the returned matrix, which also carries the attributes \code{scale.pred} and \code{adjust}. \code{adjust} is a character string containing levels of adjustment variables, if there are any interactions. Otherwise it is "". \code{latex.summary.rms} returns an object of class \code{c("latex","file")}. It requires the \code{latex} function in Hmisc. } \author{ Frank Harrell\cr Hui Nian\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{datadist}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{rmsMisc}}, \code{\link[Hmisc]{Misc}}, \code{\link{pretty}}, \code{\link{contrast.rms}} } \examples{ n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4))) s <- summary(fit) # Estimate effects using default ranges # Gets odds ratio for age=3rd quartile # compared to 1st quartile \dontrun{ latex(s) # Use LaTeX to print nice version latex(s, file="") # Just write LaTeX code to console html(s) # html/LaTeX to console for knitr # Or: options(prType='latex') summary(fit) # prints with LaTeX, table.env=FALSE options(prType='html') summary(fit) # prints with html } summary(fit, sex='male', age=60) # Specify ref.
cell and adjustment val summary(fit, age=c(50,70)) # Estimate effect of increasing age from # 50 to 70 s <- summary(fit, age=c(50,60,70)) # Increase age from 50 to 70, adjust to # 60 when estimating effects of other factors #Could have omitted datadist if specified 3 values for all non-categorical #variables (1 value for categorical ones - adjustment level) plot(s, log=TRUE, at=c(.1,.5,1,1.5,2,4,8)) options(datadist=NULL) } \keyword{models} \keyword{regression} \keyword{htest} \keyword{survival} \keyword{hplot} \keyword{interface} \concept{logistic regression model} rms/man/specs.rms.Rd0000644000176200001440000000254313714237251014043 0ustar liggesusers\name{specs.rms} \alias{specs.rms} \alias{specs} \alias{print.specs.rms} \title{rms Specifications for Models} \description{ Prints the design specifications, e.g., number of parameters for each factor, levels of categorical factors, knot locations in splines, pre-transformations, etc. } \usage{ specs(fit, \dots) \method{specs}{rms}(fit, long=FALSE, \dots) \method{print}{specs.rms}(x, \dots) } \arguments{ \item{fit}{ a fit object created with the \code{rms} library in effect } \item{x}{ an object returned by \code{specs} } \item{long}{ if \code{TRUE}, causes the plotting and estimation limits to be printed for each factor } \item{\dots}{ignored} } \value{ a list containing information about the fit and the predictors as elements } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{latexrms}}, \code{\link{datadist}} } \examples{ set.seed(1) blood.pressure <- rnorm(200, 120, 15) dd <- datadist(blood.pressure) options(datadist='dd') L <- .03*(blood.pressure-120) sick <- ifelse(runif(200) <= plogis(L), 1, 0) f <- lrm(sick ~ rcs(blood.pressure,5)) specs(f) # find out where 5 knots are placed g <- Glm(sick ~ rcs(blood.pressure,5), family=binomial) specs(g,long=TRUE) options(datadist=NULL) } \keyword{models} \keyword{regression} \keyword{methods} rms/man/psm.Rd0000644000176200001440000002647713714237251012741 0ustar liggesusers\name{psm} \alias{psm} \alias{print.psm} \alias{Hazard} \alias{Survival} \alias{Hazard.psm} \alias{Mean.psm} \alias{Quantile.psm} \alias{Survival.psm} \alias{residuals.psm} \alias{lines.residuals.psm.censored.normalized} \alias{survplot.residuals.psm.censored.normalized} \title{Parametric Survival Model} \description{ \code{psm} is a modification of Therneau's \code{survreg} function for fitting the accelerated failure time family of parametric survival models. \code{psm} uses the \code{rms} class for automatic \code{anova}, \code{fastbw}, \code{calibrate}, \code{validate}, and other functions. \code{Hazard.psm}, \code{Survival.psm}, \code{Quantile.psm}, and \code{Mean.psm} create S functions that evaluate the hazard, survival, quantile, and mean (expected value) functions analytically, as functions of time or probabilities and the linear predictor values. For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. The \code{residuals.psm} function exists mainly to compute normalized (standardized) residuals and to censor them (i.e., return them as \code{Surv} objects) just as the original failure time variable was censored. These residuals are useful for checking the underlying distributional assumption (see the examples). 
To get these residuals, the fit must have specified \code{y=TRUE}. A \code{lines} method for these residuals automatically draws a curve with the assumed standardized survival distribution. A \code{survplot} method runs the standardized censored residuals through \code{npsurv} to get Kaplan-Meier estimates, with optional stratification (automatically grouping a continuous variable into quantiles) and then through \code{survplot.npsurv} to plot them. Then \code{lines} is invoked to show the theoretical curve. Other types of residuals are computed by \code{residuals} using \code{residuals.survreg}. } \usage{ psm(formula, data=environment(formula), weights, subset, na.action=na.delete, dist="weibull", init=NULL, scale=0, control=survreg.control(), parms=NULL, model=FALSE, x=FALSE, y=TRUE, time.inc, \dots) \method{print}{psm}(x, correlation=FALSE, digits=4, coefs=TRUE, title, \dots) Hazard(object, \dots) \method{Hazard}{psm}(object, \dots) # for psm fit # E.g. lambda <- Hazard(fit) Survival(object, \dots) \method{Survival}{psm}(object, \dots) # for psm # E.g. survival <- Survival(fit) \method{Quantile}{psm}(object, \dots) # for psm # E.g. quantsurv <- Quantile(fit) \method{Mean}{psm}(object, \dots) # for psm # E.g. meant <- Mean(fit) # lambda(times, lp) # get hazard function at t=times, xbeta=lp # survival(times, lp) # survival function at t=times, lp # quantsurv(q, lp) # quantiles of survival time # meant(lp) # mean survival time \method{residuals}{psm}(object, type=c("censored.normalized", "response", "deviance", "dfbeta", "dfbetas", "working", "ldcase", "ldresp", "ldshape", "matrix", "score"), \dots) \method{survplot}{residuals.psm.censored.normalized}(fit, x, g=4, col, main, \dots) \method{lines}{residuals.psm.censored.normalized}(x, n=100, lty=1, xlim, lwd=3, \dots) # for type="censored.normalized" } \arguments{ \item{formula}{ an S statistical model formula. Interactions up to third order are supported. The left hand side must be a \code{Surv} object. } \item{object}{a fit created by \code{psm}. For \code{survplot} with residuals from \code{psm}, \code{object} is the result of \code{residuals.psm}. } \item{fit}{a fit created by \code{psm}} \item{data,subset,weights,dist,scale,init,na.action,control}{see \code{survreg}.} \item{parms}{a list of fixed parameters. For the \eqn{t}-distribution this is the degrees of freedom; most of the distributions have no parameters.} \item{model}{ set to \code{TRUE} to include the model frame in the returned object } \item{x}{ set to \code{TRUE} to include the design matrix in the object produced by \code{psm}. For the \code{survplot} method, \code{x} is an optional stratification variable (character, numeric, or categorical). For \code{lines.residuals.psm.censored.normalized}, \code{x} is the result of \code{residuals.psm}. For \code{print} it is the result of \code{psm}. } \item{y}{ set to \code{TRUE} to include the \code{Surv()} matrix } \item{time.inc}{ setting for default time spacing. Used in constructing the time axis in \code{survplot}, and also in making confidence bars. Default is 30 if time variable has \code{units="Day"}, 1 otherwise, unless maximum follow-up time \eqn{< 1}. Then max time/10 is used as \code{time.inc}. If \code{time.inc} is not given and max time/default \code{time.inc} is \eqn{> 25}, \code{time.inc} is increased.
} \item{correlation}{set to \code{TRUE} to print the correlation matrix for parameter estimates} \item{digits}{number of places to print to the right of the decimal point} \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{\dots}{ other arguments to fitting routines, or to pass to \code{survplot} from \cr \code{survplot.residuals.psm.censored.normalized}. Passed to the generic \code{lines} function for \code{lines}.} \item{times}{ a scalar or vector of times for which to evaluate survival probability or hazard } \item{lp}{ a scalar or vector of linear predictor values at which to evaluate survival probability or hazard. If both \code{times} and \code{lp} are vectors, they must be of the same length. } \item{q}{ a scalar or vector of probabilities. The default is .5, so just the median survival time is returned. If \code{q} and \code{lp} are both vectors, a matrix of quantiles is returned, with rows corresponding to \code{lp} and columns to \code{q}. } \item{type}{ type of residual desired. Default is censored normalized residuals, defined as (link(Y) - linear.predictors)/scale parameter, where the link function was usually the log function. See \code{survreg} for other types. \code{type="score"} returns the score residual matrix. } \item{n}{ number of points to evaluate theoretical standardized survival function for \cr \code{lines.residuals.psm.censored.normalized} } \item{lty}{ line type for \code{lines}, default is 1 } \item{xlim}{ range of times (or transformed times) for which to evaluate the standardized survival function. Default is range in normalized residuals. } \item{lwd}{ line width for theoretical distribution, default is 3 } \item{g}{ number of quantile groups to use for stratifying continuous variables having more than 5 levels } \item{col}{ vector of colors for \code{survplot} method, corresponding to levels of \code{x} (must be a scalar if there is no \code{x}) } \item{main}{ main plot title for \code{survplot}. If omitted, is the name or label of \code{x} if \code{x} is given. Use \code{main=""} to suppress a title when you specify \code{x}. }} \value{ \code{psm} returns a fit object with all the information \code{survreg} would store as well as what \code{rms} stores and \code{units} and \code{time.inc}. \code{Hazard}, \code{Survival}, and \code{Quantile} return S-functions. \code{residuals.psm} with \code{type="censored.normalized"} returns a \code{Surv} object which has a special attribute \code{"theoretical"} which is used by the \code{lines} routine. This is the assumed standardized survival function as a function of time or transformed time. } \details{ The object \code{survreg.distributions} contains definitions of properties of the various survival distributions. \cr \code{psm} does not trap singularity errors due to the way \code{survreg.fit} does matrix inversion. It will trap non-convergence (thus returning \code{fit$fail=TRUE}) if you give the argument \code{failure=2} inside the \code{control} list which is passed to \code{survreg.fit}. For example, use \code{f <- psm(S ~ x, control=list(failure=2, maxiter=20))} to allow up to 20 iterations and to set \code{f$fail=TRUE} in case of non-convergence. This is especially useful in simulation work. 
} \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University \cr \email{fh@fharrell.com} } \seealso{ \code{\link{rms}}, \code{\link{survreg}}, \code{\link{residuals.survreg}}, \code{\link{survreg.object}}, \code{\link{survreg.distributions}}, \code{\link{pphsm}}, \code{\link{survplot}}, \code{\link{survest}}, \code{\link[survival]{Surv}}, \code{\link[Hmisc]{na.delete}}, \code{\link[Hmisc]{na.detail.response}}, \code{\link{datadist}}, \code{\link{latex.psm}}, \code{\link{GiniMd}}, \code{\link{prModFit}}, \code{\link{ggplot.Predict}}, \code{\link{plot.Predict}} } \examples{ n <- 400 set.seed(1) age <- rnorm(n, 50, 12) sex <- factor(sample(c('Female','Male'),n,TRUE)) dd <- datadist(age,sex) options(datadist='dd') # Population hazard function: h <- .02*exp(.06*(age-50)+.8*(sex=='Female')) d.time <- -log(runif(n))/h cens <- 15*runif(n) death <- ifelse(d.time <= cens,1,0) d.time <- pmin(d.time, cens) f <- psm(Surv(d.time,death) ~ sex*pol(age,2), dist='lognormal') # Log-normal model is a bad fit for proportional hazards data anova(f) fastbw(f) # if deletes sex while keeping age*sex ignore the result f <- update(f, x=TRUE,y=TRUE) # so can validate, compute certain resids validate(f, B=10) # ordinarily use B=300 or more plot(Predict(f, age, sex)) # needs datadist since age, sex settings not given # Could have used ggplot(Predict(...)) survplot(f, age=c(20,60)) # needs datadist since sex not set here # latex(f) S <- Survival(f) plot(f$linear.predictors, S(6, f$linear.predictors), xlab=expression(X*hat(beta)), ylab=expression(S(6,X*hat(beta)))) # plots 6-month survival as a function of linear predictor (X*Beta hat) times <- seq(0,24,by=.25) plot(times, S(times,0), type='l') # plots survival curve at X*Beta hat=0 lam <- Hazard(f) plot(times, lam(times,0), type='l') # similarly for hazard function med <- Quantile(f) # new function defaults to computing median only lp <- seq(-3, 5, by=.1) plot(lp, med(lp=lp), ylab="Median Survival Time") med(c(.25,.5), f$linear.predictors) # prints matrix with 2 columns # fit a model with no predictors f <- psm(Surv(d.time,death) ~ 1, dist="weibull") f pphsm(f) # print proportional hazards form g <- survest(f) plot(g$time, g$surv, xlab='Time', type='l', ylab=expression(S(t))) f <- psm(Surv(d.time,death) ~ age, dist="loglogistic", y=TRUE) r <- resid(f, 'cens') # note abbreviation survplot(npsurv(r ~ 1), conf='none') # plot Kaplan-Meier estimate of # survival function of standardized residuals survplot(npsurv(r ~ cut2(age, g=2)), conf='none') # both strata should be N(0,1) lines(r) # add theoretical survival function # More simply: survplot(r, age, g=2) options(datadist=NULL) } \keyword{models} \keyword{survival} rms/man/survest.psm.Rd0000644000176200001440000000666613714237251014435 0ustar liggesusers\name{survest.psm} \alias{survest.psm} \alias{print.survest.psm} \title{Parametric Survival Estimates} \description{ Computes predicted survival probabilities or hazards and optionally confidence limits (for survival only) for parametric survival models fitted with \code{psm}. If getting predictions for more than one observation, \code{times} must be specified. For a model without predictors, no input data are specified. } \usage{ \method{survest}{psm}(fit, newdata, linear.predictors, x, times, fun, loglog=FALSE, conf.int=0.95, what=c("survival","hazard","parallel"), \dots) \method{print}{survest.psm}(x, \dots) } \arguments{ \item{fit}{ fit from \code{psm} } \item{newdata, linear.predictors, x, times, conf.int}{ see \code{survest.cph}.
One of \code{newdata}, \code{linear.predictors}, \code{x} must be given. \code{linear.predictors} includes the intercept. If \code{times} is omitted, predictions are made at 200 equally spaced points between 0 and the maximum failure/censoring time used to fit the model. \code{x} can also be a result from \code{survest.psm}. } \item{what}{ The default is to compute survival probabilities. Set \code{what="hazard"} or some abbreviation of \code{"hazard"} to compute hazard rates. \code{what="parallel"} assumes that the length of \code{times} is the number of subjects (or one), and causes \code{survest} to estimate the \eqn{i^{th}} subject's survival probability at the \eqn{i^{th}} value of \code{times} (or at the scalar value of \code{times}). \code{what="parallel"} is used by \code{val.surv} for example. } \item{loglog}{ set to \code{TRUE} to transform survival estimates and confidence limits using log-log } \item{fun}{ a function to transform estimates and optional confidence intervals } \item{\dots}{unused} } \value{ see \code{survest.cph}. If the model has no predictors, predictions are made with respect to varying time only, and the returned object is of class \code{"npsurv"} so the survival curve can be plotted with \code{survplot.npsurv}. If \code{times} is omitted, the entire survival curve or hazard from \code{t=0,\dots,fit$maxtime} is estimated, with increments computed to yield 200 points where \code{fit$maxtime} is the maximum survival time in the data used in model fitting. Otherwise, the \code{times} vector controls the time points used. } \details{ Confidence intervals are based on asymptotic normality of the linear predictors. The intervals account for the fact that a scale parameter may have been estimated jointly with beta. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{psm}}, \code{\link[survival]{survreg}}, \code{\link{rms}}, \code{\link[survival]{survfit}}, \code{\link{predictrms}}, \code{\link{survplot}}, \code{\link[survival]{survreg.distributions}} } \examples{ # Simulate data from a proportional hazards population model n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" cens <- 15*runif(n) h <- .02*exp(.04*(age-50)) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" S <- Surv(dt,e) f <- psm(S ~ lsp(age,c(40,70))) survest(f, data.frame(age=seq(20,80,by=5)), times=2) #Get predicted survival curve for 40 year old survest(f, data.frame(age=40)) #Get hazard function for 40 year old survest(f, data.frame(age=40), what="hazard")$surv #still called surv } \keyword{survival} \keyword{regression} \keyword{models} rms/man/orm.Rd0000644000176200001440000003757513754064145012744 0ustar liggesusers\name{orm} \alias{orm} \alias{print.orm} \alias{Quantile.orm} \title{Ordinal Regression Model} \description{ Fits ordinal cumulative probability models for continuous or ordinal response variables, efficiently allowing for a large number of intercepts by capitalizing on the information matrix being sparse. Five different distribution functions are implemented, with the default being the logistic (i.e., the proportional odds model). The ordinal cumulative probability models are stated in terms of exceedance probabilities (\eqn{Prob[Y \ge y | X]}) so that as with OLS larger predicted values are associated with larger \code{Y}. 
This is important to note for the asymmetric distributions given by the log-log and complementary log-log families, for which negating the linear predictor does not result in \eqn{Prob[Y < y | X]}. The \code{family} argument is defined in \code{\link{orm.fit}}. The model assumes that the inverse of the assumed cumulative distribution function, when applied to one minus the true cumulative distribution function and plotted on the \eqn{y}-axis (with the original \eqn{y} on the \eqn{x}-axis) yields parallel curves (though not necessarily linear). This can be checked by plotting the inverse cumulative probability function of one minus the empirical distribution function, stratified by \code{X}, and assessing parallelism. Note that parametric regression models make the much stronger assumption of linearity of such inverse functions. For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. \code{Quantile.orm} creates an R function that computes an estimate of a given quantile for a given value of the linear predictor (which was assumed to use the first intercept). It uses a linear interpolation method by default, but you can override that to use a discrete method by specifying \code{method="discrete"} when calling the function generated by \code{Quantile}. Optionally a normal approximation for a confidence interval for quantiles will be computed using the delta method, if \code{conf.int > 0} is specified to the function generated from calling \code{Quantile} and you specify \code{X}. In that case, a \code{"lims"} attribute is included in the result computed by the derived quantile function. } \usage{ orm(formula, data=environment(formula), subset, na.action=na.delete, method="orm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, eps=0.005, var.penalty=c('simple','sandwich'), scale=FALSE, \dots) \method{print}{orm}(x, digits=4, coefs=TRUE, intercepts=x$non.slopes < 10, title, \dots) \method{Quantile}{orm}(object, codes=FALSE, \dots) } \arguments{ \item{formula}{ a formula object. An \code{offset} term can be included. The offset causes fitting of a model such as \eqn{logit(Y=1) = X\beta + W}, where \eqn{W} is the offset variable having no estimated coefficient. The response variable can be any data type; \code{orm} converts it in alphabetic or numeric order to a factor variable and recodes it 1,2,\dots internally. } \item{data}{ data frame to use. Default is the current frame. } \item{subset}{ logical expression or vector of subscripts defining a subset of observations to analyze } \item{na.action}{ function to handle \code{NA}s in the data. Default is \code{na.delete}, which deletes any observation having response or predictor missing, while preserving the attributes of the predictors and maintaining frequencies of deletions due to each variable in the model. This is usually specified using \code{options(na.action="na.delete")}. } \item{method}{ name of fitting function. Only allowable choice at present is \code{orm.fit}. } \item{model}{ causes the model frame to be returned in the fit object } \item{x}{ causes the expanded design matrix (with missings excluded) to be returned under the name \code{x}. For \code{print}, an object created by \code{orm}. } \item{y}{ causes the response variable (with missings excluded) to be returned under the name \code{y}.
} \item{linear.predictors}{ causes the predicted X beta (with missings excluded) to be returned under the name \code{linear.predictors}. The first intercept is used. } \item{se.fit}{ causes the standard errors of the fitted values (on the linear predictor scale) to be returned under the name \code{se.fit}. The middle intercept is used. } \item{penalty}{see \code{\link{lrm}}} \item{penalty.matrix}{see \code{\link{lrm}}} \item{tol}{singularity criterion (see \code{orm.fit})} \item{eps}{difference in \eqn{-2 log} likelihood for declaring convergence} \item{var.penalty}{see \code{\link{lrm}}} \item{scale}{set to \code{TRUE} to subtract column means and divide by column standard deviations of the design matrix before fitting, and to back-solve for the un-normalized covariance matrix and regression coefficients. This can sometimes make the model converge for very large sample sizes where for example spline or polynomial component variables create scaling problems leading to loss of precision when accumulating sums of squares and crossproducts.} \item{\dots}{arguments that are passed to \code{orm.fit}, or from \code{print}, to \code{\link{prModFit}}. Ignored for \code{Quantile}. One of the most important arguments is \code{family}.} \item{digits}{number of significant digits to use} \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{intercepts}{By default, intercepts are only printed if there are fewer than 10 of them. Otherwise this is controlled by specifying \code{intercepts=FALSE} or \code{TRUE}.} \item{title}{a character string title to be passed to \code{prModFit}. Default is constructed from the name of the distribution family.} \item{object}{an object created by \code{orm}} \item{codes}{if \code{TRUE}, uses the integer codes \eqn{1,2,\ldots,k} for the \eqn{k}-level response in computing the predicted quantile} } \value{ The returned fit object of \code{orm} contains the following components in addition to the ones mentioned under the optional arguments. \item{call}{calling expression} \item{freq}{ table of frequencies for \code{Y} in order of increasing \code{Y}} \item{stats}{ vector with the following elements: number of observations used in the fit, number of unique \code{Y} values, median \code{Y} from among the observations used in the fit, maximum absolute value of first derivative of log likelihood, model likelihood ratio \eqn{\chi^2}{chi-square}, d.f., \eqn{P}-value, score \eqn{\chi^2} statistic (if no initial values given), \eqn{P}-value, Spearman's \eqn{\rho} rank correlation between the linear predictor and \code{Y}, the Nagelkerke \eqn{R^2} index, the \eqn{g}-index, \eqn{gr} (the \eqn{g}-index on the odds ratio scale), and \eqn{pdm} (the mean absolute difference between 0.5 and the predicted probability that \eqn{Y\geq} the marginal median). In the case of penalized estimation, the \code{"Model L.R."} is computed without the penalty factor, and \code{"d.f."} is the effective d.f. from Gray's (1992) Equation 2.9. The \eqn{P}-value uses this corrected model L.R. \eqn{\chi^2}{chi-square} and corrected d.f. The score chi-square statistic uses first derivatives which contain penalty components.
} \item{fail}{ set to \code{TRUE} if convergence failed (and \code{maxiter>1}) or if a singular information matrix is encountered } \item{coefficients}{estimated parameters} \item{var}{ estimated variance-covariance matrix (inverse of information matrix) for the middle intercept and regression coefficients. See \code{\link{lrm}} for details if penalization is used. } \item{effective.df.diagonal}{see \code{\link{lrm}}} \item{family}{the character string for \code{family}. If \code{family} was a user-customized list, it must have had an element named \code{name}, which is taken as the return value for \code{family} here.} \item{trans}{a list of functions for the choice of \code{family}, with elements \code{cumprob} (the cumulative probability distribution function), \code{inverse} (inverse of \code{cumprob}), \code{deriv} (first derivative of \code{cumprob}), and \code{deriv2} (second derivative of \code{cumprob})} \item{deviance}{ -2 log likelihoods (counting penalty components). When an offset variable is present, three deviances are computed: for intercept(s) only, for intercepts+offset, and for intercepts+offset+predictors. When there is no offset variable, the vector contains deviances for the intercept(s)-only model and the model with intercept(s) and predictors. } \item{non.slopes}{number of intercepts in model} \item{interceptRef}{the index of the middle (median) intercept used in computing the linear predictor and \code{var}} \item{penalty}{see \code{\link{lrm}}} \item{penalty.matrix}{the penalty matrix actually used in the estimation} \item{info.matrix}{a sparse matrix representation of type \code{matrix.csr} from the \code{SparseM} package. This allows the full information matrix with all intercepts to be stored efficiently, and matrix operations using the Cholesky decomposition to be fast. \code{\link{vcov.orm}} uses this information to compute the covariance matrix for intercepts other than the middle one.} } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com\cr For the \code{Quantile} function:\cr Qi Liu and Shengxin Tu\cr Department of Biostatistics, Vanderbilt University } \references{ Sall J: A monotone regression smoother based on ordinal cumulative logistic regression, 1991. Le Cessie S, Van Houwelingen JC: Ridge estimators in logistic regression. Applied Statistics 41:191--201, 1992. Verweij PJM, Van Houwelingen JC: Penalized likelihood in Cox regression. Stat in Med 13:2427--2436, 1994. Gray RJ: Flexible methods for analyzing survival data using splines, with applications to breast cancer prognosis. JASA 87:942--951, 1992. Shao J: Linear model selection by cross-validation. JASA 88:486--494, 1993. Verweij PJM, Van Houwelingen JC: Crossvalidation in survival analysis. Stat in Med 12:2305--2314, 1993. Harrell FE: Model uncertainty, penalization, and parsimony. Available from \url{http://hbiostat.org/talks/iscb98.pdf}.
} \seealso{ \code{\link{orm.fit}}, \code{\link{predict.orm}}, \code{\link[SparseM:SparseM.solve]{solve}}, \code{\link{rms.trans}}, \code{\link{rms}}, \code{\link[MASS]{polr}}, \code{\link{latex.orm}}, \code{\link{vcov.orm}}, \code{\link[Hmisc]{num.intercepts}}, \code{\link{residuals.orm}}, \code{\link[Hmisc]{na.delete}}, \code{\link[Hmisc]{na.detail.response}}, \code{\link{pentrace}}, \code{\link{rmsMisc}}, \code{\link{vif}}, \code{\link{predab.resample}}, \code{\link{validate.orm}}, \code{\link{calibrate}}, \code{\link{Mean.orm}}, \code{\link{gIndex}}, \code{\link{prModFit}} } \examples{ set.seed(1) n <- 100 y <- round(runif(n), 2) x1 <- sample(c(-1,0,1), n, TRUE) x2 <- sample(c(-1,0,1), n, TRUE) f <- lrm(y ~ x1 + x2, eps=1e-5) g <- orm(y ~ x1 + x2, eps=1e-5) max(abs(coef(g) - coef(f))) w <- vcov(g, intercepts='all') / vcov(f) - 1 max(abs(w)) set.seed(1) n <- 300 x1 <- c(rep(0,150), rep(1,150)) y <- rnorm(n) + 3*x1 g <- orm(y ~ x1) g k <- coef(g) i <- num.intercepts(g) h <- orm(y ~ x1, family=probit) ll <- orm(y ~ x1, family=loglog) cll <- orm(y ~ x1, family=cloglog) cau <- orm(y ~ x1, family=cauchit) x <- 1:i z <- list(logistic=list(x=x, y=coef(g)[1:i]), probit =list(x=x, y=coef(h)[1:i]), loglog =list(x=x, y=coef(ll)[1:i]), cloglog =list(x=x, y=coef(cll)[1:i])) labcurve(z, pl=TRUE, col=1:4, ylab='Intercept') tapply(y, x1, mean) m <- Mean(g) m(w <- k[1] + k['x1']*c(0,1)) mh <- Mean(h) wh <- coef(h)[1] + coef(h)['x1']*c(0,1) mh(wh) qu <- Quantile(g) # Compare model estimated and empirical quantiles cq <- function(y) { cat(qu(.1, w), tapply(y, x1, quantile, probs=.1), '\n') cat(qu(.5, w), tapply(y, x1, quantile, probs=.5), '\n') cat(qu(.9, w), tapply(y, x1, quantile, probs=.9), '\n') } cq(y) # Try on log-normal model g <- orm(exp(y) ~ x1) g k <- coef(g) plot(k[1:i]) m <- Mean(g) m(w <- k[1] + k['x1']*c(0,1)) tapply(exp(y), x1, mean) qu <- Quantile(g) cq(exp(y)) # Compare predicted mean with ols for a continuous x set.seed(3) n <- 200 x1 <- rnorm(n) y <- x1 + rnorm(n) dd <- datadist(x1); options(datadist='dd') f <- ols(y ~ x1) g <- orm(y ~ x1, family=probit) h <- orm(y ~ x1, family=logistic) w <- orm(y ~ x1, family=cloglog) mg <- Mean(g); mh <- Mean(h); mw <- Mean(w) r <- rbind(ols = Predict(f, conf.int=FALSE), probit = Predict(g, conf.int=FALSE, fun=mg), logistic = Predict(h, conf.int=FALSE, fun=mh), cloglog = Predict(w, conf.int=FALSE, fun=mw)) plot(r, groups='.set.') # Compare predicted 0.8 quantile with quantile regression qu <- Quantile(g) qu80 <- function(lp) qu(.8, lp) f <- Rq(y ~ x1, tau=.8) r <- rbind(probit = Predict(g, conf.int=FALSE, fun=qu80), quantreg = Predict(f, conf.int=FALSE)) plot(r, groups='.set.') # Verify transformation invariance of ordinal regression ga <- orm(exp(y) ~ x1, family=probit) qua <- Quantile(ga) qua80 <- function(lp) log(qua(.8, lp)) r <- rbind(logprobit = Predict(ga, conf.int=FALSE, fun=qua80), probit = Predict(g, conf.int=FALSE, fun=qu80)) plot(r, groups='.set.') # Try the same with quantile regression. 
Need to transform x1 fa <- Rq(exp(y) ~ rcs(x1,5), tau=.8) r <- rbind(qr = Predict(f, conf.int=FALSE), logqr = Predict(fa, conf.int=FALSE, fun=log)) plot(r, groups='.set.') options(datadist=NULL) \dontrun{ ## Simulate power and type I error for orm logistic and probit regression ## for likelihood ratio, Wald, and score chi-square tests, and compare ## with t-test require(rms) set.seed(5) nsim <- 2000 r <- NULL for(beta in c(0, .4)) { for(n in c(10, 50, 300)) { cat('beta=', beta, ' n=', n, '\n\n') plogistic <- pprobit <- plogistics <- pprobits <- plogisticw <- pprobitw <- ptt <- numeric(nsim) x <- c(rep(0, n/2), rep(1, n/2)) pb <- setPb(nsim, every=25, label=paste('beta=', beta, ' n=', n)) for(j in 1:nsim) { pb(j) y <- beta*x + rnorm(n) tt <- t.test(y ~ x) ptt[j] <- tt$p.value f <- orm(y ~ x) plogistic[j] <- f$stats['P'] plogistics[j] <- f$stats['Score P'] plogisticw[j] <- 1 - pchisq(coef(f)['x']^2 / vcov(f)[2,2], 1) f <- orm(y ~ x, family=probit) pprobit[j] <- f$stats['P'] pprobits[j] <- f$stats['Score P'] pprobitw[j] <- 1 - pchisq(coef(f)['x']^2 / vcov(f)[2,2], 1) } if(beta == 0) plot(ecdf(plogistic)) r <- rbind(r, data.frame(beta = beta, n=n, ttest = mean(ptt < 0.05), logisticlr = mean(plogistic < 0.05), logisticscore= mean(plogistics < 0.05), logisticwald = mean(plogisticw < 0.05), probit = mean(pprobit < 0.05), probitscore = mean(pprobits < 0.05), probitwald = mean(pprobitw < 0.05))) } } print(r) # beta n ttest logisticlr logisticscore logisticwald probit probitscore probitwald #1 0.0 10 0.0435 0.1060 0.0655 0.043 0.0920 0.0920 0.0820 #2 0.0 50 0.0515 0.0635 0.0615 0.060 0.0620 0.0620 0.0620 #3 0.0 300 0.0595 0.0595 0.0590 0.059 0.0605 0.0605 0.0605 #4 0.4 10 0.0755 0.1595 0.1070 0.074 0.1430 0.1430 0.1285 #5 0.4 50 0.2950 0.2960 0.2935 0.288 0.3120 0.3120 0.3120 #6 0.4 300 0.9240 0.9215 0.9205 0.920 0.9230 0.9230 0.9230 } } \keyword{category} \keyword{models} \concept{logistic regression model} \concept{ordinal logistic model} \concept{proportional odds model} \concept{ordinal response} rms/man/Gls.Rd0000644000176200001440000001616113714237251012654 0ustar liggesusers\name{Gls} \alias{Gls} \alias{print.Gls} \title{Fit Linear Model Using Generalized Least Squares} \description{ This function fits a linear model using generalized least squares. The errors are allowed to be correlated and/or have unequal variances. \code{Gls} is a slightly enhanced version of the Pinheiro and Bates \code{gls} function in the \code{nlme} package to make it easy to use with the rms package and to implement cluster bootstrapping (primarily for nonparametric estimates of the variance-covariance matrix of the parameter estimates and for nonparametric confidence limits of correlation parameters). For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. } \usage{ Gls(model, data, correlation, weights, subset, method, na.action=na.omit, control, verbose, B=0, dupCluster=FALSE, pr=FALSE, x=FALSE) \method{print}{Gls}(x, digits=4, coefs=TRUE, title, \dots) } \arguments{ \item{model}{a two-sided linear formula object describing the model, with the response on the left of a \code{~} operator and the terms, separated by \code{+} operators, on the right.} \item{data}{an optional data frame containing the variables named in \code{model}, \code{correlation}, \code{weights}, and \code{subset}. 
By default the variables are taken from the environment from which \code{gls} is called.} \item{correlation}{an optional \code{corStruct} object describing the within-group correlation structure. See the documentation of \code{corClasses} for a description of the available \code{corStruct} classes. If a grouping variable is to be used, it must be specified in the \code{form} argument to the \code{corStruct} constructor. Defaults to \code{NULL}, corresponding to uncorrelated errors.} \item{weights}{an optional \code{varFunc} object or one-sided formula describing the within-group heteroscedasticity structure. If given as a formula, it is used as the argument to \code{varFixed}, corresponding to fixed variance weights. See the documentation on \code{varClasses} for a description of the available \code{varFunc} classes. Defaults to \code{NULL}, corresponding to homoscedastic errors.} \item{subset}{an optional expression indicating which subset of the rows of \code{data} should be used in the fit. This can be a logical vector, or a numeric vector indicating which observation numbers are to be included, or a character vector of the row names to be included. All observations are included by default.} \item{method}{a character string. If \code{"REML"} the model is fit by maximizing the restricted log-likelihood. If \code{"ML"} the log-likelihood is maximized. Defaults to \code{"REML"}.} \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. The default action (\code{na.omit}) results in deletion of observations having any of the variables of interest missing.} \item{control}{a list of control values for the estimation algorithm to replace the default values returned by the function \code{glsControl}. Defaults to an empty list.} \item{verbose}{an optional logical value. If \code{TRUE} information on the evolution of the iterative algorithm is printed. Default is \code{FALSE}.} \item{B}{number of bootstrap resamples to fit and store, default is none} \item{dupCluster}{set to \code{TRUE} to have \code{Gls}, when bootstrapping, consider multiply-sampled clusters as if they were one large cluster when fitting using the \code{gls} algorithm} \item{pr}{set to \code{TRUE} to show progress of bootstrap resampling} \item{x}{for \code{Gls} set to \code{TRUE} to store the design matrix in the fit object; otherwise the result of \code{Gls}} \item{digits}{number of significant digits to print} \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{\dots}{ignored} } \value{ an object of classes \code{Gls}, \code{rms}, and \code{gls} representing the linear model fit. Generic functions such as \code{print}, \code{plot}, \code{ggplot}, and \code{summary} have methods to show the results of the fit. See \code{glsObject} for the components of the fit. The functions \code{resid}, \code{coef}, and \code{fitted} can be used to extract some of its components.
\code{Gls} returns the following components not returned by \code{gls}: \code{Design}, \code{assign}, \code{formula} (see arguments), \code{B} (see arguments), \code{bootCoef} (matrix of \code{B} bootstrapped coefficients), \code{boot.Corr} (vector of bootstrapped correlation parameters), \code{Nboot} (vector of the total sample size used in each bootstrap, which may vary if there are unbalanced clusters), and \code{var} (sample variance-covariance matrix of bootstrapped coefficients). The \eqn{g}-index is also stored in the returned object under the name \code{"g"}. } \references{ Pinheiro J, Bates D (2000): Mixed effects models in S and S-Plus. New York: Springer-Verlag. } \details{ The \code{\link[Hmisc]{na.delete}} function will not work with \code{Gls} due to some nuance in the \code{model.frame.default} function. This probably relates to \code{na.delete} storing extra information in the \code{"na.action"} attribute of the returned data frame. } \author{Jose Pinheiro, Douglas Bates \email{bates@stat.wisc.edu}, Saikat DebRoy, Deepayan Sarkar, R-core \email{R-core@R-project.org}, Frank Harrell \email{fh@fharrell.com}, Patrick Aboyoun } \seealso{ \code{\link[nlme]{gls}} \code{\link[nlme]{glsControl}}, \code{\link[nlme]{glsObject}}, \code{\link{varFunc}}, \code{\link[nlme]{corClasses}}, \code{\link[nlme]{varClasses}}, \code{\link{GiniMd}}, \code{\link{prModFit}}, \code{\link{logLik.Gls}} } \examples{ \dontrun{ ns <- 20 # no. subjects nt <- 10 # no. time points/subject B <- 10 # no. bootstrap resamples # usually do 100 for variances, 1000 for nonparametric CLs rho <- .5 # AR(1) correlation parameter V <- matrix(0, nrow=nt, ncol=nt) V <- rho^abs(row(V)-col(V)) # per-subject correlation/covariance matrix d <- expand.grid(tim=1:nt, id=1:ns) d$trt <- factor(ifelse(d$id <= ns/2, 'a', 'b')) true.beta <- c(Intercept=0,tim=.1,'tim^2'=0,'trt=b'=1) d$ey <- true.beta['Intercept'] + true.beta['tim']*d$tim + true.beta['tim^2']*(d$tim^2) + true.beta['trt=b']*(d$trt=='b') set.seed(13) library(MASS) # needed for mvrnorm d$y <- d$ey + as.vector(t(mvrnorm(n=ns, mu=rep(0,nt), Sigma=V))) dd <- datadist(d); options(datadist='dd') f <- Gls(y ~ pol(tim,2) + trt, correlation=corCAR1(form= ~tim | id), data=d, B=B) f AIC(f) f$var # bootstrap variances f$varBeta # original variances summary(f) anova(f) ggplot(Predict(f, tim, trt)) # v <- Variogram(f, form=~tim|id, data=d) nlme:::summary.gls(f)$tTable # print matrix of estimates etc. options(datadist=NULL) } } \keyword{models} rms/man/residuals.lrm.Rd0000644000176200001440000003364313714237251014715 0ustar liggesusers\name{residuals.lrm} \alias{residuals.lrm} \alias{residuals.orm} \alias{plot.lrm.partial} \title{Residuals from an \code{lrm} or \code{orm} Fit} \description{ For a binary logistic model fit, computes the following residuals, letting \eqn{P} denote the predicted probability of the higher category of \eqn{Y}, \eqn{X} denote the design matrix (with a column of 1s for the intercept), and \eqn{L} denote the logit or linear predictors: ordinary or Li-Shepherd (\eqn{Y-P}), score (\eqn{X (Y-P)}), pearson (\eqn{(Y-P)/\sqrt{P(1-P)}}), deviance (for \eqn{Y=0} is \eqn{-\sqrt{2|\log(1-P)|}}, for \eqn{Y=1} is \eqn{\sqrt{2|\log(P)|}}), pseudo dependent variable used in influence statistics (\eqn{L + (Y-P)/(P(1-P))}), and partial (\eqn{X_{i}\beta_{i} + (Y-P)/(P(1-P))}). Will compute all these residuals for an ordinal logistic model, using as temporary binary responses dichotomizations of \eqn{Y}, along with the corresponding \eqn{P}, the probability that \eqn{Y \geq} cutoff.
For \code{type="partial"}, all possible dichotomizations are used, and for \code{type="score"}, the actual components of the first derivative of the log likelihood are used for an ordinal model. For \code{type="li.shepherd"} the residual is \eqn{Pr(W < Y) - Pr(W > Y)} where Y is the observed response and W is a random variable from the fitted distribution. Alternatively, specify \code{type="score.binary"} to use binary model score residuals but for all cutpoints of \eqn{Y} (plotted only, not returned). The \code{score.binary}, \code{partial}, and perhaps \code{score} residuals are useful for checking the proportional odds assumption. If the option \code{pl=TRUE} is used to plot the \code{score} or \code{score.binary} residuals, a score residual plot is made for each column of the design (predictor) matrix, with \code{Y} cutoffs on the x-axis and the mean \eqn{\pm}{+/-} 1.96 standard errors of the score residuals on the y-axis. You can instead use a box plot to display these residuals, for both \code{score.binary} and \code{score}. Proportional odds dictates a horizontal \code{score.binary} plot. Partial residual plots use smooth nonparametric estimates, separately for each cutoff of \eqn{Y}. One examines that plot for parallelism of the curves to check the proportional odds assumption, as well as to see if the predictor behaves linearly. Also computes a variety of influence statistics and the le Cessie - van Houwelingen - Copas - Hosmer unweighted sum of squares test for global goodness of fit, done separately for each cutoff of \eqn{Y} in the case of an ordinal model. The \code{plot.lrm.partial} function computes partial residuals for a series of binary logistic model fits that all used the same predictors and that specified \code{x=TRUE, y=TRUE}. It then computes smoothed partial residual relationships (using \code{lowess} with \code{iter=0}) and plots them separately for each predictor, with residual plots from all model fits shown on the same plot for that predictor. } \usage{ \method{residuals}{lrm}(object, type=c("li.shepherd","ordinary", "score", "score.binary", "pearson", "deviance", "pseudo.dep", "partial", "dfbeta", "dfbetas", "dffit", "dffits", "hat", "gof", "lp1"), pl=FALSE, xlim, ylim, kint, label.curves=TRUE, which, \dots) \method{residuals}{orm}(object, type=c("li.shepherd","ordinary", "score", "score.binary", "pearson", "deviance", "pseudo.dep", "partial", "dfbeta", "dfbetas", "dffit", "dffits", "hat", "gof", "lp1"), pl=FALSE, xlim, ylim, kint, label.curves=TRUE, which, \dots) \method{plot}{lrm.partial}(\dots, labels, center=FALSE, ylim) } \arguments{ \item{object}{object created by \code{lrm} or \code{orm}} \item{\dots}{ for \code{residuals}, applies to \code{type="partial"} when \code{pl} is not \code{FALSE}. These are extra arguments passed to the smoothing function. Can also be used to pass extra arguments to \code{boxplot} for \code{type="score"} or \code{"score.binary"}. For \code{plot.lrm.partial} this specifies a series of binary model fit objects. } \item{type}{ type of residual desired. Use \code{type="lp1"} to get approximate leave-out-1 linear predictors, derived by subtracting the \code{dffit} from the original linear predictor values. } \item{pl}{ applies only to \code{type="partial"}, \code{"score"}, and \code{"score.binary"}. For score residuals in an ordinal model, set \code{pl=TRUE} to get means and approximate 0.95 confidence bars vs. \eqn{Y}, separately for each \eqn{X}.
Alternatively, specify \code{pl="boxplot"} to use \code{boxplot} to draw the plot, with notches and with width proportional to the square root of the cell sizes. For partial residuals, set \code{pl=TRUE} (which uses \code{lowess}) or \code{pl="supsmu"} to get smoothed partial residual plots for all columns of \eqn{X} using \code{supsmu}. Use \code{pl="loess"} to use \code{loess} and get confidence bands (\code{"loess"} is not implemented for ordinal responses). Under R, \code{pl="loess"} uses \code{lowess} and does not provide confidence bands. If there is more than one \eqn{X}, you should probably use \code{par(mfrow=c( , ))} before calling \code{resid}. Note that \code{pl="loess"} results in \code{plot.loess} being called, which requires a large memory allocation. } \item{xlim}{ plotting range for x-axis (default = whole range of predictor) } \item{ylim}{ plotting range for y-axis (default = whole range of residuals, range of all confidence intervals for \code{score} or \code{score.binary} or range of all smoothed curves for \code{partial} if \code{pl=TRUE}, or 0.1 and 0.9 quantiles of the residuals for \code{pl="boxplot"}.) } \item{kint}{ for an ordinal model for residuals other than \code{li.shepherd}, \code{partial}, \code{score}, or \code{score.binary}, specifies the intercept (and the cutoff of \eqn{Y}) to use for the calculations. Specifying \code{kint=2}, for example, means to use \eqn{Y \geq} 3rd level. } \item{label.curves}{ set to \code{FALSE} to suppress curve labels when \code{type="partial"}. The default, \code{TRUE}, causes \code{labcurve} to be invoked to label curves where they are most separated. \code{label.curves} can be a list containing the \code{opts} parameter for \code{labcurve}, to send options to \code{labcurve}, such as \code{tilt}. The default for \code{tilt} here is \code{TRUE}. } \item{which}{ a vector of integers specifying column numbers of the design matrix for which to compute or plot residuals, for \code{type="partial","score","score.binary"}. } \item{labels}{ for \code{plot.lrm.partial} this specifies a vector of character strings providing labels for the list of binary fits. By default, the names of the fit objects are used as labels. The \code{labcurve} function is used to label the curve with the \code{labels}. } \item{center}{ for \code{plot.lrm.partial} this causes partial residuals for every model to have a mean of zero before smoothing and plotting }} \value{ a matrix (\code{type="partial","dfbeta","dfbetas","score"}), test statistic (\code{type="gof"}), or a vector otherwise. For partial residuals from an ordinal model, the returned object is a 3-way array (rows of \eqn{X} by columns of \eqn{X} by cutoffs of \eqn{Y}), and NAs deleted during the fit are not re-inserted into the residuals. For \code{score.binary}, nothing is returned. } \details{ For the goodness-of-fit test, the le Cessie-van Houwelingen normal test statistic for the unweighted sum of squared errors (Brier score times \eqn{n}) is used. For an ordinal response variable, the test for predicting the probability that \eqn{Y\geq j} is done separately for all \eqn{j} (except the first). Note that the test statistic can have strange behavior (i.e., it is far too large) if the model has no predictive value. For most of the values of \code{type}, you must have specified \code{x=TRUE, y=TRUE} to \code{lrm} or \code{orm}. There is yet no literature on interpreting score residual plots for the ordinal model. 
Simulations in which proportional odds is satisfied have still shown a U-shaped residual plot. The series of binary model score residuals for all cutoffs of \eqn{Y} seems to check the assumptions better. See the examples. The li.shepherd residual is a single value per observation on the probability scale and can be useful for examining linearity, checking for outliers, and measuring residual correlation. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \references{ Landwehr, Pregibon, Shoemaker. JASA 79:61--83, 1984. le Cessie S, van Houwelingen JC. Biometrics 47:1267--1282, 1991. Hosmer DW, Hosmer T, Lemeshow S, le Cessie S, Lemeshow S. A comparison of goodness-of-fit tests for the logistic regression model. Stat in Med 16:965--980, 1997. Copas JB. Applied Statistics 38:71--80, 1989. Li C, Shepherd BE. Biometrika 99:473--480, 2012. } \seealso{ \code{\link{lrm}}, \code{\link{orm}}, \code{\link{naresid}}, \code{\link{which.influence}}, \code{\link{loess}}, \code{\link{supsmu}}, \code{\link{lowess}}, \code{\link{boxplot}}, \code{\link[Hmisc]{labcurve}} } \examples{ set.seed(1) x1 <- runif(200, -1, 1) x2 <- runif(200, -1, 1) L <- x1^2 - .5 + x2 y <- ifelse(runif(200) <= plogis(L), 1, 0) f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE) resid(f) #add rows for NAs back to data resid(f, "score") #also adds back rows r <- resid(f, "partial") #for checking transformations of X's par(mfrow=c(1,2)) for(i in 1:2) { xx <- if(i==1)x1 else x2 plot(xx, r[,i], xlab=c('x1','x2')[i]) lines(lowess(xx,r[,i])) } resid(f, "partial", pl="loess") #same as last 3 lines resid(f, "partial", pl=TRUE) #plots for all columns of X using supsmu resid(f, "gof") #global test of goodness of fit lp1 <- resid(f, "lp1") #approx. leave-out-1 linear predictors -2*sum(y*lp1 + log(1-plogis(lp1))) #approx leave-out-1 deviance #formula assumes y is binary # Simulate data from a population proportional odds model set.seed(1) n <- 400 age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) L <- .05*(age-50) + .03*(blood.pressure-120) p12 <- plogis(L) # Pr(Y>=1) p2 <- plogis(L-1) # Pr(Y=2) p <- cbind(1-p12, p12-p2, p2) # individual class probabilities # Cumulative probabilities: cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(3,n)), byrow=TRUE, ncol=3) # simulate multinomial with varying probs: y <- (cp < runif(n)) \%*\% rep(1,3) y <- as.vector(y) # Thanks to Dave Krantz for this trick f <- lrm(y ~ age + blood.pressure, x=TRUE, y=TRUE) par(mfrow=c(2,2)) resid(f, 'score.binary', pl=TRUE) #plot score residuals resid(f, 'partial', pl=TRUE) #plot partial residuals resid(f, 'gof') #test GOF for each level separately # Show use of Li-Shepherd residuals f.wrong <- lrm(y ~ blood.pressure, x=TRUE, y=TRUE) par(mfrow=c(2,1)) # li.shepherd residuals from model without age plot(age, resid(f.wrong, type="li.shepherd"), ylab="li.shepherd residual") lines(lowess(age, resid(f.wrong, type="li.shepherd"))) # li.shepherd residuals from model including age plot(age, resid(f, type="li.shepherd"), ylab="li.shepherd residual") lines(lowess(age, resid(f, type="li.shepherd"))) # Make a series of binary fits and draw 2 partial residual plots # f1 <- lrm(y>=1 ~ age + blood.pressure, x=TRUE, y=TRUE) f2 <- update(f1, y==2 ~.) par(mfrow=c(2,1)) plot.lrm.partial(f1, f2) # Simulate data from both a proportional odds and a non-proportional # odds population model. Check how 3 kinds of residuals detect # non-prop. odds set.seed(71) n <- 400 x <- rnorm(n) par(mfrow=c(2,3)) for(j in 1:2) { # 1: prop.odds 2: non-prop.
odds if(j==1) L <- matrix(c(1.4,.4,-.1,-.5,-.9), nrow=n, ncol=5, byrow=TRUE) + x / 2 else { # Slopes and intercepts for cutoffs of 1:5 : slopes <- c(.7,.5,.3,.3,0) ints <- c(2.5,1.2,0,-1.2,-2.5) L <- matrix(ints, nrow=n, ncol=5, byrow=TRUE) + matrix(slopes, nrow=n, ncol=5, byrow=TRUE) * x } p <- plogis(L) # Cell probabilities p <- cbind(1-p[,1],p[,1]-p[,2],p[,2]-p[,3],p[,3]-p[,4],p[,4]-p[,5],p[,5]) # Cumulative probabilities from left to right cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(6,n)), byrow=TRUE, ncol=6) y <- (cp < runif(n)) \%*\% rep(1,6) f <- lrm(y ~ x, x=TRUE, y=TRUE) for(cutoff in 1:5) print(lrm(y >= cutoff ~ x)$coef) print(resid(f,'gof')) resid(f, 'score', pl=TRUE) # Note that full ordinal model score residuals exhibit a # U-shaped pattern even under prop. odds ti <- if(j==2) 'Non-Proportional Odds\nSlopes=.7 .5 .3 .3 0' else 'True Proportional Odds\nOrdinal Model Score Residuals' title(ti) resid(f, 'score.binary', pl=TRUE) if(j==1) ti <- 'True Proportional Odds\nBinary Score Residuals' title(ti) resid(f, 'partial', pl=TRUE) if(j==1) ti <- 'True Proportional Odds\nPartial Residuals' title(ti) } par(mfrow=c(1,1)) # Shepherd-Li residuals from orm. Thanks: Qi Liu set.seed(3) n <- 100 x1 <- rnorm(n) y <- x1 + rnorm(n) g <- orm(y ~ x1, family=probit, x=TRUE, y=TRUE) g.resid <- resid(g) plot(x1, g.resid, cex=0.4); lines(lowess(x1, g.resid)); abline(h=0, col=2,lty=2) set.seed(3) n <- 100 x1 <- rnorm(n) y <- x1 + x1^2 +rnorm(n) # model misspecification, the square term is left out in the model g <- orm(y ~ x1, family=probit, x=TRUE, y=TRUE) g.resid <- resid(g) plot(x1, g.resid, cex=0.4); lines(lowess(x1, g.resid)); abline(h=0, col=2,lty=2) \dontrun{ # Get data used in Hosmer et al. paper and reproduce their calculations v <- Cs(id, low, age, lwt, race, smoke, ptl, ht, ui, ftv, bwt) d <- read.table("http://www.umass.edu/statdata/statdata/data/lowbwt.dat", skip=6, col.names=v) d <- upData(d, race=factor(race,1:3,c('white','black','other'))) f <- lrm(low ~ age + lwt + race + smoke, data=d, x=TRUE,y=TRUE) f resid(f, 'gof') # Their Table 7 Line 2 found sum of squared errors=36.91, expected # value under H0=36.45, variance=.065, P=.071 # We got 36.90, 36.45, SD=.26055 (var=.068), P=.085 # Note that two logistic regression coefficients differed a bit # from their Table 1 } } \keyword{models} \keyword{regression} \concept{logistic regression model} \concept{model validation} rms/man/bootBCa.Rd0000644000176200001440000000347412134520511013430 0ustar liggesusers\name{bootBCa} \alias{bootBCa} \title{BCa Bootstrap on Existing Bootstrap Replicates} \description{ This function constructs an object resembling one produced by the \code{boot} package's \code{boot} function, and runs that package's \code{boot.ci} function to compute BCa and percentile confidence limits. \code{bootBCa} can provide separate confidence limits for a vector of statistics when \code{estimate} has length greater than 1. In that case, \code{estimates} must have the same number of columns as \code{estimate} has values.
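As a sketch of the shapes involved (hypothetical numbers; \code{seed} must be the \code{.Random.seed} saved before bootstrapping):

\preformatted{
est  <- c(-0.2, 1.4)   # hypothetical whole-sample estimates, length 2
ests <- matrix(rnorm(2000), nrow=1000, ncol=2)  # 1000 bootstrap reps,
                                                # one column per statistic
# bootBCa(est, ests, n=100, seed=seed) would then return a matrix with
# 2 rows (lower and upper confidence limits) and 2 columns
}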
} \usage{bootBCa(estimate, estimates, type=c('percentile','bca','basic'), n, seed, conf.int = 0.95)} \arguments{ \item{estimate}{original whole-sample estimate} \item{estimates}{vector of bootstrap estimates} \item{type}{type of confidence interval, defaulting to nonparametric percentile} \item{n}{original number of observations} \item{seed}{\code{.Random.seed} in effect before bootstrap estimates were run} \item{conf.int}{confidence level} } \value{a 2-vector if \code{estimate} is of length 1, otherwise a matrix with 2 rows and number of columns equal to the length of \code{estimate}} \author{Frank Harrell} \note{ You can use \code{if(!exists('.Random.seed')) runif(1)} before running your bootstrap to make sure that \code{.Random.seed} will be available to \code{bootBCa}. } \seealso{\code{\link[boot]{boot.ci}}} \examples{ \dontrun{ x1 <- runif(100); x2 <- runif(100); y <- sample(0:1, 100, TRUE) f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE) seed <- .Random.seed b <- bootcov(f) # Get estimated log odds at (x1=.4, x2=.6) and (x1=2, x2=3) X <- cbind(c(1,1), x1=c(.4,2), x2=c(.6,3)) est <- X \%*\% coef(b) ests <- t(X \%*\% t(b$boot.Coef)) bootBCa(est, ests, n=100, seed=seed) bootBCa(est, ests, type='bca', n=100, seed=seed) bootBCa(est, ests, type='basic', n=100, seed=seed) }} \keyword{bootstrap} rms/man/val.prob.Rd0000644000176200001440000003461213714237251013653 0ustar liggesusers\name{val.prob} \alias{val.prob} \alias{print.val.prob} \alias{plot.val.prob} \title{ Validate Predicted Probabilities } \description{ The \code{val.prob} function is useful for validating predicted probabilities against binary events. Given a set of predicted probabilities \code{p} or predicted log odds \code{logit}, and a vector of binary outcomes \code{y} that were not used in developing the predictions \code{p} or \code{logit}, \code{val.prob} computes the following indexes and statistics: Somers' \eqn{D_{xy}} rank correlation between \code{p} and \code{y} [\eqn{2(C-.5)}, \eqn{C}=ROC area], Nagelkerke-Cox-Snell-Maddala-Magee R-squared index, Discrimination index \code{D} [ (Logistic model L.R. \eqn{\chi^2}{chi-square} - 1)/n], L.R. \eqn{\chi^2}{chi-square}, its \eqn{P}-value, Unreliability index \eqn{U}, \eqn{\chi^2}{chi-square} with 2 d.f. for testing unreliability (H0: intercept=0, slope=1), its \eqn{P}-value, the quality index \eqn{Q}, \code{Brier} score (average squared difference in \code{p} and \code{y}), \code{Intercept}, and \code{Slope}, \eqn{E_{max}}=maximum absolute difference in predicted and loess-calibrated probabilities, \code{Eavg}, the average in same, \code{E90}, the 0.9 quantile of same, the Spiegelhalter \eqn{Z}-test for calibration accuracy, and its two-tailed \eqn{P}-value. If \code{pl=TRUE}, plots fitted logistic calibration curve and optionally a smooth nonparametric fit using \code{lowess(p,y,iter=0)} and grouped proportions vs. mean predicted probability in group. If the predicted probabilities or logits are constant, the statistics are returned and no plot is made. \code{Eavg, Emax, E90} were from linear logistic calibration before rms 4.5-1. When \code{group} is present, different statistics are computed, different graphs are made, and the object returned by \code{val.prob} is different. \code{group} specifies a stratification variable. Validations are done separately by levels of group and overall.
A \code{print} method prints summary statistics and several quantiles of predicted probabilities, and a \code{plot} method plots calibration curves with summary statistics superimposed, along with selected quantiles of the predicted probabilities (shown as tick marks on calibration curves). Only the \code{lowess} calibration curve is estimated. The statistics computed are the average predicted probability, the observed proportion of events, a 1 d.f. chi-square statistic for testing for overall mis-calibration (i.e., a test of the observed vs. the overall average predicted probability of the event) (\code{ChiSq}), and a 2 d.f. chi-square statistic for testing simultaneously that the intercept of a linear logistic calibration curve is zero and the slope is one (\code{ChiSq2}), average absolute calibration error (average absolute difference between the \code{lowess}-estimated calibration curve and the line of identity, labeled \code{Eavg}), \code{Eavg} divided by the difference between the 0.95 and 0.05 quantiles of predictive probabilities (\code{Eavg/P90}), a "median odds ratio", i.e., the anti-log of the median absolute difference between predicted and calibrated predicted log odds of the event (\code{Med OR}), the C-index (ROC area), the Brier quadratic error score (\code{B}), a chi-square test of goodness of fit based on the Brier score (\code{B ChiSq}), and the Brier score computed on calibrated rather than raw predicted probabilities (\code{B cal}). The first chi-square test is a test of overall calibration accuracy ("calibration in the large"), and the second will also detect errors such as slope shrinkage caused by overfitting or regression to the mean. See Cox (1970) for both of these score tests. The goodness of fit test based on the (uncalibrated) Brier score is due to Hilden, Habbema, and Bjerregaard (1978) and is discussed in Spiegelhalter (1986). When \code{group} is present you can also specify sampling \code{weights} (usually frequencies), to obtained weighted calibration curves. To get the behavior that results from a grouping variable being present without having a grouping variable, use \code{group=TRUE}. In the \code{plot} method, calibration curves are drawn and labeled by default where they are maximally separated using the \code{labcurve} function. The following parameters do not apply when \code{group} is present: \code{pl}, \code{smooth}, \code{logistic.cal}, \code{m}, \code{g}, \code{cuts}, \code{emax.lim}, \code{legendloc}, \code{riskdist}, \code{mkh}, \code{connect.group}, \code{connect.smooth}. The following parameters apply to the \code{plot} method but not to \code{val.prob}: \code{xlab}, \code{ylab}, \code{lim}, \code{statloc}, \code{cex}. } \usage{ val.prob(p, y, logit, group, weights=rep(1,length(y)), normwt=FALSE, pl=TRUE, smooth=TRUE, logistic.cal=TRUE, xlab="Predicted Probability", ylab="Actual Probability", lim=c(0, 1), m, g, cuts, emax.lim=c(0,1), legendloc=lim[1] + c(0.55 * diff(lim), 0.27 * diff(lim)), statloc=c(0,0.99), riskdist=c("predicted", "calibrated"), cex=.7, mkh=.02, connect.group=FALSE, connect.smooth=TRUE, g.group=4, evaluate=100, nmin=0) \method{print}{val.prob}(x, \dots) \method{plot}{val.prob}(x, xlab="Predicted Probability", ylab="Actual Probability", lim=c(0,1), statloc=lim, stats=1:12, cex=.5, lwd.overall=4, quantiles=c(.05,.95), flag, \dots) } \arguments{ \item{p}{ predicted probability } \item{y}{ vector of binary outcomes } \item{logit}{ predicted log odds of outcome. Specify either \code{p} or \code{logit}. 
} \item{group}{ a grouping variable. If numeric this variable is grouped into \code{g.group} quantile groups (default is quartiles). Set \code{group=TRUE} to use the \code{group} algorithm but with a single stratum for \code{val.prob}. } \item{weights}{ an optional numeric vector of per-observation weights (usually frequencies), used only if \code{group} is given. } \item{normwt}{ set to \code{TRUE} to make \code{weights} sum to the number of non-missing observations. } \item{pl}{ TRUE to plot calibration curves and optionally statistics } \item{smooth}{ plot smooth fit to \code{(p,y)} using \code{lowess(p,y,iter=0)} } \item{logistic.cal}{ plot linear logistic calibration fit to \code{(p,y)} } \item{xlab}{ x-axis label, default is \code{"Predicted Probability"} for \code{val.prob}. } \item{ylab}{ y-axis label, default is \code{"Actual Probability"} for \code{val.prob}. } \item{lim}{ limits for both x and y axes } \item{m}{ If grouped proportions are desired, average no. observations per group } \item{g}{ If grouped proportions are desired, number of quantile groups } \item{cuts}{ If grouped proportions are desired, actual cut points for constructing intervals, e.g. \code{c(0,.1,.8,.9,1)} or \code{seq(0,1,by=.2)} } \item{emax.lim}{ Vector containing lowest and highest predicted probability over which to compute \code{Emax}. } \item{legendloc}{ If \code{pl=TRUE}, list with components \code{x,y} or vector \code{c(x,y)} for upper left corner of legend for curves and points. Default is \code{c(.55, .27)} scaled to \code{lim}. Use \code{locator(1)} to use the mouse, \code{FALSE} to suppress legend. } \item{statloc}{ \eqn{D_{xy}}, \eqn{C}, \eqn{R^2}, \eqn{D}, \eqn{U}, \eqn{Q}, \code{Brier} score, \code{Intercept}, \code{Slope}, and \eqn{E_{max}} will be added to plot, using \code{statloc} as the upper left corner of a box (default is \code{c(0,.9)}). You can specify a list or a vector. Use \code{locator(1)} for the mouse, \code{FALSE} to suppress statistics. This is plotted after the curve legends. } \item{riskdist}{ Use \code{"calibrated"} to plot the relative frequency distribution of calibrated probabilities after dividing into 101 bins from \code{lim[1]} to \code{lim[2]}. Set to \code{"predicted"} (the default as of rms 4.5-1) to use raw assigned risk, \code{FALSE} to omit risk distribution. Values are scaled so that highest bar is \code{0.15*(lim[2]-lim[1])}. } \item{cex}{ Character size for legend or for table of statistics when \code{group} is given } \item{mkh}{ Size of symbols for legend. Default is 0.02 (see \code{par()}). } \item{connect.group}{ Defaults to \code{FALSE} to only represent group fractions as triangles. Set to \code{TRUE} to also connect with a solid line. } \item{connect.smooth}{ Defaults to \code{TRUE} to draw smoothed estimates using a dashed line. Set to \code{FALSE} to instead use dots at individual estimates. } \item{g.group}{ number of quantile groups to use when \code{group} is given and variable is numeric. } \item{evaluate}{ number of points at which to store the \code{lowess}-calibration curve. Default is 100. If there are more than \code{evaluate} unique predicted probabilities, \code{evaluate} equally-spaced quantiles of the unique predicted probabilities, with linearly interpolated calibrated values, are retained for plotting (and stored in the object returned by \code{val.prob}). } \item{nmin}{ applies when \code{group} is given.
When \code{nmin} \eqn{> 0}, \code{val.prob} will not store coordinates of smoothed calibration curves in the outer tails, where there are fewer than \code{nmin} raw observations represented in those tails. If for example \code{nmin}=50, the \code{plot} function will only plot the estimated calibration curve from \eqn{a} to \eqn{b}, where there are 50 subjects with predicted probabilities \eqn{< a} and \eqn{> b}. \code{nmin} is ignored when computing accuracy statistics. } \item{x}{result of \code{val.prob} (with \code{group} in effect)} \item{\dots}{ optional arguments for \code{labcurve} (through \code{plot}). Commonly used options are \code{col} (vector of colors for the strata plus overall) and \code{lty}. Ignored for \code{print}. } \item{stats}{ vector of column numbers of statistical indexes to write on plot } \item{lwd.overall}{ line width for plotting the overall calibration curve } \item{quantiles}{ a vector listing which quantiles should be indicated on each calibration curve using tick marks. The values in \code{quantiles} can be any number of values from the following: .01, .025, .05, .1, .25, .5, .75, .9, .95, .975, .99. By default the 0.05 and 0.95 quantiles are indicated. } \item{flag}{ a function of the matrix of statistics (rows representing groups) returning a vector of character strings (one value for each group, including "Overall"). \code{plot.val.prob} will print this vector of character values to the left of the statistics. The \code{flag} function can refer to columns of the matrix used as input to the function by their names given in the description above. The default function returns \code{"*"} if either \code{ChiSq2} or \code{B ChiSq} is significant at the 0.01 level and \code{" "} otherwise. } } \value{ \code{val.prob} without \code{group} returns a vector with the following named elements: \code{Dxy}, \code{R2}, \code{D}, \code{D:Chi-sq}, \code{D:p}, \code{U}, \code{U:Chi-sq}, \code{U:p}, \code{Q}, \code{Brier}, \code{Intercept}, \code{Slope}, \code{S:z}, \code{S:p}, \code{Emax}. When \code{group} is present \code{val.prob} returns an object of class \code{val.prob} containing a list with summary statistics and calibration curves for all the strata plus \code{"Overall"}. } \details{ The 2 d.f. \eqn{\chi^2}{chi-square} test and \code{Med OR} exclude predicted or calibrated predicted probabilities \eqn{\leq 0} or \eqn{\geq 1}, adjusting the sample size as needed. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \references{ Harrell FE, Lee KL, Mark DB (1996): Multivariable prognostic models: Issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors. Stat in Med 15:361--387. Harrell FE, Lee KL (1987): Using logistic calibration to assess the accuracy of probability predictions (Technical Report). Miller ME, Hui SL, Tierney WM (1991): Validation techniques for logistic regression models. Stat in Med 10:1213--1226. Stallard N (2009): Simple tests for the external validation of mortality prediction scores. Stat in Med 28:377--388. Harrell FE, Lee KL (1985): A comparison of the \emph{discrimination} of discriminant analysis and logistic regression under multivariate normality. In Biostatistics: Statistics in Biomedical, Public Health, and Environmental Sciences. The Bernard G. Greenberg Volume, ed. PK Sen. New York: North-Holland, p. 333--343. Cox DR (1970): The Analysis of Binary Data, 1st edition, section 4.4. London: Methuen.
Spiegelhalter DJ (1986): Probabilistic prediction in patient management. Stat in Med 5:421--433. Rufibach K (2010): Use of Brier score to assess binary predictions. J Clin Epi 63:938--939. Tjur T (2009): Coefficients of determination in logistic regression models--A new proposal: The coefficient of discrimination. Am Statist 63:366--372. } \seealso{ \code{\link{validate.lrm}}, \code{\link{lrm.fit}}, \code{\link{lrm}}, \code{\link[Hmisc]{labcurve}}, \code{\link[Hmisc]{wtd.stats}}, \code{\link[Hmisc]{scat1d}} } \examples{ # Fit logistic model on 100 observations simulated from the actual # model given by Prob(Y=1 given X1, X2, X3) = 1/(1+exp[-(-1 + 2X1)]), # where X1 is a random uniform [0,1] variable. Hence X2 and X3 are # irrelevant. After fitting a linear additive model in X1, X2, # and X3, the coefficients are used to predict Prob(Y=1) on a # separate sample of 100 observations. Note that data splitting is # an inefficient validation method unless n > 20,000. set.seed(1) n <- 200 x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) logit <- 2*(x1-.5) P <- 1/(1+exp(-logit)) y <- ifelse(runif(n)<=P, 1, 0) d <- data.frame(x1,x2,x3,y) f <- lrm(y ~ x1 + x2 + x3, subset=1:100) pred.logit <- predict(f, d[101:200,]) phat <- 1/(1+exp(-pred.logit)) val.prob(phat, y[101:200], m=20, cex=.5) # subgroups of 20 obs. # Validate predictions more stringently by stratifying on whether # x1 is above or below the median v <- val.prob(phat, y[101:200], group=x1[101:200], g.group=2) v plot(v) plot(v, flag=function(stats) ifelse( stats[,'ChiSq2'] > qchisq(.95,2) | stats[,'B ChiSq'] > qchisq(.95,1), '*', ' ') ) # Stars rows of statistics in plot corresponding to significant # mis-calibration at the 0.05 level instead of the default, 0.01 plot(val.prob(phat, y[101:200], group=x1[101:200], g.group=2), col=1:3) # 3 colors (1 for overall) # Weighted calibration curves # plot(val.prob(pred, y, group=age, weights=freqs)) } \keyword{models} \keyword{regression} \keyword{htest} \keyword{smooth} \concept{model validation} \concept{predictive accuracy} \concept{logistic regression model} \concept{sampling} rms/man/Glm.Rd0000644000176200001440000000417113701122720012634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Glm.r \name{Glm} \alias{Glm} \title{rms Version of glm} \usage{ Glm( formula, family = gaussian, data = environment(formula), weights, subset, na.action = na.delete, start = NULL, offset = NULL, control = glm.control(...), model = TRUE, method = "glm.fit", x = FALSE, y = TRUE, contrasts = NULL, ... ) } \arguments{ \item{formula, family, data, weights, subset, na.action, start, offset, control, model, method, x, y, contrasts}{see \code{\link[stats:glm]{stats::glm()}}; for \code{print} \code{x} is the result of \code{Glm}} \item{...}{ignored. For \code{print}, specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc., or \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} } \value{ a fit object like that produced by \code{\link[stats:glm]{stats::glm()}} but with \code{rms} attributes and a \code{class} of \code{"rms"}, \code{"Glm"}, \code{"glm"}, and \code{"lm"}. The \code{g} element of the fit object is the \eqn{g}-index. } \description{ This function saves \code{rms} attributes with the fit object so that \code{anova.rms}, \code{Predict}, etc. can be used just as with \code{ols} and other fits. No \code{validate} or \code{calibrate} methods exist for \code{Glm} though.
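A minimal sketch of the intended workflow follows (the data and object names are hypothetical, and a \code{datadist} is assumed to have been set up as shown):
\preformatted{
set.seed(1)
d  <- data.frame(x = runif(50), y = rpois(50, 2))   # made-up count data
dd <- datadist(d); options(datadist='dd')
f  <- Glm(y ~ rcs(x, 4), family=poisson(), data=d)  # spline via rcs()
anova(f)                        # Wald ANOVA with pooled nonlinearity test
ggplot(Predict(f, x, fun=exp))  # predicted mean counts vs. x
}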
} \details{ For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. } \examples{ ## Dobson (1990) Page 93: Randomized Controlled Trial : counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) f <- glm(counts ~ outcome + treatment, family=poisson()) f anova(f) summary(f) f <- Glm(counts ~ outcome + treatment, family=poisson()) # could have had rcs( ) etc. if there were continuous predictors f anova(f) summary(f, outcome=c('1','2','3'), treatment=c('1','2','3')) } \seealso{ \code{\link[stats:glm]{stats::glm()}},\code{\link[Hmisc:GiniMd]{Hmisc::GiniMd()}}, \code{\link[=prModFit]{prModFit()}}, \link[stats:glm.summaries]{stats::residuals.glm} } \keyword{models} \keyword{regression} rms/man/gendata.Rd0000644000176200001440000000762013714237251013532 0ustar liggesusers\name{gendata} \alias{gendata} \title{Generate Data Frame with Predictor Combinations} \description{ If \code{nobs} is not specified, allows user to specify predictor settings by e.g. \code{age=50, sex="male"}, and any omitted predictors are set to reference values (default=median for continuous variables, first level for categorical ones - see \code{datadist}). If any predictor has more than one value given, \code{expand.grid} is called to generate all possible combinations of values, unless \code{expand=FALSE}. If \code{nobs} is given, a data frame is first generated which has \code{nobs} of adjust-to values duplicated. Then an editor window is opened which allows the user to subset the variable names down to ones which she intends to vary (this streamlines the \code{data.ed} step). Then, if any predictors kept are discrete and \code{viewvals=TRUE}, a window (using \code{page}) is opened defining the possible values of this subset, to facilitate data editing. Then the \code{data.ed} function is invoked to allow interactive overriding of predictor settings in the \code{nobs} rows. The subset of variables are combined with the other predictors which were not displayed with \code{data.ed}, and a final full data frame is returned. \code{gendata} is most useful for creating a \code{newdata} data frame to pass to \code{predict}. } \usage{ gendata(fit, \dots, nobs, viewvals=FALSE, expand=TRUE, factors) } \arguments{ \item{fit}{ a fit object created with \code{rms} in effect } \item{...}{ predictor settings, if \code{nobs} is not given. } \item{nobs}{ number of observations to create if doing it interactively using X-windows } \item{viewvals}{ if \code{nobs} is given, set \code{viewvals=TRUE} to open a window displaying the possible value of categorical predictors } \item{expand}{ set to \code{FALSE} to prevent \code{expand.grid} from being called, and to instead just convert to a data frame.} \item{factors}{ a list containing predictor settings with their names. This is an alternative to specifying the variables separately in \dots. Unlike the usage of \dots, variables getting default ranges in \code{factors} should have \code{NA} as their value. }} \value{ a data frame with all predictors, and an attribute \code{names.subset} if \code{nobs} is specified. This attribute contains the vector of variable names for predictors which were passed to \code{de} and hence were allowed to vary. If neither \code{nobs} nor any predictor settings were given, returns a data frame with adjust-to values. 
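For instance, the following two calls should build the same four-row \code{expand.grid}-style predictor data frame (a sketch; \code{f}, \code{age}, and \code{race} are hypothetical):
\preformatted{
# gendata(f, age=c(50,60), race=c('b','a'))
# gendata(f, factors=list(age=c(50,60), race=c('b','a')))
}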
} \section{Side Effects}{ optionally writes to the terminal, opens X-windows, and generates a temporary file using \code{sink}. } \details{ if you have a variable in \code{\dots} that is named \code{n, no, nob, nobs}, add \code{nobs=FALSE} to the invocation to prevent that variable from being misrecognized as \code{nobs} } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{predict.rms}}, \code{\link{survest.cph}}, \code{\link{survest.psm}}, \code{\link{rmsMisc}}, \code{\link{expand.grid}}, \code{\link{de}}, \code{\link{page}}, \code{\link{print.datadist}}, \code{\link{Predict}} } \examples{ set.seed(1) age <- rnorm(200, 50, 10) sex <- factor(sample(c('female','male'),200,TRUE)) race <- factor(sample(c('a','b','c','d'),200,TRUE)) y <- sample(0:1, 200, TRUE) dd <- datadist(age,sex,race) options(datadist="dd") f <- lrm(y ~ age*sex + race) gendata(f) gendata(f, age=50) d <- gendata(f, age=50, sex="female") # leave race=reference category d <- gendata(f, age=c(50,60), race=c("b","a")) # 4 obs. d$Predicted <- predict(f, d, type="fitted") d # Predicted column prints at the far right options(datadist=NULL) \dontrun{ d <- gendata(f, nobs=5, view=TRUE) # 5 interactively defined obs. d[,attr(d,"names.subset")] # print variables which varied predict(f, d) } } \keyword{methods} \keyword{models} \keyword{regression} \keyword{manip} rms/man/residuals.ols.Rd0000644000176200001440000000352713714237251014710 0ustar liggesusers\name{residuals.ols} \alias{residuals.ols} \title{Residuals for ols} \description{Computes various residuals and measures of influence for a fit from \code{ols}.} \usage{ \method{residuals}{ols}(object, type=c("ordinary", "score", "dfbeta", "dfbetas", "dffit", "dffits", "hat", "hscore"), \dots) } \arguments{ \item{object}{ object created by \code{ols}. Depending on \code{type}, you may have had to specify \code{x=TRUE} to \code{ols}. } \item{type}{ type of residual desired. \code{"ordinary"} refers to the usual residual. \code{"score"} is the matrix of score residuals (contributions to first derivative of log likelihood). \code{dfbeta} and \code{dfbetas} mean respectively the raw and normalized matrix of changes in regression coefficients after deleting in turn each observation. The coefficients are normalized by their standard errors. \code{hat} contains the leverages --- diagonals of the ``hat'' matrix. \code{dffit} and \code{dffits} contain respectively the difference and normalized difference in predicted values when each observation is omitted. The S \code{lm.influence} function is used. When \code{type="hscore"}, the ordinary residuals are divided by one minus the corresponding hat matrix diagonal element to make residuals have equal variance.
} \item{\dots}{ignored} } \value{ a matrix or vector, with places for observations that were originally deleted by \code{ols} held by \code{NA}s } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{lm.influence}}, \code{\link{ols}}, \code{\link{which.influence}} } \examples{ set.seed(1) x1 <- rnorm(100) x2 <- rnorm(100) x1[1] <- 100 y <- x1 + x2 + rnorm(100) f <- ols(y ~ x1 + x2, x=TRUE, y=TRUE) resid(f, "dfbetas") which.influence(f) } \keyword{models} \keyword{regression} \concept{model validation} rms/man/validate.rpart.Rd0000644000176200001440000000710513714237251015045 0ustar liggesusers\name{validate.rpart} \alias{validate.rpart} \alias{print.validate.rpart} \alias{plot.validate.rpart} \title{ Dxy and Mean Squared Error by Cross-validating a Tree Sequence } \description{ Uses \code{xval}-fold cross-validation of a sequence of trees to derive estimates of the mean squared error and Somers' \code{Dxy} rank correlation between predicted and observed responses. In the case of a binary response variable, the mean squared error is the Brier accuracy score. For survival trees, \code{Dxy} is negated so that larger is better. There are \code{print} and \code{plot} methods for objects created by \code{validate.rpart}. } \usage{ # f <- rpart(formula=y ~ x1 + x2 + \dots) # or rpart \method{validate}{rpart}(fit, method, B, bw, rule, type, sls, aics, force, estimates, pr=TRUE, k, rand, xval=10, FUN, \dots) \method{print}{validate.rpart}(x, \dots) \method{plot}{validate.rpart}(x, what=c("mse","dxy"), legendloc=locator, \dots) } \arguments{ \item{fit}{ an object created by \code{rpart}. You must have specified the \code{model=TRUE} argument to \code{rpart}. } \item{method,B,bw,rule,type,sls,aics,force,estimates}{ are there only for consistency with the generic \code{validate} function; these are ignored } \item{x}{the result of \code{validate.rpart}} \item{k}{ a sequence of cost/complexity values. By default these are obtained from calling \code{FUN} with no optional arguments or from the \code{rpart} \code{cptable} object in the original fit object. You may also specify a scalar or vector. } \item{rand}{a random sample (usually omitted)} \item{xval}{number of splits} \item{FUN}{ the name of a function which produces a sequence of trees, such as \code{prune}. } \item{\dots}{ additional arguments to \code{FUN} (ignored by \code{print,plot}). } \item{pr}{ set to \code{FALSE} to prevent intermediate results for each \code{k} from being printed } \item{what}{ a vector of things to plot. By default, 2 plots will be done, one for \code{mse} and one for \code{Dxy}. } \item{legendloc}{ a function that is evaluated with a single argument equal to \code{1} to generate a list with components \code{x, y} specifying coordinates of the upper left corner of a legend, or a 2-vector. For the latter, \code{legendloc} specifies the relative fraction of the plot at which to center the legend. } } \value{ a list of class \code{"validate.rpart"} with components named \code{k, size, dxy.app}, \code{dxy.val, mse.app, mse.val, binary, xval}. \code{size} is the number of nodes, \code{dxy} refers to Somers' \code{D}, \code{mse} refers to mean squared error of prediction, \code{app} means apparent accuracy on training samples, \code{val} means validated accuracy on test samples, \code{binary} is a logical variable indicating whether or not the response variable was binary (a logical or 0/1 variable is binary).
\code{size} will not be present if the user specifies \code{k}. } \section{Side Effects}{ prints if \code{pr=TRUE} } \author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr fh@fharrell.com } \seealso{ \code{\link[rpart]{rpart}}, \code{\link[Hmisc]{somers2}}, \code{\link{dxy.cens}}, \code{\link{locator}}, \code{\link{legend}} } \examples{ \dontrun{ n <- 100 set.seed(1) x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) y <- 1*(x1+x2+rnorm(n) > 1) table(y) require(rpart) f <- rpart(y ~ x1 + x2 + x3, model=TRUE) v <- validate(f) v # note the poor validation par(mfrow=c(1,2)) plot(v, legendloc=c(.2,.5)) par(mfrow=c(1,1)) } } \keyword{models} \keyword{tree} \keyword{category} \concept{model validation} \concept{predictive accuracy} rms/man/hazard.ratio.plot.Rd0000644000176200001440000000631513714237251015472 0ustar liggesusers\name{hazard.ratio.plot} \alias{hazard.ratio.plot} \title{Hazard Ratio Plot} \description{ The \code{hazard.ratio.plot} function repeatedly estimates Cox regression coefficients and confidence limits within time intervals. The log hazard ratios are plotted against the mean failure/censoring time within the interval. Unless \code{times} is specified, the number of time intervals will be \eqn{\max(round(d/e),2)}, where \eqn{d} is the total number of events in the sample. Efron's likelihood is used for estimating Cox regression coefficients (using \code{coxph.fit}). In the case of tied failure times, some intervals may have a point in common. } \usage{ hazard.ratio.plot(x, Srv, which, times=, e=30, subset, conf.int=.95, legendloc=NULL, smooth=TRUE, pr=FALSE, pl=TRUE, add=FALSE, ylim, cex=.5, xlab="t", ylab, antilog=FALSE, \dots) } \arguments{ \item{x}{ a vector or matrix of predictors } \item{Srv}{a \code{Surv} object} \item{which}{ a vector of column numbers of \code{x} for which to estimate hazard ratios across time and make plots. The default is to do so for all predictors. Whenever one predictor is displayed, all other predictors in the \code{x} matrix are adjusted for (with a separate adjustment form for each time interval). } \item{times}{ optional vector of time interval endpoints. Example: \code{times=c(1,2,3)} uses intervals \code{[0,1), [1,2), [2,3), [3+)}. If times is omitted, uses intervals containing \code{e} events } \item{e}{ number of events per time interval if times not given } \item{subset}{ vector used for subsetting the entire analysis, e.g. \code{subset=sex=="female"} } \item{conf.int}{ confidence interval coverage } \item{legendloc}{ location for legend. Omit to use mouse, \code{"none"} for none, \code{"ll"} for lower left of graph, or actual x and y coordinates (e.g. \code{c(2,3)}) } \item{smooth}{ also plot the super--smoothed version of the log hazard ratios } \item{pr}{ defaults to \code{FALSE} to suppress printing of individual Cox fits } \item{pl}{ defaults to \code{TRUE} to plot results } \item{add}{ add this plot to an already existing plot } \item{ylim}{ vector of \code{y}-axis limits. Default is computed to include confidence bands. } \item{cex}{ character size for legend information, default is 0.5 } \item{xlab}{ label for \code{x}-axis, default is \code{"t"} } \item{ylab}{ label for \code{y}-axis, default is \code{"Log Hazard Ratio"} or \code{"Hazard Ratio"}, depending on \code{antilog}. } \item{antilog}{ default is \code{FALSE}. Set to \code{TRUE} to plot anti-log, i.e., hazard ratio. 
} \item{...}{ optional graphical parameters }} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link[survival]{cox.zph}}, \code{\link{residuals.cph}}, \code{\link[survival]{survival-internal}}, \code{\link{cph}}, \code{\link[survival]{coxph}}, \code{\link[survival]{Surv}} } \examples{ n <- 500 set.seed(1) age <- 50 + 12*rnorm(n) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)) d.time <- -log(runif(n))/h label(d.time) <- 'Follow-up Time' e <- ifelse(d.time <= cens,1,0) d.time <- pmin(d.time, cens) units(d.time) <- "Year" hazard.ratio.plot(age, Surv(d.time,e), e=20, legendloc='ll') } \keyword{survival} rms/man/residuals.cph.Rd0000644000176200001440000001036412257363317014676 0ustar liggesusers\name{residuals.cph} \alias{residuals.cph} \title{Residuals for a cph Fit} \description{ Calculates martingale, deviance, score or Schoenfeld residuals (scaled or unscaled) or influence statistics for a Cox proportional hazards model. This is a slightly modified version of Therneau's \code{residuals.coxph} function. It assumes that \code{x=TRUE} and \code{y=TRUE} were specified to \code{cph}, except for martingale residuals, which are stored with the fit by default. } \usage{ \method{residuals}{cph}(object, type=c("martingale", "deviance", "score", "schoenfeld", "dfbeta", "dfbetas", "scaledsch", "partial"), \dots) } \arguments{ \item{object}{a \code{cph} object} \item{type}{ character string indicating the type of residual desired; the default is martingale. Only enough of the string to determine a unique match is required. Instead of the usual residuals, \code{type="dfbeta"} may be specified to obtain approximate leave-out-one \eqn{\Delta \beta}s. Use \code{type="dfbetas"} to normalize the \eqn{\Delta \beta}s for the standard errors of the regression coefficient estimates. Scaled Schoenfeld residuals (\code{type="scaledsch"}, Grambsch and Therneau, 1993) better reflect the log hazard ratio function than ordinary Schoenfeld residuals, and they are on the regression coefficient scale. The weights use Grambsch and Therneau's "average variance" method. } \item{\dots}{see \code{\link[survival]{residuals.coxph}}} } \value{ The object returned will be a vector for martingale and deviance residuals and matrices for score and schoenfeld residuals, dfbeta, or dfbetas. There will be one row of residuals for each row in the input data (without \code{collapse}). One column of score and Schoenfeld residuals will be returned for each column in the model.matrix. The scaled Schoenfeld residuals are used in the \code{cox.zph} function. The score residuals are each individual's contribution to the score vector. Two transformations of this are often more useful: \code{dfbeta} is the approximate change in the coefficient vector if that observation were dropped, and \code{dfbetas} is the approximate change in the coefficients, scaled by the standard error for the coefficients. } \references{ T. Therneau, P. Grambsch, and T.Fleming. "Martingale based residuals for survival models", Biometrika, March 1990. P. Grambsch, T. Therneau. "Proportional hazards tests and diagnostics based on weighted residuals", unpublished manuscript, Feb 1993. 
} \seealso{ \code{\link{cph}}, \code{\link[survival]{coxph}}, \code{\link[survival]{residuals.coxph}}, \code{\link{cox.zph}}, \code{\link{naresid}} } \examples{ # fit <- cph(Surv(start, stop, event) ~ (age + surgery)* transplant, # data=jasa1) # mresid <- resid(fit, collapse=jasa1$id) # Get unadjusted relationships for several variables # Pick one variable that's not missing too much, for fit n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='female')) d.time <- -log(runif(n))/h death <- ifelse(d.time <= cens,1,0) d.time <- pmin(d.time, cens) f <- cph(Surv(d.time, death) ~ age + blood.pressure + cholesterol, iter.max=0) res <- resid(f) # This re-inserts rows for NAs, unlike f$resid yl <- quantile(res, c(10/length(res),1-10/length(res)), na.rm=TRUE) # Scale all plots from 10th smallest to 10th largest residual par(mfrow=c(2,2), oma=c(3,0,3,0)) p <- function(x) { s <- !is.na(x+res) plot(lowess(x[s], res[s], iter=0), xlab=label(x), ylab="Residual", ylim=yl, type="l") } p(age); p(blood.pressure); p(cholesterol) mtext("Smoothed Martingale Residuals", outer=TRUE) # Assess PH by estimating log relative hazard over time f <- cph(Surv(d.time,death) ~ age + sex + blood.pressure, x=TRUE, y=TRUE) r <- resid(f, "scaledsch") tt <- as.numeric(dimnames(r)[[1]]) par(mfrow=c(3,2)) for(i in 1:3) { g <- areg.boot(I(r[,i]) ~ tt, B=20) plot(g, boot=FALSE) # shows bootstrap CIs } # Focus on 3 graphs on right # Easier approach: plot(cox.zph(f)) # invokes plot.cox.zph par(mfrow=c(1,1)) } \keyword{survival} \concept{model validation} rms/man/validate.Rd0000644000176200001440000001541613714237251013716 0ustar liggesusers\name{validate} \alias{validate} \alias{print.validate} \alias{latex.validate} \alias{html.validate} \title{Resampling Validation of a Fitted Model's Indexes of Fit} \description{ The \code{validate} function when used on an object created by one of the \code{rms} series does resampling validation of a regression model, with or without backward step-down variable deletion. } \usage{ # fit <- fitting.function(formula=response ~ terms, x=TRUE, y=TRUE) validate(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, \dots) \method{print}{validate}(x, digits=4, B=Inf, \dots) \method{latex}{validate}(object, digits=4, B=Inf, file='', append=FALSE, title=first.word(deparse(substitute(x))), caption=NULL, table.env=FALSE, size='normalsize', extracolsize=size, \dots) \method{html}{validate}(object, digits=4, B=Inf, caption=NULL, \dots) } \arguments{ \item{fit}{ a fit derived by e.g. \code{lrm}, \code{cph}, \code{psm}, \code{ols}. The options \code{x=TRUE} and \code{y=TRUE} must have been specified. } \item{method}{ may be \code{"crossvalidation"}, \code{"boot"} (the default), \code{".632"}, or \code{"randomization"}. See \code{predab.resample} for details. Can abbreviate, e.g. \code{"cross", "b", ".6"}. } \item{B}{ number of repetitions. For \code{method="crossvalidation"}, is the number of groups of omitted observations. For \code{print.validate}, \code{latex.validate}, and \code{html.validate}, \code{B} is an upper limit on the number of resamples for which information is printed about which variables were selected in each model re-fit. Specify zero to suppress printing. Default is to print all re-samples.
} \item{bw}{ \code{TRUE} to do fast step-down using the \code{fastbw} function, for both the overall model and for each repetition. \code{fastbw} keeps parameters together that represent the same factor. } \item{rule}{ Applies if \code{bw=TRUE}. \code{"aic"} to use Akaike's information criterion as a stopping rule (i.e., a factor is deleted if the \eqn{\chi^2}{chi-square} falls below twice its degrees of freedom), or \code{"p"} to use \eqn{P}-values. } \item{type}{ \code{"residual"} or \code{"individual"} - stopping rule is for individual factors or for the residual \eqn{\chi^2}{chi-square} for all variables deleted } \item{sls}{ significance level for a factor to be kept in a model, or for judging the residual \eqn{\chi^2}{chi-square}. } \item{aics}{ cutoff on AIC when \code{rule="aic"}. } \item{force}{see \code{\link{fastbw}}} \item{estimates}{see \code{\link{print.fastbw}}} \item{pr}{ \code{TRUE} to print results of each repetition } \item{\dots}{ parameters for each specific validate function, and parameters to pass to \code{predab.resample} (note especially the \code{group}, \code{cluster}, and \code{subset} parameters). For \code{latex}, optional arguments to \code{\link[Hmisc:latex]{latex.default}}. Ignored for \code{html.validate}. For \code{psm}, you can pass the \code{maxiter} parameter here (passed to \code{survreg.control}, default is 15 iterations) as well as a \code{tol} parameter for judging matrix singularity in \code{solvet} (default is 1e-12) and a \code{rel.tolerance} parameter that is passed to \code{survreg.control} (default is 1e-5). For \code{print.validate} \ldots is ignored. } \item{x,object}{an object produced by one of the \code{validate} functions} \item{digits}{number of decimal places to print} \item{file}{file to write LaTeX output. Default is standard output.} \item{append}{set to \code{TRUE} to append LaTeX output to an existing file} \item{title, caption, table.env, extracolsize}{see \code{\link[Hmisc]{latex.default}}. If \code{table.env} is \code{FALSE} and \code{caption} is given, the character string contained in \code{caption} will be placed before the table, centered.} \item{size}{size of LaTeX output. Default is \code{'normalsize'}. Must be a defined LaTeX size when prepended by double slash. } } \details{ It provides bias-corrected indexes that are specific to each type of model. For \code{validate.cph} and \code{validate.psm}, see \code{validate.lrm}, which is similar. \cr For \code{validate.cph} and \code{validate.psm}, there is an extra argument \code{dxy}, which if \code{TRUE} causes the \code{dxy.cens} function to be invoked to compute the Somers' \eqn{D_{xy}}{Dxy} rank correlation at each resample. The values corresponding to the row \eqn{D_{xy}}{Dxy} are equal to \eqn{2 * (C - 0.5)} where C is the C-index or concordance probability. \cr For \code{validate.cph} with \code{dxy=TRUE}, you must specify an argument \code{u} if the model is stratified, since survival curves can then cross and \eqn{X\beta}{X beta} is not 1-1 with predicted survival. \cr There is also a \code{validate} method for \code{tree}, which only does cross-validation and which has a different list of arguments. } \value{ a matrix with rows corresponding to the statistical indexes and columns for the original index, resample estimates, indexes applied to the whole or omitted sample using the model derived from the resample, average optimism, corrected index, and number of successful re-samples.
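A sketch of how the result matrix might be indexed (the column label \code{"index.corrected"} is assumed to follow the layout just described):
\preformatted{
# v <- validate(f, B=300)       # f fitted with x=TRUE, y=TRUE
# v['Dxy', 'index.corrected']   # bias-corrected Somers' Dxy
}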
} \section{Side Effects}{ prints a summary, and optionally statistics for each re-fit } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{validate.ols}}, \code{\link{validate.cph}}, \code{\link{validate.lrm}}, \code{\link{validate.rpart}}, \code{\link{predab.resample}}, \code{\link{fastbw}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{calibrate}}, \code{\link{dxy.cens}}, \code{\link[survival]{survConcordance}} } \examples{ # See examples for validate.cph, validate.lrm, validate.ols # Example of validating a parametric survival model: n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" S <- Surv(dt,e) f <- psm(S ~ age*sex, x=TRUE, y=TRUE) # Weibull model # Validate full model fit validate(f, B=10) # usually B=150 # Validate stepwise model with typical (not so good) stopping rule # bw=TRUE does not preserve hierarchy of terms at present validate(f, B=10, bw=TRUE, rule="p", sls=.1, type="individual") } \keyword{models} \keyword{regression} \keyword{methods} \keyword{survival} \concept{model validation} \concept{predictive accuracy} \concept{bootstrap} rms/man/fastbw.Rd0000644000176200001440000001064713714237251013420 0ustar liggesusers\name{fastbw} \alias{fastbw} \alias{print.fastbw} \title{Fast Backward Variable Selection} \description{ Performs a slightly inefficient but numerically stable version of fast backward elimination on factors, using a method based on Lawless and Singhal (1978). This method uses the fitted complete model and computes approximate Wald statistics by computing conditional (restricted) maximum likelihood estimates assuming multivariate normality of estimates. \code{fastbw} deletes factors, not columns of the design matrix. Factors requiring multiple d.f. will be retained or dropped as a group. The function prints the deletion statistics for each variable in turn, and prints approximate parameter estimates for the model after deleting variables. The approximation is better when the number of factors deleted is not large. For \code{ols}, the approximation is exact for regression coefficients, and standard errors are only off by a factor equal to the ratio of the mean squared error estimate for the reduced model to the original mean squared error estimate for the full model. If the fit was from \code{ols}, \code{fastbw} will compute the usual \eqn{R^2} statistic for each model. } \usage{ fastbw(fit, rule=c("aic", "p"), type=c("residual", "individual", "total"), sls=.05, aics=0, eps=1e-9, k.aic=2, force=NULL) \method{print}{fastbw}(x, digits=4, estimates=TRUE, \dots) } \arguments{ \item{fit}{ fit object with \code{Varcov(fit)} defined (e.g., from \code{ols}, \code{lrm}, \code{cph}, \code{psm}, \code{glmD}) } \item{rule}{ Stopping rule. Defaults to \code{"aic"} for Akaike's information criterion. Use \code{rule="p"} to use \eqn{P}-values } \item{type}{ Type of statistic on which to base the stopping rule. Default is \code{"residual"} for the pooled residual chi-square. Use \code{type="individual"} to use Wald chi-square of individual factors. } \item{sls}{ Significance level for staying in a model if \code{rule="p"}. Default is .05. } \item{aics}{ For \code{rule="aic"}, variables are deleted until the chi-square - \code{k.aic} times d.f. would rise above \code{aics}. 
Default \code{aics} is zero to use the ordinary AIC. Set \code{aics} to say 10000 to see all variables deleted in order of descending importance. } \item{eps}{ Singularity criterion, default is \code{1E-9}. } \item{k.aic}{ multiplier to compute AIC, default is 2. To use BIC, set \code{k.aic} equal to \eqn{\log(n)}, where \eqn{n} is the effective sample size (number of events for survival models). } \item{force}{a vector of integers specifying parameters forced to be in the model, not counting intercept(s)} \item{x}{result of \code{fastbw}} \item{digits}{number of significant digits to print} \item{estimates}{set to \code{FALSE} to suppress printing table of approximate coefficients, SEs, etc., after variable deletions} \item{\dots}{ignored} } \value{ a list with an attribute \code{kept} if \code{bw=TRUE}, and the following components: \item{result}{ matrix of statistics with rows in order of deletion. } \item{names.kept}{ names of factors kept in final model. } \item{factors.kept}{ the subscripts of factors kept in the final model } \item{factors.deleted}{ opposite of \code{factors.kept}. } \item{parms.kept}{ column numbers in design matrix corresponding to parameters kept in the final model. } \item{parms.deleted}{ opposite of \code{parms.kept}. } \item{coefficients}{ vector of approximate coefficients of reduced model. } \item{var}{ approximate covariance matrix for reduced model. } \item{Coefficients}{ matrix of coefficients of all models. Rows correspond to the successive models examined and columns correspond to the coefficients in the full model. For variables not in a particular sub-model (row), the coefficients are zero. }} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \references{ Lawless, J. F. and Singhal, K. (1978): Efficient screening of nonnormal regression models. Biometrics 34:318--327. } \seealso{ \code{\link{rms}}, \code{\link{ols}}, \code{\link{lrm}}, \code{\link{cph}}, \code{\link{psm}}, \code{\link{validate}}, \code{\link[Hmisc]{solvet}}, \code{\link{rmsMisc}} } \examples{ \dontrun{ fastbw(fit, optional.arguments) # print results z <- fastbw(fit, optional.args) # typically used in simulations lm.fit(X[,z$parms.kept], Y) # least squares fit of reduced model } } \keyword{models} \keyword{regression} \keyword{htest} \concept{stepwise} \concept{variable selection} rms/man/rms.Rd0000644000176200001440000001543613714237251012734 0ustar liggesusers\name{rms} \alias{rms} \alias{Design} \alias{modelData} \title{rms Methods and Generic Functions} \description{ This is a series of special transformation functions (\code{asis}, \code{pol}, \code{lsp}, \code{rcs}, \code{catg}, \code{scored}, \code{strat}, \code{matrx}), fitting functions (e.g., \code{lrm},\code{cph}, \code{psm}, or \code{ols}), and generic analysis functions (\code{anova.rms}, \code{summary.rms}, \code{Predict}, \code{plot.Predict}, \code{ggplot.Predict}, \code{survplot}, \code{fastbw}, \code{validate}, \code{calibrate}, \code{specs.rms}, \code{which.influence}, \code{latexrms}, \code{nomogram}, \code{datadist}, \code{gendata}) that help automate many analysis steps, e.g. fitting restricted interactions and multiple stratification variables, analysis of variance (with tests of linearity of each factor and pooled tests), plotting effects of variables in the model, estimating and graphing effects of variables that appear non-linearly in the model using e.g. 
inter-quartile-range hazard ratios, bootstrapping model fits, and constructing nomograms for obtaining predictions manually. Behind the scene is the \code{Design} function which stores extra attributes. \code{Design()} is not intended to be called by users. \code{Design} causes detailed design attributes and descriptions of the distribution of predictors to be stored in an attribute of the \code{terms} component called \code{Design}. \code{modelData} is a replacement for \code{model.frame.default} that is much streamlined and prepares data for \code{Design()}. If a second formula is present, \code{modelData} ensures that missing data deletions are the same for both formulas, and produces a second model frame for \code{formula2} as the \code{data2} attribute of the main returned data frame. } \usage{ modelData(data=environment(formula), formula, formula2=NULL, weights, subset, na.action=na.delete, dotexpand=TRUE, callenv=parent.frame(n=2)) Design(mf, formula=NULL, specials=NULL, allow.offset=TRUE, intercept=1) # not to be called by the user; called by fitting routines # dist <- datadist(x1,x2,sex,age,race,bp) # or dist <- datadist(my.data.frame) # Can omit call to datadist if not using summary.rms, Predict, # survplot.rms, or if all variable settings are given to them # options(datadist="dist") # f <- fitting.function(formula = y ~ rcs(x1,4) + rcs(x2,5) + x1\%ia\%x2 + # rcs(x1,4)\%ia\%rcs(x2,5) + # strat(sex)*age + strat(race)*bp) # See rms.trans for rcs, strat, etc. # \%ia\% is restricted interaction - not doubly nonlinear # for x1 by x2 this uses the simple product only, but pools x1*x2 # effect with nonlinear function for overall tests # specs(f) # anova(f) # summary(f) # fastbw(f) # pred <- predict(f, newdata=expand.grid(x1=1:10,x2=3,sex="male", # age=50,race="black")) # pred <- predict(f, newdata=gendata(f, x1=1:10, x2=3, sex="male")) # This leaves unspecified variables set to reference values from datadist # pred.combos <- gendata(f, nobs=10) # Use X-windows to edit predictor settings # predict(f, newdata=pred.combos) # plot(Predict(f, x1)) # or ggplot(...) # latex(f) # nomogram(f) } \arguments{ \item{data}{a data frame or calling environment} \item{formula}{model formula} \item{formula2}{an optional second model formula (see for example \code{ppo} in \code{blrm})} \item{weights}{a weight variable or expression} \item{subset}{a subsetting expression evaluated in the calling frame or \code{data}} \item{na.action}{NA handling function, ideally one such as \code{na.delete} that stores extra information about data omissions} \item{specials}{a character vector specifying which function evaluations appearing in \code{formula} are "special" in the \code{model.frame} sense} \item{dotexpand}{set to \code{FALSE} to prevent . 
on right hand side of model formula from expanding into all variables in \code{data}; used for \code{cph}} \item{callenv}{the parent frame that called the fitting function} \item{mf}{a model frame} \item{allow.offset}{set to \code{TRUE} if model fitter allows an offset term} \item{intercept}{1 if an ordinary intercept is present, 0 otherwise} } \value{ a data frame augmented with additional information about the predictors and model formulation } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{rms.trans}}, \code{\link{rmsMisc}}, \code{\link{cph}}, \code{\link{lrm}}, \code{\link{ols}}, \code{\link{specs.rms}}, \code{\link{anova.rms}}, \code{\link{summary.rms}}, \code{\link{Predict}}, \code{\link{gendata}}, \code{\link{fastbw}}, \code{\link{predictrms}}. \code{\link{validate}}, \code{\link{calibrate}}, \code{\link{which.influence}}, \code{\link[Hmisc]{latex}}, \code{\link{latexrms}}, \code{\link{model.frame.default}}, \code{\link{datadist}}, \code{\link[Hmisc]{describe}}, \code{\link{nomogram}}, \code{\link{vif}}, \code{\link[Hmisc]{dataRep}} } \examples{ \dontrun{ require(rms) dist <- datadist(data=2) # can omit if not using summary, (gg)plot, survplot, # or if specify all variable values to them. Can # also defer. data=2: get distribution summaries # for all variables in search position 2 # run datadist once, for all candidate variables dist <- datadist(age,race,bp,sex,height) # alternative options(datadist="dist") f <- cph(Surv(d.time, death) ~ rcs(age,4)*strat(race) + bp*strat(sex)+lsp(height,60),x=TRUE,y=TRUE) anova(f) anova(f,age,height) # Joint test of 2 vars fastbw(f) summary(f, sex="female") # Adjust sex to "female" when testing # interacting factor bp bplot(Predict(f, age, height)) # 3-D plot ggplot(Predict(f, age=10:70, height=60)) latex(f) # LaTeX representation of fit f <- lm(y ~ x) # Can use with any fitting function that # calls model.frame.default, e.g. lm, glm specs.rms(f) # Use .rms since class(f)="lm" anova(f) # Works since Varcov(f) (=Varcov.lm(f)) works fastbw(f) options(datadist=NULL) f <- ols(y ~ x1*x2) # Saves enough information to do fastbw, anova anova(f) # Will not do Predict since distributions fastbw(f) # of predictors not saved plot(f, x1=seq(100,300,by=.5), x2=.5) # all values defined - don't need datadist dist <- datadist(x1,x2) # Equivalent to datadist(f) options(datadist="dist") plot(f, x1, x2=.5) # Now you can do plot, summary plot(nomogram(f, interact=list(x2=c(.2,.7)))) } } \keyword{models} \keyword{regression} \keyword{survival} \keyword{math} \keyword{manip} \keyword{methods} \concept{logistic regression model} rms/man/print.ols.Rd0000644000176200001440000000260413020543171014043 0ustar liggesusers\name{print.ols} \alias{print.ols} \title{Print ols} \description{ Formatted printing of an object of class \code{ols} using methods taken from \code{print.lm} and \code{summary.lm}. Prints R-squared, adjusted R-squared, parameter estimates, standard errors, and t-statistics (Z statistics if penalized estimation was used). For penalized estimation, prints the maximum penalized likelihood estimate of the residual standard deviation (\code{Sigma}) instead of the usual root mean squared error. Format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. 
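A sketch of switching output formats (assuming an existing \code{ols} fit \code{f}):
\preformatted{
# options(prType='html')   # f now prints as HTML, e.g. in R Markdown
# f
# options(prType='plain')  # restore default plain-text printing
}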
} \usage{ \method{print}{ols}(x, digits=4, long=FALSE, coefs=TRUE, title="Linear Regression Model", \dots) } \arguments{ \item{x}{fit object} \item{digits}{number of significant digits to print} \item{long}{set to \code{TRUE} to print the correlation matrix of parameter estimates} \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{\dots}{other parameters to pass to \code{print} or \code{format}} } \seealso{ \code{\link{ols}}, \code{\link{lm}},\code{\link{prModFit}} } \keyword{print} rms/man/survest.cph.Rd0000644000176200001440000001352013714237251014407 0ustar liggesusers\name{survest.cph} \alias{survest} \alias{survest.cph} \title{ Cox Survival Estimates } \description{ Compute survival probabilities and optional confidence limits for Cox survival models. If \code{x=TRUE, y=TRUE} were specified to \code{cph}, confidence limits use the correct formula for any combination of predictors. Otherwise, if \code{surv=TRUE} was specified to \code{cph}, confidence limits are based only on standard errors of \code{log(S(t))} at the mean value of \eqn{X\beta}{X beta}. If the model contained only stratification factors, or if predictions are being requested near the mean of each covariable, this approximation will be accurate. Unless \code{times} is given, at most one observation may be predicted. } \usage{ survest(fit, \dots) \method{survest}{cph}(fit, newdata, linear.predictors, x, times, fun, loglog=FALSE, conf.int=0.95, type, vartype, conf.type=c("log", "log-log", "plain", "none"), se.fit=TRUE, what=c('survival','parallel'), individual=FALSE, ...) } \arguments{ \item{fit}{ a model fit from \code{cph} } \item{newdata}{ a data frame containing predictor variable combinations for which predictions are desired } \item{linear.predictors}{ a vector of linear predictor values (centered) for which predictions are desired. If the model is stratified, the "strata" attribute must be attached to this vector (see example). } \item{x}{ a design matrix at which to compute estimates, with any strata attached as a "strata" attribute. Only one of \code{newdata}, \code{linear.predictors}, or \code{x} may be specified. If none is specified, but \code{times} is specified, you will get survival predictions at all subjects' linear predictor and strata values. } \item{times}{ a vector of times at which to get predictions. If omitted, predictions are made at all unique failure times in the original input data. } \item{loglog}{ set to \code{TRUE} to make the \code{log-log} transformation of survival estimates and confidence limits. } \item{fun}{ any function to transform the estimates and confidence limits (\code{loglog} is a special case) } \item{conf.int}{ set to \code{FALSE} or \code{0} to suppress confidence limits, or e.g. \code{.95} to cause 0.95 confidence limits to be computed } \item{type}{ see \code{survfit.coxph} } \item{vartype}{ see \code{survfit.coxph} } \item{conf.type}{ specifies the basis for computing confidence limits. \code{"log"} is the default as in the \code{survival} package. } \item{se.fit}{ set to \code{TRUE} to get standard errors of log predicted survival (no matter what \code{conf.type} is). If \code{FALSE}, confidence limits are suppressed. 
} \item{individual}{ set to \code{TRUE} to have \code{survfit} interpret \code{newdata} as specifying a covariable path for a single individual (represented by multiple records). } \item{what}{ Normally use \code{what="survival"} to estimate survival probabilities at times that may not correspond to the subjects' own times. \code{what="parallel"} assumes that the length of \code{times} is the number of subjects (or one), and causes \code{survest} to estimate the ith subject's survival probability at the ith value of \code{times} (or at the scalar value of \code{times}). \code{what="parallel"} is used by \code{val.surv} for example. } \item{\dots}{unused} } \value{ If \code{times} is omitted, returns a list with the elements \code{time}, \code{n.risk}, \code{n.event}, \code{surv}, \code{call} (calling statement), and optionally \code{std.err}, \code{upper}, \code{lower}, \code{conf.type}, \code{conf.int}. The estimates in this case correspond to one subject. If \code{times} is specified, the returned list has possible components \code{time}, \code{surv}, \code{std.err}, \code{lower}, and \code{upper}. These will be matrices (except for \code{time}) if more than one subject is being predicted, with rows representing subjects and columns representing \code{times}. If \code{times} has only one time, these are reduced to vectors with the number of elements equal to the number of subjects. } \details{ The result is passed through \code{naresid} if \code{newdata}, \code{linear.predictors}, and \code{x} are not specified, to restore placeholders for \code{NA}s. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{cph}}, \code{\link{survfit.cph}}, \code{\link[survival]{survfit.coxph}}, \code{\link{predictrms}}, \code{\link{survplot}} } \examples{ # Simulate data from a population model in which the log hazard # function is linear in age and there is no age x sex interaction # Proportional hazards holds for both variables but we # unnecessarily stratify on sex to see what happens n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" dd <- datadist(age, sex) options(datadist='dd') Srv <- Surv(dt,e) f <- cph(Srv ~ age*strat(sex), x=TRUE, y=TRUE) #or surv=T survest(f, expand.grid(age=c(20,40,60),sex=c("Male","Female")), times=c(2,4,6), conf.int=.9) f <- update(f, surv=TRUE) lp <- c(0, .5, 1) f$strata # check strata names attr(lp,'strata') <- rep(1,3) # or rep('sex=Female',3) survest(f, linear.predictors=lp, times=c(2,4,6)) # Test survest by comparing to survfit.coxph for a more complex model f <- cph(Srv ~ pol(age,2)*strat(sex), x=TRUE, y=TRUE) survest(f, data.frame(age=median(age), sex=levels(sex)), times=6) age2 <- age^2 f2 <- coxph(Srv ~ (age + age2)*strata(sex)) new <- data.frame(age=median(age), age2=median(age)^2, sex='Male') summary(survfit(f2, new), times=6) new$sex <- 'Female' summary(survfit(f2, new), times=6) options(datadist=NULL) } \keyword{models} \keyword{survival} \keyword{regression} rms/man/validate.Rq.Rd0000644000176200001440000000660213714237251014300 0ustar liggesusers\name{validate.Rq} \alias{validate.Rq} \title{Validation of a Quantile Regression Model} \description{ The \code{validate} function when used on an object created by \code{Rq} does resampling 
validation of a quantile regression model, with or without backward step-down variable deletion. Uses resampling to estimate the optimism in various measures of predictive accuracy which include mean absolute prediction error (MAD), Spearman rho, the \eqn{g}-index, and the intercept and slope of an overall calibration \eqn{a + b\hat{y}}{a + b * (predicted y)}. The "corrected" slope can be thought of as a shrinkage factor that takes into account overfitting. \code{validate.Rq} can also be used when a model for a continuous response is going to be applied to a binary response. A Somers' \eqn{D_{xy}} for this case is computed for each resample by dichotomizing \code{y}. This can be used to obtain an ordinary receiver operating characteristic curve area using the formula \eqn{0.5(D_{xy} + 1)}. See \code{predab.resample} for the list of resampling methods. The LaTeX \code{needspace} package must be in effect to use the \code{latex} method. } \usage{ # fit <- fitting.function(formula=response ~ terms, x=TRUE, y=TRUE) \method{validate}{Rq}(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, u=NULL, rel=">", tolerance=1e-7, \dots) } \arguments{ \item{fit}{ a fit derived by \code{Rq}. The options \code{x=TRUE} and \code{y=TRUE} must have been specified. See \code{validate} for a description of arguments \code{method} - \code{pr}. } \item{method,B,bw,rule,type,sls,aics,force,estimates,pr}{see \code{\link{validate}} and \code{\link{predab.resample}} and \code{\link{fastbw}}} \item{u}{ If specified, \code{y} is also dichotomized at the cutoff \code{u} for the purpose of getting a bias-corrected estimate of \eqn{D_{xy}}. } \item{rel}{ relationship for dichotomizing predicted \code{y}. Defaults to \code{">"} to use \code{y>u}. \code{rel} can also be \code{"<"}, \code{">="}, and \code{"<="}. } \item{tolerance}{ ignored } \item{\dots}{ other arguments to pass to \code{predab.resample}, such as \code{group}, \code{cluster}, and \code{subset} }} \value{ matrix with rows corresponding to various indexes, and optionally \eqn{D_{xy}}, and columns for the original index, resample estimates, indexes applied to whole or omitted sample using model derived from resample, average optimism, corrected index, and number of successful resamples.
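A sketch of converting the bias-corrected \eqn{D_{xy}} to an ROC area using the formula in the description (the cutoff \code{u=1} and the row/column labels are assumed for illustration):
\preformatted{
# v <- validate(f, B=300, u=1, rel='>')     # dichotomize y at 1
# 0.5 * (v['Dxy', 'index.corrected'] + 1)   # approximate ROC area
}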
} \section{Side Effects}{ prints a summary, and optionally statistics for each re-fit } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{Rq}}, \code{\link{predab.resample}}, \code{\link{fastbw}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{gIndex}} } \examples{ set.seed(1) x1 <- runif(200) x2 <- sample(0:3, 200, TRUE) x3 <- rnorm(200) distance <- (x1 + x2/3 + rnorm(200))^2 f <- Rq(sqrt(distance) ~ rcs(x1,4) + scored(x2) + x3, x=TRUE, y=TRUE) #Validate full model fit (from all observations) but for x1 < .75 validate(f, B=20, subset=x1 < .75) # normally B=300 #Validate stepwise model with typical (not so good) stopping rule validate(f, B=20, bw=TRUE, rule="p", sls=.1, type="individual") } \keyword{models} \keyword{regression} \concept{model validation} \concept{bootstrap} \concept{predictive accuracy} rms/man/plot.contrast.rms.Rd0000644000176200001440000000315313702620526015534 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.contrast.r \name{plot.contrast.rms} \alias{plot.contrast.rms} \title{plot.contrast.rms} \usage{ \method{plot}{contrast.rms}( x, bivar = FALSE, bivarmethod = c("ellipse", "kernel"), prob = 0.95, which = c("both", "diff", "ind"), nrow = NULL, ncol = NULL, ... ) } \arguments{ \item{x}{the result of \code{contrast.rms}} \item{bivar}{set to \code{TRUE} to plot 2-d posterior density contour} \item{bivarmethod}{see \code{\link[rmsb:pdensityContour]{rmsb::pdensityContour()}}} \item{prob}{posterior coverage probability for HPD interval or 2-d contour} \item{which}{applies when plotting the result of \code{contrast(..., fun=)}, defaulting to showing the posterior density of both estimates plus their difference. Set to \code{"ind"} to only show the two individual densities or \code{"diff"} to only show the posterior density for the differences.} \item{nrow}{for \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}}} \item{ncol}{likewise} \item{...}{unused} } \value{ \code{ggplot2} object } \description{ Plot Bayesian Contrast Posterior Densities } \details{ If there are exactly two contrasts and \code{bivar=TRUE}, plots an elliptical or kernel (based on \code{bivarmethod}) posterior density contour with probability \code{prob}. Otherwise plots a series of posterior densities of contrasts along with HPD intervals, posterior means, and medians. When the result being plotted comes from \code{contrast} with \verb{fun=} specified, both the two individual estimates and their difference are plotted. } \author{ Frank Harrell } rms/man/which.influence.Rd0000644000176200001440000000506013714237251015174 0ustar liggesusers\name{which.influence} \alias{which.influence} \alias{show.influence} \title{ Which Observations are Influential } \description{ Creates a list with a component for each factor in the model. The names of the components are the factor names. Each component contains the observation identifiers of all observations that are "overly influential" with respect to that factor, meaning that \eqn{|dfbetas| > u} for at least one \eqn{\beta_i}{beta i} associated with that factor, for a given \code{cutoff}. The default \code{cutoff} is \code{.2}. The fit must come from a function that has \code{resid(fit, type="dfbetas")} defined. \code{show.influence}, written by Jens Oehlschlaegel-Akiyoshi, applies the result of \code{which.influence} to a data frame, usually the one used to fit the model, to report the results.
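A sketch of typical use (the fit \code{f} and data frame \code{d} are hypothetical; \code{f} must support \code{resid(f, type="dfbetas")}):
\preformatted{
# w <- which.influence(f, cutoff=0.3)  # stricter than the default 0.2
# show.influence(w, dframe=d, sig=3)   # report flagged rows of d
}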
} \usage{ which.influence(fit, cutoff=.2) show.influence(object, dframe, report=NULL, sig=NULL, id=NULL) } \arguments{ \item{fit}{ fit object } \item{object}{ the result of \code{which.influence} } \item{dframe}{ data frame containing observations pertinent to the model fit } \item{cutoff}{ cutoff value } \item{report}{ other columns of the data frame to report besides those corresponding to predictors that are influential for some observations } \item{sig}{ runs results through \code{signif} with \code{sig} digits if \code{sig} is given } \item{id}{ a character vector that labels rows of \code{dframe} if \code{row.names} were not used }} \value{ \code{show.influence} returns a marked dataframe with the first column being a count of influence values } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com \cr Jens Oehlschlaegel-Akiyoshi\cr Center for Psychotherapy Research\cr Christian-Belser-Strasse 79a\cr D-70597 Stuttgart Germany\cr oehl@psyres-stuttgart.de } \seealso{ \code{\link{residuals.lrm}}, \code{\link{residuals.cph}}, \code{\link{residuals.ols}}, \code{\link{rms}}, \code{\link{lrm}}, \code{\link{ols}}, \code{\link{cph}} } \examples{ #print observations in data frame that are influential, #separately for each factor in the model x1 <- 1:20 x2 <- abs(x1-10) x3 <- factor(rep(0:2,length.out=20)) y <- c(rep(0:1,8),1,1,1,1) f <- lrm(y ~ rcs(x1,3) + x2 + x3, x=TRUE,y=TRUE) w <- which.influence(f, .55) nam <- names(w) d <- data.frame(x1,x2,x3,y) for(i in 1:length(nam)) { print(paste("Influential observations for effect of ",nam[i]),quote=FALSE) print(d[w[[i]],]) } show.influence(w, d) # better way to show results } \keyword{models} \keyword{regression} \keyword{survival} \concept{logistic regression model} rms/man/plot.xmean.ordinaly.Rd0000644000176200001440000001031513714237251016027 0ustar liggesusers\name{plot.xmean.ordinaly} \alias{plot.xmean.ordinaly} \title{ Plot Mean X vs. Ordinal Y } \description{ Separately for each predictor variable \eqn{X} in a formula, plots the mean of \eqn{X} vs. levels of \eqn{Y}. Then under the proportional odds assumption, the expected value of the predictor for each \eqn{Y} value is also plotted (as a dotted line). This plot is useful for assessing the ordinality assumption for \eqn{Y} separately for each \eqn{X}, and for assessing the proportional odds assumption in a simple univariable way. If several predictors do not distinguish adjacent categories of \eqn{Y}, those levels may need to be pooled. This display assumes that each predictor is linearly related to the log odds of each event in the proportional odds model. There is also an option to plot the expected means assuming a forward continuation ratio model. } \usage{ \method{plot}{xmean.ordinaly}(x, data, subset, na.action, subn=TRUE, cr=FALSE, topcats=1, cex.points=.75, \dots) } \arguments{ \item{x}{ an S formula. Response variable is treated as ordinal. For categorical predictors, a binary version of the variable is substituted, specifying whether or not the variable equals the modal category. Interactions or non-linear effects are not allowed. } \item{data}{ a data frame or frame number } \item{subset}{ vector of subscripts or logical vector describing subset of data to analyze } \item{na.action}{ defaults to \code{na.keep} so all NAs are initially retained. Then NAs are deleted only for each predictor currently being plotted. Specify \code{na.action=na.delete} to remove observations that are missing on any of the predictors (or the response). 
} \item{subn}{ set to \code{FALSE} to suppress a left bottom subtitle specifying the sample size used in constructing each plot } \item{cr}{ set to \code{TRUE} to plot expected values by levels of the response, assuming a forward continuation ratio model holds. The function is fairly slow when this option is specified. } \item{topcats}{When a predictor is categorical, by default only the proportion of observations in the overall most frequent category will be plotted against response variable strata. Specify a higher value of \code{topcats} to make separate plots for the proportion in the \code{k} most frequent predictor categories, where \code{k} is \code{min(ncat-1, topcats)} and \code{ncat} is the number of unique values of the predictor.} \item{cex.points}{if \code{cr} is \code{TRUE}, specifies the size of the \code{"C"} that is plotted. Default is 0.75.} \item{...}{ other arguments passed to \code{plot} and \code{lines} }} \section{Side Effects}{ plots } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \references{ Harrell FE et al. (1998): Development of a clinical prediction model for an ordinal outcome. Stat in Med 17:909--44. } \seealso{ \code{\link{lrm}}, \code{\link{residuals.lrm}}, \code{\link{cr.setup}}, \code{\link[Hmisc]{summary.formula}}, \code{\link[Hmisc]{biVar}}. } \examples{ # Simulate data from a population proportional odds model set.seed(1) n <- 400 age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) region <- factor(sample(c('north','south','east','west'), n, replace=TRUE)) L <- .2*(age-50) + .1*(blood.pressure-120) p12 <- plogis(L) # Pr(Y>=1) p2 <- plogis(L-1) # Pr(Y=2) p <- cbind(1-p12, p12-p2, p2) # individual class probabilities # Cumulative probabilities: cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(3,n)), byrow=TRUE, ncol=3) y <- (cp < runif(n)) \%*\% rep(1,3) # Thanks to Dave Krantz for this trick par(mfrow=c(2,2)) plot.xmean.ordinaly(y ~ age + blood.pressure + region, cr=TRUE, topcats=2) par(mfrow=c(1,1)) # Note that for unimportant predictors we don't care very much about the # shapes of these plots. Use the Hmisc chiSquare function to compute # Pearson chi-square statistics to rank the variables by unadjusted # importance without assuming any ordering of the response: chiSquare(y ~ age + blood.pressure + region, g=3) chiSquare(y ~ age + blood.pressure + region, g=5) } \keyword{category} \keyword{models} \keyword{regression} \keyword{hplot} \concept{model validation} \concept{logistic regression model} rms/man/sensuc.Rd0000644000176200001440000002330113714237251013421 0ustar liggesusers\name{sensuc} \alias{sensuc} \alias{plot.sensuc} \title{Sensitivity to Unmeasured Covariables} \description{ Performs an analysis of the sensitivity of a binary treatment (\eqn{X}) effect to an unmeasured binary confounder (\eqn{U}) for a fitted binary logistic or an unstratified non-time-dependent Cox survival model (the function works well for the former, not so well for the latter). This is done by fitting a sequence of models with separately created \eqn{U} variables added to the original model. The sequence of models is formed by simultaneously varying \eqn{a} and \eqn{b}, where \eqn{a} measures the association between \eqn{U} and \eqn{X} and \eqn{b} measures the association between \eqn{U} and \eqn{Y}, where \eqn{Y} is the outcome of interest. For Cox models, an approximate solution is used by letting \eqn{Y} represent some binary classification of the event/censoring time and the event indicator.
For example, \eqn{Y} could just be the event indicator, ignoring time of the event or
censoring, or it could be \eqn{1} if a subject failed before one year and \eqn{0} otherwise.
When for each combination of \eqn{a} and \eqn{b} the vector of binary values \eqn{U} is
generated, one of two methods is used to constrain the properties of \eqn{U}. With either
method, the overall prevalence of \eqn{U} is constrained to be \code{prev.u}. With the
default method (\code{or.method="x:u y:u"}), \eqn{U} is sampled so that the \eqn{X:U} odds
ratio is \eqn{a} and the \eqn{Y:U} odds ratio is \eqn{b}. With the second method, \eqn{U} is
sampled according to the model \eqn{logit(U=1 | X, Y) = \alpha + \beta*Y + \gamma*X}, where
\eqn{\beta=\log(b)} and \eqn{\gamma=\log(a)} and \eqn{\alpha} is determined so that the
prevalence of \eqn{U=1} is \code{prev.u}. This second method results in the adjusted odds
ratio for \eqn{Y:U} given \eqn{X} being \eqn{b} whereas the default method forces the
unconditional (marginal) \eqn{Y:U} odds ratio to be \eqn{b}. Rosenbaum uses the default
method.

There is a \code{plot} method for plotting objects created by \code{sensuc}. Values of
\eqn{a} are placed on the x-axis and observed marginal odds or hazards ratios for \eqn{U}
(unadjusted ratios) appear on the y-axis. For Cox models, the hazard ratios will not agree
exactly with \eqn{X}:event indicator odds ratios but they can sometimes be made close through
judicious choice of the \code{event} function. The default plot uses four symbols which
differentiate whether for the \eqn{a,b} combination the effect of \eqn{X} adjusted for
\eqn{U} (and for any other covariables that were in the original model fit) is positive
(usually meaning an effect ratio greater than 1) and "significant", merely positive, not
positive and non-significant, or not positive but significant. There is also an option to
draw the numeric value of the \eqn{X} effect ratio at the \eqn{a},\eqn{b} combination along
with its \eqn{Z} statistic underneath in smaller letters, and an option to draw the effect
ratio in one of four colors depending on the significance of the \eqn{Z} statistic.
}
\usage{
# fit <- lrm(formula=y ~ x + other.predictors, x=TRUE, y=TRUE) #or
# fit <- cph(formula=Surv(event.time,event.indicator) ~ x + other.predictors,
# x=TRUE, y=TRUE)
sensuc(fit, or.xu=seq(1, 6, by = 0.5), or.u=or.xu, prev.u=0.5,
constrain.binary.sample=TRUE, or.method=c("x:u y:u","u|x,y"),
event=function(y) if(is.matrix(y))y[,ncol(y)] else 1*y)
\method{plot}{sensuc}(x, ylim=c((1+trunc(min(x$effect.u)-.01))/
ifelse(type=='numbers',2,1), 1+trunc(max(x$effect.u)-.01)),
xlab='Odds Ratio for X:U',
ylab=if(x$type=='lrm')'Odds Ratio for Y:U' else 'Hazard Ratio for Y:U',
digits=2, cex.effect=.75, cex.z=.6*cex.effect,
delta=diff(par('usr')[3:4])/40, type=c('symbols','numbers','colors'),
pch=c(15,18,5,0), col=c(2,3,1,4), alpha=.05,
impressive.effect=function(x)x > 1,\dots)
}
\arguments{
\item{fit}{ result of \code{lrm} or \code{cph} with \code{x=TRUE, y=TRUE}. The first
variable in the right hand side of the model formula must have been the binary \eqn{X}
variable, and it may not interact with other predictors. }
\item{x}{ result of \code{sensuc} }
\item{or.xu}{ vector of possible odds ratios measuring the \eqn{X:U} association. }
\item{or.u}{ vector of possible odds ratios measuring the \eqn{Y:U} association. Default is
\code{or.xu}. }
\item{prev.u}{ desired prevalence of \eqn{U=1}. Default is 0.5, which is usually a "worst
case" for sensitivity analyses.
} \item{constrain.binary.sample}{ By default, the binary \eqn{U} values are sampled from the
appropriate distributions conditional on \eqn{Y} and \eqn{X} so that the proportions of
\eqn{U=1} in each sample are exactly the desired probabilities, to within the closeness of
\eqn{n\times}probability to an integer. Specify \code{constrain.binary.sample=FALSE} to
sample from ordinary Bernoulli distributions, to allow proportions of \eqn{U=1} to reflect
sampling fluctuations. }
\item{or.method}{ see above }
\item{event}{ a function classifying the response variable into a binary event for the
purposes of constraining the association between \eqn{U} and \eqn{Y}. For binary logistic
models, \code{event} is left at its default value, which is the identity function, i.e., the
original \eqn{Y} values are taken as the events (no other choice makes any sense here). For
Cox models, the default \code{event} function takes the last column of the \code{Surv}
object stored with the fit. For rare events (high proportion of censored observations), odds
ratios approximate hazard ratios, so the default is OK. For other cases, the survival times
should be considered (probably in conjunction with the event indicators), although it may not
be possible to get a high enough hazard ratio between \eqn{U} and \eqn{Y} by sampling
\eqn{U} by temporarily making \eqn{Y} binary. See the last example which is for a 2-column
\code{Surv} object (first column of response variable=event time, second=event indicator).
When dichotomizing survival time at a given point, it is advantageous to choose the cutpoint
so that not many censored survival times precede the cutpoint. Note that in fitting Cox
models to examine sensitivity to \eqn{U}, the original non-dichotomized failure times are
used. }
\item{ylim}{ y-axis limits for \code{plot} }
\item{xlab}{ x-axis label }
\item{ylab}{ y-axis label }
\item{digits}{ number of digits to the right of the decimal point for drawing numbers on the
plot, for \code{type="numbers"} or \code{type="colors"}. }
\item{cex.effect}{ character size for drawing effect ratios }
\item{cex.z}{ character size for drawing \eqn{Z} statistics }
\item{delta}{ decrement in \eqn{y} value used to draw \eqn{Z} values below effect ratios }
\item{type}{ specify \code{"symbols"} (the default), \code{"numbers"}, or \code{"colors"}
(see above) }
\item{pch}{ 4 plotting characters corresponding to positive and significant effects for
\eqn{X}, positive and non-significant effects, not positive and not significant, not
positive but significant }
\item{col}{ 4 colors as for \code{pch} }
\item{alpha}{ significance level }
\item{impressive.effect}{ a function of the odds or hazard ratio for \eqn{X} returning
\code{TRUE} for a positive effect. By default, a positive effect is taken to mean a ratio
exceeding one.
} \item{...}{ optional arguments passed to \code{plot} }}
\value{ \code{sensuc} returns an object of class \code{"sensuc"} with the following elements:
\code{OR.xu} (vector of desired \eqn{X:U} odds ratios or \eqn{a} values), \code{OOR.xu}
(observed marginal \eqn{X:U} odds ratios), \code{OR.u} (desired \eqn{Y:U} odds ratios or
\eqn{b} values), \code{effect.x} (adjusted odds or hazards ratio for \eqn{X} in a model
adjusted for \eqn{U} and all of the other predictors), \code{effect.u} (unadjusted \eqn{Y:U}
odds or hazards ratios), \code{effect.u.adj} (adjusted \eqn{Y:U} odds or hazards ratios),
\eqn{Z} (Z-statistics), \code{prev.u} (input to \code{sensuc}), \code{cond.prev.u} (matrix
with one row per \eqn{a},\eqn{b} combination, specifying prevalences of \eqn{U} conditional
on \eqn{Y} and \eqn{X} combinations), and \code{type} (\code{"lrm"} or \code{"cph"}). }
\author{ Frank Harrell\cr Mark Conaway\cr Department of Biostatistics\cr Vanderbilt
University School of Medicine\cr fh@fharrell.com, mconaway@virginia.edu }
\references{
Rosenbaum, Paul R (1995): Observational Studies. New York: Springer-Verlag.

Rosenbaum P, Rubin D (1983): Assessing sensitivity to an unobserved binary covariate in an
observational study with binary outcome. J Roy Statist Soc B 45:212--218.

Lee WC (2011): Bounding the bias of unmeasured factors with confounding and effect-modifying
potentials. Stat in Med 30:1007-1017.
}
\seealso{
\code{\link{lrm}}, \code{\link{cph}}, \code{\link{sample}}
%% \code{\link[treatSens]{treatSens}}
}
\examples{
set.seed(17)
x <- sample(0:1, 500,TRUE)
y <- sample(0:1, 500,TRUE)
y[1:100] <- x[1:100] # induce an association between x and y
x2 <- rnorm(500)
f <- lrm(y ~ x + x2, x=TRUE, y=TRUE)
#Note: in absence of U odds ratio for x is exp(2nd coefficient)
g <- sensuc(f, c(1,3))
# Note: If the generated sample of U was typical, the odds ratio for
# x would have dropped had U been known, where U had an odds ratio
# with x of 3 and an odds ratio with y of 3
plot(g)

# Fit a Cox model and check sensitivity to an unmeasured confounder
# f <- cph(Surv(d.time,death) ~ treatment + pol(age,2)*sex, x=TRUE, y=TRUE)
# sensuc(f, event=function(y) y[,2] & y[,1] < 365.25 )
# Event = failed, with event time before 1 year
# Note: Analysis uses f$y which is a 2-column Surv object
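# The numeric display described above may be requested in place of the
# default symbols (type='numbers' is one of the documented plot.sensuc
# type options); a small sketch reusing the object g from above:
plot(g, type='numbers')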
}
\keyword{regression} \keyword{htest} \keyword{models} \keyword{survival}
\concept{model validation} \concept{sampling} \concept{logistic regression model}
\concept{sensitivity analysis}
rms/man/survfit.cph.Rd0000644000176200001440000000256412257363474014404 0ustar liggesusers\name{survfit.cph}
\alias{survfit.cph}
\title{ Cox Predicted Survival }
\description{ This is a slightly modified version of Therneau's \code{survfit.coxph}
function. The difference is that \code{survfit.cph} assumes that \code{x=TRUE,y=TRUE} were
specified to the fit. This assures that the environment in effect at the time of the fit
(e.g., automatic knot estimation for spline functions) is the same one used for basing
predictions. }
\usage{
\method{survfit}{cph}(formula, newdata, se.fit=TRUE, conf.int=0.95,
individual=FALSE, type=NULL, vartype=NULL,
conf.type=c('log', "log-log", "plain", "none"), id, \dots)
}
\arguments{
\item{formula}{ a fit object from \code{cph} or \code{coxph}; see
\code{\link[survival]{survfit.coxph}} }
\item{newdata,se.fit,conf.int,individual,type,vartype,conf.type,id}{see
\code{\link[survival]{survfit}}. If \code{individual} is \code{TRUE}, there must be exactly
one \code{Surv} object in \code{newdata}. This object is used to specify time intervals for
time-dependent covariate paths. To get predictions for multiple subjects with time-dependent
covariates, specify a vector \code{id} which specifies unique hypothetical subjects. The
length of \code{id} should equal the number of rows in \code{newdata}.}
\item{\dots}{Not used} }
\value{see \code{survfit.coxph}}
\seealso{\code{\link{survest.cph}}}
\keyword{survival}
rms/man/validate.ols.Rd0000644000176200001440000000712713714237251014516 0ustar liggesusers\name{validate.ols}
\alias{validate.ols}
\title{Validation of an Ordinary Linear Model}
\description{ The \code{validate} function when used on an object created by \code{ols} does
resampling validation of a multiple linear regression model, with or without backward
step-down variable deletion. Uses resampling to estimate the optimism in various measures of
predictive accuracy which include \eqn{R^2}, \eqn{MSE} (mean squared error with a denominator
of \eqn{n}), the \eqn{g}-index, and the intercept and slope of an overall calibration
\eqn{a + b\hat{y}}{a + b * (predicted y)}. The "corrected" slope can be thought of as a
shrinkage factor that takes into account overfitting. \code{validate.ols} can also be used
when a model for a continuous response is going to be applied to a binary response. A
Somers' \eqn{D_{xy}} for this case is computed for each resample by dichotomizing \code{y}.
This can be used to obtain an ordinary receiver operating characteristic curve area using the
formula \eqn{0.5(D_{xy} + 1)}. The Nagelkerke-Maddala \eqn{R^2} index for the dichotomized
\code{y} is also given. See \code{predab.resample} for the list of resampling methods.

The LaTeX needspace package must be in effect to use the \code{latex} method.
}
\usage{
# fit <- fitting.function(formula=response ~ terms, x=TRUE, y=TRUE)
\method{validate}{ols}(fit, method="boot", B=40,
bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL,
estimates=TRUE, pr=FALSE, u=NULL, rel=">", tolerance=1e-7, \dots)
}
\arguments{
\item{fit}{ a fit derived by \code{ols}. The options \code{x=TRUE} and \code{y=TRUE} must
have been specified. See \code{validate} for a description of arguments \code{method} -
\code{pr}. }
\item{method,B,bw,rule,type,sls,aics,force,estimates,pr}{see \code{\link{validate}} and
\code{\link{predab.resample}} and \code{\link{fastbw}}}
\item{u}{ If specified, \code{y} is also dichotomized at the cutoff \code{u} for the purpose
of getting a bias-corrected estimate of \eqn{D_{xy}}. }
\item{rel}{ relationship for dichotomizing predicted \code{y}. Defaults to \code{">"} to use
\code{y>u}. \code{rel} can also be \code{"<"}, \code{">="}, and \code{"<="}. }
\item{tolerance}{ tolerance for singularity; passed to \code{lm.fit.qr}. }
\item{\dots}{ other arguments to pass to \code{predab.resample}, such as \code{group},
\code{cluster}, and \code{subset} }}
\value{ matrix with rows corresponding to R-square, MSE, g, intercept, slope, and optionally
\eqn{D_{xy}} and \eqn{R^2}, and columns for the original index, resample estimates, indexes
applied to whole or omitted sample using model derived from resample, average optimism,
corrected index, and number of successful resamples.
} \section{Side Effects}{ prints a summary, and optionally statistics for each re-fit } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{ols}}, \code{\link{predab.resample}}, \code{\link{fastbw}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{calibrate}}, \code{\link{gIndex}} } \examples{ set.seed(1) x1 <- runif(200) x2 <- sample(0:3, 200, TRUE) x3 <- rnorm(200) distance <- (x1 + x2/3 + rnorm(200))^2 f <- ols(sqrt(distance) ~ rcs(x1,4) + scored(x2) + x3, x=TRUE, y=TRUE) #Validate full model fit (from all observations) but for x1 < .75 validate(f, B=20, subset=x1 < .75) # normally B=300 #Validate stepwise model with typical (not so good) stopping rule validate(f, B=20, bw=TRUE, rule="p", sls=.1, type="individual") } \keyword{models} \keyword{regression} \concept{model validation} \concept{bootstrap} \concept{predictive accuracy} rms/man/npsurv.Rd0000644000176200001440000000702112613205242013447 0ustar liggesusers\name{npsurv} \alias{npsurv} \title{Nonparametric Survival Estimates for Censored Data} \description{ Computes an estimate of a survival curve for censored data using either the Kaplan-Meier or the Fleming-Harrington method or computes the predicted survivor function. For competing risks data it computes the cumulative incidence curve. This calls the \code{survival} package's \code{survfit.formula} function. Attributes of the event time variable are saved (label and units of measurement). For competing risks the second argument for \code{Surv} should be the event state variable, and it should be a factor variable with the first factor level denoting right-censored observations. } \usage{npsurv(formula, data, subset, na.action, \dots)} \arguments{ \item{formula}{ a formula object, which must have a \code{Surv} object as the response on the left of the \code{~} operator and, if desired, terms separated by + operators on the right. One of the terms may be a \code{strata} object. For a single survival curve the right hand side should be \code{~ 1}. } \item{data,subset,na.action}{see \code{\link[survival]{survfit.formula}}} \item{\dots}{see \code{\link[survival]{survfit.formula}}} } \value{ an object of class \code{"npsurv"} and \code{"survfit"}. See \code{survfit.object} for details. Methods defined for \code{survfit} objects are \code{print}, \code{summary}, \code{plot},\code{lines}, and \code{points}. } \details{ see \code{\link[survival]{survfit.formula}} for details } \seealso{ \code{\link{survfit.cph}} for survival curves from Cox models. 
\code{\link{print}}, \code{\link{plot}}, \code{\link{lines}}, \code{\link{coxph}},
\code{\link{strata}}, \code{\link{survplot}} }
\author{Thomas Lumley \email{tlumley@u.washington.edu} and Terry Therneau}
\examples{
require(survival)
# fit a Kaplan-Meier and plot it
fit <- npsurv(Surv(time, status) ~ x, data = aml)
plot(fit, lty = 2:3)
legend(100, .8, c("Maintained", "Nonmaintained"), lty = 2:3)

# Here is the data set from Turnbull
# There are no interval censored subjects, only left-censored (status=2),
# right-censored (status=0) and observed events (status=1)
#
# Time
# 1 2 3 4
# Type of observation
# death 12 6 2 3
# losses 3 2 0 3
# late entry 2 4 2 5
#
tdata <- data.frame(time = c(1,1,1,2,2,2,3,3,3,4,4,4),
status = rep(c(1,0,2),4),
n = c(12,3,2,6,2,4,2,0,2,3,3,5))
fit <- npsurv(Surv(time, time, status, type='interval') ~ 1, data=tdata, weights=n)
#
# Time to progression/death for patients with monoclonal gammopathy
# Competing risk curves (cumulative incidence)
# status variable must be a factor with first level denoting right censoring
m <- upData(mgus1, stop = stop / 365.25, units=c(stop='years'),
labels=c(stop='Follow-up Time'), subset=start == 0)
f <- npsurv(Surv(stop, event) ~ 1, data=m)
# CI curves are always plotted from 0 upwards, rather than 1 down
plot(f, fun='event', xmax=20, mark.time=FALSE, col=2:3,
xlab="Years post diagnosis of MGUS")
text(10, .4, "Competing Risk: death", col=3)
text(16, .15,"Competing Risk: progression", col=2)
# Use survplot for enhanced displays of cumulative incidence curves for
# competing risks
survplot(f, state='pcm', n.risk=TRUE, xlim=c(0, 20), ylim=c(0, .5), col=2)
survplot(f, state='death', add=TRUE, col=3)
f <- npsurv(Surv(stop, event) ~ sex, data=m)
survplot(f, state='death', n.risk=TRUE, conf='diffbands')
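# npsurv objects inherit the survfit methods listed above (print,
# summary, plot, lines, points); e.g., a sketch tabulating the
# estimates at a few follow-up times:
summary(f, times=c(2, 5, 10))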
}
rms/man/Predict.Rd0000644000176200001440000004044213725507520013521 0ustar liggesusers\name{Predict}
\alias{Predict} \alias{print.Predict} \alias{rbind.Predict}
\title{Compute Predicted Values and Confidence Limits}
\description{
\code{Predict} allows the user to easily specify which predictors are to vary. When the
vector of values over which a predictor should vary is not specified, the range will be all
levels of a categorical predictor or equally-spaced points between the
\code{\link{datadist}} \code{"Low:prediction"} and \code{"High:prediction"} values for the
variable (\code{datadist} by default uses the 10th smallest and 10th largest predictor
values in the dataset). Predicted values are the linear predictor (X beta), a user-specified
transformation of that scale, or estimated probability of surviving past a fixed single time
point given the linear predictor. \code{Predict} is usually used for plotting predicted
values but there is also a \code{print} method.

When the first argument to \code{Predict} is a fit object created by \code{bootcov} with
\code{coef.reps=TRUE}, confidence limits come from the stored matrix of bootstrap
repetitions of coefficients, using bootstrap percentile nonparametric confidence limits,
basic bootstrap, or BCa limits. Such confidence intervals do not make distributional
assumptions. You can force \code{Predict} to instead use the bootstrap covariance matrix by
setting \code{usebootcoef=FALSE}. If \code{coef.reps} was \code{FALSE},
\code{usebootcoef=FALSE} is the default.

There are \code{ggplot}, \code{plotp}, and \code{plot} methods for \code{Predict} objects
that make it easy to show predicted values and confidence bands.

The \code{rbind} method for \code{Predict} objects allows you to create separate sets of
predictions under different situations and to combine them into one set for feeding to
\code{plot.Predict}, \code{ggplot.Predict}, or \code{plotp.Predict}. For example, you might
want to plot confidence intervals for means and for individuals using \code{ols}, and have
the two types of confidence bands be superposed onto one plot or placed into two panels.
Another use for \code{rbind} is to combine predictions from quantile regression models that
predicted three different quantiles.

If \code{conf.type="simultaneous"}, simultaneous (over all requested predictions) confidence
limits are computed. See the \code{\link{predictrms}} function for details.

If \code{fun} is given, \code{conf.int} > 0, the model is not a Bayesian model, and the
bootstrap was not used, \code{fun} may return a \code{limits} attribute when \code{fun}
computed its own confidence limits. These confidence limits will be functions of the design
matrix, not just the linear predictor.
}
\usage{
Predict(object, ..., fun=NULL, funint=TRUE,
type = c("predictions", "model.frame", "x"),
np = 200, conf.int = 0.95,
conf.type = c("mean", "individual","simultaneous"),
usebootcoef=TRUE, boot.type=c("percentile", "bca", "basic"),
posterior.summary=c('mean', 'median', 'mode'),
adj.zero = FALSE, ref.zero = FALSE,
kint=NULL, ycut=NULL, time = NULL, loglog = FALSE, digits=4, name,
factors=NULL, offset=NULL)

\method{print}{Predict}(x, \dots)

\method{rbind}{Predict}(\dots, rename)
}
\arguments{
\item{object}{ an \code{rms} fit object, or for \code{print} the result of \code{Predict}.
\code{options(datadist="d")} must have been specified (where \code{d} was created by
\code{datadist}), or it must have been in effect when the model was fitted.}
\item{\dots}{ One or more variables to vary, or single-valued adjustment values. Specify a
variable name without an equal sign to use the default display range, or any range you
choose (e.g. \code{seq(0,100,by=2),c(2,3,7,14)}). The default list of values for which
predictions are made is taken as the list of unique values of the variable if they number
fewer than 11. For variables with \eqn{>10} unique values, \code{np} equally spaced values
in the range are used for plotting if the range is not specified. Variables not specified
are set to the default adjustment value \code{limits[2]}, i.e. the median for continuous
variables and a reference category for non-continuous ones. Later variables define
adjustment settings. For categorical variables, specify the class labels in quotes when
specifying variable values. If the levels of a categorical variable are numeric, you may
omit the quotes. For variables not described using \code{datadist}, you must specify
explicit ranges and adjustment settings for predictors that were in the model. If no
variables are specified in \dots, predictions will be made by separately varying all
predictors in the model over their default range, holding the other predictors at their
adjustment values. This has the same effect as specifying \code{name} as a vector containing
all the predictors. For \code{rbind}, \dots represents a series of results from
\code{Predict}. If you name the results, these names will be taken as the values of the new
\code{.set.} variable added to the concatenated data frames. See an example below. }
\item{fun}{an optional transformation of the linear predictor.
Specify \code{fun='mean'} if the fit is a proportional odds model fit and you ran
\code{bootcov} with \code{coef.reps=TRUE}. This will let the mean function be re-estimated
for each bootstrap rep to properly account for all sources of uncertainty in estimating the
mean response. \code{fun} can be a general function and can compute confidence limits
(stored as a list in the \code{limits} attribute) of the transformed parameters such as
means.}
\item{funint}{set to \code{FALSE} if \code{fun} is not a function such as the result of
\code{Mean}, \code{Quantile}, or \code{ExProb} that contains an \code{intercepts} argument}
\item{type}{ defaults to providing predictions. Set to \code{"model.frame"} to return a
data frame of predictor settings used. Set to \code{"x"} to return the corresponding design
matrix constructed from the predictor settings. }
\item{np}{ the number of equally-spaced points computed for continuous predictors that vary,
i.e., when the specified value is \code{.} or \code{NA} }
\item{conf.int}{ confidence level (highest posterior density interval probability for
Bayesian models). Default is 0.95. Specify \code{FALSE} to suppress.}
\item{conf.type}{ type of confidence interval. Default is \code{"mean"} which applies to all
models. For models containing a residual variance (e.g., \code{ols}), you can specify
\code{conf.type="individual"} instead, to obtain limits on the predicted value for an
individual subject. Specify \code{conf.type="simultaneous"} to obtain simultaneous
confidence bands for mean predictions with family-wise coverage of \code{conf.int}. }
\item{usebootcoef}{set to \code{FALSE} to force the use of the bootstrap covariance matrix
estimator even when bootstrap coefficient reps are present}
\item{boot.type}{set to \code{'bca'} to compute BCa confidence limits or \code{'basic'} to
use the basic bootstrap. The default is to compute percentile intervals}
\item{posterior.summary}{defaults to using the posterior mean of the regression
coefficients. Specify \code{'mode'} or \code{'median'} to instead use the other summaries.}
\item{adj.zero}{ Set to \code{TRUE} to adjust all non-plotted variables to 0 (or reference
cell for categorical variables) and to omit intercept(s) from consideration. Default is
\code{FALSE}. }
\item{ref.zero}{ Set to \code{TRUE} to subtract a constant from \eqn{X\beta}{X beta} before
plotting so that the reference value of the \code{x}-variable yields \code{y=0}. This is
done before applying function \code{fun}. This is especially useful for Cox models to make
the hazard ratio be 1.0 at reference values, and the confidence interval have width zero. }
\item{kint}{ This is only useful in a multiple intercept model such as the ordinal logistic
model. There, to use the second of three intercepts, for example, specify \code{kint=2}.
The default is 1 for \code{lrm} and the middle intercept corresponding to the median
\code{y} for \code{orm} or \code{blrm}. You can specify \code{ycut} instead, and the
intercept corresponding to Y >= ycut will be used for \code{kint}. }
\item{ycut}{for an ordinal model specifies the Y cutoff to use in evaluating departures from
proportional odds, when the constrained partial proportional odds model is used. When
omitted, \code{ycut} is implied by \code{kint}. The only time it is absolutely mandatory to
specify \code{ycut} is when computing an effect (e.g., odds ratio) at a level of the
response variable that did not occur in the data.
This would only occur when the \code{cppo} function given to \code{blrm} is a continuous function.} \item{time}{ Specify a single time \code{u} to cause function \code{survest} to be invoked to plot the probability of surviving until time \code{u} when the fit is from \code{cph} or \code{psm}. } \item{loglog}{ Specify \code{loglog=TRUE} to plot \code{log[-log(survival)]} instead of survival, when \code{time} is given. } \item{digits}{ Controls how ``adjust-to'' values are plotted. The default is 4 significant digits. } \item{name}{ Instead of specifying the variables to vary in the \code{variables} (\dots) list, you can specify one or more variables by specifying a vector of character string variable names in the \code{name} argument. Using this mode you cannot specify a list of variable values to use; prediction is done as if you had said e.g. \code{age} without the equal sign. Also, interacting factors can only be set to their reference values using this notation. } \item{factors}{ an alternate way of specifying \dots, mainly for use by \code{survplot} or \code{gendata}. This must be a list with one or more values for each variable listed, with \code{NA} values for default ranges.} \item{offset}{a list containing one value for one variable, which is mandatory if the model included an offset term. The variable name must match the innermost variable name in the offset term. The single offset is added to all predicted values.} \item{x}{an object created by \code{Predict}} \item{rename}{ If you are concatenating predictor sets using \code{rbind} and one or more of the variables were renamed for one or more of the sets, but these new names represent different versions of the same predictors (e.g., using or not using imputation), you can specify a named character vector to rename predictors to a central name. For example, specify \code{rename=c(age.imputed='age', corrected.bp='bp')} to rename from old names \code{age.imputed, corrected.bp} to \code{age, bp}. This happens before concatenation of rows. } } \details{ When there are no intercepts in the fitted model, plot subtracts adjustment values from each factor while computing variances for confidence limits. Specifying \code{time} will not work for Cox models with time-dependent covariables. Use \code{survest} or \code{survfit} for that purpose. } \value{ a data frame containing all model predictors and the computed values \code{yhat}, \code{lower}, \code{upper}, the latter two if confidence intervals were requested. The data frame has an additional \code{class} \code{"Predict"}. If \code{name} is specified or no predictors are specified in \dots, the resulting data frame has an additional variable called \code{.predictor.} specifying which predictor is currently being varied. \code{.predictor.} is handy for use as a paneling variable in \code{lattice} or \code{ggplot2} graphics. 
} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com }
\seealso{ \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}},
\code{\link{plotp.Predict}}, \code{\link{datadist}}, \code{\link{predictrms}},
\code{\link{contrast.rms}}, \code{\link{summary.rms}}, \code{\link{rms}},
\code{\link{rms.trans}}, \code{\link{survest}}, \code{\link{survplot}},
\code{\link{rmsMisc}}, \code{\link[Hmisc]{transace}}, \code{rbind}, \code{\link{bootcov}},
\code{\link{bootBCa}}, \code{\link[boot]{boot.ci}} }
\examples{
n <- 1000 # define sample size
set.seed(17) # so can reproduce the results
age <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol <- rnorm(n, 200, 25)
sex <- factor(sample(c('female','male'), n,TRUE))
label(age) <- 'Age' # label is in Hmisc
label(cholesterol) <- 'Total Cholesterol'
label(blood.pressure) <- 'Systolic Blood Pressure'
label(sex) <- 'Sex'
units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc
units(blood.pressure) <- 'mmHg'

# Specify population model for log odds that Y=1
L <- .4*(sex=='male') + .045*(age-50) +
(log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

ddist <- datadist(age, blood.pressure, cholesterol, sex)
options(datadist='ddist')

fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)))
Predict(fit, age, cholesterol, np=4)
Predict(fit, age=seq(20,80,by=10), sex, conf.int=FALSE)
Predict(fit, age=seq(20,80,by=10), sex='male') # works if datadist not used
# Get simultaneous confidence limits accounting for making 7 estimates
# Predict(fit, age=seq(20,80,by=10), sex='male', conf.type='simult')
# (this needs the multcomp package)

ddist$limits$age[2] <- 30 # make 30 the reference value for age
# Could also do: ddist$limits["Adjust to","age"] <- 30
fit <- update(fit) # make new reference value take effect
Predict(fit, age, ref.zero=TRUE, fun=exp)

# Make two curves, and plot the predicted curves as two trellis panels
w <- Predict(fit, age, sex)
require(lattice)
xyplot(yhat ~ age | sex, data=w, type='l')
# To add confidence bands we need to use the Hmisc xYplot function in
# place of xyplot
xYplot(Cbind(yhat,lower,upper) ~ age | sex, data=w,
method='filled bands', type='l', col.fill=gray(.95))
# If non-displayed variables were in the model, add a subtitle to show
# their settings using
title(sub=paste('Adjusted to',attr(w,'info')$adjust),adj=0)
# Easier: feed w into plot.Predict, ggplot.Predict, plotp.Predict
\dontrun{
# Predictions from a parametric survival model
n <- 1000
set.seed(731)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4)))
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
t <- -log(runif(n))/h
label(t) <- 'Follow-up Time'
e <- ifelse(t<=cens,1,0)
t <- pmin(t, cens)
units(t) <- "Year"
ddist <- datadist(age, sex)
Srv <- Surv(t,e)

# Fit log-normal survival model and plot median survival time vs. age
f <- psm(Srv ~ rcs(age), dist='lognormal')
med <- Quantile(f) # Creates function to compute quantiles
# (median by default)
Predict(f, age, fun=function(x)med(lp=x))
# Note: This works because med() expects the linear predictor (X*beta)
# as an argument. Would not work if use
# ref.zero=TRUE or adj.zero=TRUE.
# Also, confidence intervals from this method are approximate since
# they don't take into account estimation of scale parameter
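# An alternative documented above: plot the probability of surviving
# past a single fixed time (arbitrarily 5 years here) via the time
# argument, which invokes survest for psm fits; a quick sketch:
Predict(f, age, time=5)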
# Fit an ols model to log(y) and plot the relationship between x1
# and the predicted mean(y) on the original scale without assuming
# normality of residuals; use the smearing estimator. Before doing
# that, show confidence intervals for mean and individual log(y),
# and for the latter, also show bootstrap percentile nonparametric
# pointwise confidence limits
set.seed(1)
x1 <- runif(300)
x2 <- runif(300)
ddist <- datadist(x1,x2); options(datadist='ddist')
y <- exp(x1+ x2 - 1 + rnorm(300))
f <- ols(log(y) ~ pol(x1,2) + x2, x=TRUE, y=TRUE) # x y for bootcov
fb <- bootcov(f, B=100)
pb <- Predict(fb, x1, x2=c(.25,.75))
p1 <- Predict(f, x1, x2=c(.25,.75))
p <- rbind(normal=p1, boot=pb)
plot(p)

p1 <- Predict(f, x1, conf.type='mean')
p2 <- Predict(f, x1, conf.type='individual')
p <- rbind(mean=p1, individual=p2)
plot(p, label.curve=FALSE) # uses superposition
plot(p, ~x1 | .set.) # 2 panels

r <- resid(f)
smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean')
formals(smean) <- list(yhat=numeric(0), res=r[!is.na(r)])
#smean$res <- r[!is.na(r)] # define default res argument to function
Predict(f, x1, fun=smean)

## Example using offset
g <- Glm(Y ~ offset(log(N)) + x1 + x2, family=poisson)
Predict(g, offset=list(N=100))
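# Because the single offset is added to all predicted values, doubling
# N shifts the Poisson linear predictor by log(2); a quick sketch:
Predict(g, offset=list(N=200))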
}
options(datadist=NULL)
}
\keyword{models}
rms/man/ie.setup.Rd0000644000176200001440000000675713714237251013663 0ustar liggesusers\name{ie.setup}
\alias{ie.setup}
\title{Intervening Event Setup}
\description{ Creates several new variables which help set up a dataset for modeling with
\code{cph} or \code{coxph} when there is a single binary time-dependent covariable which
turns on at a given time, and stays on. This is typical when analyzing the impact of an
intervening event. \code{ie.setup} creates a \code{Surv} object using the start time, stop
time format. It also creates a binary indicator for the intervening event, and a variable
called \code{subs} that is useful when \code{attach}-ing a dataframe. \code{subs} has
observation numbers duplicated for subjects having an intervening event, so those subjects'
baseline covariables (that are not time-dependent) can be duplicated correctly. }
\usage{ ie.setup(failure.time, event, ie.time, break.ties=FALSE) }
\arguments{
\item{failure.time}{ a numeric variable containing the event or censoring times for the
terminating event }
\item{event}{ a binary (0/1) variable specifying whether observations had the terminating
event (event=1) or were censored (event=0) }
\item{ie.time}{ intervening event times. For subjects having no intervening events, the
corresponding values of ie.time must be NA. }
\item{break.ties}{ Occasionally intervening events are recorded as happening at exactly the
same time as the termination of follow-up for some subjects. The \code{Surv} function will
not allow this. To randomly break the ties by subtracting a random number from such tied
intervening event times, specify \code{break.ties=TRUE}. The random number is uniform
between zero and the minimum difference between any two untied \code{failure.time}s. }}
\value{ a list with components \code{S, ie.status, subs, reps}. \code{S} is a \code{Surv}
object containing start and stop times for intervals of observation, along with event
indicators. \code{ie.status} is one if the intervening event has occurred at the start of
the interval, zero otherwise. \code{subs} is a vector of subscripts that can be used to
replicate other variables the same way \code{S} was replicated. \code{reps} specifies how
many times each original observation was replicated. \code{S, ie.status, subs} are all the
same length (at least the number of rows for \code{S} is) and are longer than the original
\code{failure.time} vector. \code{reps} is the same length as the original
\code{failure.time} vector. The \code{subs} vector is suitable for passing to
\code{validate.lrm} or \code{calibrate}, which pass this vector under the name \code{cluster}
on to \code{predab.resample} so that bootstrapping can be done by sampling with replacement
from the original subjects rather than from the individual records created by
\code{ie.setup}. }
\author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr
fh@fharrell.com }
\seealso{ \code{\link{cph}}, \code{\link[survival]{coxph}}, \code{\link[survival]{Surv}},
\code{\link{cr.setup}}, \code{\link{predab.resample}} }
\examples{
failure.time <- c(1 , 2, 3)
event <- c(1 , 1, 0)
ie.time <- c(NA, 1.5, 2.5)
z <- ie.setup(failure.time, event, ie.time)
S <- z$S
S
ie.status <- z$ie.status
ie.status
z$subs
z$reps
\dontrun{
attach(input.data.frame[z$subs,]) #replicates all variables
f <- cph(S ~ age + sex + ie.status)
# Instead of duplicating rows of data frame, could do this:
attach(input.data.frame)
z <- ie.setup(failure.time, event, ie.time)
s <- z$subs
age <- age[s]
sex <- sex[s]
f <- cph(S ~ age + sex + ie.status)
}
}
\keyword{survival}
rms/man/vif.Rd0000644000176200001440000000205113714237251012704 0ustar liggesusers\name{vif}
\alias{vif}
\title{Variance Inflation Factors}
\description{ Computes variance inflation factors from the covariance matrix of parameter
estimates, using the method of Davis et al. (1986), which is based on the correlation matrix
from the information matrix. }
\usage{ vif(fit) }
\arguments{ \item{fit}{ an object created by \code{lrm}, \code{ols}, \code{psm}, \code{cph},
\code{Rq}, \code{Glm}, \code{glm} }}
\value{vector of vifs}
\author{ Frank Harrell \cr Department of Biostatistics \cr Vanderbilt University \cr
fh@fharrell.com }
\references{ Davis CE, Hyde JE, Bangdiwala SI, Nelson JJ: An example of dependencies among
variables in a conditional logistic regression. In Modern Statistical Methods in Chronic
Disease Epidemiology, Eds SH Moolgavkar and RL Prentice, pp. 140--147. New York: Wiley;
1986. }
\seealso{ \code{\link{rmsMisc}} (for \code{\link[Hmisc]{num.intercepts}}) }
\examples{
set.seed(1)
x1 <- rnorm(100)
x2 <- x1+.1*rnorm(100)
y <- sample(0:1, 100, TRUE)
f <- lrm(y ~ x1 + x2)
vif(f)
}
\keyword{models} \keyword{regression}
rms/man/predictrms.Rd0000644000176200001440000004170613714237251014306 0ustar liggesusers\name{predictrms}
\alias{predictrms} \alias{predict.rms} \alias{predict.bj} \alias{predict.cph}
\alias{predict.Glm} \alias{predict.Gls} \alias{predict.ols} \alias{predict.psm}
\title{Predicted Values from Model Fit}
\description{ The \code{predict} function is used to obtain a variety of values or predicted
values from either the data used to fit the model (if \code{type="adjto"} or
\code{"adjto.data.frame"} or if \code{x=TRUE} or \code{linear.predictors=TRUE} were specified
to the modeling function), or from a new dataset. Parameters such as knots and factor levels
used in creating the design matrix in the original fit are "remembered". See the
\code{Function} function for another method for computing the linear predictors.
\code{predictrms} is an internal utility function that is used by the other functions.
}
\usage{
predictrms(fit, newdata=NULL,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean', 'individual', 'simultaneous'),
kint=NULL, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", ref.zero=FALSE,
posterior.summary=c('mean', 'median', 'mode'), second=FALSE, ...)
\method{predict}{bj}(object, newdata,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # for bj
\method{predict}{cph}(object, newdata=NULL,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # cph
\method{predict}{Glm}(object, newdata,
type= c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # Glm
\method{predict}{Gls}(object, newdata,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # Gls
\method{predict}{ols}(object, newdata,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # ols
\method{predict}{psm}(object, newdata,
type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms",
"adjto", "adjto.data.frame", "model.frame"),
se.fit=FALSE, conf.int=FALSE,
conf.type=c('mean','individual','simultaneous'),
kint=1, na.action=na.keep, expand.na=TRUE,
center.terms=type=="terms", \dots) # psm
}
\arguments{
\item{object,fit}{a fit object created by an \code{rms} fitting function}
\item{newdata}{ An S data frame, list or a matrix specifying new data for which predictions
are desired. If \code{newdata} is a list, it is converted to a matrix first. A matrix is
converted to a data frame. For the matrix form, categorical variables (\code{catg} or
\code{strat}) must be coded as integer category numbers corresponding to the order in which
value labels were stored. For list or matrix forms, \code{matrx} factors must be given a
single value. If this single value is the S missing value \code{NA}, the adjustment values
of matrx (the column medians) will later replace this value. If the single value is not
\code{NA}, it is propagated throughout the columns of the \code{matrx} factor. For
\code{factor} variables having numeric levels, you can specify the numeric values in
\code{newdata} without first converting the variables to factors. These numeric values are
checked to make sure they match a level, then the variable is converted internally to a
\code{factor}. It is most typical to use a data frame for newdata, and the S function
\code{expand.grid} is very handy here.
For example, one may specify \cr \code{newdata=expand.grid(age=c(10,20,30),} \cr
\code{race=c("black","white","other"),} \cr \code{chol=seq(100,300,by=25))}. }
\item{type}{ Type of output desired. The default is \code{"lp"} to get the linear predictors
- predicted \eqn{X\beta}{X beta}. For Cox models, these predictions are centered. You may
specify \code{"x"} to get an expanded design matrix at the desired combinations of values,
\code{"data.frame"} to get an S data frame of the combinations, \code{"model.frame"} to get
a data frame of the transformed predictors, \code{"terms"} to get a matrix with each column
being the linear combination of variables making up a factor (with separate terms for
interactions), \code{"cterms"} ("combined terms") to not create separate terms for
interactions but to add all interaction terms involving each predictor to the main terms for
each predictor, \code{"ccterms"} to combine all related terms (related through interactions)
and their interactions into a single column, \code{"adjto"} to return a vector of
\code{limits[2]} (see \code{datadist}) in coded form, and \code{"adjto.data.frame"} to
return a data frame version of these central adjustment values. Use of \code{type="cterms"}
does not make sense for a \code{strat} variable that does not interact with another
variable. If \code{newdata} is not given, \code{predict} will attempt to return information
stored with the fit object if the appropriate options were used with the modeling function
(e.g., \code{x, y, linear.predictors, se.fit}). }
\item{se.fit}{ Defaults to \code{FALSE}. If \code{type="lp"}, set \code{se.fit=TRUE} to
return a list with components \code{linear.predictors} and \code{se.fit} instead of just a
vector of fitted values. For Cox model fits, standard errors of linear predictors are
computed after subtracting the original column means from the new design matrix. }
\item{conf.int}{ Specify \code{conf.int} as a positive fraction to obtain upper and lower
confidence intervals (e.g., \code{conf.int=0.95}). The \eqn{t}-distribution is used in the
calculation for \code{ols} fits. Otherwise, the normal critical value is used. For Bayesian
models \code{conf.int} is the highest posterior density interval probability. }
\item{conf.type}{ specifies the type of confidence interval. Default is for the mean. For
\code{ols} fits there is the option of obtaining confidence limits for individual predicted
values by specifying \code{conf.type="individual"}. }
\item{posterior.summary}{when making predictions from a Bayesian model, specifies whether
you want the linear predictor to be computed from the posterior mean of parameters (default)
or the posterior mode or median}
\item{second}{set to \code{TRUE} to use the model's second formula. At present this pertains
only to a partial proportional odds model fitted using the \code{blrm} function. When
\code{second=TRUE} and \code{type='x'} the Z design matrix is returned (that goes with the
tau parameters in the partial PO model). When \code{type='lp'} is specified Z*tau is
computed. In neither case is the result multiplied by the \code{cppo} function.}
\item{kint}{a single integer specifying the number of the intercept to use in
multiple-intercept models. The default is 1 for \code{lrm} and the reference median
intercept for \code{orm} and \code{blrm}.
For a partial PO model, \code{kint} should correspond to the response variable value that will be used when dealing with \code{second=TRUE}.} \item{na.action}{ Function to handle missing values in \code{newdata}. For predictions "in data", the same \code{na.action} that was used during model fitting is used to define an \code{naresid} function to possibly restore rows of the data matrix that were deleted due to NAs. For predictions "out of data", the default \code{na.action} is \code{na.keep}, resulting in NA predictions when a row of \code{newdata} has an NA. Whatever \code{na.action} is in effect at the time for "out of data" predictions, the corresponding \code{naresid} is used also. } \item{expand.na}{ set to \code{FALSE} to keep the \code{naresid} from having any effect, i.e., to keep from adding back observations removed because of NAs in the returned object. If \code{expand.na=FALSE}, the \code{na.action} attribute will be added to the returned object. } \item{center.terms}{ set to \code{FALSE} to suppress subtracting adjust-to values from columns of the design matrix before computing terms with \code{type="terms"}. } \item{ref.zero}{Set to \code{TRUE} to subtract a constant from \eqn{X\beta}{X beta} before plotting so that the reference value of the \code{x}-variable yields \code{y=0}. This is done before applying function \code{fun}. This is especially useful for Cox models to make the hazard ratio be 1.0 at reference values, and the confidence interval have width zero.} \item{\dots}{ignored} } \details{ \code{datadist} and \code{options(datadist=)} should be run before \code{predictrms} if using \code{type="adjto"}, \code{type="adjto.data.frame"}, or \code{type="terms"}, or if the fit is a Cox model fit and you are requesting \code{se.fit=TRUE}. For these cases, the adjustment values are needed (either for the returned result or for the correct covariance matrix computation). 
} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com }
\seealso{ \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}},
\code{\link{summary.rms}}, \code{\link{rms}}, \code{\link{rms.trans}},
\code{\link{predict.lrm}}, \code{\link{predict.orm}}, \code{\link{residuals.cph}},
\code{\link{datadist}}, \code{\link{gendata}}, \code{\link{gIndex}},
\code{\link{Function.rms}}, \code{\link[Hmisc]{reShape}}, \code{\link[Hmisc]{xYplot}},
\code{\link{contrast.rms}} }
\examples{
n <- 1000 # define sample size
set.seed(17) # so can reproduce the results
age <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol <- rnorm(n, 200, 25)
sex <- factor(sample(c('female','male'), n,TRUE))
treat <- factor(sample(c('a','b','c'), n,TRUE))

# Specify population model for log odds that Y=1
L <- .4*(sex=='male') + .045*(age-50) +
(log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) +
.3*sqrt(blood.pressure-60)-2.3 + 1*(treat=='b')
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

ddist <- datadist(age, blood.pressure, cholesterol, sex, treat)
options(datadist='ddist')

fit <- lrm(y ~ rcs(blood.pressure,4) + sex * (age + rcs(cholesterol,4)) +
sex*treat*age)

# Use xYplot to display predictions in 9 panels, with error bars,
# with superposition of two treatments
dat <- expand.grid(treat=levels(treat),sex=levels(sex),
age=c(20,40,60),blood.pressure=120,
cholesterol=seq(100,300,length=10))
# Add variables linear.predictors and se.fit to dat
dat <- cbind(dat, predict(fit, dat, se.fit=TRUE))
# This is much easier with Predict
# xYplot in Hmisc extends xyplot to allow error bars
xYplot(Cbind(linear.predictors,linear.predictors-1.96*se.fit,
linear.predictors+1.96*se.fit) ~ cholesterol | sex*age,
groups=treat, data=dat, type='b')

# Since blood.pressure doesn't interact with anything, we can quickly and
# interactively try various transformations of blood.pressure, taking
# the fitted spline function as the gold standard. We are seeking a
# linearizing transformation even though this may lead to falsely
# narrow confidence intervals if we use this data-dredging-based transformation
bp <- 70:160
logit <- predict(fit, expand.grid(treat="a", sex='male', age=median(age),
cholesterol=median(cholesterol),
blood.pressure=bp), type="terms")[,"blood.pressure"]
#Note: if age interacted with anything, this would be the age
# "main effect" ignoring interaction terms
#Could also use Predict(f, age=ag)$yhat
#which allows evaluation of the shape for any level of interacting
#factors. When age does not interact with anything, the result from
#predict(f, \dots, type="terms") would equal the result from
#plot if all other terms were ignored
plot(bp^.5, logit) # try square root vs. spline transform.
plot(bp^1.5, logit) # try 1.5 power
plot(sqrt(bp-60), logit)
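# If one of these transforms looked linear, one could refit with it and
# compare AIC against the spline fit (a sketch, not run; assumes
# blood.pressure > 60 so the square root is defined):
# fit2 <- lrm(y ~ sqrt(blood.pressure-60) + sex * (age + rcs(cholesterol,4)) +
#             sex*treat*age)
# c(AIC(fit), AIC(fit2))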
#Some approaches to making a plot showing how predicted values
#vary with a continuous predictor on the x-axis, with two other
#predictors varying
combos <- gendata(fit, age=seq(10,100,by=10),
cholesterol=c(170,200,230),
blood.pressure=c(80,120,160))
#treat, sex not specified -> set to mode
#can also use expand.grid
combos$pred <- predict(fit, combos)
xyplot(pred ~ age | cholesterol*blood.pressure, data=combos, type='l')
xYplot(pred ~ age | cholesterol, groups=blood.pressure, data=combos, type='l')
Key() # Key created by xYplot
xYplot(pred ~ age, groups=interaction(cholesterol,blood.pressure),
data=combos, type='l', lty=1:9)
Key()

# Add upper and lower 0.95 confidence limits for individuals
combos <- cbind(combos, predict(fit, combos, conf.int=.95))
xYplot(Cbind(linear.predictors, lower, upper) ~ age | cholesterol,
groups=blood.pressure, data=combos, type='b')
Key()

# Plot effects of treatments (all pairwise comparisons) vs.
# levels of interacting factors (age, sex)
d <- gendata(fit, treat=levels(treat), sex=levels(sex), age=seq(30,80,by=10))
x <- predict(fit, d, type="x")
betas <- fit$coef
cov <- vcov(fit, intercepts='none')

i <- d$treat=="a"; xa <- x[i,]; Sex <- d$sex[i]; Age <- d$age[i]
i <- d$treat=="b"; xb <- x[i,]
i <- d$treat=="c"; xc <- x[i,]

doit <- function(xd, lab) {
xb <- matxv(xd, betas)
se <- apply((xd \%*\% cov) * xd, 1, sum)^.5
q <- qnorm(1-.01/2) # 0.99 confidence limits
lower <- xb - q * se; upper <- xb + q * se
#Get odds ratios instead of linear effects
xb <- exp(xb); lower <- exp(lower); upper <- exp(upper)
#First elements of these agree with
#summary(fit, age=30, sex='female',conf.int=.99))
for(sx in levels(Sex)) {
j <- Sex==sx
errbar(Age[j], xb[j], upper[j], lower[j], xlab="Age",
ylab=paste(lab, "Odds Ratio"), ylim=c(.1, 20), log='y')
title(paste("Sex:", sx))
abline(h=1, lty=2)
}
}

par(mfrow=c(3,2), oma=c(3,0,3,0))
doit(xb - xa, "b:a")
doit(xc - xa, "c:a")
doit(xc - xb, "c:b")

# NOTE: This is much easier to do using contrast.rms

# Demonstrate type="terms", "cterms", "ccterms"
set.seed(1)
n <- 40
x <- 1:n
w <- factor(sample(c('a', 'b'), n, TRUE))
u <- factor(sample(c('A', 'B'), n, TRUE))
y <- .01*x + .2*(w=='b') + .3*(u=='B') + .2*(w=='b' & u=='B') + rnorm(n)/5
ddist <- datadist(x, w, u)
f <- ols(y ~ x*w*u, x=TRUE, y=TRUE)
f
anova(f)
z <- predict(f, type='terms', center.terms=FALSE)
z[1:5,]
k <- coef(f)
## Manually compute combined terms
wb <- w=='b'
uB <- u=='B'
h  <- k['x * w=b * u=B']*x*wb*uB
tx <- k['x'] *x + k['x * w=b']*x*wb + k['x * u=B'] *x*uB + h
tw <- k['w=b']*wb + k['x * w=b']*x*wb + k['w=b * u=B']*wb*uB + h
tu <- k['u=B']*uB + k['x * u=B']*x*uB + k['w=b * u=B']*wb*uB + h
h <- z[,'x * w * u'] # highest order term is present in all cterms
tx2 <- z[,'x']+z[,'x * w']+z[,'x * u']+h
tw2 <- z[,'w']+z[,'x * w']+z[,'w * u']+h
tu2 <- z[,'u']+z[,'x * u']+z[,'w * u']+h
ae <- function(a, b) all.equal(a, b, check.attributes=FALSE)
ae(tx, tx2)
ae(tw, tw2)
ae(tu, tu2)

zc <- predict(f, type='cterms')
zc[1:5,]
ae(tx, zc[,'x'])
ae(tw, zc[,'w'])
ae(tu, zc[,'u'])

zc <- predict(f, type='ccterms')
# As all factors are indirectly related, ccterms gives overall linear
# predictor except for the intercept
zc[1:5,]
ae(as.vector(zc + coef(f)[1]), f$linear.predictors)

\dontrun{
#A variable state.code has levels "1", "5","13"
#Get predictions with or without converting variable in newdata to factor
predict(fit, data.frame(state.code=c(5,13)))
predict(fit, data.frame(state.code=factor(c(5,13))))

#Use gendata function
(gendata.rms) for interactive specification of #predictor variable settings (for 10 observations) df <- gendata(fit, nobs=10, viewvals=TRUE) df$predicted <- predict(fit, df) # add variable to data frame df df <- gendata(fit, age=c(10,20,30)) # leave other variables at ref. vals. predict(fit, df, type="fitted") # See reShape (in Hmisc) for an example where predictions corresponding to # values of one of the varying predictors are reformatted into multiple # columns of a matrix } options(datadist=NULL) } \keyword{models} \keyword{regression} rms/man/pentrace.Rd0000644000176200001440000002403313714237251013725 0ustar liggesusers\name{pentrace} \alias{pentrace} \alias{plot.pentrace} \alias{print.pentrace} \alias{effective.df} \title{ Trace AIC and BIC vs. Penalty } \description{ For an ordinary unpenalized fit from \code{lrm} or \code{ols} and for a vector or list of penalties, fits a series of logistic or linear models using penalized maximum likelihood estimation, and saves the effective degrees of freedom, Akaike Information Criterion (\eqn{AIC}), Schwarz Bayesian Information Criterion (\eqn{BIC}), and Hurvich and Tsai's corrected \eqn{AIC} (\eqn{AIC_c}). Optionally \code{pentrace} can use the \code{nlminb} function to solve for the optimum penalty factor or combination of factors penalizing different kinds of terms in the model. The \code{effective.df} function prints the original and effective degrees of freedom for a penalized fit or for an unpenalized fit and the best penalization determined from a previous invocation of \code{pentrace} if \code{method="grid"} (the default). The effective d.f. is computed separately for each class of terms in the model (e.g., interaction, nonlinear). A \code{plot} method exists to plot the results, and a \code{print} method exists to print the most pertinent components. Both \eqn{AIC} and \eqn{BIC} may be plotted if there is only one penalty factor type specified in \code{penalty}. Otherwise, the first two types of penalty factors are plotted, showing only the \eqn{AIC}. } \usage{ pentrace(fit, penalty, penalty.matrix, method=c('grid','optimize'), which=c('aic.c','aic','bic'), target.df=NULL, fitter, pr=FALSE, tol=1e-7, keep.coef=FALSE, complex.more=TRUE, verbose=FALSE, maxit=12, subset, noaddzero=FALSE) effective.df(fit, object) \method{print}{pentrace}(x, \dots) \method{plot}{pentrace}(x, method=c('points','image'), which=c('effective.df','aic','aic.c','bic'), pch=2, add=FALSE, ylim, \dots) } \arguments{ \item{fit}{ a result from \code{lrm} or \code{ols} with \code{x=TRUE, y=TRUE} and without using \code{penalty} or \code{penalty.matrix} (or optionally using penalization in the case of \code{effective.df}) } \item{penalty}{ can be a vector or a list. If it is a vector, all types of terms in the model will be penalized by the same amount, specified by elements in \code{penalty}, with a penalty of zero automatically added. \code{penalty} can also be a list in the format documented in the \code{lrm} function, except that elements of the list can be vectors. The \code{expand.grid} function is invoked by \code{pentrace} to generate all possible combinations of penalties. For example, specifying \code{penalty=list(simple=1:2, nonlinear=1:3)} will generate 6 combinations to try, so that the analyst can attempt to determine whether penalizing more complex terms in the model more than the linear or categorical variable terms will be beneficial. 
If \code{complex.more=TRUE}, it is assumed that the variables given in \code{penalty} are listed in order from less complex to more complex. With \code{method="optimize"} \code{penalty} specifies an initial guess for the penalty or penalties. If all term types are to be equally penalized, \code{penalty} should be a single number, otherwise it should be a list containing single numbers as elements, e.g., \code{penalty=list(simple=1, nonlinear=2)}. Experience has shown that the optimization algorithm is more likely to find a reasonable solution when the starting value specified in \code{penalty} is too large rather than too small. } \item{object}{ an object returned by \code{pentrace}. For \code{effective.df}, \code{object} can be omitted if the \code{fit} was penalized. } \item{penalty.matrix}{ see \code{lrm} } \item{method}{ The default is \code{method="grid"} to print various indexes for all combinations of penalty parameters given by the user. Specify \code{method="optimize"} to have \code{pentrace} use \code{nlminb} to solve for the combination of penalty parameters that gives the maximum value of the objective named in \code{which}, or, if \code{target.df} is given, to find the combination that yields \code{target.df} effective total degrees of freedom for the model. When \code{target.df} is specified, \code{method} is set to \code{"optimize"} automatically. For \code{plot.pentrace} this parameter applies only if more than one penalty term-type was used. The default is to use open triangles whose sizes are proportional to the ranks of the AICs, plotting the first two penalty factors respectively on the x and y axes. Use \code{method="image"} to plot an image plot. } \item{which}{ the objective to maximize for either \code{method}. Default is \code{"aic.c"} (corrected AIC). For \code{plot.pentrace}, \code{which} is a vector of names of criteria to show; default is to plot all 4 types, with effective d.f. in its own separate plot } \item{target.df}{ applies only to \code{method="optimize"}. See \code{method}. \code{target.df} makes sense mainly when a single type of penalty factor is specified. } \item{fitter}{ a fitting function. Default is \code{lrm.fit} (\code{lm.pfit} is always used for \code{ols}). } \item{pr}{ set to \code{TRUE} to print intermediate results } \item{tol}{ tolerance for declaring a matrix singular (see \code{lrm.fit, solvet}) } \item{keep.coef}{ set to \code{TRUE} to store matrix of regression coefficients for all the fits (corresponding to increasing values of \code{penalty}) in object \code{Coefficients} in the returned list. Rows correspond to penalties, columns to regression parameters. } \item{complex.more}{ By default if \code{penalty} is a list, combinations of penalties for which complex terms are penalized less than less complex terms will be dropped after \code{expand.grid} is invoked. Set \code{complex.more=FALSE} to allow more complex terms to be penalized less. Currently this option is ignored for \code{method="optimize"}. } \item{verbose}{set to \code{TRUE} to print number of intercepts and sum of effective degrees of freedom} \item{maxit}{ maximum number of iterations to allow in a model fit (default=12). This is passed to the appropriate fitter function with the correct argument name. Increase \code{maxit} if you had to when fitting the original unpenalized model. } \item{subset}{ a logical or integer vector specifying rows of the design and response matrices to subset in fitting models. 
This is most useful for bootstrapping \code{pentrace} to see if the best penalty can be estimated with little error so that variation due to selecting the optimal penalty can be safely ignored when bootstrapping standard errors of regression coefficients and measures of predictive accuracy. See an example below. } \item{noaddzero}{set to \code{TRUE} to not add an unpenalized model to the list of models to fit} \item{x}{a result from \code{pentrace}} \item{pch}{used for \code{method="points"}} \item{add}{ set to \code{TRUE} to add to an existing plot. In that case, the effective d.f. plot is not re-drawn, but the AIC/BIC plot is added to. } \item{ylim}{ 2-vector of y-axis limits for plots other than effective d.f. } \item{...}{ other arguments passed to \code{plot}, \code{lines}, or \code{image} }} \value{ a list of class \code{"pentrace"} with elements \code{penalty, df, objective, fit, var.adj, diag, results.all}, and optionally \code{Coefficients}. The first 6 elements correspond to the fit that had the best objective as named in the \code{which} argument, from the sequence of fits tried. Here \code{fit} is the fit object from \code{fitter} which was a penalized fit, \code{diag} is the diagonal of the matrix used to compute the effective d.f., and \code{var.adj} is Gray (1992) Equation 2.9, which is an improved covariance matrix for the penalized beta. \code{results.all} is a data frame whose first few variables are the components of \code{penalty} and whose other columns are \code{df, aic, bic, aic.c}. \code{results.all} thus contains a summary of results for all fits attempted. When \code{method="optimize"}, only two components are returned: \code{penalty} and \code{objective}, and the object does not have a class. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \references{ Gray RJ: Flexible methods for analyzing survival data using splines, with applications to breast cancer prognosis. JASA 87:942--951, 1992. Hurvich CM, Tsai, CL: Regression and time series model selection in small samples. Biometrika 76:297--307, 1989. } \seealso{ \code{\link{lrm}}, \code{\link{ols}}, \code{\link[Hmisc]{solvet}}, \code{\link{rmsMisc}}, \code{\link{image}} } \examples{ n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) p <- pentrace(f, seq(.2,1,by=.05)) plot(p) p$diag # may learn something about fractional effective d.f. # for each original parameter pentrace(f, list(simple=c(0,.2,.4), nonlinear=c(0,.2,.4,.8,1))) # Bootstrap pentrace 5 times, making a plot of corrected AIC with 5 reps n <- nrow(f$x) plot(pentrace(f, seq(.2,1,by=.05)), which='aic.c', col=1, ylim=c(30,120)) #original in black for(j in 1:5) plot(pentrace(f, seq(.2,1,by=.05), subset=sample(n,n,TRUE)), which='aic.c', col=j+1, add=TRUE)
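# Inspect the grid-search summary for all penalties tried (a sketch;
# results.all holds df, aic, bic, aic.c by penalty as described under
# Value -- the column name 'penalty' is assumed here)
p$results.all
with(p$results.all, plot(penalty, aic.c, type='b'))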
# Find penalty giving optimum corrected AIC. Initial guess is 1.0 # Not implemented yet # pentrace(f, 1, method='optimize') # Find penalty reducing total regression d.f. effectively to 5 # pentrace(f, 1, target.df=5) # Re-fit with penalty giving best aic.c without differential penalization f <- update(f, penalty=p$penalty) effective.df(f) } \keyword{models} \keyword{regression} \concept{logistic regression model} \concept{penalized MLE} \concept{ridge regression} \concept{shrinkage} rms/man/Function.Rd0000644000176200001440000001000413714237251013702 0ustar liggesusers\name{Function} \alias{Function.rms} \alias{Function.cph} \alias{sascode} \alias{perlcode} \title{Compose an S Function to Compute X beta from a Fit} \description{ \code{Function} is a class of functions for creating other S functions. \code{Function.rms} is the method for creating S functions to compute X beta, based on a model fitted with \code{rms} in effect. Like \code{latexrms}, \code{Function.rms} simplifies restricted cubic spline functions and factors out terms in second-order interactions. \code{Function.rms} will not work for models that have third-order interactions involving restricted cubic splines. \code{Function.cph} is a particular method for handling fits from \code{cph}, for which an intercept (the negative of the centering constant) is added to the model. \code{sascode} is a function that takes an S function such as one created by \code{Function} and does most of the editing to turn the function definition into a fragment of SAS code for computing X beta from the fitted model, along with assignment statements that initialize predictors to reference values. \code{perlcode} similarly creates Perl code to evaluate a fitted regression model. } \usage{ \method{Function}{rms}(object, intercept=NULL, digits=max(8, .Options$digits), posterior.summary=c('mean', 'median', 'mode'), \dots) \method{Function}{cph}(object, intercept=-object$center, \dots) # Use result as fun(predictor1=value1, predictor2=value2, \dots) sascode(object, file='', append=FALSE) perlcode(object) } \arguments{ \item{object}{ a fit created with \code{rms} in effect } \item{intercept}{ an intercept value to use (not allowed to be specified to \code{Function.cph}). The intercept is usually retrieved from the regression coefficients automatically. } \item{digits}{ number of significant digits to use for coefficients and knot locations} \item{posterior.summary}{if using a Bayesian model fit such as from \code{blrm}, specifies whether to use posterior mode/mean/median parameter estimates in generating the function} \item{file}{ name of a file in which to write the SAS code. Default is to write to standard output. } \item{append}{ set to \code{TRUE} to have \code{sascode} append code to an existing file named \code{file}. } \item{\dots}{arguments to pass to \code{Function.rms} from \code{Function.cph}} } \value{ \code{Function} returns an S-Plus function that can be invoked in any usual context. The function has one argument per predictor variable, and the default values of the predictors are set to \code{adjust-to} values (see \code{datadist}). Multiple predicted X beta values may be calculated by specifying vectors as arguments to the created function. All non-scalar argument values must have the same length. \code{perlcode} returns a character string with embedded newline characters.
} \author{ Frank Harrell, Jeremy Stephens, and Thomas Dupont\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{latexrms}}, \code{\link[Hmisc]{transcan}}, \code{\link{predict.rms}}, \code{\link{rms}}, \code{\link{rms.trans}} } \examples{ suppressWarnings(RNGversion("3.5.0")) set.seed(1331) x1 <- exp(rnorm(100)) x2 <- factor(sample(c('a','b'),100,rep=TRUE)) dd <- datadist(x1, x2) options(datadist='dd') y <- log(x1)^2+log(x1)*(x2=='b')+rnorm(100)/4 f <- ols(y ~ pol(log(x1),2)*x2) f$coef g <- Function(f, digits=5) g sascode(g) cat(perlcode(g), '\n') g() g(x1=c(2,3), x2='b') #could omit x2 since b is default category predict(f, expand.grid(x1=c(2,3),x2='b')) g8 <- Function(f) # default is 8 sig. digits g8(x1=c(2,3), x2='b') options(datadist=NULL) \dontrun{ # Make self-contained functions for computing survival probabilities # using a log-normal regression f <- psm(Surv(d.time, death) ~ rcs(age,4)*sex, dist='gaussian') g <- Function(f) surv <- Survival(f) # Compute 2 and 5-year survival estimates for 50 year old male surv(c(2,5), g(age=50, sex='male')) } } \keyword{regression} \keyword{methods} \keyword{interface} \keyword{models} \keyword{survival} \keyword{math} \concept{logistic regression model} rms/man/bplot.Rd0000644000176200001440000002075613715036033013250 0ustar liggesusers\name{bplot} \alias{bplot} \alias{perimeter} \title{ 3-D Plots Showing Effects of Two Continuous Predictors in a Regression Model Fit} \description{ Uses lattice graphics and the output from \code{Predict} to plot image, contour, or perspective plots showing the simultaneous effects of two continuous predictor variables. Unless \code{formula} is provided, the \eqn{x}-axis is constructed from the first variable listed in the call to \code{Predict} and the \eqn{y}-axis variable comes from the second. The \code{perimeter} function is used to generate the boundary of data to plot when a 3-d plot is made. It finds the area where there are sufficient data to generate believable interaction fits. } \usage{ bplot(x, formula, lfun=lattice::levelplot, xlab, ylab, zlab, adj.subtitle=!info$ref.zero, cex.adj=.75, cex.lab=1, perim, showperim=FALSE, zlim=range(yhat, na.rm=TRUE), scales=list(arrows=FALSE), xlabrot, ylabrot, zlabrot=90, \dots) perimeter(x, y, xinc=diff(range(x))/10, n=10, lowess.=TRUE) } \arguments{ \item{x}{ for \code{bplot}, an object created by \code{Predict} for which two or more numeric predictors varied. For \code{perim} it is the first variable of a pair of predictors forming a 3-d plot. } \item{formula}{ a formula of the form \code{f(yhat) ~ x*y} optionally followed by |a*b*c which are 1-3 paneling variables that were specified to \code{Predict}. \code{f} can represent any R function of a vector that produces a vector. If the left hand side of the formula is omitted, \code{yhat} will be inserted. If \code{formula} is omitted, it will be inferred from the first two variables that varied in the call to \code{Predict}. } \item{lfun}{ a high-level lattice plotting function that takes formulas of the form \code{z ~ x*y}. The default is an image plot (\code{levelplot}). Other common choices are \code{wireframe} for a perspective plot or \code{contourplot} for a contour plot. } \item{xlab}{ Character string label for \eqn{x}-axis. Default is given by \code{Predict}. } \item{ylab}{ Character string label for \eqn{y}-axis } \item{zlab}{ Character string \eqn{z}-axis label for perspective (wireframe) plots. Default comes from \code{Predict}.
\code{zlab} will often be specified if \code{fun} was specified to \code{Predict}. } \item{adj.subtitle}{ Set to \code{FALSE} to suppress subtitling the graph with the list of settings of non-graphed adjustment values. Default is \code{TRUE} if there are non-plotted adjustment variables and \code{ref.zero} was not used. } \item{cex.adj}{ \code{cex} parameter for size of adjustment settings in subtitles. Default is 0.75 } \item{cex.lab}{ \code{cex} parameter for axis labels. Default is 1. } \item{perim}{ names a matrix created by \code{perimeter} when used for 3-d plots of two continuous predictors. When the combination of variables is outside the range in \code{perim}, that section of the plot is suppressed. If \code{perim} is omitted, 3-d plotting will use the marginal distributions of the two predictors to determine the plotting region, when the grid is not specified explicitly in \code{variables}. When instead a series of curves is being plotted, \code{perim} specifies a function having two arguments. The first is the vector of values of the first variable that is about to be plotted on the \eqn{x}-axis. The second argument is the single value of the variable representing different curves, for the current curve being plotted. The function's returned value must be a logical vector whose length is the same as that of the first argument, with values \code{TRUE} if the corresponding point should be plotted for the current curve, \code{FALSE} otherwise. See one of the latter examples. } \item{showperim}{ set to \code{TRUE} if \code{perim} is specified and you want to show the actual perimeter used. } \item{zlim}{ Controls the range for plotting in the \eqn{z}-axis if there is one. Computed by default. } \item{scales}{see \code{\link[lattice:cloud]{wireframe}} } \item{xlabrot}{rotation angle for the x-axis. Default is 30 for \code{wireframe} and 0 otherwise. } \item{ylabrot}{rotation angle for the y-axis. Default is -40 for \code{wireframe}, 90 for \code{contourplot} or \code{levelplot}, and 0 otherwise. } \item{zlabrot}{rotation angle for z-axis rotation for \code{wireframe} plots } \item{\dots}{other arguments to pass to the lattice function } \item{y}{ second variable of the pair for \code{perim}. If omitted, \code{x} is assumed to be a list with both \code{x} and \code{y} components. } \item{xinc}{ increment in \code{x} over which to examine the density of \code{y} in \code{perimeter} } \item{n}{ within intervals of \code{x} for \code{perimeter}, takes the informative range of \code{y} to be the \eqn{n}th smallest to the \eqn{n}th largest values of \code{y}. If there aren't at least 2\eqn{n} \code{y} values in the \code{x} interval, no \code{y} ranges are used for that interval. } \item{lowess.}{ set to \code{FALSE} to not have \code{lowess} smooth the data perimeters } } \value{ \code{perimeter} returns a matrix of class \code{perimeter}. This outline can be conveniently plotted by \code{lines.perimeter}. } \details{ \code{perimeter} is a kind of generalization of \code{datadist} for 2 continuous variables. First, the \code{n} smallest and largest \code{x} values are determined. These form the lowest and highest possible \code{x}s to display. Then \code{x} is grouped into intervals bounded by these two numbers, with the interval widths defined by \code{xinc}. Within each interval, \code{y} is sorted and the \eqn{n}th smallest and largest \code{y} are taken as the interval containing sufficient data density to plot interaction surfaces. 
The interval is ignored when there are insufficient \code{y} values. When the data are being readied for \code{persp}, \code{bplot} uses the \code{approx} function to do linear interpolation of the \code{y}-boundaries as a function of the \code{x} values actually used in forming the grid (the values of the first variable specified to \code{Predict}). To make the perimeter smooth, specify \code{lowess.=TRUE} to \code{perimeter}. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{datadist}}, \code{\link{Predict}}, \code{\link{rms}}, \code{\link{rmsMisc}}, \code{\link[lattice]{levelplot}}, \code{\link[lattice]{contourplot}}, \code{\link[lattice:cloud]{wireframe}} } \examples{ n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) p <- Predict(fit, age, cholesterol, sex, np=50) # vary sex last bplot(p) # image plot for age, cholesterol with color # coming from yhat; use default ranges for # both continuous predictors; two panels (for sex) bplot(p, lfun=wireframe) # same as bplot(p,,wireframe) # View from different angle, change y label orientation accordingly # Default is z=40, x=-60 bplot(p,, wireframe, screen=list(z=40, x=-75), ylabrot=-25) bplot(p,, contourplot) # contour plot bounds <- perimeter(age, cholesterol, lowess=TRUE) plot(age, cholesterol) # show bivariate data density and perimeter lines(bounds[,c('x','ymin')]); lines(bounds[,c('x','ymax')]) p <- Predict(fit, age, cholesterol) # use only one sex bplot(p, perim=bounds) # draws image() plot # don't show estimates where data are sparse # doesn't make sense here since vars don't interact bplot(p, plogis(yhat) ~ age*cholesterol) # Probability scale options(datadist=NULL) } \keyword{models} \keyword{hplot} \keyword{htest} rms/man/rms.trans.Rd0000644000176200001440000002120214024510250014032 0ustar liggesusers\name{rms.trans} \alias{rms.trans} \alias{asis} \alias{pol} \alias{lsp} \alias{rcs} \alias{catg} \alias{scored} \alias{strat} \alias{matrx} \alias{gTrans} \alias{\%ia\%} \title{rms Special Transformation Functions} \description{ This is a series of functions (\code{asis}, \code{pol}, \code{lsp}, \code{rcs}, \code{catg}, \code{scored}, \code{strat}, \code{matrx}, \code{gTrans}, and \code{\%ia\%}) that set up special attributes (such as knots and nonlinear term indicators) that are carried through to fits (using for example \code{lrm},\code{cph}, \code{ols}, \code{psm}). 
\code{anova.rms}, \code{summary.rms}, \code{Predict}, \code{survplot}, \code{fastbw}, \code{validate}, \code{specs}, \code{which.influence}, \code{nomogram} and \code{latex.rms} use these attributes to automate certain analyses (e.g., automatic tests of linearity for each predictor are done by \code{anova.rms}). Many of the functions are called implicitly. Some S functions such as \code{ns} derive data-dependent transformations that are not always "remembered" when predicted values are later computed, so the predictions may be incorrect. The functions listed here solve that problem when used in the \code{rms} context. \code{asis} is the identity transformation, \code{pol} is an ordinary (non-orthogonal) polynomial, \code{rcs} is a linear tail-restricted cubic spline function (natural spline, for which the \code{rcspline.eval} function generates the design matrix, the presence of system option \code{rcspc} causes \code{rcspline.eval} to be invoked with \code{pc=TRUE}, and the presence of system option \code{fractied} causes this value to be passed to \code{rcspline.eval} as the \code{fractied} argument), \code{catg} is for a categorical variable, \code{scored} is for an ordered categorical variable, \code{strat} is for a stratification factor in a Cox model, \code{matrx} is for a matrix predictor, and \code{\%ia\%} represents restricted interactions in which products involving nonlinear effects on both variables are not included in the model. \code{asis, catg, scored, matrx} are seldom invoked explicitly by the user (only to specify \code{label} or \code{name}, usually). \code{gTrans} is a general multiple-parameter transformation function. It can be used to specify new polynomial bases, smooth relationships with a discontinuity at one or more values of \code{x}, or grouped categorical variables, e.g., a categorical variable with 5 levels where you want to combine two of the levels so as to spend only 3 degrees of freedom in all, while still seeing plots of predicted values in which the two combined categories are kept separate but have equal effect estimates. The first argument to \code{gTrans} is a regular numeric, character, or factor variable. The next argument is a function that transforms a vector into a matrix. If the basis functions are to include a linear term it is up to the user to include the original \code{x} as one of the columns. Column names are assigned automatically, but any column names specified by the user will override the default name. If you want to signal which terms correspond to linear and which correspond to nonlinear effects for the purpose of running \code{anova.rms}, add an integer vector attribute \code{nonlinear} to the resulting matrix. This vector specifies the column numbers corresponding to nonlinear effects. The default is to assume a column is a linear effect. The \code{parms} attribute stored with a \code{gTrans} result is a character vector version of the function, so as to not waste space carrying along any environment information. In the list below, functions \code{asis} through \code{gTrans} can have arguments \code{x, parms, label, name} except that \code{parms} does not apply to \code{asis, matrx, strat}. } \usage{ asis(\dots) matrx(\dots) pol(\dots) lsp(\dots) rcs(\dots) catg(\dots) scored(\dots) strat(\dots) gTrans(\dots) x1 \%ia\% x2 } \arguments{ \item{\dots}{ The arguments \dots above contain the following. \describe{ \item{\code{x}}{a predictor variable (or a function of one). If you specify e.g.
\code{pol(pmin(age,10),3)}, a cubic polynomial will be fitted in \code{pmin(age,10)} (\code{pmin} is the S vector element--by--element function). The predictor will be labeled \code{age} in the output, and plots will have \code{age} in its original units on the axes. If you use a function such as \code{pmin}, the predictor is taken as the first argument, and other arguments must be defined in the frame in effect when predicted values, etc., are computed.} \item{\code{parms}}{parameters of transformation (e.g. number or location of knots). For \code{pol} the argument is the order of the polynomial, e.g. \code{2} for quadratic (the usual default). For \code{lsp} it is a vector of knot locations (\code{lsp} will not estimate knot locations). For \code{rcs} it is the number of knots (if scalar), or vector of knot locations (if \code{>2} elements). The default number is the \code{nknots} system option if \code{parms} is not given. If the number of knots is given, locations are computed for that number of knots. If system option \code{rcspc} is \code{TRUE} the \code{parms} vector has an attribute defining the principal components transformation parameters. For \code{catg}, \code{parms} is the category labels (not needed if variable is an S category or factor variable). If omitted, \code{catg} will use \code{unique(x)}, or \code{levels(x)} if \code{x} is a \code{category} or a \code{factor}. For \code{scored}, \code{parms} is a vector of unique values of variable (uses \code{unique(x)} by default). This is not needed if \code{x} is an S \code{ordered} variable. For \code{strat}, \code{parms} is the category labels (not needed if variable is an S category variable). If omitted, will use \code{unique(x)}, or \code{levels(x)} if \code{x} is \code{category} or \code{factor}. \code{parms} is not used for \code{matrix}.} \item{\code{label}}{label of predictor for plotting (default = \code{"label"} attribute or variable name)} \item{\code{name}}{Name to use for predictor in model. Default is name of argument to function.} } } \item{x1,x2}{two continuous variables for which to form a non-doubly-nonlinear interaction} } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link[Hmisc]{rcspline.eval}}, \code{\link[Hmisc]{rcspline.restate}}, \code{\link{rms}}, \code{\link{cph}}, \code{\link{lrm}}, \code{\link{ols}}, \code{\link{datadist}} } \examples{ \dontrun{ options(knots=4, poly.degree=2) # To get the old behavior of rcspline.eval knot placement (which didn't handle # clumping at the lowest or highest value of the predictor very well): # options(fractied = 1.0) # see rcspline.eval for details country <- factor(country.codes) blood.pressure <- cbind(sbp=systolic.bp, dbp=diastolic.bp) fit <- lrm(Y ~ sqrt(x1)*rcs(x2) + rcs(x3,c(5,10,15)) + lsp(x4,c(10,20)) + country + blood.pressure + poly(age,2)) # sqrt(x1) is an implicit asis variable, but limits of x1, not sqrt(x1) # are used for later plotting and effect estimation # x2 fitted with restricted cubic spline with 4 default knots # x3 fitted with r.c.s. with 3 specified knots # x4 fitted with linear spline with 2 specified knots # country is an implied catg variable # blood.pressure is an implied matrx variable # since poly is not an rms function (pol is), it creates a # matrx type variable with no automatic linearity testing # or plotting f1 <- lrm(y ~ rcs(x1) + rcs(x2) + rcs(x1) \%ia\% rcs(x2)) # \%ia\% restricts interactions.
Here it removes terms nonlinear in # both x1 and x2 f2 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 \%ia\% rcs(x2)) # interaction linear in x1 f3 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 \%ia\% x2) # simple product interaction (doubly linear) # Use x1 \%ia\% x2 instead of x1:x2 because x1 \%ia\% x2 triggers # anova to pool x1*x2 term into x1 terms to test total effect # of x1 # # Examples of gTrans # # Linear relationship with a discontinuity at zero: ldisc <- function(x) {z <- cbind(x == 0, x); attr(z, 'nonlinear') <- 1; z} gTrans(x, ldisc) # Duplicate pol(x, 2): pol2 <- function(x) {z <- cbind(x, x^2); attr(z, 'nonlinear') <- 2; z} gTrans(x, pol2) # Linear spline with a knot at x=10 with the new slope taking effect # until x=20 and the spline turning flat at that point but with a # discontinuous vertical shift dspl <- function(x) { z <- cbind(x, pmax(pmin(x, 20) - 10, 0), x > 20) attr(z, 'nonlinear') <- 2:3 z } gTrans(x, dspl)
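# Grouped categorical variable (a sketch; x5 is a hypothetical factor with
# levels 'a','b','c','d','e' in which 'd' and 'e' are constrained to have
# equal effects, spending 3 d.f. while keeping the levels separate in plots)
grp <- function(x) cbind('b'      = 1*(x == 'b'),
                         'c'      = 1*(x == 'c'),
                         'd or e' = 1*(x \%in\% c('d', 'e')))
gTrans(x5, grp)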
} } \keyword{models} \keyword{regression} \keyword{math} \keyword{manip} \keyword{methods} \keyword{survival} \keyword{smooth} \concept{logistic regression model} \concept{transformation} rms/man/survplot.Rd0000644000176200001440000005051713701123114014014 0ustar liggesusers\name{survplot} \alias{survplot} \alias{survplotp} \alias{survplot.rms} \alias{survplot.npsurv} \alias{survplotp.npsurv} \alias{survdiffplot} \title{Plot Survival Curves and Hazard Functions} \description{ Plot estimated survival curves, and for parametric survival models, plot hazard functions. There is an option to print the number of subjects at risk at the start of each time interval. Curves are automatically labeled at the points of maximum separation (using the \code{labcurve} function), and there are many other options for labeling that can be specified with the \code{label.curves} parameter. For example, different plotting symbols can be placed at constant x-increments and a legend linking the symbols with category labels can automatically be positioned on the most empty portion of the plot. For the case of a two stratum analysis by \code{npsurv}, \code{survdiffplot} plots the difference in two Kaplan-Meier estimates along with approximate confidence bands for the differences, with a reference line at zero. The number of subjects at risk is optionally plotted. This number is taken as the minimum of the number of subjects at risk over the two strata. When \code{conf='diffbands'}, \code{survdiffplot} instead does not make a new plot but adds a shaded polygon to an existing plot, showing the midpoint of two survival estimates plus or minus 1/2 the width of the confidence interval for the difference of two Kaplan-Meier estimates. \code{survplotp} creates an interactive \code{plotly} graphic with shaded confidence bands. In the two strata case, it draws the 1/2 confidence bands for the difference in two probabilities centered at the midpoint of the probability estimates, so that where the two curves touch this band there is no significant difference (no multiplicity adjustment is made). For the two strata case, the two individual confidence bands have entries in the legend but are not displayed until the user clicks on the legend. When \code{fit} was from running \code{npsurv} on a multi-state/competing risk \code{Surv} object, \code{survplot} plots cumulative incidence curves properly accounting for competing risks. You must specify exactly one state/event cause to plot using the \code{state} argument. \code{survplot} will not plot multiple states on one graph. This can be accomplished using multiple calls with different values of \code{state} and specifying \code{add=TRUE} for all but the first call. } \usage{ survplot(fit, \dots) survplotp(fit, \dots) \method{survplot}{rms}(fit, \dots, xlim, ylim=if(loglog) c(-5, 1.5) else if (what == "survival" & missing(fun)) c(0, 1), xlab, ylab, time.inc, what=c("survival","hazard"), type=c("tsiatis","kaplan-meier"), conf.type=c("log","log-log","plain","none"), conf.int=FALSE, conf=c("bands","bars"), mylim=NULL, add=FALSE, label.curves=TRUE, abbrev.label=FALSE, levels.only=FALSE, lty, lwd=par("lwd"), col=1, col.fill=gray(seq(.95, .75, length=5)), adj.subtitle=TRUE, loglog=FALSE, fun, n.risk=FALSE, logt=FALSE, dots=FALSE, dotsize=.003, grid=NULL, srt.n.risk=0, sep.n.risk=0.056, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, pr=FALSE) \method{survplot}{npsurv}(fit, xlim, ylim, xlab, ylab, time.inc, state=NULL, conf=c("bands","bars","diffbands","none"), mylim=NULL, add=FALSE, label.curves=TRUE, abbrev.label=FALSE, levels.only=FALSE, lty,lwd=par('lwd'), col=1, col.fill=gray(seq(.95, .75, length=5)), loglog=FALSE, fun, n.risk=FALSE, aehaz=FALSE, times=NULL, logt=FALSE, dots=FALSE, dotsize=.003, grid=NULL, srt.n.risk=0, sep.n.risk=.056, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, pr=FALSE, \dots) \method{survplotp}{npsurv}(fit, xlim, ylim, xlab, ylab, time.inc, state=NULL, conf=c("bands", "none"), mylim=NULL, abbrev.label=FALSE, col=colorspace::rainbow_hcl, levels.only=TRUE, loglog=FALSE, fun=function(y) y, aehaz=FALSE, times=NULL, logt=FALSE, pr=FALSE, \dots) survdiffplot(fit, order=1:2, fun=function(y) y, xlim, ylim, xlab, ylab="Difference in Survival Probability", time.inc, conf.int, conf=c("shaded", "bands","diffbands","none"), add=FALSE, lty=1, lwd=par('lwd'), col=1, n.risk=FALSE, grid=NULL, srt.n.risk=0, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, convert=function(f) f) } \arguments{ \item{fit}{ result of fit (\code{cph}, \code{psm}, \code{npsurv}, \code{survest.psm}). For \code{survdiffplot}, \code{fit} must be the result of \code{npsurv}. } \item{\dots}{ list of factors with names used in model. For fits from \code{npsurv} these arguments do not appear - all strata are plotted. Otherwise the first factor listed is the factor used to determine different survival curves. Any other factors are used to specify single constants to be adjusted to, when defaults given to fitting routine (through \code{limits}) are not used. The value given to factors is the original coding of data given to fit, except that for categorical or strata factors the text string levels may be specified. The form of values given to the first factor are none (omit the equal sign to use default range or list of all values if variable is discrete), \code{"text"} if factor is categorical, \code{c(value1, value2, \dots)}, or a function which returns a vector, such as \code{seq(low,high,by=increment)}. Only the first factor may have the values omitted. In this case the \code{Low effect}, \code{Adjust to}, and \code{High effect} values will be used from \code{datadist} if the variable is continuous. For variables not defined to \code{datadist}, you must specify non-missing constant settings (or a vector of settings for the one displayed variable).
Note that since \code{npsurv} objects do not use the variable list in \code{\dots}, you can specify any extra arguments to \code{labcurve} by adding them at the end of the list of arguments. For \code{survplotp} \dots (e.g., \code{height}, \code{width}) is passed to \code{plotly::plot_ly}. } \item{xlim}{ a vector of two numbers specifying the x-axis range for follow-up time. Default is \code{(0,maxtime)} where \code{maxtime} was the \code{pretty()}d version of the maximum follow-up time in any stratum, stored in \code{fit$maxtime}. If \code{logt=TRUE}, default is \code{(1, log(maxtime))}. } \item{ylim}{ y-axis limits. Default is \code{c(0,1)} for survival, and \code{c(-5,1.5)} if \code{loglog=TRUE}. If \code{fun} or \code{loglog=TRUE} are given and \code{ylim} is not, the limits will be computed from the data. For \code{what="hazard"}, default limits are computed from the first hazard function plotted. } \item{xlab}{ x-axis label. Default is \code{units} attribute of failure time variable given to \code{Surv}. } \item{ylab}{ y-axis label. Default is \code{"Survival Probability"} or \code{"log(-log Survival Probability)"}. If \code{fun} is given, the default is \code{""}. For \code{what="hazard"}, the default is \code{"Hazard Function"}. For a multi-state/competing risk application the default is \code{"Cumulative Incidence"}. } \item{time.inc}{ time increment for labeling the x-axis and printing numbers at risk. If not specified, the value of \code{time.inc} stored with the model fit will be used. } \item{state}{the state/event cause to use in plotting if the fit was for a multi-state/competing risk \code{Surv} object} \item{type}{ specifies type of estimates, \code{"tsiatis"} (the default) or \code{"kaplan-meier"}. \code{"tsiatis"} here corresponds to the Breslow estimator. This is ignored if survival estimates stored with \code{surv=TRUE} are being used. For fits from \code{npsurv}, this argument is also ignored, since it is specified as an argument to \code{npsurv}. } \item{conf.type}{ specifies the basis for confidence limits. This argument is ignored for fits from \code{npsurv}. } \item{conf.int}{ Default is \code{FALSE}. Specify e.g. \code{.95} to plot 0.95 confidence bands. For fits from parametric survival models, or Cox models with \code{x=TRUE} and \code{y=TRUE} specified to the fit, the exact asymptotic formulas will be used to compute standard errors, and confidence limits are based on \code{log(-log S(t))} if \code{loglog=TRUE}. If \code{x=TRUE} and \code{y=TRUE} were not specified to \code{cph} but \code{surv=TRUE} was, the standard errors stored for the underlying survival curve(s) will be used. These agree with the former if predictions are requested at the mean value of X beta or if there are only stratification factors in the model. This argument is ignored for fits from \code{npsurv}, which must have previously specified confidence interval specifications. For \code{survdiffplot} if \code{conf.int} is not specified, the level used in the call to \code{npsurv} will be used. } \item{conf}{ \code{"bars"} for confidence bars at each \code{time.inc} time point. If the fit was from \code{cph(\dots, surv=TRUE)}, the \code{time.inc} used will be that stored with the fit. Use \code{conf="bands"} (the default) for bands using standard errors at each failure time. For \code{npsurv} objects only, \code{conf} may also be \code{"none"}, indicating that confidence interval information stored with the \code{npsurv} result should be ignored.
For \code{npsurv} and \code{survdiffplot}, \code{conf} may be \code{"diffbands"} whereby a shaded region is drawn for comparing two curves. The polygon is centered at the midpoint of the two survival estimates and the height of the polygon is 1/2 the width of the approximate \code{conf.int} pointwise confidence region. Survival curves not overlapping the shaded area are approximately significantly different at the \code{1 - conf.int} level. } \item{mylim}{used to curtail computed \code{ylim}. When \code{ylim} is not given by the user, the computed limits are expanded to force inclusion of the values specified in \code{mylim}.} \item{what}{ defaults to \code{"survival"} to plot survival estimates. Set to \code{"hazard"} or an abbreviation to plot the hazard function (for \code{psm} fits only). Confidence intervals are not available for \code{what="hazard"}. } \item{add}{ set to \code{TRUE} to add curves to an existing plot. } \item{label.curves}{ default is \code{TRUE} to use \code{labcurve} to label curves where they are farthest apart. Set \code{label.curves} to a \code{list} to specify options to \code{labcurve}, e.g., \code{label.curves=list(method="arrow", cex=.8)}. These option names may be abbreviated in the usual way arguments are abbreviated. Use for example \code{label.curves=list(keys=1:5)} to draw symbols (as in \code{pch=1:5} - see \code{points}) on the curves and automatically position a legend in the most empty part of the plot. Set \code{label.curves=FALSE} to suppress drawing curve labels. The \code{col}, \code{lty}, \code{lwd}, and \code{type} parameters are automatically passed to \code{labcurve}, although you can override them here. To distinguish curves by line types and still have \code{labcurve} construct a legend, use for example \code{label.curves=list(keys="lines")}. The negative value for the plotting symbol will suppress a plotting symbol from being drawn either on the curves or in the legend. } \item{abbrev.label}{ set to \code{TRUE} to \code{abbreviate()} curve labels that are plotted } \item{levels.only}{ set to \code{TRUE} to remove \code{variablename=} from the start of curve labels.} \item{lty}{ vector of line types to use for different factor levels. Default is \code{c(1,3,4,5,6,7,\dots)}. } \item{lwd}{ vector of line widths to use for different factor levels. Default is current \code{par} setting for \code{lwd}. } \item{col}{ color for curve, default is \code{1}. Specify a vector to assign different colors to different curves. For \code{survplotp}, \code{col} is a vector of colors corresponding to strata, or a function that will be called to generate such colors. } \item{col.fill}{a vector of colors to used in filling confidence bands} \item{adj.subtitle}{ set to \code{FALSE} to suppress plotting subtitle with levels of adjustment factors not plotted. Defaults to \code{TRUE}. This argument is ignored for \code{npsurv}. } \item{loglog}{ set to \code{TRUE} to plot \code{log(-log Survival)} instead of \code{Survival} } \item{fun}{ specifies any function to translate estimates and confidence limits before plotting. If the fit is a multi-state object the default for \code{fun} is \code{function(y) 1 - y} to draw cumulative incidence curves. 
} \item{logt}{ set to \code{TRUE} to plot \code{log(t)} instead of \code{t} on the x-axis } \item{n.risk}{ set to \code{TRUE} to add number of subjects at risk for each curve, using the \code{surv.summary} created by \code{cph} or using the failure times used in fitting the model if \code{y=TRUE} was specified to the fit or if the fit was from \code{npsurv}. The numbers are placed at the bottom of the graph unless \code{y.n.risk} is given. If the fit is from \code{survest.psm}, \code{n.risk} does not apply. } \item{srt.n.risk}{ angle of rotation for leftmost number of subjects at risk (since this number may run into the second or into the y-axis). Default is \code{0}. } \item{adj.n.risk}{ justification for leftmost number at risk. Default is \code{1} for right justification. Use \code{0} for left justification, \code{.5} for centered. } \item{sep.n.risk}{ multiple of upper y limit - lower y limit for separating lines of text containing number of subjects at risk. Default is \code{.056*(ylim[2]-ylim[1])}. } \item{y.n.risk}{ When \code{n.risk=TRUE}, the default is to place numbers of patients at risk above the x-axis. You can specify a y-coordinate for the bottom line of the numbers using \code{y.n.risk}. Specify \code{y.n.risk='auto'} to place the numbers below the x-axis at a distance of 1/3 of the range of \code{ylim}. } \item{cex.n.risk}{ character size for number of subjects at risk (when \code{n.risk} is \code{TRUE}) } \item{cex.xlab}{\code{cex} for x-axis label} \item{cex.ylab}{\code{cex} for y-axis label} \item{dots}{ set to \code{TRUE} to plot a grid of dots. Will be plotted at every \code{time.inc} (see \code{cph}) and at survival increments of .1 (if \code{d>.4}), .05 (if \code{.2 < d <= .4}), or .025 (if \code{d <= .2}), where \code{d} is the range of survival displayed. } \item{dotsize}{size of dots in inches} \item{grid}{ defaults to \code{NULL} (not drawing grid lines). Set to \code{TRUE} to plot \code{gray(.8)} grid lines, or specify any color. } \item{pr}{ set to \code{TRUE} to print survival curve coordinates used in the plots } \item{aehaz}{set to \code{TRUE} to add number of events and exponential distribution hazard rate estimates in curve labels. For competing risk data the number of events is for the cause of interest, and the hazard rate is the number of events divided by the sum of all failure and censoring times.} \item{times}{a numeric vector of times at which to compute cumulative incidence probability estimates to add to curve labels} \item{order}{ an integer vector of length two specifying the order of groups when computing survival differences. The default of \code{1:2} indicates that the second group is subtracted from the first. Specify \code{order=2:1} to instead subtract the first from the second. A subtitle indicates what was done. } \item{convert}{a function to convert the output of \code{summary.survfitms} to pick off the data needed for a single state} } \value{ list with components adjust (text string specifying adjustment levels) and \code{curve.labels} (vector of text strings corresponding to levels of factor used to distinguish curves). For \code{npsurv}, the returned value is the vector of strata labels, or NULL if there are no strata. } \section{Side Effects}{ plots. If \code{par()$mar[4] < 4}, issues \code{par(mar=)} to increment \code{mar[4]} by 2 if \code{n.risk=TRUE} and \code{add=FALSE}. The user may want to reset \code{par(mar)} in this case to not leave such a wide right margin for plots. 
You usually would issue \code{par(mar=c(5,4,4,2)+.1)}. } \details{ \code{survplot} will not work for Cox models with time-dependent covariables. Use \code{survest} or \code{survfit} for that purpose. There is a set a system option \code{\link[Hmisc:mgp.axis]{mgp.axis.labels}} to allow x and y-axes to have differing \code{mgp} graphical parameters (see \code{par}). This is important when labels for y-axis tick marks are to be written horizontally (\code{par(las=1)}), as a larger gap between the labels and the tick marks are needed. You can set the axis-specific 2nd component of \code{mgp} using \code{mgp.axis.labels(c(xvalue,yvalue))}. } \seealso{ \code{\link{datadist}}, \code{\link{rms}}, \code{\link{cph}}, \code{\link{psm}}, \code{\link{survest}}, \code{\link{predictrms}}, \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}}, \code{\link[Hmisc]{units}}, \code{\link[Hmisc]{errbar}}, \code{\link{survfit}}, \code{\link[survival]{survreg.distributions}}, \code{\link[Hmisc]{labcurve}}, \code{\link[Hmisc:mgp.axis]{mgp.axis}}, \code{\link{par}}, } \examples{ # Simulate data from a population model in which the log hazard # function is linear in age and there is no age x sex interaction n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('male','female'), n, TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='female')) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" dd <- datadist(age, sex) options(datadist='dd') S <- Surv(dt,e) # When age is in the model by itself and we predict at the mean age, # approximate confidence intervals are ok f <- cph(S ~ age, surv=TRUE) survplot(f, age=mean(age), conf.int=.95) g <- cph(S ~ age, x=TRUE, y=TRUE) survplot(g, age=mean(age), conf.int=.95, add=TRUE, col='red', conf='bars') # Repeat for an age far from the mean; not ok survplot(f, age=75, conf.int=.95) survplot(g, age=75, conf.int=.95, add=TRUE, col='red', conf='bars') #Plot stratified survival curves by sex, adj for quadratic age effect # with age x sex interaction (2 d.f. 
interaction) f <- cph(S ~ pol(age,2)*strat(sex), x=TRUE, y=TRUE) #or f <- psm(S ~ pol(age,2)*sex) Predict(f, sex, age=c(30,50,70)) survplot(f, sex, n.risk=TRUE, levels.only=TRUE) #Adjust age to median survplot(f, sex, logt=TRUE, loglog=TRUE) #Check for Weibull-ness (linearity) survplot(f, sex=c("male","female"), age=50) #Would have worked without datadist #or with an incomplete datadist survplot(f, sex, label.curves=list(keys=c(2,0), point.inc=2)) #Identify curves with symbols survplot(f, sex, label.curves=list(keys=c('m','f'))) #Identify curves with single letters #Plots by quintiles of age, adjusting sex to male options(digits=3) survplot(f, age=quantile(age,(1:4)/5), sex="male") #Plot Kaplan-Meier survival estimates for males f <- npsurv(S ~ 1, subset=sex=="male") survplot(f) #Plot survival for both sexes and show exponential hazard estimates f <- npsurv(S ~ sex) survplot(f, aehaz=TRUE) #Check for log-normal and log-logistic fits survplot(f, fun=qnorm, ylab="Inverse Normal Transform") survplot(f, fun=function(y)log(y/(1-y)), ylab="Logit S(t)") #Plot the difference between sexes survdiffplot(f) #Similar but show half-width of confidence intervals centered #at average of two survival estimates survplot(f, conf='diffbands') options(datadist=NULL) \dontrun{ # # Time to progression/death for patients with monoclonal gammopathy # Competing risk curves (cumulative incidence) # status variable must be a factor with first level denoting right censoring m <- upData(mgus1, stop = stop / 365.25, units=c(stop='years'), labels=c(stop='Follow-up Time'), subset=start == 0) f <- npsurv(Surv(stop, event) ~ 1, data=m) # Use survplot for enhanced displays of cumulative incidence curves for # competing risks survplot(f, state='pcm', n.risk=TRUE, xlim=c(0, 20), ylim=c(0, .5), col=2) survplot(f, state='death', aehaz=TRUE, col=3, label.curves=list(keys='lines')) f <- npsurv(Surv(stop, event) ~ sex, data=m) survplot(f, state='death', aehaz=TRUE, n.risk=TRUE, conf='diffbands', label.curves=list(keys='lines')) } } \keyword{survival} \keyword{hplot} \keyword{nonparametric} \keyword{models} rms/man/validate.lrm.Rd0000644000176200001440000001412313714237251014506 0ustar liggesusers\name{validate.lrm} \alias{validate.lrm} \alias{validate.orm} \title{Resampling Validation of a Logistic or Ordinal Regression Model} \description{ The \code{validate} function when used on an object created by \code{lrm} or \code{orm} does resampling validation of a logistic regression model, with or without backward step-down variable deletion. It provides bias-corrected Somers' \eqn{D_{xy}} rank correlation, R-squared index, the intercept and slope of an overall logistic calibration equation, the maximum absolute difference in predicted and calibrated probabilities \eqn{E_{max}}, the discrimination index \eqn{D} (model L.R. \eqn{(\chi^2 - 1)/n}{(chi-square - 1)/n}), the unreliability index \eqn{U} = difference in -2 log likelihood between un-calibrated \eqn{X\beta}{X beta} and \eqn{X\beta}{X beta} with overall intercept and slope calibrated to test sample / n, the overall quality index (logarithmic probability score) \eqn{Q = D - U}, and the Brier or quadratic probability score, \eqn{B} (the last 3 are not computed for ordinal models), the \eqn{g}-index, and \code{gp}, the \eqn{g}-index on the probability scale. The corrected slope can be thought of as a shrinkage factor that takes into account overfitting.
For \code{orm} fits, a subset of the above indexes is provided, Spearman's \eqn{\rho} is substituted for \eqn{D_{xy}}, and a new index is reported: \code{pdm}, the mean absolute difference between 0.5 and the predicted probability that \eqn{Y\geq} the marginal median of \eqn{Y}. } \usage{ # fit <- lrm(formula=response ~ terms, x=TRUE, y=TRUE) or orm \method{validate}{lrm}(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint, Dxy.method=if(k==1) 'somers2' else 'lrm', emax.lim=c(0,1), \dots) \method{validate}{orm}(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, ...) } \arguments{ \item{fit}{ a fit derived by \code{lrm} or \code{orm}. The options \code{x=TRUE} and \code{y=TRUE} must have been specified. } \item{method,B,bw,rule,type,sls,aics,force,estimates,pr}{see \code{\link{validate}} and \code{\link{predab.resample}}} \item{kint}{ In the case of an ordinal model, specify which intercept to validate. Default is the middle intercept. For \code{validate.orm}, intercept-specific quantities are not validated so this does not matter. } \item{Dxy.method}{ \code{"lrm"} to use \code{lrm}s computation of \eqn{D_{xy}} correlation, which rounds predicted probabilities to nearest .002. Use \code{Dxy.method="somers2"} (the default) to instead use the more accurate but slower \code{somers2} function. This will matter most when the model is extremely predictive. The default is \code{"lrm"} for ordinal models, since \code{somers2} only handles binary response variables. } \item{emax.lim}{ range of predicted probabilities over which to compute the maximum error. Default is entire range. } \item{\dots}{ other arguments to pass to \code{lrm.fit} (now only \code{maxit} and \code{tol} are allowed) and to \code{predab.resample} (note especially the \code{group}, \code{cluster}, and \code{subset} parameters) }} \value{ a matrix with rows corresponding to \eqn{D_{xy}}, \eqn{R^2}, \code{Intercept}, \code{Slope}, \eqn{E_{max}}, \eqn{D}, \eqn{U}, \eqn{Q}, \eqn{B}, \eqn{g}, \eqn{gp}, and columns for the original index, resample estimates, indexes applied to the whole or omitted sample using the model derived from the resample, average optimism, corrected index, and number of successful re-samples. For \code{validate.orm} not all columns are provided, Spearman's rho is returned instead of \eqn{D_{xy}}, and \code{pdm} is reported. } \section{Side Effects}{ prints a summary, and optionally statistics for each re-fit } \details{ If the original fit was created using penalized maximum likelihood estimation, the same \code{penalty.matrix} used with the original fit are used during validation. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \references{ Miller ME, Hui SL, Tierney WM (1991): Validation techniques for logistic regression models. Stat in Med 10:1213--1226. Harrell FE, Lee KL (1985): A comparison of the \emph{discrimination} of discriminant analysis and logistic regression under multivariate normality. In Biostatistics: Statistics in Biomedical, Public Health, and Environmental Sciences. The Bernard G. Greenberg Volume, ed. PK Sen. New York: North-Holland, p. 333--343. 
} \seealso{ \code{\link{predab.resample}}, \code{\link{fastbw}}, \code{\link{lrm}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{calibrate}}, \code{\link[Hmisc]{somers2}}, \code{\link{cr.setup}}, \code{\link{gIndex}}, \code{\link{orm}} } \examples{ n <- 1000 # define sample size age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) f <- lrm(y ~ sex*rcs(cholesterol)+pol(age,2)+blood.pressure, x=TRUE, y=TRUE) #Validate full model fit validate(f, B=10) # normally B=300 validate(f, B=10, group=y) # two-sample validation: make resamples have same numbers of # successes and failures as original sample #Validate stepwise model with typical (not so good) stopping rule validate(f, B=10, bw=TRUE, rule="p", sls=.1, type="individual") \dontrun{ #Fit a continuation ratio model and validate it for the predicted #probability that y=0 u <- cr.setup(y) Y <- u$y cohort <- u$cohort attach(mydataframe[u$subs,]) f <- lrm(Y ~ cohort+rcs(age,4)*sex, penalty=list(interaction=2)) validate(f, cluster=u$subs, subset=cohort=='all') #see predab.resample for cluster and subset } } \keyword{models} \keyword{regression} \concept{logistic regression model} \concept{model validation} \concept{predictive accuracy} \concept{bootstrap} rms/man/poma.Rd0000644000176200001440000000342713722734602013065 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/poma.r \name{poma} \alias{poma} \title{Examine proportional odds and parallelism assumptions of `orm` and `lrm` model fits.} \usage{ poma(mod.orm, cutval) } \arguments{ \item{mod.orm}{Model fit of class `orm` or `lrm`. For `fit.mult.impute` objects, `poma` will refit model on a singly-imputed data-set} \item{cutval}{Numeric vector; sequence of observed values to cut outcome} } \description{ Based on codes and strategies from Frank Harrell's canonical `Regression Modeling Strategies` text } \details{ Strategy 1: Apply different link functions to Prob of Binary Ys (defined by cutval). Regress transformed outcome on combined X and assess constancy of slopes (betas) across cut-points \cr Strategy 2: Generate score residual plot for each predictor (for response variable with <10 unique levels) \cr Strategy 3: Assess parallelism of link function transformed inverse CDFs curves for different XBeta levels (for response variables with >=10 unique levels) } \examples{ ## orm model (response variable has fewer than 10 unique levels) mod.orm <- orm(carb ~ cyl + hp , x=TRUE, y=TRUE, data = mtcars) poma(mod.orm) ## orm model (response variable has >=10 unique levels) mod.orm <- orm(mpg ~ cyl + hp , x=TRUE, y=TRUE, data = mtcars) poma(mod.orm) ## orm model using imputation dat <- mtcars ## introduce NAs dat[sample(rownames(dat), 10), "cyl"] <- NA im <- aregImpute(~ cyl + wt + mpg + am, data = dat) aa <- fit.mult.impute(mpg ~ cyl + wt , xtrans = im, data = dat, fitter = orm) poma(aa) } \seealso{ Harrell FE. *Regression Modeling Strategies: with applications to linear models, logistic and ordinal regression, and survival analysis.* New York: Springer Science, LLC, 2015. 
} \author{ Yong Hao Pua } rms/man/anova.rms.Rd0000644000176200001440000003775113714237251014043 0ustar liggesusers\name{anova.rms} \alias{anova.rms} \alias{print.anova.rms} \alias{plot.anova.rms} \alias{latex.anova.rms} \alias{html.anova.rms} \title{Analysis of Variance (Wald and F Statistics)} \description{ The \code{anova} function automatically tests most meaningful hypotheses in a design. For example, suppose that age and cholesterol are predictors, and that a general interaction is modeled using a restricted spline surface. \code{anova} prints Wald statistics (\eqn{F} statistics for an \code{ols} fit) for testing linearity of age, linearity of cholesterol, age effect (age + age by cholesterol interaction), cholesterol effect (cholesterol + age by cholesterol interaction), linearity of the age by cholesterol interaction (i.e., adequacy of the simple age * cholesterol 1 d.f. product), linearity of the interaction in age alone, and linearity of the interaction in cholesterol alone. Joint tests of all interaction terms in the model and all nonlinear terms in the model are also performed. For any multiple d.f. effects for continuous variables that were not modeled through \code{rcs}, \code{pol}, \code{lsp}, etc., tests of linearity will be omitted. This applies to matrix predictors produced by e.g. \code{poly} or \code{ns}. \code{print.anova.rms} is the printing method. \code{plot.anova.rms} draws dot charts depicting the importance of variables in the model, as measured by Wald \eqn{\chi^2}{chi-square}, \eqn{\chi^2}{chi-square} minus d.f., AIC, \eqn{P}-values, partial \eqn{R^2}, \eqn{R^2} for the whole model after deleting the effects in question, or proportion of overall model \eqn{R^2} that is due to each predictor. \code{latex.anova.rms} is the \code{latex} method. It substitutes Greek/math symbols in column headings, uses boldface for \code{TOTAL} lines, and constructs a caption. Then it passes the result to \code{latex.default} for conversion to LaTeX. For Bayesian models such as \code{blrm}, \code{anova} computes relative explained variation indexes (REV) based on approximate Wald statistics. This uses the variance-covariance matrix of all of the posterior draws, and the individual draws of betas, plus an overall summary from the posterior mode/mean/median beta. Wald chi-squares assuming multivariate normality of betas are computed just as with frequentist models, and for each draw (or for the summary) the ratio of the partial Wald chi-square to the total Wald statistic for the model is computed as REV. The \code{print} method calls \code{latex} or \code{html} methods depending on \code{options(prType=)}, and output is to the console. For \code{latex} a \code{table} environment is not used and an ordinary \code{tabular} is produced. \code{html.anova.rms} just calls \code{latex.anova.rms}. 
}
\usage{
\method{anova}{rms}(object, \ldots, main.effect=FALSE, tol=1e-9,
      test=c('F','Chisq'), india=TRUE, indnl=TRUE, ss=TRUE,
      vnames=c('names','labels'),
      posterior.summary=c('mean', 'median', 'mode'), ns=500, cint=0.95)
\method{print}{anova.rms}(x, which=c('none','subscripts','names','dots'),
      table.env=FALSE, \dots)
\method{plot}{anova.rms}(x,
      what=c("chisqminusdf","chisq","aic","P","partial R2","remaining R2",
             "proportion R2", "proportion chisq"),
      xlab=NULL, pch=16,
      rm.totals=TRUE, rm.ia=FALSE, rm.other=NULL, newnames,
      sort=c("descending","ascending","none"), margin=c('chisq','P'),
      pl=TRUE, trans=NULL, ntrans=40, height=NULL, width=NULL, \dots)
\method{latex}{anova.rms}(object, title, dec.chisq=2,
      dec.F=2, dec.ss=NA, dec.ms=NA, dec.P=4, dec.REV=3, table.env=TRUE,
      caption=NULL, \dots)
\method{html}{anova.rms}(object, \dots)
}
\arguments{
\item{object}{
a \code{rms} fit object.  \code{object} must allow \code{vcov} to return the variance-covariance matrix.  For \code{latex}, \code{object} is the result of \code{anova}.
}
\item{\dots}{
If omitted, all variables are tested, yielding tests for individual factors and for pooled effects.  Specify a subset of the variables to obtain tests for only those factors, with a pooled Wald test for the combined effects of all factors listed.  Names may be abbreviated.  For example, specify \code{anova(fit,age,cholesterol)} to get a Wald statistic for testing the joint importance of age, cholesterol, and any factor interacting with them.

Can be optional graphical parameters to send to \code{dotchart2}, or other parameters to send to \code{latex.default}.  Ignored for \code{print}.  For \code{html.anova.rms} the arguments are passed to \code{latex.anova.rms}.
}
\item{main.effect}{
Set to \code{TRUE} to print the (usually meaningless) main effect tests even when the factor is involved in an interaction.  The default is \code{FALSE}, to print only the effect of the main effect combined with all interactions involving that factor.
}
\item{tol}{singularity criterion for use in matrix inversion}
\item{test}{
For an \code{ols} fit, set \code{test="Chisq"} to use Wald \eqn{\chi^2} tests rather than F-tests.
}
\item{india}{set to \code{FALSE} to exclude individual tests of interaction from the table}
\item{indnl}{set to \code{FALSE} to exclude individual tests of nonlinearity from the table}
\item{ss}{
For an \code{ols} fit, set \code{ss=FALSE} to suppress printing partial sums of squares, mean squares, and the Error SS and MS.
}
\item{vnames}{set to \code{'labels'} to use variable labels rather than variable names in the output}
\item{posterior.summary}{specifies whether the posterior mode/mean/median beta are to be used as a measure of central tendency of the posterior distribution, for use in relative explained variation from Bayesian models}
\item{ns}{number of random samples from the posterior draws to use for REV highest posterior density intervals}
\item{cint}{HPD interval probability}
\item{x}{for \code{print,plot,text} is the result of \code{anova}.
}
\item{which}{
If \code{which} is not \code{"none"} (the default), \code{print.anova.rms} will add to the rightmost column of the output the list of parameters involved in the hypothesis being tested in the current row.  Specifying \code{which="subscripts"} causes the subscripts of the regression coefficients being tested to be printed (with a subscript of one for the first non-intercept term).
\code{which="names"} prints the names of the terms being tested, and \code{which="dots"} prints dots for terms being tested and blanks for those just being adjusted for. } \item{what}{ what type of statistic to plot. The default is the Wald \eqn{\chi^2}{chi-square} statistic for each factor (adding in the effect of higher-ordered factors containing that factor) minus its degrees of freedom. The R2 choices for \code{what} only apply to \code{ols} models. } \item{xlab}{ x-axis label, default is constructed according to \code{what}. \code{plotmath} symbols are used for \R, by default. } \item{pch}{ character for plotting dots in dot charts. Default is 16 (solid dot). } \item{rm.totals}{ set to \code{FALSE} to keep total \eqn{\chi^2}{chi-square}s (overall, nonlinear, interaction totals) in the chart. } \item{rm.ia}{ set to \code{TRUE} to omit any effect that has \code{"*"} in its name } \item{rm.other}{ a list of other predictor names to omit from the chart } \item{newnames}{ a list of substitute predictor names to use, after omitting any. } \item{sort}{default is to sort bars in descending order of the summary statistic } \item{margin}{set to a vector of character strings to write text for selected statistics in the right margin of the dot chart. The character strings can be any combination of \code{"chisq"}, \code{"d.f."}, \code{"P"}, \code{"partial R2"}, \code{"proportion R2"}, and \code{"proportion chisq"}. Default is to not draw any statistics in the margin. When \code{plotly} is in effect, margin values are instead displayed as hover text.} \item{pl}{ set to \code{FALSE} to suppress plotting. This is useful when you only wish to analyze the vector of statistics returned. } \item{trans}{ set to a function to apply that transformation to the statistics being plotted, and to truncate negative values at zero. A good choice is \code{trans=sqrt}. } \item{ntrans}{\code{n} argument to \code{\link{pretty}}, specifying the number of values for which to place tick marks. This should be larger than usual because of nonlinear scaling, to provide a sufficient number of tick marks on the left (stretched) part of the chi-square scale. } \item{height,width}{height and width of \code{plotly} plots drawn using \code{dotchartp}, in pixels. Ignored for ordinary plots. Defaults to minimum of 400 and 100 + 25 times the number of test statistics displayed.} \item{title}{ title to pass to \code{latex}, default is name of fit object passed to \code{anova} prefixed with \code{"anova."}. For Windows, the default is \code{"ano"} followed by the first 5 letters of the name of the fit object. } \item{dec.chisq}{ number of places to the right of the decimal place for typesetting \eqn{\chi^2}{chi-square} values (default is \code{2}). Use zero for integer, \code{NA} for floating point. } \item{dec.F}{ digits to the right for \eqn{F} statistics (default is \code{2}) } \item{dec.ss}{ digits to the right for sums of squares (default is \code{NA}, indicating floating point) } \item{dec.ms}{ digits to the right for mean squares (default is \code{NA}) } \item{dec.P}{digits to the right for \eqn{P}-values} \item{dec.REV}{digits to the right for REV} \item{table.env}{see \code{\link[Hmisc]{latex}}} \item{caption}{caption for table if \code{table.env} is \code{TRUE}. Default is constructed from the response variable.} } \value{ \code{anova.rms} returns a matrix of class \code{anova.rms} containing factors as rows and \eqn{\chi^2}{chi-square}, d.f., and \eqn{P}-values as columns (or d.f., partial \eqn{SS, MS, F, P}). 
An attribute \code{vinfo} provides a list of the variables involved in each row and the type of test done.
\code{plot.anova.rms} invisibly returns the vector of quantities plotted.  This vector has a names attribute describing the terms for which the statistics in the vector are calculated.
}
\details{
If the statistics being plotted with \code{plot.anova.rms} are few in number and one of them is negative or zero, \code{plot.anova.rms} will quit because of an error in \code{dotchart2}.

The \code{latex} method requires LaTeX packages \code{relsize} and \code{needspace}.
}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\section{Side Effects}{
\code{print} prints, \code{latex} creates a file with a name of the form \code{"title.tex"} (see the \code{title} argument above).
}
\seealso{
\code{\link{rms}}, \code{\link{rmsMisc}}, \code{\link{lrtest}}, \code{\link{rms.trans}},
\code{\link{summary.rms}}, \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}},
\code{\link[Hmisc]{solvet}}, \code{\link{locator}}, \code{\link[Hmisc]{dotchart2}},
\code{\link[Hmisc]{latex}}, \code{\link[Hmisc]{xYplot}}, \code{\link{anova.lm}},
\code{\link{contrast.rms}}, \code{\link{pantext}}
}
\examples{
n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
treat        <- factor(sample(c('a','b','c'), n,TRUE))
num.diseases <- sample(0:4, n,TRUE)
age          <- rnorm(n, 50, 10)
cholesterol  <- rnorm(n, 200, 25)
weight       <- rnorm(n, 150, 20)
sex          <- factor(sample(c('female','male'), n,TRUE))
label(age)          <- 'Age'      # label is in Hmisc
label(num.diseases) <- 'Number of Comorbid Diseases'
label(cholesterol)  <- 'Total Cholesterol'
label(weight)       <- 'Weight, lbs.'
label(sex)          <- 'Sex'
units(cholesterol)  <- 'mg/dl'   # uses units.default in Hmisc

# Specify population model for log odds that Y=1
L <- .1*(num.diseases-2) + .045*(age-50) +
  (log(cholesterol - 10)-5.2)*(-2*(treat=='a') + 3.5*(treat=='b')+2*(treat=='c'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

fit <- lrm(y ~ treat + scored(num.diseases) + rcs(age) +
           log(cholesterol+10) + treat:log(cholesterol+10))
a <- anova(fit)                       # Test all factors
b <- anova(fit, treat, cholesterol)   # Test these 2 by themselves
                                      # to get their pooled effects
a
b

# Add a new line to the plot with combined effects
s <- rbind(a, 'treat+cholesterol'=b['TOTAL',])
class(s) <- 'anova.rms'
plot(s, margin=c('chisq', 'proportion chisq'))

g <- lrm(y ~ treat*rcs(age))
dd <- datadist(treat, num.diseases, age, cholesterol)
options(datadist='dd')
p <- Predict(g, age, treat="b")
s <- anova(g)
# Usually omit fontfamily to default to 'Courier'
# It's specified here to make R pass its package-building checks
plot(p, addpanel=pantext(s, 28, 1.9, fontfamily='Helvetica'))

plot(s, margin=c('chisq', 'proportion chisq'))
# new plot - dot chart of chisq-d.f. with 2 other stats in right margin
# latex(s)   # nice printout - creates anova.g.tex
options(datadist=NULL)

# Simulate data from a given model, and display exactly which
# hypotheses are being tested

set.seed(123)
age <- rnorm(500, 50, 15)
treat <- factor(sample(c('a','b','c'), 500, TRUE))
bp  <- rnorm(500, 120, 10)
y   <- ifelse(treat=='a', (age-50)*.05, abs(age-50)*.08) + 3*(treat=='c') +
       pmax(bp, 100)*.09 + rnorm(500)
f   <- ols(y ~ treat*lsp(age,50) + rcs(bp,4))
print(names(coef(f)), quote=FALSE)
specs(f)
anova(f)
an <- anova(f)
options(digits=3)
print(an, 'subscripts')
print(an, 'dots')

an <- anova(f, test='Chisq', ss=FALSE)
plot(0:1)           # make some plot
tab <- pantext(an, 1.2, .6, lattice=FALSE, fontfamily='Helvetica')
# create function to write table; usually omit fontfamily
tab()               # execute it; could do tab(cex=.65)
plot(an)            # new plot - dot chart of chisq-d.f.
# Specify plot(an, trans=sqrt) to use a square root scale for this plot
# latex(an)         # nice printout - creates anova.f.tex

## Example to save partial R^2 for all predictors, along with overall
## R^2, from two separate fits, and to combine them with a lattice plot

require(lattice)
set.seed(1)
n <- 100
x1 <- runif(n)
x2 <- runif(n)
y  <- (x1-.5)^2 + x2 + runif(n)
group <- c(rep('a', n/2), rep('b', n/2))
A <- NULL
for(g in c('a','b')) {
  f <- ols(y ~ pol(x1,2) + pol(x2,2) + pol(x1,2) \%ia\% pol(x2,2),
           subset=group==g)
  a <- plot(anova(f),
            what='partial R2', pl=FALSE, rm.totals=FALSE, sort='none')
  a <- a[-grep('NONLINEAR', names(a))]
  d <- data.frame(group=g, Variable=factor(names(a), names(a)),
                  partialR2=unname(a))
  A <- rbind(A, d)
}
dotplot(Variable ~ partialR2 | group, data=A,
        xlab=ex <- expression(partial~R^2))
dotplot(group ~ partialR2 | Variable, data=A, xlab=ex)
dotplot(Variable ~ partialR2, groups=group, data=A, xlab=ex,
        auto.key=list(corner=c(.5,.5)))

# Suppose that a researcher wants to make a big deal about a variable
# because it has the highest adjusted chi-square.  We use the
# bootstrap to derive 0.95 confidence intervals for the ranks of all
# the effects in the model.  We use the plot method for anova, with
# pl=FALSE to suppress actual plotting of chi-square - d.f. for each
# bootstrap repetition.
# It is important to tell plot.anova.rms not to sort the results, or
# every bootstrap replication would have ranks of 1,2,3,... for the stats.
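# rankvars below exploits the fact that plot.anova.rms with pl=FALSE
# invisibly returns the named vector of (chi-square - d.f.) statistics;
# rank() then converts these to per-predictor ranks within each resample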
n <- 300 set.seed(1) d <- data.frame(x1=runif(n), x2=runif(n), x3=runif(n), x4=runif(n), x5=runif(n), x6=runif(n), x7=runif(n), x8=runif(n), x9=runif(n), x10=runif(n), x11=runif(n), x12=runif(n)) d$y <- with(d, 1*x1 + 2*x2 + 3*x3 + 4*x4 + 5*x5 + 6*x6 + 7*x7 + 8*x8 + 9*x9 + 10*x10 + 11*x11 + 12*x12 + 9*rnorm(n)) f <- ols(y ~ x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12, data=d) B <- 20 # actually use B=1000 ranks <- matrix(NA, nrow=B, ncol=12) rankvars <- function(fit) rank(plot(anova(fit), sort='none', pl=FALSE)) Rank <- rankvars(f) for(i in 1:B) { j <- sample(1:n, n, TRUE) bootfit <- update(f, data=d, subset=j) ranks[i,] <- rankvars(bootfit) } lim <- t(apply(ranks, 2, quantile, probs=c(.025,.975))) predictor <- factor(names(Rank), names(Rank)) Dotplot(predictor ~ Cbind(Rank, lim), pch=3, xlab='Rank') } \keyword{models} \keyword{regression} \keyword{htest} \keyword{aplot} \concept{bootstrap} rms/man/lrm.Rd0000644000176200001440000005210713714237251012721 0ustar liggesusers\name{lrm} \alias{lrm} \alias{print.lrm} \title{Logistic Regression Model} \description{ Fit binary and proportional odds ordinal logistic regression models using maximum likelihood estimation or penalized maximum likelihood estimation. See \code{cr.setup} for how to fit forward continuation ratio models with \code{lrm}. For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. } \usage{ lrm(formula, data=environment(formula), subset, na.action=na.delete, method="lrm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, strata.penalty=0, var.penalty=c('simple','sandwich'), weights, normwt, scale=FALSE, \dots) \method{print}{lrm}(x, digits=4, strata.coefs=FALSE, coefs=TRUE, title='Logistic Regression Model', \dots) } \arguments{ \item{formula}{ a formula object. An \code{offset} term can be included. The offset causes fitting of a model such as \eqn{logit(Y=1) = X\beta + W}, where \eqn{W} is the offset variable having no estimated coefficient. The response variable can be any data type; \code{lrm} converts it in alphabetic or numeric order to an S factor variable and recodes it 0,1,2,\dots internally. } \item{data}{ data frame to use. Default is the current frame. } \item{subset}{ logical expression or vector of subscripts defining a subset of observations to analyze } \item{na.action}{ function to handle \code{NA}s in the data. Default is \code{na.delete}, which deletes any observation having response or predictor missing, while preserving the attributes of the predictors and maintaining frequencies of deletions due to each variable in the model. This is usually specified using \code{options(na.action="na.delete")}. } \item{method}{ name of fitting function. Only allowable choice at present is \code{lrm.fit}. } \item{model}{ causes the model frame to be returned in the fit object } \item{x}{ causes the expanded design matrix (with missings excluded) to be returned under the name \code{x}. For \code{print}, an object created by \code{lrm}. } \item{y}{ causes the response variable (with missings excluded) to be returned under the name \code{y}. } \item{linear.predictors}{ causes the predicted X beta (with missings excluded) to be returned under the name \code{linear.predictors}. When the response variable has more than two levels, the first intercept is used. 
}
\item{se.fit}{
causes the standard errors of the fitted values to be returned under the name \code{se.fit}.
}
\item{penalty}{
The penalty factor subtracted from the log likelihood is \eqn{0.5 \beta' P \beta}, where \eqn{\beta} is the vector of regression coefficients other than intercept(s), and \eqn{P} is \code{penalty factors * penalty.matrix} and \code{penalty.matrix} is defined below.  The default is \code{penalty=0} implying that ordinary unpenalized maximum likelihood estimation is used.  If \code{penalty} is a scalar, it is assumed to be a penalty factor that applies to all non-intercept parameters in the model.  Alternatively, specify a list to penalize different types of model terms by differing amounts.  The elements in this list are named \code{simple, nonlinear, interaction} and \code{nonlinear.interaction}.  If you omit elements on the right of this series, values are inherited from elements on the left.  Examples: \code{penalty=list(simple=5, nonlinear=10)} uses a penalty factor of 10 for nonlinear or interaction terms.  \code{penalty=list(simple=0, nonlinear=2, nonlinear.interaction=4)} does not penalize linear main effects, uses a penalty factor of 2 for nonlinear or interaction effects (that are not both), and 4 for nonlinear interaction effects.
}
\item{penalty.matrix}{
specifies the symmetric penalty matrix for non-intercept terms.  The default matrix for continuous predictors has the variance of the columns of the design matrix in its diagonal elements so that the penalty to the log likelihood is unitless.  For main effects for categorical predictors with \eqn{c} categories, the rows and columns of the matrix contain a \eqn{c-1 \times c-1} sub-matrix that is used to compute the sum of squares about the mean of the \eqn{c} parameter values (setting the parameter to zero for the reference cell) as the penalty component for that predictor.  This makes the penalty independent of the choice of the reference cell.  If you specify \code{penalty.matrix}, you may set the rows and columns for certain parameters to zero so as to not penalize those parameters.  Depending on \code{penalty}, some elements of \code{penalty.matrix} may be overridden automatically by setting them to zero.  The penalty matrix that is used in the actual fit is \eqn{penalty \times diag(pf) \times penalty.matrix \times diag(pf)}, where \eqn{pf} is the vector of square roots of penalty factors computed from \code{penalty} by \code{Penalty.setup} in \code{rmsMisc}.  If you specify \code{penalty.matrix} you must specify a nonzero value of \code{penalty} or no penalization will be done.
}
\item{tol}{singularity criterion (see \code{lrm.fit})}
\item{strata.penalty}{scalar penalty factor for the stratification factor, for the experimental \code{strat} variable}
\item{var.penalty}{
the type of variance-covariance matrix to be stored in the \code{var} component of the fit when penalization is used.  The default is the inverse of the penalized information matrix.  Specify \code{var.penalty="sandwich"} to use the sandwich estimator (see below under \code{var}), which limited simulation studies have shown yields variance estimates that are too low.
}
\item{weights}{
a vector (same length as \code{y}) of possibly fractional case weights
}
\item{normwt}{
set to \code{TRUE} to scale \code{weights} so they sum to the length of \code{y}; useful for sample surveys as opposed to the default of frequency weighting
}
\item{scale}{set to \code{TRUE} to subtract means and divide by standard deviations of columns of the design matrix before fitting, and to back-solve for the un-normalized covariance matrix and regression coefficients.  This can sometimes make the model converge for very large sample sizes where for example spline or polynomial component variables create scaling problems leading to loss of precision when accumulating sums of squares and crossproducts.}
\item{\dots}{arguments that are passed to \code{lrm.fit}, or from \code{print}, to \code{\link{prModFit}}}
\item{digits}{number of significant digits to use}
\item{strata.coefs}{set to \code{TRUE} to print the (experimental) strata coefficients}
\item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc.  Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.}
\item{title}{a character string title to be passed to \code{prModFit}}
}
\value{
The returned fit object of \code{lrm} contains the following components in addition to the ones mentioned under the optional arguments.
\item{call}{calling expression}
\item{freq}{table of frequencies for \code{Y} in order of increasing \code{Y}}
\item{stats}{
vector with the following elements: number of observations used in the fit, maximum absolute value of first derivative of log likelihood, model likelihood ratio \eqn{\chi^2}{chi-square}, d.f., \eqn{P}-value, \eqn{c} index (area under ROC curve), Somers' \eqn{D_{xy}}, Goodman-Kruskal \eqn{\gamma}{gamma}, Kendall's \eqn{\tau_a}{tau-a} rank correlations between predicted probabilities and observed response, the Nagelkerke \eqn{R^2} index, the Brier score computed with respect to \eqn{Y >} its lowest level, the \eqn{g}-index, \eqn{gr} (the \eqn{g}-index on the odds ratio scale), and \eqn{gp} (the \eqn{g}-index on the probability scale using the same cutoff used for the Brier score).  Probabilities are rounded to the nearest 0.0002 in the computations of rank correlation indexes.  In the case of penalized estimation, the \code{"Model L.R."} is computed without the penalty factor, and \code{"d.f."} is the effective d.f. from Gray's (1992) Equation 2.9.  The \eqn{P}-value uses this corrected model L.R. \eqn{\chi^2}{chi-square} and corrected d.f.  The score chi-square statistic uses first derivatives which contain penalty components.
}
\item{fail}{set to \code{TRUE} if convergence failed (and \code{maxiter>1})}
\item{coefficients}{estimated parameters}
\item{var}{
estimated variance-covariance matrix (inverse of information matrix).  If \code{penalty>0}, \code{var} is either the inverse of the penalized information matrix (the default, if \code{var.penalty="simple"}) or the sandwich-type variance-covariance matrix estimate (Gray Eq. 2.6) if \code{var.penalty="sandwich"}.  For the latter case the simple information-matrix-based variance matrix is returned under the name \code{var.from.info.matrix}.
}
\item{effective.df.diagonal}{
is returned if \code{penalty>0}.  It is the vector whose sum is the effective d.f. of the model (counting intercept terms).
}
\item{u}{vector of first derivatives of log-likelihood}
\item{deviance}{
-2 log likelihoods (counting penalty components).  When an offset variable is present, three deviances are computed: for intercept(s) only, for intercepts+offset, and for intercepts+offset+predictors.  When there is no offset variable, the vector contains deviances for the intercept(s)-only model and the model with intercept(s) and predictors.
}
\item{est}{vector of column numbers of \code{X} fitted (intercepts are not counted)}
\item{non.slopes}{number of intercepts in model}
\item{penalty}{see above}
\item{penalty.matrix}{the penalty matrix actually used in the estimation}
}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\references{
Le Cessie S, Van Houwelingen JC: Ridge estimators in logistic regression.  Applied Statistics 41:191--201, 1992.

Verweij PJM, Van Houwelingen JC: Penalized likelihood in Cox regression.  Stat in Med 13:2427--2436, 1994.

Gray RJ: Flexible methods for analyzing survival data using splines, with applications to breast cancer prognosis.  JASA 87:942--951, 1992.

Shao J: Linear model selection by cross-validation.  JASA 88:486--494, 1993.

Verweij PJM, Van Houwelingen JC: Cross-validation in survival analysis.  Stat in Med 12:2305--2314, 1993.

Harrell FE: Model uncertainty, penalization, and parsimony.  ISCB Presentation on UVa Web page, 1998.
}
\seealso{
\code{\link{lrm.fit}}, \code{\link{predict.lrm}}, \code{\link{rms.trans}},
\code{\link{rms}}, \code{\link{glm}}, \code{\link{latex.lrm}},
\code{\link{residuals.lrm}}, \code{\link[Hmisc]{na.delete}},
\code{\link[Hmisc]{na.detail.response}}, \code{\link{pentrace}},
\code{\link{rmsMisc}}, \code{\link{vif}}, \code{\link{cr.setup}},
\code{\link{predab.resample}}, \code{\link{validate.lrm}}, \code{\link{calibrate}},
\code{\link{Mean.lrm}}, \code{\link{gIndex}}, \code{\link{prModFit}}
}
\examples{
#Fit a logistic model containing predictors age, blood.pressure, sex
#and cholesterol, with age fitted with a smooth 5-knot restricted cubic
#spline function and a different shape of the age relationship for males
#and females.  As an intermediate step, predict mean cholesterol from
#age using a proportional odds ordinal logistic model
#
n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
age            <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol    <- rnorm(n, 200, 25)
sex            <- factor(sample(c('female','male'), n,TRUE))
label(age)            <- 'Age'      # label is in Hmisc
label(cholesterol)    <- 'Total Cholesterol'
label(blood.pressure) <- 'Systolic Blood Pressure'
label(sex)            <- 'Sex'
units(cholesterol)    <- 'mg/dl'   # uses units.default in Hmisc
units(blood.pressure) <- 'mmHg'
#To use prop.
odds model, avoid using a huge number of intercepts by #grouping cholesterol into 40-tiles ch <- cut2(cholesterol, g=40, levels.mean=TRUE) # use mean values in intervals table(ch) f <- lrm(ch ~ age) options(prType='latex') print(f, coefs=4) # write latex code to console m <- Mean(f) # see help file for Mean.lrm d <- data.frame(age=seq(0,90,by=10)) m(predict(f, d)) # Repeat using ols f <- ols(cholesterol ~ age) predict(f, d) # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) cholesterol[1:3] <- NA # 3 missings, at random ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) # x=TRUE, y=TRUE allows use of resid(), which.influence below # could define d <- datadist(fit) after lrm(), but data distribution # summary would not be stored with fit, so later uses of Predict # or summary.rms would require access to the original dataset or # d or specifying all variable values to summary, Predict, nomogram anova(fit) p <- Predict(fit, age, sex) ggplot(p) # or plot() ggplot(Predict(fit, age=20:70, sex="male")) # need if datadist not used print(cbind(resid(fit,"dfbetas"), resid(fit,"dffits"))[1:20,]) which.influence(fit, .3) # latex(fit) #print nice statement of fitted model # #Repeat this fit using penalized MLE, penalizing complex terms #(for nonlinear or interaction effects) # fitp <- update(fit, penalty=list(simple=0,nonlinear=10), x=TRUE, y=TRUE) effective.df(fitp) # or lrm(y ~ \dots, penalty=\dots) #Get fits for a variety of penalties and assess predictive accuracy #in a new data set. Program efficiently so that complex design #matrices are only created once. set.seed(201) x1 <- rnorm(500) x2 <- rnorm(500) x3 <- sample(0:1,500,rep=TRUE) L <- x1+abs(x2)+x3 y <- ifelse(runif(500)<=plogis(L), 1, 0) new.data <- data.frame(x1,x2,x3,y)[301:500,] # for(penlty in seq(0,.15,by=.005)) { if(penlty==0) { f <- lrm(y ~ rcs(x1,4)+rcs(x2,6)*x3, subset=1:300, x=TRUE, y=TRUE) # True model is linear in x1 and has no interaction X <- f$x # saves time for future runs - don't have to use rcs etc. Y <- f$y # this also deletes rows with NAs (if there were any) penalty.matrix <- diag(diag(var(X))) Xnew <- predict(f, new.data, type="x") # expand design matrix for new data Ynew <- new.data$y } else f <- lrm.fit(X,Y, penalty.matrix=penlty*penalty.matrix) # cat("\nPenalty :",penlty,"\n") pred.logit <- f$coef[1] + (Xnew \%*\% f$coef[-1]) pred <- plogis(pred.logit) C.index <- somers2(pred, Ynew)["C"] Brier <- mean((pred-Ynew)^2) Deviance<- -2*sum( Ynew*log(pred) + (1-Ynew)*log(1-pred) ) cat("ROC area:",format(C.index)," Brier score:",format(Brier), " -2 Log L:",format(Deviance),"\n") } #penalty=0.045 gave lowest -2 Log L, Brier, ROC in test sample for S+ # #Use bootstrap validation to estimate predictive accuracy of #logistic models with various penalties #To see how noisy cross-validation estimates can be, change the #validate(f, \dots) to validate(f, method="cross", B=10) for example. #You will see tremendous variation in accuracy with minute changes in #the penalty. This comes from the error inherent in using 10-fold #cross validation but also because we are not fixing the splits. #20-fold cross validation was even worse for some #indexes because of the small test sample size. 
Stability would be #obtained by using the same sample splits for all penalty values #(see above), but then we wouldn't be sure that the choice of the #best penalty is not specific to how the sample was split. This #problem is addressed in the last example. # penalties <- seq(0,.7,length=3) # really use by=.02 index <- matrix(NA, nrow=length(penalties), ncol=11, dimnames=list(format(penalties), c("Dxy","R2","Intercept","Slope","Emax","D","U","Q","B","g","gp"))) i <- 0 for(penlty in penalties) { cat(penlty, "") i <- i+1 if(penlty==0) { f <- lrm(y ~ rcs(x1,4)+rcs(x2,6)*x3, x=TRUE, y=TRUE) # fit whole sample X <- f$x Y <- f$y penalty.matrix <- diag(diag(var(X))) # save time - only do once } else f <- lrm(Y ~ X, penalty=penlty, penalty.matrix=penalty.matrix, x=TRUE,y=TRUE) val <- validate(f, method="boot", B=20) # use larger B in practice index[i,] <- val[,"index.corrected"] } par(mfrow=c(3,3)) for(i in 1:9) { plot(penalties, index[,i], xlab="Penalty", ylab=dimnames(index)[[2]][i]) lines(lowess(penalties, index[,i])) } options(datadist=NULL) # Example of weighted analysis x <- 1:5 y <- c(0,1,0,1,0) reps <- c(1,2,3,2,1) lrm(y ~ x, weights=reps) x <- rep(x, reps) y <- rep(y, reps) lrm(y ~ x) # same as above # #Study performance of a modified AIC which uses the effective d.f. #See Verweij and Van Houwelingen (1994) Eq. (6). Here AIC=chisq-2*df. #Also try as effective d.f. equation (4) of the previous reference. #Also study performance of Shao's cross-validation technique (which was #designed to pick the "right" set of variables, and uses a much smaller #training sample than most methods). Compare cross-validated deviance #vs. penalty to the gold standard accuracy on a 7500 observation dataset. #Note that if you only want to get AIC or Schwarz Bayesian information #criterion, all you need is to invoke the pentrace function. #NOTE: the effective.df( ) function is used in practice # \dontrun{ for(seed in c(339,777,22,111,3)){ # study performance for several datasets set.seed(seed) n <- 175; p <- 8 X <- matrix(rnorm(n*p), ncol=p) # p normal(0,1) predictors Coef <- c(-.1,.2,-.3,.4,-.5,.6,-.65,.7) # true population coefficients L <- X \%*\% Coef # intercept is zero Y <- ifelse(runif(n)<=plogis(L), 1, 0) pm <- diag(diag(var(X))) #Generate a large validation sample to use as a gold standard n.val <- 7500 X.val <- matrix(rnorm(n.val*p), ncol=p) L.val <- X.val \%*\% Coef Y.val <- ifelse(runif(n.val)<=plogis(L.val), 1, 0) # Penalty <- seq(0,30,by=1) reps <- length(Penalty) effective.df <- effective.df2 <- aic <- aic2 <- deviance.val <- Lpenalty <- single(reps) n.t <- round(n^.75) ncv <- c(10,20,30,40) # try various no. of reps in cross-val. 
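# deviance will hold the cross-validated deviances: one row per penalty
# value, one column per choice of the number of cross-validation
# replications in ncv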
deviance <- matrix(NA,nrow=reps,ncol=length(ncv)) #If model were complex, could have started things off by getting X, Y #penalty.matrix from an initial lrm fit to save time # for(i in 1:reps) { pen <- Penalty[i] cat(format(pen),"") f.full <- lrm.fit(X, Y, penalty.matrix=pen*pm) Lpenalty[i] <- pen* t(f.full$coef[-1]) \%*\% pm \%*\% f.full$coef[-1] f.full.nopenalty <- lrm.fit(X, Y, initial=f.full$coef, maxit=1) info.matrix.unpenalized <- solve(f.full.nopenalty$var) effective.df[i] <- sum(diag(info.matrix.unpenalized \%*\% f.full$var)) - 1 lrchisq <- f.full.nopenalty$stats["Model L.R."] # lrm does all this penalty adjustment automatically (for var, d.f., # chi-square) aic[i] <- lrchisq - 2*effective.df[i] # pred <- plogis(f.full$linear.predictors) score.matrix <- cbind(1,X) * (Y - pred) sum.u.uprime <- t(score.matrix) \%*\% score.matrix effective.df2[i] <- sum(diag(f.full$var \%*\% sum.u.uprime)) aic2[i] <- lrchisq - 2*effective.df2[i] # #Shao suggested averaging 2*n cross-validations, but let's do only 40 #and stop along the way to see if fewer is OK dev <- 0 for(j in 1:max(ncv)) { s <- sample(1:n, n.t) cof <- lrm.fit(X[s,],Y[s], penalty.matrix=pen*pm)$coef pred <- cof[1] + (X[-s,] \%*\% cof[-1]) dev <- dev -2*sum(Y[-s]*pred + log(1-plogis(pred))) for(k in 1:length(ncv)) if(j==ncv[k]) deviance[i,k] <- dev/j } # pred.val <- f.full$coef[1] + (X.val \%*\% f.full$coef[-1]) prob.val <- plogis(pred.val) deviance.val[i] <- -2*sum(Y.val*pred.val + log(1-prob.val)) } postscript(hor=TRUE) # along with graphics.off() below, allow plots par(mfrow=c(2,4)) # to be printed as they are finished plot(Penalty, effective.df, type="l") lines(Penalty, effective.df2, lty=2) plot(Penalty, Lpenalty, type="l") title("Penalty on -2 log L") plot(Penalty, aic, type="l") lines(Penalty, aic2, lty=2) for(k in 1:length(ncv)) { plot(Penalty, deviance[,k], ylab="deviance") title(paste(ncv[k],"reps")) lines(supsmu(Penalty, deviance[,k])) } plot(Penalty, deviance.val, type="l") title("Gold Standard (n=7500)") title(sub=format(seed),adj=1,cex=.5) graphics.off() } } #The results showed that to obtain a clear picture of the penalty- #accuracy relationship one needs 30 or 40 reps in the cross-validation. #For 4 of 5 samples, though, the super smoother was able to detect #an accurate penalty giving the best (lowest) deviance using 10-fold #cross-validation. Cross-validation would have worked better had #the same splits been used for all penalties. #The AIC methods worked just as well and are much quicker to compute. #The first AIC based on the effective d.f. in Gray's Eq. 2.9 #(Verweij and Van Houwelingen (1994) Eq. 5 (note typo)) worked best. 
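# A minimal sketch of using pentrace to choose the penalty by the
# effective-AIC methods studied above; assumes x1, x2, x3, y from the
# earlier penalization example are still defined
\dontrun{
f  <- lrm(y ~ rcs(x1,4) + rcs(x2,6)*x3, x=TRUE, y=TRUE)
pt <- pentrace(f, seq(.01, .15, by=.01))
plot(pt)                              # effective AIC and BIC vs. penalty
fp <- update(f, penalty=pt$penalty)   # refit at the best penalty found
effective.df(fp)
}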
}
\keyword{category} \keyword{models}
\concept{logistic regression model} \concept{ordinal logistic model}
\concept{proportional odds model} \concept{continuation ratio model}
\concept{ordinal response}
rms/man/lrm.fit.bare.Rd0000644000176200001440000000215213751064426014410 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lrm.fit.bare.r
\name{lrm.fit.bare}
\alias{lrm.fit.bare}
\title{lrm.fit.bare}
\usage{
lrm.fit.bare(x, y, maxit = 12, eps = 0.025, tol = 1e-07)
}
\arguments{
\item{x}{a vector or matrix of covariate values}

\item{y}{a numeric or factor vector representing the dependent variable}

\item{maxit}{maximum number of iterations}

\item{eps}{stopping criterion (change in -2 log likelihood)}

\item{tol}{matrix inversion tolerance for singularities}
}
\value{
a list with elements \code{coefficients}, \code{var}, \code{fail}, \code{freq}, \code{deviance}
}
\description{
Bare Bones Logistic Regression Fit
}
\details{
This is a stripped-down version of the \code{lrm.fit()} function that computes only the regression coefficients, variance-covariance matrix, and log likelihood (for the null and fitted models) and does not compute any model fit indexes etc.  This is for speed in simulations or with bootstrapping.  Missing data are not allowed.  The function handles binary and ordinal logistic regression (proportional odds model).
}
\author{
Frank Harrell
}
rms/man/matinv.Rd0000644000176200001440000000323611651566431013427 0ustar liggesusers\name{matinv}
\alias{matinv}
\title{
Total and Partial Matrix Inversion using Gauss-Jordan Sweep Operator
}
\description{
This function inverts or partially inverts a matrix using pivoting (the sweep operator).  It is useful for sequential model-building.
}
\usage{
matinv(a, which, negate=TRUE, eps=1e-12)
}
\arguments{
\item{a}{
square matrix to invert or partially invert.  May have been inverted or partially inverted previously by matinv, in which case its "swept" attribute is updated.  Will un-invert if already inverted.
}
\item{which}{
vector of column/row numbers in a to invert.  Default is all, for total inverse.
}
\item{negate}{
So that the algorithm can keep track of which pivots have been swept as well as roundoff errors, it actually returns the negative of the inverse or partial inverse.  By default, these elements are negated to give the usual expected result.  Set negate=FALSE if you will be passing the result right back into matinv, otherwise, negate the submatrix before sending back to matinv.
}
\item{eps}{
singularity criterion
}}
\value{
a square matrix, with attributes "rank" and "swept".
}
\references{
Clarke MRB (1982).  Algorithm AS 178: The Gauss-Jordan sweep operator with detection of collinearity.  Appl Statist 31:166--9.

Ridout MS, Cobb JM (1986).  Algorithm AS R78 : A remark on algorithm AS 178: The Gauss-Jordan sweep operator with detection of collinearity.  Appl Statist 38:420--2.
}
\seealso{\code{\link{lrm}}, \code{\link{solve}}}
\examples{
a      <- diag(1:3)
a.inv1 <- matinv(a, 1, negate=FALSE)          #Invert with respect to a[1,1]
a.inv1
a.inv  <- -matinv(a.inv1, 2:3, negate=FALSE)  #Finish the job
a.inv
solve(a)
}
\keyword{array}
rms/man/plot.Predict.Rd0000644000176200001440000004775413714237251014476 0ustar liggesusers\name{plot.Predict}
\alias{plot.Predict}
\alias{pantext}
\title{Plot Effects of Variables Estimated by a Regression Model Fit}
\description{
Uses \code{lattice} graphics to plot the effect of one or two predictors on the linear predictor or X beta scale, or on some transformation of that scale.
The first argument specifies the result of the \code{Predict} function.  The predictor is always plotted in its original coding.  \code{plot.Predict} uses the \code{xYplot} function unless \code{formula} is omitted and the x-axis variable is a factor, in which case it reverses the x- and y-axes and uses the \code{Dotplot} function.

If \code{data} is given, a rug plot is drawn showing the location/density of data values for the \eqn{x}-axis variable.  If there is a \code{groups} (superposition) variable that generated separate curves, the data density specific to each class of points is shown.  This assumes that the second variable was a factor variable.  The rug plots are drawn by \code{scat1d}.  When the same predictor is used on all \eqn{x}-axes, and multiple panels are drawn, you can use \code{subdata} to specify an expression to subset according to other criteria in addition.

To plot effects instead of estimates (e.g., treatment differences as a function of interacting factors) see \code{contrast.rms} and \code{summary.rms}.

\code{pantext} creates a \code{lattice} panel function for including text such as that produced by \code{print.anova.rms} inside a panel or in a base graphic.
}
\usage{
\method{plot}{Predict}(x, formula, groups=NULL, cond=NULL, varypred=FALSE,
     subset, xlim, ylim, xlab, ylab, data=NULL, subdata, anova=NULL,
     pval=FALSE, cex.anova=.85, col.fill=gray(seq(.825, .55, length=5)),
     adj.subtitle, cex.adj, cex.axis, perim=NULL, digits=4, nlevels=3,
     nlines=FALSE, addpanel, scat1d.opts=list(frac=0.025, lwd=0.3),
     type=NULL, yscale=NULL, scaletrans=function(z) z, ...)

pantext(object, x, y, cex=.5, adj=0, fontfamily="Courier", lattice=TRUE)
}
\arguments{
\item{x}{a data frame created by \code{Predict}, or for \code{pantext} the x-coordinate for text}
\item{formula}{
the right hand side of a \code{lattice} formula referencing variables in data frame \code{x}.  You may not specify \code{formula} if you varied multiple predictors separately when calling \code{Predict}.  Otherwise, when \code{formula} is not given, \code{plot.Predict} constructs one from information in \code{x}.
}
\item{groups}{an optional name of one of the variables in \code{x} that is to be used as a grouping (superpositioning) variable.  Note that \code{groups} does not contain the groups data as is customary in \code{lattice}; it is only a single character string specifying the name of the grouping variable.}
\item{cond}{when plotting effects of different predictors, \code{cond} is a character string that specifies a single variable name in \code{x} that can be used to form panels.  Only applies if using \code{rbind} to combine several \code{Predict} results.}
\item{varypred}{set to \code{TRUE} if \code{x} is the result of passing multiple \code{Predict} results, that represent different predictors, to \code{rbind.Predict}.  This will cause the \code{.set.} variable created by \code{rbind} to be copied to the \code{.predictor.} variable.}
\item{subset}{a subsetting expression for restricting the rows of \code{x} that are used in plotting.  For example, predictions may have been requested for males and females but one wants to plot only females.}
\item{xlim}{
This parameter is seldom used, as limits are usually controlled with \code{Predict}.  One reason to use \code{xlim} is to plot a \code{factor} variable on the x-axis that was created with the \code{cut2} function with the \code{levels.mean} option, with \code{val.lev=TRUE} specified to \code{plot.Predict}.
In this case you may want the axis to have the range of the original variable values given to \code{cut2} rather than the range of the means within quantile groups.
}
\item{ylim}{
Range for plotting on response variable axis.  Computed by default.
}
\item{xlab}{
Label for \code{x}-axis.  Default is one given to \code{asis, rcs}, etc., which may have been the \code{"label"} attribute of the variable.
}
\item{ylab}{
Label for \code{y}-axis.  If \code{fun} is not given, default is \code{"log Odds"} for \code{lrm}, \code{"log Relative Hazard"} for \code{cph}, name of the response variable for \code{ols}, \code{TRUE} or \code{log(TRUE)} for \code{psm}, or \code{"X * Beta"} otherwise.
}
\item{data}{a data frame containing the original raw data on which the regression model was based, or at least containing the \eqn{x}-axis and grouping variable.  If \code{data} is present and contains the needed variables, the original data are added to the graph in the form of a rug plot using \code{scat1d}.
}
\item{subdata}{if \code{data} is specified, an expression to be evaluated in the \code{data} environment that evaluates to a logical vector specifying which observations in \code{data} to keep.  This will be intersected with the criterion for the \code{groups} variable.  Example: if conditioning on two paneling variables using \code{|a*b} you can specify \code{subdata=b==levels(b)[which.packet()[2]]}, where the \code{2} comes from the fact that \code{b} was listed second after the vertical bar (this assumes \code{b} is a \code{factor} in \code{data}).  Another example: \code{subdata=sex==c('male','female')[current.row()]}.}
\item{anova}{an object returned by \code{\link{anova.rms}}.  If \code{anova} is specified, the overall test of association for the predictor plotted is added as text to each panel, located at the spot at which the panel is most empty unless there is significant empty space at the top or bottom of the panel; these areas are given preference.}
\item{pval}{specify \code{pval=TRUE} for \code{anova} to include not only the test statistic but also the P-value}
\item{cex.anova}{character size for the test statistic printed on the panel}
\item{col.fill}{
a vector of colors used to fill confidence bands for successive superposed groups.  Default is increasingly dark gray scale.
}
\item{adj.subtitle}{
Set to \code{FALSE} to suppress subtitling the graph with the list of settings of non-graphed adjustment values.
}
\item{cex.adj}{
\code{cex} parameter for size of adjustment settings in subtitles.  Default is 0.75 times \code{par("cex")}.
}
\item{cex.axis}{
\code{cex} parameter for x-axis tick labels
}
\item{perim}{
\code{perim} specifies a function having two arguments.  The first is the vector of values of the first variable that is about to be plotted on the x-axis.  The second argument is the single value of the variable representing different curves, for the current curve being plotted.  The function's returned value must be a logical vector whose length is the same as that of the first argument, with values \code{TRUE} if the corresponding point should be plotted for the current curve, \code{FALSE} otherwise.  See one of the later examples.  If a predictor is not specified to \code{plot}, \code{NULL} is passed as the second argument to \code{perim}, although it makes little sense to use \code{perim} when the same \code{perim} is used for multiple predictors.
}
\item{digits}{
Controls how numeric variables used for panel labels are formatted.  The default is 4 significant digits.
} \item{nlevels}{ when \code{groups} and \code{formula} are not specified, if any panel variable has \code{nlevels} or fewer values, that variable is converted to a \code{groups} (superpositioning) variable. Set \code{nlevels=0} to prevent this behavior. For other situations, a numeric x-axis variable with \code{nlevels} or fewer unique values will cause a dot plot to be drawn instead of an x-y plot. } \item{nlines}{If \code{formula} is given, you can set \code{nlines} to \code{TRUE} to convert the x-axis variable to a factor and then to an integer. Points are plotted at integer values on the x-axis but labeled with category levels. Points are connected by lines.} \item{addpanel}{an additional panel function to call along with panel functions used for \code{xYplot} and \code{Dotplot} displays} \item{scat1d.opts}{a list containing named elements that specifies parameters to \code{\link{scat1d}} when \code{data} is given. The \code{col} parameter is usually derived from other plotting information and not specified by the user.} \item{type}{a value (\code{"l","p","b"}) to override default choices related to showing or connecting points. Especially useful for discrete x coordinate variables.} \item{yscale}{a \code{lattice} scale \code{list} for the \code{y}-axis to be added to what is automatically generated for the \code{x}-axis. Example: \code{yscale=list(at=c(.005,.01,.05),labels=format(c(.005,.01,.05)))}. See \link[lattice]{xyplot}} \item{scaletrans}{a function that operates on the \code{scale} object created by \code{plot.Predict} to produce a modified \code{scale} object that is passed to the lattice graphics function. This is useful for adding other \code{scales} options or for changing the \code{x}-axis limits for one predictor.} \item{\dots}{ extra arguments to pass to \code{xYplot} or \code{Dotplot}. Some useful ones are \code{label.curves} and \code{abline}. Set \code{label.curves} to \code{FALSE} to suppress labeling of separate curves. Default is \code{TRUE}, which causes \code{labcurve} to be invoked to place labels at positions where the curves are most separated, labeling each curve with the full curve label. Set \code{label.curves} to a \code{list} to specify options to \code{labcurve}, e.g., \code{label.curves=} \code{list(method="arrow", cex=.8)}. These option names may be abbreviated in the usual way arguments are abbreviated. Use for example \code{label.curves=list(keys=letters[1:5])} to draw single lower case letters on 5 curves where they are most separated, and automatically position a legend in the most empty part of the plot. The \code{col}, \code{lty}, and \code{lwd} parameters are passed automatically to \code{labcurve} although they may be overridden here. It is also useful to use \dots to pass \code{lattice} graphics parameters, e.g. \code{par.settings=list(axis.text=list(cex=1.2), par.ylab.text=list(col='blue',cex=.9),par.xlab.text=list(cex=1))}. } \item{object}{an object having a \code{print} method} \item{y}{y-coordinate for placing text in a \code{lattice} panel or on a base graphics plot} \item{cex}{character expansion size for \code{pantext}} \item{adj}{text justification. Default is left justified.} \item{fontfamily}{ font family for \code{pantext}. Default is \code{"Courier"} which will line up columns of a table. } \item{lattice}{set to \code{FALSE} to use \code{text} instead of \code{ltext} in the function generated by \code{pantext}, to use base graphics} } \value{ a \code{lattice} object ready to \code{print} for rendering. 
}
\details{
When a \code{groups} (superpositioning) variable was used, you can issue the command \code{Key(\dots)} after printing the result of \code{plot.Predict}, to draw a key for the groups.
}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
fh@fharrell.com
}
\references{
Fox J, Hong J (2009): Effect displays in R for multinomial and proportional-odds logit models: Extensions to the effects package.  J Stat Software 32 No. 1.
}
\note{If plotting the effects of all predictors you can reorder the panels using for example \code{p <- Predict(fit); p$.predictor. <- factor(p$.predictor., v)} where \code{v} is a vector of predictor names specified in the desired order.
}
\seealso{
\code{\link{Predict}}, \code{\link{ggplot.Predict}}, \code{\link{plotp.Predict}},
\code{\link{rbind.Predict}}, \code{\link{datadist}}, \code{\link{predictrms}},
\code{\link{anova.rms}}, \code{\link{contrast.rms}}, \code{\link{summary.rms}},
\code{\link{rms}}, \code{\link{rmsMisc}}, \code{\link[Hmisc]{labcurve}},
\code{\link[Hmisc]{scat1d}}, \code{\link[Hmisc]{xYplot}}, \code{\link[Hmisc]{Overview}}
}
\examples{
n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
age            <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol    <- rnorm(n, 200, 25)
sex            <- factor(sample(c('female','male'), n,TRUE))
label(age)            <- 'Age'      # label is in Hmisc
label(cholesterol)    <- 'Total Cholesterol'
label(blood.pressure) <- 'Systolic Blood Pressure'
label(sex)            <- 'Sex'
units(cholesterol)    <- 'mg/dl'   # uses units.default in Hmisc
units(blood.pressure) <- 'mmHg'

# Specify population model for log odds that Y=1
L <- .4*(sex=='male') + .045*(age-50) +
  (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

ddist <- datadist(age, blood.pressure, cholesterol, sex)
options(datadist='ddist')

fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)),
           x=TRUE, y=TRUE)
an <- anova(fit)
# Plot effects of all 4 predictors with test statistics from anova, and P
plot(Predict(fit), anova=an, pval=TRUE)
plot(Predict(fit), data=llist(blood.pressure,age))
# rug plot for two of the predictors

p <- Predict(fit, name=c('age','cholesterol'))   # Make 2 plots
plot(p)

p <- Predict(fit, age=seq(20,80,length=100), sex, conf.int=FALSE)
# Plot relationship between age and log
# odds, separate curve for each sex,
plot(p, subset=sex=='female' | age > 30)
# No confidence interval, suppress estimates for males <= 30

p <- Predict(fit, age, sex)
plot(p, label.curves=FALSE, data=llist(age,sex))
# use label.curves=list(keys=c('a','b'))
# to use 1-letter abbreviations
# data= allows rug plots (1-dimensional scatterplots)
# on each sex's curve, with sex-
# specific density of age
# If data were in data frame could have used that

p <- Predict(fit, age=seq(20,80,length=100), sex='male', fun=plogis)
# works if datadist not used
plot(p, ylab=expression(hat(P)))
# plot predicted probability in place of log odds

per <- function(x, y) x >= 30
plot(p, perim=per)
# suppress output for age < 30 but leave scale alone

# Take charge of the plot setup by specifying a lattice formula
p <- Predict(fit, age, blood.pressure=c(120,140,160),
             cholesterol=c(180,200,215), sex)
plot(p, ~ age | blood.pressure*cholesterol, subset=sex=='male')
# plot(p, ~ age | cholesterol*blood.pressure, subset=sex=='female')
# plot(p, ~ blood.pressure|cholesterol*round(age,-1), subset=sex=='male')
plot(p)

# Plot the age effect as an odds ratio #
comparing the age shown on the x-axis to age=30 years ddist$limits$age[2] <- 30 # make 30 the reference value for age # Could also do: ddist$limits["Adjust to","age"] <- 30 fit <- update(fit) # make new reference value take effect p <- Predict(fit, age, ref.zero=TRUE, fun=exp) plot(p, ylab='Age=x:Age=30 Odds Ratio', abline=list(list(h=1, lty=2, col=2), list(v=30, lty=2, col=2))) # Compute predictions for three predictors, with superpositioning or # conditioning on sex, combined into one graph p1 <- Predict(fit, age, sex) p2 <- Predict(fit, cholesterol, sex) p3 <- Predict(fit, blood.pressure, sex) p <- rbind(age=p1, cholesterol=p2, blood.pressure=p3) plot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE) plot(p, cond='sex', varypred=TRUE, adj.subtitle=FALSE) \dontrun{ # For males at the median blood pressure and cholesterol, plot 3 types # of confidence intervals for the probability on one plot, for varying age ages <- seq(20, 80, length=100) p1 <- Predict(fit, age=ages, sex='male', fun=plogis) # standard pointwise p2 <- Predict(fit, age=ages, sex='male', fun=plogis, conf.type='simultaneous') # simultaneous p3 <- Predict(fit, age=c(60,65,70), sex='male', fun=plogis, conf.type='simultaneous') # simultaneous 3 pts # The previous only adjusts for a multiplicity of 3 points instead of 100 f <- update(fit, x=TRUE, y=TRUE) g <- bootcov(f, B=500, coef.reps=TRUE) p4 <- Predict(g, age=ages, sex='male', fun=plogis) # bootstrap percentile p <- rbind(Pointwise=p1, 'Simultaneous 100 ages'=p2, 'Simultaneous 3 ages'=p3, 'Bootstrap nonparametric'=p4) xYplot(Cbind(yhat, lower, upper) ~ age, groups=.set., data=p, type='l', method='bands', label.curve=list(keys='lines')) } # Plots for a parametric survival model n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) t <- -log(runif(n))/h label(t) <- 'Follow-up Time' e <- ifelse(t<=cens,1,0) t <- pmin(t, cens) units(t) <- "Year" ddist <- datadist(age, sex) Srv <- Surv(t,e) # Fit log-normal survival model and plot median survival time vs. age f <- psm(Srv ~ rcs(age), dist='lognormal') med <- Quantile(f) # Creates function to compute quantiles # (median by default) p <- Predict(f, age, fun=function(x) med(lp=x)) plot(p, ylab="Median Survival Time") # Note: confidence intervals from this method are approximate since # they don't take into account estimation of scale parameter # Fit an ols model to log(y) and plot the relationship between x1 # and the predicted mean(y) on the original scale without assuming # normality of residuals; use the smearing estimator # See help file for rbind.Predict for a method of showing two # types of confidence intervals simultaneously. 
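# smearingEst (in Hmisc) below implements Duan's smearing estimator: the
# mean on the original scale is estimated by averaging exp(yhat + e) over
# the observed residuals e, rather than assuming normality of residuals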
set.seed(1)
x1 <- runif(300)
x2 <- runif(300)
ddist <- datadist(x1,x2)
y  <- exp(x1+x2-1+rnorm(300))
f  <- ols(log(y) ~ pol(x1,2)+x2)
r  <- resid(f)
smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean')
formals(smean) <- list(yhat=numeric(0), res=r[!is.na(r)])
#smean$res <- r[!is.na(r)]   # define default res argument to function
plot(Predict(f, x1, fun=smean), ylab='Predicted Mean on y-scale')

# Make an 'interaction plot', forcing the x-axis variable to be
# plotted at integer values but labeled with category levels
n <- 100
set.seed(1)
gender <- c(rep('male', n), rep('female',n))
m <- sample(c('a','b'), 2*n, TRUE)
d <- datadist(gender, m); options(datadist='d')
anxiety <- runif(2*n) + .2*(gender=='female') + .4*(gender=='female' & m=='b')
tapply(anxiety, llist(gender,m), mean)
f <- ols(anxiety ~ gender*m)
p <- Predict(f, gender, m)
plot(p)   # horizontal dot chart; usually preferred for categorical predictors
Key(.5, .5)
plot(p, ~gender, groups='m', nlines=TRUE)
plot(p, ~m, groups='gender', nlines=TRUE)
plot(p, ~gender|m, nlines=TRUE)

options(datadist=NULL)

\dontrun{
# Example in which separate curves are shown for 4 income values
# For each curve the estimated percentage of voters voting for
# the democratic party is plotted against the percent of voters
# who graduated from college.  Data are county-level percents.
incomes <- seq(22900, 32800, length=4)
# equally spaced to outer quintiles
p <- Predict(f, college, income=incomes, conf.int=FALSE)
plot(p, xlim=c(0,35), ylim=c(30,55))

# Erase end portions of each curve where there are fewer than 10 counties having
# percent of college graduates to the left of the x-coordinate being plotted,
# for the subset of counties having median family income within 1650
# of the target income for the curve
show.pts <- function(college.pts, income.pt) {
  s <- abs(income - income.pt) < 1650  #assumes income known to top frame
  x <- college[s]
  x <- sort(x[!is.na(x)])
  n <- length(x)
  low <- x[10]; high <- x[n-9]
  college.pts >= low & college.pts <= high
}
plot(p, xlim=c(0,35), ylim=c(30,55), perim=show.pts)

# Rename variables for better plotting of a long list of predictors
f <- ...
p <- Predict(f)
re <- c(trt='treatment', diabet='diabetes', sbp='systolic blood pressure')
for(n in names(re)) {
  names(p)[names(p)==n] <- re[n]
  p$.predictor.[p$.predictor.==n] <- re[n]
}
plot(p)
}
}
\keyword{models}
\keyword{hplot}
\keyword{htest}
rms/man/groupkm.Rd0000644000176200001440000000630012257362701013605 0ustar liggesusers\name{groupkm}
\alias{groupkm}
\title{Kaplan-Meier Estimates vs. a Continuous Variable}
\description{
Function to divide \code{x} (e.g. age, or predicted survival at time \code{u} created by \code{survest}) into \code{g} quantile groups, get Kaplan-Meier estimates at time \code{u} (a scalar), and to return a matrix with columns \code{x}=mean \code{x} in quantile, \code{n}=number of subjects, \code{events}=no. events, and \code{KM}=K-M survival at time \code{u}, \code{std.err} = s.e. of -log K-M.  Confidence intervals are based on -log S(t).  Instead of supplying \code{g}, the user can supply the minimum number of subjects to have in the quantile group (\code{m}, default=50).  If \code{cuts} is given (e.g. \code{cuts=c(0,.1,.2,\dots,.9,1)}), it overrides \code{m} and \code{g}.  Calls Therneau's \code{survfitKM} in the \code{survival} package to get Kaplan-Meier estimates and standard errors.
} \usage{ groupkm(x, Srv, m=50, g, cuts, u, pl=FALSE, loglog=FALSE, conf.int=.95, xlab, ylab, lty=1, add=FALSE, cex.subtitle=.7, \dots) } \arguments{ \item{x}{variable to stratify} \item{Srv}{ a \code{Surv} object - n x 2 matrix containing survival time and event/censoring 1/0 indicator. Units of measurement come from the "units" attribute of the survival time variable. "Day" is the default. } \item{m}{desired minimum number of observations in a group} \item{g}{number of quantile groups} \item{cuts}{actual cuts in \code{x}, e.g. \code{c(0,1,2)} to use [0,1), [1,2]. } \item{u}{time for which to estimate survival} \item{pl}{TRUE to plot results} \item{loglog}{ set to \code{TRUE} to plot \code{log(-log(survival))} instead of survival } \item{conf.int}{ defaults to \code{.95} for 0.95 confidence bars. Set to \code{FALSE} to suppress bars. } \item{xlab}{ if \code{pl=TRUE}, is x-axis label. Default is \code{label(x)} or name of calling argument } \item{ylab}{ if \code{pl=TRUE}, is y-axis label. Default is constructed from \code{u} and time \code{units} attribute. } \item{lty}{ line type for primary line connecting estimates } \item{add}{ set to \code{TRUE} if adding to an existing plot } \item{cex.subtitle}{ character size for subtitle. Default is \code{.7}. Use \code{FALSE} to suppress subtitle. } \item{...}{plotting parameters to pass to the plot and errbar functions} } \value{ matrix with columns named \code{x} (mean predictor value in interval), \code{n} (sample size in interval), \code{events} (number of events in interval), \code{KM} (Kaplan-Meier estimate), \code{std.err} (standard error of -log \code{KM}) } \seealso{ \code{\link[survival]{survfit}}, \code{\link[Hmisc]{errbar}}, \code{\link[Hmisc]{cut2}}, \code{\link[survival]{Surv}}, \code{\link[Hmisc]{units}} } \examples{ n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)) d.time <- -log(runif(n))/h label(d.time) <- 'Follow-up Time' e <- ifelse(d.time <= cens,1,0) d.time <- pmin(d.time, cens) units(d.time) <- "Year" groupkm(age, Surv(d.time, e), g=10, u=5, pl=TRUE) #Plot 5-year K-M survival estimates and 0.95 confidence bars by #decile of age. If omit g=10, will have >= 50 obs./group. } \keyword{survival} \keyword{nonparametric} \concept{grouping} \concept{stratification} \concept{aggregation} rms/man/ols.Rd0000644000176200001440000002030013714237251012712 0ustar liggesusers\name{ols} \alias{ols} \title{Linear Model Estimation Using Ordinary Least Squares} \description{ Fits the usual weighted or unweighted linear regression model using the same fitting routines used by \code{lm}, but also storing the variance-covariance matrix \code{var} and using traditional dummy-variable coding for categorical factors. Also fits unweighted models using penalized least squares, with the same penalization options as in the \code{lrm} function. For penalized estimation, there is a fitter function called \code{lm.pfit}. } \usage{ ols(formula, data=environment(formula), weights, subset, na.action=na.delete, method="qr", model=FALSE, x=FALSE, y=FALSE, se.fit=FALSE, linear.predictors=TRUE, penalty=0, penalty.matrix, tol=1e-7, sigma, var.penalty=c('simple','sandwich'), \dots) } \arguments{ \item{formula}{ an S formula object, e.g. \cr Y ~ rcs(x1,5)*lsp(x2,c(10,20)) } \item{data}{ name of an S data frame containing all needed variables. Omit this to use a data frame already in the S ``search list''. } \item{weights}{an optional vector of weights to be used in the fitting process.
If specified, weighted least squares is used with weights \code{weights} (that is, minimizing \eqn{sum(w*e^2)}); otherwise ordinary least squares is used.} \item{subset}{ an expression defining a subset of the observations to use in the fit. The default is to use all observations. Specify for example \code{age>50 & sex=="male"} or \code{c(1:100,200:300)} respectively to use the observations satisfying a logical expression or those having row numbers in the given vector. } \item{na.action}{ specifies an S function to handle missing data. The default is the function \code{na.delete}, which causes observations with any variable missing to be deleted. The main difference between \code{na.delete} and the S-supplied function \code{na.omit} is that \code{na.delete} makes a list of the number of observations that are missing on each variable in the model. The \code{na.action} is usually specified by e.g. \code{options(na.action="na.delete")}. } \item{method}{ specifies a particular fitting method, or \code{"model.frame"} instead to return the model frame of the predictor and response variables satisfying any subset or missing value checks. } \item{model}{ default is \code{FALSE}. Set to \code{TRUE} to return the model frame as element \code{model} of the fit object. } \item{x}{ default is \code{FALSE}. Set to \code{TRUE} to return the expanded design matrix as element \code{x} (without intercept indicators) of the returned fit object. Set both \code{x=TRUE} and \code{y=TRUE} if you are going to use the \code{residuals} function later to return anything other than ordinary residuals. } \item{y}{ default is \code{FALSE}. Set to \code{TRUE} to return the vector of response values as element \code{y} of the fit. } \item{se.fit}{ default is \code{FALSE}. Set to \code{TRUE} to compute the estimated standard errors of the estimate of \eqn{X\beta}{X beta} and store them in element \code{se.fit} of the fit. } \item{linear.predictors}{ set to \code{FALSE} to cause predicted values not to be stored } \item{penalty}{see \code{lrm}} \item{penalty.matrix}{ see \code{lrm} } \item{tol}{tolerance for information matrix singularity} \item{sigma}{ If \code{sigma} is given, it is taken as the actual root mean squared error parameter for the model. Otherwise \code{sigma} is estimated from the data using the usual formulas (except for penalized models). It is often convenient to specify \code{sigma=1} for models with no error, when using \code{fastbw} to find an approximate model that predicts predicted values from the full model with a given accuracy. } \item{var.penalty}{ the type of variance-covariance matrix to be stored in the \code{var} component of the fit when penalization is used. The default is the inverse of the penalized information matrix. Specify \code{var.penalty="sandwich"} to use the sandwich estimator (see below under \code{var}), which limited simulation studies have shown yields variance estimates that are too low. } \item{\dots}{arguments to pass to \code{\link{lm.wfit}} or \code{\link{lm.fit}}} } \value{ the same objects returned from \code{lm} (unless \code{penalty} or \code{penalty.matrix} are given - then an abbreviated list is returned since \code{lm.pfit} is used as a fitter) plus the design attributes (see \code{rms}). Predicted values are always returned, in the element \code{linear.predictors}. The vectors or matrix stored if \code{y=TRUE} or \code{x=TRUE} have rows deleted according to \code{subset} and to missing data, and have names or row names that come from the data frame used as input data.
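For example (a sketch with hypothetical variable names), after \code{f <- ols(y ~ rcs(x1,4) + x2, x=TRUE, y=TRUE)} the predicted values are in \code{f$linear.predictors}, the design matrix and response are in \code{f$x} and \code{f$y}, and the variance-covariance matrix is in \code{f$var}.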
If \code{penalty} or \code{penalty.matrix} is given, the \code{var} matrix returned is an improved variance-covariance matrix for the penalized regression coefficient estimates. If \code{var.penalty="sandwich"} (not the default, as limited simulation studies have found it provides variance estimates that are too low) it is defined as \eqn{\sigma^{2} (X'X + P)^{-1} X'X (X'X + P)^{-1}}, where \eqn{P} is \code{penalty factors * penalty.matrix}, with a column and row of zeros added for the intercept. When \code{var.penalty="simple"} (the default), \code{var} is \eqn{\sigma^{2} (X'X + P)^{-1}}. The returned list has a vector \code{stats} with named elements \code{n, Model L.R., d.f., R2, g, Sigma}. \code{Model L.R.} is the model likelihood ratio \eqn{\chi^2}{chi-square} statistic, and \code{R2} is \eqn{R^2}. For penalized estimation, \code{d.f.} is the effective degrees of freedom, which is the sum of the elements of another vector returned, \code{effective.df.diagonal}, minus one for the intercept. \code{g} is the \eqn{g}-index. \code{Sigma} is the penalized maximum likelihood estimate (see below). } \details{ For penalized estimation, the penalty factor on the log likelihood is \eqn{-0.5 \beta' P \beta / \sigma^2}, where \eqn{P} is defined above. The penalized maximum likelihood estimate (penalized least squares or ridge estimate) of \eqn{\beta}{beta} is \eqn{(X'X + P)^{-1} X'Y}. The maximum likelihood estimate of \eqn{\sigma^2} is \eqn{(sse + \beta' P \beta) / n}, where \code{sse} is the sum of squared errors (residuals). The \code{effective.df.diagonal} vector is the diagonal of the matrix \eqn{X'X/(sse/n) \sigma^{2} (X'X + P)^{-1}}. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{anova.rms}}, \code{\link{summary.rms}}, \code{\link{predict.rms}}, \code{\link{fastbw}}, \code{\link{validate}}, \code{\link{calibrate}}, \code{\link{Predict}}, \code{\link{specs.rms}}, \code{\link{cph}}, \code{\link{lrm}}, \code{\link{which.influence}}, \code{\link{lm}}, \code{\link{summary.lm}}, \code{\link{print.ols}}, \code{\link{residuals.ols}}, \code{\link{latex.ols}}, \code{\link[Hmisc]{na.delete}}, \code{\link[Hmisc]{na.detail.response}}, \code{\link{datadist}}, \code{\link{pentrace}}, \code{\link{vif}}, \code{\link[Hmisc]{abs.error.pred}} } \examples{ set.seed(1) x1 <- runif(200) x2 <- sample(0:3, 200, TRUE) distance <- (x1 + x2/3 + rnorm(200))^2 d <- datadist(x1,x2) options(datadist="d") # No d -> no summary, plot without giving all details f <- ols(sqrt(distance) ~ rcs(x1,4) + scored(x2), x=TRUE) # could use d <- datadist(f); options(datadist="d") at this point, # but predictor summaries would not be stored in the fit object for # use with Predict, summary.rms. In that case, the original # dataset or d would need to be accessed later, or all variable values # would have to be specified to summary, plot anova(f) which.influence(f) summary(f) summary.lm(f) # will only work if penalty and penalty.matrix not used # Fit a complex model and approximate it with a simple one x1 <- runif(200) x2 <- runif(200) x3 <- runif(200) x4 <- runif(200) y <- x1 + x2 + rnorm(200) f <- ols(y ~ rcs(x1,4) + x2 + x3 + x4) pred <- fitted(f) # or predict(f) or f$linear.predictors f2 <- ols(pred ~ rcs(x1,4) + x2 + x3 + x4, sigma=1) # sigma=1 prevents numerical problems resulting from R2=1 fastbw(f2, aics=100000) # This will find the best 1-variable model, best 2-variable model, etc. 
# in predicting the predicted values from the original model options(datadist=NULL) } \keyword{models} \keyword{regression} rms/man/print.Glm.Rd0000644000176200001440000000114013701122720013766 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Glm.r \name{print.Glm} \alias{print.Glm} \title{print.Glm} \usage{ \method{print}{Glm}(x, digits = 4, coefs = TRUE, title = "General Linear Model", ...) } \arguments{ \item{x}{`Glm` object} \item{digits}{number of significant digits to print} \item{coefs}{specify `coefs=FALSE` to suppress printing the table of model coefficients, standard errors, etc.} \item{title}{a character string title to be passed to `prModFit`} \item{...}{ignored} } \description{ Print a `Glm` Object } \details{ Prints a `Glm` object, optionally in LaTeX or html } \author{ Frank Harrell } rms/man/validate.cph.Rd0000644000176200001440000001500013714237251014460 0ustar liggesusers\name{validate.cph} \alias{validate.cph} \alias{validate.psm} \alias{dxy.cens} \title{Validation of a Fitted Cox or Parametric Survival Model's Indexes of Fit} \description{ This is the version of the \code{validate} function specific to models fitted with \code{cph} or \code{psm}. Also included is a small function \code{dxy.cens} that retrieves \eqn{D_{xy}}{Dxy} and its standard error from the \code{survival} package's \code{survConcordance.fit} function. This allows for incredibly fast computation of \eqn{D_{xy}}{Dxy} or the c-index even for hundreds of thousands of observations. \code{dxy.cens} negates \eqn{D_{xy}}{Dxy} if log relative hazard is being predicted. If \code{y} is a left-censored \code{Surv} object, times are negated and a right-censored object is created, then \eqn{D_{xy}}{Dxy} is negated. } \usage{ # fit <- cph(formula=Surv(ftime,event) ~ terms, x=TRUE, y=TRUE, \dots) \method{validate}{cph}(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, dxy=TRUE, u, tol=1e-9, \dots) \method{validate}{psm}(fit, method="boot",B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, dxy=TRUE, tol=1e-12, rel.tolerance=1e-5, maxiter=15, \dots) dxy.cens(x, y, type=c('time','hazard')) } \arguments{ \item{fit}{ a fit derived from \code{cph}. The options \code{x=TRUE} and \code{y=TRUE} must have been specified. If the model contains any stratification factors and \code{dxy=TRUE}, the options \code{surv=TRUE} and \code{time.inc=u} must also have been given, where \code{u} is the same value of \code{u} given to \code{validate}. } \item{method}{see \code{\link{validate}}} \item{B}{ number of repetitions. For \code{method="crossvalidation"}, is the number of groups of omitted observations. } \item{rel.tolerance,maxiter}{ convergence criteria (tolerance and maximum number of iterations) used when \code{validate.psm} refits the model } \item{bw}{ \code{TRUE} to do fast step-down using the \code{fastbw} function, for both the overall model and for each repetition. \code{fastbw} keeps parameters together that represent the same factor. } \item{rule}{ Applies if \code{bw=TRUE}. \code{"aic"} to use Akaike's information criterion as a stopping rule (i.e., a factor is deleted if the \eqn{\chi^2}{chi-square} falls below twice its degrees of freedom), or \code{"p"} to use \eqn{P}-values. } \item{type}{ \code{"residual"} or \code{"individual"} - stopping rule is for individual factors or for the residual \eqn{\chi^2}{chi-square} for all variables deleted. For \code{dxy.cens}, specify \code{type="hazard"} if \code{x} is on the hazard or cumulative hazard (or their logs) scale, causing negation of the correlation index.
} \item{sls}{ significance level for a factor to be kept in a model, or for judging the residual \eqn{\chi^2}{chi-square}. } \item{aics}{ cutoff on AIC when \code{rule="aic"}. } \item{force}{see \code{\link{fastbw}}} \item{estimates}{see \code{\link{print.fastbw}}} \item{pr}{\code{TRUE} to print results of each repetition} \item{tol,\dots}{see \code{\link{validate}} or \code{\link{predab.resample}}} \item{dxy}{ set to \code{TRUE} to validate Somers' \eqn{D_{xy}}{Dxy} using \code{dxy.cens}, which is fast until n > 500,000. Uses the \code{survival} package's \code{survConcordance.fit} service function for \code{survConcordance}. } \item{u}{ must be specified if the model has any stratification factors and \code{dxy=TRUE}. In that case, strata are not included in \eqn{X\beta}{X beta} and the survival curves may cross. Predictions at time \code{t=u} are correlated with observed survival times. Does not apply to \code{validate.psm}. } \item{x}{a numeric vector} \item{y}{a \code{Surv} object that may be uncensored or right-censored} } \details{ Statistics validated include the Nagelkerke \eqn{R^2}, \eqn{D_{xy}}{Dxy}, slope shrinkage, the discrimination index \eqn{D} [(model L.R. \eqn{\chi^2}{chi-square} - 1)/L], the unreliability index \eqn{U} = (difference in -2 log likelihood between uncalibrated \eqn{X\beta}{X beta} and \eqn{X\beta}{X beta} with overall slope calibrated to test sample) / L, and the overall quality index \eqn{Q = D - U}. \eqn{g} is the \eqn{g}-index on the log relative hazard (linear predictor) scale. L is -2 log likelihood with beta=0. The "corrected" slope can be thought of as a shrinkage factor that takes into account overfitting. See \code{predab.resample} for the list of resampling methods. } \value{ matrix with rows corresponding to \eqn{D_{xy}}{Dxy}, Slope, \eqn{D}, \eqn{U}, and \eqn{Q}, and columns for the original index, resample estimates, indexes applied to whole or omitted sample using model derived from resample, average optimism, corrected index, and number of successful resamples.\cr The values corresponding to the row \eqn{D_{xy}}{Dxy} are equal to \eqn{2 * (C - 0.5)} where C is the C-index or concordance probability. If the user is correlating the linear predictor (predicted log hazard) with survival time, \eqn{D_{xy}}{Dxy} is automatically negated. } \section{Side Effects}{ prints a summary, and optionally statistics for each re-fit (if \code{pr=TRUE}) } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{validate}}, \code{\link{predab.resample}}, \code{\link{fastbw}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{calibrate}}, \code{\link[Hmisc]{rcorr.cens}}, \code{\link{cph}}, \code{\link[survival]{survival-internal}}, \code{\link{gIndex}}, \code{\link[survival:survConcordance]{survConcordance}} } \examples{ n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" S <- Surv(dt,e) f <- cph(S ~ age*sex, x=TRUE, y=TRUE) # Validate full model fit validate(f, B=10) # normally B=150 # Validate a model with stratification.
Dxy is the only # discrimination measure for such models, but Dxy requires # one to choose a single time at which to predict S(t|X) f <- cph(S ~ rcs(age)*strat(sex), x=TRUE, y=TRUE, surv=TRUE, time.inc=2) validate(f, u=2, B=10) # normally B=150 # Note u=time.inc } \keyword{models} \keyword{regression} \keyword{survival} \concept{model validation} \concept{predictive accuracy} \concept{bootstrap} rms/man/latexrms.Rd0000644000176200001440000001044313714237251013761 0ustar liggesusers\name{latexrms} \alias{latexrms} \alias{latex.bj} \alias{latex.Glm} \alias{latex.Gls} \title{LaTeX Representation of a Fitted Model} \description{ Creates a file containing a LaTeX representation of the fitted model. For model-specific typesetting there is \code{latex.lrm}, \code{latex.cph}, \code{latex.psm} and \code{latex.ols}. \code{latex.cph} has some arguments that are specific to \code{cph} models. \code{latexrms} is the core function, called internally by the model-specific methods (\code{latex.cph}, \code{latex.ols}, etc.). \code{html} and R Markdown-compatible markup (using MathJax) are written if \code{options(prType='html')}. } \usage{ latexrms(object, file='', append=FALSE, which=1:p, varnames, columns=65, prefix=NULL, inline=FALSE, before=if(inline)"" else "& &", after="", intercept, pretrans=TRUE, digits=.Options$digits, size="") } \arguments{ \item{object}{ a fit object created by a fitting function in the \code{rms} series } \item{file}{ name of \code{.tex} file to create, default is to write to console. \code{file} is ignored when \code{options(prType='html')}. } \item{append}{whether or not to append to an existing file} \item{which}{ a vector of subscripts (corresponding to \code{object$Design$name}) specifying a submodel to print. Default is to describe the whole model. \code{which} can also be a vector of character strings specifying the factor names to print. Enough of each string is needed to ensure a unique match. Names for interaction effects are of the form \code{"age * sex"}. For any interaction effect for which you do not request main effects, the main effects will be added to \code{which}. When \code{which} is given, the model structural statement is not included. In this case, intercepts are not included either. } \item{varnames}{ variable names to substitute for non-interactions. Order must correspond to \code{object$Design$name} and interactions must be omitted. Default is \code{object$Design$name[object$Design$assume.code!=9]}. \code{varnames} can contain any LaTeX commands such as subscripts and "\\\\\\\\frac" (all "\\" must be quadrupled.) Any "/" must be preceded by "\\\\" (2, not 4 backslashes). Elements of \code{varnames} for interactions are ignored; they can be set to any value. } \item{columns}{ maximum number of columns of printing characters to allow before outputting a LaTeX newline command } \item{prefix}{ if given, a LaTeX \\lefteqn command of the form \code{\\lefteqn\{prefix =\} \\\\} will be inserted to print a left-hand-side of the equation. } \item{inline}{ Set to \code{TRUE} to create text for insertion in an in-line equation. This text contains only the expansion of X beta, and is not surrounded by \code{"$"}. } \item{before}{ a character string to place before each line of output. Use the default for a LaTeX \code{eqnarray} environment. For \code{inline=TRUE}, the \code{before} string, if not an empty string, will be placed once before the entire markup.
} \item{after}{ a character string to place after the output if \code{inline=TRUE} } \item{intercept}{ a special intercept value to include that is not part of the standard model parameters (e.g., centering constant in Cox model). Only allowed in the \code{latexrms} rendition. } \item{pretrans}{ if any spline or polynomial-expanded variables are themselves transformed, a table of pre-transformations will be formed unless \code{pretrans=FALSE}. } \item{digits}{number of digits of precision to use in formatting coefficients and other numbers} \item{size}{a LaTeX font size to use for the output, without the slash. Default is current size.} } \value{a file name of class \code{"latex"}, or if \code{options(prType='html')}, nothing (output directed to the console)} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link[Hmisc]{latex}}, \code{\link[Hmisc]{rcspline.restate}}, \code{\link{rms}} } \examples{ \dontrun{ f <- lrm(death ~ rcs(age)+sex) w <- latex(f, file='f.tex') w # displays, using e.g. xdvi latex(f) # send LaTeX code to console, as for knitr options(prType='html') latex(f) # emit html and latex for knitr html and html notebooks } } \keyword{models} \keyword{regression} \keyword{character} \keyword{methods} \keyword{interface} rms/man/pphsm.Rd0000644000176200001440000000263413714237251013256 0ustar liggesusers\name{pphsm} \alias{pphsm} \alias{print.pphsm} \alias{vcov.pphsm} \title{Parametric Proportional Hazards form of AFT Models} \description{ Translates an accelerated failure time (AFT) model fitted by \code{psm} to proportional hazards form, if the fitted model was a Weibull or exponential model (extreme value distribution with "log" link). } \usage{ pphsm(fit) \method{print}{pphsm}(x, digits=max(options()$digits - 4, 3), correlation=TRUE, \dots) \method{vcov}{pphsm}(object, \dots) } \arguments{ \item{fit}{fit object created by \code{psm}} \item{x}{result of \code{pphsm}} \item{digits}{how many significant digits are to be used for the returned value} \item{correlation}{set to \code{FALSE} to suppress printing of correlation matrix of parameter estimates} \item{\dots}{ignored} \item{object}{a pphsm object} } \value{ a new fit object with transformed parameter estimates } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{psm}}, \code{\link{summary.rms}}, \code{\link{print.pphsm}} } \examples{ set.seed(1) S <- Surv(runif(100)) x <- runif(100) dd <- datadist(x); options(datadist='dd') f <- psm(S ~ x, dist="exponential") summary(f) # effects on log(T) scale f.ph <- pphsm(f) \dontrun{summary(f.ph) # effects on hazard ratio scale} options(datadist=NULL) } \keyword{models} \keyword{survival} \keyword{regression} rms/man/lrm.fit.Rd0000644000176200001440000001207413714237251013501 0ustar liggesusers\name{lrm.fit} \alias{lrm.fit} \title{Logistic Model Fitter} \description{ Fits a binary or ordinal logistic model for a given design matrix and response vector with no missing values in either. Ordinary or penalized maximum likelihood estimation is used.
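For example (a sketch; variable names are hypothetical, and the design matrix must not contain an intercept column), a direct call looks like \code{f <- lrm.fit(cbind(age, blood.pressure), death)}.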
} \usage{ lrm.fit(x, y, offset=0, initial, est, maxit=12, eps=.025, tol=1e-7, trace=FALSE, penalty.matrix=NULL, weights=NULL, normwt=FALSE, scale=FALSE) } \arguments{ \item{x}{ design matrix with no column for an intercept } \item{y}{ response vector, numeric, categorical, or character } \item{offset}{optional numeric vector containing an offset on the logit scale} \item{initial}{ vector of initial parameter estimates, beginning with the intercept } \item{est}{ indexes of \code{x} to fit in the model (default is all columns of \code{x}). Specifying \code{est=c(1,2,5)} causes columns 1,2, and 5 to have parameters estimated. The score vector \code{u} and covariance matrix \code{var} can be used to obtain score statistics for other columns } \item{maxit}{ maximum no. iterations (default=\code{12}). Specifying \code{maxit=1} causes \code{lrm.fit} to compute statistics at initial estimates. } \item{eps}{ difference in \eqn{-2 log} likelihood for declaring convergence. Default is \code{.025}. If the \eqn{-2 log} likelihood gets worse by eps/10 while the maximum absolute first derivative of \eqn{-2 log} likelihood is below 1e-9, convergence is still declared. This handles the case where the initial estimates are MLEs, to prevent endless step-halving. } \item{tol}{ Singularity criterion. Default is 1e-7 } \item{trace}{ set to \code{TRUE} to print -2 log likelihood, step-halving fraction, change in -2 log likelihood, maximum absolute value of first derivative, and vector of first derivatives at each iteration. } \item{penalty.matrix}{ a self-contained ready-to-use penalty matrix - see \code{lrm} } \item{weights}{ a vector (same length as \code{y}) of possibly fractional case weights } \item{normwt}{ set to \code{TRUE} to scale \code{weights} so they sum to the length of \code{y}; useful for sample surveys as opposed to the default of frequency weighting } \item{scale}{set to \code{TRUE} to subtract column means and divide by column standard deviations of \code{x} before fitting, and to back-solve for the un-normalized covariance matrix and regression coefficients. This can sometimes make the model converge for very large sample sizes where for example spline or polynomial component variables create scaling problems leading to loss of precision when accumulating sums of squares and crossproducts.} } \value{ a list with the following components: \item{call}{ calling expression } \item{freq}{ table of frequencies for \code{y} in order of increasing \code{y} } \item{stats}{ vector with the following elements: number of observations used in the fit, maximum absolute value of first derivative of log likelihood, model likelihood ratio chi-square, d.f., P-value, \eqn{c} index (area under ROC curve), Somers' \eqn{D_{xy}}, Goodman-Kruskal \eqn{\gamma}{gamma}, and Kendall's \eqn{\tau_a}{tau-a} rank correlations between predicted probabilities and observed response, the Nagelkerke \eqn{R^2} index, the Brier probability score with respect to computing the probability that \eqn{y >} the mid level less one, the \eqn{g}-index, \eqn{gr} (the \eqn{g}-index on the odds ratio scale), and \eqn{gp} (the \eqn{g}-index on the probability scale using the same cutoff used for the Brier score). Probabilities are rounded to the nearest 0.002 in the computations of rank correlation indexes. When \code{penalty.matrix} is present, the \eqn{\chi^2}{chi-square}, d.f., and P-value are not corrected for the effective d.f.
} \item{fail}{ set to \code{TRUE} if convergence failed (and \code{maxit>1}) } \item{coefficients}{ estimated parameters } \item{var}{ estimated variance-covariance matrix (inverse of information matrix). Note that in the case of penalized estimation, \code{var} is not the improved sandwich-type estimator (which \code{lrm} does compute). } \item{u}{ vector of first derivatives of log-likelihood } \item{deviance}{ -2 log likelihoods. When an offset variable is present, three deviances are computed: for intercept(s) only, for intercepts+offset, and for intercepts+offset+predictors. When there is no offset variable, the vector contains deviances for the intercept(s)-only model and the model with intercept(s) and predictors. } \item{est}{ vector of column numbers of \code{X} fitted (intercepts are not counted) } \item{non.slopes}{ number of intercepts in model } \item{penalty.matrix}{ see above }} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{lrm}}, \code{\link{glm}}, \code{\link{matinv}}, \code{\link[Hmisc]{solvet}}, \code{\link{cr.setup}}, \code{\link{gIndex}} } \examples{ #Fit an additive logistic model containing numeric predictors age, #blood.pressure, and sex, assumed to be already properly coded and #transformed # # fit <- lrm.fit(cbind(age,blood.pressure,sex), death) } \keyword{models} \keyword{regression} \concept{logistic regression model} rms/man/cr.setup.Rd0000644000176200001440000001063313714237251013670 0ustar liggesusers\name{cr.setup} \alias{cr.setup} \title{Continuation Ratio Ordinal Logistic Setup} \description{ Creates several new variables which help set up a dataset with an ordinal response variable \eqn{y} for use in fitting a forward continuation ratio (CR) model. The CR model can be fitted with binary logistic regression if each input observation is replicated the proper number of times according to the \eqn{y} value, a new binary \eqn{y} is computed that has at most one \eqn{y=1} per subject, and if a \code{cohort} variable is used to define the current qualifying condition for a cohort of subjects, e.g., \eqn{y\geq 2}. \code{cr.setup} creates the needed auxiliary variables. See \code{predab.resample} and \code{validate.lrm} for information about validating CR models (e.g., using the bootstrap to sample with replacement from the original subjects instead of the records used in the fit, validating the model separately for user-specified values of \code{cohort}). } \usage{ cr.setup(y) } \arguments{ \item{y}{ a character, numeric, \code{category}, or \code{factor} vector containing values of the response variable. For \code{category} or \code{factor} variables, the \code{levels} of the variable are assumed to be listed in an ordinal way. }} \value{ a list with components \code{y, cohort, subs, reps}. \code{y} is a new binary variable that is to be used in the binary logistic fit. \code{cohort} is a \code{factor} vector specifying which cohort condition currently applies. \code{subs} is a vector of subscripts that can be used to replicate other variables the same way \code{y} was replicated. \code{reps} specifies how many times each original observation was replicated. \code{y, cohort, subs} are all the same length and are longer than the original \code{y} vector. \code{reps} is the same length as the original \code{y} vector.
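For example (a sketch), after \code{u <- cr.setup(y)} one can fit the binary model with \code{lrm(u$y ~ u$cohort + sex[u$subs])}, using \code{u$subs} to replicate each predictor the same way \code{y} was replicated.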
The \code{subs} vector is suitable for passing to \code{validate.lrm} or \code{calibrate}, which pass this vector under the name \code{cluster} on to \code{predab.resample} so that bootstrapping can be done by sampling with replacement from the original subjects rather than from the individual records created by \code{cr.setup}. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \references{ Berridge DM, Whitehead J: Analysis of failure time data with ordinal categories of response. Stat in Med 10:1703--1710, 1991. } \seealso{ \code{\link{lrm}}, \code{\link{glm}}, \code{\link{predab.resample}} } \examples{ y <- c(NA, 10, 21, 32, 32) cr.setup(y) set.seed(171) y <- sample(0:2, 100, rep=TRUE) sex <- sample(c("f","m"),100,rep=TRUE) sex <- factor(sex) table(sex, y) options(digits=5) tapply(y==0, sex, mean) tapply(y==1, sex, mean) tapply(y==2, sex, mean) cohort <- y>=1 tapply(y[cohort]==1, sex[cohort], mean) u <- cr.setup(y) Y <- u$y cohort <- u$cohort sex <- sex[u$subs] lrm(Y ~ cohort + sex) f <- lrm(Y ~ cohort*sex) # saturated model - has to fit all data cells f #Prob(y=0|female): # plogis(-.50078) #Prob(y=0|male): # plogis(-.50078+.11301) #Prob(y=1|y>=1, female): plogis(-.50078+.31845) #Prob(y=1|y>=1, male): plogis(-.50078+.31845+.11301-.07379) combinations <- expand.grid(cohort=levels(cohort), sex=levels(sex)) combinations p <- predict(f, combinations, type="fitted") p p0 <- p[c(1,3)] p1 <- p[c(2,4)] p1.unconditional <- (1 - p0) *p1 p1.unconditional p2.unconditional <- 1 - p0 - p1.unconditional p2.unconditional \dontrun{ dd <- datadist(inputdata) # do this on non-replicated data options(datadist='dd') pain.severity <- inputdata$pain.severity u <- cr.setup(pain.severity) # inputdata frame has age, sex with pain.severity attach(inputdata[u$subs,]) # replicate age, sex # If age, sex already available, could do age <- age[u$subs] etc., or # age <- rep(age, u$reps), etc. y <- u$y cohort <- u$cohort dd <- datadist(dd, cohort) # add to dd f <- lrm(y ~ cohort + age*sex) # ordinary cont. ratio model g <- lrm(y ~ cohort*sex + age, x=TRUE,y=TRUE) # allow unequal slopes for # sex across cutoffs cal <- calibrate(g, cluster=u$subs, subset=cohort=='all') # subs makes bootstrap sample the correct units, subset causes # Predicted Prob(pain.severity=0) to be checked for calibration } } \keyword{category} \keyword{models} \keyword{regression} \concept{logistic regression model} \concept{continuation ratio model} \concept{ordinal logistic model} \concept{ordinal response} rms/man/zzzrmsOverview.Rd0000644000176200001440000010561313717762337015251 0ustar liggesusers\name{rmsOverview} \alias{rmsOverview} \alias{rms.Overview} \title{Overview of rms Package} \description{ rms is the package that goes along with the book Regression Modeling Strategies. rms does regression modeling, testing, estimation, validation, graphics, prediction, and typesetting by storing enhanced model design attributes in the fit. rms is a re-written version of the Design package that has improved graphics and duplicates very little code in the survival package. The package is a collection of about 180 functions that assist and streamline modeling, especially for biostatistical and epidemiologic applications. It also contains functions for binary and ordinal logistic regression models and the Buckley-James multiple regression model for right-censored responses, and implements penalized maximum likelihood estimation for logistic and ordinary linear models. 
rms works with almost any regression model, but it was especially written to work with logistic regression, Cox regression, accelerated failure time models, ordinary linear models, the Buckley-James model, generalized least squares for longitudinal data (using the nlme package), generalized linear models, and quantile regression (using the quantreg package). rms requires the Hmisc package to be installed. Note that Hmisc has several functions useful for data analysis (especially data reduction and imputation). Older references below pertaining to the Design package are relevant to rms. } \section{Statistical Methods Implemented}{ \itemize{ \item Ordinary linear regression models \item Binary and ordinal logistic models (proportional odds and continuation ratio models, probit, log-log, complementary log-log including ordinal cumulative probability models for continuous Y, efficiently handling thousands of distinct Y values using full likelihood methods) \item Bayesian binary and ordinal regression models, partial proportional odds model, and random effects \item Cox model \item Parametric survival models in the accelerated failure time class \item Buckley-James least-squares linear regression model with possibly right-censored responses \item Generalized linear model \item Quantile regression \item Generalized least squares \item Bootstrap model validation to obtain unbiased estimates of model performance without requiring a separate validation sample \item Automatic Wald tests of all effects in the model that are not parameterization-dependent (e.g., tests of nonlinearity of main effects when the variable does not interact with other variables, tests of nonlinearity of interaction effects, tests for whether a predictor is important, either as a main effect or as an effect modifier) \item Graphical depictions of model estimates (effect plots, odds/hazard ratio plots, nomograms that allow model predictions to be obtained manually even when there are nonlinear effects and interactions in the model) \item Various smoothed residual plots, including some new residual plots for verifying ordinal logistic model assumptions \item Composing S functions to evaluate the linear predictor (\eqn{X\hat{beta}}{X*beta hat}), hazard function, survival function, quantile functions analytically from the fitted model \item Typesetting of fitted model using LaTeX \item Robust covariance matrix estimation (Huber or bootstrap) \item Cubic regression splines with linear tail restrictions (natural splines) \item Tensor splines \item Interactions restricted to not be doubly nonlinear \item Penalized maximum likelihood estimation for ordinary linear regression and logistic regression models.
Different parts of the model may be penalized by different amounts, e.g., you may want to penalize interaction or nonlinear effects more than main effects or linear effects \item Estimation of hazard or odds ratios in presence of nonlinearity and interaction \item Sensitivity analysis for an unmeasured binary confounder in a binary logistic model } } \section{Motivation}{ rms was motivated by the following needs: \itemize{ \item need to automatically print interesting Wald tests that can be constructed from the design \itemize{ \item tests of linearity with respect to each predictor \item tests of linearity of interactions \item pooled interaction tests (e.g., all interactions involving race) \item pooled tests of effects with higher order effects \itemize{ \item test of main effect not meaningful when effect in interaction \item pooled test of main effect + interaction effect is meaningful \item test of 2nd-order interaction + any 3rd-order interaction containing those factors is meaningful } } \item need to store transformation parameters with the fit \itemize{ \item example: knot locations for spline functions \item these are "remembered" when getting predictions, unlike standard S or \R \item for categorical predictors, save levels so that same dummy variables will be generated for predictions; check that all levels in out-of-data predictions were present when model was fitted } \item need for uniform re-insertion of observations deleted because of NAs when using \code{predict} without \code{newdata} or when using \code{resid} \item need to easily plot the regression effect of any predictor \itemize{ \item example: age is represented by a linear spline with knots at 40 and 60y plot effect of age on log odds of disease, adjusting interacting factors to easily specified constants \item vary 2 predictors: plot x1 on x-axis, separate curves for discrete x2 or 3d perspective plot for continuous x2 \item if predictor is represented as a function in the model, plots should be with respect to the original variable:\cr \code{f <- lrm(y ~ log(cholesterol)+age)} \cr \code{plot(Predict(f, cholesterol)) # cholesterol on x-axis, default range} \cr \code{ggplot(Predict(f, cholesterol)) # same using ggplot2} \code{plotp(Predict(f, cholesterol)) # same directly using plotly} } \item need to store summary of distribution of predictors with the fit \itemize{ \item plotting limits (default: 10th smallest, 10th largest values or \%-tiles) \item effect limits (default: .25 and .75 quantiles for continuous vars.) \item adjustment values for other predictors (default: median for continuous predictors, most frequent level for categorical ones) \item discrete numeric predictors: list of possible values example: x=0,1,2,3,5 -> by default don't plot prediction at x=4 \item values are on the inner-most variable, e.g. cholesterol, not log(chol.)
\item allows estimation/plotting long after original dataset has been deleted \item for Cox models, underlying survival also stored with fit, so original data not needed to obtain predicted survival curves } \item need to automatically print estimates of effects in presence of non- linearity and interaction \itemize{ \item example: age is quadratic, interacting with sex default effect is inter-quartile-range hazard ratio (for Cox model), for sex=reference level \item user-controlled effects: \code{summary(fit, age=c(30,50), sex="female")} -> odds ratios for logistic model, relative survival time for accelerated failure time survival models \item effects for all variables (e.g. odds ratios) may be plotted with multiple-confidence-level bars } \item need for prettier and more concise effect names in printouts, especially for expanded nonlinear terms and interaction terms \itemize{ \item use inner-most variable name to identify predictors \item e.g. for \code{pmin(x^2-3,10)} refer to factor with legal S-name \code{x} } \item need to recognize that an intercept is not always a simple concept \itemize{ \item some models (e.g., Cox) have no intercept \item some models (e.g., ordinal logistic) have multiple intercepts } \item need for automatic high-quality printing of fitted mathematical model (with dummy variables defined, regression spline terms simplified, interactions "factored"). Focus is on regression splines instead of nonparametric smoothers or smoothing splines, so that explicit formulas for fit may be obtained for use outside S. rms can also compose S functions to evaluate \eqn{X\beta}{X*Beta} from the fitted model analytically, as well as compose SAS code to do this. \item need for automatic drawing of nomogram to represent the fitted model \item need for automatic bootstrap validation of a fitted model, with only one S command (with respect to calibration and discrimination) \item need for robust (Huber sandwich) estimator of covariance matrix, and be able to do all other analysis (e.g., plots, C.L.) using the adjusted covariances \item need for robust (bootstrap) estimator of covariance matrix, easily used in other analyses without change \item need for Huber sandwich and bootstrap covariance matrices adjusted for cluster sampling \item need for routine reporting of how many observations were deleted by missing values on each predictor (see \code{na.delete} in Hmisc) \item need for optional reporting of descriptive statistics for Y stratified by missing status of each X (see na.detail.response) \item need for pretty, annotated survival curves, using the same commands for parametric and Cox models \item need for ordinal logistic model (proportional odds model, continuation ratio model) \item need for estimating and testing general contrasts without having to be conscious of variable coding or parameter order } } \details{ To make use of automatic typesetting features you must have LaTeX or one of its variants installed.\cr Some aspects of rms (e.g., \code{latex}) will not work correctly if \code{options(contrasts=)} other than \code{c("contr.treatment", "contr.poly")} are used. rms relies on a wealth of survival analysis functions written by Terry Therneau of Mayo Clinic. Front-ends have been written for several of Therneau's functions, and other functions have been slightly modified. 
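If there is any doubt about the contrasts setting mentioned above, it can be set explicitly before fitting, e.g. \code{options(contrasts=c("contr.treatment", "contr.poly"))}.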
} \section{Fitting Functions Compatible with rms}{ rms will work with a wide variety of fitting functions, but it is meant especially for the following: \tabular{lll}{ \bold{Function} \tab \bold{Purpose} \tab \bold{Related S}\cr \tab \tab \bold{Functions}\cr \bold{\code{ols}} \tab Ordinary least squares linear model \tab \code{lm}\cr \bold{\code{lrm}} \tab Binary and ordinal logistic regression \tab \code{glm}\cr \tab model \tab \code{cr.setup}\cr \bold{\code{orm}} \tab Ordinal regression model \tab \code{lrm}\cr \bold{\code{blrm}} \tab Bayesian binary and ordinal regression \tab\ \cr \bold{\code{psm}} \tab Accelerated failure time parametric \tab \code{survreg}\cr \tab survival model \tab \cr \bold{\code{cph}} \tab Cox proportional hazards regression \tab \code{coxph}\cr \bold{\code{npsurv}} \tab Nonparametric survival estimates \tab \code{survfit.formula} \cr \bold{\code{bj}} \tab Buckley-James censored least squares \tab \code{survreg}\cr \tab linear model \tab \cr \bold{\code{Glm}} \tab Version of \code{glm} for use with rms \tab \code{glm}\cr \bold{\code{Gls}} \tab Version of \code{gls} for use with rms \tab \code{gls}\cr \bold{\code{Rq}} \tab Version of \code{rq} for use with rms \tab \code{rq}\cr } } \section{Methods in rms}{ The following generic functions work with fits with rms in effect: \tabular{lll}{ \bold{Function} \tab \bold{Purpose} \tab \bold{Related}\cr \tab \tab \bold{Functions}\cr \bold{\code{print}} \tab Print parameters and statistics of fit \tab \cr \bold{\code{coef}} \tab Fitted regression coefficients \tab \cr \bold{\code{formula}} \tab Formula used in the fit \tab \cr \bold{\code{specs}} \tab Detailed specifications of fit \tab \cr \bold{\code{robcov}} \tab Robust covariance matrix estimates \tab \cr \bold{\code{bootcov}} \tab Bootstrap covariance matrix estimates \tab \cr \bold{\code{summary}} \tab Summary of effects of predictors \tab \cr \bold{\code{plot.summary}} \tab Plot continuously shaded confidence \tab \cr \tab bars for results of summary \tab \cr \bold{\code{anova}} \tab Wald tests of most meaningful hypotheses \tab \cr \bold{\code{contrast}} \tab General contrasts, C.L., tests \tab \cr \bold{\code{plot.anova}} \tab Depict results of anova graphically \tab \code{dotchart} \cr \bold{\code{Predict}} \tab Partial predictor effects \tab \code{predict} \cr \bold{\code{plot.Predict}}\tab Plot predictor effects using lattice graphics \tab \code{predict} \cr \bold{\code{ggplot}} \tab Similar to above but using ggplot2 \cr \bold{\code{plotp}} \tab Similar to above but using plotly \cr \bold{\code{bplot}} \tab 3-D plot of effects of varying two \tab \cr \tab continuous predictors \tab \code{image, persp, contour} \cr \bold{\code{gendata}} \tab Generate data frame with predictor \tab \code{expand.grid} \cr \tab combinations (optionally interactively) \tab \cr \bold{\code{predict}} \tab Obtain predicted values or design matrix \tab \cr \bold{\code{fastbw}} \tab Fast backward step-down variable \tab \code{step} \cr \tab selection \tab \cr \bold{\code{residuals}} \tab Residuals, influence statistics from fit \tab \cr (or \bold{\code{resid}}) \tab \tab \cr \bold{\code{which.influence}} \tab Which observations are overly \tab \code{residuals} \cr \tab influential \tab \cr \bold{\code{sensuc}} \tab Sensitivity of one binary predictor in \tab \cr \tab lrm and cph models to an unmeasured \tab \cr \tab binary confounder \tab \cr \bold{\code{latex}} \tab LaTeX representation of fitted \tab \cr \tab model or \code{anova} or \code{summary} table \tab \cr 
\bold{\code{Function}} \tab S function analytic representation \tab \code{Function.transcan} \cr \tab of a fitted regression model (\eqn{X\beta}{X*Beta}) \tab \cr \bold{\code{hazard}} \tab S function analytic representation \tab \code{rcspline.restate} \cr \tab of a fitted hazard function (for \code{psm}) \tab \cr \bold{\code{Survival}} \tab S function analytic representation of \tab \cr \tab fitted survival function (for \code{psm,cph}) \tab \cr \bold{\code{Quantile}} \tab S function analytic representation of \tab \cr \tab fitted function for quantiles of \tab \cr \tab survival time (for \code{psm, cph}) \tab \cr \bold{\code{nomogram}} \tab Draws a nomogram for the fitted model \tab \code{latex, plot, ggplot, plotp} \cr \bold{\code{survest}} \tab Estimate survival probabilities \tab \code{survfit} \cr \tab (for \code{psm, cph}) \tab \cr \bold{\code{survplot}} \tab Plot survival curves (psm, cph, npsurv) \tab plot.survfit \cr \bold{\code{validate}} \tab Validate indexes of model fit using \tab val.prob \cr \tab resampling \tab \cr \bold{\code{calibrate}} \tab Estimate calibration curve for model \tab \cr \tab using resampling \tab \cr \bold{\code{vif}} \tab Variance inflation factors for a fit \tab \cr \bold{\code{naresid}} \tab Bring elements corresponding to missing \tab \cr \tab data back into predictions and residuals \tab \cr \bold{\code{naprint}} \tab Print summary of missing values \tab \cr \bold{\code{pentrace}} \tab Find optimum penalty for penalized MLE \tab \cr \bold{\code{effective.df}} \tab Print effective d.f. for each type of \tab \cr \tab variable in model, for penalized fit or \tab \cr \tab pentrace result \tab \cr \bold{\code{rm.impute}} \tab Impute repeated measures data with \tab \code{transcan}, \cr \tab non-random dropout \tab \code{fit.mult.impute} \cr \tab \emph{experimental, non-functional} \tab } } \section{Background for Examples}{ The following programs demonstrate how the pieces of the rms package work together. A (usually) one-time call to the function \code{datadist} requires a pass at the entire data frame to store distribution summaries for potential predictor variables. These summaries contain (by default) the .25 and .75 quantiles of continuous variables (for estimating effects such as odds ratios), the 10th smallest and 10th largest values (or .1 and .9 quantiles for small \eqn{n}) for plotting ranges for estimated curves, and the total range. For discrete numeric variables (those having \eqn{\leq 10}{<=10} unique values), the list of unique values is also stored. Such summaries are used by the \code{summary.rms, Predict}, and \code{nomogram.rms} functions. You may save time and defer running \code{datadist}. In that case, the distribution summary is not stored with the fit object, but it can be gathered before running \code{summary}, \code{plot}, \code{ggplot}, or \code{plotp}. \code{d <- datadist(my.data.frame) # or datadist(x1,x2)}\cr \code{options(datadist="d") # omit this or use options(datadist=NULL)}\cr \code{ # if datadist not run yet}\cr \code{f <- ols(y ~ x1 * x2)}\cr \code{anova(f)}\cr \code{fastbw(f)}\cr \code{Predict(f, x2)} \code{predict(f, newdata)} In the \bold{Examples} section there are three detailed examples using a fitting function designed to be used with rms, \code{lrm} (logistic regression model). In \bold{Detailed Example 1} we create 3 predictor variables and two binary responses on 500 subjects.
For the first binary response, \code{dz}, the true model involves only \code{sex} and \code{age}, and there is a nonlinear interaction between the two because the log odds is a truncated linear relationship in \code{age} for females and a quadratic function for males. For the second binary outcome, \code{dz.bp}, the true population model also involves systolic blood pressure (\code{sys.bp}) through a truncated linear relationship. First, nonparametric estimation of relationships is done using the Hmisc package's \code{plsmo} function which uses \code{lowess} with outlier detection turned off for binary responses. Then parametric modeling is done using restricted cubic splines. This modeling does not assume that we know the true transformations for \code{age} or \code{sys.bp} but that these transformations are smooth (which is not actually the case in the population). For \bold{Detailed Example 2}, suppose that a categorical variable treat has values \code{"a", "b"}, and \code{"c"}, an ordinal variable \code{num.diseases} has values 0,1,2,3,4, and that there are two continuous variables, \code{age} and \code{cholesterol}. \code{age} is fitted with a restricted cubic spline, while \code{cholesterol} is transformed using the transformation \code{log(cholesterol - 10)}. Cholesterol is missing on three subjects, and we impute these using the overall median cholesterol. We wish to allow for interaction between \code{treat} and \code{cholesterol}. The following S program will fit a logistic model, test all effects in the design, estimate effects, and plot estimated transformations. The fit for \code{num.diseases} really considers the variable to be a 5-level categorical variable. The only difference is that a 3 d.f. test of linearity is done to assess whether the variable can be re-modeled "asis". Here we also show statements to attach the rms package and store predictor characteristics from datadist. \bold{Detailed Example 3} shows some of the survival analysis capabilities of rms related to the Cox proportional hazards model. We simulate data for 2000 subjects with 2 predictors, \code{age} and \code{sex}. In the true population model, the log hazard function is linear in \code{age} and there is no \code{age} \eqn{\times}{x} \code{sex} interaction. In the analysis below we do not make use of the linearity in age. rms makes use of many of Terry Therneau's survival functions that are builtin to S. The following is a typical sequence of steps that would be used with rms in conjunction with the Hmisc \code{transcan} function to do single imputation of all NAs in the predictors (multiple imputation would be better but would be harder to do in the context of bootstrap model validation), fit a model, do backward stepdown to reduce the number of predictors in the model (with all the severe problems this can entail), and use the bootstrap to validate this stepwise model, repeating the variable selection for each re-sample. Here we take a short cut as the imputation is not repeated within the bootstrap. In what follows we (atypically) have only 3 candidate predictors. In practice be sure to have the validate and calibrate functions operate on a model fit that contains all predictors that were involved in previous analyses that used the response variable. Here the imputation is necessary because backward stepdown would otherwise delete observations missing on any candidate variable. Note that you would have to define \code{x1, x2, x3, y} to run the following code. 
\code{xt <- transcan(~ x1 + x2 + x3, imputed=TRUE)}\cr \code{impute(xt) # imputes any NAs in x1, x2, x3}\cr \code{# Now fit original full model on filled-in data}\cr \code{f <- lrm(y ~ x1 + rcs(x2,4) + x3, x=TRUE, y=TRUE) #x,y allow boot.}\cr \code{fastbw(f)}\cr \code{# derives stepdown model (using default stopping rule)}\cr \code{validate(f, B=100, bw=TRUE) # repeats fastbw 100 times}\cr \code{cal <- calibrate(f, B=100, bw=TRUE) # also repeats fastbw}\cr \code{plot(cal)} } \examples{ ## To run several comprehensive examples, run the following command \dontrun{ demo(all, 'rms') } } \section{Common Problems to Avoid}{ \enumerate{ \item Don't have a formula like \code{y ~ age + age^2}. In S you need to connect related variables using a function which produces a matrix, such as \code{pol} or \code{rcs}. This allows effect estimates (e.g., hazard ratios) to be computed as well as multiple d.f. tests of association. \item Don't use \code{poly} or \code{strata} inside formulas used in rms. Use \code{pol} and \code{strat} instead. \item Almost never code your own dummy variables or interaction variables in S. Let S do this automatically. Otherwise, \code{anova} can't do its job. \item Almost never transform predictors outside of the model formula, as then plots of predicted values vs. predictor values, and other displays, would not be made on the original scale. Use instead something like \code{y ~ log(cell.count+1)}, which will allow \code{cell.count} to appear on \eqn{x}-axes. You can get fancier, e.g., \code{y ~ rcs(log(cell.count+1),4)} to fit a restricted cubic spline with 4 knots in \code{log(cell.count+1)}. For more complex transformations do something like %\cr \code{f <- function(x) \{}\cr \code{\ldots various 'if' statements, etc.}\cr \code{log(pmin(x,50000)+1)}\cr \code{\}}\cr \code{fit1 <- lrm(death ~ f(cell.count))}\cr \code{fit2 <- lrm(death ~ rcs(f(cell.count),4))}\cr \code{\}} \item Don't put \code{$} inside variable names used in formulas. Either attach data frames or use \code{data=}. \item Don't forget to use \code{datadist}. Try to use it at the top of your program so that all model fits can automatically take advantage of its distributional summaries for the predictors. \item Don't \code{validate} or \code{calibrate} models which were reduced by dropping "insignificant" predictors. Proper bootstrap or cross-validation must repeat any variable selection steps for each re-sample. Therefore, \code{validate} or \code{calibrate} models which contain all candidate predictors, and if you must reduce models, specify the option \code{bw=TRUE} to \code{validate} or \code{calibrate}. \item Dropping of "insignificant" predictors ruins much of the usual statistical inference for regression models (confidence limits, standard errors, \eqn{P}-values, \eqn{\chi^2}{chi-squares}, ordinary indexes of model performance) and it also results in models which will have worse predictive discrimination. } } \section{Accessing the Package}{ Use \code{require(rms)}. } \references{ The primary resource for the rms package is \emph{Regression Modeling Strategies, second edition} by FE Harrell (Springer-Verlag, 2015) and the web page \url{https://hbiostat.org/R/rms/}. See also the Statistics in Medicine articles by Harrell \emph{et al} listed below for case studies of modeling and model validation using rms. Several datasets useful for multivariable modeling with rms are found at \url{https://hbiostat.org/data/}.
} \section{Published Applications of rms and Regression Splines}{ \itemize{ \item Spline fits \enumerate{ \item Spanos A, Harrell FE, Durack DT (1989): Differential diagnosis of acute meningitis: An analysis of the predictive value of initial observations. \emph{JAMA} 262:2700-2707. \item Ohman EM, Armstrong PW, Christenson RH, \emph{et al}. (1996): Cardiac troponin T levels for risk stratification in acute myocardial ischemia. \emph{New Eng J Med} 335:1333-1341. } \item Bootstrap calibration curve for a parametric survival model: \enumerate{ \item Knaus WA, Harrell FE, Fisher CJ, Wagner DP, \emph{et al}. (1993): The clinical evaluation of new drugs for sepsis: A prospective study design based on survival analysis. \emph{JAMA} 270:1233-1241. } \item Splines, interactions with splines, algebraic form of fitted model from \code{latex.rms} \enumerate{ \item Knaus WA, Harrell FE, Lynn J, et al. (1995): The SUPPORT prognostic model: Objective estimates of survival for seriously ill hospitalized adults. \emph{Annals of Internal Medicine} 122:191-203. } \item Splines, odds ratio chart from fitted model with nonlinear and interaction terms, use of \code{transcan} for imputation \enumerate{ \item Lee KL, Woodlief LH, Topol EJ, Weaver WD, Betriu A, Col J, Simoons M, Aylward P, Van de Werf F, Califf RM. Predictors of 30-day mortality in the era of reperfusion for acute myocardial infarction: results from an international trial of 41,021 patients. \emph{Circulation} 1995;91:1659-1668. } \item Splines, external validation of logistic models, prediction rules using point tables \enumerate{ \item Steyerberg EW, Hargrove YV, \emph{et al} (2001): Residual mass histology in testicular cancer: development and validation of a clinical prediction rule. \emph{Stat in Med} 20:3847-3859. \item van Gorp MJ, Steyerberg EW, \emph{et al} (2003): Clinical prediction rule for 30-day mortality in Bjork-Shiley convexo-concave valve replacement. \emph{J Clinical Epidemiology} 56:1006-1012. } \item Model fitting, bootstrap validation, missing value imputation \enumerate{ \item Krijnen P, van Jaarsveld BC, Steyerberg EW, Man in 't Veld AJ, Schalekamp MADH, Habbema JDF (1998): A clinical prediction rule for renal artery stenosis. \emph{Annals of Internal Medicine} 129:705-711. } \item Model fitting, splines, bootstrap validation, nomograms \enumerate{ \item Kattan MW, Eastham JA, Stapleton AMF, Wheeler TM, Scardino PT. A preoperative nomogram for disease recurrence following radical prostatectomy for prostate cancer. \emph{J Natl Ca Inst} 1998; 90(10):766-771. \item Kattan MW, Wheeler TM, Scardino PT. A postoperative nomogram for disease recurrence following radical prostatectomy for prostate cancer. \emph{J Clin Oncol} 1999; 17(5):1499-1507. \item Kattan MW, Zelefsky MJ, Kupelian PA, Scardino PT, Fuks Z, Leibel SA. A pretreatment nomogram for predicting the outcome of three-dimensional conformal radiotherapy in prostate cancer. \emph{J Clin Oncol} 2000; 18(19):3252-3259. \item Eastham JA, May R, Robertson JL, Sartor O, Kattan MW. Development of a nomogram which predicts the probability of a positive prostate biopsy in men with an abnormal digital rectal examination and a prostate specific antigen between 0 and 4 ng/ml. \emph{Urology}. (In press). \item Kattan MW, Heller G, Brennan MF. A competing-risk nomogram for sarcoma-specific death following local recurrence. \emph{Stat in Med} 2003; 22:3515-3525.
} \item Penalized maximum likelihood estimation, regression splines, web site to get predicted values \enumerate{ \item Smits M, Dippel DWJ, Steyerberg EW, et al. Predicting intracranial traumatic findings on computed tomography in patients with minor head injury: The CHIP prediction rule. \emph{Ann Int Med} 2007; 146:397-405. } \item Nomogram with 2- and 5-year survival probability and median survival time (but watch out for the use of univariable screening) \enumerate{ \item Clark TG, Stewart ME, Altman DG, Smyth JF. A prognostic model for ovarian cancer. \emph{Br J Cancer} 2001; 85:944-52. } \item Comprehensive example of parametric survival modeling with an extensive nomogram, time ratio chart, anova chart, survival curves generated using survplot, bootstrap calibration curve \enumerate{ \item Teno JM, Harrell FE, Knaus WA, et al. Prediction of survival for older hospitalized patients: The HELP survival model. \emph{J Am Geriatrics Soc} 2000; 48:S16-S24. } \item Model fitting, imputation, and several nomograms expressed in tabular form \enumerate{ \item Hasdai D, Holmes DR, et al. Cardiogenic shock complicating acute myocardial infarction: Predictors of death. \emph{Am Heart J} 1999; 138:21-31. } \item Ordinal logistic model with bootstrap calibration plot \enumerate{ \item Wu AW, Yasui Y, Alzola CF \emph{et al}. Predicting functional status outcomes in hospitalized patients aged 80 years and older. \emph{J Am Geriatric Society} 2000; 48:S6-S15. } \item Propensity modeling in evaluating medical diagnosis, anova dot chart \enumerate{ \item Weiss JP, Gruver C, et al. Ordering an echocardiogram for evaluation of left ventricular function: Level of expertise necessary for efficient use. \emph{J Am Soc Echocardiography} 2000; 13:124-130. } \item Simulations using rms to study the properties of various modeling strategies \enumerate{ \item Steyerberg EW, Eijkemans MJC, Habbema JDF. Stepwise selection in small data sets: A simulation study of bias in logistic regression analysis. \emph{J Clin Epi} 1999; 52:935-942. \item Steyerberg EW, Eijkemans MJC, Harrell FE, Habbema JDF. Prognostic modeling with logistic regression analysis: In search of a sensible strategy in small data sets. \emph{Med Decision Making} 2001; 21:45-56. } \item Statistical methods and references related to rms, along with case studies which include the rms code which produced the analyses \enumerate{ \item Harrell FE, Lee KL, Mark DB (1996): Multivariable prognostic models: Issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors. \emph{Stat in Med} 15:361-387. \item Harrell FE, Margolis PA, Gove S, Mason KE, Mulholland EK et al. (1998): Development of a clinical prediction model for an ordinal outcome: The World Health Organization ARI Multicentre Study of clinical signs and etiologic agents of pneumonia, sepsis, and meningitis in young infants. \emph{Stat in Med} 17:909-944. \item Bender R, Benner A (2000): Calculating ordinal regression models in SAS and S-Plus. \emph{Biometrical J} 42:677-699. } } } \section{Bug Reports}{ The author is willing to help with problems. Send E-mail to \email{fh@fharrell.com}. To report bugs, please do the following: \enumerate{ \item If the bug occurs when running a function on a fit object (e.g., \code{anova}), attach a \code{dump}'d text version of the fit object to your note. If you used \code{datadist} but not until after the fit was created, also send the object created by \code{datadist}.
Example: \code{save(myfit, file="/tmp/myfit.rda")} will create an R binary save file that can be attached to the E-mail. \item If the bug occurs during a model fit (e.g., with \code{lrm, ols, psm, cph}), send the statement causing the error with a \code{save}'d version of the data frame used in the fit. If this data frame is very large, reduce it to a small subset which still causes the error. } } \section{Copyright Notice}{ GENERAL DISCLAIMER This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. In short: you may use this code any way you like, as long as you don't charge money for it, remove this notice, or hold anyone liable for its results. Also, please acknowledge the source and communicate changes to the author. If this software is used in work presented for publication, kindly reference it using for example: Harrell FE (2009): rms: S functions for biostatistical/epidemiologic modeling, testing, estimation, validation, graphics, and prediction. Programs available from \url{https://hbiostat.org/R/rms/}. Be sure to reference other packages used as well as \R itself. } \author{ Frank E Harrell Jr\cr Professor of Biostatistics\cr Vanderbilt University School of Medicine\cr Nashville, Tennessee\cr \email{fh@fharrell.com} } \keyword{models} \concept{overview} rms/man/cph.Rd0000644000176200001440000004525413714237251012706 0ustar liggesusers\name{cph} \alias{cph} \alias{Survival.cph} \alias{Quantile.cph} \alias{Mean.cph} \title{Cox Proportional Hazards Model and Extensions} \description{ Modification of Therneau's \code{coxph} function to fit the Cox model and its extension, the Andersen-Gill model. The latter allows for interval time-dependent covariables, time-dependent strata, and repeated events. The \code{Survival} method for an object created by \code{cph} returns an S function for computing estimates of the survival function. The \code{Quantile} method for \code{cph} returns an S function for computing quantiles of survival time (median, by default). The \code{Mean} method returns a function for computing the mean survival time. This function issues a warning if the last follow-up time is uncensored, unless a restricted mean is explicitly requested. } \usage{ cph(formula = formula(data), data=environment(formula), weights, subset, na.action=na.delete, method=c("efron","breslow","exact","model.frame","model.matrix"), singular.ok=FALSE, robust=FALSE, model=FALSE, x=FALSE, y=FALSE, se.fit=FALSE, linear.predictors=TRUE, residuals=TRUE, nonames=FALSE, eps=1e-4, init, iter.max=10, tol=1e-9, surv=FALSE, time.inc, type=NULL, vartype=NULL, debug=FALSE, \dots) \method{Survival}{cph}(object, \dots) # Evaluate result as g(times, lp, stratum=1, type=c("step","polygon")) \method{Quantile}{cph}(object, \dots) # Evaluate like h(q, lp, stratum=1, type=c("step","polygon")) \method{Mean}{cph}(object, method=c("exact","approximate"), type=c("step","polygon"), n=75, tmax, \dots) # E.g. m(lp, stratum=1, type=c("step","polygon"), tmax, \dots) } \arguments{ \item{formula}{ an S formula object with a \code{Surv} object on the left-hand side.
The \code{terms} can specify any S model formula with up to third-order interactions. The \code{strat} function may appear in the terms, as a main effect or an interacting factor. To stratify on both race and sex, you would include both terms \code{strat(race)} and \code{strat(sex)}. Stratification factors may interact with non-stratification factors; not all stratification terms need interact with the same modeled factors. } \item{object}{ an object created by \code{cph} with \code{surv=TRUE} } \item{data}{ name of an S data frame containing all needed variables. Omit this to use a data frame already in the S ``search list''. } \item{weights}{ case weights } \item{subset}{ an expression defining a subset of the observations to use in the fit. The default is to use all observations. Specify for example \code{age>50 & sex=="male"} or \code{c(1:100,200:300)} respectively to use the observations satisfying a logical expression or those having row numbers in the given vector. } \item{na.action}{ specifies an S function to handle missing data. The default is the function \code{na.delete}, which causes observations with any variable missing to be deleted. The main difference between \code{na.delete} and the S-supplied function \code{na.omit} is that \code{na.delete} makes a list of the number of observations that are missing on each variable in the model. The \code{na.action} is usually specified by e.g. \code{options(na.action="na.delete")}. } \item{method}{ for \code{cph}, specifies a particular fitting method, or \code{"model.frame"} to return the model frame of the predictor and response variables satisfying any subset or missing value checks, or \code{"model.matrix"} to return the expanded design matrix. The default is \code{"efron"}, to use Efron's likelihood for fitting the model. For \code{Mean.cph}, \code{method} is \code{"exact"} to use numerical integration of the survival function at any linear predictor value to obtain a mean survival time. Specify \code{method="approximate"} to use an approximate method that is slower when \code{Mean.cph} is executing but then is essentially instant thereafter. For the approximate method, the area is computed for \code{n} points equally spaced between the min and max observed linear predictor values. This calculation is done separately for each stratum. Then the \code{n} pairs (X beta, area) are saved in the generated S function, and when this function is evaluated, the \code{approx} function is used to evaluate the mean for any given linear predictor values, using linear interpolation over the \code{n} X beta values. } \item{singular.ok}{ If \code{TRUE}, the program will automatically skip over columns of the X matrix that are linear combinations of earlier columns. In this case the coefficients for such columns will be NA, and the variance matrix will contain zeros. For ancillary calculations, such as the linear predictor, the missing coefficients are treated as zeros. The singularities will prevent many of the features of the \code{rms} library from working. } \item{robust}{ if \code{TRUE} a robust variance estimate is returned. Default is \code{TRUE} if the model includes a \code{cluster()} term, \code{FALSE} otherwise. } \item{model}{ default is \code{FALSE}. Set to \code{TRUE} to return the model frame as element \code{model} of the fit object. } \item{x}{ default is \code{FALSE}. Set to \code{TRUE} to return the expanded design matrix as element \code{x} (without intercept indicators) of the returned fit object.
} \item{y}{ default is \code{FALSE}. Set to \code{TRUE} to return the vector of response values (\code{Surv} object) as element \code{y} of the fit. } \item{se.fit}{ default is \code{FALSE}. Set to \code{TRUE} to compute the estimated standard errors of the estimate of X beta and store them in element \code{se.fit} of the fit. The predictors are first centered to their means before computing the standard errors. } \item{linear.predictors}{set to \code{FALSE} to omit \code{linear.predictors} vector from fit} \item{residuals}{set to \code{FALSE} to omit \code{residuals} vector from fit} \item{nonames}{set to \code{TRUE} to not set \code{names} attribute for \code{linear.predictors}, \code{residuals}, \code{se.fit}, and rows of design matrix} \item{eps}{ convergence criterion - change in log likelihood. } \item{init}{ vector of initial parameter estimates. Defaults to all zeros. Special residuals can be obtained by setting some elements of \code{init} to MLEs and others to zero and specifying \code{iter.max=1}. } \item{iter.max}{ maximum number of iterations to allow. Set to \code{0} to obtain certain null-model residuals. } \item{tol}{ tolerance for declaring singularity for matrix inversion (available only when the survival5 or later package is in effect) } \item{surv}{ set to \code{TRUE} to compute underlying survival estimates for each stratum, and to store these along with standard errors of log Lambda(t), \code{maxtime} (maximum observed survival or censoring time), and \code{surv.summary} in the returned object. Set \code{surv="summary"} to only compute and store \code{surv.summary}, not survival estimates at each unique uncensored failure time. If you specify \code{x=TRUE} and \code{y=TRUE}, you can obtain predicted survival later, with accurate confidence intervals for any set of predictor values. The standard error information stored as a result of \code{surv=TRUE} is only accurate at the mean of all predictors. If the model has no covariables, these estimates are of course OK. The main reason for using \code{surv} is to greatly speed up the computation of predicted survival probabilities as a function of the covariables, when accurate confidence intervals are not needed. } \item{time.inc}{ time increment used in deriving \code{surv.summary}. Survival, number at risk, and standard error will be stored for \code{t=0, time.inc, 2 time.inc, \dots, maxtime}, where \code{maxtime} is the maximum survival time over all strata. \code{time.inc} is also used in constructing the time axis in the \code{survplot} function (see below). The default value for \code{time.inc} is 30 if \code{units(ftime) = "Day"} or no \code{units} attribute has been attached to the survival time variable. If \code{units(ftime)} is a word other than \code{"Day"}, the default for \code{time.inc} is 1 when it is omitted, unless \code{maxtime<1}, then \code{maxtime/10} is used as \code{time.inc}. If \code{time.inc} is not given and \code{maxtime} divided by the default \code{time.inc} exceeds 25, \code{time.inc} is increased. } \item{type}{ (for \code{cph}) applies if \code{surv} is \code{TRUE} or \code{"summary"}. If \code{type} is omitted, the method consistent with \code{method} is used. See \code{survfit.coxph} (under \code{survfit}) or \code{survfit.cph} for details and for the definitions of values of \code{type}. For \code{Survival, Quantile, Mean} set to \code{"polygon"} to use linear interpolation instead of the usual step function.
For \code{Mean}, the default \code{type="step"} will yield the sample mean in the case of no censoring and no covariables, if \code{type="kaplan-meier"} was specified to \code{cph}. For \code{method="exact"}, the value of \code{type} is passed to the generated function, and it can be overridden when that function is actually invoked. For \code{method="approximate"}, \code{Mean.cph} generates the function in different ways according to \code{type}, and this cannot be changed when the function is actually invoked. } \item{vartype}{see \code{survfit.coxph}} \item{debug}{set to \code{TRUE} to print debugging information related to model matrix construction. You can also use \code{options(debug=TRUE)}.} \item{\dots}{ other arguments passed to \code{coxph.fit} from \code{cph}. Ignored by other functions. } \item{times}{ a scalar or vector of times at which to evaluate the survival estimates } \item{lp}{ a scalar or vector of linear predictors (including the centering constant) at which to evaluate the survival estimates } \item{stratum}{ a scalar stratum number or name (e.g., \code{"sex=male"}) to use in getting survival probabilities } \item{q}{ a scalar quantile or a vector of quantiles to compute } \item{n}{ the number of points at which to evaluate the mean survival time, for \code{method="approximate"} in \code{Mean.cph}. } \item{tmax}{ For \code{Mean.cph}, the default is to compute the overall mean (and produce a warning message if there is censoring at the end of follow-up). To compute a restricted mean life length, specify the truncation point as \code{tmax}. For \code{method="exact"}, \code{tmax} is passed to the generated function and it may be overridden when that function is invoked. For \code{method="approximate"}, \code{tmax} must be specified at the time that \code{Mean.cph} is run. }} \value{ For \code{Survival}, \code{Quantile}, or \code{Mean}, an S function is returned. Otherwise, in addition to what is listed below, formula/design information and the components \code{maxtime, time.inc, units, model, x, y, se.fit} are stored, the last 5 depending on the settings of options by the same names. The vectors or matrix stored if \code{y=TRUE} or \code{x=TRUE} have rows deleted according to \code{subset} and to missing data, and have names or row names that come from the data frame used as input data. \item{n}{ table with one row per stratum containing number of censored and uncensored observations } \item{coef}{ vector of regression coefficients } \item{stats}{ vector containing the named elements \code{Obs}, \code{Events}, \code{Model L.R.}, \code{d.f.}, \code{P}, \code{Score}, \code{Score P}, \code{R2}, Somers' \code{Dxy}, \code{g}-index, and \code{gr}, the \code{g}-index on the hazard ratio scale. \code{R2} is the Nagelkerke R-squared, with division by the maximum attainable R-squared. } \item{var}{ variance/covariance matrix of coefficients } \item{linear.predictors}{ values of predicted X beta for observations used in fit, normalized to have overall mean zero, then having any offsets added } \item{resid}{ martingale residuals } \item{loglik}{ log likelihood at initial and final parameter values } \item{score}{ value of score statistic at initial values of parameters } \item{times}{ lists of times (if \code{surv=TRUE}) } \item{surv}{ lists of underlying survival probability estimates } \item{std.err}{ lists of standard errors of estimated log-log survival } \item{surv.summary}{ a 3 dimensional array if \code{surv=TRUE}.
The first dimension is time ranging from 0 to \code{maxtime} by \code{time.inc}. The second dimension refers to strata. The third dimension contains the time-oriented matrix with \code{Survival, n.risk} (number of subjects at risk), and \code{std.err} (standard error of log-log survival). } \item{center}{ centering constant, equal to overall mean of X beta. }} \details{ If there is any strata-by-covariable interaction in the model such that the mean X beta varies greatly over strata, \code{method="approximate"} may not yield very accurate estimates of the mean in \code{Mean.cph}. For \code{method="approximate"}, if you ask for an estimate of the mean for a linear predictor value that was outside the range of linear predictors stored with the fit, the mean for that observation will be \code{NA}. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr \email{fh@fharrell.com} } \seealso{ \code{\link[survival]{coxph}}, \code{\link[survival]{survival-internal}}, \code{\link[survival]{Surv}}, \code{\link{residuals.cph}}, \code{\link[survival]{cox.zph}}, \code{\link{survfit.cph}}, \code{\link{survest.cph}}, \code{\link[survival]{survfit.coxph}}, \code{\link{survplot}}, \code{\link{datadist}}, \code{\link{rms}}, \code{\link{rms.trans}}, \code{\link{anova.rms}}, \code{\link{summary.rms}}, \code{\link{Predict}}, \code{\link{fastbw}}, \code{\link{validate}}, \code{\link{calibrate}}, \code{\link{plot.Predict}}, \code{\link{ggplot.Predict}}, \code{\link{specs.rms}}, \code{\link{lrm}}, \code{\link{which.influence}}, \code{\link[Hmisc]{na.delete}}, \code{\link[Hmisc]{na.detail.response}}, \code{\link{print.cph}}, \code{\link{latex.cph}}, \code{\link{vif}}, \code{\link{ie.setup}}, \code{\link{GiniMd}}, \code{\link{dxy.cens}}, \code{\link[survival:survConcordance]{survConcordance}} } \examples{ # Simulate data from a population model in which the log hazard # function is linear in age and there is no age x sex interaction n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" dd <- datadist(age, sex) options(datadist='dd') S <- Surv(dt,e) f <- cph(S ~ rcs(age,4) + sex, x=TRUE, y=TRUE) cox.zph(f, "rank") # tests of PH anova(f) ggplot(Predict(f, age, sex)) # plot age effect, 2 curves for 2 sexes survplot(f, sex) # time on x-axis, separate curves for each sex res <- resid(f, "scaledsch") time <- as.numeric(dimnames(res)[[1]]) z <- loess(res[,4] ~ time, span=0.50) # residuals for sex plot(time, fitted(z)) lines(supsmu(time, res[,4]),lty=2) plot(cox.zph(f,"identity")) #Easier approach for last few lines # latex(f) f <- cph(S ~ age + strat(sex), surv=TRUE) g <- Survival(f) # g is a function g(seq(.1,1,by=.1), stratum="sex=Male", type="poly") #could use stratum=2 med <- Quantile(f) plot(Predict(f, age, fun=function(x) med(lp=x))) #plot median survival # Fit a model that is quadratic in age, interacting with sex as strata # Compare standard errors of linear predictor values with those from # coxph # Use more stringent convergence criteria to match with coxph f <- cph(S ~ pol(age,2)*strat(sex), x=TRUE, eps=1e-9, iter.max=20) coef(f) se <- predict(f, se.fit=TRUE)$se.fit require(lattice) xyplot(se ~ age | sex, main='From cph') a <- c(30,50,70) comb <- data.frame(age=rep(a, each=2), sex=rep(levels(sex), 3)) p <- predict(f, comb, se.fit=TRUE)
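# Added aside (a sketch, not part of the original example): the manual
# assembly of confidence limits below can also be done in one step with
# Predict(), assuming the datadist 'dd' set earlier is still in effect:
# Predict(f, age=c(30,50,70), sex)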
comb$yhat <- p$linear.predictors comb$se <- p$se.fit z <- qnorm(.975) comb$lower <- p$linear.predictors - z*p$se.fit comb$upper <- p$linear.predictors + z*p$se.fit comb age2 <- age^2 f2 <- coxph(S ~ (age + age2)*strata(sex)) coef(f2) se <- predict(f2, se.fit=TRUE)$se.fit xyplot(se ~ age | sex, main='From coxph') comb <- data.frame(age=rep(a, each=2), age2=rep(a, each=2)^2, sex=rep(levels(sex), 3)) p <- predict(f2, newdata=comb, se.fit=TRUE) comb$yhat <- p$fit comb$se <- p$se.fit comb$lower <- p$fit - z*p$se.fit comb$upper <- p$fit + z*p$se.fit comb # g <- cph(Surv(hospital.charges) ~ age, surv=TRUE) # Cox model very useful for analyzing highly skewed data, censored or not # m <- Mean(g) # m(0) # Predicted mean charge for reference age #Fit a time-dependent covariable representing the instantaneous effect #of an intervening non-fatal event rm(age) set.seed(121) dframe <- data.frame(failure.time=1:10, event=rep(0:1,5), ie.time=c(NA,1.5,2.5,NA,3,4,NA,5,5,5), age=sample(40:80,10,rep=TRUE)) z <- ie.setup(dframe$failure.time, dframe$event, dframe$ie.time) S <- z$S ie.status <- z$ie.status attach(dframe[z$subs,]) # replicates all variables f <- cph(S ~ age + ie.status, x=TRUE, y=TRUE) #Must use x=TRUE,y=TRUE to get survival curves with time-dep. covariables #Get estimated survival curve for a 50-year old who has an intervening #non-fatal event at 5 days new <- data.frame(S=Surv(c(0,5), c(5,999), c(FALSE,FALSE)), age=rep(50,2), ie.status=c(0,1)) g <- survfit(f, new) plot(c(0,g$time), c(1,g$surv[,2]), type='s', xlab='Days', ylab='Survival Prob.') # Not certain about what columns represent in g$surv for survival5 # but appears to be for different ie.status #or: #g <- survest(f, new) #plot(g$time, g$surv, type='s', xlab='Days', ylab='Survival Prob.') #Compare with estimates when there is no intervening event new2 <- data.frame(S=Surv(c(0,5), c(5, 999), c(FALSE,FALSE)), age=rep(50,2), ie.status=c(0,0)) g2 <- survfit(f, new2) lines(c(0,g2$time), c(1,g2$surv[,2]), type='s', lty=2) #or: #g2 <- survest(f, new2) #lines(g2$time, g2$surv, type='s', lty=2) detach("dframe[z$subs, ]") options(datadist=NULL) } \keyword{survival} \keyword{models} \keyword{nonparametric} rms/man/latex.cph.Rd0000644000176200001440000000706313714237251014016 0ustar liggesusers\name{latex.cph} \alias{latex.cph} \alias{latex.lrm} \alias{latex.ols} \alias{latex.orm} \alias{latex.pphsm} \alias{latex.psm} \title{LaTeX Representation of a Fitted Cox Model} \description{Creates a file containing a LaTeX representation of the fitted model.} \usage{ \method{latex}{cph}(object, title, file='', append=FALSE, surv=TRUE, maxt=FALSE, which=NULL, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", dec=3, pretrans=TRUE, caption, digits=.Options$digits, size="", \dots) # for cph fit \method{latex}{lrm}(object, title, file, append, which, varnames, columns, inline, before, after, pretrans, caption, digits=.Options$digits, size="", \dots) # for lrm fit \method{latex}{ols}(object, title, file, append, which, varnames, columns, inline, before, after, pretrans, caption, digits=.Options$digits, size="", \dots) # ols fit \method{latex}{orm}(object, title, file, append, which, varnames, columns, inline, before, after, pretrans, caption, digits=.Options$digits, size="", intercepts=nrp < 10, \dots) # for orm fit \method{latex}{pphsm}(object, title, file, append, which=NULL, varnames, columns, inline, before, after, pretrans, caption, digits=.Options$digits, size="", \dots) # pphsm fit \method{latex}{psm}(object, title, file, append, 
which=NULL, varnames, columns, inline, before, after, pretrans, caption, digits=.Options$digits, size="", \dots) # psm fit } \arguments{ \item{object}{ a fit object created by an \code{rms} fitting function. } \item{title}{ignored} \item{file,append}{see \code{\link[Hmisc:latex]{latex.default}}. Defaults to the console. When using html/markdown, \code{file} is ignored.} \item{surv}{ if \code{surv=TRUE} was specified to \code{cph}, the underlying survival probabilities from \code{object$surv.summary} will be placed in a table unless \code{surv=FALSE}. } \item{maxt}{ if the maximum follow-up time in the data (\code{object$maxtime}) exceeds the last entry in \code{object$surv.summary}, underlying survival estimates at \code{object$maxtime} will be added to the table if \code{maxt=TRUE}. } \item{which,varnames,columns,inline,before,dec,pretrans}{see \code{\link[Hmisc]{latex.default}}} \item{after}{if not an empty string, added to end of markup if \code{inline=TRUE}} \item{caption}{a character string specifying a title for the equation to be centered and typeset in bold face. Default is no title. } \item{digits}{see \link{latexrms}} \item{size}{a LaTeX size to use, without the slash. Default is the prevailing size} \item{intercepts}{for \code{orm} fits. Default is to print intercepts if they are fewer than 10 in number. Set to \code{TRUE} or \code{FALSE} to force.} \item{\dots}{ignored} } \value{ the name of the created file, with class \code{c("latex","file")}. This object works with latex viewing and printing commands in Hmisc. } \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{latexrms}}, \code{\link[Hmisc]{rcspline.restate}}, \code{\link[Hmisc]{latex}} } \examples{ \dontrun{ units(ftime) <- "Day" f <- cph(Surv(ftime, death) ~ rcs(age)+sex, surv=TRUE, time.inc=60) w <- latex(f, file='f.tex') #Interprets fitted model and makes table of S0(t) #for t=0,60,120,180,... w #displays image, if viewer installed and file given above latex(f) # send LaTeX code to the console for knitr options(prType='html') latex(f) # for use with knitr and R Markdown using MathJax } } \keyword{regression} \keyword{character} \keyword{survival} \keyword{interface} \keyword{models} rms/man/ggplot.Predict.Rd0000644000176200001440000005205413714237251015015 0ustar liggesusers\name{ggplot.Predict} \alias{ggplot.Predict} \title{Plot Effects of Variables Estimated by a Regression Model Fit Using ggplot2} \description{ Uses \code{ggplot2} graphics to plot the effect of one or two predictors on the linear predictor or X beta scale, or on some transformation of that scale. The first argument specifies the result of the \code{Predict} function. The predictor is always plotted in its original coding. If \code{rdata} is given, a spike histogram is drawn showing the location/density of data values for the \eqn{x}-axis variable. If there is a \code{groups} (superposition) variable that generated separate curves, the data density specific to each class of points is shown. This assumes that the second variable was a factor variable. The histograms are drawn by \code{histSpikeg}. To plot effects instead of estimates (e.g., treatment differences as a function of interacting factors) see \code{contrast.rms} and \code{summary.rms}.
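For example, a minimal call sequence (a sketch assuming a fitted \code{rms} model \code{fit} with predictors \code{age} and \code{sex}, and a \code{datadist} in effect) is:\cr \code{p <- Predict(fit, age, sex)}\cr \code{ggplot(p)  # one curve per level of sex}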
} \usage{ \method{ggplot}{Predict}(data, mapping, formula=NULL, groups=NULL, aestype=c('color', 'linetype'), conf=c('fill', 'lines'), conflinetype=1, varypred=FALSE, sepdiscrete=c('no', 'list', 'vertical', 'horizontal'), subset, xlim., ylim., xlab, ylab, colorscale=function(...) scale_color_manual(..., values=c("#000000", "#E69F00", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7")), colfill='black', rdata=NULL, anova=NULL, pval=FALSE, size.anova=4, adj.subtitle, size.adj=2.5, perim=NULL, nlevels=3, flipxdiscrete=TRUE, legend.position='right', legend.label=NULL, vnames=c('labels','names'), abbrev=FALSE, minlength=6, layout=NULL, addlayer, histSpike.opts=list(frac=function(f) 0.01 + 0.02 * sqrt(f - 1)/sqrt(max(f, 2) - 1), side=1, nint=100), type=NULL, ggexpr=FALSE, height=NULL, width=NULL, ..., environment) } \arguments{ \item{data}{a data frame created by \code{Predict}} \item{mapping}{kept because of \code{ggplot} generic setup. If specified it will be assumed to be \code{formula}.} \item{formula}{ a \code{ggplot} faceting formula of the form \code{vertical variables ~ horizontal variables}, with variables separated by \code{*} if there is more than one variable on a side. If omitted, the formula will be built using assumptions on the list of variables that varied in the \code{Predict} call. When plotting multiple panels (for separate predictors), \code{formula} may be specified but by default no formula is constructed. } \item{groups}{an optional character string containing the name of one of the variables in \code{data} that is to be used as a grouping (superpositioning) variable. Set \code{groups=FALSE} to suppress superpositioning. By default, the second varying variable is used for superpositioning groups. You can also specify a length 2 string vector of variable names specifying two dimensions of superpositioning, identified by different aesthetics corresponding to the \code{aestype} argument. When plotting effects of more than one predictor, \code{groups} is a character string that specifies a single variable name in \code{data} that can be used to form panels. Only applies if using \code{rbind} to combine several \code{Predict} results. If there is more than one \code{groups} variable, confidence bands are suppressed because \code{ggplot2::geom_ribbon} does not handle the aesthetics correctly.} \item{aestype}{a string vector of aesthetic names corresponding to variables in the \code{groups} vector. Default is to use, in order, \code{color} and \code{linetype}. Other permissible values are \code{size}, \code{shape}.} \item{conf}{specify \code{conf="lines"} to show confidence bands with lines instead of the default filled ribbons} \item{conflinetype}{specify an alternative \code{linetype} for confidence intervals if \code{conf="lines"}} \item{varypred}{set to \code{TRUE} if \code{data} is the result of passing multiple \code{Predict} results, that represent different predictors, to \code{rbind.Predict}. This will cause the \code{.set.} variable created by \code{rbind} to be copied to the \code{.predictor.} variable.} \item{sepdiscrete}{set to something other than \code{"no"} to create separate graphics for continuous and discrete predictors. For discrete predictors, horizontal dot charts are produced. This allows use of the \code{ggplot2} \code{facet_wrap} function to make better use of space.
If \code{sepdiscrete="list"}, a list of two \code{grid} graphics objects is returned if both types of predictors are present (otherwise one object for the type that existed in the model). Set \code{sepdiscrete="vertical"} to put the two types of plots into one graphical object with continuous predictors on top and given a fraction of space relative to the number of continuous vs. number of discrete variables. Set \code{sepdiscrete="horizontal"} to get a horizontal arrangements with continuous variables on the left.} \item{subset}{a subsetting expression for restricting the rows of \code{data} that are used in plotting. For example, predictions may have been requested for males and females but one wants to plot only females.} \item{xlim.}{ This parameter is seldom used, as limits are usually controlled with \code{Predict}. Usually given as its legal abbreviation \code{xlim}. One reason to use \code{xlim} is to plot a \code{factor} variable on the x-axis that was created with the \code{cut2} function with the \code{levels.mean} option, with \code{val.lev=TRUE} specified to \code{plot.Predict}. In this case you may want the axis to have the range of the original variable values given to \code{cut2} rather than the range of the means within quantile groups. } \item{ylim.}{ Range for plotting on response variable axis. Computed by default. Usually specified using its legal definition \code{ylim}. } \item{xlab}{ Label for \code{x}-axis. Default is one given to \code{asis, rcs}, etc., which may have been the \code{"label"} attribute of the variable. } \item{ylab}{ Label for \code{y}-axis. If \code{fun} is not given, default is \code{"log Odds"} for \code{lrm}, \code{"log Relative Hazard"} for \code{cph}, name of the response variable for \code{ols}, \code{TRUE} or \code{log(TRUE)} for \code{psm}, or \code{"X * Beta"} otherwise. Specify \code{ylab=NULL} to omit \code{y}-axis labels. } \item{colorscale}{a \code{ggplot2} discrete scale function, e.g. \code{function(...) scale_color_brewer(..., palette='Set1', type='qual')}. The default is the colorblind-friendly palette including black in \url{http://www.cookbook-r.com/Graphs/Colors_(ggplot2)}. } \item{colfill}{a single character string or number specifying the fill color to use for \code{geom_ribbon} for shaded confidence bands. Alpha transparency of 0.2 is applied to any color specified.} \item{rdata}{a data frame containing the original raw data on which the regression model were based, or at least containing the \eqn{x}-axis and grouping variable. If \code{rdata} is present and contains the needed variables, the original data are added to the graph in the form of a spike histogram using \code{histSpikeg} in the Hmisc package. } \item{anova}{an object returned by \code{\link{anova.rms}}. If \code{anova} is specified, the overall test of association for predictor plotted is added as text to each panel, located at the spot at which the panel is most empty unless there is significant empty space at the top or bottom of the panel; these areas are given preference.} \item{pval}{specify \code{pval=TRUE} for \code{anova} to include not only the test statistic but also the P-value} \item{size.anova}{character size for the test statistic printed on the panel, mm} \item{adj.subtitle}{ Set to \code{FALSE} to suppress subtitling the graph with the list of settings of non-graphed adjustment values. Subtitles appear as captions with \code{ggplot2} using \code{labs(caption=)}. } \item{size.adj}{Size of adjustment settings in subtitles in mm. 
Default is 2.5.} \item{perim}{ \code{perim} specifies a function having two arguments. The first is the vector of values of the first variable that is about to be plotted on the x-axis. The second argument is the single value of the variable representing different curves, for the current curve being plotted. The function's returned value must be a logical vector whose length is the same as that of the first argument, with values \code{TRUE} if the corresponding point should be plotted for the current curve, \code{FALSE} otherwise. See one of the later examples. \code{perim} only applies if predictors were specified to \code{Predict}. } \item{nlevels}{ when \code{groups} and \code{formula} are not specified, if any panel variable has \code{nlevels} or fewer values, that variable is converted to a \code{groups} (superpositioning) variable. Set \code{nlevels=0} to prevent this behavior. For other situations, a non-numeric x-axis variable with \code{nlevels} or fewer unique values will cause a horizontal dot plot to be drawn instead of an x-y plot unless \code{flipxdiscrete=FALSE}. } \item{flipxdiscrete}{see \code{nlevels}} \item{legend.position}{\code{"right"} (the default for single-panel plots), \code{"left"}, \code{"bottom"}, \code{"top"}, a two-element numeric vector, or \code{"none"} to suppress. For multi-panel plots the default is \code{"top"}, and a legend only appears for the first (top left) panel.} \item{legend.label}{if omitted, group variable labels will be used to label the legend. Specify \code{legend.label=FALSE} to suppress using a legend name, or a character string or expression to specify the label. Can be a vector if there is more than one grouping variable.} \item{vnames}{applies to the case where multiple plots are produced separately by predictor. Set to \code{'names'} to use variable names instead of labels for these small plots.} \item{abbrev}{set to \code{TRUE} to abbreviate levels of predictors that are categorical to a minimum length of \code{minlength}} \item{minlength}{see \code{abbrev}} \item{layout}{for multi-panel plots a 2-vector specifying the number of rows and number of columns. If omitted will be computed from the number of panels to make as square as possible.} \item{addlayer}{a \code{ggplot2} expression consisting of one or more layers to add to the current plot} \item{histSpike.opts}{a list containing named elements that specify parameters to \code{\link[Hmisc:scat1d]{histSpikeg}} when \code{rdata} is given. The \code{col} parameter is usually derived from other plotting information and not specified by the user.} \item{type}{a value (\code{"l","p","b"}) to override default choices related to showing or connecting points. Especially useful for discrete x coordinate variables.} \item{ggexpr}{set to \code{TRUE} to have the function return the character string(s) constructed to invoke \code{ggplot} without executing the commands} \item{height,width}{used if \code{plotly} is in effect, to specify the \code{plotly} image in pixels. Default is to let \code{plotly} size the image.} \item{\dots}{ignored} \item{environment}{ignored; used to satisfy rules because of the generic ggplot} } \value{an object of class \code{"ggplot2"} ready for printing.
For the case where predictors were not specified to \code{Predict}, \code{sepdiscrete="list"} was given, and there were both continuous and discrete predictors in the model, a list of two graphics objects is returned.} \author{ Frank Harrell\cr Department of Biostatistics, Vanderbilt University\cr fh@fharrell.com } \references{ Fox J, Hong J (2009): Effect displays in R for multinomial and proportional-odds logit models: Extensions to the effects package. J Stat Software 32 No. 1. } \note{If plotting the effects of all predictors you can reorder the panels using for example \code{p <- Predict(fit); p$.predictor. <- factor(p$.predictor., v)} where \code{v} is a vector of predictor names specified in the desired order. } \seealso{ \code{\link{Predict}}, \code{\link{rbind.Predict}}, \code{\link{datadist}}, \code{\link{predictrms}}, \code{\link{anova.rms}}, \code{\link{contrast.rms}}, \code{\link{summary.rms}}, \code{\link{rms}}, \code{\link{rmsMisc}}, \code{\link{plot.Predict}}, \code{\link[Hmisc]{labcurve}}, \code{\link[Hmisc:scat1d]{histSpikeg}}, \code{\link[ggplot2]{ggplot}}, \code{\link[Hmisc]{Overview}} } \examples{ n <- 350 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) + .01 * (blood.pressure - 120) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) an <- anova(fit) # Plot effects in two vertical sub-panels with continuous predictors on top # ggplot(Predict(fit), sepdiscrete='vertical') # Plot effects of all 4 predictors with test statistics from anova, and P ggplot(Predict(fit), anova=an, pval=TRUE) # ggplot(Predict(fit), rdata=llist(blood.pressure, age)) # spike histogram plot for two of the predictors # p <- Predict(fit, name=c('age','cholesterol')) # Make 2 plots # ggplot(p) # p <- Predict(fit, age=seq(20,80,length=100), sex, conf.int=FALSE) # # Plot relationship between age and log # odds, separate curve for each sex, # ggplot(p, subset=sex=='female' | age > 30) # No confidence interval, suppress estimates for males <= 30 # p <- Predict(fit, age, sex) # ggplot(p, rdata=llist(age,sex)) # rdata= allows rug plots (1-dimensional scatterplots) # on each sex's curve, with sex- # specific density of age # If data were in data frame could have used that # p <- Predict(fit, age=seq(20,80,length=100), sex='male', fun=plogis) # works if datadist not used # ggplot(p, ylab=expression(hat(P))) # plot predicted probability in place of log odds # per <- function(x, y) x >= 30 # ggplot(p, perim=per) # suppress output for age < 30 but leave scale alone # Do ggplot2 faceting a few different ways p <- Predict(fit, age, sex, blood.pressure=c(120,140,160), cholesterol=c(180,200,215)) # ggplot(p) ggplot(p, cholesterol ~ blood.pressure) # ggplot(p, ~ cholesterol + blood.pressure) # color for sex, line type for blood.pressure: ggplot(p, groups=c('sex',
'blood.pressure')) # Add legend.position='top' to allow wider plot # Map blood.pressure to line thickness instead of line type: # ggplot(p, groups=c('sex', 'blood.pressure'), aestype=c('color', 'size')) # Plot the age effect as an odds ratio # comparing the age shown on the x-axis to age=30 years # ddist$limits$age[2] <- 30 # make 30 the reference value for age # Could also do: ddist$limits["Adjust to","age"] <- 30 # fit <- update(fit) # make new reference value take effect # p <- Predict(fit, age, ref.zero=TRUE, fun=exp) # ggplot(p, ylab='Age=x:Age=30 Odds Ratio', # addlayer=geom_hline(yintercept=1, col=gray(.8)) + # geom_vline(xintercept=30, col=gray(.8)) + # scale_y_continuous(trans='log', # breaks=c(.5, 1, 2, 4, 8))) # Compute predictions for three predictors, with superpositioning or # conditioning on sex, combined into one graph p1 <- Predict(fit, age, sex) p2 <- Predict(fit, cholesterol, sex) p3 <- Predict(fit, blood.pressure, sex) p <- rbind(age=p1, cholesterol=p2, blood.pressure=p3) ggplot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE) # ggplot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE, sepdiscrete='vert') \dontrun{ # For males at the median blood pressure and cholesterol, plot 3 types # of confidence intervals for the probability on one plot, for varying age ages <- seq(20, 80, length=100) p1 <- Predict(fit, age=ages, sex='male', fun=plogis) # standard pointwise p2 <- Predict(fit, age=ages, sex='male', fun=plogis, conf.type='simultaneous') # simultaneous p3 <- Predict(fit, age=c(60,65,70), sex='male', fun=plogis, conf.type='simultaneous') # simultaneous 3 pts # The previous only adjusts for a multiplicity of 3 points instead of 100 f <- update(fit, x=TRUE, y=TRUE) g <- bootcov(f, B=500, coef.reps=TRUE) p4 <- Predict(g, age=ages, sex='male', fun=plogis) # bootstrap percentile p <- rbind(Pointwise=p1, 'Simultaneous 100 ages'=p2, 'Simultaneous 3 ages'=p3, 'Bootstrap nonparametric'=p4) # as.data.frame so will call built-in ggplot ggplot(as.data.frame(p), aes(x=age, y=yhat)) + geom_line() + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0)+ facet_wrap(~ .set., ncol=2) # Plots for a parametric survival model n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) t <- -log(runif(n))/h label(t) <- 'Follow-up Time' e <- ifelse(t<=cens,1,0) t <- pmin(t, cens) units(t) <- "Year" ddist <- datadist(age, sex) Srv <- Surv(t,e) # Fit log-normal survival model and plot median survival time vs. age f <- psm(Srv ~ rcs(age), dist='lognormal') med <- Quantile(f) # Creates function to compute quantiles # (median by default) p <- Predict(f, age, fun=function(x) med(lp=x)) ggplot(p, ylab="Median Survival Time") # Note: confidence intervals from this method are approximate since # they don't take into account estimation of scale parameter # Fit an ols model to log(y) and plot the relationship between x1 # and the predicted mean(y) on the original scale without assuming # normality of residuals; use the smearing estimator # See help file for rbind.Predict for a method of showing two # types of confidence intervals simultaneously.
# Add raw data scatterplot to graph set.seed(1) x1 <- runif(300) x2 <- runif(300) ddist <- datadist(x1, x2); options(datadist='ddist') y <- exp(x1 + x2 - 1 + rnorm(300)) f <- ols(log(y) ~ pol(x1,2) + x2) r <- resid(f) smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean') formals(smean) <- list(yhat=numeric(0), res=r[! is.na(r)]) #smean$res <- r[! is.na(r)] # define default res argument to function ggplot(Predict(f, x1, fun=smean), ylab='Predicted Mean on y-scale', addlayer=geom_point(aes(x=x1, y=y), data.frame(x1, y))) # Had ggplot not added a subtitle (i.e., if x2 were not present), you # could have done ggplot(Predict(), ylab=...) + geom_point(...) } # Make an 'interaction plot', forcing the x-axis variable to be # plotted at integer values but labeled with category levels n <- 100 set.seed(1) gender <- c(rep('male', n), rep('female',n)) m <- sample(c('a','b'), 2*n, TRUE) d <- datadist(gender, m); options(datadist='d') anxiety <- runif(2*n) + .2*(gender=='female') + .4*(gender=='female' & m=='b') tapply(anxiety, llist(gender,m), mean) f <- ols(anxiety ~ gender*m) p <- Predict(f, gender, m) # ggplot(p) # horizontal dot chart; usually preferred for categorical predictors # ggplot(p, flipxdiscrete=FALSE) # back to vertical ggplot(p, groups='gender') ggplot(p, ~ m, groups=FALSE, flipxdiscrete=FALSE) options(datadist=NULL) \dontrun{ # Example in which separate curves are shown for 4 income values # For each curve the estimated percentage of voters voting for # the Democratic party is plotted against the percent of voters # who graduated from college. Data are county-level percents. incomes <- seq(22900, 32800, length=4) # equally spaced to outer quintiles p <- Predict(f, college, income=incomes, conf.int=FALSE) ggplot(p, xlim=c(0,35), ylim=c(30,55)) # Erase end portions of each curve where there are fewer than 10 counties having # percent of college graduates to the left of the x-coordinate being plotted, # for the subset of counties having median family income within 1650 # of the target income for the curve show.pts <- function(college.pts, income.pt) { s <- abs(income - income.pt) < 1650 #assumes income known to top frame x <- college[s] x <- sort(x[!is.na(x)]) n <- length(x) low <- x[10]; high <- x[n-9] college.pts >= low & college.pts <= high } ggplot(p, xlim=c(0,35), ylim=c(30,55), perim=show.pts) # Rename variables for better plotting of a long list of predictors f <- ...
p <- Predict(f) re <- c(trt='treatment', diabet='diabetes', sbp='systolic blood pressure') for(n in names(re)) { names(p)[names(p)==n] <- re[n] p$.predictor.[p$.predictor.==n] <- re[n] } ggplot(p) } } \keyword{models} \keyword{hplot} \keyword{htest} rms/man/rmsMisc.Rd0000644000176200001440000004316613745313140013545 0ustar liggesusers\name{rmsMisc} \alias{rmsMisc} \alias{calibrate.rms} \alias{DesignAssign} \alias{vcov.rms} \alias{vcov.cph} \alias{vcov.Glm} \alias{vcov.Gls} \alias{vcov.lrm} \alias{vcov.ols} \alias{vcov.orm} \alias{vcov.psm} \alias{oos.loglik} \alias{oos.loglik.ols} \alias{oos.loglik.lrm} \alias{oos.loglik.cph} \alias{oos.loglik.psm} \alias{oos.loglik.Glm} \alias{Getlim} \alias{Getlimi} \alias{related.predictors} \alias{interactions.containing} \alias{combineRelatedPredictors} \alias{param.order} \alias{Penalty.matrix} \alias{Penalty.setup} \alias{logLik.Gls} \alias{logLik.ols} \alias{logLik.rms} \alias{AIC.rms} \alias{nobs.rms} \alias{lrtest} \alias{univarLR} \alias{Newlabels} \alias{Newlevels} \alias{Newlabels.rms} \alias{Newlevels.rms} \alias{rmsArgs} \alias{print.rms} \alias{print.lrtest} \alias{survest.rms} \alias{prModFit} \alias{prStats} \alias{reListclean} \alias{formatNP} \alias{latex.naprint.delete} \alias{html.naprint.delete} \alias{removeFormulaTerms} \title{Miscellaneous Design Attributes and Utility Functions} \description{ These functions are used internally to \code{anova.rms}, \code{fastbw}, etc., to retrieve various attributes of a design. These functions allow some fitting functions not in the \code{rms} series (e.g., \code{lm}, \code{glm}) to be used with \code{rms.Design}, \code{fastbw}, and similar functions. For \code{vcov}, there are several functions. The method for \code{orm} fits is a bit different because the covariance matrix stored in the fit object only deals with the middle intercept. See the \code{intercepts} argument for more options. There is a method for \code{lrm} that also allows non-default intercept(s) to be selected (default is first). The \code{oos.loglik} function for each type of model implemented computes the -2 log likelihood for out-of-sample data (i.e., data not necessarily used to fit the model) evaluated at the parameter estimates from a model fit. Vectors for the model's linear predictors and response variable must be given. \code{oos.loglik} is used primarily by \code{bootcov}. The \code{Getlim} function retrieves distribution summaries from the fit or from a \code{datadist} object. It handles getting summaries from both sources to fill in characteristics for variables that were not defined during the model fit. \code{Getlimi} returns the summary for an individual model variable. \code{Mean} is a generic function that creates an R function that calculates the expected value of the response variable given a fit from \code{rms} or \code{rmsb}. The \code{related.predictors} function returns a list containing variable numbers that are directly or indirectly related to each predictor. The \code{interactions.containing} function returns indexes of interaction effects containing a given predictor. The \code{param.order} function returns a vector of logical indicators for whether parameters are associated with certain types of effects (nonlinear, interaction, nonlinear interaction). \code{combineRelatedPredictors} creates a list of inter-connected main effects and interactions for use with \code{predictrms} with \code{type='ccterms'} (useful for \code{gIndex}).
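As a brief sketch of how a few of these utilities fit together (the \code{lrm} fit \code{f} and the new data frame \code{dnew}, containing the predictors and a response \code{y}, are hypothetical):\cr \code{lp <- predict(f, dnew)          # linear predictor for new data}\cr \code{oos.loglik(f, lp=lp, y=dnew$y)  # out-of-sample -2 log likelihood}\cr \code{vcov(f)                         # covariance matrix of coefficients}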
The \code{Penalty.matrix} function builds a default penalty matrix for non-intercept term(s) for use in penalized maximum likelihood estimation. The \code{Penalty.setup} function takes a constant or list describing penalty factors for each type of term in the model and generates the proper vector of penalty multipliers for the current model. \code{logLik.rms} returns the maximized log likelihood for the model, whereas \code{AIC.rms} returns the AIC. The latter function has an optional argument for computing AIC on a "chi-square" scale (model likelihood ratio chi-square minus twice the regression degrees of freedom). \code{logLik.ols} handles the case for \code{ols}, just by invoking \code{logLik.lm} in the \code{stats} package. \code{logLik.Gls} is also defined. \code{nobs.rms} returns the number of observations used in the fit. The \code{lrtest} function does likelihood ratio tests for two nested models, from fits that have \code{stats} components with \code{"Model L.R."} values. For models such as \code{psm, survreg, ols, lm} which have scale parameters, it is assumed that the scale parameter for the smaller model is fixed at the estimate from the larger model (see the example). \code{univarLR} takes a multivariable model fit object from \code{rms} and re-fits a sequence of models containing one predictor at a time. It prints a table of likelihood ratio \eqn{\chi^2}{chi-square} statistics from these fits. The \code{Newlabels} function is used to override the variable labels in a fit object. Likewise, \code{Newlevels} can be used to create a new fit object with levels of categorical predictors changed. These two functions are especially useful when constructing nomograms. \code{rmsArgs} handles \dots arguments to functions such as \code{Predict}, \code{summary.rms}, \code{nomogram} so that variables to vary may be specified without values (after an equals sign). \code{prModFit} is the workhorse for the \code{print} methods for highest-level \code{rms} model fitting functions, handling regular, html, and LaTeX printing, the latter two resulting in html or LaTeX code written to the console, automatically ready for \code{knitr}. The work of printing summary statistics is done by \code{prStats}, which uses the Hmisc \code{print.char.matrix} function to print overall model statistics if \code{options(prType=)} was not set to \code{"latex"} or \code{"html"}. Otherwise it generates customized LaTeX or html code. The LaTeX longtable and epic packages must be in effect to use LaTeX. \code{reListclean} allows one to rename a subset of a named list, ignoring the previous names and not concatenating them as \R does. It also removes \code{NULL} elements and (by default) elements that are \code{NA}, as when an optional named element is fetched that doesn't exist. \code{formatNP} is a function to format a vector of numerics. If \code{digits} is specified, \code{formatNP} will make sure that the formatted representation has \code{digits} positions to the right of the decimal place. If \code{lang="latex"} it will translate any scientific notation to LaTeX math form. If \code{lang="html"}, it will convert to html. If \code{pvalue=TRUE}, it will replace formatted values that are too small to display at the given precision with "< 0.0001" (if \code{digits=4}). \code{latex.naprint.delete} will, if appropriate, use LaTeX to draw a dot chart of frequency of variable \code{NA}s related to model fits. \code{html.naprint.delete} does the same thing in the RStudio R markdown context, using \code{Hmisc::dotchartp} (which uses \code{plotly}) for drawing any needed dot chart.
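As a short illustration of \code{lrtest} (a sketch; the response \code{y} and predictors \code{x1}, \code{x2} are hypothetical):\cr \code{f1 <- lrm(y ~ x1)}\cr \code{f2 <- lrm(y ~ x1 + rcs(x2,4))}\cr \code{lrtest(f1, f2)  # LR chi-square test of the added rcs(x2,4) terms}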
\code{removeFormulaTerms} removes one or more terms from a model formula, using strictly character manipulation. This handles problems such as \code{[.terms} removing \code{offset()} if you subset on anything. The function can also be used to remove the dependent variable(s) from the formula. } \usage{ \method{vcov}{rms}(object, regcoef.only=TRUE, intercepts='all', \dots) \method{vcov}{cph}(object, regcoef.only=TRUE, \dots) \method{vcov}{Glm}(object, regcoef.only=TRUE, intercepts='all', \dots) \method{vcov}{Gls}(object, intercepts='all', \dots) \method{vcov}{lrm}(object, regcoef.only=TRUE, intercepts='all', \dots) \method{vcov}{ols}(object, regcoef.only=TRUE, \dots) \method{vcov}{orm}(object, regcoef.only=TRUE, intercepts='mid', \dots) \method{vcov}{psm}(object, regcoef.only=TRUE, \dots) # Given Design attributes (atr) and number of intercepts (non.slopes), # creates an R-format assign list from the terms object Terms DesignAssign(atr, non.slopes, Terms) oos.loglik(fit, \dots) \method{oos.loglik}{ols}(fit, lp, y, \dots) \method{oos.loglik}{lrm}(fit, lp, y, \dots) \method{oos.loglik}{cph}(fit, lp, y, \dots) \method{oos.loglik}{psm}(fit, lp, y, \dots) \method{oos.loglik}{Glm}(fit, lp, y, \dots) Getlim(at, allow.null=FALSE, need.all=TRUE) Getlimi(name, Limval, need.all=TRUE) related.predictors(at, type=c("all","direct")) interactions.containing(at, pred) combineRelatedPredictors(at) param.order(at, term.order) Penalty.matrix(at, X) Penalty.setup(at, penalty) \method{logLik}{Gls}(object, \dots) \method{logLik}{ols}(object, \dots) \method{logLik}{rms}(object, \dots) \method{AIC}{rms}(object, \dots, k=2, type=c('loglik', 'chisq')) \method{nobs}{rms}(object, \dots) lrtest(fit1, fit2) \method{print}{lrtest}(x, \dots) univarLR(fit) Newlabels(fit, \dots) Newlevels(fit, \dots) \method{Newlabels}{rms}(fit, labels, \dots) \method{Newlevels}{rms}(fit, levels, \dots) prModFit(x, title, w, digits=4, coefs=TRUE, footer=NULL, lines.page=40, long=TRUE, needspace, subtitle=NULL, \dots) prStats(labels, w, lang=c("plain", "latex", "html")) reListclean(\dots, na.rm=TRUE) formatNP(x, digits=NULL, pvalue=FALSE, lang=c("plain", "latex", "html")) \method{latex}{naprint.delete}(object, file="", append=TRUE, \dots) \method{html}{naprint.delete}(object, \dots) removeFormulaTerms(form, which=NULL, delete.response=FALSE) } \arguments{ \item{fit}{result of a fitting function} \item{object}{result of a fitting function} \item{regcoef.only}{For fits such as parametric survival models which have a final row and column of the covariance matrix for a non-regression parameter such as a log(scale) parameter, setting \code{regcoef.only=TRUE} causes only the first \code{p} rows and columns of the covariance matrix to be returned, where \code{p} is the length of \code{object$coef}. } \item{intercepts}{set to \code{"none"} to omit any rows and columns related to intercepts. Set to an integer scalar or vector to include particular intercept elements. Set to \code{'all'} to include all intercepts, or for \code{orm} to \code{"mid"} to use the default for \code{orm}. The default is to use the first for \code{lrm} and the median intercept for \code{orm}. } \item{at}{ \code{Design} element of a fit } \item{pred}{ index of a predictor variable (main effect) } \item{fit1, fit2}{ fit objects from \code{lrm,ols,psm,cph} etc. It doesn't matter which fit object is the sub-model. } \item{lp}{ linear predictor vector for \code{oos.loglik}. For proportional odds ordinal logistic models, this should have used the first intercept only.
If \code{lp} and \code{y} are omitted, the -2 log likelihood for the original fit is returned. } \item{y}{ values of a new vector of responses passed to \code{oos.loglik}. } \item{name}{ the name of a variable in the model } \item{Limval}{ an object returned by \code{Getlim} } \item{allow.null}{ prevents \code{Getlim} from issuing an error message if no limits are found in the fit or in the object pointed to by \code{options(datadist=)} } \item{need.all}{ set to \code{FALSE} to prevent \code{Getlim} or \code{Getlimi} from issuing an error message if data for a variable are not found } \item{type}{ For \code{related.predictors}, set to \code{"direct"} to return lists of indexes of directly related factors only (those in interactions with the predictor). For \code{AIC.rms}, \code{type} specifies the basis on which to return AIC. The default is minus twice the maximized log likelihood plus \code{k} times the degrees of freedom counting intercept(s). Specify \code{type='chisq'} to get a penalized model likelihood ratio chi-square instead. } \item{term.order}{ 1 for all parameters, 2 for all parameters associated with either nonlinear or interaction effects, 3 for nonlinear effects (main or interaction), 4 for interaction effects, 5 for nonlinear interaction effects. } \item{X}{ a design matrix, not including columns for intercepts } \item{penalty}{ a vector or list specifying penalty multipliers for types of model terms } \item{k}{the multiplier of the degrees of freedom to be used in computing AIC. The default is 2.} \item{x}{a result of \code{lrtest}, or the result of a high-level model fitting function (for \code{prModFit})} \item{labels}{ a character vector specifying new labels for variables in a fit. To give new labels for all variables, you can specify \code{labels} of the form \code{labels=c("Age in Years","Cholesterol")}, where the list of new labels is assumed to be the length of all main effect-type variables in the fit and in their original order in the model formula. You may specify a named vector to give new labels in random order or for a subset of the variables, e.g., \code{labels=c(age="Age in Years",chol="Cholesterol")}. For \code{prStats}, \code{labels} is a list with major column headings, which can themselves be vectors that are then stacked vertically. } \item{levels}{ a list of named vectors specifying new level labels for categorical predictors. This will override \code{parms} as well as \code{datadist} information (if available) that was stored with the fit. } \item{title}{a single character string used to specify an overall title for the regression fit, which is printed first by \code{prModFit}. Set to \code{""} to suppress the title.} \item{w}{For \code{prModFit}, a special list of lists, with each list element specifying information about a block of information to include in the \code{print} output for a fit. For \code{prStats}, \code{w} is a list of statistics to print, elements of which can be vectors that are stacked vertically. Unnamed elements specify number of digits to the right of the decimal place to which to round (\code{NA} means use \code{format} without rounding, as with integers and floating point values). Negative values of \code{digits} indicate that the value is a P-value to be formatted with \code{formatNP}. Digits are recycled as needed.
} \item{digits}{number of digits to the right of the decimal point, for formatting numeric values in printed output} \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{footer}{a character string to appear at the bottom of the regression model output} \item{file}{name of file to which to write model output} \item{append}{specify \code{append=FALSE} when using \code{file} and you want to start over instead of adding to an existing file.} \item{lang}{specifies the typesetting language: plain text, LaTeX, or html} \item{lines.page}{see \code{\link[Hmisc]{latex}}} \item{long}{set to \code{FALSE} to suppress printing of formula and certain other model output} \item{needspace}{optional character string to insert inside a LaTeX needspace macro call before the statistics table and before the coefficient matrix, to avoid bad page splits. This assumes the LaTeX needspace style is available. Example: \code{needspace='6\\baselineskip'} or \code{needspace='1.5in'}.} \item{subtitle}{optional vector of character strings containing subtitles that will appear under \code{title} but not bolded} \item{na.rm}{set to \code{FALSE} to keep \code{NA}s in the vector created by \code{reListclean}} \item{pvalue}{set to \code{TRUE} if you want values below 10 to the minus \code{digits} to be formatted as less than that threshold, e.g., \code{"< 0.0001"} when \code{digits=4}} \item{form}{a formula object} \item{which}{a vector of one or more character strings specifying the names of functions that are called from a formula, e.g., \code{"cluster"}. By default no right-hand-side terms are removed.} \item{delete.response}{set to \code{TRUE} to remove the dependent variable(s) from the formula} \item{atr, non.slopes, Terms}{\code{Design} function attributes, number of intercepts, and \code{terms} object} \item{\dots}{other arguments. For \code{reListclean} this contains the elements being extracted. For \code{prModFit} this information is passed to the Hmisc \code{latexTabular} function when a block of output is a vector to be formatted in LaTeX.} } \value{ \code{vcov} returns a variance-covariance matrix. \code{oos.loglik} returns a scalar -2 log likelihood value. \code{Getlim} returns a list with components \code{limits} and \code{values}, either stored in \code{fit} or retrieved from the object created by \code{datadist} and pointed to in \code{options(datadist=)}. \code{related.predictors} and \code{combineRelatedPredictors} return a list of vectors, and \code{interactions.containing} returns a vector. \code{param.order} returns a logical vector corresponding to non-strata terms in the model. \code{Penalty.matrix} returns a symmetric matrix with dimension equal to the number of slopes in the model. For all but categorical predictor main effect elements, the matrix is diagonal with values equal to the variances of the columns of \code{X}. For segments corresponding to the \code{c-1} dummy variables for a \code{c}-category predictor, \code{Penalty.matrix} contains a \code{c-1} x \code{c-1} sub-matrix that is constructed so that a quadratic form with \code{Penalty.matrix} in the middle computes the sum of squared differences in parameter values about the mean, including a portion for the reference cell in which the parameter is by definition zero. \code{Newlabels} returns a new fit object with the labels adjusted. \code{reListclean} returns a vector of named (by its arguments) elements.
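For instance (a minimal sketch): \preformatted{reListclean(A=1, B=NULL, C=NA, D=4)
# drops the NULL element and, with na.rm=TRUE, the NA element,
# yielding the named vector c(A=1, D=4)}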
\code{formatNP} returns a character vector. \code{removeFormulaTerms} returns a formula object. } \seealso{ \code{\link{rms}}, \code{\link{fastbw}}, \code{\link{anova.rms}}, \code{\link{summary.lm}}, \code{\link{summary.glm}}, \code{\link{datadist}}, \code{\link{vif}}, \code{\link{bootcov}}, \code{\link[Hmisc]{latex}}, \code{\link[Hmisc]{latexTabular}}, \code{\link[Hmisc:latex]{latexSN}}, \code{\link[Hmisc]{print.char.matrix}} } \examples{ \dontrun{ f <- psm(S ~ x1 + x2 + sex + race, dist='gau') g <- psm(S ~ x1 + sex + race, dist='gau', fixed=list(scale=exp(f$parms))) lrtest(f, g) g <- Newlabels(f, c(x2='Label for x2')) g <- Newlevels(g, list(sex=c('Male','Female'),race=c('B','W'))) nomogram(g) } } \keyword{models} \keyword{methods} rms/man/Rq.Rd0000644000176200001440000000757013667447632012516 0ustar liggesusers\name{Rq} \Rdversion{1.1} \alias{Rq} \alias{RqFit} \alias{print.Rq} \alias{latex.Rq} \alias{predict.Rq} \title{rms Package Interface to quantreg Package} \description{ The \code{Rq} function is the \code{rms} front-end to the \code{quantreg} package's \code{rq} function. \code{print} and \code{latex} methods are also provided, and a fitting function \code{RqFit} is defined for use in bootstrapping, etc. Its result is a function definition. For the \code{print} method, format of output is controlled by the user previously running \code{options(prType="lang")} where \code{lang} is \code{"plain"} (the default), \code{"latex"}, or \code{"html"}. For the \code{latex} method, \code{html} will actually be used if \code{options(prType='html')}. } \usage{ Rq(formula, tau = 0.5, data=environment(formula), subset, weights, na.action=na.delete, method = "br", model = FALSE, contrasts = NULL, se = "nid", hs = TRUE, x = FALSE, y = FALSE, ...) \method{print}{Rq}(x, digits=4, coefs=TRUE, title, \dots) \method{latex}{Rq}(object, file = paste(first.word(deparse(substitute(object))), ".tex", sep = ""), append=FALSE, which, varnames, columns=65, inline=FALSE, caption=NULL, ...) \method{predict}{Rq}(object, \dots, kint=1, se.fit=FALSE) RqFit(fit, wallow=TRUE, passdots=FALSE) } \arguments{ \item{formula}{model formula} \item{tau}{ the single quantile to estimate. Unlike \code{rq} you cannot estimate more than one quantile at one model fitting. } \item{data,subset,weights,na.action,method,model,contrasts,se,hs}{see \code{\link[quantreg]{rq}}} \item{x}{set to \code{TRUE} to store the design matrix with the fit. For \code{print}, \code{x} is an \code{Rq} object.} \item{y}{set to \code{TRUE} to store the response vector with the fit} \item{\dots}{ other arguments passed to one of the \code{rq} fitting routines. For \code{latex.Rq} these are optional arguments passed to \code{latexrms}. Ignored for \code{print.Rq}. For \code{predict.Rq} this is usually just a \code{newdata} argument. } \item{digits}{ number of significant digits used in formatting results in \code{print.Rq}. } \item{coefs}{specify \code{coefs=FALSE} to suppress printing the table of model coefficients, standard errors, etc. Specify \code{coefs=n} to print only the first \code{n} regression coefficients in the model.} \item{title}{a character string title to be passed to \code{prModFit}} \item{object}{an object created by \code{Rq}} \item{file,append,which,varnames,columns,inline,caption}{see \code{\link{latexrms}}} \item{kint}{ignored} \item{se.fit}{set to \code{TRUE} to obtain standard errors of predicted quantiles} \item{fit}{an object created by \code{Rq}} \item{wallow}{ set to \code{TRUE} if \code{weights} are allowed in the current context.
} \item{passdots}{ set to \code{TRUE} if \dots may be passed to the fitter} } \value{ \code{Rq} returns a list of classes \code{"rms"}, \code{"lassorq"} or \code{"scadrq"} (if applicable), \code{"Rq"}, and \code{"rq"}. \code{RqFit} returns a function definition. \code{latex.Rq} returns an object of class \code{"latex"}. } \author{ Frank Harrell } \note{ The author and developer of methodology in the \code{quantreg} package is Roger Koenker. } \seealso{ \code{\link[quantreg]{rq}}, \code{\link{prModFit}}, \code{\link{orm}} } \examples{ \dontrun{ set.seed(1) n <- 100 x1 <- rnorm(n) y <- exp(x1 + rnorm(n)/4) dd <- datadist(x1); options(datadist='dd') fq2 <- Rq(y ~ pol(x1,2)) anova(fq2) fq3 <- Rq(y ~ pol(x1,2), tau=.75) anova(fq3) pq2 <- Predict(fq2, x1) pq3 <- Predict(fq3, x1) p <- rbind(Median=pq2, Q3=pq3) plot(p, ~ x1 | .set.) # For superpositioning, with true curves superimposed a <- function(x, y, ...) { x <- unique(x) col <- trellis.par.get('superpose.line')$col llines(x, exp(x), col=col[1], lty=2) llines(x, exp(x + qnorm(.75)/4), col=col[2], lty=2) } plot(p, addpanel=a) } } \keyword{models} \keyword{nonparametric} rms/man/calibrate.Rd0000644000176200001440000002500313714237251014050 0ustar liggesusers\name{calibrate} \alias{calibrate} \alias{calibrate.default} \alias{calibrate.cph} \alias{calibrate.psm} \alias{print.calibrate} \alias{print.calibrate.default} \alias{plot.calibrate} \alias{plot.calibrate.default} \title{ Resampling Model Calibration } \description{ Uses bootstrapping or cross-validation to get bias-corrected (overfitting-corrected) estimates of predicted vs. observed values based on subsetting predictions into intervals (for survival models) or on nonparametric smoothers (for other models). There are calibration functions for Cox (\code{cph}), parametric survival models (\code{psm}), binary and ordinal logistic models (\code{lrm}), and ordinary least squares (\code{ols}). For survival models, "predicted" means predicted survival probability at a single time point, and "observed" refers to the corresponding Kaplan-Meier survival estimate, stratifying on intervals of predicted survival, or, if the \code{polspline} package is installed, the predicted survival probability as a function of transformed predicted survival probability using the flexible hazard regression approach (see the \code{val.surv} function for details). For logistic and linear models, a nonparametric calibration curve is estimated over a sequence of predicted values. The fit must have specified \code{x=TRUE, y=TRUE}. The \code{print} and \code{plot} methods for \code{lrm} and \code{ols} models (which use \code{calibrate.default}) print the mean absolute error in predictions, the mean squared error, and the 0.9 quantile of the absolute error. Here, error refers to the difference between the predicted values and the corresponding bias-corrected calibrated values. Below, the second, third, and fourth invocations of \code{calibrate} are, respectively, for \code{ols} and \code{lrm}, \code{cph}, and \code{psm}. The first and second \code{plot} invocations are, respectively, for \code{lrm} and \code{ols} fits and for all other fits.
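A typical binary logistic workflow looks like this (a minimal sketch with hypothetical variables; see \code{\link{validate}} for the resampling arguments): \preformatted{f   <- lrm(death ~ rcs(age, 4) + sex, x=TRUE, y=TRUE)
cal <- calibrate(f, method='boot', B=200)
plot(cal)    # bias-corrected calibration curve and error summaries}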
} \usage{ calibrate(fit, \dots) \method{calibrate}{default}(fit, predy, method=c("boot","crossvalidation",".632","randomization"), B=40, bw=FALSE, rule=c("aic","p"), type=c("residual","individual"), sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint, smoother="lowess", digits=NULL, \dots) \method{calibrate}{cph}(fit, cmethod=c('hare', 'KM'), method="boot", u, m=150, pred, cuts, B=40, bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, what="observed-predicted", tol=1e-12, maxdim=5, \dots) \method{calibrate}{psm}(fit, cmethod=c('hare', 'KM'), method="boot", u, m=150, pred, cuts, B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, what="observed-predicted", tol=1e-12, maxiter=15, rel.tolerance=1e-5, maxdim=5, \dots) \method{print}{calibrate}(x, B=Inf, \dots) \method{print}{calibrate.default}(x, B=Inf, \dots) \method{plot}{calibrate}(x, xlab, ylab, subtitles=TRUE, conf.int=TRUE, cex.subtitles=.75, riskdist=TRUE, add=FALSE, scat1d.opts=list(nhistSpike=200), par.corrected=NULL, \dots) \method{plot}{calibrate.default}(x, xlab, ylab, xlim, ylim, legend=TRUE, subtitles=TRUE, cex.subtitles=.75, riskdist=TRUE, scat1d.opts=list(nhistSpike=200), \dots) } \arguments{ \item{fit}{ a fit from \code{ols}, \code{lrm}, \code{cph} or \code{psm} } \item{x}{an object created by \code{calibrate}} \item{method, B, bw, rule, type, sls, aics, force, estimates}{see \code{\link{validate}}. For \code{print.calibrate}, \code{B} is an upper limit on the number of resamples for which information is printed about which variables were selected in each model re-fit. Specify zero to suppress printing. Default is to print all re-samples. } \item{cmethod}{method for validating survival predictions using right-censored data. The default is \code{cmethod='hare'} to use the \code{hare} function in the \code{polspline} package. Specify \code{cmethod='KM'} to use less precise stratified Kaplan-Meier estimates. If the \code{polspline} package is not available, the procedure reverts to \code{cmethod='KM'}. } \item{u}{ the time point for which to validate predictions for survival models. For \code{cph} fits, you must have specified \code{surv=TRUE, time.inc=u}, where \code{u} is the constant specifying the time to predict. } \item{m}{ group predicted \code{u}-time units survival into intervals containing \code{m} subjects on the average (for survival models only) } \item{pred}{ vector of predicted survival probabilities at which to evaluate the calibration curve. By default, the low and high prediction values from \code{datadist} are used, which for large sample size is the 10th smallest to the 10th largest predicted probability.} \item{cuts}{ actual cut points for predicted survival probabilities. You may specify only one of \code{m} and \code{cuts} (for survival models only) } \item{pr}{ set to \code{TRUE} to print intermediate results for each re-sample } \item{what}{ The default is \code{"observed-predicted"}, meaning to estimate optimism in this difference. This is preferred as it accounts for skewed distributions of predicted probabilities in outer intervals. You can also specify \code{"observed"}. This argument applies to survival models only.
} \item{tol}{criterion for matrix singularity (default is \code{1e-12})} \item{maxdim}{see \code{\link[polspline]{hare}}} \item{maxiter}{for \code{psm}, this is passed to \code{\link[survival]{survreg.control}} (default is 15 iterations) } \item{rel.tolerance}{parameter passed to \code{\link[survival]{survreg.control}} for \code{psm} (default is 1e-5). } \item{predy}{ a scalar or vector of predicted values to calibrate (for \code{lrm}, \code{ols}). Default is 50 equally spaced points between the 5th smallest and the 5th largest predicted values. For \code{lrm} the predicted values are probabilities (see \code{kint}). } \item{kint}{ For an ordinal logistic model, the default is to calibrate the predicted probability that \eqn{Y\geq} the middle level. Specify \code{kint} to specify the intercept to use, e.g., \code{kint=2} means to calibrate \eqn{Prob(Y\geq b)}, where \eqn{b} is the second level of \eqn{Y}. } \item{smoother}{ a function in two variables which produces \eqn{x}- and \eqn{y}-coordinates by smoothing the input \code{y}. The default is to use \code{lowess(x, y, iter=0)}. } \item{digits}{If specified, predicted values are rounded to \code{digits} digits before passing to the smoother. Occasionally, large predicted values on the logit scale will lead to predicted probabilities very near 1 that should be treated as 1, and the \code{round} function will fix that. Applies to \code{calibrate.default}.} \item{\dots}{ other arguments to pass to \code{predab.resample}, such as \code{group}, \code{cluster}, and \code{subset}. Also, other arguments for \code{plot}. } \item{xlab}{ defaults to "Predicted x-units Survival" or to a suitable label for other models } \item{ylab}{ defaults to "Fraction Surviving x-units" or to a suitable label for other models } \item{xlim,ylim}{2-vectors specifying x- and y-axis limits, if not using defaults} \item{subtitles}{ set to \code{FALSE} to suppress subtitles in plot describing method and for \code{lrm} and \code{ols} the mean absolute error and original sample size } \item{conf.int}{ set to \code{FALSE} to suppress plotting 0.95 confidence intervals for Kaplan-Meier estimates } \item{cex.subtitles}{character size for plotting subtitles} \item{riskdist}{set to \code{FALSE} to suppress the distribution of predicted risks (survival probabilities) from being plotted} \item{add}{set to \code{TRUE} to add the calibration plot to an existing plot} \item{scat1d.opts}{a list specifying options to send to \code{scat1d} if \code{riskdist=TRUE}. See \code{\link[Hmisc]{scat1d}}.} \item{par.corrected}{a list specifying graphics parameters \code{col}, \code{lty}, \code{lwd}, \code{pch} to be used in drawing overfitting-corrected estimates. Default is \code{col="blue"}, \code{lty=1}, \code{lwd=1}, \code{pch=4}.} \item{legend}{ set to \code{FALSE} to suppress legends (for \code{lrm}, \code{ols} only) on the calibration plot, or specify a list with elements \code{x} and \code{y} containing the coordinates of the upper left corner of the legend. By default, a legend will be drawn in the lower right 1/16th of the plot. } } \value{ a matrix specifying mean predicted survival in each interval, the corresponding estimated bias-corrected Kaplan-Meier estimates, number of subjects, and other statistics. For linear and logistic models, the matrix instead has rows corresponding to the prediction points, and the vector of predicted values being validated is returned as an attribute. The returned object has class \code{"calibrate"} or \code{"calibrate.default"}.
\code{plot.calibrate.default} invisibly returns the vector of estimated prediction errors corresponding to the dataset used to fit the model. } \section{Side Effects}{ prints, and stores an object \code{pred.obs} or \code{.orig.cal} } \details{ If the fit was created using penalized maximum likelihood estimation, the same \code{penalty} and \code{penalty.scale} parameters are used during validation. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \seealso{ \code{\link{validate}}, \code{\link{predab.resample}}, \code{\link{groupkm}}, \code{\link[Hmisc]{errbar}}, \code{\link[Hmisc]{scat1d}}, \code{\link{cph}}, \code{\link{psm}}, \code{\link{lowess}} } \examples{ set.seed(1) n <- 200 d.time <- rexp(n) x1 <- runif(n) x2 <- factor(sample(c('a', 'b', 'c'), n, TRUE)) f <- cph(Surv(d.time) ~ pol(x1,2) * x2, x=TRUE, y=TRUE, surv=TRUE, time.inc=1.5) #or f <- psm(S ~ \dots) pa <- 'polspline' \%in\% row.names(installed.packages()) if(pa) { cal <- calibrate(f, u=1.5, B=20) # cmethod='hare' plot(cal) } cal <- calibrate(f, u=1.5, cmethod='KM', m=50, B=20) # usually B=200 or 300 plot(cal, add=pa) set.seed(1) y <- sample(0:2, n, TRUE) x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) x4 <- runif(n) f <- lrm(y ~ x1 + x2 + x3 * x4, x=TRUE, y=TRUE) cal <- calibrate(f, kint=2, predy=seq(.2, .8, length=60), group=y) # group= does k-sample validation: make resamples have same # numbers of subjects in each level of y as original sample plot(cal) #See the example for the validate function for a method of validating #continuation ratio ordinal logistic models. You can do the same #thing for calibrate } \keyword{methods} \keyword{models} \keyword{regression} \keyword{survival} \keyword{hplot} \concept{bootstrap} \concept{model validation} \concept{calibration} \concept{model reliability} \concept{predictive accuracy} rms/man/robcov.Rd0000644000176200001440000001351213714237251013416 0ustar liggesusers\name{robcov} \alias{robcov} \title{Robust Covariance Matrix Estimates} \description{ Uses the Huber-White method to adjust the variance-covariance matrix of a fit from maximum likelihood or least squares, to correct for heteroscedasticity and for correlated responses from cluster samples. The method uses the ordinary estimates of regression coefficients and other parameters of the model, but involves correcting the covariance matrix for model misspecification and sampling design. The method works for any model that has a \code{residuals(fit,type="score")} method implemented, such as \code{lrm}, \code{cph}, \code{coxph}, and ordinary linear models (\code{ols}). The fit must have specified the \code{x=TRUE} and \code{y=TRUE} options for certain models. Observations in different clusters are assumed to be independent. For the special case where every cluster contains one observation, the corrected covariance matrix returned is the "sandwich" estimator (see Lin and Wei). This is a consistent estimate of the covariance matrix even if the model is misspecified (e.g., heteroscedasticity, underdispersion, wrong covariate form). For the special case of \code{ols} fits, \code{robcov} can compute the improved (especially for small samples) Efron estimator that adjusts for natural heterogeneity of residuals (see Long and Ervin (2000) estimator HC3). } \usage{ robcov(fit, cluster, method=c('huber','efron')) } \arguments{ \item{fit}{ a fit object from the \code{rms} series } \item{cluster}{ a variable indicating groupings.
\code{cluster} may be any type of vector (factor, character, integer). NAs are not allowed. Unique values of \code{cluster} indicate possibly correlated groupings of observations. Note the data used in the fit and stored in \code{fit$x} and \code{fit$y} may have had observations containing missing values deleted. It is assumed that if any NAs were removed during the original model fitting, an \code{naresid} function exists to restore NAs so that the rows of the score matrix coincide with \code{cluster}. If \code{cluster} is omitted, it defaults to the integers 1,2,\dots,n to obtain the "sandwich" robust covariance matrix estimate. } \item{method}{ can be set to \code{"efron"} for ols fits (only). The default is the Huber-White estimator of the covariance matrix. }} \value{ a new fit object with the same class as the original fit, and with the element \code{orig.var} added. \code{orig.var} is the covariance matrix of the original fit. Also, the original \code{var} component is replaced with the new Huberized estimates. A component \code{clusterInfo} is added to contain elements \code{name} and \code{n} holding the name of the \code{cluster} variable and the number of clusters. } \author{ Frank Harrell\cr Department of Biostatistics\cr Vanderbilt University\cr fh@fharrell.com } \references{ Huber, PJ. Proc Fifth Berkeley Symposium Math Stat 1:221--33, 1967. White, H. Econometrica 50:1--25, 1982. Lin, DY, Wei, LJ. JASA 84:1074--8, 1989. Rogers, W. Stata Technical Bulletin STB-8, p. 15--17, 1992. Rogers, W. Stata Release 3 Manual, \code{deff}, \code{loneway}, \code{huber}, \code{hreg}, \code{hlogit} functions. Long, JS, Ervin, LH. The American Statistician 54:217--224, 2000. } \seealso{ \code{\link{bootcov}}, \code{\link{naresid}}, \code{\link{residuals.cph}}, \code{http://gforge.se/gmisc} interfaces \code{rms} to the \code{sandwich} package } \examples{ # In OLS test against more manual approach set.seed(1) n <- 15 x1 <- 1:n x2 <- sample(1:n) y <- round(x1 + x2 + 8*rnorm(n)) f <- ols(y ~ x1 + x2, x=TRUE, y=TRUE) vcov(f) vcov(robcov(f)) X <- f$x G <- diag(resid(f)^2) solve(t(X) \%*\% X) \%*\% (t(X) \%*\% G \%*\% X) \%*\% solve(t(X) \%*\% X) # Duplicate data and adjust for intra-cluster correlation to see that # the cluster sandwich estimator completely ignored the duplicates x1 <- c(x1,x1) x2 <- c(x2,x2) y <- c(y, y) g <- ols(y ~ x1 + x2, x=TRUE, y=TRUE) vcov(robcov(g, c(1:n, 1:n))) # A dataset contains a variable number of observations per subject, # and all observations are laid out in separate rows. The responses # represent whether or not a given segment of the coronary arteries # is occluded. Segments of arteries may not operate independently # in the same patient. We assume a "working independence model" to # get estimates of the coefficients, i.e., that estimates assuming # independence are reasonably efficient. The job is then to get # unbiased estimates of variances and covariances of these estimates.
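# (A sketch of the idea: with score residual matrix U and original
# covariance matrix V0, the cluster sandwich estimate is
# V0 \%*\% (sum over clusters of s \%*\% t(s)) \%*\% V0, where s is the
# within-cluster column sum of U; in R terms, assuming cluster ids g:
# S <- rowsum(U, g); middle <- crossprod(S).)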
n.subjects <- 30 ages <- rnorm(n.subjects, 50, 15) sexes <- factor(sample(c('female','male'), n.subjects, TRUE)) logit <- (ages-50)/5 prob <- plogis(logit) # true prob not related to sex id <- sample(1:n.subjects, 300, TRUE) # subjects sampled multiple times table(table(id)) # frequencies of number of obs/subject age <- ages[id] sex <- sexes[id] # In truth, observations within subject are independent: y <- ifelse(runif(300) <= prob[id], 1, 0) f <- lrm(y ~ lsp(age,50)*sex, x=TRUE, y=TRUE) g <- robcov(f, id) diag(g$var)/diag(f$var) # add ,group=w to re-sample from within each level of w anova(g) # cluster-adjusted Wald statistics # fastbw(g) # cluster-adjusted backward elimination plot(Predict(g, age=30:70, sex='female')) # cluster-adjusted confidence bands # or use ggplot(...) # Get design effects based on inflation of the variances when compared # with bootstrap estimates which ignore clustering g2 <- robcov(f) diag(g$var)/diag(g2$var) # Get design effects based on pooled tests of factors in model anova(g2)[,1] / anova(g)[,1] # A dataset contains one observation per subject, but there may be # heteroscedasticity or other model misspecification. Obtain # the robust sandwich estimator of the covariance matrix. # f <- ols(y ~ pol(age,3), x=TRUE, y=TRUE) # f.adj <- robcov(f) } \keyword{models} \keyword{regression} \keyword{robust} \concept{cluster sampling} \concept{intra-class correlation} rms/DESCRIPTION0000644000176200001440000000320414024574032012560 0ustar liggesusersPackage: rms Version: 6.2-0 Date: 2021-03-17 Title: Regression Modeling Strategies Author: Frank E Harrell Jr Maintainer: Frank E Harrell Jr <fh@fharrell.com> Depends: R (>= 3.5.0), Hmisc (>= 4.3-0), survival (>= 3.1-12), lattice, ggplot2 (>= 2.2), SparseM Imports: methods, quantreg, rpart, nlme (>= 3.1-123), polspline, multcomp, htmlTable (>= 1.11.0), htmltools, MASS, cluster, digest Suggests: boot, tcltk, plotly (>= 4.5.6), knitr, mice, rmsb Description: Regression modeling, testing, estimation, validation, graphics, prediction, and typesetting by storing enhanced model design attributes in the fit. 'rms' is a collection of functions that assist with and streamline modeling. It also contains functions for binary and ordinal logistic regression models, ordinal models for continuous Y with a variety of distribution families, and the Buckley-James multiple regression model for right-censored responses, and implements penalized maximum likelihood estimation for logistic and ordinary linear models. 'rms' works with almost any regression model, but it was especially written to work with binary or ordinal regression models, Cox regression, accelerated failure time models, ordinary linear models, the Buckley-James model, generalized least squares for serially or spatially correlated observations, generalized linear models, and quantile regression.
License: GPL (>= 2) URL: https://hbiostat.org/R/rms/, https://github.com/harrelfe/rms LazyLoad: yes RoxygenNote: 7.1.1 NeedsCompilation: yes Packaged: 2021-03-18 02:38:20 UTC; harrelfe Repository: CRAN Date/Publication: 2021-03-18 06:50:02 UTC rms/src/0000755000176200001440000000000014024536433011646 5ustar liggesusersrms/src/ormuv.f0000644000176200001440000000547713007152474013201 0ustar liggesusersC Output from Public domain Ratfor, version 1.03 subroutine ormuv(n, p, kint, nx, x, y, pr, fpa, fpb, fppa, fppb, u *, v, ja, ia, l, lia, kk) implicit double precision (a-h,o-z) integer p, y(n), ja(l), ia(lia), z, kk(p) double precision x(n,nx), pr(n), fpa(n), fpb(n), fppa(n), fppb(n), * u(p), v(l), ld do23000 k=1,kint uk = 0d0 do23002 j=1, n z = y(j) a = 0d0 if(z - 1 .eq. k)then a = a + fpa(j) else if(z .eq. k)then a = a - fpb(j) endif endif uk = uk + a / pr(j) 23002 continue 23003 continue u(k) = uk 23000 continue 23001 continue if(nx .gt. 0)then do23010 k = (kint + 1), p uk = 0d0 do23012 j=1, n uk = uk + (fpa(j) - fpb(j)) * x(j, k-kint) / pr(j) 23012 continue 23013 continue u(k) = uk 23010 continue 23011 continue endif iv = 0 do23014 m = 1,p if(kint .gt. 1)then if(m .eq. 1)then nkk = 2 kk(1) = 1 kk(2) = 2 else if(m .gt. 1 .and. m .lt. kint)then nkk = 3 kk(1) = m - 1 kk(2) = m kk(3) = m + 1 else if(m .eq. kint)then nkk = 2 kk(1) = m-1 kk(2) = m else nkk = kint do23024 mm=1, kint kk(mm) = mm 23024 continue 23025 continue endif endif endif do23026 mm=(kint+1),p nkk = nkk + 1 kk(nkk) = mm 23026 continue 23027 continue else nkk = p endif do23028 ik = 1,nkk if(kint .eq. 1)then k = ik else k = kk(ik) endif vmk = 0d0 do23032 j = 1,n z = y(j) pa = fpa(j) pb = fpb(j) ppa = fppa(j) ppb = fppb(j) w = 1/(pr(j)*pr(j)) if(m .le. kint .and. k .le. kint)then a = - w * (pa*ld(z - 1 .eq. m) - pb*ld(z .eq. m)) * (pa*ld(z - 1 . *eq. k) - pb*ld(z .eq. k)) + (ppa*ld(z - 1 .eq. m)*ld(m .eq. k) - p *pb*ld(z .eq. m)*ld(m .eq. k))/pr(j) else if(m .gt. kint .and. k .gt. kint)then a = x(j,m-kint) * x(j,k-kint) / pr(j) * (-1/pr(j) * (pa - pb) * (p *a - pb) + ppa - ppb) else mi = max(m, k) ki = min(m, k) a = x(j, mi - kint) / pr(j) * (-1/pr(j) * (pa - pb) * (pa*ld(z - 1 * .eq. ki) - pb*ld(z .eq. ki)) + ppa*ld(z - 1 .eq. ki) - ppb*ld(z . *eq. ki)) endif endif vmk = vmk + a 23032 continue 23033 continue iv = iv + 1 v(iv) = - vmk if(kint .gt. 1)then ja(iv) = k if(ik .eq. 1)then ia(m) = iv endif endif 23028 continue 23029 continue 23014 continue 23015 continue if(kint .gt. 
1)then ia(p+1) = iv + 1 endif return end double precision function ld(a) logical a if(a)then ld = 1d0 else ld = 0d0 endif return end rms/src/robcovf.f0000644000176200001440000000172213007152474013456 0ustar liggesusersC Output from Public domain Ratfor, version 1.03 subroutine robcovf(n, p, nc, start, len, u, s, v, w) implicit double precision (a-h,o-z) integer p, start(nc), len(nc) double precision u(n,p), s(p), v(p,p), w(p,p) do23000 i=1,p do23002 j=1,p w(i,j)=0d0 23002 continue 23003 continue 23000 continue 23001 continue do23004 k=1,nc do23006 i=1,p s(i)=0d0 do23008 j=1,p v(i,j)=0d0 23008 continue 23009 continue 23006 continue 23007 continue do23010 i=start(k),start(k)+len(k)-1 do23012 j=1,p s(j)=s(j)+u(i,j) 23012 continue 23013 continue 23010 continue 23011 continue do23014 i=1,p do23016 j=1,p v(i,j)=v(i,j)+s(i)*s(j) 23016 continue 23017 continue 23014 continue 23015 continue do23018 i=1,p do23020 j=1,p w(i,j)=w(i,j)+v(i,j) 23020 continue 23021 continue 23018 continue 23019 continue 23004 continue 23005 continue return end rms/src/ratfor/0000755000176200001440000000000013555351757013157 5ustar liggesusersrms/src/ratfor/ormuv.r0000644000176200001440000000465613007152474014510 0ustar liggesusers## Usage: ratfor -o ../ormuv.f ormuv.r ## Computes the score vector and compressed information matrix for ## an ordinal regression model with possibly very many intercepts ## l= nx^2 + 2nx*kint + 3kint - 2 ## If kint=1 just use a regular square matrix, l=p^2 SUBROUTINE ormuv(n, p, kint, nx, x, y, pr, fpa, fpb, fppa, fppb, u, v, ja, ia, l, lia, kk) IMPLICIT DOUBLE PRECISION (a-h,o-z) INTEGER p, y(n), ja(l), ia(lia), z, kk(p) DOUBLE PRECISION X(n,nx), pr(n), fpa(n), fpb(n), fppa(n), fppb(n), u(p), v(l), ld do k=1,kint { uk = 0d0 do j=1, n { z = y(j) a = 0d0 if(z - 1 == k) a = a + fpa(j) else if(z == k) a = a - fpb(j) uk = uk + a / pr(j) } u(k) = uk } if(nx > 0) do k = (kint + 1), p { uk = 0d0 do j=1, n { uk = uk + (fpa(j) - fpb(j)) * x(j, k-kint) / pr(j) } u(k) = uk } iv = 0 do m = 1,p { if(kint > 1) { ## Compute column numbers for nonzero elements: kk if(m == 1) { nkk = 2 kk(1) = 1 kk(2) = 2 } else if(m > 1 & m < kint) { nkk = 3 kk(1) = m - 1 kk(2) = m kk(3) = m + 1 } else if(m == kint) { nkk = 2 kk(1) = m-1 kk(2) = m } else { nkk = kint do mm=1, kint { kk(mm) = mm } } do mm=(kint+1),p { nkk = nkk + 1 kk(nkk) = mm } } else nkk = p # call intpr('nkk', 3, nkk, 1) # call intpr('kk', 2, kk, nkk) do ik = 1,nkk { if(kint == 1) k = ik else k = kk(ik) vmk = 0d0 do j = 1,n { z = y(j) pa = fpa(j) pb = fpb(j) ppa = fppa(j) ppb = fppb(j) w = 1/(pr(j)*pr(j)) if(m <= kint & k <= kint) a = - w * (pa*ld(z - 1 == m) - pb*ld(z == m)) * (pa*ld(z - 1 == k) - pb*ld(z == k)) + (ppa*ld(z - 1 == m)*ld(m == k) - ppb*ld(z == m)*ld(m == k))/pr(j) else if(m > kint & k > kint) a = x(j,m-kint) * x(j,k-kint) / pr(j) * (-1/pr(j) * (pa - pb) * (pa - pb) + ppa - ppb) else { mi = max(m, k) ki = min(m, k) a = x(j, mi - kint) / pr(j) * (-1/pr(j) * (pa - pb) * (pa*ld(z - 1 == ki) - pb*ld(z == ki)) + ppa*ld(z - 1 == ki) - ppb*ld(z == ki)) } vmk = vmk + a } iv = iv + 1 v(iv) = - vmk if(kint > 1) { ja(iv) = k if(ik == 1) ia(m) = iv } } } if(kint > 1) ia(p+1) = iv + 1 return end double precision function ld(a) logical a if(a) ld = 1d0 else ld = 0d0 return end rms/src/ratfor/robcovf.r0000644000176200001440000000121513007152474014764 0ustar liggesusers## Usage: ratfor -o ../robcovf.f robcovf.r ## Computes sum of (within cluster sum of U)(within cluster sum of U)' ## SUBROUTINE robcovf(n, p, nc, start, len, u, s, v, w) IMPLICIT DOUBLE 
PRECISION (a-h,o-z) INTEGER p, start(nc), len(nc) DOUBLE PRECISION u(n,p), s(p), v(p,p), w(p,p) do i=1,p { do j=1,p { w(i,j)=0d0 }} do k=1,nc { do i=1,p { s(i)=0d0 do j=1,p { v(i,j)=0d0 }} do i=start(k),start(k)+len(k)-1 { do j=1,p { s(j)=s(j)+u(i,j) } } do i=1,p { do j=1,p { v(i,j)=v(i,j)+s(i)*s(j) }} do i=1,p { do j=1,p { w(i,j)=w(i,j)+v(i,j) }} } return end rms/src/init.c0000644000176200001440000000225613101622024012750 0ustar liggesusers#include <R.h> #include <stdlib.h> // for NULL #include <R_ext/Rdynload.h> /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Fortran calls */ extern void F77_NAME(lrmfit)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(matinv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ormuv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(robcovf)(void *, void *, void *, void *, void *, void *, void *, void *, void *); static const R_FortranMethodDef FortranEntries[] = { {"lrmfit", (DL_FUNC) &F77_NAME(lrmfit), 22}, {"matinv", (DL_FUNC) &F77_NAME(matinv), 11}, {"ormuv", (DL_FUNC) &F77_NAME(ormuv), 18}, {"robcovf", (DL_FUNC) &F77_NAME(robcovf), 9}, {NULL, NULL, 0} }; void R_init_rms(DllInfo *dll) { R_registerRoutines(dll, NULL, NULL, FortranEntries, NULL); R_useDynamicSymbols(dll, FALSE); } rms/src/mlmats.f0000644000176200001440000003755313007152474013316 0ustar liggesusersC----------------------------------------------------------------------------- C Helper function to compute sign of a number C returns 1 when number is greater than zero C returns 0 when number is zero C returns -1 when number is less than zero C----------------------------------------------------------------------------- function isgn(i) implicit none integer isgn, i isgn = isign(1,i) if(i.eq.0) isgn = 0 return end function isgn FUNCTION isub(i,j) C----------------------------------------------------------------------------- C Computes subscript in lower triangular matrix corresponding to (i,j) C----------------------------------------------------------------------------- INTEGER i,j,isub,isgn SELECT CASE (isgn(i-j)) CASE (: 0) isub=i+j*(j-1)/2 CASE (1 : ) isub=j+i*(i-1)/2 END SELECT RETURN END SUBROUTINE sqtria(vsq,vtri,n,k) C---------------------------------------------------------------------------- C k=1 : converts n x n square symmetric matrix vsq to lower triangular C form and stores result in vtri C k=2 : converts lower triangular matrix vtri to n x n uncompressed C square matrix C F.
Harrell 6Sep90 C---------------------------------------------------------------------------- DOUBLE PRECISION vsq(n,n),vtri(n*(n+1)/2) IF(k.EQ.1) THEN l=0 DO i=1,n DO j=1,i l=l+1 vtri(l)=vsq(i,j) END DO END DO ELSE DO i=1,n DO j=1,n vsq(i,j)=vtri(isub(i,j)) END DO END DO ENDIF RETURN END subroutine inner(b,x,n,z) C----------------------------------------------------------------------------- C Computes dot product of b and x, each of length n, returns result in z C----------------------------------------------------------------------------- DOUBLE PRECISION b(n),x(n),z z=0D0 DO i=1,n z=z+b(i)*x(i) end do return end SUBROUTINE SPROD(M,V,P,N) C----------------------------------------------------------------------------- C MULTIPLIES N*N SYMMETRIC MATRIX M STORED IN COMPRESSED FORMAT BY C THE N*1 VECTOR V AND RETURNS THE N*1 VECTOR PRODUCT P C----------------------------------------------------------------------------- IMPLICIT DOUBLE PRECISION (A-H,O-Z) INTEGER I, N, II, J, IR, isgn DOUBLE PRECISION PI DOUBLE PRECISION M(N*(N+1)/2),V(N),P(N) DO I=1,N PI=0D0 II=I*(I-1)/2 DO J=1,N SELECT CASE(isgn(I-J)) CASE ( : -1) IR=I+J*(J-1)/2 CASE (0 : ) IR=J+II END SELECT PI=PI+M(IR)*V(J) END DO P(I)=PI END DO RETURN END SUBROUTINE AVA(A,V,P,N) C----------------------------------------------------------------------------- C V IS AN N X N SYMMETRIC MATRIX AND A IS AN N X 1 VECTOR. C THIS ROUTINE RETURNS P=A'VA C----------------------------------------------------------------------------- IMPLICIT DOUBLE PRECISION (A-H,O-Z) DOUBLE PRECISION A(N),V(N*(N+1)/2) P=0D0 K=0 DO 10 I=1,N AI=A(I) DO 20 J=1,I K=K+1 IF (I.EQ.J) THEN P=P+AI*AI*V(K) ELSE P=P+2D0*AI*A(J)*V(K) ENDIF 20 CONTINUE 10 CONTINUE RETURN END SUBROUTINE avia(a,v,p,n,idx,nidx,nrank,eps,vsub,wv1,wv2,wv3, & wv4,pivot) C---------------------------------------------------------------------------- C V is an n x n symmetric matrix and a is an n x 1 vector. C Returns P=a' v**-1 a and nrank=rank(v), where C a=a(idx(i),i=1,...,nidx), v=(v(idx(i),idx(i),i=1,...,nidx). C vsub is nidx x nidx scratch matrix and wv1-wv4 are scratch C vectors of length nidx (except for wv3 which is 2*nidx). C pivot is scratch integer vector C of length nidx. eps is singularity criterion, e.g. 1d-7. C Uses Fortran routines dqr (see S function qr) and dqrsl1 C (see S function solve). In R these are dqrdc2 and dqrsl (args C differ too). C C F. Harrell 20 Nov 90 C C---------------------------------------------------------------------------- DOUBLE PRECISION a(n),wv1(nidx),wv2(nidx),wv3(2*nidx), & wv4(nidx), v(n,n), eps, vsub(nidx,nidx), p INTEGER idx(nidx),pivot(nidx),dim(2) k=nidx C CALL intpr("k",1,k,1) dim(1)=k dim(2)=k DO i=1,k wv4(i)=a(idx(i)) pivot(i)=i DO j=1,k vsub(i,j)=v(idx(i),idx(j)) ENDDO ENDDO C CALL dblepr('wv4',3,wv4,k) C CALL dblepr('vsub',4,vsub,k*k) nrank=k C CALL dqr(vsub,dim,pivot,wv2,eps,wv3,nrank) CALL dqrdc2(vsub,dim,dim,dim,eps,nrank,wv2,pivot,wv3) C CALL intpr('nrank',5,nrank,1) IF(nrank.LT.k)RETURN DO i=1,k wv3(i)=wv4(i) ENDDO j=1 i=100 C CALL dqrsl1(vsub,dim,wv2,nrank,wv4,1,wv3,wv1,i,j) CALL dqrsl(vsub,dim,dim,nrank,wv2,wv4,wv3,wv1,wv1, & wv3,wv3,i,j) p=0d0 DO i=1,k p=p+wv4(i)*wv1(i) ENDDO C CALL intpr('dim',3,dim,2) C CALL dblepr('vsub',4,vsub,k*k) C CALL dblepr('wv1',3,wv1,k) C CALL dblepr('wv4',3,wv4,k) C CALL dblepr('p',1,p,1) RETURN END SUBROUTINE AVIA2(A,V,P,N,idx,nidx,nrank,eps,vsub,s,swept) C---------------------------------------------------------------------------- C V IS AN N X N SYMMETRIC square MATRIX AND A IS AN C N X 1 VECTOR. 
C THIS ROUTINE RETURNS P=a' vinverse a and nrank=rank(v) where C a=A(idx(i),i=1,...,nidx), v=V(idx(i),idx(i),i=1,...,nidx). C S(nidx) is DOUBLE PRECISION scratch vector, SWEPT(nidx) is LOGICAL scratch C vector, VSUB(nidx*(nidx+1)/2) is DOUBLE PRECISION scratch vector C eps is singularity criterion, e.g. 1D-6 C C F. Harrell 6 Sep90 C---------------------------------------------------------------------------- IMPLICIT DOUBLE PRECISION (A-H,O-Z) DOUBLE PRECISION A(n),V(n,n),s(nidx),vsub(nidx*(nidx+1)/2) INTEGER idx(nidx) LOGICAL swept(nidx) l=0 DO i=1,nidx swept(i)=.FALSE. idxi=idx(i) C Initialize s vector to diagonal elements s(i)=v(idxi,idxi) DO j=1,i l=l+1 vsub(l)=v(idxi,idx(j)) END DO END DO nrank=0 DO i=1,nidx CALL GSWEEP(s,vsub,i,lsing,nidx,eps,swept,ifault) IF(lsing.EQ.0)nrank=nrank+1 ENDDO P=0D0 K=0 DO 10 I=1,Nidx C Singularities are like parameter never appeared IF(swept(i)) THEN AI=A(idx(i)) ELSE AI=0D0 ENDIF DO 20 J=1,I K=K+1 IF (I.EQ.J) THEN P=P+AI*AI*Vsub(K) ELSE P=P+2D0*AI*A(idx(J))*Vsub(K) ENDIF 20 CONTINUE 10 CONTINUE C gsweep returns negative of inverse P=-P RETURN END SUBROUTINE ainvb(a, b, aib, k, tol, irank, pivot, & wv1, wv2, wv3) C----------------------------------------------------------------------- C Uses same Fortran subroutines as S function solve to accurately C compute aib=a inverse * b, for k x k symmetric matrix a stored in C lower triangular form and k x 1 vector b. wv1(k,k), wv2(k), wv3(2*k) C are DOUBLE PRECISION scratch arrays and pivot(k) is INTEGER scratch vector. C tol is tolerance, e.g. 1d-7. C IF irank (output) < k, result is not computed. Index of singular C column will be stored in pivot(k) if irank for examining the proportional odds assumption * Mean.lrm, Mean.orm, Quantile.orm: improved by Shengxin Tu to compute confidence limits using the delta method * prModFit: added subtitle * lrm.fit.bare: new minimalistic lrm fitting function * ExProb: new version from Shengxin Tu that implements confidence intervals using the delta method * Quantile: improved further by Shengxin Tu to provide two methods for computing quantiles (matters most when there are many ties) Changes in version 6.0-1 (2020-07-15) * cph: workaround for R bug where name of weights variable was set to ..1 when dtrans was used with fit.mult.impute and dtrans contained weights (thanks: Trevor Thompson) * cph: changed as.name('model.frame') to quote(stats::model.frame) * blrm: subset was not working; reworked data setup for the two formulas * modelData: new service function that allows us to do away with model.frame and let Design() directly process the data frame; tiny change to Design() * as.data.frame.rms: added so that modified Design will work * lrm, Rq, psm, Glm, bj, orm, ols: changed to use modelData instead of model.frame * predictrms: prevented structure(NULL, ...) 
* ggplot.Predict: fixed bug relating to histSpikeg and vnames * survplotp: removed blank levels and : in legend * contrast: added y argument to allow y-specific odds ratios to be computed for a constrained partial proportional odds model * blrm: moved to new rmsb package * Dropped Mean and Quantile since they are already in Hmisc * Fixed bug in modelData when formula2 was not the same as formula * Predict: added calling environment argument to eval() to allow calling Predict from within a function Changes in version 6.0-0 (2020-06-04) * orm.fit: changed to result in a fail=TRUE fit if NA, NaN element of variance matrix (Thanks: Thomas Dupont) * prModFit: changed default in catl function to center=FALSE * residuals.psm: added transformation of survival time when computing censored normalized residuals * added SUGGESTS for rstan * added utility functions for rstan: stanDx, stanGet, coef.rmsb, vcov.rmsb, print.rmsb, stanCompile (fetches Stan code from github.com/harrelfe/stan and compiles it to a central place on your machine) * new function blrm for Bayesian binary and ordinal logistic regression along the lines of rms::lrm * new function blrmStats to compute posterior distributions and credible intervals for several predictive accuracy measures for blrm including Dxy (which is easily translated to c-index (= AUROC in binary Y case), Brier score, and measures of explained variation; print method print.blrmStats also added * modified predictrms and contrast.rms for Bayesian calculations, adding new argument posterior.summary for predictrms and Predict * enhanced anova.rms for approximate relative explained variation measures for Bayesian models * extended Function.rms to use posterior mean/median coefficients * new function PostF to generate an R function that computes posterior probabilities of assertions * added stanDxplot and plot methods for Bayesian fits * extended nomogram to use posterior mean or median parameter values * extended Design() to deal with time() variables (for blrm) * extended blrm to use Ben Goodrich's AR(1) model * HPDint: new function, adopted from the coda package * distSym: new function: symmetry measure for a distribution * stackMI: new function: Bayesian fitting with multiple imputation by posterior stacking * import MASS::kde2d and bandwidth.nrd and cluster::ellipsoidhull for 2-d posterior density estimation * pdensityContour: new function * plot.contrast.rms: new function for Bayesian posterior densities and 2-d densities * made dependent on survival >= 3.1-12 * fixed bug related to changes in survival which no longer stores $y matrix as transformed; affects val.surv.s, calibrate.psm.s, validate.psm.s, psm.s. Thanks: Aida Eslami * which.influence: fixed bug when model has more than one intercept (thanks: Yuwei Zhu) * blrm: added partial proportional odds model * stanQr: new function * predict.blrm: new function * Mean.blrm, Quantile.blrm, ExProb.blrm: new functions * residuals.lrm: implemented score residuals for all link functions using code from Yuqi Tian * contrast.rms: added fun= for Bayesian models for getting posterior distributions of differences of nonlinearly transformed estimates; enhanced plot.contrast.rms and print.contrast.rms to handle this * fitIF, fitLoad, fitSave: removed (rejected by CRAN since fitIf assigned to global environment) Changes in version 5.1-4 (2019-11-16) * anova.rms: added which LaTeX packages are needed in help page * survplot.npsurv: added use of fun for conf type bars.
Thanks: Joonas Lehto * made rms dependent on Hmisc 4.2-0 or later * help files using set.seed: added suppressWarnings because of change in R base random number generators. Thanks: Brian Ripley * calibrate.cph: subset cuts to unique values. Thanks: Eduwin Pakpahan * psm: stored original Y if y=TRUE * residuals.psm: implemented type='score' (used by robcov) * depend on survival >= 3.1-6 and use survival's new coding of reference (censoring) event for competing risk models Changes in version 5.1-3 (2019-01-27) * lrm: when lrm.fit fails, don't stop altogether but print warning and return fit with fail=TRUE * summary.orm: stopped negating linear predictor before computing hazard ratio for orm fit with log-log family (confidence limits never attempted to do this). See https://github.com/harrelfe/rms/issues/56 * Design: stop with error message if any design matrix column name is duplicated, as with a predictor X1 with factor level 2 (forming name X12 in model.matrix) and a numeric predictor named X12. See https://github.com/harrelfe/rms/issues/55 * contrast.rms: for conf.type='simultaneous' shortened fit$coefficients to only have one intercept so will work with multcomp::glht. Thanks: Matthew Shun-Shin * orm: changed label for Unique Y to Distinct Y * survplotp: fixed bug in legends * predab.resample: handled case where fitter doesn't include fail=FALSE * orm.fit: fixed problem with omission of name of fam argument when offset is present, and bug where initial had the wrong length. Thanks: Tamas Ferenci * inst/tests/cph4.r: commented out line that caused failure * plotp: moved generic to Hmisc * Design: gave better error message re: ordered factors * predictrms: remembered to run scored() when producing design matrix * Design: allowed for = in value labels in mmnames. See github.com/harrelfe/rms/issues/29#issuecomment-417901353 * plot.calibrate.default: added cex.subtitles, riskdist options and changed default for scat1d.opts * inst/tests/calibrate.r: new test Changes in version 5.1-2 (2018-01-06) * plot.pentrace: corrected lty for AIC if ! add, interchanged lty for AIC and BIC. Thanks: Ferenci Tamas * removeFormulaTerms: added width.cutoff=500 to deparse call to avoid problems with long variable names. Thanks: Ferenci Tamas * removeFormulaTerms: changed above fix to use paste collapse instead (Thanks: Tamas) * Design: mmnames function: escaped <= like >= already escaped. See https://github.com/harrelfe/rms/issues/29#issuecomment-303423887 * groupkm: speed improvement (helps calibrate*; thanks Cole Beck) * fastbw: improved messages for rule and type * prModFit: changed to have htmlSpecial, htmlTranslate do all the html special character coding, allowing default to unicode * lrm, lrm.fit: for rank correlation measures, made more accurate by binning probabilities into bins of width 0.0002 instead of 0.002 * ggplot.Predict: fixed case of empty labels when defining pmlabel * vcov.orm: fixed bug when intercepts longer than 1 * lrm.fit, orm.fit: diag(1 / sd) not returning matrix if sd is a matrix and not a vector * ols: respected data in terms call, for case where formula is .
* robcov: removed warning in help file about printed standard errors (Thanks: Ferenci Tamás) * anova, rmsMisc, survplotp.npsurv: changed nbsp to htmlSpecial version * prModStat, anova, latex.cph, latex.rms, summary, validate.ols: added escape.html=FALSE to htmlTable * print.summary.rms: for html returned HTML object instead of invisible * validate.ols, calibrate.default: fixed fitter so that NA coefficient results in fail=TRUE * Gls: fixed major bug where correlation pattern was being ignored if not corAR1, corCAR1, corCompSymm. Thanks: Tamas Ferenci * latex.cph.s: changed to use latex instead of latex.default * ggplot.Predict: added conflinetype parameter (thanks: jphdotam; https://github.com/harrelfe/rms/pull/54) Changes in version 5.1-1 (2017-05-01) * latex.anova.rms: if table.env is FALSE but caption is specified, passes caption as insert.top argument to latex.default; when options(prType='latex') you can do print(anova(), caption=) to use this * prModFit: fixed one latex to lang=='latex' bug * print.anova.rms, print.summary.rms: added table.env argument * psm: fixed bug in storage of g and gr that made output for them invalid * calibrate.cph, calibrate.psm, Function.rms, plot.Predict, predab.resample, Predict, rms.trans, summary.rms, which.influence: fixed preservation of options() * lrm.fit, orm.fit: fixed bug in scale=TRUE when design matrix has 1 column * Design: quit running structure(ia, dimnames=NULL) if ia empty * Fortran calls: registered all Fortran functions, removed package name and quotes in first argument in .Fortran(), added F_ to entry point name in those calls, changed useDynLib call in NAMESPACE, added \alias{} for all F_ entries Changes in version 5.1-0 (2017-01-01) * anova.rms, survplotp.npsurv: use Hmisc htmlGreek to encode Greek letters * plotp.Predict: new function for direct plotly graphics for Predict * rbind.Predict: carry adjust object Changes in version 5.0-1 (2016-12-04) * latex.anova.rms: fixed problem with conversion of * to $\times$ * prModStats: fixed problem with print.ols residuals * print.summary.rms: made it respect options(prType), and default latex to no table environment * plot.summary.rms, survplot.npsurv: height and width moved from plotly::layout to plot_ly due to changes in plotly 4.5.6 * plot.summary.rms, survplotp.npsurv: added data= to plot::add_* because plotly was evaluating using the wrong copy of x,y, etc. * ggplot.Predict: used new labs(caption=) feature for ggplot2 instead of title * prModFit: fixed translation of * to \times, fixed \textgreater and \textless for row and column names for latex, fixed all model print methods to use options(prType=...)
* latexrms: remove md argument, use options(prType='html')
* latex methods for model fits: change default file to console
* latex.anova.rms: remove html argument, use options(prType)
* print.anova.rms: use options(prType) to automatically use LaTeX or html printing to console
* cph: fixed bug when method="exact" related to type vs ytype
* latex.cph: fixed bug where using strata used data not levels

Changes in version 5.0-0 (2016-10-31)
* plot.summary.rms: implemented plotly interactive plots if options(grType='plotly') in effect
* plot.anova.rms: implemented plotly interactive plots if options(grType='plotly') in effect; removed psmall argument; changed margin default to chisq and P
* ggplot.Predict: implemented plotly interactive plots if options(grType='plotly') in effect
* print(fit, md=TRUE), prModFit, prStats: added latex/html methods using htmlTable package and MathJax for latex math
* html.anova.rms, html.validate, html.summary.rms: new functions for use with html and MathJax/knitr/RStudio
* latex methods for model fits: added md=TRUE argument to produce MathJax-compatible latex and html code for fitted models when using R Markdown
* html: new methods for model fit objects for use with R Markdown
* formatNP: fixed error when digits=NA
* latex.anova.rms: fixed error in not rounding enough columns due to using all.is.numeric instead of is.numeric
* catg: corrected bug that disallowed explicit catg() in formulas
* ggplot.Predict: added height and width for plotly
* survplot: respected xlim with diffbands. Thanks: Toni G
* reVector: changed to reListclean and stored model stats components as lists so can handle mixture of numeric and character, e.g., name of clustering variable
* survplotp.npsurv: new function for interactive survival curve graphs using plotly
* anova, summary, latex, print for model fits: use options(prType='html') or 'latex' to set output type; output is htmltools::HTML-marked html so that the chunk header doesn't need results='asis'
* latex methods - set file default to ''
* GiniMd: moved to Hmisc package
* plot.nomogram: fixed bug where abbreviations were being ignored. Thanks: Zongheng Zhang
* nomogram: improved examples in help file
* survplot.npsurv: fixed n.risk when competing risks
* survest.cph, survfit.cph, others: fixed large problems due to incompatibility with survival 2.39-5 for survival predictions; changed object Strata to strata in cph to be compatible with survival package
* new test survest.r to more comprehensively check survfit.cph and survest.cph
* ggplot.Predict: quit ignoring xlim; suppress confidence bands if two group variables because ggplot2 geom_ribbon doesn't support multiple aesthetics
* predictrms: set contrasts for ordered factors to contr.treatment instead of contr.poly
* val.prob: changed line of identity to use wide grayscale line instead of dashed line

Changes in version 4.5-1 (2016-06-01)
* ggplot.Predict: fixed bug related to .predictor. vs. .Predictor. which resulted in double plotting of some anova annotations
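A minimal sketch of the two global options introduced above, reusing the hypothetical fit f (prType controls printed/typeset output, grType the graphics engine):

  options(prType='html')    # print(f), anova(f), summary(f) render as HTML/MathJax
  options(grType='plotly')  # plot.anova.rms, plot.summary.rms, ggplot.Predict become interactive
  print(f)
  plot(anova(f))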
* tests/ggplot3: new tests related to above
* tests/survplot2.r: new test
* ggplot.Predict: fixed bug when legend.label=FALSE and didn't enclose empty string in quotes for later parsing
* validate.rpart: extended to handle survival models (exponential distribution)
* print.validate.rpart: finally added method in NAMESPACE
* val.prob: made riskdist="predicted" the default; added E90 and Eavg to plotted output; for E measures changed to loess calibration instead of linear logistic calibration
* fastbw: corrected error in help file regarding aics argument
* NAMESPACE: added graphics::grconvertX, grconvertY
* latex.naprint.delete: fixed width algorithm for dot chart
* survplot.npsurv, survplotp.npsurv: changed $prev to $pstate for competing risk estimation due to change in survival package 2.40-0
* rbind.Predict: removed varying .x.; couldn't figure out what it was doing and it caused a bug

Changes in version 4.5-0 (2016-04-02)
* val.prob: fixed longstanding error: U was always -2/n because the wrong deviance was used. Thanks: Kai Chen
* survplot*: implemented y.n.risk='auto' to place numbers at risk below the x-axis by 1/3 the range of ylim
* survplot*: added mylim argument to force inclusion of ranges in computed limits
* bootplot: added what='box' for box plots to check for wild bootstrap coefficients
* ggplot.Predict: old workaround for expression labels for facet_grid quit working; changed to new labeller capabilities just added to ggplot2
* survplot.*, survdiffplot: added cex.xlab, cex.ylab arguments
* Design: fixed bug where one-column matrices as predictors did not compute column names correctly
* predictrms: fixed bug in trying to retrieve offsets; now offsets set to zero (so are ignored)
* tests: several updated, all run, several bugs fixed as result

Changes in version 4.4-2 (2016-02-20)
* tests: added test mice.r exposing problem with complete function in mice
* ols: passed tol argument to lm.fit, lm.wfit
* contrast: fixed major bug for orm fits - column was shifted because 1 column was added to X instead of the number of intercepts. Changed to choose only one intercept in coef
* ols: workaround - fitting functions are adding a class of integer for labelled Y
* Gls: updated to be consistent with changes in nlme, DEPENDS on latest nlme
* ggplot.Predict: changed show_guide to show.legend (thanks: John Woodill)
* tests/ggplot2b.r: new test
* tests/orm4.r: new test for small sample 2-way ANOVA
* tests/orm-profile.r: new test to time profile orm
* cph: fixed bug in se.fit when surv='summary'
* cph: added arguments linear.predictors, residuals, nonames

Changes in version 4.4-1 (2015-12-21)
* contrast, residuals.lrm, survreg.distributions, val.prob, validate.ols: changed 1 - pnorm(z) to pnorm(-z) and 1 - pt(z) to pt(-z) to increase precision; thanks: Alexander Ploner
* tests/anova-ols-mult-impute.r: helps to understand sigma and sums of squares when ols is used with fit.mult.impute
* survplot.npsurv: added support for competing risk cumulative incidence plots - see the new state argument
* ols: fixed bug in which df.residual was n-1 when the fit was penalized. Thanks: Mark Seeto
* %ia%: returned attribute iaspecial which is TRUE when the interactions involved a categorical variable with exactly 2 levels and a variable that was not passed through lsp, rcs, pol
* Design: if an %ia% object has iaspecial TRUE, modified how mmcolnames is created for that model term to account for an inconsistency in R whereby a categorical variable involved in %ia% when there are only two levels does not generate the usual variable=non-reference value in the column names.
* bj, cph, Glm, lrm, ols, orm: changed to subset model.matrix result on mmcolnames to rigorously require expected design matrix column names to be what model.matrix actually constructed
* npsurv: added numevents and exposure objects to fit object so will have number of events by cause in case of competing risks (summary.survfit does not compute this) as well as with ordinary right-censored single events
* ggplot.Predict: inserted mapping and environment arguments to comply with ggplot generic
* legend.nomabbrev: fixed bug in info$Abbrev (note the capital A; thanks: Alvin Jeffery)
* Design: fixed bug, was not handling logical predictors correctly in mmcolnames. Thanks: Max Gordon

Changes in version 4.4-0 (2015-09-28)
* contrast.rms: made SE a vector not a matrix, added 4-list logic for nvary, added new test from JoAnn Alvarez
* plot.summary.rms: corrected bug where pch was ignored. Thanks: Tamas Ferenci
* prModFit: fixed print(fit, latex=FALSE) when fit is result of robcov, bootcov
* NAMESPACE: added imports for base functions used to avoid warnings with R CMD CHECK; new test rcs.r
* prModFit: added rmarkdown argument. All print.* methods can pass this argument.
* All print methods for fit objects: left result as prModFit instead of invisible() so that rmarkdown will work
* demo/all.R: updated for plot and ggplot methods, npsurv
* cph, predictrms, rms, rms.trans, rmsMisc: changed Design function to return new objects sformula (formula without cluster()) and mmcolnames which provides a new way to get rid of strat() main effects and interactions involving non-reference cells; handle offsets in cph and predict() (not yet in Predict); new internal function removeFormulaTerms that does character manipulation to remove terms like cluster() or offset() or the dependent variable(s). This gets around the problem with [.terms messing up offset terms when you subset on non-offset terms
* Glm, ols: fixed offset
* bj, Gls, lrm, orm, psm, Rq: changed to new offset method and sformula
* Predict: added offset=list(offsetvariable=value)
* several: made temporary function names unique to avoid warnings with R CMD CHECK
* ggplot.Predict: changed facet_wrap_labeller to not mess with class of returned object from ggplotGrob
* Design: fixed column names for matrix predictors
* Design, cph: handled special case where model is fit on a fit$x matrix
* dxy.cens: exported
* cph: added debug argument
* tests/cph4.r: new tests for various predictor types
* rms: changed warning to error if an ordered factor appears in the model and options(contrasts) is not set properly
* rms transformation functions: made more robust by checking ! length instead of is.null
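The offset support above can be sketched as follows, with a hypothetical count outcome and exposure-time variable days (a datadist is set up so Predict knows the range of age):

  w <- data.frame(count = rpois(50, 3),
                  days  = sample(20:40, 50, TRUE),
                  age   = rnorm(50, 50, 10))
  ddw <- datadist(w); options(datadist='ddw')
  g <- Glm(count ~ rcs(age, 3) + offset(log(days)), family=poisson, data=w)
  Predict(g, age, offset=list(days=30))   # predictions standardized to 30 days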
Changes in version 4.3-1 (2015-04-20)
* NAMESPACE: removed reference to gridExtra, in DESCRIPTION moved gridExtra from Depends to Suggests
* ggplot.Predict: re-worked generation of ggplot function call construction to use character strings with evaluation at the very end; added colfill argument
* bplot: fixed so will find panel function with lattice::
* orm.fit: trapped negative cell prob due to updated intercepts out of order
* ggplot.Predict: fixed bug in expch when x=NULL
* lrm.fit: fixed bug with wrong-dimension array given to Fortran if offset used
* predict.Rq: fixed bug causing intercept to be ignored
* survplot.npsurv: override conf='diffbands' to 'bands' when one stratum; added aehaz and times arguments
* ggplot.Predict: called new Hmisc function arrGrob; removed gridExtra from Depends
* lrm.fit: changed any(duplicated()) to anyDuplicated()

Changes in version 4.3-0 (2015-02-15)
* contrast.rms: added 3rd and 4th list of predictor settings to allow for double-difference (interaction) contrasts
* predictrms: don't call vcov if no covariates present (e.g. cph strata only)
* print.summary.rms, latex.summary.rms: print more significant digits for effect-related columns
* robcov, bootcov: added new object clusterInfo in the fit object
* all fitting functions: print method prints clusterInfo
* residuals.lrm: negated Li-Shepherd residuals to make them correct
* residuals.orm: put in namespace and added examples from Qi Liu
* plot.calibrate: added par.corrected so user can specify graphics parameters for plotting overfitting-corrected estimates
* robcov: dropped unused levels from cluster so that clusterInfo is correct
* plot.Predict: added example in help file for how to set lattice graphics parameters
* datadist: quit rounding quantiles to integers if raw data were integer
* predictrms, Predict: fixed bug with ref.zero=TRUE and with handling Cox models; added new test code in tests/cph3.r
* cph, dxy.cens: fixed bug - Dxy was negative of what it should have been for cph. Thanks: Irantzu Barrio
* ggplot.Predict: new function for ggplot2 graphics for Predict objects
* contrast.rms: added ... for print method (e.g., to allow digits=4)
* survplot: raredd fixed bug with 1 observation per group - see http://stackoverflow.com/questions/24459078/error-message-when-ploting-subjects-at-risk-with-survplot
* latex.rms: changed notation for indicator variables from {} to [] a la Knuth
* latex.anova.rms: stopped putting d.f. and Partial SS in math mode
* npsurv: neatened help file
* residuals.orm: fixed bug for score residuals. This fixed robcov.
* orm-residuals.r: new test code
* vcov.orm: handled case where fit was run through robcov
* print: for LaTeX fixed prStats to translate clustering variable to LaTeX
* vcov.orm: handled case where fit was run through bootcov
* bootcov: for orm stored intercepts attribute in var
* tests: new test for orm bootcov
* contrast, vcov.orm: made to work if fit run through robcov, bootcov
* print.anova.rms: fixed bug with subscripts, names, dots

Changes in version 4.2-1 (2014-09-18)
* plot.summary.rms: allowed a vector for lwd, and passed lwd to confbar. Thanks: Michael Friendly
* gendata: Starting in R 3.1.0, as.data.frame.labelled or as.data.frame.list quit working when lengths vary; workaround
* predictrms, ols: handle offset in formula. Thanks: Max Gordon
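The 3rd and 4th predictor-settings lists noted above enable double-difference (interaction) contrasts. A minimal sketch, fitting an interaction model to the hypothetical data d from earlier:

  h2 <- lrm(y ~ x1 * sex, data=d, x=TRUE, y=TRUE)
  # (male: x1=1 vs. x1=0) minus (female: x1=1 vs. x1=0)
  contrast(h2, list(sex='male',   x1=1), list(sex='male',   x1=0),
               list(sex='female', x1=1), list(sex='female', x1=0))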
* pentrace: neatened code, added new argument noaddzero if user wants to prevent unpenalized model from being tried; added new test script in tests
* bplot: fixed bug whereby xlabrot was ignored. Thanks: Sven Krackow; new test for bplot in tests directory
* plot.Predict: fixed bug in which 2nd argument to perim was not correct
* validate.ols: Shane McIntosh fixed the passing of the tolerance argument to predab.resample
* predictrms: computed offset earlier so always defined no matter the value of type
* plot.Predict: added scaletrans argument, fixed use of subscripts in pan
* lrm, lrm.fit: added scale argument
* orm, orm.fit: added scale argument
* vcov.orm: accounted for scale when extracting covariance matrix
* npsurv: fixed bug whereby type argument was not being passed
* npsurv: started storing all classes created by survfit.formula
* logLik.Gls: added. Makes AIC(Gls object) work.
* NAMESPACE: several changes

Changes in version 4.2-0 (2014-04-13)
* Deprecated survfit.formula so would not overlap with function in survival
* Added function npsurv, survplot.npsurv
* REMOVED survfit.formula
* Used new type argument to label.Surv for fitting functions
* cph: added weights argument to residuals.coxph (Thanks: Thomas Lumley)
* survfit.cph: fixed bug in using wrong type variable. Thanks: Zhiyuan Sun
* cph: added weighted=TRUE in call to residuals.coxph (Thanks: T Lumley)
* orm.fit: improved ormfit to not try to deal with NaN in V, assuming that step-halving will happen

Changes in version 4.1-3 (2014-03-02)
* num.intercepts: removed (is in Hmisc)
* survfit.formula, cph, psm: changed to use inputAttributes attribute of Surv objects (introduced earlier in survival package so that rms could drop its customized Surv function)
* Exported survfit.formula
* Changed survival fitting functions and residuals to use units.Surv

Changes in version 4.1-2 (2014-02-28)
* psm: fixed bug to allow computation of Dxy with left censoring
* val.prob: fixed recently introduced bug that made calibration intercept and slope always 0,1. Thanks: Lars.Engerstrom@lio.se
* plot.Predict: added between to leave space between panels
* orm.fit: fixed error in kmid calculation when heavy ties at first level of y. Thanks: Yuwei Zhu
* setPb: changed default to now use tcltk to show progress bars for simulations
* predictrms: fixed bug with type='terms'
* val.surv: handle case where survival estimates=0 or 1 when using log-log transform

Changes in version 4.1-1 (2014-01-22)
* Removed use of under.unix in anova.rms, latex.summary, plot.nomogram
* Removed use of oldUnclass, oldClass, is.category
* Fixed class of Rq object; had failed with bootcov. Thanks: Max Gordon
* survplot: preserved par()
* Srv: removed, changed all uses to Surv() for new survival package that preserves attributes for Surv inputs
* survplot.survfit, survdiffplot: added conf='diffbands'
* predictrms: fixed num. intercepts calculation order
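A minimal sketch of npsurv and the conf='diffbands' option above, with hypothetical right-censored data (Surv comes from the survival package, loaded with rms):

  require(survival)
  et <- data.frame(futime = rexp(100, .1),
                   event  = rbinom(100, 1, .7),
                   treat  = factor(sample(c('a', 'b'), 100, TRUE)))
  s <- npsurv(Surv(futime, event) ~ treat, data=et)
  survplot(s, conf='diffbands')   # shaded bands sized for between-curve differences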
* survplot, survdiffplot: used original standard error for survdiffplot, and fun

Changes in version 4.1-0 (2013-12-05)
* Fixed orm.fit to not create penalty matrix if not needed (penalties are not yet implemented anyway)
* Added yscale argument to plot.Predict
* Added Wald test simulation to orm help file
* Added example in help file for plot.anova.rms of adding a line combining the effects of two predictors in dot chart
* Fixed grid interpretation error in survplot.survfit
* Changed plot.anova.rms to use dotchart3 instead of dotchart2
* Fixed bug in summary.rms - was taking reciprocal of effect ratio with orm even if not loglog family (thanks: Yong Hao Pua)
* Removed link to print.lm, summary.lm in ols.Rd
* Added ntrans argument to plot.anova.rms
* Fixed handling of intercepts in Rq, validate.Rq
* Removed residuals.Glm, residuals.rms (also from Rd, NAMESPACE)
* Removed other .rms methods and other remnants from fooling S+ dispatcher
* Fixed bug in lm.pfit when penalty used (thanks: Yong Hao Pua)
* Fixed bug in calibrate.default for ols (thanks: Andy Bush)
* Changed print.contrast.rms to insert NA for SE if fun is not the identity function
* Added margin argument to plot.anova.rms to print selected stats in right margin of dot chart
* Added anova argument to plot.Predict to allow overall association test statistics to be added to panels
* Fixed bug in val.prob in which the logistic model was re-fitted instead of fixing coefficients at 0,1. This resulted in model statistics (including c-index) always being favorable even when predictions were worse than random. Thanks: Kirsen Van Hoorde
* Fixed bug in survdiffplot where conf.int was always overridden by value from survfit. Thanks: Kamil Fijorek
* Fixed bug in grid= for survplot.* and survdiffplot. Thanks: Kamil Fijorek
* Fixed rms.s to account for possible offset in names(nmiss). Thanks: Larry Hunsicker
* Fixed psm.s to not compute Dxy if simple right censoring is not in effect. Thanks: I.M. Nolte
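The margin argument above prints chosen statistics beside the anova dot chart, and the anova argument to plot.Predict adds the same tests to prediction panels. A minimal sketch reusing the hypothetical fit f:

  an <- anova(f)
  plot(an, margin=c('chisq', 'P'))   # Wald chi-square and P-value in right margin
  plot(Predict(f), anova=an)         # association statistics added to each panel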
* rcs: respected system option fractied, passed to rcspline.eval; can be used to get old behavior
* Gls: as nlme 3.1-113 exports more functions, removed nlme:::

Changes in version 4.0-0 (2013-07-10)
* Cleaned up label logic in Surv, made it work with interval2 (thanks: Chris Andrews)
* Fixed bug in val.prob - wrong denominator for Brier score if obs removed for logistic calibration
* Fixed inconsistency in predictrms where predict() for Cox models used a design matrix that was centered on medians and modes rather than means (thanks: David van Klaveren)
* Added mean absolute prediction error to Rq output
* Made pr argument passed to predab.resample more encompassing
* Fixed logLik method for ols
* Made contrast.rms and summary.rms automatically compute bootstrap nonparametric confidence limits if fit was run through bootcov
* Fixed bug in Predict where conf.type='simultaneous' was being ignored if bootstrap coefficients were present
* For plot.Predict made default gray scale shaded confidence bands darker
* For bootcov exposed eps argument to fitters and defaulted it to a lower value
* Fixed bug in plot.pentrace regarding effective.df plotting
* Added setPb function for pop-up progress bars for simulations; turn off using options(showprogress=FALSE) or options(showprogress='console')
* Added progress bars for predab.resample (for validate, calibrate) and bootcov
* Added bootBCa function
* Added seed to bootcov object
* Added boot.type='bca' to Predict, contrast.rms, summary.rms
* Improved summary.rms to use t critical values if df.residual defined
* Added simultaneous contrasts to summary.rms
* Fixed calculation of Brier score, g, gp in lrm.fit by handling special case of computing linear predictor when there are no predictors in the model
* Fixed bug in prModFit preventing successful latex'ing of penalized lrms
* Removed \synopsis from two Rd files
* Added prmodsel argument to predab.resample
* Corrected Rd files to change Design to rms
* Restricted NAMESPACE to functions expected to be called by users
* Improved Fortran code to use better dimensions for array declarations
* Added the basic bootstrap for confidence limits for bootBCa, contrast, Predict, summary
* Fixed bug in latex.pphsm, neatened pphsm code
* Neatened code in rms.s
* Improved code for bootstrapping ranks of variables in anova.rms help file
* Fixed bug in Function.rms - undefined Nam[[i]] if strat. Thanks: douglaswilkins@yahoo.com
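A minimal sketch of the bootstrap machinery above (bootcov stores bootstrap coefficients; boot.type='bca' then gives BCa limits), reusing the hypothetical fit f, which was created with x=TRUE, y=TRUE:

  set.seed(2)
  fb <- bootcov(f, B=300, coef.reps=TRUE)
  summary(fb, boot.type='bca')       # BCa nonparametric confidence limits
  Predict(fb, x1, boot.type='bca')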
* Made quantreg be loaded at end of search list in Rq so it doesn't override latex generic in Hmisc
* Improved plot.summary.rms to use blue of varying transparency instead of polygons to show confidence intervals, and to use only three confidence levels by default: 0.9, 0.95, 0.99
* Changed Surv to Srv; use of Surv in fitting functions will result in lack of time labels and assumption of Day as time unit; no longer override Surv in survival
* Changed calculation of Dxy (and c-index) to use survival package survConcordance service function when analyzing (censored) survival time; very fast
* Changed default dxy to TRUE in validate.cph, validate.psm
* Dxy is now negated if correlating Cox model log relative hazard with survival time
* Removed dxy argument from validate.bj as it is always computed
* Added Dxy to standard output of cph, psm
* Added help file for Srv
* Removed reference to ps.slide from survplot help page
* Added the general ordinal regression fitting function orm (and orm.fit) which efficiently handles thousands of intercepts because of sparse matrix representation of the information matrix; implements 5 distribution families
* Added associated functions print.orm, vcov.orm, predict.orm, Mean.orm, Quantile.orm, latex.orm, validate.orm
* Changed predab.resample to allow number of intercepts to vary from resample to resample
* Fixed bug in Mean.cph (thanks: Komal Kapoor)
* Removed incl.non.slopes and non.slopes arguments from all predict methods
* Changed all functions to expect predict(..., type='x') to not return intercept columns, and all fitting functions to not store column of ones if x=TRUE
* Changed nomogram argument intercept to kint, used default as fit$interceptRef
* Made bootcov behave in a special way for orm, to use linear interpolation to select a single intercept targeted at median Y
* Revamped all of rms to never store intercepts in design matrices in fit objects and to add intercepts on demand inside predictrms
* Added new function generator ExProb to compute exceedance probabilities from orm fits

Changes in version 3.6-3 (2013-01-11)
* Added Li-Shepherd residuals in residuals.lrm.s; these become the new default (same as ordinary residuals for binary models)
* Removed glm null fit usage as this is no longer in R

Changes in version 3.6-2 (2012-12-09)
* bootcov, predab.resample: captured errors in all fits (to ignore bootstrap rep) using tryCatch. Thanks: Max Gordon
* predab.resample: made as.matrix(y) conditional to handle change in the survival package whereby the "type" attribute did not exist for a matrix
* anova.rms: added new parameter vnames to allow use of variable labels instead of names in anova table; added vinfo attribute
* residuals.lrm: removed intercept from partial residuals for binary models
* moved comprehensive examples in rmsOverview to ~/rms/demo/all.R; greatly speeds up package checking but demo needs to be run separately for better checking, using demo(all, 'rms')
* Fixed survfit.formula to not use .Global environment

Changes in version 3.6-1 (2012-11-05)
* bootcov: set loglik to default to FALSE and added code to fill in missing intercepts in coef vector for prop. odds model when levels of Y not resampled; set coef.reps to default to TRUE
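A minimal sketch of orm named above, adding a hypothetical continuous response y2 to the data frame d (Mean.orm, Quantile.orm, and ExProb provide functions of the linear predictor):

  d$y2 <- round(d$x1 + rnorm(100), 1)
  h <- orm(y2 ~ x1 + sex, data=d, x=TRUE, y=TRUE)
  print(h)          # many intercepts handled via sparse information matrix
  ex <- ExProb(h)   # generator for exceedance probabilities Prob(Y >= y | X)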
* Predict: implemented fun='mean' to get proper penalty for estimating the mean function for proportional odds models
* Added usebootcov argument to Predict to allow the user to force the use of bootstrap covariance matrix even when coef.reps=TRUE was in effect for bootcov

Changes in version 3.6-0 (2012-10-26)
* Gls: updated optimization calls - had become inconsistent with gls and failed if > 1 correlation parameter (thanks: Mark Seeto); removed opmeth argument
* print.fastbw: added argument: estimates
* survplot.survfit: handled fact that survival:::summary.survfit may not preserve order of strata levels. Also fixed survfit.cph and cph; Thanks: William.Fulp@moffitt.org
* plot.Predict: added example showing how to rename variables in plot
* print(fit object, latex=TRUE): added latex.naprint.delete, used new Hmisc latexDotchart function to make a dot chart of number of NAs due to each model variable if at least 4 variables have NAs
* added trans argument to plot.anova.rms to allow transformed scales
* Corrected cph to use model.offset(); thanks: Simon Thornley
* Changed latex.anova.rms to use REGRESSION instead of TOTAL label
* Changed gendata, contrast.rms to allow expand=FALSE to prevent expand.grid from being called to generate all combinations of predictors
* Added type= to plot.Predict to allow user to specify a different default line/point type (especially useful when x is categorical)
* Corrected bug in offset in psm - made default offset the length of X
* Corrected bug in calibrate.psm (fixed -> parms)
* predab.resample, calibrate.cph, calibrate.default, calibrate.psm: stopped putting results from overall initial fit into .Global and instead had predab.resample put them in attribute keepinfo, obtained from measure()

Changes in version 3.5-0 (2012-03-24)
* contrast.rms: saved conf.type and conf.int in returned object, added to print method
* Added debug= to predab.resample so user can see all the training and test sample subscripts
* Added validate.Rq function
* Fixed bug in Rq that caused 2 copies of fitted.values to be in fit object, which caused fit.mult.impute to double fitted.values
* Added example showing how to reorder predictors if using plot(Predict(fit))
* Added new function perlcode written by Jeremy Stephens and Thomas Dupont; converts result of Function to Perl code
* Fixed partial argument matches in many functions to pass new R checks
* Changed matrx and DesignAssign to allow validate.Rq to consider null models; neatened code

Changes in version 3.4-0 (2012-01-17)
* psm: fixed logcorrect logic (thanks: Rob Kushler)
* Added suggested package multcomp (required for simultaneous CLs)
* Implemented simultaneous confidence intervals in Predict, predictrms, contrast.rms, all specific model predict methods
* Added multiplicity adjustment for individual confidence limits computed by contrast.rms, to preserve family-wise coverage using multcomp package
* Improved rbind.Predict to preserve order of groups as presented, as levels of .set.
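The fun='mean' feature above pairs naturally with the orm sketch earlier (the datadist dd must be in effect):

  Predict(h, x1, fun='mean')   # estimated mean of y2 as a function of x1, with proper penalty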
* Added example for plot.Predict showing how to suppress plotting of predictions for certain intervals/groups
* Added example in plot.Predict help file for graphing multiple types of confidence bands simultaneously

Changes in version 3.3-3 (2011-12-06)
* robcov: used vcov to get var-cov matrix
* vcov.Glm: gave precedence to $var object in fit
* Added residuals.Glm to force call to residuals.glm, and made robcov fail as type="score" is not implemented for glm
* Fixed bootcov for Glm to sense NA in coefficients and skip that iteration
* Fixed digit -> digits error in latex.rms
* Fixed f$coef error in pentrace; thanks christopher.hane@optum.com
* Added new feature for Predict() to plot bootstrap nonparametric confidence limits if fit was run through bootcov with coef.reps=TRUE
* Added ylim argument to plot.residuals.lrm

Changes in version 3.3-2 (2011-11-09)
* calibrate.default: added var-cov matrix to ols objects
* print.lrtest: discarded two formula attributes before printing
* Added digits, size, and after arguments for latex methods for model fits, made before argument work with inline=TRUE, changed \needspace to \Needspace in latex.validate and prModFit
* latex: fixed to consider digits for main effects
* plot.xmean.ordinaly: added new argument cex.points
* print.lrm: improved printing of -2 LL overall penalty
* plot.calibrate.default: invisibly return prediction errors
* plot.Predict: added cex.axis argument to pass to x scales; added subdata
* print.pentrace: neatened up output
* added title as an argument to all high-level function print methods
* prModFit: fixed bug where Score chi2 was not translated to LaTeX
* prModFit: changed to use LaTeX longtable style for coefficients etc.
* prModFit: added arguments long and needspace
* prModFit: suppressed title if title=""
* rmsMisc: added nobs.rms and added nobs to object returned by logLik.rms
* Added new argument cex.points to plot.xmean.ordinaly
* Changed example in anova.rms to use reorder instead of reorder.factor

Changes in version 3.3-1 (2011-06-01)
* Added new example for anova.rms for making dot plots of partial R^2 of predictors
* Defined logLik.ols (calls logLik.lm)
* Fixed and cleaned up logLik.rms, AIC.rms
* Fixed residuals.psm to allow other type= values used by residuals.survreg
* Fixed Predict and survplot.rms to allow for case where no covariates present
* Fixed bug in val.prob where Eavg wasn't being defined if pl=FALSE (thanks: Ben Haller)
* Fixed bug in Predict so that it could get a list or vector from predictrms
* Fixed latex.rms to not treat * as a wild card in various contexts (may be interaction)
* Fixed predictrms to temporarily get std.err if conf.int requested even if std.err was not; omitted std.err in returned object if not wanted
* Enhanced plot.Predict to allow plots for different predictors to be combined, after running rbind.Predict (varypred argument)
* Also enhanced to allow groups= and cond= when varying the predictors
* Corrected bug where it sometimes would try to plot confidence limits when conf.int=FALSE was given to Predict
* Added india, indnl arguments to anova.rms to suppress printing individual tests of interaction/nonlinearity
* Changed anova.rms so that if all non-summary terms have (Factor+Higher Order Factor) in their labels, this part of the labels is suppressed (useful with india and indnl)

Changes in version 3.3-0 (2011-02-28)
* In survplot.rms, fixed bug (curves were undefined if conf='bands' and labelc was FALSE)
* In survfit.cph, fixed bug by which n wasn't always defined
* In cph, put survival::: on exact fit call
* Quit ignoring zlim argument in bplot; added xlabrot argument
* Added caption argument for latex.anova.rms
* Changed predab to not print summaries of variables selected if bw=TRUE
* Changed predab to pass force argument to fastbw
* fastbw: implemented force argument
* Added force argument to validate.lrm, validate.bj, calibrate.default, calibrate.cph, calibrate.psm, validate.cph, validate.ols
* print.validate: added B argument to limit how many resamples are printed summarizing variables selected if bw=TRUE
* print.calibrate, print.calibrate.default: added B argument
* Added latex method for results produced by validate functions
* Fixed survest.cph to convert summary.survfit std.err to log S(t) scale
* Fixed val.surv by pulling surv object from survest result
* Clarified in predict.lrm help file that it doesn't always use the first intercept
* lrm.fit, lrm: linear predictor stored in fit object now uses first intercept and not middle one (NOT DOWNWARD COMPATIBLE but makes predict work when using stored linear.predictors)
* Fixed argument consistency with validate methods

Changes in version 3.2-0 (2011-02-14)
* Changed to be compatible with survival 2.36-3 which is now required
* Added logLik.rms and AIC.rms functions to be compatible with standard R
* Fixed oos.loglik.Glm
* Fixed bootcov related to nfit='Glm'
* Fixed (probably) old bug in latexrms with strat predictors

Changes in version 3.1-0 (2010-09-12)
* Fixed gIndex to not use scale for labeling unless character
* Changed default na.action in Gls to na.omit and added a note in the help file that na.delete does not work with Gls
* Added terms component to Gls fit object (latex was not working)
* Added examples in robcov help file testing sandwich covariance estimator
* Added reference related to the effects package under help file for plot.Predict
* Added more examples and simulations to gIndex
* Fixed ancient bug in lrm.fit Fortran code to handle case where initial estimates are nearly perfect (was trying to step halve); thanks: Dan Hogan
* Changed survdiffplot to use gray(.85) for bands instead of gray(.95)
* Fixed formatting problem in print.psm
* Added prStats and reVector functions to rmsMisc.s
* Changed formatting of all print.* functions for model fits to use new prStats function
* Added latex=TRUE option to all model fit print methods; requires LaTeX package needspace
* Re-wrote printing routines to make use of a more general model
* Removed long and scale options from cph printing-related routines
* Prepared for version 2.36-1 of survival package by adding censor=FALSE argument to survfit.coxph
* Added type="ccterms" to various predict methods
* Made type="ccterms" the default for partial g-indexes in gIndex, i.e., combine all indirectly related (through interactions) terms
* Added Spiegelhalter calibration test to val.prob
* Added a check in cph to trigger an error if strata() is used in formula
* Fixed drawing of polygon for shaded confidence bands for survplot.survfit (thanks to Patrick Breheny)
* Changed default adjust.subtitle in bplot to depend on ref.zero, thanks to David Winsemius
* Used a namespace and simplified references to a few survival package functions that survival actually exports

Changes in version 3.0-0 (2010-05-16)
* Made Gls not store data label() in residuals object, instead storing a label of 'Residuals'
* Fixed handling of na.action and check for presence of offsets in Glm
* Added type="cterms" to predict methods; computes combined terms for main effects + any interaction terms involving that main effect; in preparation for new geffects function
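A minimal sketch of the latex printing and g-index features above, reusing the hypothetical fit f (the LaTeX output requires the needspace LaTeX package):

  print(f, latex=TRUE)   # typeset model statistics and coefficients
  gIndex(f)              # total and partial g-indexes; partials use type='ccterms'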
* Added GiniMd and gIndex functions
* Changed lrm (lrm.fit) to use the middle intercept in computing Brier score
* Added 3 g-indexes to lrm fits
* Added 1 g-index to ols, Rq, Glm, Gls fits
* Added 2 g-indexes to cph, psm fits
* Added g to validate.ols, .lrm, .cph, .psm, but not to validate.bj
* Added print.validate to set default digits to 4
* Changed validate.lrm to compute 3 indexes even on ordinal response data

Changes in version 2.2-0 (2010-02-23)
* Added levels.only option to survplot.* to remove variablename= from curve labels
* Added digits argument to calibrate.default
* Added new ref in val.prob help page
* Corrected location of dataset in residuals.lrm help page (thanks frederic.holzwarth@bgc-jena.mpg.de)
* Fixed latex.rms to latex-escape percent signs inside value labels
* Added scat1d.opts to plot.Predict
* Changed method of specifying variables to vary by not requiring an equals sign and a dot after the variable name, for Predict, summary, nomogram, gendata, survplot.rms
* Added factors argument to Predict to handle the above for survplot
* Made gendata a non-generic function, changed the order of its arguments, removed editor options, relying on the R de function always
* Thanks to Kevin Thorpe for making latex.summary.rms and latex.anova.rms respect the table.env argument
* Fixed bug in calibrate.default related to digits argument
* Re-wrote bplot to use lattice graphics (e.g., levelplot, contourplot, wireframe), allowing for multiple panels for 3-d plots
* Changed all Rd files to use {arg1,arg2,...} instead of having empty {}

Changes in version 2.1-0 (2009-09-30)
* Made Predict not return invisibly if predictors not specified
* New option nlines for plot.Predict for getting line plots with 2 categorical predictors
* Added rename option to rbind.Predict to handle case where predictor name has changed between models
* Added ties=mean to approx( ) calls that did not have ties= specified
* Added nlevels argument to bplot to pass to contour
* Added par argument to iLegend - list to pass to par().
* Redirected ... argument to iLegend to image( ).
* Fixed groupkm - was printing warning messages wrongly
* Added new semiparametric survival prediction calibration curve method in val.surv for external validation; this is the first implementation of smooth calibration curves for survival probability validation with right-censored data
* Fixed calibrate confidence limits from groupkm
* Added smooth calibration curve using hare (polspline package) for calibrate.cph and calibrate.psm
* Added display of predicted risks for cph and psm models even for the stratified KM method (old default)
rms/R/0000755000176200001440000000000014024533360011254 5ustar liggesusersrms/R/ggplot.Predict.s0000644000176200001440000006304513670041322014333 0ustar liggesusersggplot.Predict <- function(data, mapping, formula=NULL, groups=NULL, aestype=c('color', 'linetype'), conf=c('fill', 'lines'), conflinetype=1, varypred=FALSE, sepdiscrete=c('no', 'list', 'vertical', 'horizontal'), subset, xlim., ylim., xlab, ylab, colorscale=function(...)
scale_color_manual(..., values=c("#000000", "#E69F00", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7")), colfill='black', rdata=NULL, anova=NULL, pval=FALSE, size.anova=4, adj.subtitle, size.adj=2.5, perim=NULL, nlevels=3, flipxdiscrete=TRUE, legend.position='right', legend.label=NULL, vnames=c('labels', 'names'), abbrev=FALSE, minlength=6, layout=NULL, addlayer, histSpike.opts=list(frac=function(f) 0.01 + 0.02 * sqrt(f - 1)/sqrt(max(f, 2) - 1), side=1, nint=100), type=NULL, ggexpr=FALSE, height=NULL, width=NULL, ..., environment) { isbase <- Hmisc::grType() == 'base' ## vs. 'plotly' if(! isbase && length(anova)) stop('anova not yet implemented for grType plotly') lhw <- length(height) + length(width) if(isbase && lhw) warning('height and width ignored for non-plotly graphics') plrend <- if(isbase) function(obj, ...) obj else function(obj, final=TRUE) { if(final && (length(width) > 0 || length(height) > 0)) plotly::ggplotly(obj, height=height, width=width) else plotly::ggplotly(obj) } comb <- function(plist, nrow=1, ncol=1, ...) { ## Note: subplot does not take an ncols argument if(isbase) return(do.call(arrGrob, c(plist, list(nrow=nrow, ncol=ncol), list(...)))) z <- do.call(plotly::subplot, c(plist, list(nrows=nrow, titleX=TRUE, titleY=TRUE, margin=0.065), list(...))) if(lhw) { z <- plotly::plotly_build(z) z$layout$height <- height z$layout$width <- width } # if(lhw) z <- layout(z, height=height, width=width) # also works z } if(! length(formula) && ! missing(mapping)) formula <- mapping ## .xlim, .ylim instead of xlim, ylim to distinguish from ggplot functions sepdiscrete <- match.arg(sepdiscrete) class(data) <- setdiff(class(data), 'Predict') ## so won't involve ggplot.Predict if(varypred) { data$.predictor. <- data$.set. data$.set. <- NULL } predpres <- length(data$.predictor.) > 0 if(predpres && missing(legend.position)) legend.position <- 'top' conf <- match.arg(conf) vnames <- match.arg(vnames) maddlayer <- missing(addlayer) if(! maddlayer) addlayer <- paste(deparse(substitute(addlayer)), collapse=' ') ribbonargs <- sprintf("alpha=0.2, linetype=0, fill=I('%s'),show.legend=FALSE", colfill) dohist <- function(...) { so <- histSpike.opts do.call('histSpikeg', c(list(...), so)) } info <- attr(data, 'info') at <- info$Design label <- at$label units <- at$units adjust <- info$adjust varying <- info$varying conf.int <- info$conf.int pmlabel <- character(length(label)) names(pmlabel) <- names(label) for(i in 1 : length(label)) { rl <- if(isbase) as.character(labelPlotmath(label[i], units[i])) else markupSpecs$html$varlabel(label[i], units[i]) if(length(rl)) pmlabel[i] <- rl } if(predpres) data$.Predictor. <- if(vnames != 'labels') data$.predictor. else pmlabel[as.character(data$.predictor.)] glabel <- function(gname, j=1, chr=FALSE) { r <- if(! length(legend.label)) if(isbase) parse(text=pmlabel[gname]) else pmlabel[gname] else if(is.logical(legend.label)) '' else legend.label[j] if(is.expression(r)) { if(chr) r <- sprintf('expression(%s)', as.character(r)) } else { qc <- if(length(grep("'", r))) '"' else "'" r <- paste0(qc, r, qc) } r } ## Function to create expression( ) or "" depending on argument expch <- function(x, chr=FALSE) { if(! 
length(x)) 'NULL' else if(is.expression(x)) { if(chr) sprintf('expression(%s)', as.character(x)) else x } else if(grepl('expression\\(', x)) x else deparse(x) } ## Function to construct xlim() or ylim() call limc <- function(limits, which) sprintf("%slim(%s, %s)", which, limits[1], limits[2]) xlimc <- if(missing(xlim.)) '' else paste('+', limc(xlim., 'x')) if(! missing(subset)) { subset <- eval(substitute(subset), data) data <- data[subset,, drop=FALSE] } if(length(groups) == 1 && is.logical(groups) && ! groups) groups <- NULL else if(length(groups)) { if(length(groups) > 2 || !is.character(groups) || any(groups %nin% names(data))) stop('groups must be one or two predictor names') ## geom_ribbon will not handle two aesthetics if(length(groups) == 2) conf.int <- FALSE } else if(! predpres && length(varying) > 1) groups <- varying[2] ## Make all grouping variables discrete for proper aesthetic mapping if(length(groups)) for(v in groups) data[[v]] <- as.factor(data[[v]]) if(missing(ylab)) ylab <- if(isbase) info$ylabPlotmath else info$ylabhtml if(! length(data$lower)) conf.int <- FALSE if(missing(ylim.)) ylim. <- range(pretty( if(conf.int) c(data$yhat, data$lower, data$upper) else data$yhat), na.rm=TRUE) if(missing(adj.subtitle)) adj.subtitle <- length(adjust) > 0 sub <- if(adj.subtitle && length(adjust)==1) paste0('Adjusted to:', adjust) else NULL cap <- expch(sub, chr=TRUE) tanova <- if(length(anova)) function(name, x, y, xlim, ylim, flip=FALSE, empty=FALSE, dataOnly=FALSE) annotateAnova(name, plotmathAnova(anova, pval), x, y, ggplot=TRUE, xlim=xlim, ylim=ylim, size=size.anova, flip=flip, empty=empty, dataOnly=dataOnly) else function(...) {} ## See http://bigdata-analyst.com/best-way-to-add-a-footnote-to-a-plot-created-with-ggplot2.html ## size is in mm # footnote <- function(object, text, size=2.5, color=grey(.5)) # arrGrob(object, sub = grid::textGrob(text, x = 1, hjust = 1.01, # vjust=0.1, gp = grid::gpar(fontsize =size/0.3527778 ))) if(predpres) { ## User did not specify which predictors to plot; all plotted data$.predictor. <- factor(data$.predictor.) if(sepdiscrete != 'no') { ## From http://stackoverflow.com/questions/11979017 ## Changed to assume that each element of labels is a character string ## of the form "expression(....)" if(FALSE) facet_wrap_labeller <- function(gg.plot, labels=NULL) { ## Uses functions from gridExtra g <- ggplotGrob(gg.plot) gg <- g$grobs strips <- grep("strip_t", names(gg)) for(ii in seq_along(labels)) { modgrob <- grid::getGrob(gg[[strips[ii]]], "strip.text", grep=TRUE, global=TRUE) gg[[strips[ii]]]$children[[modgrob$name]] <- grid::editGrob(modgrob,label=eval(parse(text=labels[ii]))) } g$grobs <- gg # class(g) = c("arrange", "ggplot", class(g)) g } ## Determine which predictors are discrete isdiscrete <- function(z) is.factor(z) || is.character(z) || length(unique(z[!is.na(z)])) <= nlevels lp <- setdiff(levels(data$.predictor.), groups) isdis <- sapply(data[lp], isdiscrete) dogroup <- function(type) { v <- if(type == 'continuous') names(isdis)[! isdis] else names(isdis)[isdis] # dat <- subset(data, .predictor. %in% v) ## would not work dat <- data[data$.predictor. %in% v,, drop=TRUE] p <- dat$.predictor. xx <- switch(type, continuous = numeric( nrow(dat)), discrete = character(nrow(dat)) ) lbr <- if(! 
isbase || vnames != 'labels') '' else ', labeller=label_parsed' ## Prepare to create a "super factor" variable by concatenating ## all levels of all categorical variables keeping original orders ## firstLev is first level for each discrete predictor ## Thought was needed with anova but geom_text will take a numeric ## x or y coordinate where factor levels seem to be spaced at 1.0 Lev <- character() ## firstLev <- character(length(v)) ## names(firstLev) <- v for(iv in v) { j <- which(p == iv) datj <- dat[j, iv] if(type == 'continuous') { xx[j] <- datj ## firstLev[iv] <- '' } else { levj <- levels(datj) if(! length(levj)) levj <- unique(datj) Lev <- c(Lev, levj) xx[j] <- as.character(datj) ## firstLev[iv] <- levj[1] } } if(type == 'discrete') { Lev <- unique(Lev) xx <- factor(xx, Lev) } dat$.xx. <- xx if(length(groups)) dat$.co. <- as.factor(dat[[groups]]) ylimc <- limc(ylim., 'y') if(type == 'continuous') { if(length(groups)) g <- sprintf('ggplot(dat, aes(x=.xx., y=yhat, %s=%s)) + labs(x=NULL, y=%s, caption=%s) + %s %s', aestype[1], groups[1], expch(ylab, chr=TRUE), cap, ylimc, xlimc) else g <- sprintf("ggplot(dat, aes(x=.xx., y=yhat)) + labs(x=NULL, y=%s, caption=%s) + %s %s", expch(ylab, chr=TRUE), cap, ylimc, xlimc) g <- c(g, if(length(layout)) sprintf("facet_wrap(~ .Predictor., scales='free_x', ncol=%s%s)", layout[2], lbr) else sprintf("facet_wrap(~ .Predictor., scales='free_x'%s)", lbr), "geom_line()") if(conf.int) { h <- if(conf == 'fill') sprintf("geom_ribbon(aes(x=.xx., ymin=lower, ymax=upper),%s)", ribbonargs) else c("geom_line(aes(x=.xx., y=lower), linetype=conflinetype)", "geom_line(aes(x=.xx., y=upper), linetype=conflinetype)") g <- c(g, h) } if(length(rdata)) { rv <- intersect(v, names(rdata)) rdata <- rdata[c(rv, groups)] ## For each variable in rdata that is in dat, set values ## outside the range in dat to NA. Otherwise x-axes will ## be rescaled to include all raw data values, not just ## points at which predictions are made for(vv in rv) { a <- dat[[vv]] if(is.numeric(a)) { r <- range(a, na.rm=TRUE) b <- rdata[[vv]] i <- b < r[1] | b > r[2] if(any(i)) { b[i] <- NA rdata[[vv]] <- b } } } ## Reshape rdata to be tall and thin rdata <- reshape(as.data.frame(rdata), direction='long', v.names='.xx.', timevar='.Predictor.', varying=rv, times=rv) if(vnames == 'labels') rdata$.Predictor. <- pmlabel[rdata$.Predictor.] form <- 'yhat ~ .xx. + .Predictor.' if(length(groups)) form <- paste(form, '+', paste(groups, collapse='+')) g <- c(g, sprintf("dohist(%s, predictions=dat, data=rdata, ylim=%s)", form, deparse(ylim.))) } } else { # discrete x if(length(groups)) g <- c(sprintf('ggplot(dat, aes(x=yhat, y=.xx., %s=%s))', aestype[1], groups[1]), sprintf("labs(x=%s, y=NULL, caption=%s)", expch(ylab, chr=TRUE), cap)) else g <- c("ggplot(dat, aes(x=yhat, y=.xx.))", sprintf("labs(x=%s, y=NULL, caption=%s)", expch(ylab, chr=TRUE), cap)) if(! maddlayer) g <- c(g, addlayer) g <- c(g, limc(ylim., 'x'), sprintf("facet_wrap(~ .Predictor., scales='free_y'%s)", lbr), "geom_point()") if(conf.int) g <- c(g, "geom_errorbarh(aes(y=.xx., xmin=lower, xmax=upper), height=0)") } ## anova annotations need to be created for all variables being ## plotted with faceting, and annotation information must be ## based on a dataset with the information and the .Predictor. ## variable, and geom_text() must be used instead of annotate() ## See http://stackoverflow.com/questions/2417623 if(length(anova)) { .xx. <- yhat <- .label. <- hjust <- vjust <- NULL for(iv in v) { j <- which(data$.predictor. 
== iv) datj <- data[j,, drop=FALSE] xv <- datj[, iv] xx <- switch(type, continuous = xv, discrete = as.numeric(xv)) yy <- datj[, 'yhat'] if(conf.int) { xx <- c(xx, xx, xx) yy <- c(yy, datj[, 'lower'], datj[, 'upper']) } xlim. <- if(is.factor(xv)) c(1, length(levels(xv))) else range(pretty(xv)) tan <- tanova(iv, xx, yy, xlim., ylim., dataOnly=TRUE, flip=type=='discrete', empty=type == 'discrete') ## .xx. <- c(.xx., if(type == 'discrete') firstLev[iv] else tan$x) .xx. <- c(.xx., tan$x) yhat <- c(yhat, tan$y) .label. <- c(.label., tan$label) hjust <- c(hjust, tan$hjust) vjust <- c(vjust, tan$vjust) } .anova. <- data.frame(.Predictor. = if(vnames != 'labels') v else pmlabel[v], .xx., yhat, .label., hjust, vjust) g <- c(g, sprintf("geom_text(aes(label=.label., hjust=hjust, vjust=vjust), size=size.anova, nudge_y=%s, data=.anova., parse=TRUE, show.legend=FALSE)", if(type == 'discrete') -0.25 else 0)) } g <- paste(g, collapse=' + ') if(ggexpr) return(g) g <- eval(parse(text = g)) g } # end dogroup function gcont <- if(any(! isdis)) dogroup('continuous') gdis <- if(any( isdis)) dogroup('discrete') if(ggexpr) return(list(continuous=gcont, discrete=gdis)) r <- mean(! isdis) return(if(length(gcont) && length(gdis)) switch(sepdiscrete, list = list(continuous = plrend(gcont), discrete = plrend(gdis )), vertical = comb(list(plrend(gcont, final=FALSE), plrend(gdis, final=FALSE)), nrow=2, heights=c(r, 1-r)), horizontal= comb(list(plrend(gcont, final=FALSE), plrend(gdis, final=FALSE)), ncol=2, widths =c(r, 1-r))) else if(length(gcont)) plrend(gcont) else plrend(gdis)) } # end if(sepdiscrete) ## Form separate plots and combine at end p <- data$.predictor. levs <- at <- labels <- limits <- list() lp <- setdiff(levels(p), groups) np <- length(lp) .co <- if(length(groups)) as.factor(data[[groups]]) if(! length(layout)) layout <- if(np <= 4) c(2,2) else if(np <= 6) c(2,3) else if(np <= 9) c(3,3) else if(np <=12) c(3,4) else if(np <=16) c(4,4) else if(np <=20) c(4,5) else ceil(rep(sqrt(np), 2)) # pushViewport(viewport(layout = grid.layout(layout[1], layout[2]))) Plt <- list() jplot <- 0 for(w in lp) { jplot <- jplot + 1 i <- p == w z <- data[i, w] l <- levels(z) if(abbrev) { l <- abbreviate(l, minlength=minlength) levels(z) <- l } ll <- length(l) xlim. <- if(ll) c(1, ll) else range(pretty(z)) yhat <- data[i, 'yhat'] xl <- if(vnames == 'labels') { if(isbase) parse(text=pmlabel[w]) else pmlabel[w] } else w zz <- data.frame(.xx.=z, .yhat=yhat) if(length(formula)) zz <- cbind(zz, data[i, all.vars(formula), drop=FALSE]) if(conf.int) { zz$lower <- data[i, 'lower'] zz$upper <- data[i, 'upper'] } if(length(.co)) { zz$.cond <- .co[i] g <- sprintf( 'ggplot(zz, aes(x=.xx., y=.yhat, %s=.cond))', aestype[1]) } else g <- 'ggplot(zz, aes(x=.xx., y=.yhat))' xdiscrete <- is.factor(z) || is.character(z) || length(unique(z[!is.na(z)])) <= nlevels flipped <- FALSE if(xdiscrete) { if(flipxdiscrete && ! 
is.numeric(z)) { g <- c(g, 'coord_flip()') flipped <- TRUE } g <- c(g, 'geom_point()', if(length(type) && type %in% c('l', 'b')) 'geom_line()') if(is.numeric(z)) g <- c(g, sprintf('scale_x_discrete(breaks=%s)', deparse(unique(z)))) } else { if(length(type)) g <- c(g, switch(type, p='geom_point()', l='geom_line()', b='geom_point() + geom_line()')) else g <- c(g, 'geom_line()') } ## Need the following or geom_ribbon will improperly clip regions if(flipped) g <- c(g, limc(ylim., 'y')) else g <- c(g, sprintf('coord_cartesian(ylim=%s)', deparse(ylim.))) g <- c(g, sprintf('labs(x=%s, y=%s, caption=%s)', expch(xl, chr=TRUE), expch(ylab, chr=TRUE), cap), "theme(plot.margin = grid::unit(rep(0, 4), 'cm'))") ## use rep(.1, 4) if using print(..., viewport=...) for multiple plots if(length(groups)) { #### ?? # if(nr == 1 && nc == 1) { if(jplot == 1) { colFun <- if(aestype[1] == 'color') colorscale else get(paste('scale', aestype[1], 'discrete', sep='_')) groupLabel <- glabel(groups[1], chr=TRUE) g <- c(g, if(aestype[1] == 'size') sprintf("colFun(name=%s, range=c(.2, 1.5))", groupLabel) else sprintf("colFun(name=%s)", groupLabel)) g <- c(g, sprintf("theme(legend.position='%s')", legend.position)) } else g <- c(g, "theme(legend.position='none')") } xa <- if(conf.int) c(zz$.xx., zz$.xx., zz$.xx.) else zz$.xx. ya <- if(conf.int) c(zz$.yhat, zz$lower, zz$upper) else zz$.yhat g <- c(g, sprintf("tanova(w, xa, ya, %s, %s, flip=FALSE)", deparse(xlim.), deparse(ylim.))) ## was flip=flipped if(! maddlayer) g <- c(g, addlayer) if(conf.int) { h <- if(ll || xdiscrete) "geom_errorbar(aes(ymin=lower, ymax=upper), width=0)" else { if(conf == 'fill') sprintf("geom_ribbon(aes(ymin=lower, ymax=upper), %s)", ribbonargs) else c("geom_line(aes(x=.xx., y=lower), linetype=conflinetype)", "geom_line(aes(x=.xx., y=upper), linetype=conflinetype)") ## geom_ribbon with fill=NA draws vertical lines at ## ends of confidence regions } g <- c(g, h) } if(length(formula)) g <- c(g, sprintf("facet_grid(%s)", deparse(formula))) if(! is.factor(z) && length(rdata) && w %in% names(rdata)) { rdata$.xx. <- rdata[[w]] if(length(.co)) { rdata$.cond <- rdata[[groups]] form <- '.yhat ~ .xx. + .cond' } else form <- '.yhat ~ .xx.' g <- c(g, sprintf("dohist(%s, predictions=zz, data=rdata, ylim=%s)", form, deparse(ylim.))) } # print(g, vp = viewport(layout.pos.row=nr, layout.pos.col=nc)) g <- paste(g, collapse = ' + ') if(ggexpr) return(g) g <- eval(parse(text=g)) Plt[[jplot]] <- g } res <- if(jplot == 1) plrend(Plt[[1]]) else { for(j in 1 : jplot) Plt[[j]] <- plrend(Plt[[j]], final=FALSE) comb(Plt, nrow=layout[1], ncol=layout[2]) } # if(length(sub)) { # Plt <- if(isbase) footnote(Plt, sub, size=size.adj) # else # plotly::layout(p, title=sub, margin=0.03) # } return(res) } else { # .predictor. not included; user specified predictors to show v <- varying xn <- v[1] ## name of x-axis variable (first variable given to Predict) if(missing(xlab)) xlab <- if(isbase) parse(text = pmlabel[xn]) else pmlabel[xn] xv <- data[[xn]] xdiscrete <- is.factor(xv) || is.character(xv) || length(unique(xv[!is.na(xv)])) <= nlevels if(length(perim)) { j <- if(! length(groups)) perim(xv, NULL) else perim(xv, data[[groups[1]]]) data$yhat[! j] <- NA if(conf.int) data$lower[! j] <- data$upper[! 
j] <- NA } ae <- paste0('aes(x=', xn, ', y=yhat') if(length(groups)) for(j in 1 : length(groups)) ae <- paste0(ae, ', ', aestype[j], '=', groups[j]) #### ae <- eval(parse(text=paste0(ae, ')'))) ae <- paste0(ae, ')') g <- c(sprintf("ggplot(data, %s)", ae), sprintf("labs(x=%s, y=%s, caption=%s) %s", expch(xlab, chr=TRUE), expch(ylab, chr=TRUE), cap, xlimc)) flipped <- FALSE if(xdiscrete) { if(flipxdiscrete && ! is.numeric(xv)) { g <- c(g, "coord_flip()") flipped <- TRUE } g <- c(g, "geom_point()", if(length(type) && type %in% c('l', 'b')) "geom_line()" ) if(conf.int) g <- c(g, "geom_errorbar(aes(ymin=lower, ymax=upper), width=0)") } else { if(length(type)) g <- c(g, switch(type, p="geom_point()", l="geom_line()", b="geom_point() + geom_line()")) else g <- c(g, "geom_line()") if(length(groups)) { for(j in 1 : length(groups)) { # colFun <- if(aestype[j] == 'color') colorscale else # get(paste('scale', aestype[j], 'discrete', sep='_')) colFun <- if(aestype[j] == 'color') 'colorscale' else paste('scale', aestype[j], 'discrete', sep='_') groupLabel <- glabel(groups[j], j, chr=TRUE) #?? g <- c(g, if(aestype[j] == 'size') # sprintf("colFun(name=%s, range=c(.2, 1.5))", # groupLabel) else # sprintf("colFun(name=%s)", groupLabel)) g <- c(g, if(aestype[j] == 'size') sprintf('%s(name=%s, range=c(.2, 1.5))', colFun, groupLabel) else sprintf('%s(name=%s)', colFun, groupLabel)) } g <- c(g, sprintf("theme(legend.position='%s')", legend.position)) } if(conf.int) { h <- if(conf == 'fill') sprintf("geom_ribbon(data=data, aes(ymin=lower, ymax=upper),%s)", ribbonargs) else c(sprintf('geom_line(data=data, aes(x=%s, y=lower), linetype=conflinetype)', xn), sprintf('geom_line(data=data, aes(x=%s, y=upper), linetype=conflinetype)', xn)) g <- c(g, h) } # end if(conf.int) } if(! maddlayer) g <- c(g, addlayer) g <- c(g, if(flipped) sprintf("ylim(%s)", deparse(ylim.)) else sprintf("coord_cartesian(ylim=%s)", deparse(ylim.))) xa <- if(conf.int) c(xv, xv, xv) else xv ya <- if(conf.int) c(data$yhat, data$lower, data$upper) else data$yhat if(missing(xlim.)) xlim. <- if(is.factor(xv)) c(1 , length(levels(xv))) else range(pretty(xv)) g <- c(g, sprintf("tanova(xn, xa, ya, %s, %s, flip=FALSE)", deparse(xlim.), deparse(ylim.))) # was flip=flipped if(! is.factor(xv) && length(rdata) && xn %in% names(rdata)) { form <- paste('yhat', xn, sep='~') if(length(groups)) form <- paste(form, groups[1], sep='+') g <- c(g, sprintf("dohist(%s, predictions=data, data=rdata, ylim=%s)", form, deparse(ylim.))) } ## Get list of varying variables that are not a groups variable ## These will be for faceting ## If the faceting formula is specified, just use it f <- if(length(v) > 1) setdiff(v[-1], groups) if(length(f)) { if(! length(formula)) { k <- length(f) formula <- if(k == 1) paste('~', f[1]) else if(k == 2) paste(f[1], f[2], sep='~') else if(k == 3) paste(f[1], '~', f[2], '*', f[3]) else if(k == 4) paste(f[1], '*', f[2], '~', f[3], '*', f[4]) else stop('too many varying variables to use faceting') } else formula <- deparse(formula) g <- c(g, sprintf("facet_grid(%s)", formula)) } g <- paste(g, collapse=' + ') if(ggexpr) return(g) g <- plrend(eval(parse(text=g))) # if(length(sub)) g <- if(isbase) footnote(g, sub) # else # plotly::layout(g, title=sub, margin=0.03) ## Could not get layout(g, annotations=...) 
to work return(g) } } utils::globalVariables(c('.xx.', '.yhat', 'lower', 'upper', 'groupLabel')) rms/R/orm.fit.s0000644000176200001440000003272113661213303013021 0ustar liggesusersorm.fit <- function(x=NULL, y, family='logistic', offset=0., initial, maxit=12L, eps=.005, tol=1e-7, trace=FALSE, penalty.matrix=NULL, scale=FALSE) { cal <- match.call() len.penmat <- length(penalty.matrix) ## Extreme value type I dist = Gumbel maximum = exp(-exp(-x)) = MASS:::pgumbel ## Gumbel minimum = 1 - exp(-exp(x)) families <- probabilityFamilies familiesDefined <- names(families) sfam <- substitute(family) csfam <- as.character(sfam) if(length(csfam) == 1 && csfam %in% familiesDefined) { fam <- families[[csfam]] family <- csfam } else if(is.character(family) && family %in% familiesDefined) fam <- families[[family]] else { fam <- family family <- fam$name } n <- length(y) initial.there <- ! missing(initial) if(! length(x)) { nx <- 0 xname <- NULL x <- 0 } else { if(! is.matrix(x)) x <- as.matrix(x) dx <- dim(x) nx <- dx[2L] if(dx[1] != n) stop("x and y must have same length") xname <- dimnames(x)[[2]] if(! length(xname)) xname <- paste("x[", 1 : nx, "]", sep="") if(scale) { x <- scale(x) scinfo <- attributes(x)[c('scaled:center', 'scaled:scale')] xbar <- as.matrix(scinfo[[1]]) xsd <- as.matrix(scinfo[[2]]) } } ynumeric <- is.numeric(y) if(ynumeric) { mediany <- quantile(y, probs=.5, type=1L) yu <- sort(unique(y)) kmid <- max(1, which(yu == mediany) - 1L) } # For large n, as.factor is slow # if(!is.factor(y)) y <- as.factor(y) if(is.factor(y)) { ylevels <- levels(y) y <- unclass(y) } else { ylevels <- sort(unique(y)) y <- match(y, ylevels) } if(! ynumeric) { mediany <- quantile(y, probs=.5, type=1L) kmid <- max(1, which(1L : length(ylevels) == mediany) - 1L) } kint <- length(ylevels) - 1L if(kint == 1) kmid <- 1 ofpres <- ! all(offset == 0) if(ofpres && length(offset) != n) stop("offset and y must have same length") if(n < 3) stop("must have >=3 non-missing observations") numy <- tabulate(y) names(numy) <- ylevels p <- as.integer(nx + kint) if(missing(initial)) { cp <- (n - cumsum(numy)[- length(numy)]) / n names(cp) <- NULL initial <- fam$inverse(cp) if(ofpres) initial <- initial - mean(offset) } loglik <- -2 * sum(numy * log(numy / n)) if(len.penmat) { if(nx > 0) { if(len.penmat == 0) penalty.matrix <- matrix(0, nrow=nx, ncol=nx) if(nrow(penalty.matrix) != nx || ncol(penalty.matrix) != nx) stop(paste("penalty.matrix does not have", nx, "rows and columns")) penmat <- rbind( matrix(0, ncol=kint+nx, nrow=kint), cbind(matrix(0, ncol=kint, nrow=nx), penalty.matrix)) } else penmat <- matrix(0, ncol=kint, nrow=kint) } else penmat <- NULL if(nx==0 & ! 
ofpres) { loglik <- rep(loglik, 2) z <- list(coef=initial, u=rep(0, kint)) } if(ofpres) { ## Fit model with only intercept(s) and offset z <- ormfit(NULL, y, kint, 0, initial=initial, offset=offset, maxit=maxit, tol=tol, eps=eps, trace=trace, fam=fam) if(z$fail) return(structure(list(fail=TRUE), class="orm")) loglik <- c(loglik, z$loglik) initial <- z$coef } if(nx > 0) { ##Fit model with intercept(s), offset, covariables z <- ormfit(x, y, kint, nx, initial=initial, offset=offset, penmat=penmat, maxit=maxit, tol=tol, eps=eps, trace=trace, fam=fam) if(z$fail) return(structure(list(fail=TRUE), class="orm")) loglik <- c(loglik, z$loglik) kof <- z$coef ## Compute linear predictor before unscaling beta, as x is scaled lp <- matxv(x, kof, kint=kmid) info <- z$v if(scale) { attr(info, 'scale') <- list(mean=xbar, sd=xsd) betas <- kof[- (1 : kint)] kof[1 : kint] <- kof[1 : kint] - sum(betas * xbar / xsd) kof[-(1 : kint)] <- betas / xsd } } else lp <- rep(kof[kmid], n) ## Keep variance matrix for middle intercept and all predictors ## Middle intercept take to be intercept corresponding to y that is ## closest to the median y i <- if(nx > 0) c(kmid, (kint + 1):p) else kmid v <- tryCatch(as.matrix(solve(info, tol=tol)[i, i])) if(inherits(v, 'try-error')) { cat('Singular information matrix\n') return(structure(list(fail=TRUE), class="orm")) } if(scale) { trans <- rbind(cbind(1, matrix(0, nrow=1, ncol=nx)), cbind(-matrix(rep(xbar/xsd, 1), ncol=1), diag(1 / as.vector(xsd)))) v <- t(trans) %*% v %*% trans } name <- if(kint == 1) "Intercept" else paste("y>=", ylevels[-1L], sep="") name <- c(name, xname) names(kof) <- name dimnames(v) <- list(name[i], name[i]) if(kint > 1L) attr(v, 'intercepts') <- kmid llnull <- loglik[length(loglik) - 1L] model.lr <- llnull - loglik[length(loglik)] model.df <- nx if(initial.there) model.p <- score <- score.p <- NA else { score <- z$score if(model.df == 0) model.p <- score.p <- 1. else { model.p <- 1. - pchisq(model.lr, model.df) score.p <- 1. - pchisq(score, model.df) } } r2 <- 1. - exp(-model.lr / n) r2.max <- 1. - exp(-llnull / n) r2 <- r2 / r2.max if(kint > 1L) attr(lp, 'intercepts') <- kmid g <- GiniMd(lp) ## compute average |difference| between 0.5 and the condition ## probability of being >= marginal median pdm <- mean(abs(fam$cumprob(lp) - 0.5)) rho <- cor(rank(lp), rank(y)) ## Somewhat faster: ## rho <- .Fortran('rcorr', cbind(lp, y), as.integer(n), 2L, 2L, r=double(4), ## integer(4), double(n), double(n), double(n), double(n), ## double(n), integer(n), PACKAGE='Hmisc')$r[2] stats <- c(n, length(numy), mediany, z$dmax, model.lr, model.df, model.p, score, score.p, rho, r2, g, exp(g), pdm) nam <- c("Obs", "Distinct Y", "Median Y", "Max Deriv", "Model L.R.", "d.f.", "P", "Score", "Score P", "rho", "R2", "g", "gr", "pdm") names(stats) <- nam retlist <- list(call=cal, freq=numy, yunique=ylevels, stats=stats, fail=FALSE, coefficients=kof, var=v, ## u=z$u, family=family, trans=fam, deviance=loglik, non.slopes=kint, interceptRef=kmid, linear.predictors=lp, penalty.matrix=if(nx > 0 && any(penalty.matrix != 0)) penalty.matrix else NULL, info.matrix=info) class(retlist) <- 'orm' retlist } ormfit <- function(x, y, kint, nx, initial, offset, penmat=NULL, maxit=12L, eps=.005, tol=1e-7, trace=FALSE, fam) { if(missing(x) || ! length(x) || nx == 0) { x <- 0. nx <- 0 } n <- length(y) p <- as.integer(kint + nx) ymax <- kint + 1L iter <- 0L oldL <- 1e100 if(length(initial) < p) initial <- c(initial, rep(0, p - length(initial))) coef <- initial del <- rep(0., p) curstp <- 1. 
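  ## The loop below is Newton-Raphson with step-halving: update
  ## coef <- coef + solve(V, U) (U = score vector, V = information
  ## matrix) and, whenever -2 log L worsens, back off by halving the
  ## last step until it improves.  A minimal commented sketch of the
  ## same idea for an intercept-only binary logistic likelihood
  ## (nr1 is a hypothetical helper for illustration, not part of rms):
  ## nr1 <- function(y, maxit=25L, eps=1e-8) {
  ##   b <- 0.; step <- 0.; oldL <- Inf
  ##   for(i in 1 : maxit) {
  ##     p <- plogis(b)
  ##     L <- -2. * sum(ifelse(y == 1, log(p), log(1. - p)))
  ##     if(L > oldL) { step <- step / 2.; b <- b - step; next }  # step-halve
  ##     if(abs(oldL - L) < eps) break
  ##     U <- sum(y - p)                 # score
  ##     V <- length(y) * p * (1. - p)   # information
  ##     step <- U / V; b <- b + step; oldL <- L
  ##   }
  ##   b   # converges to qlogis(mean(y)), the MLE
  ## }
  ## set.seed(1); nr1(rbinom(200L, 1L, .3))   # approximately qlogis(.3)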
f <- fam$cumprob fp <- fam$deriv fpp <- fam$deriv2 ep <- .Machine$double.eps fptest <- function(x) fp (x, f(x)) fpptest <- function(x) fpp(x, f(x), fp(x, f(x))) repeat { if(iter >= maxit) { cat('Did not converge in', maxit, 'iterations\n') return(list(fail=TRUE)) } iter <- iter + 1L ## Compute linear predictor less intercept xb <- if(nx == 0L) offset else offset + x %*% coef[-(1L : kint)] ## Compute current Prob y=observed y ## P <- rep(0., n) ## P[y == 1] <- f(xb + coef[1]) - f(xb + coef[2]) ## if(ymax > 2) P[y > 1 & y < ymax] <- f(xb + coef[y]) - f(xb + coef[y+1]) ## P[y == ymax] <- f(xb + coef[kint]) ints <- c(1e100, coef[1:kint], -1e100) xby <- xb + ints[y]; xby1 <- xb + ints[y + 1L] fa <- f(xby) fb <- f(xby1) P <- fa - fb ## Compute -2 log likelihood ## Occasionally, Newton-Raphson updating puts intercepts out of order ## See w=2 in fit in line 181 of projects/Cardiology/ahf/cpbr/a.Rnw if(min(P) < 1e-10) warning(sprintf('cell probability < 1e-10 in iteration %s, set to 1e-10', iter)) L <- -2. * sum(log(pmax(1e-10, P))) ## Compute components of 1st and 2nd derivatives fpa <- fp(xby, fa) fpb <- fp(xby1, fb) fppa <- fpp(xby, fa, fpa) fppb <- fpp(xby1, fb, fpb) if(abs(fptest(ints[1] + max(xb))) > ep || abs(fptest(ints[kint+2] + min(xb))) > ep) stop('logic error 1') if(abs(fpptest(ints[1] + max(xb))) > ep || abs(fpptest(ints[kint+2] + min(xb))) > ep) stop('logic error 2') slow <- FALSE ## To compute score vector and info matrix in R instead of Ratfor/Fortran if(slow) { U <- rep(0., p) for(m in 1:kint) for(j in 1:n) U[m] <- U[m] + (fpa[j]*(y[j]-1==m) - fpb[j]*(y[j]==m)) / P[j] if(nx > 0) for(m in (kint+1):p) for(j in 1:n) U[m] <- U[m] + (fpa[j] - fpb[j]) * x[j,m-kint] / P[j] ## To compute the (wastefully sparse) information matrix in R: V <- matrix(NA, p, p) for(m in 1:p) { for(k in 1:m) { V[m,k] <- 0 for(j in 1:n) { z <- y[j] pa <- fpa[j]; pb <- fpb[j] ppa <- fppa[j]; ppb <- fppb[j] w <- 1/(P[j]*P[j]) v <- NA ## temp v <- if(m <= kint & k <= kint) - w * (pa*(z-1==m) - pb*(z==m)) * (pa*(z-1==k) - pb*(z==k)) + (ppa*(z-1==m)*(m==k) - ppb*(z==m)*(m==k))/P[j] else if(m > kint & k <= kint) x[j,m-kint] / P[j] * (-1/P[j] * (pa - pb) * (pa*(z-1==k) - pb*(z==k)) + ppa*(z-1==k) - ppb*(z==k)) else if(m > kint & k > kint) x[j,m-kint] * x[j,k-kint] / P[j] * (-1/P[j] * (pa - pb) * (pa-pb) + ppa - ppb) else {cat('nd\n');prn(c(m,k));9999} V[m,k] <- V[m,k] + v }}} V[col(V) > row(V)] <- t(V)[row(V) < col(V)] V <- -V } else { l <- if(kint == 1) p ^ 2 else as.integer(nx * nx + 2L * kint * nx + 3L * kint - 2L) lia <- as.integer(p + 1L) w <- .Fortran(F_ormuv, n, p, kint, nx, x, y, P, fpa, fpb, fppa, fppb, u=double(p), v=double(l), ja=integer(l), ia=integer(lia), l=as.integer(l), lia=lia, integer(p)) U <- w$u ## if any element of w$v is a NaN then return that the fitting attempt ## has failed V <- if(any(is.nan(w$v))) return(list(fail=TRUE)) else { ## assume step-halving happens V <- if(kint == 1L) matrix(w$v, nrow=p, ncol=p) else new('matrix.csr', ra=w$v, ja=w$ja, ia=w$ia, dimension=c(p, p)) V <- (V + t(V))/2. 
# force symmetry; chol() complains if 1e-15 off } } dmax <- max(abs(U)) ## Don't try to step halve if initial estimates happen to be ## MLEs; allow for a tiny worsening of LL without step-halving if ## max absolute first derivative is small if(trace) cat('-2logL=', L, ' step=', curstp, ' delta LL=', oldL-L, ' max |deriv|=', dmax, '\n') if(dmax < 1e-9 && abs(L - oldL) < 0.01*eps) break if(abs(oldL - L) < eps) break if(L > oldL) { # going the wrong direction if(iter == 1L) { cat('First iteration moved parameter estimates in wrong direction.\n') return(list(fail=TRUE)) } ## Try step-halving curstp <- curstp / 2. coef <- coef - curstp * del next } curstp <- 1. del <- tryCatch(solve(V, U, tol=tol)) if(inherits(del, 'try-error')) { cat('Singular information matrix\n') return(list(fail=TRUE)) } if(iter == 1L) score <- U %*% del coef <- coef + del oldL <- L } ## Converged list(coef=coef, v=V, loglik=L, score=score, dmax=dmax, iter=iter, fail=FALSE, eps=eps) } ## Extreme value type I dist = Gumbel maximum = exp(-exp(-x)) = MASS:::pgumbel ## Gumbel minimum = 1 - exp(-exp(x)) probabilityFamilies <- list(logistic = list(cumprob=plogis, inverse=qlogis, deriv =function(x, f) f * (1 - f), deriv2 =function(x, f, deriv) f * (1 - 3*f + 2*f*f) ), probit = list(cumprob=pnorm, inverse=qnorm, deriv =function(x, ...) dnorm(x), deriv2 =function(x, f, deriv) - deriv * x), loglog = list(cumprob=function(x) exp(-exp(-x)), inverse=function(x) -log(-log(x )), deriv =function(x, ...) exp(-x - exp(-x)), deriv2 =function(x, ...) ifelse(abs(x) > 200, 0, exp(-x - exp(-x)) * (-1 + exp(-x)))), cloglog = list(cumprob=function(x) 1 - exp(-exp(x)), inverse=function(x) log(-log(1 - x)), deriv =function(x, ...) exp( x - exp( x)), deriv2 =function(x, f, deriv) ifelse(abs(x) > 200, 0, deriv * ( 1 - exp( x)))), cauchit = list(cumprob=pcauchy, inverse=qcauchy, deriv =function(x, ...) dcauchy(x), deriv2 =function(x, ...) -2 * x * ((1 + x*x)^(-2)) / pi)) ## Check: ## P(x) = plogis(x); P'(x) = P(x) - P(x)^2 ## d <- function(x) plogis(x) - 3*plogis(x)^2 + 2*plogis(x)^3 ## x <- seq(-3, 3, length=150) ## plot(x, d(x), type='l') ## ad <- c(NA,diff(dlogis(x))/(x[2]-x[1])) ## lines(x, ad, col='red') rms/R/survest.cph.s0000644000176200001440000003507312773074532013746 0ustar liggesusers##Use x= if input is a design matrix, newdata= if a data frame or data matrix ##or vector. Can specify (centered) linear predictor values instead (linear.predictors). ##Strata is attached to linear.predictors or x as "strata" attribute. ##data matrix assumes that categorical variables are coded with integer codes survest.cph <- function(fit, newdata, linear.predictors, x, times, fun, loglog=FALSE, conf.int=.95, type=NULL, vartype=NULL, conf.type=c("log", "log-log","plain","none"), se.fit=TRUE, what=c("survival","parallel"), individual=FALSE, ...) { at <- fit$Design f <- sum(at$assume.code != 8) #non-strata factors nf <- length(at$name) - f strata.levels <- levels(fit$strata) num.strata <- if(nf == 0) 1 else length(strata.levels) conf.type <- match.arg(conf.type) what <- match.arg(what) if(what == 'parallel') { conf.int <- FALSE conf.type <- 'none' } inputData <- ! (missing(newdata) && missing(linear.predictors) && missing(x)) if(! 
se.fit) conf.int <- 0 if(individual && (length(fit$x) == 0 || length(fit$y) == 0 || attr(fit$y,'type') != 'counting')) stop('must specify x=TRUE, y=TRUE, and start and stop time to cph when individual=TRUE') if(missing(fun)) fun <- if(loglog) function(x) logb(-logb(ifelse(x == 0 | x == 1, NA, x))) else function(x) x ## Compute confidence limits for survival based on -log survival, ## constraining to be in [0,1]; d = std.error of cum hazard * z value ciupper <- function(surv, d) ifelse(surv == 0, 0, pmin(1, surv*exp(d))) cilower <- function(surv, d) ifelse(surv == 0, 0, surv*exp(-d)) naa <- fit$na.action ##First see if use method that depends on x and y being stored in fit if(! missing(linear.predictors) && length(fit$surv) == 0) stop('when using linear.predictors= must have specified surv=TRUE to cph') if(length(fit$y) && (f == 0 || length(fit$x)) && ((conf.int > 0 && f > 0) | length(fit$surv) == 0) & (! missing(newdata) | (missing(linear.predictors) && missing(x)))) { if(! missing(linear.predictors) | ! missing(x)) stop(paste("may not specify linear.predictors or x when survival estimation", "is not using underlying survival estimates stored with surv=TRUE")) sf <- function(..., type=NULL, vartype=NULL, cphnull=FALSE) { g <- list(...) if(length(type)) g$type <- type if(length(vartype)) g$vartype <- vartype g$censor <- FALSE # don't output censored values do.call('survfit.cph', g) } if(f == 0) { g <- sf(fit, se.fit=se.fit, conf.int=conf.int, conf.type=conf.type, type=type, vartype=vartype, cphnull=TRUE) sreq <- if(missing(newdata)) 1 else attr(predict(fit, newdata, type="lp", expand.na=FALSE), "strata") } else { if(missing(newdata)) { g <- sf(fit, se.fit=se.fit, conf.int=conf.int, conf.type=conf.type, type=type, vartype=vartype) sreq <- 1 } else { if(nrow(newdata) > 1 && ! 
individual && missing(times)) stop("must specify times= if predicting for >1 observation") g <- sf(fit, newdata=newdata, se.fit=se.fit, conf.int=conf.int, conf.type=conf.type, individual=individual, type=type, vartype=vartype) sreq <- g$requested.strata } naa <- g$na.action } sreq <- unclass(sreq) if(missing(times)) { ##delete extra S(t) curves added by survfit for all strata ##No newdata -> requested underlying survival for all strata if(missing(newdata)) return(g) else { if(nf == 0) j <- TRUE else { stemp <- rep(1:num.strata, g$strata) j <- stemp == sreq } tim <- c(0, g$time[j]) nr <- c(g$n.risk[j][1],g$n.risk[j]) ne <- c(0, g$n.event[j]) surv <- c(1, g$surv[j]) se <- c(NA, g$std.err[j]) upper <- c(1, g$upper[j]) # 1 was NA lower <- c(1, g$lower[j]) # 1 was NA yy <- fit$y ny <- ncol(yy) str <- unclass(fit$strata) if(length(str)) yy <- yy[str == sreq, ny-1] else yy <- yy[,ny-1] maxt <- max(yy) if(maxt > tim[length(tim)]) { tim <- c(tim,maxt) nr <- c(nr, sum(yy >= maxt-1e-6)) ne <- c(ne, 0) surv <- c(surv, surv[length(surv)]) se <- c(se, NA) upper <- c(upper, NA) lower <- c(lower, NA) } surv <- fun(surv) surv[is.infinite(surv)] <- NA lower <- fun(lower) lower[is.infinite(lower)] <- NA upper <- fun(upper) upper[is.infinite(upper)] <- NA retlist <- list(time=tim,n.risk=nr, n.event=ne, surv=surv, std.err=se, upper=upper, lower=lower, conf.type=g$conf.type, conf.int=g$conf.int, call=g$call) if(nf > 0) retlist$strata <- sreq return(retlist) } } ## end if(missing(times)) else { ## times specified ## g$requested.strata <- NULL ## return(g) g <- summary(g, print.it=FALSE, times=times, extend=TRUE) for(w in c('n.risk', 'n.event', 'n.censor', if(num.strata > 1) 'strata', 'surv', 'cumhaz', 'std.err', 'lower', 'upper')) if(is.matrix(g[[w]]) && nrow(g[[w]]) == 1) g[[w]] <- as.vector(g[[w]]) ## Why does summary.survfit output vectors as row matrices? ## summary.survfit returns std. err of S(t) unlike other ## survival package functions g$std.err <- ifelse(g$surv == 0, NA, g$std.err / g$surv) if(! individual && nf > 0 && ## delete extra cells added by survfit for strat ! 
missing(newdata) && nrow(newdata) == 1 && any(g$strata %nin% g$requested.strata)) { j <- g$strata %in% g$requested.strata g$time <- g$time[j] g$n.risk <- g$n.risk[j] g$n.event <- g$n.event[j] g$n.censor <- g$n.censor[j] g$strata <- g$strata[j] g$surv <- g$surv[j] g$cumhaz <- g$cumhaz[j] g$std.err <- g$std.err[j] g$lower <- g$lower[j] g$upper <- g$upper[j] if(FALSE) { if(length(g$time) != length(times) * num.strata) stop('summary.survfit could not compute estimates for all strata at all times requested.\nYou probably requested times where data are limited.') d <- dim(g$surv) if(length(d) == 0) d <- c(length(g$surv), 1) strata.col <- matrix(rep(sreq, d[1]), ncol=d[2], byrow=TRUE) gs <- factor(g$strata, strata.levels) strata.row <- matrix(rep(unclass(gs), d[2]), ncol=d[2]) m <- strata.col == strata.row g$surv <- matrix(g$surv[m], ncol=d[2])[,,drop=TRUE] g$lower <- matrix(g$lower[m], ncol=d[2])[,,drop=TRUE] g$upper <- matrix(g$upper[m], ncol=d[2])[,,drop=TRUE] g$std.err <- matrix(g$std.err[m],ncol=d[2])[,,drop=TRUE] } } if(length(times) > 1) for(w in c('n.risk', 'n.event', 'n.censor', if(num.strata > 1) 'strata', 'surv', 'cumhaz', 'std.err', 'lower', 'upper')) g[[w]] <- matrix(g[[w]], ncol=length(times), byrow=TRUE) } # end non-missing times tim <- g$time nr <- g$n.risk ne <- g$n.event surv <- g$surv se <- g$std.err low <- g$lower up <- g$upper tim <- unique(tim) if(FALSE && is.matrix(surv)) { surv <- t(surv) se <- t(se) low <- t(low) up <- t(up) dn <- list(row.names(newdata),format(tim)) dimnames(surv) <- dn dimnames(se) <- dn dimnames(low) <- dn dimnames(up) <- dn } surv <- fun(surv) low <- fun(low) up <- fun(up) surv[is.infinite(surv)] <- NA low[is.infinite(low)] <- NA up[is.infinite(up)] <- NA retlist <- list(time=tim, surv=naresid(naa,surv), std.err=naresid(naa,se), lower=naresid(naa,low), upper=naresid(naa,up)) if(nf > 0) retlist$strata <- naresid(naa,sreq) return(retlist) } asnum.strata <- function(str, strata.levels) { if(! length(str)) return(NULL) if(is.numeric(str) && any(str < 1 | str>length(strata.levels))) stop('illegal stratum number') if(is.factor(str) || is.numeric(str)) return(as.integer(str)) i <- match(str, strata.levels, nomatch=0) if(any(i == 0)) stop(paste('illegal strata:', paste(str[i == 0],collapse=' '))) i } ##Instead use the baseline survival computed at fit time with cph(...,surv=TRUE) nt <- if(missing(times)) 0 else length(times) if(conf.int > 0 && f > 0) warning(paste("S.E. and confidence intervals are approximate except", "at predictor means.\nUse cph(...,x=TRUE,y=TRUE) (and don't use linear.predictors=) for better estimates.")) if(missing(linear.predictors)) { if(missing(x) && missing(newdata)) { linear.predictors <- fit$linear.predictors #assume was centered rnam <- names(linear.predictors) if(! 
length(linear.predictors)) { if(length(fit$x) == 0) stop("newdata, x, linear.predictors not given but x nor linear.predictors stored in fit") linear.predictors <- matxv(fit$x, fit$coef) - fit$center strata <- fit$strata rnam <- dimnames(fit$x)[[1]] } else strata <- attr(linear.predictors,"strata") } else { if(missing(x)) { x <- predict(fit, newdata, type="x", expand.na=FALSE) naa <- attr(x,"na.action") } strata <- attr(x,"strata") if(f > 0) linear.predictors <- matxv(x,fit$coef) - fit$center else linear.predictors <- 0 rnam <- dimnames(x)[[1]] } } else { strata <- asnum.strata(attr(linear.predictors, "strata"), strata.levels) rnam <- names(linear.predictors) } if(length(strata) == 0 && nf > 0) stop("strata not stored in x or linear.predictors") attr(strata, "class") <- NULL if(length(fit$surv) == 0 && length(fit$x) == 0 && length(fit$y) == 0) stop("you did not specify surv=TRUE or x=TRUE, y=TRUE in cph") if(conf.int>0) zcrit <- qnorm((conf.int+1)/2) if(length(strata) == 0) { n <- length(linear.predictors) strata <- rep(1,n) ns <- 1 } else { ns <- max(strata, na.rm=TRUE) n <- length(strata) } if(what == 'parallel') { if(length(times) >1 && length(times) != n) stop('length of times must equal 1 or number of subjects being predicted') if(! length(fit$surv)) stop('must specify surv=TRUE to cph') if(diff(range(strata)) == 0) { estsurv <- approx(fit$time, fit$surv, xout=times, method="constant", f=0, ties=mean)$y return(estsurv ^ exp(linear.predictors)) } est.surv <- double(n) for(zs in unique(strata)) { this <- strata == zs estsurv <- approx(fit$time[[zs]], fit$surv[[zs]], xout=if(length(times) == 1)times else times[this], method='constant', f=0, ties=mean)$y est.surv[this] <- estsurv ^ exp(if(length(linear.predictors) == 1) linear.predictors else linear.predictors[this]) } return(est.surv) } if(n>1 && nt == 0) stop("must specify times if getting predictions for >1 obs.") if(nt == 0) { #Get est for 1 obs if(! is.list(fit$time)) { times <- fit$time surv <- fit$surv^exp(linear.predictors) std.err <- fit$std.err } else { times <- fit$time[[strata]] surv <- fit$surv[[strata]]^exp(linear.predictors) std.err <- fit$std.err[[strata]] } if(conf.int > 0) { lower <- cilower(surv, zcrit*std.err) upper <- ciupper(surv, zcrit*std.err) lower[1] <- 1 upper[1] <- 1 attr(lower, "type") <- NULL attr(upper, "type") <- NULL } surv <- fun(surv); surv[is.infinite(surv)] <- NA if(conf.int>0) { lower <- fun(lower); lower[is.infinite(lower)] <- NA upper <- fun(upper); upper[is.infinite(upper)] <- NA } if(nf == 0) strata <- NULL retlist <- list(time=times, surv=surv, linear.predictors=linear.predictors) if(conf.int>0) retlist <- c(retlist,list(lower=lower, upper=upper, std.err=std.err)) if(nf>0) { retlist$strata <- strata retlist$requested.strata <- unclass(strata) } return(retlist) } # end if(nt==0) ##Selected times for >=1 obs ##First get survival at times "times" for each stratum surv <- matrix(double(1), nrow=ns, ncol=nt) serr <- matrix(double(1), nrow=ns, ncol=nt) for(i in 1:ns) { if(! 
is.list(fit$time)) { tim <- fit$time se <- fit$std.err srv <- fit$surv } else { tim <- fit$time[[i]] se <- fit$std.err[[i]] srv <- fit$surv[[i]] } m <- length(tim) j <- 0 for(u in times) { j <- j + 1 tm <- max((1:length(tim))[tim<=u+1e-6]) s <- srv[tm] Se <- se[tm] if(u > tim[m] && srv[m] > 0) {s <- NA; Se <- NA} surv[i,j] <- s serr[i,j] <- Se } } srv <- surv[strata,]^exp(linear.predictors) ft <- format(times) if(is.matrix(srv)) { dn <- list(rnam, ft) dimnames(srv) <- dn } else names(srv) <- if(n == 1) ft else rnam if(conf.int > 0) { serr <- serr[strata,] lower <- cilower(srv, zcrit*serr) upper <- ciupper(srv, zcrit*serr) if(is.matrix(lower)) { dimnames(serr) <- dn dimnames(lower) <- dn dimnames(upper) <- dn } else { names(serr) <- names(lower) <- names(upper) <- if(n == 1) ft else rnam } lower <- fun(lower); lower[is.infinite(lower)] <- NA upper <- fun(upper); upper[is.infinite(upper)] <- NA } srv <- fun(srv) srv[is.infinite(srv)] <- NA nar <- if(inputData) function(naa,w) w else function(...) naresid(...) if(conf.int == 0) return(list(time=times, surv=nar(naa,srv))) retlist <- list(time=times, surv=nar(naa,srv), lower=nar(naa,lower), upper=nar(naa,upper), std.err=nar(naa,serr)) if(nf>0) retlist$requested.strata <- nar(naa, unclass(strata)) retlist } rms/R/predict.lrm.s0000644000176200001440000001050313751567312013674 0ustar liggesuserspredict.lrm <- function(object, ..., type=c("lp","fitted","fitted.ind","mean","x","data.frame", "terms", "cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, codes=FALSE) { type <- match.arg(type) if(type %nin% c("fitted","fitted.ind", "mean")) return(predictrms(object,...,type=type, se.fit=se.fit)) xb <- predictrms(object, ..., type="lp", se.fit=FALSE) rnam <- names(xb) ns <- object$non.slopes cnam <- names(object$coef[1:ns]) trans <- object$trans ## If orm object get cumulative probability function used cumprob <- if(length(trans)) trans$cumprob else plogis if(se.fit) warning('se.fit not supported with type="fitted" or type="mean"') if(ns == 1 & type == "mean") stop('type="mean" makes no sense with a binary response') if(ns == 1) return(cumprob(xb)) intcept <- object$coef[1:ns] interceptRef <- object$interceptRef if(!length(interceptRef)) interceptRef <- 1 xb <- xb - intcept[interceptRef] xb <- sapply(intcept, "+", xb) P <- cumprob(xb) nam <- names(object$freq) if(is.matrix(P)) dimnames(P) <- list(rnam, cnam) else names(P) <- names(object$coef[1:ns]) if(type=="fitted") return(P) ##type="mean" or "fitted.ind" vals <- names(object$freq) P <- matrix(P, ncol=ns) Peq <- cbind(1, P) - cbind(P, 0) if(type == "fitted.ind") { ynam <- as.character(attr(object$terms, "formula")[2]) ynam <- paste(ynam, "=", vals, sep="") dimnames(Peq) <- list(rnam, ynam) return(drop(Peq)) } ##type="mean" if(codes) vals <- 1:length(object$freq) else { vals <- as.numeric(vals) if(any(is.na(vals))) stop('values of response levels must be numeric for type="mean" and codes=F') } m <- drop(Peq %*% vals) names(m) <- rnam m } predict.orm <- function(object, ..., type=c("lp","fitted","fitted.ind","mean","x","data.frame", "terms", "cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, codes=FALSE) { type <- match.arg(type) predict.lrm(object, ..., type=type, se.fit=se.fit, codes=codes) } Mean.lrm <- function(object, codes=FALSE, ...) 
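## Mean.lrm returns a *function* of the linear predictor that computes
## E(Y | X) = sum_j v_j Prob(Y = v_j) from the fitted ordinal cell
## probabilities, where the v_j are the distinct numeric response values.
## Hedged usage sketch (data and variable names hypothetical):
## f <- lrm(y ~ x1 + x2)                  # y ordinal with numeric levels
## M <- Mean(f)                           # dispatches to Mean.lrm
## M(predict(f, newdata=d, type='lp'))    # estimated means for rows of d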
{
  ns <- object$non.slopes
  if(ns < 2)
    stop('using this function only makes sense for >2 ordered response categories')
  if(codes) vals <- 1:length(object$freq)
  else {
    vals <- object$yunique
    if(!length(vals)) vals <- names(object$freq)
    vals <- as.numeric(vals)
    if(any(is.na(vals)))
      stop('values of response levels must be numeric for codes=FALSE')
  }
  f <- function(lp=numeric(0), X=numeric(0), intercepts=numeric(0),
                slopes=numeric(0), info=numeric(0), values=numeric(0),
                interceptRef=integer(0), trans=trans, conf.int=0)
  {
    ns <- length(intercepts)
    lp <- if(length(lp)) lp - intercepts[interceptRef] else matxv(X, slopes)
    xb <- sapply(intercepts, '+', lp)
    P  <- matrix(trans$cumprob(xb), ncol = ns)
    P  <- cbind(1, P) - cbind(P, 0)
    m  <- drop(P %*% values)
    names(m) <- names(lp)
    if(conf.int) {
      if(! length(X)) stop('must specify X if conf.int > 0')
      lb <- matrix(sapply(intercepts, '+', lp), ncol = ns)
      dmean.dalpha <- t(apply(trans$deriv(lb), 1,
                              FUN=function(x)
                                x * (values[2:length(values)] - values[1:ns])))
      dmean.dbeta  <- apply(dmean.dalpha, 1, sum) * X
      dmean.dtheta <- cbind(dmean.dalpha, dmean.dbeta)
      mean.var <- diag(dmean.dtheta %*% solve(info, t(dmean.dtheta)))
      w <- qnorm((1 + conf.int) / 2) * sqrt(mean.var)
      attr(m, 'limits') <- list(lower = m - w, upper = m + w)
    }
    m
  }
  ## If lrm fit, add information that orm fits have
  family <- object$family
  trans  <- object$trans
  if(! length(family)) {
    family <- 'logistic'
    trans  <- probabilityFamilies$logistic
  }
  ## Re-write first derivative so that it doesn't need the f argument
  if(family == "logistic")
    trans$deriv <- function(x) {p <- plogis(x); p * (1. - p)}
  ir <- object$interceptRef
  if(!length(ir)) ir <- 1
  formals(f) <- list(lp=numeric(0), X=numeric(0),
                     intercepts=object$coef[1 : ns],
                     slopes=object$coef[- (1 : ns)],
                     info=object$info.matrix,
                     values=vals, interceptRef=ir, trans=trans, conf.int=0)
  f
}
rms/R/gendata.s0000644000176200001440000000450012325076031013041 0ustar liggesusersgendata <- function(fit, ..., nobs, viewvals=FALSE, expand=TRUE, factors)
{
  at  <- fit$Design
  nam <- at$name[at$assume!="interaction"]
  if(!missing(nobs) && !is.logical(nobs)) {
    df <- predictrms(fit, type="adjto.data.frame")
    df[1:nobs,] <- df
    cat("Edit the list of variables you would like to vary.\nBlank out variables to set to reference values.\n")
    nam.sub <- de(nam)[[1]]
    nam.sub <- nam.sub[!is.na(nam.sub)]
    if(!all(nam.sub %in% nam)) stop("misspelled a variable name")
    df.sub <- as.data.frame(df[,nam.sub])
    cat("Edit the predictor settings to use.\n")
    if(viewvals && length(val <- Getlim(at, allow.null=TRUE,
                                        need.all=FALSE)$values[nam.sub])) {
      cat("A window is being opened to list the valid values of discrete variables.\n")
      sink(tf <- tempfile())
      print.datadist(list(values=val))
      sink()
      file.show(tf)
    }
    for(i in 1:length(df.sub))
      if(is.factor(df.sub[[i]])) df.sub[[i]] <- as.character(df.sub[[i]])
    df.sub <- as.data.frame(de(df.sub))
    df[nam.sub] <- df.sub
    return(structure(df, names.subset=nam.sub))
  }
  factors <- if(missing(factors)) rmsArgs(substitute(list(...))) else factors
  fnam <- names(factors)
  nf   <- length(factors)
  if(nf==0) return(predictrms(fit, type="adjto.data.frame"))
  which <- charmatch(fnam, nam, 0)
  if(any(which==0))
    stop(paste("factor(s) not in design:",
               paste(names(factors)[which==0],collapse=" ")))
  settings <- if(nf < length(nam))
    unclass(predictrms(fit, type="adjto.data.frame")) else list()
  if(nf > 0) for(i in 1 : nf) settings[[fnam[i]]] <- factors[[i]]
  attr(settings, 'row.names') <- NULL
  ## Starting in R 3.1.0, as.data.frame.labelled or as.data.frame.list
  ## quit working when lengths vary
  if(nf == 0 || !
expand) { len <- sapply(settings, length) n <- max(len) if(any(len < n)) for(i in which(len < max(len))) settings[[i]] <- rep(settings[[i]], length=n) attr(settings, 'row.names') <- as.character(1 : n) attr(settings, 'class') <- 'data.frame' } if(nf == 0) return(settings) if(expand) expand.grid(settings) else settings } rms/R/survreg.distributions.s0000644000176200001440000001436212604552151016045 0ustar liggesusers# SCCS @(#)survreg.distributions.s 4.3 11/19/92 # # Create the survreg.distributions object # # Infinite mean in log logistic courtesy of Victor Moreno # SERC, Institut Catala d'Oncologia (V.Moreno@ico.scs.es) 9Feb98 # survival package defines basic quantile function ignoring link # Actual quantile function called Quantile here, for SV4 or R survreg.auxinfo <- list( exponential = list( survival = function(times, lp, parms) exp(-times/exp(lp)), hazard = function(times, lp, parms) exp(-lp), quantile = function(p) log(-log(p)), Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) -logb(1-q)*exp(lp) names(q) <- format(q) drop(outer(lp, q, FUN=f, parms=parms)) }, mean = function(lp, parms) exp(lp), latex = function(...) '\\exp(-t/\\exp(X\\beta))' ), extreme = list( survival = function(times, lp, parms) { exp(-exp((times-lp)/exp(parms))) }, hazard = function(times, lp, parms) { scale <- exp(parms[1]) #14Jun97 exp((times-lp)/scale)/scale }, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*logb(-logb(1-q)) names(q) <- format(q) drop(outer(lp, q, FUN=f, parms=parms)) }, mean = function(lp, parms) { names(parms) <- NULL lp-.57722*exp(parms) }, latex = function(scale) { yvar <- "t" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("\\exp[-\\exp(",z,")]") z } ), weibull = list( survival = function(times, lp, parms) { t.trans <- logb(times) names(t.trans) <- format(times) exp(-exp((t.trans-lp)/exp(parms))) }, hazard = function(times, lp, parms) { t.trans <- logb(times) t.deriv <- 1/times names(t.trans) <- format(times) scale <- exp(parms[1]) #14Jun97 ifelse(times==0,exp(-lp/scale)/scale, exp((t.trans-lp)/scale)*t.deriv/scale) }, quantile = function(p) log(-log(p)), Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*logb(-logb(1-q)) names(q) <- format(q) drop(exp(outer(lp, q, FUN=f, parms=parms))) }, mean = function(lp, parms, transform) { names(parms) <- NULL exp(lp)*gamma(exp(parms)+1) }, latex = function(scale) { yvar <- "\\log(t)" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("\\exp[-\\exp(",z,")]") z } ), logistic = list( survival = function(times, lp, parms) { 1/(1+exp((times-lp)/exp(parms))) }, hazard = function(times, lp, parms) { scale <- exp(parms) 1/scale/(1+exp(-(times-lp)/scale)) }, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*logb(q/(1-q)) names(q) <- format(q) drop(outer(lp, q, FUN=f, parms=parms)) }, mean = function(lp, parms) lp, latex = function(scale){ yvar <- "t" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("[1+\\exp(",z,")]^{-1}") z } ), loglogistic = list( survival = function(times, lp, parms) { 1/(1+exp((logb(times)-lp)/exp(parms))) }, hazard = function(times, lp, parms) { t.trans <- logb(times) t.deriv <- 1/times scale <- exp(parms) names(t.trans) <- 
format(times) t.deriv/scale/(1+exp(-(t.trans-lp)/scale)) }, quantile = qlogis, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*logb(q/(1-q)) names(q) <- format(q) drop(exp(outer(lp, q, FUN=f, parms=parms))) }, mean = function(lp, parms) { names(parms) <- NULL if(exp(parms)>1) rep(Inf,length(lp)) else exp(lp)*pi*exp(parms)/sin(pi*exp(parms)) }, latex = function(scale) { yvar <- "\\log(t)" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("[1+\\exp(",z,")]^{-1}") z }), gaussian = list( survival = function(times, lp, parms) pnorm(- (times-lp)/exp(parms)), hazard = function(times, lp, parms) { scale <- exp(parms) z <- (times-lp)/scale dnorm(z) / scale / pnorm(- z) }, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*qnorm(q) names(q) <- format(q) drop(outer(lp, q, FUN=f, parms=parms)) }, mean = function(lp, parms) lp, latex = function(scale) { yvar <- "t" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("1-\\Phi(",z,")") z } ), lognormal = list( survival = function(times, lp, parms) { t.trans <- logb(times) names(t.trans) <- format(times) pnorm(- (t.trans-lp)/exp(parms)) }, hazard = function(times, lp, parms) { t.trans <- logb(times) t.deriv <- 1/times scale <- exp(parms) names(t.trans) <- format(times) z <- (t.trans-lp)/scale t.deriv * dnorm(z) / scale / pnorm(- z) }, quantile = qnorm, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms)*qnorm(q) names(q) <- format(q) drop(exp(outer(lp, q, FUN=f, parms=parms))) }, mean = function(lp, parms) { names(parms) <- NULL exp(lp+exp(2*parms)/2) }, latex = function(scale) { yvar <- "\\log(t)" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("1-\\Phi(",z,")") z } ), t = list( survival = function(times, lp, parms) { scale <- exp(parms[1]) df <- parms[2] pt(- (times-lp)/scale,df) }, hazard = function(times, lp, parms) { scale <- exp(parms[1]) df <- parms[2] z <- (times-lp)/scale dt(z,df) / scale / pt(- z,df) }, Quantile = function(q=.5, lp, parms) { names(parms) <- NULL f <- function(lp, q, parms) lp + exp(parms[1])*qt(q, parms[2]) names(q) <- format(q) drop(outer(lp, q, FUN=f, parms=parms)) }, mean = function(lp, parms) lp, latex = function(scale,df) { yvar <- "t" z <- if(scale==1) paste(yvar,"-X\\beta") else paste( "\\frac{", yvar, "-X\\beta}{",format(scale),"}",sep="") z <- paste("1-T_{",df,"}(",z,")", sep="") z } ) ) rms/R/latex.pphsm.s0000644000176200001440000000274013020562223013701 0ustar liggesuserslatex.pphsm <- function(object, title, file='', append=FALSE, which=NULL, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', ...) { md <- prType() %in% c('html', 'md', 'markdown') whichThere <- length(which) w <- if(length(caption)) { if(md) paste('
<div align=center><strong>', caption, '</strong></div>
', sep='') else paste('\\begin{center} \\bf',caption,'\\end{center}') } sc <- object$scale at <- object$Design if(!whichThere & !inline) { dist <- paste("\\exp\\{-t^{",format(1/sc, digits=digits), "} \\exp(X\\hat{\\beta})\\}") w <- c(w,paste("\\[{\\rm Prob}\\{T\\geq t\\} = ",dist, "{\\rm \\ \\ where} \\\\ \\]",sep="")) } if(!whichThere) which <- 1:length(at$name) if(missing(varnames)) varnames <- at$name[at$assume.code!=9] if(! md) cat(w, file=file, sep=if(length(w))"\n" else "", append=append) z <- latexrms(object, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix=if(!whichThere)"X\\hat{\\beta}" else NULL, inline=inline,pretrans=pretrans, digits=digits, size=size) if(md) htmltools::HTML(c(paste0(w, '\n'), as.character(z))) else z } rms/R/print.psm.s0000644000176200001440000001266113050125331013371 0ustar liggesusersprint.psm <- function(x, correlation = FALSE, digits=4, coefs=TRUE, title, ...) { k <- 0 z <- list() dist <- x$dist name <- survreg.distributions[[dist]]$name if(missing(title)) title <- paste("Parametric Survival Model:", name, "Distribution") stats <- x$stats ci <- x$clusterInfo counts <- reListclean(Obs = stats['Obs'], Events = stats['Events'], 'Cluster on' = ci$name, Clusters = ci$n, 'Sum of Weights' = stats['Sum of Weights'], sigma = if(length(x$scale) == 1) x$scale) lr <- reListclean('LR chi2' = stats['Model L.R.'], 'd.f.' = stats['d.f.'], 'Pr(> chi2)' = stats['P']) disc <- reListclean(R2=stats['R2'], Dxy=stats['Dxy'], g=stats['g'], gr=stats['gr']) headings <- c('', 'Model Likelihood\nRatio Test', 'Discrimination\nIndexes') digcounts <- c(NA, NA, NA, if(length(ci$name)) NA, if(length(ci$n)) NA, if(length(x$scale) == 1) 4) data <- list(c(counts, digcounts), c(lr, c(2,NA,-4)), c(disc, 3)) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) summary.survreg <- getS3method('summary', 'survreg') if(!x$fail) x$fail <- NULL # summary.survreg uses NULL for OK s <- summary.survreg(x, correlation=correlation) k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef = s$table[,'Value'], se = s$table[,'Std. Error'])) if (correlation && length(correl <- s$correlation)) { p <- ncol(correl) if (p > 1) { cat("\nCorrelation of Coefficients:\n") ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits = digits)) correl[!ll] <- "" k <- k + 1 z[[k]] <- list(type='print', list(correl[-1, -p, drop = FALSE], quote = FALSE), title='Correlation of Coefficients') } } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } # wt <- x$weights # fparms <- x$fixed # coef <- c(x$coef, x$parms[!fparms]) # resid <- x$residuals # dresid <- x$dresiduals # n <- length(resid) # p <- x$rank # if(!length(p)) p <- sum(!is.na(coef)) # if(!p) # { # warning("This model has zero rank --- no summary is provided") # return(x) # } # nsingular <- length(coef) - p # rdf <- x$df.resid # if(!length(rdf)) # rdf <- n - p # R <- x$R #check for rank deficiencies # if(p < max(dim(R))) # R <- R[1:p, #coded by pivoting # 1:p] # if(length(wt)) # { # wt <- wt^0.5 # resid <- resid * wt # excl <- wt == 0 # if(any(excl)) # { # warning(paste(sum(excl), # "rows with zero weights not counted")) # resid <- resid[!excl] # if(!length(x$df.residual)) # rdf <- rdf - sum(excl) # } # } # famname <- x$family["name"] # if(!length(famname)) famname <- "Gaussian" # scale <- x$fparms # nas <- is.na(coef) # cnames <- names(coef[!nas]) # coef <- matrix(rep(coef[!nas], 4), ncol = 4) # dimnames(coef) <- list(cnames, c("Value", "Std. 
Error", "z value", "p")) # stds <- sqrt(diag(x$var[!nas,!nas,drop=FALSE])) # coef[, 2] <- stds # coef[, 3] <- coef[, 1]/stds # coef[, 4] <- 2*pnorm(-abs(coef[,3])) # if(correlation) # { # if(sum(nas)==1) ss <- 1/stds else ss <- diag(1/stds) # correl <- ss %*% x$var[!nas, !nas, drop=FALSE] %*% ss # dimnames(correl) <- list(cnames, cnames) # } # else # correl <- NULL # ocall <- x$call # if(length(form <- x$formula)) # { # if(!length(ocall$formula)) # ocall <- match.call(get("survreg"), ocall) # ocall$formula <- form # } # dig <- .Options$digits # survival:::print.summary.survreg( # list(call = ocall, terms = x$terms, coefficients = coef#, # df = c(p, rdf), deviance.resid = dresid, # var=x$var, correlation = correl, deviance = devian#ce(x), # null.deviance = x$null.deviance, loglik=x$loglik, # iter = x$iter, # nas = nas)) # options(digits=dig) #recovers from bug in print.summary.survreg # invisible() #} ## Mod of print.summary.survreg from survival5 - suppresses printing a ## few things, added correlation arg print.summary.survreg2 <- function (x, digits = max(options()$digits - 4, 3), correlation=FALSE, ...) { correl <- x$correl n <- x$n if (is.null(digits)) digits <- options()$digits print(x$table, digits = digits) if (nrow(x$var) == length(x$coefficients)) cat("\nScale fixed at", format(x$scale, digits = digits), "\n") else if (length(x$scale) == 1) cat("\nScale=", format(x$scale, digits = digits), "\n") else { cat("\nScale:\n") print(x$scale, digits = digits, ...) } if (correlation && length(correl)) { p <- dim(correl)[2] if (p > 1) { cat("\nCorrelation of Coefficients:\n") ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits = digits)) correl[!ll] <- "" print(correl[-1, -p, drop = FALSE], quote = FALSE) } } cat("\n") invisible(NULL) } rms/R/lrm.fit.bare.r0000644000176200001440000000640613751064402013732 0ustar liggesusers##' Bare Bones Logistic Regression Fit ##' ##' This is a stripped down version of the `lrm.fit()` function that computes only the regression coefficients, variance-covariance-matrix, and log likelihood (for null and fitted model) and does not compute any model fit indexes etc. This is for speed in simulations or with bootstrapping. Missing data are not allowed. The function handles binary and ordinal logistic regression (proportional odds model). ##' @title lrm.fit.bare ##' @param x a vector of matrix of covariate values ##' @param y a numeric or factor vector representing the dependent variable ##' @param maxit maximum number of iteractions ##' @param eps stopping criterion (change in -2 log likelihood) ##' @param tol matrix inversion tolerance for singularities ##' @return a list with elements `coefficients`, `var`, `fail`, `freq`, `deviance` ##' @author Frank Harrell ##' @md lrm.fit.bare <- function(x, y, maxit=12, eps=.025, tol=1E-7) { opts <- double(12) opts[1:3] <- c(tol, eps, maxit) n <- length(y) if(n < 3) stop("must have >=3 non-missing observations") if(! is.matrix(x)) x <- as.matrix(x) dx <- dim(x) nx <- dx[2] if(nx == 0) stop('must have at least one x') if(dx[1] != n) stop("x and y must have same length") storage.mode(x) <- "double" est <- 1 : nx xname <- dimnames(x)[[2]] if(! length(xname)) xname <- paste("x[", 1 : nx, "]", sep="") if(! 
is.factor(y)) y <- as.factor(y) y <- unclass(y) ylevels <- levels(y) kint <- as.integer(length(ylevels) - 1) ftable <- integer(5001 * (kint + 1)) numy <- tabulate(y) names(numy) <- ylevels y <- as.integer(y - 1) nvi <- as.integer(nx + kint) weights <- rep(1., n) storage.mode(weights) <- 'double' sumwty <- tapply(weights, y, sum) sumwt <- sum(sumwty) sumw <- as.integer(round(sumwty)) ncum <- rev(cumsum(rev(sumwty)))[2 : (kint + 1)] pp <- ncum / sumwt initial <- rep(0., nvi) initial[1 : kint] <- log(pp / (1 - pp)) storage.mode(initial) <- "double" loglik <- -2 * sum(sumwty * logb(sumwty / sum(sumwty))) ## loglik <- -2 * sum(numy * logb(numy/n)) penmat <- matrix(0, ncol=nvi, nrow=nvi) storage.mode(penmat) <- 'double' z <- .Fortran(F_lrmfit, coef=initial, nx, est, x, y, offset=0., u=double(nvi), double(nvi * (nvi + 1) / 2), loglik=double(1), n, nx, sumw, nvi, v=double(nvi * nvi), double(nvi), double(2 * nvi), double(nvi), pivot=integer(nvi), opts=opts, ftable, penmat, weights) irank <- z$opts[7] if(irank < nvi) { cat("singular information matrix in lrm.fit.bare (rank=", irank, "). Offending variable(s):\n") cat(paste(xname[est[z$pivot[nvi : (irank + 1)] - kint]], collapse=" "), "\n") return(structure(list(fail=TRUE))) } loglik <- c(loglik, z$loglik) dvrg <- z$opts[6] > 0 ## Invert v with respect to fitted variables info.matrix <- matrix(z$v, nrow=nvi, ncol=nvi) v <- solvet(info.matrix, tol=tol) irank <- nvi name <- if(kint == 1) "Intercept" else paste("y>=", ylevels[2 : (kint + 1)], sep="") name <- c(name, xname) kof <- z$coef names(kof) <- name dimnames(v) <- list(name, name) list(freq=numy, fail=dvrg, coefficients=kof, var=v, deviance=loglik) } rms/R/rmsMisc.s0000644000176200001440000012455714024462760013100 0ustar liggesusers#Miscellaneous functions to retrieve characteristics of design DesignAssign <- function(atr, non.slopes, Terms) { ## Given Design attributes and number of intercepts creates R ## format assign list. ll <- if(missing(Terms)) atr$name else attr(Terms,'term.labels') if(! length(ll)) return(list()) nv <- length(ll) params <- sapply(atr$nonlinear, length) ## d.f. per predictor asc <- atr$assume.code assign <- list() j <- non.slopes + 1 if(length(params)) for(i in 1 : length(ll)) { if(asc[i] == 8) next assign[[ll[i]]] <- j : (j + params[i] - 1) j <- j + params[i] } assign } #Function to return variance-covariance matrix, optionally deleting #rows and columns corresponding to parameters such as scale parameters #in parametric survival models (if regcoef.only=TRUE) vcov.lrm <- function(object, regcoef.only=TRUE, intercepts='all', ...) { if(length(intercepts) == 1 && is.character(intercepts) && intercepts %nin% c('all', 'none')) stop('if character, intercepts must be "all" or "none"') if(!length(intercepts) || (length(intercepts) == 1) && intercepts == 'all') return(vcov.rms(object, regcoef.only=regcoef.only, ...)) ns <- num.intercepts(object) v <- object$var p <- ncol(v) nx <- p - ns if(intercepts == 'none') intercepts <- integer(0) i <- if(nx == 0) intercepts else c(intercepts, (ns+1):p) v[i, i, drop=FALSE] } vcov.ols <- function(object, regcoef.only=TRUE, ...) vcov.rms(object, regcoef.only=regcoef.only, ...) vcov.cph <- function(object, regcoef.only=TRUE, ...) vcov.rms(object, regcoef.only=regcoef.only, ...) vcov.psm <- function(object, regcoef.only=TRUE, ...) vcov.rms(object, regcoef.only=regcoef.only, ...) vcov.orm <- function(object, regcoef.only=TRUE, intercepts='mid', ...) { v <- object$var if(! 
length(intercepts)) return(v) li1 <- length(intercepts) == 1 iat <- attr(v, 'intercepts') # handle fit.mult.impute (?), robcov # robcov re-writes var object and uses all intercepts iref <- object$interceptRef if(is.numeric(intercepts) && li1 && intercepts == iref) intercepts <- 'mid' if(! length(iat)) { if(li1 && intercepts == 'mid') { i <- c(iref, (num.intercepts(object, 'var') + 1) : nrow(v)) return(object$var[i, i, drop=FALSE]) } return(vcov.lrm(object, regcoef.only=regcoef.only, intercepts=intercepts, ...)) } if(li1 && intercepts == 'none') return(object$var[-(1 : length(iat)), -(1 : length(iat)), drop=FALSE]) if(li1 && intercepts == 'mid' && length(iat) == 1) return(object$var) iref <- object$interceptRef info <- object$info.matrix isbootcov <- length(object$boot.coef) ns <- num.intercepts(object) p <- ncol(info) ns <- num.intercepts(object) nx <- p - ns scale <- attr(info, 'scale') name <- names(coef(object)) if(length(scale) && (! is.character(intercepts) || (li1 && intercepts == 'all'))) { xbar <- scale$mean xsd <- scale$sd trans <- rbind(cbind(diag(ns), matrix(0, nrow=ns, ncol=nx)), cbind(-matrix(rep(xbar / xsd, ns), ncol=ns), diag(1 / as.vector(xsd)))) } if(li1 && is.character(intercepts)) { if(intercepts != 'mid' && isbootcov) stop('intercepts must be "mid" if object produced by bootcov') switch(intercepts, mid = return(object$var), all = { if(! length(scale)) { v <- as.matrix(solve(info)) dimnames(v) <- list(name, name) return(v) } kint <- num.intercepts(object) v <- t(trans) %*% as.matrix(solve(info)) %*% trans dimnames(v) <- list(name, name) return(v) }, none= return(object$var[-1, -1, drop=FALSE]) ) } if(isbootcov) stop('intercepts must be "mid" if object produced by bootcov') i <- if(nx == 0) intercepts else c(intercepts, (ns+1):p) v <- if(length(scale)) (t(trans) %*% as.matrix(solve(info)) %*% trans)[i,i] else as.matrix(solve(info)[i,i]) dimnames(v) <- list(name[i], name[i]) v } vcov.rms <- function(object, regcoef.only=TRUE, intercepts='all', ...) { cov <- object$var if(!length(cov)) stop("fit does not have variance-covariance matrix") if(regcoef.only) { p <- length(object$coefficients) cov <- cov[1:p, 1:p, drop=FALSE] } if(length(intercepts) && intercepts == 'none') { ns <- num.intercepts(object) if(ns > 0) cov <- cov[-(1:ns), -(1:ns), drop=FALSE] } cov } ## Functions for Out Of Sample computation of -2 log likelihood ## evaluated at parameter estimates of a given fit oos.loglik <- function(fit, ...) UseMethod("oos.loglik") oos.loglik.ols <- function(fit, lp, y, ...) { sigma2 <- sum(fit$residuals^2)/length(fit$residuals) if(missing(lp)) { n <- length(fit$residuals) n*logb(2*pi*sigma2)+n } else { s <- !is.na(lp + y) lp <- lp[s]; y <- y[s] n <- length(lp) sse <- sum((y - lp)^2) n*logb(2*pi*sigma2) + sse/sigma2 } } oos.loglik.lrm <- function(fit, lp, y, ...) { if(missing(lp)) return(fit$deviance[length(fit$deviance)]) ns <- fit$non.slopes if(ns > 1) stop('ordinal y case not implemented') y <- as.integer(as.factor(y)) - 1 s <- !is.na(lp + y) lp <- lp[s]; y <- y[s] p <- plogis(lp) -2*sum(ifelse(y==1, logb(p), logb(1-p))) } oos.loglik.cph <- function(fit, lp, y, ...) { if(missing(lp)) return(-2*fit$loglik[2]) else stop('not implemented for cph models') } oos.loglik.psm <- function(fit, lp, y, ...) { if(missing(lp)) return(-2*fit$loglik[2]) else stop('not implemented for psm models') } oos.loglik.Glm <- function(fit, lp, y, ...) 
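## oos.loglik and its methods evaluate -2 log likelihood at the
## training-sample parameter estimates but on new data (lp, y); with
## lp and y omitted the in-sample deviance is returned.  Hedged usage
## sketch (train and test are hypothetical row-index vectors):
## f <- ols(y ~ x, data=dat, subset=train)
## oos.loglik(f, predict(f, newdata=dat[test, ]), dat$y[test])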
if(missing(lp)) deviance(fit) else glm.fit(x=NULL, y=as.vector(y), offset=lp, family=fit$family)$deviance #Function to retrieve limits and values, from fit (if they are there) #or from a datadist object. If need.all=F and input is coming from datadist, #insert columns with NAs for variables not defined #at is attr(fit$terms,"Design") (now fit$Design) Getlim <- function(at, allow.null=FALSE, need.all=TRUE) { nam <- at$name[at$assume!="interaction"] limits <- at$limits values <- at$values XDATADIST <- .Options$datadist X <- lims <- vals <- NULL if(! is.null(XDATADIST)) { X <- if(inherits(XDATADIST, 'datadist')) XDATADIST else if(exists(XDATADIST)) eval(as.name(XDATADIST)) if(! is.null(X)) { lims <- X$limits if(is.null(lims)) stop(paste("options(datadist=",XDATADIST, ") not created with datadist")) vals <- X$values } } if((length(X) + length(limits)) == 0) { if(allow.null) { lims <- list() for(nn in nam) lims[[nn]] <- rep(NA,7) lims <- structure(lims, class="data.frame", row.names=c("Low:effect","Adjust to", "High:effect", "Low:prediction", "High:prediction","Low","High")) return(list(limits=lims, values=values)) } stop("no datadist in effect now or during model fit") } na <- if(length(limits)) sapply(limits, function(x) all(is.na(x))) else rep(TRUE, length(nam)) if(length(lims) && any(na)) for(n in nam[na]) { #if() assumes NA stored in fit # for missing vars z <- limits[[n]] u <- if(match(n, names(lims), 0) > 0) lims[[n]] else NULL # This requires exact name match, not substring match if(is.null(u)) { if(need.all) stop(paste("variable",n, "does not have limits defined in fit or with datadist")) else limits[[n]] <- rep(NA,7) # Added 28 Jul 94 } else limits[[n]] <- u } limits <- structure(limits, class="data.frame", row.names=c("Low:effect","Adjust to", "High:effect", "Low:prediction", "High:prediction","Low","High")) if(length(vals)) values <- c(values, vals[match(names(vals),nam,0)>0 & match(names(vals),names(values),0)==0] ) # add in values from datadist corresponding to vars in model # not already defined for model list(limits=limits, values=values) } #Function to return limits for an individual variable, given an object #created by Getlim Getlimi <- function(name, Limval, need.all=TRUE) { lim <- if(match(name, names(Limval$limits), 0) > 0) Limval$limits[[name]] else NULL if(is.null(Limval) || is.null(lim) || all(is.na(lim))) { if(need.all) stop(paste("no limits defined by datadist for variable", name)) return(rep(NA,7)) } lim } #Function to return a list whose ith element contains indexes #of all predictors related, indirectly or directly, to predictor i #Predictor i and j are related indirectly if they are related to #any predictors that interact #Set type="direct" to only include factors interacting with i #This function is used by nomogram. 
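#Hedged illustration of the returned structure, with hypothetical
#variable names: for
#  f <- lrm(y ~ rcs(age, 4) * sex + blood.pressure)
#  related.predictors(f$Design)
#gives a list with one element per non-interaction predictor:
#age -> index of sex, sex -> index of age, and
#blood.pressure -> integer(0), because age and sex are connected
#through their interaction term.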
related.predictors <- function(at, type=c("all","direct")) { type <- match.arg(type) f <- sum(at$assume.code < 9) if(any(at$assume.code == 10)) stop("does not work with matrix factors") ia <- at$interactions x <- rep(NA,f) names(x) <- at$name[at$assume.code < 9] mode(x) <- "list" if(length(ia)==0) { for(i in 1:f) x[[i]] <- integer(0) return(x) } for(i in 1:f) { r <- integer(0) for(j in 1:ncol(ia)) { w <- ia[,j] if(any(w==i)) r <- c(r, w[w>0 & w!=i]) } x[[i]] <- r } if(type=="direct") return(x) while(TRUE) { bigger <- FALSE for(j in 1:f) { xj <- x[[j]] y <- unlist(x[xj]) y <- y[y != j] new <- unique(c(y, xj)) bigger <- bigger | length(new) > length(xj) x[[j]] <- new } if(!bigger) break } x } #Function like related.predictors(..., type='all') but with new # "super" predictors created by combining all indirected related # (through interactions) predictors into a vector of predictor numbers # with a new name formed from combining all related original names combineRelatedPredictors <- function(at) { nam <- at$name r <- related.predictors(at) newnames <- newnamesia <- components <- list() pused <- rep(FALSE, length(nam)) k <- 0 for(i in (1:length(nam))[at$assume.code != 9]) { if(!pused[i]) { comp <- i nn <- nam[i] ri <- r[[i]] ianames <- character(0) ic <- interactions.containing(at, i) if(length(ic)) { comp <- c(comp, ic) ianames <- nam[ic] } if(length(ri)) { comp <- c(comp, ri) nn <- c(nn, nam[ri]) for(j in ri) { pused[j] <- TRUE ic <- interactions.containing(at, j) if(length(ic)) { comp <- c(comp, ic) ianames <- c(ianames, nam[ic]) } } } k <- k + 1 components[[k]] <- unique(comp) newnames[[k]] <- unique(nn) newnamesia[[k]] <- unique(c(nn, ianames)) } } list(names=newnames, namesia=newnamesia, components=components) } #Function to list all interaction term numbers that include predictor #pred as one of the interaction components interactions.containing <- function(at, pred) { ia <- at$interactions if(length(ia)==0) return(NULL) name <- at$name parms <- at$parms ic <- NULL for(i in (1:length(at$assume.code))[at$assume.code==9]) { terms.involved <- parms[[name[i]]][,1] if(any(terms.involved==pred)) ic <- c(ic, i) } ic } #Function to return a vector of logical values corresponding to #non-intercepts, indicating if the parameter is one of the following types: # term.order Meaning # ---------- ----------------- # 1 all parameters # 2 all nonlinear or interaction parameters # 3 all nonlinear parameters (main effects or interactions) # 4 all interaction parameters # 5 all nonlinear interaction parameters param.order <- function(at, term.order) { #at=Design attributes if(term.order==1) return(rep(TRUE,length(at$colnames))) nonlin <- unlist(at$nonlinear[at$name[at$assume!="strata"]]) # omit strat ia <- NULL for(i in (1:length(at$name))[at$assume!="strata"]) ia <- c(ia, rep(at$assume[i]=="interaction",length(at$nonlinear[[i]]))) if(term.order==5) nonlin & ia else if(term.order==4) ia else if(term.order==3) nonlin else nonlin | ia } # rms.levels # Make each variable in an input data frame that is a # factor variable in the model be a factor variable with # the levels that were used in the model. 
This is primarily # so that row insertion will work right with <-[.data.frame # #at=Design attributes rms.levels <- function(df, at) { ac <- at$assume.code for(nn in names(df)) { j <- match(nn, at$name, 0) if(j>0) { if((ac[j]==5 | ac[j]==8) & length(lev <- at$parms[[nn]])) df[[nn]] <- factor(df[[nn]], lev) } } df } #Function to return a default penalty matrix for penalized MLE, #according to the design attributes and a design matrix X Penalty.matrix <- function(at, X) { d1 <- dimnames(X)[[2]][1] if(d1 %in% c('Intercept', '(Intercept)')) X <- X[, -1, drop=FALSE] d <- dim(X) n <- d[1]; p <- d[2] center <- as.vector(rep(1 / n, n) %*% X) # see scale() function v <- as.vector(rep(1 / (n - 1), n) %*% (X - rep(center, rep(n, p)))^2) pen <- if(p == 1) as.matrix(v) else as.matrix(diag(v)) ## works even if X one column is <- 1 ac <- at$assume for(i in (1 : length(at$name))[ac != "strata"]) { len <- length(at$nonlinear[[i]]) ie <- is + len - 1 if(ac[i] == "category") pen[is : ie, is : ie] <- diag(len) - 1 / (len + 1) is <- ie + 1 } pen } #Function to take as input a penalty specification of the form #penalty=constant or penalty=list(simple=,nonlinear=,interaction=, #nonlinear.interaction=) where higher order terms in the latter notation #may be omitted, in which case their penalty factors are taken from lower- #ordered terms. Returns a new penalty object in full list form along #with a full vector of penalty factors corresponding to the elements #in regression coefficient vectors to be estimated Penalty.setup <- function(at, penalty) { if(!is.list(penalty)) penalty <- list(simple=penalty, nonlinear=penalty, interaction=penalty, nonlinear.interaction=penalty) tsimple <- penalty$simple if(!length(tsimple)) tsimple <- 0 tnonlinear <- penalty$nonlinear if(!length(tnonlinear)) tnonlinear <- tsimple tinteraction <- penalty$interaction if(!length(tinteraction)) tinteraction <- tnonlinear tnonlinear.interaction <- penalty$nonlinear.interaction if(!length(tnonlinear.interaction)) tnonlinear.interaction <- tinteraction nonlin <- unlist(at$nonlinear[at$name[at$assume!='strata']]) ia <- NULL for(i in (1:length(at$name))[at$assume!='strata']) ia <- c(ia, rep(at$assume[i]=='interaction',length(at$nonlinear[[i]]))) nonlin.ia <- nonlin & ia nonlin[nonlin.ia] <- FALSE ia[nonlin.ia] <- FALSE simple <- rep(TRUE, length(ia)) simple[nonlin | ia | nonlin.ia] <- FALSE penfact <- tsimple*simple + tnonlinear*nonlin + tinteraction*ia + tnonlinear.interaction*nonlin.ia list(penalty=list(simple=tsimple, nonlinear=tnonlinear, interaction=tinteraction,nonlinear.interaction=tnonlinear.interaction), multiplier=penfact) } #Function to do likelihood ratio tests from two models that are # (1) nested and (2) have 'Model L.R.' components of the stats # component of the fit objects # For models with scale parameters, it is also assumed that the # scale estimate for the sub-model was fixed at that from the larger model lrtest <- function(fit1, fit2) { if(length(fit1$fail) && fit1$fail) stop('fit1 had failed') if(length(fit2$fail) && fit2$fail) stop('fit2 had failed') s1 <- fit1$stats s2 <- fit2$stats if(!length(s1)) s1 <- c('Model L.R.'=fit1$null.deviance - fit1$deviance, 'd.f.'=fit1$rank - (any(names(coef(fit1))=='(Intercept)'))) if(!length(s2)) s2 <- c('Model L.R.'=fit2$null.deviance - fit2$deviance, 'd.f.'=fit2$rank - (any(names(coef(fit2))=='(Intercept)'))) chisq1 <- s1['Model L.R.'] chisq2 <- s2['Model L.R.'] if(length(chisq1)==0 || length(chisq2)==2) stop('fits do not have stats component with "Model L.R." 
or deviance component') df1 <- s1['d.f.'] df2 <- s2['d.f.'] if(df1==df2) stop('models are not nested') lp1 <- length(fit1$parms); lp2 <- length(fit2$parms) if(lp1 != lp2) warning('fits do not have same number of scale parameters') else if(lp1 == 1 && abs(fit1$parms-fit2$parms)>1e-6) warning('fits do not have same values of scale parameters.\nConsider fixing the scale parameter for the reduced model to that from the larger model.') chisq <- abs(chisq1-chisq2) dof <- abs(df1-df2) p <- 1-pchisq(chisq,dof) r <- c(chisq,dof,p) names(r) <- c('L.R. Chisq','d.f.','P') structure(list(stats=r, formula1=formula(fit1), formula2=formula(fit2)), class='lrtest') } print.lrtest <- function(x, ...) { f1 <- x$formula1 f2 <- x$formula2 attributes(f1) <- NULL attributes(f2) <- NULL cat('\nModel 1: '); print(f1) cat('Model 2: '); print(f2); cat('\n') print(x$stats) cat('\n') invisible() } Newlabels <- function(fit, ...) UseMethod('Newlabels') Newlabels.rms <- function(fit, labels, ...) { at <- fit$Design nam <- names(labels) if(length(nam)==0) { if(length(labels)!=length(at$name)) stop('labels is not a named vector and its length is not equal to the number of variables in the fit') nam <- at$name } i <- match(nam, at$name, nomatch=0) if(any(i==0)) { warning(paste('the following variables were not in the fit and are ignored:\n', paste(nam[i==0],collapse=' '))) labels <- labels[i>0] i <- i[i>0] } at$label[i] <- labels fit$Design <- at fit } Newlevels <- function(fit, ...) UseMethod('Newlevels') Newlevels.rms <- function(fit, levels, ...) { at <- fit$Design nam <- names(levels) if(length(nam)==0) stop('levels must have names') i <- match(nam, at$name, nomatch=0) if(any(i==0)) { warning(paste('the following variables were not in the fit and are ignored:\n', paste(nam[i==0],collapse=' '))) nam <- nam[i>0] } for(n in nam) { prm <- at$parms[[n]] if(length(prm)!=length(levels[[n]])) stop(paste('new levels for variable', n,'has the wrong length')) levs <- levels[[n]] if(length(at$values[[n]])) at$values[[n]] <- levs if(length(at$limits)) { m <- match(at$limits[[n]], at$parms[[n]]) if(is.factor(at$limits[[n]])) attr(at$limits[[n]],'levels') <- levs else at$limits[[n]] <- levs[m] } at$parms[[n]] <- levs } fit$Design <- at fit } univarLR <- function(fit) { ## Computes all univariable LR chi-square statistics w <- as.character(attr(fit$terms,'variables')) w <- w[-1] p <- length(w)-1 stats <- P <- double(p) dof <- nobs <- integer(p) for(i in 1:p) { stat <- update(fit, as.formula(paste(w[1],w[i+1],sep='~')))$stats stats[i] <- stat['Model L.R.'] dof[i] <- stat['d.f.'] P[i] <- stat['P'] nobs[i] <- stat['Obs'] } data.frame(LR=stats, 'd.f.'=dof, P=P, N=nobs, row.names=w[-1], check.names=FALSE) } vif <- function(fit) { v <- vcov(fit, regcoef.only=TRUE) nam <- dimnames(v)[[1]] ns <- num.intercepts(fit) if(ns>0) { v <- v[-(1:ns),-(1:ns),drop=FALSE] nam <- nam[-(1:ns)] } d <- diag(v)^.5 v <- diag(solve(v/(d %o% d))) names(v) <- nam v } ## Returns a list such that variables with no = after them get the value NA ## For handling ... arguments to Predict, summary, nomogram, gendata, ## survplot.rms, ... 
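## Hedged sketch of the idea (g is a hypothetical wrapper): rmsArgs
## turns the unevaluated ... arguments into a named list, with NA
## standing for a variable mentioned without a value, so that a call
## like Predict(fit, age, sex='female') sees list(age=NA, sex='female'):
## g <- function(...) rmsArgs(substitute(list(...)))
## g(age, sex='female')   # list(age=NA, sex="female")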
rmsArgs <- function(.object, envir=parent.frame(2)) { if(length(.object) < 2) return(NULL) .names <- names(.object)[-1] ## See if no variables given with = after their names if(!length(.names)) .names <- rep('', length(.object)-1) .n <- length(.names) .vars <- sapply(.object, as.character)[-1] .res <- vector('list', .n) for(.i in 1:.n) { if(.names[.i] == '') { .names[.i] <- .vars[.i] .res[[.i]] <- NA } else .res[[.i]] <- eval(.object[[.i+1]], envir=envir) } names(.res) <- .names .res } ## General function to print model fit objects using latex, html, or regular ## print (the default) prModFit <- function(x, title, w, digits=4, coefs=TRUE, footer=NULL, lines.page=40, long=TRUE, needspace, subtitle=NULL, ...) { lang <- prType() specs <- markupSpecs[[lang]] transl <- switch(lang, latex = latexTranslate, html = htmlTranslate, plain = function(x) x) # cca <- htmlSpecial('combiningcircumflexaccent') nbsp <- htmlSpecial('nbsp') gt <- transl('>') vbar <- transl('|') chi2 <- specs$chisq() beta <- htmlGreek('beta') R <- character(0) bverb <- function() { switch(lang, html = '
<pre>',
           latex = '\\begin{verbatim}',
           plain = NULL)
  }
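  ## bverb/everb open and close a verbatim block in the current markup
  ## language, e.g. c(bverb(), deparse(x$call), everb()) renders the model
  ## call inside <pre>...</pre> for html and
  ## \begin{verbatim}...\end{verbatim} for latex, with no wrapper for plain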
  
  everb <- function()
    switch(lang,
html  = '</pre>',
           latex = '\\end{verbatim}',
           plain = NULL)

  skipt <- function(n=1) {
    if(n==0) return(character(0))
    if(n == 1) return('')
    specs$lineskip(n)
  }

  catl <- function(x, skip=1, bold=FALSE, verb=FALSE, pre=0,
                   center=FALSE, indent=FALSE) {
    if(lang == 'latex') {
      if(verb)
        c('\\begin{verbatim}',
          skipt(pre),
          x,
          skipt(skip),
          '\\end{verbatim}')
      else
        c(skipt(pre),
          paste0(
            if(center) '\\centerline{' else if(!indent) '\\noindent ',
            if(bold) '\\textbf{', x, if(bold) '}',
            if(center) '}'),
          skipt(skip))
    }
    else if(lang == 'html') {
      if(verb) c('<pre>', skipt(pre),
          x,
          skipt(skip),
'</pre>')
      else
        c(skipt(pre),
          paste0(if(center) '<div align=center>',
                 if(bold) '<strong>', x,
                 if(bold) '</strong>',
                 if(center) '</div>
'), skipt(skip)) } else c(paste0(skipt(pre), x), skipt(skip)) } latexVector <- function(x, ...) latexTabular(t(x), helvetica=FALSE, ...) if(length(x$fail) && x$fail) { return(catl('Model Did Not Converge. No summary provided.', bold=TRUE, pre=1, verb=TRUE)) } R <- character(0) if(! missing(needspace) && lang == 'latex') R <- paste0('\\Needspace{', needspace, '}') lsub <- length(subtitle) if(title != '') R <- c(R, catl(title, pre=1, bold=TRUE, skip=1)) ## was skip=if(lsub) 0 else 1 if(lsub) for(i in lsub) R <- c(R, catl(subtitle[i], bold=FALSE)) if(long) { R <- c(R, bverb(), deparse(x$call), everb(), '') ## dput(x$call) didn't work with rmarkdown because dput has no append= } for(z in w) { type <- z$type obj <- z[[2]] titl <- z$title tex <- z$tex if(! length(tex)) tex <- FALSE if(type == 'naprint.delete') { if(lang == 'latex') { type <- 'latex.naprint.delete' tex <- TRUE } if(lang == 'html') type <- 'html.naprint.delete' } preskip <- z$preskip if(! length(preskip)) preskip <- 0 if(! tex && length(titl)) R <- c(R, '', catl(titl, pre=preskip, skip=1)) if(type == 'stats') { R <- c(R, prStats(obj[[1]], obj[[2]], lang=lang)) } else if(type == 'coefmatrix') { if(coefs) { pad <- function(x) switch(lang, latex = paste0('~', x, '~'), html = paste0(nbsp, x), plain = x) betan <- switch(lang, plain = 'Beta', html = htmlGreek('beta'), latex = '$\\hat{\\beta}$') B <- obj$bayes if(length(B)) { U <- matrix('', nrow=nrow(B), ncol=ncol(B)) for(i in 1:ncol(B)) { dig <- if(colnames(B)[i] == 'Symmetry') 2 else digits U[, i] <- pad(formatNP(B[, i], dig, lang=lang)) } pn <- switch(lang, plain='Pr(Beta>0)', html = paste0('Pr(', betan, transl('>'), '0)'), latex = 'Pr$(\\beta>0)$') coltrans <- c(Mean = paste('Mean', betan), Median = paste('Median', betan), Mode = paste('Mode', betan), SE = 'S.E.', Lower = 'Lower', Upper = 'Upper', P = pn, Symmetry = 'Symmetry') colnames(U) <- coltrans[colnames(B)] rownames(U) <- rownames(B) betanames <- rownames(B) } else { errordf <- obj$errordf beta <- obj$coef betanames <- names(beta) se <- obj$se Z <- beta / se P <- if(length(errordf)) 2 * (1 - pt(abs(Z), errordf)) else 1 - pchisq(Z ^ 2, 1) U <- cbind('Coef' = pad(formatNP(beta, digits, lang=lang)), 'S.E.' = pad(formatNP(se, digits, lang=lang)), 'Wald Z' = formatNP(Z, 2, lang=lang), 'Pr(>|Z|)' = formatNP(P, 4, lang=lang, pvalue=TRUE)) if(lang == 'latex') colnames(U) <- c('$\\hat{\\beta}$', 'S.E.', 'Wald $Z$', 'Pr$(>|Z|)$') else if(lang == 'html') colnames(U) <- c(htmlGreek('beta'), # did have cca 'S.E.', 'Wald Z', paste0('Pr(', gt, vbar, 'Z', vbar, ')')) if(length(errordf)) colnames(U)[3:4] <- switch(lang, latex = c('$t$', 'Pr$(>|t|)$'), html = c('t', paste0('Pr(', gt, vbar, 't', vbar, ')')), plain = c('t', 'Pr(>|t|)') ) rownames(U) <- betanames if(length(obj$aux)) { U <- cbind(U, formatNP(obj$aux, digits, lang=lang)) colnames(U)[ncol(U)] <- obj$auxname } } if(lang %in% c('latex', 'html')) { R <- c(R, skipt(1)) rownames(U) <- transl(betanames) if(is.numeric(coefs)) { U <- U[1:coefs,,drop=FALSE] U <- rbind(U, rep('', ncol(U))) rownames(U)[nrow(U)] <- if(lang == 'html') '…' else '\\dots' } ## Translate interaction symbol (*) to times symbol rownames(U) <- gsub('*', specs$times, rownames(U), fixed=TRUE) if(! 
missing(needspace) && lang == 'latex') R <- c(R, paste0('\\Needspace{', needspace, '}')) if(lang == 'latex') R <- c(R, # was capture.output(latex()) capture.output(latex(U, file='', first.hline.double=FALSE, table=FALSE, longtable=TRUE, lines.page=lines.page, col.just=rep('r',ncol(U)), rowlabel='', already.math.col.names=TRUE, append=TRUE))) else { al <- paste(rep('r', ncol(U)), collapse='') R <- c(R, as.character( htmlTable::htmlTable(U, css.cell = 'min-width: 7em;', align=al, align.header=al, rowlabel='', escape.html=FALSE))) } } else { if(is.numeric(coefs)) { U <- U[1:coefs,,drop=FALSE] U <- rbind(U, rep('', ncol(U))) rownames(U)[nrow(U)] <- '. . .' } R <- c(R, '', capture.output(print(U, quote=FALSE)), '') } } ## end if(coefs) } ## end coefmatrix else { if(tex) { ### ??? how does this apply to html? R <- c(R, '\\begin{center}', if(length(titl)) c(titl, '\n')) } else { R <- c(R, skipt(preskip)) } R <- c(R, if(type == 'html.naprint.delete') do.call(type, obj) else if(type == 'latex.naprint.delete') capture.output(do.call(type, c(obj, list(file='')))) else if(type == 'print') c(bverb(), capture.output(do.call(type, obj)), everb()) else do.call(type, obj), ## unlike do.call, eval(call(...)) dispatches on class of ... if(tex) '\\end{center}' else '' ) } } if(length(footer)) R <- c(R, paste0(specs$smallskip, transl(footer))) R <- paste0(R, '\n') switch(lang, html = htmltools::HTML(R), latex = cat(R), plain = cat(R)) } latex.naprint.delete <- function(object, file='', append=TRUE, ...) { lg <- length(g <- object$nmiss) if(file != '') sink(file, append=append) if(lg) { cat("Frequencies of Missing Values Due to Each Variable\n\n\\smallskip\n\n") if(sum(g > 0) < 4) { cat('\\begin{verbatim}\n') print(g) cat('\\end{verbatim}\n') } else { maxlen <- max(nchar(names(g))) est <- function(X, Y, x) approx(X, Y, xout=x, rule=2)$y z <- latexDotchart(g, names(g), auxdata=g, auxtitle='N', w = 1 + est(c(2, 60), c(.5, 6), maxlen), h = min(max(2.5*lg/20, 1), 8)) cat(z, sep='\n') } cat("\n") } if(length(g <- object$na.detail.response)) { cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n") print(unclass(g)) cat("\n") } if(file != '') sink() invisible() } html.naprint.delete <- function(object, ...) { lg <- length(g <- object$nmiss) R <- character(0) if(lg) { R <- "Frequencies of Missing Values Due to Each Variable" if(sum(g > 0) < 4) R <- c(R, '
<pre>', capture.output(print(g)), '</pre>
') else { maxlen <- max(nchar(names(g))) print(dotchartp(g, names(g), auxdata=g, auxtitle='N', showlegend = FALSE, sort = 'descending', xlab = 'Missing', width = min(550, 300 + 20 * maxlen), height = plotlyParm$heightDotchart(lg))) } } if(length(g <- object$na.detail.response)) { R <- c(R, '', 'Statistics on Response by Missing/Non-Missing Status of Predictors
', '<pre>', capture.output(print(unclass(g))), '</pre>
') } R } ## Function to print model fit statistics ## Example: #prStats(list('Observations', c('Log','Likelihood'), ## c('Rank','Measures'), ## c('Mean |difference|','Measures')), ## list(list(N0=52, N1=48), list('max |deriv|'=1e-9,'-2 LL'=1332.23, ## c(NA,2)), # list(tau=-.75, Dxy=-.64, C=.743, 2), # list(g=1.25, gr=11.3, 2))) ## Note that when there is an unnamed element of w, it is assumed to be ## the number of digits to the right of the decimal place (recycling of ## elements is done if fewer elements are in this vector), causing ## round(, # digits) and format(..., nsmall=# digits). Use NA to use ## format without nsmall and without rounding (useful for integers and for ## scientific notation) prStats <- function(labels, w, lang=c('plain', 'latex', 'html')) { lang <- match.arg(lang) lorh <- lang != 'plain' specs <- markupSpecs[[lang]] partial <- htmlSpecial('part') vbar <- htmlTranslate('|') cca <- htmlSpecial('combiningcircumflexaccent') beta <- htmlGreek('beta') geq <- htmlTranslate('>=') spaces <- function(n) if(n <= 0.5) '' else substring(' ', 1, floor(n)) ## strsplit returns character(0) for "" ssplit <- function(x) { x <- strsplit(x, split='\n') for(i in 1 : length(x)) if(! length(x[[i]])) x[[i]] <- '' x } trans <- switch(lang, latex = latexTranslate, html = htmlTranslate, plain = function(x) x ) ## Find maximum width used for each column p <- length(labels) width <- numeric(p) for(i in 1:p) { labs <- ssplit(labels[i])[[1]] width[i] <- max(nchar(labs)) u <- w[[i]] dig <- NA if(any(names(u)=='')) { dig <- unlist(u[names(u) == '']) u <- u[names(u) != ''] } lu <- length(u) dig <- rep(dig, length=lu) fu <- character(lu) for(j in 1 : length(u)) { uj <- u[[j]] nuj <- names(u)[j] dg <- dig[j] fu[j] <- if(nuj == 'Cluster on') specs$code(trans(uj)) else if(nuj == 'max |deriv|') formatNP(signif(uj, 1), lang=lang) else if(is.na(dg)) format(uj) else if(dg < 0) formatNP(uj, -dg, pvalue=TRUE, lang=lang) else formatNP(uj, dg, lang=lang) } names(fu) <- names(u) w[[i]] <- fu for(j in 1 : length(u)) width[i] <- max(width[i], 1 + nchar(nuj) + nchar(fu[j])) } if(lorh) { maxl <- max(sapply(w, length)) z <- matrix('', nrow=maxl, ncol=p) fil <- if(lang == 'latex') '~\\hfill ' else htmlSpecial('emsp') chisq <- specs$chisq() trans <- rbind( 'Dxy' = c(latex = '$D_{xy}$', html = 'Dxy'), 'LR chi2' = c(latex = paste0('LR ', chisq), html = paste0('LR ', chisq)), 'Score chi2' = c(latex = paste0('Score ', chisq), html = paste0('Score ', chisq)), 'Pr(> chi2)' = c(latex = 'Pr$(>\\chi^{2})$', html = paste0('Pr(', htmlTranslate('>'), chisq, ')')), 'tau-a' = c(latex = '$\\tau_{a}$', html = paste0(htmlGreek('tau'), 'a')), 'sigma gamma'= c(latex = '$\\sigma_{\\gamma}$', html = 'σγ'), 'sigma w' = c(latex = '$\\sigma_{w}$', html = 'σw'), 'gamma' = c(latex = '$\\gamma$', html = htmlGreek('gamma')), 'R2' = c(latex = '$R^{2}$', html = 'R2'), 'R2 adj' = c(latex = '$R^{2}_{\\textrm{adj}}$', html = paste0('R', specs$subsup('adj', '2'))), 'C' = c(latex = '$C$', html = 'C'), 'g' = c(latex = '$g$', html = 'g'), 'gp' = c(latex = '$g_{p}$', html = 'gp'), 'gr' = c(latex = '$g_{r}$', html = 'gr'), 'max |deriv|' = c(latex = '$\\max|\\frac{\\partial\\log L}{\\partial \\beta}|$', html = paste0('max ', vbar, partial, 'log L/', partial, beta, vbar)), 'mean |Y-Yhat|' = c(latex = 'mean $|Y-\\hat{Y}|$', html = paste0('mean ', vbar, 'Y - Y', cca, vbar)), 'Distinct Y' = c(latex = 'Distinct $Y$', html = 'Distinct Y'), 'Median Y' = c(latex = '$Y_{0.5}$', html = 'Y0.5'), '|Pr(Y>=median)-0.5|' = c(latex = '$|\\overline{\\mathrm{Pr}(Y\\geq 
Y_{0.5})-\\frac{1}{2}}|$',
      html  = paste0('<span style="text-decoration: overline">',
                     vbar, 'Pr(Y ', geq, ' median)-',
                     htmlSpecial('half'), vbar, '</span>'))
    )

    for(i in 1 : p) {
      k <- names(w[[i]])
      j <- k %in% rownames(trans)
      if(any(j)) k[j] <- trans[k[j], lang]
      j <- ! j
      if(any(j)) k[j] <- switch(lang,
                                plain = k[j],
                                latex = latexTranslate(k[j], greek=TRUE),
                                html  = htmlTranslate (k[j], greek=TRUE) )
      z[1 : length(k), i] <- paste0(k, fil, w[[i]])
    }
    al <- paste0('|', paste(rep('c|', p), collapse=''))
    if(lang == 'latex')
      w <- latexTabular(z, headings=labels, align=al, halign=al,
                        translate=FALSE, hline=2, center=TRUE)
    else {
      labels <- gsub('\n', '<br>
', labels) w <- htmlTable::htmlTable(z, header=labels, css.cell = 'min-width: 9em;', align=al, align.header=al, escape.html=FALSE) w <- htmltools::HTML(paste0(w, '\n')) } return(w) } z <- labs <- character(0) for(i in 1:p) { wid <- width[i] lab <- ssplit(labels[i])[[1]] for(j in 1:length(lab)) lab[j] <- paste0(spaces((wid - nchar(lab[j])) / 2), lab[j]) labs <- c(labs, paste(lab, collapse='\n')) u <- w[[i]] a <- '' for(i in 1:length(u)) a <- paste0(a, names(u)[i], spaces(wid - nchar(u[i]) - nchar(names(u[i]))), u[i], if(i < length(u)) '\n') z <- c(z, a) } res <- rbind(labs, z) rownames(res) <- NULL capture.output(print.char.matrix(res, vsep='', hsep=' ', csep='', top.border=FALSE, left.border=FALSE)) } ## reListclean is used in conjunction with pstats ## Example: # x <- c(a=1, b=2) # c(A=x[1], B=x[2]) # reListclean(A=x[1], B=x[2]) # reListclean(A=x['a'], B=x['b'], C=x['c']) #reListclean <- function(..., na.rm=TRUE) { # d <- list(...) # d <- d[sapply(d, function(x) ! is.null(x))] # x <- unlist(d) # names(x) <- names(d) # if(na.rm) x[! is.na(x)] else x #} reListclean <- function(..., na.rm=TRUE) { d <- list(...) g <- if(na.rm) function(x) length(x) > 0 && ! is.na(x) else function(x) length(x) > 0 d[sapply(d, g)] } formatNP <- function(x, digits=NULL, pvalue=FALSE, lang=c('plain', 'latex', 'html')) { lang <- match.arg(lang) if(! is.numeric(x)) return(x) digits <- as.numeric(digits) # Needed but can't figure out why x <- as.numeric(x) f <- if(length(digits) && ! is.na(digits)) format(round(x, digits), nsmall=digits, scientific=1) else format(x, scientific=1) sci <- grep('e', f) if(length(sci)) { if(lang == 'latex') f[sci] <- paste0('$', latexSN(f[sci]), '$') else if(lang == 'html') f[sci] <- htmlSN(f[sci]) } f <- ifelse(is.na(x), '', f) if(! pvalue) return(f) if(! length(digits)) stop('must specify digits if pvalue=TRUE') s <- ! is.na(x) & x < 10 ^ (-digits) if(any(s)) { w <- paste0('0.', paste0(rep('0', digits - 1), collapse=''), '1') f[s] <- switch(lang, latex = paste0('\\textless ', w), html = paste0(htmlTranslate('<'), w), plain = paste0('<', w)) } f } logLik.ols <- function(object, ...) { ll <- getS3method('logLik', 'lm')(object) attr(ll, 'df') <- object$stats['d.f.'] + 2 ll } logLik.rms <- function(object, ...) { dof <- unname(object$stats['d.f.'] + num.intercepts(object)) if(inherits(object, 'psm')) dof <- dof + 1 # for sigma nobs <- nobs(object) w <- object$loglik if(length(w)) return(structure(w[length(w)], nobs=nobs, df=dof, class='logLik')) w <- object$deviance structure(-0.5*w[length(w)], nobs=nobs, df=dof, class='logLik') } logLik.Gls <- function(object, ...) getS3method('logLik', 'gls')(object, ...) AIC.rms <- function(object, ..., k=2, type=c('loglik','chisq')) { type <- match.arg(type) if(type == 'loglik') return(AIC(logLik(object), k=k)) stats <- object$stats dof <- stats['d.f.'] unname(stats['Model L.R.'] - k * dof) } nobs.rms <- function(object, ...) 
{ st <- object$stats if(inherits(object,'Gls')) length(object$residuals) else if(any(names(st) == 'Obs')) unname(st['Obs']) else unname(st['n']) } setPb <- function(n, type=c('Monte Carlo Simulation','Bootstrap', 'Cross-Validation'), label, usetk=TRUE, onlytk=FALSE, every=1) { type <- match.arg(type) if(!missing(label)) type <- label pbo <- .Options$showprogress if(!length(pbo)) pbo <- 'console' else if(is.logical(pbo)) { pbo <- if(pbo) 'tk' else 'none' } if(missing(every)) { evo <- .Options$showevery if(length(evo)) every <- evo } if(pbo == 'none') return(function(i, ...){invisible()}) if(pbo == 'tk' && usetk && requireNamespace('tcltk', quietly=TRUE)) { pb <- tcltk::tkProgressBar(type, 'Iteration: ', 0, n) upb1 <- function(i, n, every, pb) { if(i %% every == 0) tcltk::setTkProgressBar(pb, i, label=sprintf('Iteration: %d', i)) if(i == n) close(pb) } formals(upb1) <- list(i=0, n=n, every=every, pb=pb) return(upb1) } if(onlytk) return(function(...) {invisible()}) upb2 <- function(i, n, every) { if(i %% every == 0) cat('Iteration: ', i, ' of ', n, '\r', sep='') if(i == n) cat('\n') } formals(upb2) <- list(i=0, n=n, every=every) upb2 } ## Function to remove one or more terms from a model formula, using ## strictly character manipulation. This handles problems such as ## [.terms removing offset() if you subset on anything ## For each character string in which, terms like string(...) are removed. removeFormulaTerms <- function(form, which=NULL, delete.response=FALSE) { if('offset' %in% which) { form <- formula(terms(form)[TRUE]) which <- setdiff(which, 'offset') } ## [.terms ignores offset variables. Above logic handles nested () unlike ## what is below form <- paste(deparse(form), collapse='') # no string splitting if(delete.response) form <- gsub('.*~', '~', form) for(w in which) { pattern <- sprintf('\\+?[ ]*?%s\\(.*?\\)[ ]*?\\+{0,1}', w) ## assume additive form form <- gsub(pattern, '', form) } as.formula(form) } rms/R/survplotp.npsurv.s0000644000176200001440000003336513671511133015065 0ustar liggesuserssurvplotp <- function(fit, ...) UseMethod("survplotp") survplotp.npsurv <- function(fit, xlim, ylim, xlab, ylab, time.inc, state=NULL, conf=c("bands", "none"), mylim=NULL, abbrev.label=FALSE, col=colorspace::rainbow_hcl, levels.only=TRUE, loglog=FALSE, fun=function(y) y, aehaz=FALSE, times=NULL, logt=FALSE, pr=FALSE, ...) { conf <- match.arg(conf) conf.int <- fit$conf.int if(!length(conf.int) | conf == "none") conf.int <- 0 if(loglog) fun <- function(y) logb(-logb(ifelse(y == 0 | y == 1, NA, y))) mstate <- inherits(fit, 'survfitms') if(mstate) fun <- function(y) 1 - y ## Multi-state model for competing risks z <- seq(.1, .9, by = .1) funtype <- if(loglog) 'loglog' else if(all(fun(z) == z)) 'identity' else if(all(abs(fun(z) - (1 - z)) < 1e-6)) 'inverse' else if(loglog) 'loglog' else 'other' cylim <- function(ylim) if(length(mylim)) c(min(ylim[1], mylim[1]), max(ylim[2], mylim[2])) else ylim mu <- markupSpecs$html nbsp <- htmlSpecial('nbsp') if(funtype %in% c('identity', 'inverse')) survdiffplotp <- function(fit, fun, xlim, conf.int, convert=function(f) f, pobj) { if(length(fit$strata) != 2) stop('must have exactly two strata') h <- function(level, f) { i <- f$strata == levels(f$strata)[level] tim <- f$time[i] surv <- fun(f$surv[i]) se <- f$std.err[i] list(time=tim, surv=surv, se=se) } times <- sort(c(0, unique(fit$time))) times <- times[times >= xlim[1] & times <= xlim[2]] f <- convert(summary(fit, times=times, print.it=FALSE, extend=TRUE)) a <- h(1, f) b <- h(2, f) if(! 
identical(a$time, b$time)) stop('program logic error') time <- a$time surv <- (a$surv + b$surv) / 2 se <- sqrt(a$se^2 + b$se^2) z <- qnorm((1 + conf.int) / 2) lo <- pmax(0, surv - 0.5 * z * se) hi <- pmin(1, surv + 0.5 * z * se) k <- ! is.na(time + lo + hi) list(times=time[k], lower=lo[k], upper=hi[k]) } fit.orig <- fit units <- fit$units if(!length(units)) units <- "Day" maxtime <- fit$maxtime if(! length(maxtime)) maxtime <- max(fit$time) mintime <- min(fit$time, 0) pret <- pretty(c(mintime, maxtime)) maxtime <- max(pret) mintime <- min(pret) if(missing(time.inc)) { time.inc <- switch(units, Day=30, Month=1, Year=1, (maxtime - mintime) / 10) if(time.inc > maxtime) time.inc <- (maxtime - mintime) / 10 } if(mstate) { ## Multi-state model for competing risks if(missing(state)) stop('state must be given when response is a multi-state/competing risk object from Surv()') if(length(state) != 1) stop('at present state can only be a single state') states <- fit$states if(state %nin% states) stop(paste('state is not in', paste(states, collapse=', '))) } if(missing(ylab)) ylab <- switch(funtype, identity = 'Survival Probability', inverse = if(mstate) paste('Cumulative Incidence of', upFirst(state)) else 'Cumulative Incidence', loglog = 'log(-log Survival Probability)', other = '') un <- fit$units if(un != '') un <- paste0(un, 's') if(missing(xlab)) xlab <- if(logt) paste0("log Follow-up Time in ", un) else mu$varlabel('Follow-up Time', un) if(missing(xlim)) xlim <- if(logt) logb(c(maxtime / 100, maxtime)) else c(mintime, maxtime) convert <- if(mstate) { istate <- match(state, states) conv <- function(f, istate) { f$surv <- 1 - f$pstate [, istate] f$lower <- 1 - f$lower [, istate] f$upper <- 1 - f$upper [, istate] f$std.err <- f$std.err[, istate] icens <- which(states == '(s0)') if(! length(icens)) stop('Program logic error: did not find (s0) column with competing risks') f$n.risk <- f$n.risk[, icens] if(all(f$n.risk == 0)) stop('program logic error: all n.risk are zero') f } formals(conv) <- list(f=NULL, istate=istate) conv } else function(f) f fit <- convert(fit) origsurv <- fit$surv if(funtype != 'identity') { fit$surv <- fun(fit$surv) fit$surv[is.infinite(fit$surv)] <- NA ## handle e.g. logit(1) - Inf would mess up ylim in plot() if(conf.int > 0) { fit$lower <- fun(fit$lower) fit$upper <- fun(fit$upper) fit$lower[is.infinite(fit$lower)] <- NA fit$upper[is.infinite(fit$upper)] <- NA if(missing(ylim)) ylim <- cylim(range(c(fit$lower, fit$upper), na.rm=TRUE)) } else if(missing(ylim)) ylim <- cylim(range(fit$surv, na.rm=TRUE)) } else if(missing(ylim)) ylim <- c(0, 1) olev <- slev <- names(fit$strata) if(levels.only) slev <- gsub('.*=', '', slev) sleva <- if(abbrev.label) abbreviate(slev) else slev ns <- length(slev) slevp <- ns > 0 ns <- max(ns, 1) if(is.function(col)) col <- col(ns) y <- 1 : ns strat <- if(ns == 1) rep(1, length(fit$time)) else rep(1 : ns, fit$strata) stime <- sort(unique(c(0, fit.orig$time))) stime <- stime[stime >= mintime & stime <= maxtime] # v <- convert(summary(fit.orig, times=stime, print.it=FALSE)) # vs <- if(ns > 1) as.character(v$strata) ## survival:::summary.survfit was not preserving order of strata levels nevents <- totaltime <- numeric(ns) cuminc <- character(ns) p <- plotly::plot_ly(...) pl <- function(p, x, y, n.risk=NULL, col, slev, type='est') { sname <- if(ns == 1) '' else slev snames <- if(sname == '') '' else paste0(sname, ' ') d <- paste0('Difference
<br>', mu$half(), ' ', conf.int, ' CL')
    nam <- switch(type,
                  est   = sname,
                  lower = paste0(snames, conf.int, ' CL'),
                  upper = paste0(snames, conf.int, ' CL', type),
                  'diff lower' = d,
                  'diff upper' = paste0(d, type))
    lg <- switch(type,
                 est   = 'Estimates',
                 lower = paste0(snames, 'CL'),
                 upper = paste0(snames, 'CL'),
                 'diff lower' = 'Difference',
                 'diff upper' = 'Difference')
    rx <- format(round(x, 3))
    ry <- format(round(y, 3))
    txt <- switch(type,
                  est   = paste0('t=', rx, '<br>Probability=', ry,
                                 if(length(n.risk)) '<br>At risk:', n.risk),
                  lower = paste0('t=', rx, '<br>Lower:', ry),
                  upper = paste0('t=', rx, '<br>Upper:', ry),
                  'diff lower' = NULL,
                  'diff upper' = NULL)
    ## Mark in text the point estimates that correspond to every time.inc
    if(type == 'est' && ! logt) {
      nicet <- seq(mintime, maxtime, by=time.inc)
      nicet <- nicet[nicet > 0]
      for(ti in nicet) {
        if(any(abs(ti - x) < 1e-6)) next   # nice time already covered
        k <- which(x < ti); k <- max(k)
        txt[k] <- paste0(txt[k], '<br>
(Also for t=', ti, ')') } } fcol <- plotly::toRGB(col, 0.2) vis <- if(ns == 2 && type %in% c('lower', 'upper')) 'legendonly' else TRUE ln <- if(type == 'est') list(shape='hv', color=col) else list(shape='hv', color=col, width=0) dat <- if(length(txt)) data.frame(x, y, txt) else data.frame(x, y) up <- type %in% c('upper', 'diff upper') p <- if(length(txt)) plotly::add_lines(p, x=~ x, y=~ y, text=~ txt, data=dat, hoverinfo='text', line=ln, fillcolor=fcol, fill=if(type %in% c('upper', 'diff upper')) 'tonexty' else 'none', visible=vis, legendgroup=lg, name=nam, showlegend=! up) else plotly::add_lines(p, x=~ x, y=~ y, data=dat, hoverinfo='none', line=ln, fillcolor=fcol, fill=if(type %in% c('upper', 'diff upper')) 'tonexty' else 'none', visible=vis, legendgroup=lg, name=nam, showlegend=! up) } for(i in 1 : ns) { st <- strat == i time <- fit$time[st] surv <- fit$surv[st] lower <- fit$lower[st] upper <- fit$upper[st] osurv <- origsurv[st] n.risk <- fit$n.risk[st] if(! logt && xlim[1] ==0 && all(time > xlim[1])) { time <- c(xlim[1], time) surv <- c(fun(1), surv) lower <- c(fun(1), lower) upper <- c(fun(1), upper) osurv <- c(1, osurv) n.risk <- c(fit$n[i], n.risk) } ## nevents[i] <- sum(fit$n.event[st]) ## nrsk <- fit$n.risk[st] ## neachtime <- c(- diff(nrsk), min(nrsk)) ## totaltime[i] <- sum(neachtime * time) nevents[i] <- if(mstate) { if(ns == 1) fit$numevents[, state] else fit$numevents[olev[i], state] } else { if(ns == 1) fit$numevents else fit$numevents[olev[i]] } totaltime[i] <- if(ns == 1) fit$exposure else fit$exposure[olev[i]] if(length(times)) { cumi <- 1. - approx(time, osurv, xout=times, method='constant')$y noun <- units %in% c('', ' ') cuminc[i] <- paste(round(cumi, 3), collapse=', ') } if(logt) time <- logb(time) ##don't let step function go beyond x-axis - ##this cuts it off but allows step to proceed axis end if(max(time) > xlim[2]) { srvl <- surv[time <= xlim[2] + 1e-6] s.last <- srvl[length(srvl)] k <- time < xlim[2] time <- c(time[k], xlim[2]) surv <- c(surv[k], s.last) n.risk <- c(n.risk[k], n.risk[length(srvl)]) if(conf.int > 0) { low.last <- lower[time <= xlim[2] + 1e-6] low.last <- low.last[length(low.last)] up.last <- upper[time <= xlim[2] + 1e-6] up.last <- up.last[length(up.last)] lower <- c(lower[k], low.last) upper <- c(upper[k], up.last) } } if(logt) p <- pl(p, time, surv, n.risk, col=col[i], slev=sleva[i]) else { xxx <- time yyy <- surv nr <- n.risk if(mintime < min(time)) { xxx <- c(mintime, time) yyy <- c(fun(1), surv) nr <- c(fit$n[i], n.risk) } p <- pl(p, xxx, yyy, nr, col=col[i], slev=sleva[i]) } if(pr) { zest <- rbind(time, surv) dimnames(zest) <- list(c("Time", "Survival"), rep("", length(time))) if(slevp)cat("\nEstimates for ", slev[i], "\n\n") print(zest, digits=3) } if(conf.int > 0) { if(logt) { p <- pl(p, time, lower, type='lower', col=col[i], slev=sleva[i]) p <- pl(p, time, upper, type='upper', col=col[i], slev=sleva[i]) } else { p <- pl(p, c(min(time), time), c(fun(1), lower), col=col[i], slev=slev[i], type='lower') # see survplot ?max(tim)? 
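      ## Companion upper confidence band: pl() draws 'upper' traces with
      ## zero-width lines and fill='tonexty' so the region between the
      ## lower and upper bands is shaded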
p <- pl(p, c(min(time), time), c(fun(1), upper), col=col[i], slev=slev[i], type='upper') } } } if(funtype %in% c('identity', 'inverse') && ns == 2 && conf.int > 0) { z <- survdiffplotp(fit.orig, fun=fun, conf.int=conf.int, convert=convert, xlim=xlim, pobj=p) g <- plotly::toRGB('gray') p <- pl(p, z$time, z$lower, type='diff lower', col=g, slev='') p <- pl(p, z$time, z$upper, type='diff upper', col=g, slev='') } slevat <- ifelse(sleva == '', '', paste0(sleva, ': ')) if(aehaz || length(times)) { un <- if(units == ' ' | units == '') '' else paste('/', tolower(units), sep='') haz <- round(nevents / totaltime, 4) txt <- paste(nevents, 'events') if(aehaz) txt <- paste0(txt, '
<br>', htmlGreek('lambda'), '=', haz, un, sep='')
    z <- paste(paste0(slevat, txt), collapse='<br>')
    if(length(times)) {
      z2 <- paste0('<br>Cumulative<br>Incidence at<br>',
                   't=', paste(times, collapse=', '), ' ', units,
                   if(un !='') 's', '<br>',
                   paste0(slevat, cuminc, collapse='<br>'))
      z <- paste0(z, z2)
    }
  }
  else z <- paste(paste0(slevat, nevents, ' events'), collapse='<br>
') ## Add empty trace just to add to bottom of legend. Used to have x=~NA y=~NA ## but plotly update made that point ignored in every way dam <- data.frame(x=xlim[1], y=ylim[1]) p <- plotly::add_markers(p, x=~ x, y=~ y, # mode='markers', marker=list(symbol='asterisk'), # suppresses pt name=z, data=dam) xaxis <- list(range=xlim, title=xlab) if(! logt) xaxis <- c(xaxis, list(tickvals = seq(xlim[1], max(pretty(xlim)), time.inc))) plotly::layout(p, xaxis=xaxis, yaxis=list(range=ylim, title=ylab)) } rms/R/calibrate.s0000644000176200001440000001112212424776435013401 0ustar liggesuserscalibrate <- function(fit, ...) UseMethod("calibrate") print.calibrate <- function(x, B=Inf, ...) { at <- attributes(x) predicted <- at$predicted dput(at$call) cat('\n\nn=', length(predicted), ' B=', at$B, ' u=', at$u, ' ', at$units, '\n\n', sep='') stratified <- 'KM' %in% colnames(x) if(stratified){ attributes(x) <- at[c('dim','dimnames')] print.default(x) } else if(length(predicted)) { s <- !is.na(x[,'pred'] + x[,'calibrated.corrected']) err <- predicted - approxExtrap(x[s,'pred'],x[s,'calibrated.corrected'], xout=predicted, ties=mean)$y cat('\nMean |error|:', format(mean(abs(err))), ' 0.9 Quantile of |error|:', format(quantile(err, 0.9, na.rm=TRUE)), '\n', sep='') } kept <- at$kept if(length(kept)) { cat("\nFactors Retained in Backwards Elimination\n\n") varin <- ifelse(kept, '*', ' ') print(varin[1:min(nrow(varin), B),], quote=FALSE) cat("\nFrequencies of Numbers of Factors Retained\n\n") nkept <- apply(kept, 1, sum) tkept <- table(nkept) names(dimnames(tkept)) <- NULL print(tkept) } invisible() } plot.calibrate <- function(x, xlab, ylab, subtitles=TRUE, conf.int=TRUE, cex.subtitles=.75, riskdist=TRUE, add=FALSE, scat1d.opts=list(nhistSpike=200), par.corrected=NULL, ...) { at <- attributes(x) u <- at$u units <- at$units if(length(par.corrected) && ! is.list(par.corrected)) stop('par.corrected must be a list') z <- list(col='blue', lty=1, lwd=1, pch=4) if(! length(par.corrected)) par.corrected <- z else for(n in setdiff(names(z), names(par.corrected))) par.corrected[[n]] <- z[[n]] predicted <- at$predicted if('KM' %in% colnames(x)) { type <- 'stratified' pred <- x[,"mean.predicted"] cal <- x[,"KM"] cal.corrected <- x[,"KM.corrected"] se <- x[,"std.err"] } else { type <- 'smooth' pred <- x[,'pred'] cal <- x[,'calibrated'] cal.corrected <- x[,'calibrated.corrected'] se <- NULL } un <- if(u==1) paste(units, 's', sep='') else units if(missing(xlab)) xlab <- paste("Predicted ",format(u),units,"Survival") if(missing(ylab)) ylab <- paste("Fraction Surviving ",format(u)," ",un, sep="") ##Remember that groupkm stored the se of the log survival if(length(se) && conf.int) { ## Compute confidence limits for survival based on -log survival, ## constraining to be in [0,1]; d = std.error of cum hazard * z value ciupper <- function(surv, d) ifelse(surv==0, 0, pmin(1, surv*exp(d))) cilower <- function(surv, d) ifelse(surv==0, 0, surv*exp(-d)) errbar(pred, cal, cilower(cal, 1.959964*se), ciupper(cal, 1.959964*se), xlab=xlab, ylab=ylab, type="b", add=add, ...) } else if(add) lines(pred, cal, type=if(type=='smooth') 'l' else 'b') else plot(pred, cal, xlab=xlab, ylab=ylab, type=if(type=='smooth')'l' else "b", ...) 
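  ## Next: optionally show the distribution of predicted values as a rug
  ## (scat1d) and, for smooth calibration curves, compute the absolute
  ## calibration error by interpolating the bias-corrected curve at each
  ## predicted value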
err <- NULL if(riskdist && length(predicted)) { do.call('scat1d', c(list(x=predicted), scat1d.opts)) if(type=='smooth') { s <- !is.na(pred + cal.corrected) err <- predicted - approxExtrap(pred[s], cal.corrected[s], xout=predicted, ties=mean)$y } } if(subtitles && !add) { if(type=='smooth') { Col <- par.corrected$col substring(Col, 1, 1) <- toupper(substring(Col, 1, 1)) title(sub=sprintf('Black: observed Gray: ideal\n%s : optimism corrected', Col), adj=0, cex.sub=cex.subtitles) w <- if(length(err)) paste('B=', at$B, ' based on ', at$what, '\nMean |error|=', round(mean(abs(err)), 3), ' 0.9 Quantile=', round(quantile(abs(err), .9, na.rm=TRUE), 3), sep='') else paste('B=', at$B, '\nBased on ', at$what, sep='') title(sub=w, adj=1, cex.sub=cex.subtitles) } else { title(sub=paste("n=", at$n, " d=", at$d, " p=", at$p, ", ", at$m, " subjects per group\nGray: ideal", sep=""), adj=0, cex.sub=cex.subtitles) title(sub=paste("X - resampling optimism added, B=", at$B, "\nBased on ", at$what, sep=""), adj=1, cex.sub=cex.subtitles) } } abline(0, 1, col=gray(.9)) #ideal line if(type=='stratified') points(pred, cal.corrected, pch=par.corrected$pch, col=par.corrected$col) else lines (pred, cal.corrected, col=par.corrected$col, lty=par.corrected$lty, lwd=par.corrected$lwd) invisible() } rms/R/Rq.s0000644000176200001440000001577213704356665012055 0ustar liggesusers## Modification of the rq function in the quantreg package written by ## Roger Koenker, Stephen Portnoy, Pin Tian Ng, Achim Zeileis, ## Philip Grosjean, Brian Ripley Rq <- function (formula, tau = 0.5, data=environment(formula), subset, weights, na.action=na.delete, method = "br", model = FALSE, contrasts = NULL, se='nid', hs=TRUE, x=FALSE, y=FALSE, ...) { call <- match.call() callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) mf <- modelData(data, formula, subset = subset, weights = weights, na.action=na.action, callenv=callenv) mf <- Design(mf, formula=formula) at <- attributes(mf) sformula <- at$sformula desatr <- at$Design attr(mf,'Design') <- NULL if (method == "model.frame") return(mf) mt <- at$terms weights <- model.weights(mf) Y <- model.response(mf) X <- model.matrix(mt, mf, contrasts) if (length(desatr$colnames)) colnames(X) <- c("Intercept", desatr$colnames) eps <- .Machine$double.eps^(2/3) Rho <- function(u, tau) u * (tau - (u < 0)) if (length(tau) > 1) stop('does not allow more than one quantile to be estimated simultaneously') ## The following keeps quantreg from overriding latex generic in Hmisc ## library(quantreg, pos=length(search()) + 1) fit <- if (length(weights)) quantreg::rq.wfit(X, Y, tau = tau, weights, method, ...) else quantreg::rq.fit(X, Y, tau = tau, method, ...) 
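  ## rho computed below is the minimized objective: the sum of
  ## check-function losses Rho(u, tau) = u * (tau - (u < 0)) over the
  ## residuals u.  At tau=0.5 each residual contributes |u|/2, so
  ## rho = n * mad / 2, where mad (stored in stats) is the mean absolute
  ## residual.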
rownames(fit$residuals) <- rownames(dimnames(X)[[1]]) rho <- sum(Rho(fit$residuals, tau)) stats <- c(n=length(fit$residuals), p=length(fit$coefficients), g=GiniMd(fit$fitted.values), mad=mean(abs(fit$residuals), na.rm=TRUE)) fit <- c(fit, list( na.action = at$na.action, formula = formula, sformula = sformula, terms = mt, xlevels = .getXlevels(mt, mf), call = call, tau = tau, method = method, weights = weights, residuals = drop(fit$residuals), rho = rho, # fitted.values = drop(fit$fitted.values), model = mf, Design = desatr, assign = DesignAssign(desatr, 1, mt), stats = stats)) # attr(fit, "na.message") <- attr(m, "na.message") s <- quantreg::summary.rq(fit, covariance=TRUE, se=se, hs=hs) k <- s$coefficients nam <- names(fit$coefficients) rownames(k) <- nam fit$summary <- k cov <- s$cov dimnames(cov) <- list(nam, nam) fit$var <- cov fit$method <- method fit$se <- se fit$hs <- hs ## Remove the following since summary.rq has done its job if(!model) fit$model <- NULL if(!x) fit$x <- NULL else fit$x <- X[, -1, drop=FALSE] if(!y) fit$y <- NULL class(fit) <- c('Rq', 'rms', if (method == "lasso") "lassorq" else if (method == "scad") "scadrq", "rq") fit } ## Thanks to Duncan Murdoch for the formals alist substitute technique RqFit <- function(fit, wallow=TRUE, passdots=FALSE) { w <- fit$weights if(length(w)) { if(!wallow) stop('weights not implemented') g <- if(passdots) function(x, y, weights, tau, method, ...) quantreg::rq.wfit(cbind(Intercept=1., x), y, tau = tau, weights=weights, method=method, ...) else function(x, y, weights, tau, method, ...) quantreg::rq.wfit(cbind(Intercept=1., x), y, tau = tau, weights=weights, method=method) formals(g) <- eval(substitute( alist(x=,y=, weights=,tau=deftau,method=defmethod,...=), list(deftau=fit$tau, defmethod=fit$method))) } else { g <- if(passdots) function(x, y, tau, method, ...) quantreg::rq.fit(cbind(Intercept=1., x), y, tau = tau, method=method, ...) else function(x, y, tau, method, ...) quantreg::rq.fit(cbind(Intercept=1., x), y, tau = tau, method=method) formals(g) <- eval(substitute(alist(x=,y=, tau=deftau, method=defmethod,...=), list(deftau=fit$tau, defmethod=fit$method))) } g } print.Rq <- function(x, digits=4, coefs=TRUE, title, ...) { k <- 0 z <- list() ftau <- format(round(x$tau, digits)) if(missing(title)) title <- if(prType() == 'latex') paste('Quantile Regression~~~~$\\tau$', ftau, sep='=') else paste('Quantile Regression\t\ttau:', ftau) if(length(zz <- x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(zz)[1], sep='.'), list(zz)) } s <- x$stats n <- s['n']; p <- s['p']; errordf <- n - p; g <- s['g'] mad <- s['mad'] ci <- x$clusterInfo misc <- reListclean(Obs=n, p=p, 'Residual d.f.'=errordf, 'Cluster on'=ci$name, Clusters =ci$n, 'mean |Y-Yhat|'=mad) disc <- reListclean(g=g) headings <- c('', 'Discrimination\nIndex') data <- list(misc, c(disc,3)) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) s <- x$summary k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef = s[,'Value'], se = s[,'Std. Error'], errordf = errordf)) if (length(mes <- attr(x, "na.message"))) { k <- k + 1 z[[k]] <- list(type='cat', list(mes, '\n')) } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } latex.Rq <- function(object, file = paste(first.word(deparse(substitute(object))), ".tex", sep = ""), append=FALSE, which, varnames, columns=65, inline=FALSE, caption=NULL, ...) { html <- prType() == 'html' if(html) file <- '' f <- object tau <- f$tau at <- f$Design w <- if (length(caption)) { if(html) paste('
<div align=center><strong>', caption, '</strong></div>
', sep='') else paste("\\begin{center} \\bf", caption, "\\end{center}") } if (missing(which) & !inline) { Y <- paste("{\\rm ", as.character(formula(f))[2], "}", sep = "") w <- c(w, paste("\\[", Y, "_{", tau, "} = X\\beta, {\\rm \\ \\ where} \\\\ \\]", sep = "")) } if(missing(which)) which <- 1:length(at$name) if(missing(varnames)) varnames <- at$name cat(w, file = file, sep = if (length(w)) "\n" else "", append = append) latexrms(f, file=file, append=TRUE, which=which, inline=inline, varnames=varnames, columns=columns, caption, ...) } predict.Rq <- function(object, ..., kint=1, se.fit=FALSE) predictrms(object, ..., kint=kint, se.fit=se.fit) rms/R/groupkm.s0000644000176200001440000001034413142405454013130 0ustar liggesusers#Function to divide x (e.g. Cox predicted survical at time y created by #survest) into g quantile groups, get Kaplan-Meier estimates at time u #(a scaler), and to return a matrix with columns x=mean x in # quantile, n=#subjects, events=#events, and KM=K-M survival at time u, # std.err = s.e. of log-log K-M #Failure time=y Censoring indicator=e #Instead of supplying g, the user can supply the number of subjects to have #in the quantile group on the average; then g will be computed. The default #m is 50, so the default g is n/50. #If cuts is given (e.g. cuts=c(0,.1,.2,...,.9,.1)), it overrides m and g. #Set pl=T to plot results. If pl=T, units attribute of y applies. #Default is "Day". #xlab and ... are passed to plot() if pl=T. Default xlab is label(x) #if it is defined, otherwise the name of the calling argument for x. # #Author: Frank Harrell 8 May 91 groupkm <- function(x, Srv, m=50, g, cuts, u, pl=FALSE, loglog=FALSE, conf.int=.95, xlab, ylab, lty=1, add=FALSE, cex.subtitle=.7, ...) { if(missing(u))stop("u (time point) must be given") if(missing(xlab)) xlab <- label(x) if(xlab=="") xlab <- as.character(sys.call())[2] s <- !(is.na(x)|is.na(Srv[,1])|is.na(Srv[,2])) x <- x[s]; Srv <- Srv[s,] x[abs(x) < 1e-10] <- 0 #cut2 doesn't work with tiny x e <- Srv[,2] if(nrow(Srv) != length(x)) stop("lengths of x and Srv must match") unit <- units(Srv) if(is.null(unit) || unit=="") unit <- "Day" if(!missing(cuts)) q <- cut2(x, cuts) else if(!missing(g)) q <- cut2(x, g=g) else q <- cut2(x, m=m) if(any(table(q) < 2)) warning('one interval had < 2 observations') q <- unclass(q) #need integer g <- length(levels(q)) km <- double(g) pred <- km std.err <- km events <- integer(g) numobs <- events #f <- survfit.km(q, Srv, conf.int=conf.int, conf.type="log-log") #if(is.null(f$strata)) {nstrat <- 1; stemp <- rep(1, length(f$time))} #else { nstrat <- length(f$strata); stemp <- rep(1:nstrat,f$strata)} #This is more efficient but doesn't handle empty strata for(i in 1:g) { s <- q==i nobs <- sum(s); ne <- sum(e[s]) if(nobs < 2) { numobs[i] <- 0 events[i] <- 0 pred[i] <- if(nobs==1) mean(x[s], na.rm=TRUE) else NA km[i] <- NA std.err[i] <- NA } else { pred[i] <- mean(x[s], na.rm=TRUE) dummystrat <- as.factor(rep("1", nobs)) f <- survfitKM(dummystrat,Srv[s,]) ##doesn't need conf.int since only need s.e. 
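      ## Prepend time 0 (survival 1, SE 0), then read the Kaplan-Meier step
      ## function at the latest event time <= u; the 1e-6 tolerance guards
      ## the floating-point comparison of times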
tt <- c(0, f$time) ss <- c(1, f$surv) se <- c(0, f$std.err) tm <- max((1:length(tt))[tt <= u+1e-6]) km[i] <- ss[tm] std.err[i] <- se[tm] numobs[i] <- nobs events[i] <- ne n <- length(tt) if(u > tt[n]+1e-6 & ss[n]>0) { km[i] <- NA std.err[i] <- NA } } } z <- cbind(x=pred, n=numobs, events=events, KM=km, std.err=std.err) ciupper <- function(surv, d) ifelse(surv==0, 0, pmin(1, surv*exp(d))) cilower <- function(surv, d) ifelse(surv==0, 0, surv*exp(-d)) if(pl) { y <- km if(conf.int) { zcrit <- qnorm((conf.int+1)/2) low <- cilower(km, zcrit*std.err) hi <- ciupper(km, zcrit*std.err) } if(missing(ylab)) ylab <- paste("Kaplan-Meier ",format(u),"-",unit," Survival",sep="") if(loglog) { y <- logb(-logb(y)) if(conf.int) { low <- logb(-logb(low)) hi <- logb(-logb(hi)) } if(missing(ylab)) ylab <- paste("log(-log Kaplan-Meier ",format(u),unit, " Survival",sep="") } if(!add)plot(pred, y, xlab=xlab, ylab=ylab, type="n", ...) lines(pred, y, lty=lty) if(conf.int) errbar(pred, y, hi, low, add=TRUE, ...) if(!is.logical(cex.subtitle)) { nn <- sum(numobs,na.rm=TRUE) mm <- round(nn/g) title(sub=paste("n=",nn," d=",sum(events,na.rm=TRUE), ", avg. ",mm," patients per group",sep=""), adj=0,cex=cex.subtitle) } } z } rms/R/predictrms.s0000644000176200001440000004147114024533360013623 0ustar liggesusers##newdata=data frame, vector, matrix, or list. All but first assume data ##need coding, e.g. categorical variables are given as integers ##variable missing for all obs -> use adjust-to value in limits ##(means (parms) for matrx) predictrms <- function(fit, newdata=NULL, type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean', 'individual', 'simultaneous'), kint=NULL, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ref.zero=FALSE, posterior.summary=c('mean', 'median', 'mode'), second=FALSE, ...) { type <- match.arg(type) conf.type <- match.arg(conf.type) posterior.summary <- match.arg(posterior.summary) # Prevents structure(NULL, ...) below (now deprecated) nulll <- function(z) if(is.null(z)) list() else z if(second && type %nin% c('lp', 'x', 'adjto', 'adjto.data.frame')) stop('type not implemented when second=TRUE') draws <- fit$draws bayes <- length(draws) > 0 if(bayes) param <- fit$param if(bayes && conf.type == 'simultaneous') stop('conf.type simultaneous not supported for Bayesian models') if(bayes && se.fit) { warning('se.fit ignored for Bayesian models') se.fit <- FALSE } if(second || conf.type == 'simultaneous') { ## require(multcomp) if(missing(newdata) || ! length(newdata)) stop('newdata must be given if conf.type="simultaneous" or second=TRUE') } at <- if(second) fit$zDesign else fit$Design assume <- at$assume.code Limval <- Getlim(at, allow.null=TRUE, need.all=FALSE) Values <- Limval$values non.ia <- assume != 9L non.strat <- assume != 8L f <- sum(non.ia) nstrata <- sum(assume == 8L) somex <- any(non.strat) rnam <- NULL cox <- inherits(fit, "cph") naa <- fit$na.action if(! expand.na) naresid <- function(a,b) b #don't really call naresid if drop NAs parms <- at$parms name <- at$name coeff <- if(bayes) rmsb::getParamCoef(fit, posterior.summary, what=if(second) 'taus' else 'betas') else fit$coefficients nrp <- num.intercepts(fit) nrpcoef <- num.intercepts(fit, 'coef') if(! 
length(kint)) kint <- fit$interceptRef # orm, lrm, blrm otherwise NULL int.pres <- nrp > 0L assign <- if(second) fit$zassign else fit$assign nama <- names(assign)[1L] asso <- 1*(nama=="Intercept" | nama=="(Intercept)") Center <- if(cox) fit$center else 0. oldopts <- options(contrasts=c(factor="contr.treatment", ordered="contr.treatment"), # was "contr.poly" Design.attr=at) ## In SV4 options(two lists) causes problems on.exit({options(contrasts=oldopts$contrasts) options(Design.attr=NULL)}) ## Formula without response variable any offsets: formulano <- if(second) fit$zsformula else removeFormulaTerms(fit$sformula, which='offset', delete.response=TRUE) offset <- 0; offpres <- FALSE ## offset is ignored for prediction (offset set to zero) ## if(! missing(newdata) && length(newdata)) { ## offset <- model.offset(model.frame(removeFormulaTerms(fit$sformula, ## delete.response=TRUE), newdata, ## na.action=na.action, ...)) ## offpres <- length(offset) > 0 ## if(! offpres) offset <- 0 ## } #Terms <- delete.response(terms(formula(fit), specials='strat')) Terms <- terms(formulano, specials='strat') attr(Terms, "response") <- 0L attr(Terms, "intercept") <- 1L ## Need intercept whenever design matrix is generated to get ## current list of dummy variables for factor variables stra <- attr(Terms, "specials")$strat Terms.ns <- if(length(stra)) Terms[-stra] else Terms if(conf.int) { vconstant <- 0. if(conf.type=='individual') { vconstant <- fit$stats['Sigma']^2 if(is.na(vconstant)) stop('conf.type="individual" requires that fit be from ols') } zcrit <- if(length(idf <- fit$df.residual)) qt((1. + conf.int) / 2., idf) else qnorm((1. + conf.int) / 2.) } ## Form design matrix for adjust-to values ## Result of Adjto() is a model matrix with no intercept(s) Adjto <- function(type) { adjto <- list() ii <- 0L for(i in (1L : length(assume))[non.ia]) { ii <- ii + 1L xi <- Getlimi(name[i], Limval, need.all=TRUE)[2L] if(assume[i] %in% c(5L, 8L)) xi <- factor(xi, parms[[name[i]]]) else if(assume[i] == 7L) xi <- scored(xi, name=name[i]) else if(assume[i] == 10L) xi <- I(matrix(parms[[name[i]]], nrow=1)) #matrx col medians adjto[[ii]] <- xi } names(adjto) <- name[non.ia] attr(adjto, "row.names") <- "1" class(adjto) <- "data.frame" if(type == "adjto.data.frame") return(adjto) adjto <- model.frame(Terms, adjto) adjto <- model.matrix(Terms.ns, adjto)[, -1, drop=FALSE] if(type == 'adjto') { k <- (nrpcoef + 1L) : length(coeff) nck <- names(coeff)[k] if(is.matrix(adjto)) dimnames(adjto) <- list(dimnames(adjto)[[1L]], nck) else names(adjto) <- nck } adjto } adjto <- NULL if(type %nin% c('adjto', 'adjto.data.frame')) { X <- NULL if(missing(newdata) || ! length(newdata)) { flp <- fit$linear.predictors if(type == "lp" && length(flp)) { LP <- naresid(naa, flp) if(int.pres) { lpkint <- attr(flp, 'intercepts') if(! length(lpkint)) lpkint <- 1L if(length(kint) && kint != lpkint) { LP <- LP - coeff[lpkint] + coeff[kint] } } if(length(stra <- fit$strata)) attr(LP, "strata") <- naresid(naa, stra) if(! se.fit && ! 
conf.int) return(LP) else if(length(fit$se.fit)) { if(nrp > 1L) warning("se.fit is retrieved from the fit but it corresponded to kint") retlist <- list(linear.predictors=LP) if(se.fit) retlist$se.fit <- naresid(naa, fit$se.fit) if(conf.int) { plminus <- zcrit * sqrt(retlist$se.fit^2 + vconstant) retlist$lower <- LP - plminus retlist$upper <- LP + plminus } return(retlist) } } # end type='lp' with linear.predictors stored in fit else if(type == "x") return(structure(nulll(naresid(naa, fit$x)), strata=if(length(stra <- fit$strata)) naresid(naa, stra) else NULL)) X <- fit[['x']] rnam <- dimnames(X)[[1]] if(! length(X)) stop("newdata not given and fit did not store x") } # end no newdata if(! length(X)) { if(! is.data.frame(newdata)) { if(is.list(newdata)) { ## When second=TRUE the formula may not contain all the variables ## in newdata loc <- name[assume != 9L] if(length(names(newdata))) newdata <- newdata[loc] ## loc <- if(! length(names(newdata))) 1L : f else name[assume != 9L] new <- matrix(double(1L), nrow=length(newdata[[1L]]), ncol=length(newdata)) for(j in 1L : ncol(new)) new[, j] <- newdata[[loc[j]]] newdata <- new } if(! is.matrix(newdata)) newdata <- matrix(newdata, ncol=f) if(ncol(newdata) != f) stop("# columns in newdata not= # factors in design") X <- list() k <- 0L ii <- 0L for(i in (1L : length(assume))[non.ia]) { ii <- ii + 1L xi <- newdata[, ii] as <- assume[i] allna <- all(is.na(xi)) if(as == 5L | as == 8L) { xi <- as.integer(xi) levels(xi) <- parms[[name[i]]] class(xi) <- "factor" } else if(as == 7L) xi <- scored(xi, name=name[i]) else if(as == 10L) { if(i == 1) ifact <- 1L else ifact <- 1L + sum(assume[1L : (i - 1L)] != 8L) ## Accounts for assign not being output for strata factors ncols <- length(assign[[ifact + asso]]) if(allna) { xi <- matrix(double(1L), nrow=length(xi), ncol=ncols) for(j in 1L : ncol(xi)) xi[, j] <- parms[[name[i]]][j] xi <- I(xi) } else xi <- I(matrix(xi, nrow=length(xi), ncol=ncols)) } ## Duplicate single value for all parts of matrix k <- k + 1L X[[k]] <- xi } names(X) <- name[non.ia] attr(X, "row.names") <- as.character(1L : nrow(newdata)) class(X) <- "data.frame" newdata <- X ## Note: data.frame() converts matrix variables to individual variables if(type == "data.frame") return(newdata) } # end !is.data.frame(newdata) else { ## Need to convert any factors to have all levels in original fit ## Otherwise, wrong dummy variables will be generated by model.matrix nm <- names(newdata) for(i in 1L : ncol(newdata)) { j <- match(nm[i], name) if(! is.na(j)) { asj <- assume[j] w <- newdata[, i] V <- NULL if(asj %in% c(5L, 7L, 8L) | (name[j] %in% names(Values) && asj != 11 && length(V <- Values[[name[j]]]) && is.character(V))) { if(length(Pa <- parms[[name[j]]])) V <- Pa newdata[,i] <- factor(w, V) ## Handles user specifying numeric values without quotes, that ## are levels ww <- is.na(newdata[,i]) & ! is.na(unclass(w)) if(any(ww)) { cat("Error in predictrms: Values in", names(newdata)[i], "not in", V, ":\n") print(as.character(w[ww]), quote=FALSE); stop() } } } } } # is.data.frame(newdata) X <- model.frame(Terms, newdata, na.action=na.action, ...) if(type == "model.frame") return(X) naa <- attr(X, "na.action") rnam <- row.names(X) strata <- list() nst <- 0 ii <- 0 for(i in 1L : ncol(X)) { ii <- ii + 1L xi <- X[[i]] asi <- attr(xi, "assume.code") as <- assume[ii] if(! 
length(asi) && as == 7L) { attr(X[,i], "contrasts") <- attr(scored(xi, name=name[ii]), "contrasts") if(length(xi) == 1L) warning("a bug in model.matrix can produce incorrect results\nwhen only one observation is being predicted for an ordered variable") } if(as == 8L) { nst <- nst + 1L ste <- paste(name[ii], parms[[name[ii]]], sep='=') strata[[nst]] <- factor(ste[X[,i]], ste) } } X <- if(! somex) NULL else model.matrix(Terms.ns, X)[, -1L, drop=FALSE] if(nstrata > 0L) { names(strata) <- paste("S", 1L : nstrata, sep="") strata <- interaction(as.data.frame(strata), drop=FALSE) } } # end !length(X) else strata <- attr(X, "strata") } # if(end adj.to adj.to.data.frame) if(somex && ! bayes) { cov <- vcov(fit, regcoef.only=TRUE, intercepts=kint) covnoint <- if(nrp == 0) cov else vcov(fit, regcoef.only=TRUE, intercepts='none') } if(type %in% c('adjto.data.frame', 'adjto')) return(Adjto(type)) if(type=="x") return( structure(nulll(naresid(naa, X)), strata=if(nstrata > 0) naresid(naa, strata) else NULL, na.action=if(expand.na) NULL else naa) ) if(type == "lp") { if(somex) { xb <- matxv(X, coeff, kint=kint) - Center + offset names(xb) <- rnam if(bayes && conf.int) { xB <- matxv(X, draws, kint=kint, bmat=TRUE) xB <- apply(xB, 1, rmsb::HPDint, prob=conf.int) lower <- xB[1, ] upper <- xB[2, ] } } else { xb <- if(offpres) offset else numeric(0) if(nstrata > 0) attr(xb, 'strata') <- naresid(naa, strata) return(structure(if(se.fit) list(linear.predictors=xb, se.fit=rep(NA, length(xb))) else xb, na.action=if(expand.na) NULL else naa)) } xb <- naresid(naa, xb) if(nstrata > 0) attr(xb, "strata") <- naresid(naa, strata) ycenter <- if(ref.zero && somex) { if(! length(adjto)) adjto <- Adjto(type) matxv(adjto, coeff, kint=kint) - Center } else 0. if(ref.zero || ((se.fit || conf.int) && somex)) { dx <- dim(X) n <- dx[1L]; p <- dx[2L] if(cox && ! ref.zero) X <- X - rep(fit$means, rep.int(n, p)) if(ref.zero) { if(! length(adjto)) adjto <- Adjto(type) X <- X - rep(adjto, rep.int(n, p)) } if(! bayes) { se <- drop(if(ref.zero || nrp == 0L) sqrt(((X %*% covnoint) * X) %*% rep(1L, ncol(X))) else { Xx <- cbind(Intercept=1., X) sqrt(((Xx %*% cov) * Xx) %*% rep(1L, ncol(Xx))) }) names(se) <- rnam sef <- naresid(naa, se) } ww <- if(conf.int || se.fit) { if(se.fit) list(linear.predictors = xb - ycenter, se.fit = sef) else list(linear.predictors = xb - ycenter) } else xb - ycenter if(bayes) {lower <- lower - ycenter; upper <- upper - ycenter} retlist <- structure(nulll(ww), na.action=if(expand.na) NULL else naa) if(conf.int) { if(conf.type == 'simultaneous') { num.intercepts.not.in.X <- length(coeff) - ncol(X) u <- confint(multcomp::glht(fit, if(num.intercepts.not.in.X == 0L) X else Xx, df=if(length(idf)) idf else 0L), level=conf.int)$confint retlist$lower <- u[,'lwr'] retlist$upper <- u[,'upr'] } else { if(bayes) { retlist$lower <- lower retlist$upper <- upper } else { plminus <- zcrit*sqrt(sef^2 + vconstant) retlist$lower <- xb - plminus - ycenter retlist$upper <- xb + plminus - ycenter } } } return(retlist) } else return(structure(xb - ycenter, na.action=if(expand.na)NULL else naa)) } ## end if type='lp' if(type %in% c("terms", "cterms", "ccterms")) { if(! somex) stop('type="terms" may not be given unless covariables present') usevar <- if(type=="terms") non.strat else rep(TRUE, length(assume)) fitted <- array(0, c(nrow(X), sum(usevar)), list(rnam, name[usevar])) if(se.fit) se <- fitted if(center.terms) { if(! 
length(adjto)) adjto <- Adjto(type) if(ncol(adjto) != ncol(X)) { if(dimnames(adjto)[[2L]][1L] %in% c('Intercept','(Intercept)') && dimnames(X)[[2L]][1L] %nin% c('Intercept','(Intercept)')) adjto <- adjto[, -1L, drop=FALSE] if(ncol(adjto) != ncol(X)) stop('program logic error') } X <- sweep(X, 2L, adjto) # center columns } j <- 0L for(i in (1L : length(assume))[usevar]) { j <- j + 1L if(assume[i] != 8L) { # non-strat factor; otherwise leave fitted=0 k <- assign[[j + asso]] num.intercepts.not.in.X <- length(coeff) - ncol(X) ko <- k - num.intercepts.not.in.X fitted[, j] <- matxv(X[, ko, drop=FALSE], coeff[k]) if(se.fit) se[,j] <- (((X[, ko, drop=FALSE] %*% cov[k, k, drop=FALSE]) * X[, ko, drop=FALSE]) %*% rep(1., length(ko)))^.5 } } if(type == "cterms") { ## Combine all related interation terms with main effect terms w <- fitted[, non.ia, drop=FALSE] # non-interaction terms for(i in 1L : f) { ia <- interactions.containing(at, i) ## subscripts of interaction terms related to predictor i if(length(ia)) w[, i] <- rowSums(fitted[, c(i,ia), drop=FALSE]) } fitted <- w } if(type=='ccterms') { z <- combineRelatedPredictors(at) f <- length(z$names) w <- matrix(NA, ncol=f, nrow=nrow(fitted)) colnames(w) <- sapply(z$names, paste, collapse=', ') for(i in 1L : f) w[,i] <- rowSums(fitted[, z$namesia[[i]], drop=FALSE]) fitted <- w } fitted <- structure(nulll(naresid(naa, fitted)), strata=if(nstrata==0) NULL else naresid(naa, strata)) if(se.fit) { return(structure(list(fitted=fitted, se.fit=naresid(naa,se)), na.action=if(expand.na)NULL else naa)) } else return(structure(fitted, na.action=if(expand.na)NULL else naa)) } } rms/R/rms.trans.s0000644000176200001440000005212314024532001013361 0ustar liggesusers# design.trans FEH 4 Oct 90 # Contains individual functions for creating sub-design matrices from # vectors, for use with design(). # code name # 1 asis leave variable coded as-is, get default name, label, # limits, values # 2 pol polynomial expansion # 3 lsp linear spline # 4 rcs restricted cubic spline # 5 catg category # 7 scored scored ordinal variable # 8 strat stratification factor #10 matrx matrix factor - used to keep groups of variables together # as one factor #11 gTrans - general transformations # # des.args generic function for retrieving arguments # set.atr generic function to set attributes of sub design matrix # options sets default options # [.rms subsets variables, keeping attributes # gparms retrieve parms for design or fit object. Not used by any # of these routines, but used by analyst to force a new # fit to use same parms as previous fit for a given factor. # value.chk # Check a given list of values for a factor for validity, # or if list is NA, return list of possible values # # Default label is attr(x,"label") or argument name if label= omitted. # First argument can be as follows, using asis as an example: # asis(x, ...) name="x", label=attr(x,"label") or "x" # if NULL # asis(w=abs(q), ...) name="w", label=attr(x,"label") or "w" # asis(age=xx) name="age", label=label attr or "age" # asis(x,label="Age, yr") name="x", label="Age, yr" # asis(age=x,label= name="age", label="Age in Years" # "Age in Years") # matrx(dx=cbind(dx1=dx1,dx2=dx2)) name="dx", individual names # dx1 and dx2 # For matrx, default label is list of column names. # An additional argument, name, can be used to instead specify the name of the # variable. This is used when the functions are implicitly called from within # design(). 
# # The routines define dimnames for the returned object with column # names = expanded list of names based on original name. # assume.code is added to attributes of returned matrix. Is 1-8 # corresponding to transformation routines asis-strat above, 10 for matrx. # Adds attribute nonlinear, one element/column of expanded design matrix. # nonlinear=T if column is a nonlinear expansion of original variable, # F if linear part or not applicable # (e.g. dummy variable for category -> F). For matrx, all are linear. # # System options used: nknots for default number of knots in restr. cubic spline # and poly.degree, default degree of polynomials # Second argument to routines is the parameters (parms) of the # transformation (except for asis), defined as follows: # # poly order of polynomial, e.g. 2 for quadratic # lsp list of knots # rcs number of knots if parms=1 element (-> compute default # knot locations), actual knot locations if >2 elements # (2 knots not allowed for restr. cubic spline) # catg list of value labels corresponding to values 1,2,3,... # scored list of unique values of the variable # strat list of value labels corresponding to values 1,2,3 # # For catg and strat, parms are omitted if the variable is character or # is already an S category variable. # # Argument retrieval: After variable and optional parms, other variables # may be named or positional, in the following order: label, name. # For matrx, parms are not allowed. # # Function to return list with elements name, parms, label. # corresponding to arguments in call to asis, etc. parms=NULL if # parms.allowed=F. Reason for going to this trouble is that first arg to # asis, etc. is allowed to be a named argument to set a new name for it. # With ordinary argument fetching, remaining arguments would have to be # named. This logic allows them to be named or positional in order: # parms (if allowed), label. # # If options(Design.attr) is non-null, looks up attributes in elements # in Design.attr corresponding to the name of the current variable. # This is used to get predicted values when the original fitting # function (e.g., rcs) derived parms of the transformation from the data. # des.args <- function(x, parms.allowed, call.args) { nam <- names(x) if(! length(nam)) nam <- rep("", 5) name <- nam[1] if(name=="") { form <- formula(call("~",as.name("...y..."), call.args[[2]])) name <- var.inner(form) } pa <- parms.allowed argu <- function(x,karg, arg.name, parms.all, nm) { if(! parms.all) karg <- karg-1 k <- charmatch(arg.name,nm,0) #k>0 : named arg found ## Added karg <= length(x) 9Apr02 for R; R doesn't return NULL ## like S+ if(k > 0) x[[k]] else if(length(nm) < karg || nm[karg] != "") NULL else if(karg <= length(x)) x[[karg]] else NULL } if(parms.allowed) parms <- argu(x,2,"parms",pa,nam) else { parms <- NULL if(charmatch("parms",nam,0)>0) stop(paste("parms not allowed for",as.character(call.args[1]))) } nm <- argu(x, 5, "name", pa, nam) if(length(nm)) name <- nm if(length(.Options$Design.attr)) { atr <- .Options$Design.attr i <- charmatch(name, atr$name, 0) if(! length(i))stop("program logic error for options(factor.number)") parmi <- atr$parms[[name]] return(list(name=atr$name[i], parms=parmi, label=atr$label[i], units=atr$units[i])) # added units 9Jun99 } label <- argu(x, 3, "label", pa, nam) atx <- attributes(x[[1]]) # 9Jun99 if(! length(label)) label <- atx$label # 9Jun99 attr(x[[1]],"label") if(! 
length(label)) label <- name list(name=name, parms=parms, label=label, units=atx$units) #9Jun99 } ## Function to list all attributes of new sub-design matrix set.atr <- function(xd, x, z, colnames, assume, code, parms, nonlinear) { ##Note: x argument isn't used if(is.matrix(xd)) list(dim=dim(xd),dimnames=list(NULL,colnames),class="rms", name=z$name, label=z$label, assume=assume, assume.code=code, parms=parms, nonlinear=nonlinear,colnames=colnames,units=z$units) else list(dim=dim(xd), class="rms", name=z$name, label=z$label, assume=assume, assume.code=code, parms=parms, nonlinear=nonlinear,colnames=colnames,units=z$units) } ## asis transformation - no transformation asis <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx, FALSE, cal) xd <- xx[[1]] if(is.factor(xd)) { attr(xd,"class") <- NULL } if(! (is.numeric(xd) | is.logical(xd))) { stop(paste(z$name,"is not numeric")) } attributes(xd) <- set.atr(xd,xd,z,z$name,"asis",1,NULL,FALSE) xd } ## matrx transformation - no transformation, keep original vars as matrix ## column names as parameter names, parms=column medians matrx <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx, FALSE, cal) xd <- xx[[1]] nc <- ncol(xd) if(! is.matrix(xd)) { stop(paste(z$name, "is not a matrix")) } colname <- dimnames(xd)[[2]] if(length(colname)==0 && nc > 0) colname <- paste0(z$name, '[', 1:nc, ']') else if(z$label==z$name) z$label <- paste(colname, collapse=",") parms <- rep(NA, max(1, nc)) if(length(xd)) for(i in 1:nc) parms[i] <- median(xd[,i], na.rm=TRUE) xd <- I(xd) attributes(xd) <- set.atr(xd, NULL, z, colname, "matrix", 10, parms, rep(FALSE,nc)) xd } ## Polynomial expansion pol <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx,TRUE,cal) x <- xx[[1]] if(! is.numeric(x)) { stop(paste(z$name,"is not numeric")) } poly.degree <- .Options$poly.degree if(! length(poly.degree)) { poly.degree <- 2 } if(length(z$parms)) { poly.degree <- z$parms } if(poly.degree<2){ stop("order for polynomial must be 2,3,...") } xd <- matrix(1,nrow=length(x),ncol=poly.degree) nam <- z$name name <- character(poly.degree) name[1] <- nam xd[,1] <- x for(j in 2:poly.degree) { name[j] <- paste0(nam,"^",j) xd[,j] <- x^j } attributes(xd) <- set.atr(xd,x,z,name,"polynomial",2,poly.degree, c(FALSE,rep(TRUE,poly.degree-1))) xd } ## Linear spline expansion lsp <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx,TRUE,cal) x <- xx[[1]] if(! is.numeric(x)) { stop(paste(z$name,"is not numeric")) } parms <- z$parms if(! length(parms) || any(is.na(parms))) { stop("must specify knots for linear spline") } suffix <- NULL nam <- z$name lp <- length(parms) xd <- matrix(double(1),nrow=length(x),ncol=lp+1) name <- character(lp+1) xd[,1] <- x name[1] <- nam for(j in 1:lp) { suffix <- paste0(suffix, "'") name[j+1] <- paste0(nam, suffix) xd[,j+1] <- pmax(x - parms[j], 0) } attributes(xd) <- set.atr(xd,x,z,name,"lspline",3,parms,c(FALSE,rep(TRUE,lp))) xd } ## Restricted cubic spline expansion rcs <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx, TRUE, cal) x <- xx[[1]] if(! is.numeric(x)) stop(paste(z$name, "is not numeric")) nknots <- .Options$nknots if(! length(nknots)) nknots <- 5 parms <- z$parms if(! length(parms)) parms <- nknots if(length(parms)==1) { nknots <- parms knots <- NULL if(nknots == 0) { attributes(x) <- set.atr(x, x, z, z$name, "asis", 1, NULL, FALSE) return(x) } } else { nknots <- length(parms) knots <- parms } pc <- length(.Options$rcspc) && .Options$rcspc fractied <- .Options$fractied if(! 
length(fractied)) fractied <- 0.05 if(! length(knots)) { xd <- rcspline.eval(x, nk=nknots, inclx=TRUE, pc=pc, fractied=fractied) knots <- attr(xd,"knots") } else xd <- rcspline.eval(x, knots=knots, inclx=TRUE, pc=pc, fractied=fractied) parms <- knots nknots <- length(parms) nam <- z$name primes <- paste(rep("'",nknots-1), collapse="") name <- if(pc) paste0(nam, substring(primes, 1, 1:(nknots-1))) else c(nam, paste0(nam, substring(primes, 1, 1:(nknots-2)))) if(pc) attr(parms, 'pcparms') <- attr(xd, 'pcparms') attributes(xd) <- set.atr(xd, x, z, name, "rcspline", 4, parms, if(pc) rep(TRUE, nknots-1) else c(FALSE,rep(TRUE,nknots-2))) xd } ## Category variable catg <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx,TRUE,cal) nam <- z$name y <- xx[[1]] parms <- z$parms if(! length(parms) & is.factor(y)) parms <- levels(y) if(! length(parms)) { if(is.character(y)) { parms <- sort(unique(y[y != "" & y != " "])) } else { parms <- as.character(sort(unique(y[! is.na(y)]))) } } if(! is.factor(y)) { x <- factor(y, levels=parms) } else { x <- y } if((is.character(y) && any(y != "" & y != " " & is.na(x))) || (is.numeric(y) & any(! is.na(y) & is.na(x)))) { stop(paste(nam,"has non-allowable values")) } if(all(is.na(x))) { stop(paste(nam,"has no non-missing observations")) } lp <- length(parms) if(lp < 2) stop(paste(nam,"has <2 category levels")) attributes(x) <- list(levels=parms,class=c("factor","rms"), name=nam,label=z$label,assume="category",assume.code=5, parms=parms,nonlinear=rep(FALSE,lp-1), colnames=paste0(nam, "=", parms[-1])) x } ## Scored expansion parms=unique values scored <- function(...) { cal <- sys.call() xx <- list(...) z <- des.args(xx, TRUE, cal) parms <- z$parms nam <- z$name x <- xx[[1]] if(is.factor(x)) { levx <- as.numeric(levels(x)) if(any(is.na(levx))) stop(paste("levels for", nam, "not numeric")) if(! length(parms)) parms <- levx ## .Options$warn <- -1 #suppress warning about NAs oldopt <- options('warn') options(warn=-1) on.exit(options(oldopt)) x <- levx[x] } if(! is.numeric(x)) stop(paste(nam,"is not a numeric variable")) y <- sort(unique(x[! is.na(x)])) if(! length(parms)) parms <- y parms <- sort(parms) n.unique <- length(parms) if(n.unique < 3) { stop("scored specified with < 3 levels") } lp <- length(parms) - 1 ## Form contrast matrix of the form linear | dummy | dummy ... xd <- matrix(double(1), nrow=length(y), ncol=lp) xd[,1] <- y name <- character(lp) name[1] <- nam i <- 1 for(k in parms[3:length(parms)]) { i <- i+1 name[i] <- paste0(nam, "=", k) xd[,i] <- y==k } dimnames(xd) <- list(NULL, name) x <- ordered(x) class(x) <- c("ordered","factor","rms") attributes(x) <- c(attributes(x), list(name=nam,label=z$label,assume="scored",assume.code=7, parms=parms, nonlinear=c(FALSE,rep(TRUE,lp-1)), colnames=name, contrasts=xd)) x } # General transformations - allows discontinuities, special spline # functions, etc. gTrans <- function(...) { cal <- sys.call() xx <- list(...) 
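  ## Editor's illustrative sketch (not part of the package source): the
  ## parms argument of gTrans is a function returning a matrix of
  ## transformed columns, optionally carrying a 'nonlinear' attribute that
  ## indexes the nonlinear columns.  A hypothetical jump discontinuity at 0,
  ## with made-up variables y and x:
  ##   jump <- function(x) {
  ##     z <- cbind(x, positive = x > 0)  # linear term plus a jump at 0
  ##     attr(z, 'nonlinear') <- 2        # column 2 is the nonlinear piece
  ##     z
  ##   }
  ##   f <- ols(y ~ gTrans(x, jump))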
z <- des.args(xx, TRUE, cal) parms <- z$parms if(is.character(parms)) parms <- eval(parse(text=parms)) nam <- z$nam x <- xx[[1]] suffix <- '' nam <- z$name xd <- as.matrix(parms(x)) nc <- ncol(xd) name <- rep('', nc) if(length(colnames(xd))) name <- colnames(xd) nonlin <- rep(FALSE, nc) nonlin[attr(xd, 'nonlinear')] <- TRUE for(j in 1 : nc) { if(name[j] == '') name[j] <- paste0(nam, suffix) suffix <- paste0(suffix, "'") } colnames(xd) <- name # model.matrix will put TRUE after a term name if logical # convert to 0/1 if(is.logical(xd)) xd <- 1 * xd xd <- I(xd) # Store the function parms as character so environment won't # be carried along (makes serialized .rds and other files large) attributes(xd) <- set.atr(xd, x, z, name, "gTrans", 11, deparse(parms), nonlin) xd } ## strat parms=value labels strat <- function(...) { cal <- sys.call() xx <- list(...) y <- xx[[1]] z <- des.args(xx,TRUE,cal) parms <- z$parms if(! length(parms)) parms <- levels(y) if(! length(parms)) { if(is.character(y)) { parms <- sort(unique(y[y != "" & y != " "])) } else parms <- as.character(sort(unique(y[! is.na(y)]))) } nam <- z$name if(! is.factor(y)) { x <- factor(y,levels=parms) } else x <- y if((is.character(y) & any(y != "" & y != " " & is.na(x))) || (is.numeric(y) & any(! is.na(y) & is.na(x)))) { stop(paste(nam," has a non-allowable value")) } name <- nam attributes(x) <- list(levels=parms,class=c("factor","rms"), name=nam, label=z$label, assume="strata", assume.code=8, parms=parms, nonlinear=FALSE, colnames=paste0(nam,"=", parms[-1])) x } ## Function to subscript a variable, keeping attributes ## Is similar to [.smooth, but does not keep attribute NAs "[.rms" <- function(x, ..., drop = FALSE) { ats <- attributes(x) ats$dimnames <- NULL ats$dim <- NULL ats$names <- NULL class(x) <- NULL y <- x[..., drop = drop] attributes(y) <- c(attributes(y), ats) y } ## Function to get parms of factor in fit or design object "fit" with name ## given by second argument (without quotes) gparms <- function(fit,...) { name <- as.character(sys.call())[3] atr <- fit$Design atr$parms[[name]] } ## value.chk - if x=NA, returns list of possible values of factor i defined ## in object f's attributes. For continuous factors, returns n values ## in default prediction range. Use n=0 to return trio of effect ## limits. Use n < 0 to return pretty(plotting range, nint = - n). ## If type.range="full" uses the full range instead of default plot rng. ## If x is not NA, checks that list to see that each value is allowable ## for the factor type, and returns x ## Last argument is object returned from Getlim (see Design.Misc) ## First argument is Design list value.chk <- function(f, i, x, n, limval, type.range="plot") { as <- f$assume.code[i] name <- f$name[i] parms <- f$parms[[name]] isna <- length(x)==1 && is.na(x) values <- limval$values[[name]] charval <- length(values) && is.character(values) if(isna & as != 7) { if(! length(limval) || match(name, dimnames(limval$limits)[[2]], 0)==0 || is.na(limval$limits["Adjust to",name])) stop(paste("variable",name,"does not have limits defined by datadist")) limits <- limval$limits[,name] lim <- if(type.range=="full") limits[6:7] else limits[4:5] } if(as < 5 | as == 6 | as == 11) { if(isna) { if(! length(values)) { if(n==0) x <- limits[1:3] else { if(n>0) x <- seq(unclass(lim[1]), #handles chron unclass(lim[2]),length=n) else x <- pretty(unclass(lim[1:2]), n=-n) class(x) <- class(lim) } } else x <- values } else { if(is.character(x) && ! 
charval) stop(paste("character value not allowed for variable", name)) #Allow any numeric value if(charval) { j <- match(x, values, 0) if(any(j==0)) stop(paste("illegal values for categorical variable:", paste(x[j==0],collapse=" "),"\nPossible levels:", paste(values,collapse=" "))) } } } else if(as == 5 | as == 8) { if(isna) x <- parms else { j <- match(x, parms, 0) #match converts x to char if needed if(any(j == 0)) stop(paste("illegal levels for categorical variable:", paste(x[j == 0], collapse=" "), "\nPossible levels:", paste(parms, collapse=" "))) x } } else if(as==7) { if(isna) x <- parms else if(is.character(x)) stop(paste("character value not allowed for", "variable",name)) else { j <- match(x, parms, 0) if(any(j==0)) { stop(paste("illegal levels for categorical variable:", paste(x[j==0],collapse=" "),"\n","Possible levels:", paste(parms,collapse=" "))) } } } invisible(x) } ##ia.operator.s - restricted interaction operators for use with Design ##F. Harrell 8 Nov 91 ##Set up proper attributes for a restricted interaction for a model ##such as y ~ rcs(x1) + rcs(x2) + x1 %ia% x2 or x1 %ia% rcs(x2) ##or rcs(x1) %ia% x2 "%ia%" <- function(x1, x2) { a1 <- attributes(x1) a2 <- attributes(x2) nam <- as.character(sys.call())[-1] redo <- function(x, nam) { if(! length(attr(x, "assume.code"))) { ## a variable being interacted appears without an rms ## fitting function around it x <- if(length(class(x)) && class(x)[1] == "ordered") scored(x, name=nam) else if(is.character(x) | is.factor(x)) catg(x, name=nam) else if(is.matrix(x)) matrx(x, name=nam) else asis(x, name=nam) } at <- attributes(x) ass <- at$assume.code nam <- at$name if(ass == 5) { colnames <- at$colnames len <- length(at$parms) - 1 } else if(ass == 8) { prm <- at$parms colnames <- paste0(nam, "=", prm[-1]) len <- length(prm) - 1 } else if(ass == 7) { prm <- at$parms colnames <- c(nam, paste0(nam, "=", prm[-(1 : 2)])) len <- length(prm) - 1 } else { if(! length(ncol(x))) { len <- 1 colnames <- nam } else { colnames <- dimnames(x)[[2]] len <- ncol(x) } } attr(x, "colnames") <- colnames attr(x, "len") <- len if(ass == 8) attr(x, "nonlinear") <- rep(FALSE, len) x } x1 <- redo(x1, nam[1]) x2 <- redo(x2, nam[2]) a1 <- attributes(x1) a2 <- attributes(x2) n1 <- a1$colnames n2 <- a2$colnames nl1 <- a1$nonlinear nl2 <- a2$nonlinear as1 <- a1$assume.code as2 <- a2$assume.code lev1 <- length(levels(x1)) lev2 <- length(levels(x2)) ## Mark special if %ia% does not generate expanded term labels. This ## occurs if both variables are as-is or if %ia% involved a variable ## that was categorical with ## 2 levels and did not involve a variable that was an expanded spline ## or polynomial. Handles inconsistency in model.matrix whereby ## categorical %ia% plain variable generates a column name of the form ## categorical %ia% plain without "categorical=2nd level" iaspecial <- (as1 == 1 && as2 ==1) || ((lev1 == 2 || lev2 == 2) && (as1 %nin% 2 : 4 && as2 %nin% 2 : 4)) l1 <- a1$len l2 <- a2$len if(any(nl1) & any(nl2)) nc <- l1 + l2 - 1 else nc <- l1 * l2 nr <- if(is.matrix(x1)) nrow(x1) else length(x1) x <- matrix(double(1), nrow=nr, ncol=nc) name <- character(nc) parms <- matrix(integer(1), nrow=2, ncol=nc + 1) nonlinear <- logical(nc) k <- 0 if(! is.factor(x1)) x1 <- as.matrix(x1) if(! 
is.factor(x2)) x2 <- as.matrix(x2) for(i in 1 : l1) { if(as1 == 5 | as1 == 8) x1i <- unclass(x1) == i + 1 else x1i <- x1[, i] for(j in 1 : l2) { ## Remove doubly nonlinear terms if(nl1[i] & nl2[j]) break k <- k + 1 if(as2 == 5 | as2 == 8) x2j <- unclass(x2) == j + 1 else x2j <- x2[, j] x[,k] <- x1i * x2j name[k] <- paste(n1[i], "*", n2[j]) parms[, k + 1] <- c(nl1[i], nl2[j]) nonlinear[k] <- nl1[i] | nl2[j] } } dimnames(x) <- list(NULL, name) attr(x, "ia") <- c(a1$name, a2$name) attr(x, "parms") <- parms attr(x, "nonlinear") <- nonlinear attr(x, "assume.code") <- 9 attr(x, "name") <- paste(a1$name, "*", a2$name) attr(x, "label") <- attr(x, "name") attr(x, "iaspecial") <- iaspecial attr(x, "colnames") <- name attr(x, "class") <- "rms" x } rms/R/bootcov.s0000644000176200001440000004310312773171034013121 0ustar liggesusersbootcov <- function(fit, cluster, B=200, fitter, coef.reps=TRUE, loglik=FALSE, pr=FALSE, maxit=15, eps=.0001, group=NULL, stat=NULL) { coxcph <- inherits(fit,'coxph') || inherits(fit,'cph') nfit <- class(fit)[1] if(length(fit$weights) && (coxcph || nfit[1] == 'Rq')) stop('does not handle weights') if(!length(X <- fit$x) | !length(Y <- fit$y)) stop("you did not specify x=TRUE and y=TRUE in the fit") sc.pres <- match('scale',names(fit),0) > 0 ns <- fit$non.slopes if(nfit == 'psm') { fixed <- fit$fixed #psm only fixed <- if(length(fixed) == 1 && is.logical(fixed) && !fixed) list() else list(scale=TRUE) fixed <- NULL dist <- fit$dist parms <- fit$parms } if(nfit %in% c('Glm','orm')) fitFamily <- fit$family ## For orm fits, find y cutoff target for intercept (from median of ## original sample) ytarget <- if(nfit == 'orm') with(fit, ifelse(is.numeric(yunique), yunique[interceptRef + 1L], interceptRef + 1L)) ## See if ordinal regression being done ordinal <- nfit == 'orm' || (nfit == 'lrm' && length(unique(Y)) > 2) penalty.matrix <- fit$penalty.matrix if(missing(fitter)) { fitter <- switch(nfit, ols=if(length(penalty.matrix)) { function(x, y, penalty.matrix,...) { lm.pfit(cbind(Intercept=1., x), y, penalty.matrix=penalty.matrix, tol=1e-11, regcoef.only=TRUE) } } else function(x, y, ...) { lm.fit.qr.bare(x, y, tolerance=1e-11, intercept=TRUE) }, lrm=function(x, y, maxit=15, eps=.0001, penalty.matrix,...) { lrm.fit(x, y, maxit=maxit, tol=1E-11, eps=eps, penalty.matrix=penalty.matrix) }, cph=function(x, y, strata=NULL, maxit=15, eps=.0001,...) { coxphFit(x, y, strata=strata, iter.max=maxit, eps=eps, method="efron", toler.chol=1e-11, type='right') }, psm=function(x, y, maxit=15,...) { survreg.fit2(x, y, dist=dist, parms=parms, fixed=fixed, offset=NULL, init=NULL, maxiter=maxit) }, bj=function(x, y, maxit=15, eps=.0001, ...) { bj.fit(x, y, control=list(iter.max=maxit, eps=eps)) }, Glm=function(x, y, ...) { glm.fit(cbind(1., x), as.vector(y), family=fitFamily) }, Rq=RqFit(fit, wallow=FALSE), orm=function(x, y, maxit=14L, eps=.005, tol=1e-7, ytarget=NULL, ...) 
{ f <- orm.fit(x, y, family=fitFamily, maxit=maxit, eps=eps, tol=tol) ns <- f$non.slopes cof <- f$coefficients if(length(ytarget)) { ## Y values corresponding to intercepts yu <- f$yunique[-1] ## Linearly interpolate to return an intercept aimed ## at Y >= ytarget intcept <- approx(yu, cof[1:ns], xout=ytarget)$y ## if(min(abs(intcept - cof[1:ns])) > 1e-9) cat('****') intattr <- approx(yu, 1:ns, xout=ytarget)$y } else { k <- f$interceptRef intattr <- k intcept <- cof[k] } names(intcept) <- 'Intercept' cof <- c(intcept, cof[(ns + 1) : length(cof)]) attr(cof, 'intercepts') <- intattr f$coefficients <- cof f }) } if(!length(fitter)) stop("fitter not valid") if(loglik) { oosl <- switch(nfit, ols=oos.loglik.ols, lrm=oos.loglik.lrm, cph=oos.loglik.cph, psm=oos.loglik.psm, Glm=oos.loglik.Glm) if(!length(oosl)) stop('loglik=TRUE but no oos.loglik method for model in rmsMisc') Loglik <- double(B+1) Loglik[B+1] <- oosl(fit) } else Loglik <- NULL n <- nrow(X) cof <- fit$coefficients if(nfit == 'orm') { iref <- fit$interceptRef cof <- cof[c(iref, (ns + 1L) : length(cof))] } p <- length(cof) vname <- names(cof) if(sc.pres) { p <- p + 1L vname <- c(vname, "log scale") } ## Function to carry non-NA values backwards and replace NAs at the ## right end with zeros. This will cause cell proportions for unsampled ## Y values to be zero for the purpose of computing means ## The zero placements will mess up bootstrap covariance matrix however fillInTheBlanks <- function(S) { ## http://stackoverflow.com/questions/1782704/propagating-data-within-a-vector/1783275#1783275 ## NA in S are replaced with observed values ## accepts a vector possibly holding NA values and returns a vector ## where all observed values are carried forward and the first is ## also carried backward. cfr na.locf from zoo library. L <- !is.na(S) c(S[L][1L], S[L])[cumsum(L) + 1L] } ## vn = names of full coefficient vector ## ns = # non-slopes (intercepts) in full vector (target) ## nc = # non-slopes for current fit in cof fill <- function(cof, vn, ns) { p <- length(vn) if(length(cof) == p) return(cof) nc <- ns - (p - length(cof)) cints <- cof[1L : nc] ## current intercepts ints <- rep(NA, ns) names(ints) <- vn[1L : ns] ints[names(cints)] <- cints ## Set not just last intercept to -Inf if missing but set all ## NA intercepts at the right end to -Inf. 
This will later lead to ## cell probabilities of zero for bootstrap-omitted levels of Y if(is.na(ints[ns])) { l <- ns if(ns > 1L) { for(j in (ns - 1L) : 1L) { if(!is.na(ints[j])) break l <- j } } ints[l : ns] <- -Inf ## probability zero of exceeding unobserved high Y } #### CHANGE TO FILL IN ONLY INTERCEPTS c(rev(fillInTheBlanks(rev(ints))), cof[-(1L : nc)]) } bar <- rep(0, p) cov <- matrix(0, nrow=p, ncol=p, dimnames=list(vname,vname)) if(coef.reps) coefs <- matrix(NA, nrow=B, ncol=p, dimnames=list(NULL,vname)) if(length(stat)) stats <- numeric(B) Y <- as.matrix(if(is.factor(Y)) unclass(Y) else Y) ny <- ncol(Y) Strata <- fit$strata nac <- fit$na.action if(length(group)) { if(length(group) > n) { ## Missing observations were deleted during fit if(length(nac)) { j <- !is.na(naresid(nac, Y) %*% rep(1,ny)) group <- group[j] } } if(length(group) != n) stop('length of group does not match # rows used in fit') group.inds <- split(1:n, group) ngroup <- length(group.inds) } else ngroup <- 0 anyinf <- FALSE if(!exists('.Random.seed')) runif(1) seed <- .Random.seed if(missing(cluster)) { clusterInfo <- NULL nc <- n b <- 0 pb <- setPb(B, type='Boot', onlytk=!pr, every=20) for(i in 1:B) { pb(i) if(ngroup) { j <- integer(n) for(si in 1L : ngroup) { gi <- group.inds[[si]] j[gi] <- sample(gi, length(gi), replace=TRUE) } } else j <- sample(1L : n, n, replace=TRUE) ## Note: If Strata is NULL, NULL[j] is still NULL f <- tryCatch(fitter(X[j,,drop=FALSE], Y[j,,drop=FALSE], maxit=maxit, eps=eps, ytarget=ytarget, penalty.matrix=penalty.matrix, strata=Strata[j]), error=function(...) list(fail=TRUE)) if(length(f$fail) && f$fail) next cof <- f$coefficients if(any(is.na(cof))) next # glm b <- b + 1L if(sc.pres) cof <- c(cof, 'log scale' = log(f$scale)) ## Index by names used since some intercepts may be missing in a ## bootstrap resample from an ordinal logistic model ## Missing coefficients represent values of Y not appearing in the ## bootstrap sample. Carry backwards the next non-NA intercept if(ordinal) cof <- fill(cof, vname, ns) if(any(is.infinite(cof))) anyinf <- TRUE if(coef.reps) coefs[b,] <- cof if(length(stat)) stats[b] <- f$stats[stat] bar <- bar + cof cof <- as.matrix(cof) cov <- cov + cof %*% t(cof) if(loglik) Loglik[b] <- oosl(f, matxv(X,cof), Y) } } else { clusterInfo <- list(name=deparse(substitute(cluster))) if(length(cluster) > n) { ## Missing obs were deleted during fit if(length(nac)) { j <- !is.na(naresid(nac, Y) %*% rep(1,ny)) cluster <- cluster[j] } } if(length(cluster) != n) stop("length of cluster does not match # rows used in fit") if(any(is.na(cluster))) stop("cluster contains NAs") cluster <- as.character(cluster) clusters <- unique(cluster) nc <- length(clusters) Obsno <- split(1:n, cluster) b <- 0 pb <- setPb(B, type='Boot', onlytk=!pr, every=20) for(i in 1L : B) { pb(i) ## Begin addition Bill Pikounis if(ngroup) { j <- integer(0L) for(si in 1L : ngroup) { gi <- group.inds[[si]] cluster.gi <- cluster[gi] clusters.gi <- unique(cluster.gi) nc.gi <- length(clusters.gi) Obsno.gci <- split(gi, cluster.gi) j.gci <- sample(clusters.gi, nc.gi, replace = TRUE) obs.gci <- unlist(Obsno.gci[j.gci]) j <- c(j, obs.gci) } obs <- j } else { ## End addition Bill Pikounis (except for closing brace below) j <- sample(clusters, nc, replace=TRUE) obs <- unlist(Obsno[j]) } f <- tryCatch(fitter(X[obs,,drop=FALSE], Y[obs,,drop=FALSE], maxit=maxit, eps=eps, ytarget=ytarget, penalty.matrix=penalty.matrix, strata=Strata[obs]), error=function(...) 
list(fail=TRUE)) if(length(f$fail) && f$fail) next cof <- f$coefficients if(any(is.na(cof))) next # glm b <- b + 1L if(sc.pres) cof <- c(cof, 'log scale' = log(f$scale)) cof <- fill(cof, vname, ns) if(any(is.infinite(cof))) anyinf <- TRUE if(coef.reps) coefs[b,] <- cof if(length(stat)) stats[b] <- f$stats[stat] bar <- bar + cof cof <- as.matrix(cof) cov <- cov + cof %*% t(cof) if(loglik) Loglik[b] <- oosl(f, matxv(X,cof), Y) } } if(b < B) { warning(paste('fit failure in',B-b, 'resamples. Might try increasing maxit')) if(coef.reps) coefs <- coefs[1L : b,,drop=FALSE] Loglik <- Loglik[1L : b] } if(nfit == 'orm') attr(coefs, 'intercepts') <- iref if(anyinf) warning('at least one resample excluded highest Y values, invalidating bootstrap covariance matrix estimate') bar <- bar / b fit$B <- b fit$seed <- seed names(bar) <- vname fit$boot.coef <- bar if(coef.reps) fit$boot.Coef <- coefs bar <- as.matrix(bar) cov <- (cov - b * bar %*% t(bar)) / (b - 1L) fit$orig.var <- fit$var if(nfit == 'orm') attr(cov, 'intercepts') <- iref fit$var <- cov fit$boot.loglik <- Loglik if(length(stat)) fit$boot.stats <- stats if(nfit == 'Rq') { newse <- sqrt(diag(cov)) newt <- fit$summary[, 1L]/newse newp <- 2. * (1. - pt(abs(newt), fit$stats['n'] - fit$stats['p'])) fit$summary[, 2L : 4L] <- cbind(newse, newt, newp) } if(length(clusterInfo)) clusterInfo$n <- nc fit$clusterInfo <- clusterInfo fit } bootplot <- function(obj, which=1 : ncol(Coef), X, conf.int=c(.9,.95,.99), what=c('density', 'qqnorm', 'box'), fun=function(x) x, labels., ...) { what <- match.arg(what) Coef <- obj$boot.Coef if(length(Coef) == 0) stop('did not specify "coef.reps=TRUE" to bootcov') Coef <- Coef[, which, drop=FALSE] if(! missing(X)) { if(! is.matrix(X)) X <- matrix(X, nrow=1) qoi <- matxv(X, Coef, bmat=TRUE) # X %*% t(Coef) ##nxp pxB = nxB if(missing(labels.)) { labels. <- dimnames(X)[[1]] if(length(labels.) == 0) { labels. <- as.character(1:nrow(X)) } } } else { qoi <- t(Coef) nns <- num.intercepts(obj) if(missing(labels.)) { labels. <- paste(ifelse(which > nns, 'Coefficient of ', ''), dimnames(Coef)[[2]], sep='') } } nq <- nrow(qoi) qoi <- fun(qoi) quan <- NULL if(what == 'box') { Co <- as.vector(Coef) predictor <- rep(colnames(Coef), each=nrow(Coef)) p <- ggplot(data.frame(predictor, Co), aes(x=predictor, y=Co)) + xlab('Predictor') + ylab('Coefficient') + geom_boxplot() + facet_wrap(~ predictor, scales='free') return(p) } else if(what == 'density') { probs <- (1 + conf.int) / 2 probs <- c(1 - probs, probs) quan <- matrix(NA, nrow=nq, ncol=2 * length(conf.int), dimnames=list(labels., format(probs))) for(j in 1 : nq) { histdensity(qoi[j,], xlab=labels.[j], ...) quan[j,] <- quantile(qoi[j,], probs, na.rm=TRUE) abline(v=quan[j,], lty=2) title(sub=paste('Fraction of effects >', fun(0), ' = ', format(mean(qoi[j,] > fun(0))),sep=''), adj=0) } } else { for(j in 1 : nq) { qqnorm(qoi[j,], ylab=labels.[j]) qqline(qoi[j,]) } } invisible(list(qoi=drop(qoi), quantiles=drop(quan))) } ## histdensity runs hist() and density(), using twice the number of ## class than the default for hist, and 1.5 times the width than the default ## for density histdensity <- function(y, xlab, nclass, width, mult.width=1, ...) { y <- y[is.finite(y)] if(missing(xlab)) { xlab <- label(y) if(xlab == '') xlab <- as.character(sys.call())[-1] } if(missing(nclass)) nclass <- (logb(length(y),base=2)+1)*2 hist(y, nclass=nclass, xlab=xlab, probability=TRUE, ...) 
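  ## Editor's illustrative sketch (not part of the package source):
  ## histdensity is what bootplot(..., what='density') calls for each
  ## coefficient.  A hypothetical end-to-end use, with made-up variables:
  ##   f <- lrm(y ~ age + sex, x=TRUE, y=TRUE) # x=TRUE, y=TRUE are required
  ##   b <- bootcov(f, B=200, coef.reps=TRUE)  # bootstrap coefficients
  ##   bootplot(b, which=2, what='density')    # histogram + density, coef 2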
if(missing(width)) { nbar <- logb(length(y), base = 2) + 1 width <- diff(range(y))/nbar*.75*mult.width } lines(density(y,width=width,n=200)) invisible() } confplot <- function(obj, X, against, method=c('simultaneous', 'pointwise'), conf.int=0.95, fun=function(x) x, add=FALSE, lty.conf=2, ...) { method <- match.arg(method) if(length(conf.int)>1) stop('may not specify more than one conf.int value') boot.Coef <- obj$boot.Coef if(length(boot.Coef) == 0) stop('did not specify "coef.reps=TRUE" to bootcov') if(!is.matrix(X)) X <- matrix(X, nrow=1) fitted <- fun(matxv(X, obj$coefficients)) if(method == 'pointwise') { pred <- matxv(X, boot.Coef, bmat=TRUE) ## n x B p <- fun(apply(pred, 1, quantile, probs=c((1 - conf.int)/2, 1 - (1 - conf.int)/2), na.rm=TRUE)) lower <- p[1,] upper <- p[2,] } else { boot.Coef <- rbind(boot.Coef, obj$coefficients) loglik <- obj$boot.loglik if(length(loglik) == 0) stop('did not specify "loglik=TRUE" to bootcov') crit <- quantile(loglik, conf.int, na.rm=TRUE) qual <- loglik <= crit boot.Coef <- boot.Coef[qual,,drop=FALSE] pred <- matxv(X, boot.Coef, bmat=TRUE) ## n x B upper <- fun(apply(pred, 1, max)) lower <- fun(apply(pred, 1, min)) pred <- fun(pred) } if(!missing(against)) { lab <- label(against) if(lab == '') lab <- (as.character(sys.call())[-1])[3] if(add) lines(against, fitted, ...) else plot(against, fitted, xlab=lab, type='l', ...) lines(against, lower, lty=lty.conf) lines(against, upper, lty=lty.conf) } if(missing(against)) list(fitted=fitted, upper=upper, lower=lower) else invisible(list(fitted=fitted, upper=upper, lower=lower)) } # Construct object suitable for boot:boot.ci # Use boot package to get BCa confidence limits for a linear combination of # model coefficients, e.g. bootcov results boot.Coef # If boot.ci fails return only ordinary percentile CLs bootBCa <- function(estimate, estimates, type=c('percentile','bca','basic'), n, seed, conf.int=0.95) { type <- match.arg(type) if(type != 'percentile' && ! requireNamespace('boot', quietly = TRUE)) stop('boot package not installed') estimate <- as.vector(estimate) ne <- length(estimate) if(!is.matrix(estimates)) estimates <- as.matrix(estimates) if(ncol(estimates) != ne) stop('no. columns in estimates != length of estimate') if(type == 'percentile') { a <- apply(estimates, 2, quantile, probs=c((1-conf.int)/2, 1-(1-conf.int)/2), na.rm=TRUE) if(ne == 1) a <- as.vector(a) return(a) } lim <- matrix(NA, nrow=2, ncol=ne, dimnames=list(c('Lower','Upper'),NULL)) R <- nrow(estimates) for(i in 1:ne) { w <- list(sim= 'ordinary', stype = 'i', t0 = estimate[i], t = estimates[,i,drop=FALSE], R = R, data = 1:n, strata = rep(1, n), weights = rep(1/n, n), seed = seed, statistic = function(...) 1e10, call = match.call()) cl <- try(boot::boot.ci(w, type=type, conf=conf.int), silent=TRUE) if(inherits(cl, 'try-error')) { cl <- c(NA,NA) warning('could not obtain bootstrap confidence interval') } else { cl <- if(type == 'bca') cl$bca else cl$basic m <- length(cl) cl <- cl[c(m - 1, m)] } lim[,i] <- cl } if(ne == 1) as.vector(lim) else lim } rms/R/psm.s0000644000176200001440000003125013704356626012254 0ustar liggesuserspsm <- function(formula, data=environment(formula), weights, subset, na.action=na.delete, dist='weibull', init=NULL, scale=0, control=survreg.control(), parms=NULL, model=FALSE, x=FALSE, y=TRUE, time.inc, ...) { call <- match.call() if(dist == 'extreme') warning('Unlike earlier versions of survreg, dist="extreme" does not fit\na Weibull distribution as it uses an identity link. 
To fit the Weibull\ndistribution use the default for dist or specify dist="weibull".') ## Start FEH callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) m <- modelData(data, formula, subset = subset, weights = weights, na.action=na.action, callenv=callenv) m <- Design(m, formula=formula, specials=c('strata', 'cluster')) atrx <- attributes(m) sformula <- atrx$sformula nact <- atrx$na.action Terms <- atrx$terms atr <- atrx$Design ## End FEH weights <- model.extract(m, 'weights') Y <- model.extract(m, "response") Ysave <- Y ## Start FEH atY <- attributes(Y) ncy <- ncol(Y) maxtime <- max(Y[, - ncy]) nnn <- c(nrow(Y), sum(Y[, ncy])) time.units <- units(Y) if(!length(time.units) || time.units == '') time.units <- "Day" if(missing(time.inc)) { time.inc <- switch(time.units, Day=30, Month=1, Year=1, maxtime / 10) if(time.inc >= maxtime | maxtime / time.inc > 25) time.inc <- max(pretty(c(0, maxtime))) / 10 } ## End FEH if (!inherits(Y, "Surv")) stop("Response must be a survival object") strats <- attr(Terms, "specials")$strata cluster<- attr(Terms, "specials")$cluster dropx <- NULL if (length(cluster)) { robust <- TRUE # 'robust' is not a formal argument of psm; the former if (missing(robust)) test would signal an error here tempc <- untangle.specials(Terms, 'cluster', 1 : 10) ord <- attr(Terms, 'order')[tempc$terms] if (any(ord > 1)) stop ("Cluster cannot be used in an interaction") cluster <- strata(m[, tempc$vars], shortlabel=TRUE) #allow multiples dropx <- tempc$terms } if (length(strats)) { temp <- untangle.specials(Terms, 'strata', 1) dropx <- c(dropx, temp$terms) if (length(temp$vars) == 1) strata.keep <- m[[temp$vars]] else strata.keep <- strata(m[, temp$vars], shortlabel=TRUE) strata <- as.numeric(strata.keep) nstrata <- max(strata) } else { nstrata <- 1 strata <- 0 } if (length(dropx)) newTerms <- Terms[-dropx] else newTerms <- Terms X <- model.matrix(newTerms,m) ## Start FEH rnam <- dimnames(Y)[[1]] dimnames(X) <- list(rnam, c("(Intercept)", atr$colnames)) ## End FEH except for 23nov02 and later changes n <- nrow(X) nvar <- ncol(X) offset <- atrx$offset if(!length(offset)) offset <- rep(0, n) if (is.character(dist)) { dlist <- survreg.distributions[[dist]] if (is.null(dlist)) stop(paste(dist, ": distribution not found")) } else if (is.list(dist)) dlist <- dist else stop("Invalid distribution object") if (!length(dlist$dist)) { ## WHAT IS THIS? 
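  ## Editor's illustrative sketch (not part of the package source): dist may
  ## name an entry in survival::survreg.distributions or be a custom
  ## distribution list having the components validated below.  Hypothetical
  ## calls with made-up variables:
  ##   f1 <- psm(Surv(d.time, death) ~ age + sex)                   # Weibull default
  ##   f2 <- psm(Surv(d.time, death) ~ age + sex, dist='lognormal')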
if (is.character(dlist$name) && is.function(dlist$init) && is.function(dlist$deviance)) {} else stop("Invalid distribution object") } else { if (!is.character(dlist$name) || !is.character(dlist$dist) || !is.function(dlist$trans) || !is.function(dlist$dtrans)) stop("Invalid distribution object") } type <- attr(Y, "type") if (type == 'counting') stop ("Invalid survival type") logcorrect <- 0 #correction to the loglik due to transformations if (length(dlist$trans)) { tranfun <- dlist$trans exactsurv <- Y[,ncol(Y)] == 1 if (any(exactsurv)) logcorrect <- ifelse(length(weights), sum(weights[exactsurv]*logb(dlist$dtrans(Y[exactsurv, 1]))), sum(logb(dlist$dtrans(Y[exactsurv, 1])))) if (type == 'interval') { if (any(Y[,3] == 3)) Y <- cbind(tranfun(Y[,1:2]), Y[,3]) else Y <- cbind(tranfun(Y[,1]), Y[,3]) } else { if (type == 'left') Y <- cbind(tranfun(Y[, 1]), 2 - Y[, 2]) else Y <- cbind(tranfun(Y[, 1]), Y[, 2]) } if (!all(is.finite(Y))) stop("Invalid survival times for this distribution") } else { if (type == 'left') Y[, 2] <- 2- Y[, 2] else if (type == 'interval' && all(Y[, 3] < 3)) Y <- Y[, c(1, 3)] } ## if (!length(dlist$itrans)) itrans <- function(x) x ## else ## itrans <- dlist$itrans if (length(dlist$scale)) { if (!missing(scale)) warning(paste(dlist$name, "has a fixed scale, user specified value ignored")) scale <- dlist$scale } if (length(dlist$dist)) dlist <- survreg.distributions[[dlist$dist]] if (missing(control)) control <- survreg.control(...) if (scale < 0) stop("Invalid scale value") if (scale >0 && nstrata >1) stop("Cannot have multiple strata with a fixed scale") ## Check for penalized terms pterms <- sapply(m, inherits, 'coxph.penalty') if (any(pterms)) { pattr <- lapply(m[pterms], attributes) ## ## the 'order' attribute has the same components as 'term.labels' ## pterms always has 1 more (response), sometimes 2 (offset) ## drop the extra parts from pterms temp <- c(attr(Terms, 'response'), attr(Terms, 'offset')) if (length(dropx)) temp <- c(temp, dropx+1) pterms <- pterms[-temp] temp <- match((names(pterms))[pterms], attr(Terms, 'term.labels')) ord <- attr(Terms, 'order')[temp] if (any(ord > 1)) stop ('Penalty terms cannot be in an interaction') ##pcols <- (attr(X, 'assign')[-1])[pterms] assign <- attrassign(X,newTerms) pcols <- assign[-1][pterms] fit <- survpenal.fit(X, Y, weights, offset, init=init, controlvals = control, dist= dlist, scale=scale, strata=strata, nstrat=nstrata, pcols, pattr,assign, parms=parms) } else fit <- survreg.fit(X, Y, weights, offset, init=init, controlvals=control, dist= dlist, scale=scale, nstrat=nstrata, strata, parms=parms) if (is.character(fit)) fit <- list(fail=fit) #error message else { if (scale == 0) { nvar <- length(fit$coef) - nstrata fit$scale <- exp(fit$coef[-(1:nvar)]) if (nstrata == 1) names(fit$scale) <- NULL else names(fit$scale) <- levels(strata.keep) fit$coefficients <- fit$coefficients[1:nvar] fit$idf <- 1 + nstrata } else { fit$scale <- scale fit$idf <- 1 } fit$loglik <- fit$loglik + logcorrect } if(length(nact)) fit$na.action <- nact ## FEH fit$df.residual <- n - sum(fit$df) fit$terms <- Terms fit$formula <- as.vector(attr(Terms, "formula")) fit$means <- apply(X,2, mean) fit$call <- call fit$sformula <- sformula fit$dist <- dist fit$df.resid <- n-sum(fit$df) ##used for anova.survreg if (model) fit$model <- m if (x) fit$x <- X[, -1, drop=FALSE] ##if (y) fit$y <- Y #FEH if (length(parms)) fit$parms <- parms ## Start FEH ##if (any(pterms)) class(fit)<- c('survreg.penal', 'survreg') ##else class(fit) <- 'survreg' fit$assign <- 
DesignAssign(atr, 1, Terms) fit$formula <- formula if(y) { class(Ysave) <- 'Surv' attr(Ysave, 'type') <- atY$type fit$y <- Ysave } scale.pred <- if(dist %in% c('weibull','exponential','lognormal','loglogistic')) c('log(T)','Survival Time Ratio') else 'T' logtest <- 2 * diff(fit$loglik) Nn <- if(length(weights)) sum(weights) else nnn[1] R2.max <- 1 - exp(2. * fit$loglik[1] / Nn) R2 <- (1 - exp(-logtest/Nn)) / R2.max df <- length(fit$coef) - 1 P <- if(df == 0) NA else 1. - pchisq(logtest, df) gindex <- GiniMd(fit$linear.predictors) Dxy <- if(type %in% c('right', 'left')) dxy.cens(fit$linear.predictors, Y)['Dxy'] else { warning('Dxy not computed since right or left censoring not in effect') NA } stats <- c(nnn, logtest, df, P, R2, Dxy, gindex, exp(gindex)) names(stats) <- c("Obs", "Events", "Model L.R.", "d.f.", "P", "R2", "Dxy", "g", "gr") if(length(weights)) stats <- c(stats, 'Sum of Weights'=sum(weights)) fit <- c(fit, list(stats=stats, weights=weights, maxtime=maxtime, units=time.units, time.inc=time.inc, scale.pred=scale.pred, non.slopes=1, Design=atr, fail=FALSE)) class(fit) <- if (any(pterms)) c('psm','rms','survreg.penal','survreg') else c('psm','rms','survreg') ## End FEH fit } Hazard <- function(object, ...) UseMethod("Hazard") Survival <- function(object, ...) UseMethod("Survival") Hazard.psm <- function(object, ...) { dist <- object$dist g <- survreg.auxinfo[[dist]]$hazard formals(g) <- list(times=NA, lp=NULL, parms=logb(object$scale)) g } Survival.psm <- function(object, ...) { dist <- object$dist g <- survreg.auxinfo[[dist]]$survival formals(g) <- list(times=NULL, lp=NULL, parms=logb(object$scale)) g } Quantile.psm <- function(object, ...) { dist <- object$dist g <- survreg.auxinfo[[dist]]$Quantile formals(g) <- list(q=.5, lp=NULL, parms=logb(object$scale)) g } Mean.psm <- function(object, ...) { dist <- object$dist g <- survreg.auxinfo[[dist]]$mean formals(g) <- list(lp=NULL, parms=logb(object$scale)) g } predict.psm <- function(object, newdata, type=c("lp","x","data.frame","terms","cterms","ccterms","adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean','individual','simultaneous'), kint=1, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ...) { type <- match.arg(type) predictrms(object, newdata, type, se.fit, conf.int, conf.type, kint=1, na.action=na.action, expand.na=expand.na, center.terms=center.terms, ...) } residuals.psm <- function(object, type=c("censored.normalized", "response", "deviance","dfbeta","dfbetas", "working","ldcase","ldresp","ldshape", "matrix", "score"), ...) { type <- match.arg(type) if(type != 'censored.normalized') { r <- getS3method('residuals', 'survreg') s <- if(type == 'score') { X <- cbind('(Intercept)'=1, object$x) if(! length(X)) stop('did not use x=T with fit') wts <- object$weights if(! 
length(wts)) wts <- 1 res <- r(object, type='matrix') s <- as.vector(res[, 'dg']) * wts * X if(NROW(object$var) > length(coef(object))) s <- cbind(s, 'Log(scale)'=unname(res[,'ds'])) } else r(object, type=type) return(s) } y <- object$y aty <- attributes(y) if(length(y) == 0) stop('did not use y=T with fit') ncy <- ncol(y) scale <- object$scale dist <- object$dist trans <- survreg.distributions[[dist]]$trans r <- (trans(y[, -ncy, drop=FALSE]) - object$linear.predictors) / scale label(r) <- 'Normalized Residual' ev <- y[, ncy] lab <- aty$inputAttributes$event$label if(length(lab)) label(ev) <- lab ## Moved the following line here from bottom r <- Surv(r, ev) if(length(object$na.action)) r <- naresid(object$na.action, r) attr(r,'dist') <- dist attr(r,'type') <- aty$type class(r) <- c('residuals.psm.censored.normalized','Surv') g <- survreg.auxinfo[[dist]]$survival formals(g) <- list(times=NULL, lp=0, parms=0) attr(r,'theoretical') <- g r } lines.residuals.psm.censored.normalized <- function(x, n=100, lty=1, xlim=range(r[,-ncol(r)],na.rm=TRUE), lwd=3, ...) { r <- x x <- seq(xlim[1], xlim[2], length=n) tx <- x dist <- attr(r, 'dist') if(dist %in% c('weibull','loglogistic','lognormal')) tx <- exp(x) ## $survival functions log x lines(x, attr(r,'theoretical')(tx), lwd=lwd, lty=lty, ...) invisible() } survplot.residuals.psm.censored.normalized <- function(fit, x, g=4, col, main, ...) { r <- fit if(missing(x)) { survplot(npsurv(r ~ 1), conf='none', xlab='Residual', col=if(missing(col))par('col') else col, ...) if(!missing(main)) title(main) } else { if(is.character(x)) x <- as.factor(x) if(!is.factor(x) && length(unique(x))>5) x <- cut2(x, g=g) s <- is.na(r[,1]) | is.na(x) if(any(s)) {r <- r[!s,]; x <- x[!s,drop=TRUE]} survplot(npsurv(r ~ x, data=data.frame(x,r)), xlab='Residual', conf='none', col=if(missing(col))1:length(levels(x)) else par('col'), ...) if(missing(main)) { main <- if(length(lab <- attr(x,'label'))) lab else '' } if(main != '') title(main) } lines(r, lty=1, lwd=3) invisible() } rms/R/plot.nomogram.s0000644000176200001440000003103312761333163014241 0ustar liggesusersplot.nomogram <- function(x, lplabel="Linear Predictor", fun.side, col.conf=c(1, .3), conf.space=c(.08,.2), label.every=1, force.label=FALSE, xfrac=.35, cex.axis=.85, cex.var=1, col.grid=NULL, varname.label=TRUE, varname.label.sep="=", ia.space=.7, tck=NA, tcl=-0.25, lmgp=.4, naxes, points.label='Points', total.points.label='Total Points', total.sep.page=FALSE, total.fun, cap.labels=FALSE, ...) 
{ set <- x info <- attr(set, 'info') fun <- info$fun fun.at <- info$fun.at nfun <- length(fun) funlabel <- info$funlabel fun.at <- info$fun.at fun.lp.at <- info$fun.lp.at R <- info$R sc <- info$sc maxscale <- info$maxscale Intercept <- info$Intercept Abbrev <- info$Abbrev conf.int <- info$conf.int lp <- info$lp lp.at <- info$lp.at su <- info$space.used nint <- info$nint discrete <- info$discrete minlength <- info$minlength col.conf <- rep(col.conf, length=length(conf.int)) space.used <- su[1] + ia.space * su[2] oldpar <- oPar() # in Hmisc Misc.s mgp <- oldpar$mgp mar <- oldpar$mar par(mgp=c(mgp[1], lmgp, mgp[3]), mar=c(mar[1], 1.1, mar[3], mar[4])) on.exit(setParNro(oldpar)) ## was par(oldpar) 11Apr02 tck2 <- tck / 2 tcl2 <- tcl / 2 tck3 <- tck / 3 tcl3 <- tcl / 3 se <- FALSE if(any(conf.int > 0)) { se <- TRUE zcrit <- qnorm((conf.int+1)/2) bar <- function(x, y, zcrit, se, col.conf, nlev=4) { y <- rep(seq(y[1], y[2], length=nlev), length.out=length(x)) for(j in 1:length(x)) { xj <- x[j]; yj <- y[j] W <- c(0,zcrit) * se[j] for(i in 1:length(zcrit)) { segments(xj - W[i + 1], yj, xj - W[i], yj, col=col.conf[i], lwd=1) segments(xj + W[i + 1], yj, xj + W[i], yj, col=col.conf[i], lwd=1) } } } } if(!missing(fun.side)) { if(!is.list(fun.side)) fun.side <- rep(list(fun.side),nfun) if(any(!(unlist(fun.side) %in% c(1,3)))) stop('fun.side must contain only the numbers 1 and 3') } num.lines <- 0 entities <- 0 ### start <- len <- NULL ### end <- 0 ## Determine how wide the labels can be xl <- -xfrac * maxscale if(missing(naxes)) naxes <- if(total.sep.page) max(space.used + 1, nfun + lp + 1) else space.used + 1 + nfun + lp + 1 Format <- function(x) { # like format but does individually f <- character(l <- length(x)) for(i in 1:l) f[i] <- format(x[i]) f } newpage <- function(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, cex.axis, tck, tck2, tcl, tcl2, label.every, force.label, points=TRUE, points.label='Points', usr) { y <- naxes - 1 plot(0, 0, xlim=c(xl, maxscale), ylim=c(0, y), type="n",axes=FALSE, xlab="", ylab="") if(!missing(usr)) par(usr=usr) if(!points) return(y + 1) ax <- c(0,maxscale) text(xl, y, points.label, adj=0, cex=cex.var) x <- pretty(ax, n=nint) dif <- x[2] - x[1] x2 <- seq((x[1] + x[2]) / 2, max(x), by=dif) x2 <- sort(c(x2 - dif / 4, x2, x2 + dif / 4)) if(length(col.grid)) { segments(x , y, x, y - space.used, col=col.grid[1], lwd=1) segments(x2, y,x2, y - space.used, col=col.grid[-1], lwd=1) } axisf(3, at=x, pos=y, cex=cex.axis, tck=tck, tcl=tcl, label.every=label.every, force.label=force.label, padj=0) axisf(3, at=x2, labels=FALSE, pos=y, tck=tck2, tcl=tcl2, cex=cex.axis) y } y <- newpage(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, cex.axis, tck, tck2, tcl, tcl2, label.every=label.every, force.label=force.label, points.label=points.label) i <- 0 ns <- names(set) for(S in set[ns %nin% c('lp', 'total.points', funlabel)]) { i <- i + 1 setinfo <- attr(S, 'info') type <- setinfo$type y <- y - (if(type == "continuation") ia.space else 1) if(y < -.05) { y <- newpage(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, cex.axis, tck, tck2, tcl, tcl2, label.every=label.every,force.label=force.label, points.label=points.label) - (if(type == "continuation") ia.space else 1) } ## word wrap the labels so that they fit into the supplied space. 
text(xl, y, paste(strgraphwrap(ns[[i]], abs(xl), cex=cex.var), collapse='\n'), adj=0, cex=cex.var) x <- S[[1]] nam <- names(S)[1] #stored with fastest first if(is.character(x) && nam %in% names(Abbrev)) { transvec <- Abbrev[[nam]]$abbrev names(transvec) <- Abbrev[[nam]]$full x <- transvec[x] } fx <- if(is.character(x)) x else sedit(Format(x), " ", "") #axis not like bl - was translate() ### is <- start[i] ### ie <- is+len[i]-1 xt <- S$points ## Find flat pieces and combine their labels r <- rle(xt) if(any(r$length > 1)) { is <- 1 for(j in r$length) { ie <- is + j - 1 if(j > 1) { fx[ie] <- if(discrete[nam] || ie < length(xt)) paste(fx[is], "-", fx[ie],sep="") else paste(fx[is], '+', sep='') fx[is:(ie - 1)] <- "" xt[is:(ie - 1)] <- NA } is <- ie+1 } fx <- fx[!is.na(xt)] xt <- xt[!is.na(xt)] } ## record the side changes side <- c(1,3) ## subtract 0.6 from the side 1 mgp so that the labels are ## equally separated from the axis padj <- c(1,0) new.mgp <- vector(mode='list', 2) new.mgp[[2]] <- c(0, lmgp, 0) new.mgp[[1]] <- new.mgp[[2]] - c(0,0.6,0) ## Find direction changes ch <- if(length(xt) > 2) c(FALSE, FALSE, diff(diff(xt) > 0) != 0) else rep(FALSE, length(xt)) if(discrete[nam] && length(xt) > 1) { ## categorical - alternate adjacent levels j <- order(xt) lines(range(xt), rep(y, 2)) # make sure all ticks are connected for(k in 1:2) { is <- j[seq(k, length(j), by=2)] new.labs <- if(cap.labels) capitalize(fx[is]) else fx[is] axisf(side[k], at=xt[is], labels=new.labs, pos=y, cex=cex.axis, tck=tck,tcl=tcl, force.label=force.label || (minlength == 1 && nam %in% names(Abbrev)), disc=TRUE, mgp=new.mgp[[k]], padj=padj[k]) if(se) bar(xt[is], if(k == 1) y - conf.space - .32 else y + conf.space + .32, zcrit, sc * S$se.fit[is], col.conf) } } else if(!any(ch)) { axisf(1, at=xt, labels=fx, pos=y, cex=cex.axis, tck=tck, tcl=tcl, mgp=new.mgp[[1]], label.every=label.every, force.label=force.label, disc=discrete[nam], padj=padj[1]) if(se) bar(xt, y+conf.space, zcrit, sc*S$se.fit, col.conf) } else { lines(range(xt), rep(y, 2)) # make sure all ticks are connected j <- (1 : length(ch))[ch] if(max(j) < length(ch)) j <- c(j, length(ch) + 1) flag <- 1 is <- 1 for(k in j) { ie <- k - 1 axisf(side[flag], at=xt[is:ie], labels=fx[is:ie], pos=y, cex=cex.axis, tck=tck,tcl=tcl, label.every=label.every, force.label=force.label, mgp=new.mgp[[flag]], disc=discrete[nam], padj=padj[flag]) if(se) bar(xt[is:ie], if(side[flag] == 1) y - conf.space - .32 else y + conf.space + .32, zcrit, sc * S$se.fit[is:ie], col.conf) flag <- if(flag == 2) 1 else 2 is <- ie + 1 } } } S <- set$total.points x <- S$x new.max <- max(x) xl.old <- xl xl <- -xfrac*new.max u <- par()$usr if(!missing(total.fun)) total.fun() usr <- c(xl * u[1] / xl.old, new.max * u[2] / maxscale, u[3:4]) par(usr=usr) x.double <- seq(x[1], new.max, by=(x[2] - x[1]) / 5) y <- y - 1 if(y < -.05 || total.sep.page) y <- newpage(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, cex.axis, tck, tck2, tcl, tcl2, label.every=label.every, force.label=force.label, points=FALSE,usr=usr) - 1 text(xl, y, total.points.label, adj=0, cex=cex.var) axisf(1, at=x, pos=y, cex=cex.axis, tck=tck, tcl=tcl, label.every=label.every, force.label=force.label, mgp=c(0, lmgp - 0.6, 0), padj=1) axisf(1, at=x.double, labels=FALSE, pos=y, tck=tck2, tcl=tcl2, cex=cex.axis) if(lp) { S <- set$lp x <- S$x x2 <- seq(lp.at[1], max(lp.at), by=(lp.at[2] - lp.at[1]) / 2) scaled.x2 <- (x2 - Intercept) * sc y <- y - 1 if(y < -.05) y <- newpage(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, 
cex.axis, tck, tck2, tcl, tcl2, label.every=label.every, force.label=force.label, points=FALSE,usr=usr) - 1 text(xl, y, lplabel, adj=0, cex=cex.var) axisf(1, at=x, labels=Format(lp.at), pos=y, cex=cex.axis, tck=tck, tcl=tcl, label.every=label.every, force.label=force.label, mgp=c(0, lmgp - 0.6, 0), padj=1) axisf(1, at=scaled.x2, labels=FALSE, tck=tck2, tcl=tcl2, pos=y, cex=cex.axis) conf <- S$conf if(length(conf)) bar(conf$x, y + c(conf.space[1], conf.space[1] + conf$w * diff(conf.space)), zcrit, conf$se, col.conf, nlev=conf$nlev) } i <- 0 if(nfun > 0) for(S in set[funlabel]) { i <- i + 1 y <- y - 1 scaled <- S$x fat <- S$fat s <- S$which ### ??? if(y < -.05) y <- newpage(naxes, xl, maxscale, cex.var, nint, space.used, col.grid, cex.axis, tck, tck2, tcl, tcl2, label.every=label.every, force.label=force.label, points=FALSE,usr=usr) - 1 text(xl, y, funlabel[i], adj=0, cex=cex.var) sides <- if(missing(fun.side)) rep(1, length(fat)) else (fun.side[[i]])[s] if(length(sides)!=length(fat)) stop('fun.side vector not same length as fun.at or fun.lp.at') for(jj in 1:length(fat)) axis(sides[jj], at=scaled[jj], labels=fat[jj], pos=y, cex.axis=cex.axis, tck=tck, tcl=tcl, mgp=if(sides[jj] == 1) c(0,lmgp - 0.6, 0) else c(0, lmgp, 0), padj=if(sides[jj] == 1) 1 else 0) lines(range(scaled),rep(y,2)) #make sure all ticks are connected } invisible() } legend.nomabbrev <- function(object, which, x, y=NULL, ncol=3, ...) { abb <- attr(object, 'info')$Abbrev[[which]] if(length(abb) == 0) stop(paste('no abbreviation information for',which)) if(max(nchar(abb$abbrev)) == 1) if(length(y)) legend(x, y, abb$full, ncol=ncol, pch=paste(abb$abbrev,collapse=''), ...) else legend(x, abb$full, ncol=ncol, pch=paste(abb$abbrev,collapse=''), ...) else if(length(y)) legend(x, y, paste(format(abb$abbrev),':',abb$full,sep=''), ncol=ncol, ...) else legend(x, paste(format(abb$abbrev),':',abb$full,sep=''), ncol=ncol, ...) invisible() } ##Version of axis allowing tick mark labels to be forced, or to ##label every 'label.every' tick marks axisf <- function(side, at, labels=TRUE, pos, cex, tck, tcl, label.every=1, force.label=FALSE, disc=FALSE, ...) { ax <- function(..., cex) axis(..., cex.axis=cex) ax(side, at, labels=FALSE, pos=pos, cex=cex, tck=tck, tcl=tcl, ...) if(is.logical(labels) && !labels) return(invisible()) if(label.every > 1 && ! disc) { sq <- seq(along=at, by=label.every) at[-sq] <- NA } if(is.logical(labels)) labels <- format(at, trim=TRUE) if(force.label) { for(i in 1:length(labels)) if(!is.na(at[i])) ax(side, at[i], labels[i], pos=pos, cex=cex, tcl=0, ...) } else ax(side, at[! is.na(at)], labels[! is.na(at)], pos=pos, cex=cex, tcl=0, ...) invisible() } rms/R/survplot.npsurv.s0000644000176200001440000004036713564131232014704 0ustar liggesuserssurvplot.npsurv <- function(fit, xlim, ylim, xlab, ylab, time.inc, state=NULL, conf=c("bands", "bars", "diffbands", "none"), mylim=NULL, add=FALSE, label.curves=TRUE, abbrev.label=FALSE, levels.only=FALSE, lty, lwd=par('lwd'), col=1, col.fill=gray(seq(.95, .75, length=5)), loglog=FALSE, fun, n.risk=FALSE, aehaz=FALSE, times=NULL, logt=FALSE, dots=FALSE, dotsize=.003, grid=NULL, srt.n.risk=0, sep.n.risk=.056, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, pr=FALSE, ...) 
{ conf <- match.arg(conf) polyg <- ordGridFun(grid=FALSE)$polygon conf.int <- fit$conf.int if(!length(conf.int) | conf == "none") conf.int <- 0 opar <- par(c('mar', 'xpd')) on.exit(par(opar)) cylim <- function(ylim) if(length(mylim)) c(min(ylim[1], mylim[1]), max(ylim[2], mylim[2])) else ylim fit.orig <- fit units <- fit$units if(!length(units)) units <- "Day" maxtime <- fit$maxtime if(! length(maxtime)) maxtime <- max(fit$time) mintime <- min(fit$time, 0) pret <- pretty(c(mintime, maxtime)) maxtime <- max(pret) mintime <- min(pret) if(missing(time.inc)) { time.inc <- switch(units, Day=30, Month=1, Year=1, (maxtime - mintime) / 10) if(time.inc > maxtime) time.inc <- (maxtime - mintime) / 10 } if(n.risk && ! length(fit$n.risk)) { n.risk <- FALSE warning("fit does not have number at risk\nIs probably from a parametric model\nn.risk set to FALSE") } mstate <- inherits(fit, 'survfitms') if(mstate) { ## Multi-state model for competing risks if(missing(fun)) fun <- function(y) 1 - y if(missing(state)) stop('state must be given when response is a multi-state/competing risk object from Surv()') if(length(state) != 1) stop('at present state can only be a single state') states <- fit$states if(state %nin% states) stop(paste('state is not in', paste(states, collapse=', '))) } trans <- loglog || mstate || ! missing(fun) if(missing(ylab)) ylab <- if(loglog) "log(-log Survival Probability)" else if(mstate) paste('Cumulative Incidence of', upFirst(state)) else if(trans) "" else "Survival Probability" if(loglog) fun <- function(y) logb(-logb(ifelse(y == 0 | y == 1, NA, y))) else if(! trans) fun <- function(y) y if(missing(xlab)) xlab <- if(logt) paste("log Follow-up Time in ", units, "s", sep="") else labelPlotmath('Follow-up Time', paste(fit$units, 's', sep='')) ## else labelPlotmath(fit$time.label, fit$units) if(missing(xlim)) xlim <- if(logt) logb(c(maxtime / 100, maxtime)) else c(mintime, maxtime) convert <- if(mstate) { istate <- match(state, states) conv <- function(f, istate) { f$surv <- 1 - f$pstate [, istate] f$lower <- 1 - f$lower [, istate] f$upper <- 1 - f$upper [, istate] f$std.err <- f$std.err[, istate] icens <- which(states == '(s0)') if(! length(icens)) stop('Program logic error: did not find (s0) column with competing risks') f$n.risk <- f$n.risk[, icens] if(all(f$n.risk == 0)) stop('program logic error: all n.risk are zero') f } formals(conv) <- list(f=NULL, istate=istate) conv } else function(f) f fit <- convert(fit) origsurv <- fit$surv if(trans) { fit$surv <- fun(fit$surv) fit$surv[is.infinite(fit$surv)] <- NA ## handle e.g. 
logit(1) - Inf would mess up ylim in plot() if(conf.int > 0) { fit$lower <- fun(fit$lower) fit$upper <- fun(fit$upper) fit$lower[is.infinite(fit$lower)] <- NA fit$upper[is.infinite(fit$upper)] <- NA if(missing(ylim)) ylim <- cylim(range(c(fit$lower, fit$upper), na.rm=TRUE)) } else if(missing(ylim)) ylim <- cylim(range(fit$surv, na.rm=TRUE)) } else if(missing(ylim)) ylim <- c(0, 1) if(length(grid)) { dots <- FALSE if(is.logical(grid)) grid <- if(grid) gray(.8) else NULL } if(logt | trans) { dots <- FALSE; grid <- NULL } olev <- slev <- names(fit$strata) if(levels.only) slev <- gsub('.*=', '', slev) sleva <- if(abbrev.label) abbreviate(slev) else slev ns <- length(slev) slevp <- ns > 0 labelc <- is.list(label.curves) || label.curves if(!slevp) labelc <- FALSE ns <- max(ns, 1) y <- 1 : ns stemp <- if(ns == 1) rep(1, length(fit$time)) else rep(1:ns, fit$strata) if(n.risk | (conf.int > 0 & conf == "bars")) { stime <- seq(mintime, maxtime, time.inc) v <- convert(summary(fit.orig, times=stime, print.it=FALSE)) v$surv <- fun(v$surv) v$lower <- fun(v$lower) v$upper <- fun(v$upper) vs <- if(ns > 1) as.character(v$strata) ## survival:::summary.survfit was not preserving order of strata levels } xd <- xlim[2] - xlim[1] yd <- ylim[2] - ylim[1] if(n.risk && !add) { mar <- opar$mar if(mar[4] < 4) {mar[4] <- mar[4] + 2; par(mar=mar)} } ## One curve for each value of y, excl style used for C.L. lty <- if(missing(lty)) seq(ns + 1)[-2] else rep(lty, length=ns) lwd <- rep(lwd, length=ns) col <- rep(col, length=ns) if(conf == 'diffbands' && ns < 2) conf <- 'bands' if(labelc || conf %in% c('bands', 'diffbands')) curves <- vector('list', ns) Tim <- Srv <- list() par(xpd=NA) nevents <- totaltime <- numeric(ns) cuminc <- character(ns) for(i in 1 : ns) { st <- stemp == i time <- fit$time[st] surv <- fit$surv[st] osurv <- origsurv[st] ## nevents[i] <- sum(fit$n.event[st]) ## nrsk <- fit$n.risk[st] ## neachtime <- c(- diff(nrsk), min(nrsk)) ## totaltime[i] <- sum(neachtime * time) nevents[i] <- if(mstate) { if(ns == 1) fit$numevents[, state] else fit$numevents[olev[i], state] } else { if(ns == 1) fit$numevents else fit$numevents[olev[i]] } totaltime[i] <- if(ns == 1) fit$exposure else fit$exposure[olev[i]] if(length(times)) { cumi <- 1. - approx(time, osurv, xout=times, method='constant')$y noun <- units %in% c('', ' ') cuminc[i] <- paste('Cum. inc.@ ', if(noun) 't=', paste(times, collapse=','), if(! 
noun) paste(' ', units, sep=''), ':', paste(round(cumi, 3), collapse=','), sep='') } if(logt) time <- logb(time) s <- !is.na(time) & (time >= xlim[1]) if(i==1 & !add) { plot(time, surv, xlab='', xlim=xlim, ylab='', ylim=ylim, type="n", axes=FALSE) mgp.axis(1, at=if(logt) pretty(xlim) else seq(xlim[1], max(pretty(xlim)), time.inc), labels=TRUE, axistitle=xlab, cex.lab=cex.xlab) mgp.axis(2, at=pretty(ylim), axistitle=ylab, cex.lab=cex.ylab) if(dots || length(grid)) { xlm <- pretty(xlim) xlm <- c(xlm[1], xlm[length(xlm)]) xp <- seq(xlm[1], xlm[2], by=time.inc) yd <- ylim[2] - ylim[1] if(yd <= .1) yi <- .01 else if(yd <=.2 ) yi <- .025 else if(yd <=.4 ) yi <- .05 else yi <- .1 yp <- seq(ylim[2], ylim[1] + if(n.risk && missing(y.n.risk)) yi else 0, by = -yi) if(dots) for(tt in xp) symbols(rep(tt, length(yp)), yp, circles=rep(dotsize, length(yp)), inches=dotsize, add=TRUE) else abline(h=yp, v=xp, col=grid, xpd=FALSE) } } tim <- time[s]; srv <- surv[s] if(conf.int > 0 && conf == "bands") { blower <- fit$lower[st][s] bupper <- fit$upper[st][s] } ##don't let step function go beyond x-axis - ##this cuts it off but allows step to proceed axis end if(max(tim) > xlim[2]) { srvl <- srv[tim <= xlim[2] + 1e-6] ## s.last <- min(srv[tim<=xlim[2]+1e-6]) #not work w/fun s.last <- srvl[length(srvl)] k <- tim < xlim[2] tim <- c(tim[k], xlim[2]) srv <- c(srv[k], s.last) if(conf.int > 0 && conf == "bands") { low.last <- blower[time <= xlim[2] + 1e-6] low.last <- low.last[length(low.last)] up.last <- bupper[time <= xlim[2] + 1e-6] up.last <- up.last[length(up.last)] blower <- c(blower[k], low.last) bupper <- c(bupper[k], up.last) } } if(logt) { if(conf %nin% c('bands', 'diffbands')) lines(tim, srv, type="s", lty=lty[i], col=col[i], lwd=lwd[i]) if(labelc || conf %in% c('bands', 'diffbands')) curves[[i]] <- list(tim, srv) } else { xxx <- c(mintime, tim) yyy <- c(fun(1), srv) if(conf %nin% c('bands', 'diffbands')) lines(xxx, yyy, type="s", lty=lty[i], col=col[i], lwd=lwd[i]) if(labelc || conf %in% c('bands', 'diffbands')) curves[[i]] <- list(xxx, yyy) } if(pr) { zest <- rbind(time[s], surv[s]) dimnames(zest) <- list(c("Time", "Survival"), rep("", sum(s))) if(slevp)cat("\nEstimates for ", slev[i], "\n\n") print(zest, digits=3) } if(conf.int > 0) { if(conf == 'bands') { if(logt) polyg(x = c(tim, max(tim), rev(tim)), y = c(blower, rev(bupper), max(bupper)), col = col.fill[i], type='s') else polyg(x = c(max(tim), tim, rev(c(tim, max(tim)))), y = c(fun(1), blower, rev(c(fun(1), bupper))), col = col.fill[i], type = "s") } else if(conf == 'diffbands') survdiffplot(fit.orig, conf=conf, fun=fun, convert=convert, xlim=xlim) else { j <- if(ns == 1) TRUE else vs == olev[i] tt <- v$time[j] #may not get predictions at all t ss <- v$surv[j] lower <- v$lower[j] upper <- v$upper[j] if(logt) tt <- logb(ifelse(tt == 0, NA, tt)) tt <- tt + xd * (i - 1) * .01 errbar(tt, ss, upper, lower, add=TRUE, lty=lty[i], col=col[i]) } } if(n.risk) { j <- if(ns == 1) TRUE else vs == olev[i] tt <- v$time[j] nrisk <- v$n.risk[j] tt[1] <- xlim[1] #was xd*.015, .030, .035 if(missing(y.n.risk)) y.n.risk <- ylim[1] if(y.n.risk == 'auto') y.n.risk <- - diff(ylim) / 3 yy <- y.n.risk + yd * (ns - i) * sep.n.risk nri <- nrisk nri[tt > xlim[2]] <- NA text(tt[1], yy, nri[1], cex=cex.n.risk, adj=adj.n.risk, srt=srt.n.risk) if (length(nri) > 1) text(tt[-1], yy, nri[-1], cex=cex.n.risk, adj=1) if(slevp) text(xlim[2] + xd * .025, yy, adj=0, sleva[i], cex=cex.n.risk) } } if(conf %in% c('bands', 'diffbands')) for(i in 1:ns) lines(curves[[i]][[1]], curves[[i]][[2]], 
lty=lty[i], lwd=lwd[i], col=col[i], type='s') if(aehaz || length(times)) { un <- if(units == ' ' | units == '') '' else paste('/', tolower(units), sep='') haz <- round(nevents / totaltime, 4) txt <- paste(nevents, 'events') if(aehaz) txt <- paste(txt, ', hazard=', haz, un, sep='') if(length(times)) txt <- paste(txt, ', ', sep='') if(length(times)) txt <- paste(txt, cuminc) if(! labelc) text(xlim[2], ylim[2], txt, adj=1) else { maxlen <- max(nchar(sleva)) sleva <- substring(paste(sleva, ' '), 1, maxlen) for(j in 1 : ns) sleva[j] <- eval(parse(text=sprintf("expression(paste('%s ',scriptstyle('(%s)')))", sleva[j], txt[j]))) } } if(labelc) labcurve(curves, sleva, type='s', lty=lty, lwd=lwd, opts=label.curves, col.=col) invisible(slev) } survdiffplot <- function(fit, order=1:2, fun=function(y) y, xlim, ylim, xlab, ylab="Difference in Survival Probability", time.inc, conf.int, conf=c("shaded", "bands", "diffbands", "none"), add=FALSE, lty=1, lwd=par('lwd'), col=1, n.risk=FALSE, grid=NULL, srt.n.risk=0, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, convert=function(f) f) { conf <- match.arg(conf) if(missing(conf.int)) conf.int <- fit$conf.int if(! length(conf.int) | conf == "none") conf.int <- 0 opar <- par(c('xpd', 'mar')) on.exit(par(opar)) units <- fit$units if(!length(units)) units <- "Day" maxtime <- fit$maxtime if(!length(maxtime)) maxtime <- max(fit$time) mintime <- min(fit$time, 0) pret <- pretty(c(mintime, maxtime)) maxtime <- max(pret) mintime <- min(pret) if(missing(time.inc)) { time.inc <- switch(units, Day=30, Month=1, Year=1, (maxtime - mintime) / 10) if(time.inc > maxtime) time.inc <- (maxtime - mintime) / 10 } if(n.risk && !length(fit$n.risk)) { n.risk <- FALSE warning("fit does not have number at risk\nIs probably from a parametric model\nn.risk set to FALSE") } if(missing(xlab)) xlab <- if(units==' ') 'Time' else paste(units, "s", sep="") if(missing(xlim)) xlim <- c(mintime, maxtime) if(length(grid) && is.logical(grid)) grid <- if(grid) gray(.8) else NULL polyg <- ordGridFun(grid=FALSE)$polygon times <- sort(unique(c(fit$time, seq(mintime, maxtime, by=time.inc)))) ## Note: summary.survfit computes standard errors on S(t) scale f <- convert(summary(fit, times=times)) slev <- levels(f$strata) ns <- length(slev) if(ns !=2 ) stop('must have exactly two strata') a <- f$strata == slev[1] h <- function(level, times, f) { strata <- f$strata i <- strata == level tim <- f$time[i] surv <- f$surv[i] se <- f$std.err[i] nrisk <- f$n.risk[i] j <- match(times, tim) list(time=times, surv=surv[j], se=se[j], nrisk=nrisk[j]) } a <- h(slev[order[1]], times, f) b <- h(slev[order[2]], times, f) surv <- if(conf == 'diffbands') (fun(a$surv) + fun(b$surv)) / 2 else fun(a$surv) - fun(b$surv) se <- sqrt(a$se^2 + b$se^2) z <- qnorm((1 + conf.int) / 2) if(conf == 'diffbands') { lo <- surv - 0.5 * z * se hi <- surv + 0.5 * z * se k <- ! 
is.na(times + lo + hi) & times < xlim[2] polyg(c(times[k], rev(times[k])), c(lo[k], rev(hi[k])), col=gray(.9), type='s') return(invisible(slev)) } lo <- surv - z * se hi <- surv + z * se if(missing(ylim)) ylim <- range(c(lo, hi), na.rm=TRUE) if(!add) { plot(times, surv, type='n', axes=FALSE, xlim=xlim, ylim=ylim, xlab='', ylab='') mgp.axis(2, labels=TRUE, axistitle=ylab, cex.lab=cex.ylab) mgp.axis(1, at=seq(xlim[1], max(pretty(xlim)), time.inc), labels=TRUE, axistitle=xlab, cex.lab=cex.xlab) } if(length(grid)) { xlm <- pretty(xlim) xlm <- c(xlm[1], xlm[length(xlm)]) xp <- seq(xlm[1], xlm[2], by=time.inc) ylm <- pretty(ylim) yp <- seq(min(ylm), max(ylm), by=ylm[2] - ylm[1]) abline(h=yp, v=xp, col=grid, xpd=FALSE) } k <- !is.na(times + lo + hi) switch(conf, shaded=polyg(c(times[k], rev(times[k])), c(lo[k], rev(hi[k])), col=gray(.85), type='s'), bands={ lines(times, lo, col=gray(.7)) lines(times, hi, col=gray(.7)) }, diffbands=NULL, none=NULL) lines(times, surv, type='s', lwd=lwd, col=col) abline(h=0, col=gray(.7)) title(sub=paste(slev[order], collapse=' - '), adj=0) if(n.risk) { nrisktimes <- seq(0, maxtime, by=time.inc) nriskinfo <- convert(summary(fit, times=nrisktimes)) anr <- h(slev[order[1]], nrisktimes, nriskinfo) bnr <- h(slev[order[2]], nrisktimes, nriskinfo) nrisk <- pmin(anr$nrisk, bnr$nrisk) xd <- xlim[2] - xlim[1] yd <- ylim[2] - ylim[1] if(! add) { mar <- opar$mar if(mar[4] < 4) {mar[4] <- mar[4] + 2; par(mar=mar)} } par(xpd=NA) tt <- nrisktimes tt[1] <- xlim[1] if(missing(y.n.risk)) y.n.risk <- ylim[1] if(y.n.risk == 'auto') y.n.risk <- - diff(ylim) / 3 yy <- y.n.risk nri <- nrisk nri[tt > xlim[2]] <- NA text(tt[1], yy, nri[1], cex=cex.n.risk, adj=adj.n.risk, srt=srt.n.risk) if (length(nri) > 1) text(tt[-1], yy, nri[-1], cex=cex.n.risk, adj=1) } invisible(slev) } rms/R/validate.lrm.s0000644000176200001440000001357112250460641014031 0ustar liggesusers#Resampling optimism of discrimination and reliability of a logistic #regression model #B: # reps #bw=T to incorporate backward stepdown (using fastbw) with params rule,type,sls #pr=T to print results of each bootstrap rep validate.lrm <- function(fit,method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint, Dxy.method=if(k==1)"somers2" else "lrm", emax.lim=c(0,1), ...) { k <- fit$non.slopes y <- fit$y if(length(y)==0) stop("fit did not use x=TRUE,y=TRUE") if(!is.factor(y)) y <- factor(y) ## was category 11Apr02 fit$y <- unclass(y) - 1 #mainly for Brier score (B) if(missing(kint)) kint <- floor((k+1)/2) penalty.matrix <- fit$penalty.matrix discrim <- function(x, y, fit, iter, evalfit=FALSE, pr=FALSE, Dxy.method="somers2", penalty.matrix, kint, ...) 
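## discrim computes the indexes reported by validate.lrm for one sample:
## Dxy, R2, calibration intercept and slope, discrimination index D,
## unreliability U, quality Q, Brier score B, and the g/gp indexes.
## With evalfit=TRUE the statistics come from the bootstrap fit itself;
## otherwise the linear predictor x is refitted on y to obtain the
## shrunken slope. Illustrative use of the enclosing function (a sketch
## only; d, y, x1, x2 are hypothetical):
##   f <- lrm(y ~ x1 + x2, data=d, x=TRUE, y=TRUE)
##   validate(f, B=200)           # bootstrap optimism-corrected indexes
##   validate(f, B=200, bw=TRUE)  # add fast backward stepdown per resample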
{ if(evalfit) { # Fit was for bootstrap sample stats <- fit$stats lr <- stats["Model L.R."] Dxy <- if(Dxy.method=="lrm") stats["Dxy"] else somers2(x,y)["Dxy"] intercept <- 0 shrink <- 1 n <- stats["Obs"] D <- (lr - 1)/n U <- -2/n Q <- D - U R2 <- stats["R2"] g <- stats['g'] gp <- stats['gp'] } else { k <- fit$non.slopes null.model <- length(fit$coefficients)==k refit <- if(null.model) lrm.fit(y=y) else lrm.fit(x,y,tol=1e-13) kr <- refit$non.slopes ## Model with no variables = null model stats <- refit$stats lr <- stats["Model L.R."] Dxy <- if(Dxy.method=="lrm") stats["Dxy"] else somers2(x,y)["Dxy"] intercept <- refit$coefficients[kint] shrink <- if(null.model) 1 else refit$coefficients[kr + 1] n <- stats["Obs"] D <- (lr-1)/n L01 <- -2 * sum( (y >= kint)*x - logb(1 + exp(x)), na.rm=TRUE) U <- (L01 - refit$deviance[2] - 2)/n Q <- D - U R2 <- stats["R2"] g <- GiniMd(shrink*x) gp <- GiniMd(plogis(intercept + shrink*x)) } P <- plogis(x) # 1/(1+exp(-x)) B <- sum(((y >= kint) - P)^2)/n z <- c(Dxy, R2, intercept, shrink, D, U, Q, B, g, gp) names(z) <- c("Dxy", "R2", "Intercept", "Slope", "D", "U", "Q", "B", "g", "gp") z } lrmfit <- function(x, y, maxit=12, tol=1e-7, penalty.matrix=NULL, xcol=NULL, ...) { if(length(xcol) && length(penalty.matrix) > 0) penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE] lrm.fit(x, y, maxit=maxit, penalty.matrix=penalty.matrix, tol=tol) } z <- predab.resample(fit, method=method, fit=lrmfit, measure=discrim, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, Dxy.method=Dxy.method, non.slopes.in.x=FALSE, penalty.matrix=penalty.matrix, kint=kint, ...) kept <- attr(z, 'kept') calib <- z[3:4,5] p <- seq(emax.lim[1],emax.lim[2],.0005) L <- logb(p/(1-p)) P <- plogis(calib[1]+calib[2]*L) # 1/(1+exp(-calib[1]-calib[2]*L)) emax <- max(abs(p-P), na.rm=TRUE) z <- rbind(z[1:4,],c(0,0,emax,emax,emax,z[1,6]),z[5:nrow(z),]) dimnames(z) <- list(c("Dxy", "R2","Intercept", "Slope", "Emax", "D", "U", "Q", "B", "g", "gp"), c("index.orig","training","test","optimism", "index.corrected","n")) structure(z, class='validate', kept=kept) } validate.orm <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, ...) { k <- fit$non.slopes y <- fit$y if(length(y)==0) stop("fit did not use x=TRUE, y=TRUE") if(!is.factor(y)) y <- factor(y) penalty.matrix <- fit$penalty.matrix discrim <- function(x, y, fit, iter, evalfit=FALSE, pr=FALSE, penalty.matrix, ...) { if(evalfit) { # Fit was for bootstrap sample stats <- fit$stats lr <- stats["Model L.R."] rho <- stats["rho"] shrink <- 1 n <- stats["Obs"] R2 <- stats["R2"] g <- stats['g'] pdm <- stats['pdm'] } else { k <- fit$non.slopes null.model <- length(fit$coefficients)==k refit <- if(null.model) ormfit2(y=y) else ormfit2(x, y, tol=1e-13) kr <- refit$non.slopes ## Model with no variables = null model stats <- refit$stats lr <- stats["Model L.R."] rho <- stats['rho'] shrink <- if(null.model) 1 else refit$coefficients[kr + 1] n <- stats["Obs"] R2 <- stats["R2"] g <- GiniMd(shrink*x) pdm <- stats['pdm'] } z <- c(rho, R2, shrink, g, pdm) names(z) <- c("rho", "R2", "Slope", "g", "pdm") z } ormfit2 <- function(x, y, maxit=12, tol=1e-7, penalty.matrix=NULL, xcol=NULL, ...) { if(length(xcol) && length(penalty.matrix) > 0) penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE] # x has names() like y >= ... - DROP ?? 
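## When backward stepdown retains only a subset of columns, xcol is used
## above to subset the penalty matrix so it stays conformable with the
## reduced design matrix passed to orm.fit.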
# predab.resample is getting constant x as if an intercept orm.fit(x, y, maxit=maxit, penalty.matrix=penalty.matrix, tol=tol) } z <- predab.resample(fit, method=method, fit=ormfit2, measure=discrim, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, non.slopes.in.x=FALSE, penalty.matrix=penalty.matrix, allow.varying.intercepts=TRUE, ...) kept <- attr(z, 'kept') dimnames(z) <- list(c("rho", "R2", "Slope", "g", "pdm"), c("index.orig","training","test","optimism", "index.corrected","n")) structure(z, class='validate', kept=kept) } rms/R/sensuc.s0000644000176200001440000002320412250460344012741 0ustar liggesuserssensuc <- function(fit, or.xu=seq(1,6,by=.5), or.u=or.xu, prev.u=.5, constrain.binary.sample=TRUE, or.method=c('x:u y:u','u|x,y'), event=function(y) if(is.matrix(y))y[,ncol(y)] else 1*y) { type <- class(fit)[1] if(type %nin% c('lrm','cph')) stop('fit must be from lrm or cph') or.method <- match.arg(or.method) X <- fit$x Y <- fit$y if(length(X)==0 || length(Y)==0) stop('did not specify x=TRUE, y=TRUE to fit') x <- X[,1] unq <- sort(unique(x)) if(length(unq) != 2 || unq[1] != 0 || unq[2] != 1) stop('x is not binary') event <- event(Y) unq <- sort(unique(event)) if(length(unq) != 2 || unq[1] != 0 || unq[2] != 1) stop('Y or event is not binary') ##Function to generate Bernoullis with exact proportion p except for roundoff bern <- function(n, p, constrain) { if(constrain) { sort.random <- function(x) { un <- runif(length(x)) x[order(un)] } ones <- round(n*p) zeros <- n - ones sort.random(c(rep(0,zeros),rep(1,ones))) } else sample(0:1, n, replace=TRUE, c(1-p,p)) } a00 <- mean(!event & !x) a10 <- mean(event & !x) a01 <- mean(!event & x) a11 <- mean(event & x) p.event <- mean(event) b1 <- p.event b0 <- 1 - b1 c1 <- mean(x) c0 <- 1 - c1 n <- length(event) n00 <- sum(!event & !x) n10 <- sum(event & !x) n01 <- sum(!event & x) n11 <- sum(event & x) m1 <- prev.u * n m0 <- n - m1 m <- length(or.xu) * length(or.u) OR.xu <- OR.u <- effect.x <- OOR.xu <- effect.u <- effect.u.adj <- Z <- double(m) Prev.u <- matrix(NA,nrow=m,ncol=4, dimnames=list(NULL,c('event=0 x=0','event=1 x=0', 'event=0 x=1','event=1 x=1'))) odds <- function(x) { p <- mean(x) p/(1-p) } j <- 0 cat('Current odds ratio for x:u=') for(c.or.xu in or.xu) { cat(c.or.xu,'') for(c.or.u in or.u) { j <- j + 1 OR.xu[j] <- c.or.xu OR.u[j] <- c.or.u if(or.method=='u|x,y') { beta <- logb(c.or.u) gamma <- logb(c.or.xu) f <- function(alpha,beta,gamma,a00,a10,a01,a11,prev.u) a00*plogis(alpha)+ a10*plogis(alpha+beta)+ a01*plogis(alpha+gamma)+ a11*plogis(alpha+beta+gamma) - prev.u alpha <- uniroot(f, lower=-10, upper=10, beta=beta, gamma=gamma, a00=a00, a10=a10, a01=a01, a11=a11, prev.u=prev.u)$root p00 <- plogis(alpha) p10 <- plogis(alpha+beta) p01 <- plogis(alpha+gamma) p11 <- plogis(alpha+beta+gamma) } else { ## Raking method, thanks to M Conaway rake2x2 <- function(prow,pcol,odds) { pstart <- matrix(1, nrow=2, ncol=2) pstart[1,1] <- odds pstart <- pstart/sum(pstart) oldp <- pstart maxdif <- 1 while(maxdif > .0001) { ## Adjust row totals obsrow <- oldp[,1]+oldp[,2] adjrow <- prow / obsrow newp <- oldp * cbind(adjrow,adjrow) ## Adjust col totals obscol <- newp[1,]+newp[2,] adjcol <- pcol / obscol newp <- newp * rbind(adjcol,adjcol) maxdif <- max(abs(newp - oldp)) oldp <- newp } c(newp[1,],newp[2,]) } lambda <- c.or.xu theta <- c.or.u prow <- c(1-prev.u, prev.u) pcol <- c(n00,n01,n10,n11)/n a <- matrix(c( 1,0,1,0,0,0,0,0, 0,1,0,1,0,0,0,0, 0,0,0,0,1,0,1,0, 0,0,0,0,0,1,0,1, 1,1,0,0,0,0,0,0, 0,0,1,1,0,0,0,0, 
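## Rows of this 12 x 8 indicator matrix (continued below) select the
## cells of the 2 x 2 x 2 cross-classification of u, x, and y whose
## probabilities must sum to the corresponding margin in targvec during
## raking; aindx lists the two cells that each row's adjustment updates.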
0,0,0,0,1,1,0,0, 0,0,0,0,0,0,1,1, 1,0,0,0,1,0,0,0, 0,1,0,0,0,1,0,0, 0,0,1,0,0,0,1,0, 0,0,0,1,0,0,0,1), nrow=12,byrow=TRUE) aindx <- matrix(c( 1,3, 2,4, 5,7, 6,8, 1,2, 3,4, 5,6, 7,8, 1,5, 2,6, 3,7, 4,8), ncol=2, byrow=TRUE) pcol1 <- c(pcol[1]+pcol[3], pcol[2]+pcol[4]) u <- rake2x2(prow, pcol1, lambda) pcol2 <- c(pcol[1]+pcol[2],pcol[3]+pcol[4]) w <- rake2x2(prow, pcol2, theta) newp8 <- p8start <- rep(1/8, 8) targvec <- c(u, w, pcol) d <- 1 while(d > .0001) { for(i in 1:12) { adjust <- targvec[i] / sum(a[i,] * newp8) newp8[aindx[i,]] <- adjust * newp8[aindx[i,]] } chktarg <- a %*% as.matrix(newp8) d <- max(abs(chktarg - targvec)) } p00 <- newp8[5]/a00 p01 <- newp8[6]/a01 p10 <- newp8[7]/a10 p11 <- newp8[8]/a11 ## prn(c(newp8[5],newp8[5]*n,newp8[5]/(newp8[1]+newp8[5]), ## newp8[5]*n/n00,newp8[5]/a00)) ## w_newp8 ## A_w[1];B_w[2];C_w[3];D_w[4];E_w[5];FF_w[6];G_w[7];H_w[8] ## prn((FF+H)*(A+C)/(B+D)/(E+G)) ## prn((G+H)*(A+B)/(E+FF)/(C+D)) ## w1_p01*b0+p11*b1 ## w2_p00*b0+p10*b1 ## prn((w1/(1-w1))/(w2/(1-w2))) ## z1_p10*c0+p11*c1 ## z2_p00*c0+p10*c1 ## prn((z1/(1-z1))/(z2/(1-z2))) } Prev.u[j,] <- c(p00,p10,p01,p11) u <- rep(0, n) i <- !event & !x u[i] <- bern(sum(i), p00, constrain.binary.sample) i <- event & !x u[i] <- bern(sum(i), p10, constrain.binary.sample) i <- !event & x u[i] <- bern(sum(i), p01, constrain.binary.sample) i <- event & x u[i] <- bern(sum(i), p11, constrain.binary.sample) OOR.xu[j] <- odds(u[x==1])/odds(u[x==0]) if(type=='cph') { g <- coxphFit(as.matrix(u),Y,rep(1,n),toler.chol=1e-11, iter.max=15,eps=.0001,method='efron') effect.u[j] <- exp(g$coefficients) g <- coxphFit(cbind(u,X),Y,rep(1,n),toler.chol=1e-11, iter.max=15,eps=.0001,method='efron') cof <- g$coefficients vr <- g$var } else { effect.u[j] <- odds(event[u==1])/odds(event[u==0]) g <- lrm.fit(cbind(u,X),event,maxit=20,tol=1E-11) ns <- g$non.slopes cof <- g$coefficients[-(1:ns)] vr <- g$var[-(1:ns),-(1:ns)] } z <- cof/sqrt(diag(vr)) effect.u.adj[j] <- exp(cof[1]) effect.x[j] <- exp(cof[2]) Z[j] <- z[2] } } cat('\n\n') structure(list(OR.xu=OR.xu,OOR.xu=OOR.xu,OR.u=OR.u, effect.x=effect.x,effect.u=effect.u,effect.u.adj=effect.u.adj, Z=Z,prev.u=prev.u,cond.prev.u=Prev.u, type=type), class='sensuc') } plot.sensuc <- function(x, ylim=c((1+trunc(min(x$effect.u)-.01))/ ifelse(type=='numbers',2,1), 1+trunc(max(x$effect.u)-.01)), xlab='Odds Ratio for X:U', ylab=if(x$type=='lrm')'Odds Ratio for Y:U' else 'Hazard Ratio for Y:U', digits=2, cex.effect=.75, cex.z=.6*cex.effect, delta=diff(par('usr')[3:4])/40, type=c('symbols','numbers','colors'), pch=c(15,18,5,0),col=c(2,3,1,4),alpha=.05, impressive.effect=function(x)x > 1,...) { type <- match.arg(type) Z <- abs(x$Z) or <- x$OOR.xu eu <- x$effect.u ex <- x$effect.x zcrit <- qnorm(1-alpha/2) plot(or, eu, ylim=ylim, xlab=xlab, ylab=ylab, type='n', ...) 
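## Axes only so far; points or labels are added below according to type=.
## Illustrative use (a sketch; f is a hypothetical lrm or cph fit made
## with x=TRUE, y=TRUE and a binary first predictor):
##   g <- sensuc(f, or.xu=seq(1, 6, by=.5))
##   plot(g)    # default type='symbols', keyed to effect size and Z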
if(type=='numbers') { text(or, eu, round(ex,digits), cex=cex.effect) text(or, eu - delta, round(Z,2), cex=cex.z) } else { i <- impressive.effect(ex) & Z >= zcrit if(any(i)) if(type=='symbols') points(or[i], eu[i], pch=pch[1]) else text(or[i], eu[i], round(ex[i],digits), cex=cex.effect, col=col[1]) i <- impressive.effect(ex) & Z < zcrit if(any(i)) if(type=='symbols') points(or[i], eu[i], pch=pch[2]) else text(or[i], eu[i], round(ex[i],digits), cex=cex.effect, col=col[2]) i <- !impressive.effect(ex) & Z < zcrit if(any(i)) if(type=='symbols') points(or[i], eu[i], pch=pch[3]) else text(or[i], eu[i], round(ex[i],digits), cex=cex.effect, col=col[3]) i <- !impressive.effect(ex) & Z >= zcrit if(any(i)) if(type=='symbols') points(or[i], eu[i], pch=pch[4]) else text(or[i], eu[i], round(ex[i],digits), cex=cex.effect, col=col[4]) } title(sub=paste('Prevalence of U:',format(x$prev.u)),adj=0) invisible() } rms/R/calibrate.default.s0000644000176200001440000001610413343244063015015 0ustar liggesuserscalibrate.default <- function(fit, predy, method=c("boot","crossvalidation",".632","randomization"), B=40, bw=FALSE, rule=c("aic","p"), type=c("residual","individual"), sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint, smoother="lowess", digits=NULL, ...) { call <- match.call() method <- match.arg(method) rule <- match.arg(rule) type <- match.arg(type) ns <- num.intercepts(fit) if(missing(kint)) kint <- floor((ns+1)/2) clas <- attr(fit,"class") model <- if(any(clas=="lrm"))"lr" else if(any(clas=="ols")) "ol" else stop("fit must be from lrm or ols") lev.name <- NULL yvar.name <- as.character(formula(fit))[2] y <- fit$y n <- length(y) if(length(y) == 0) stop("fit did not use x=TRUE,y=TRUE") if(model == "lr") { y <- factor(y) lev.name <- levels(y)[kint+1] fit$y <- as.integer(y)-1 } predicted <- if(model=="lr") plogis(fit$linear.predictors-fit$coefficients[1] + fit$coefficients[kint]) else fit$linear.predictors if(missing(predy)) { if(n < 11) stop("must have n > 10 if do not specify predy") p <- sort(predicted) predy <- seq(p[5], p[n-4], length=50) p <- NULL } penalty.matrix <- fit$penalty.matrix cal.error <- function(x, y, iter, smoother, predy, kint, model, digits=NULL, ...) { if(model=="lr") { x <- plogis(x) y <- y >= kint } if(length(digits)) x <- round(x, digits) smo <- if(is.function(smoother)) smoother(x, y) else lowess(x, y, iter=0) cal <- approx(smo, xout=predy, ties=function(x)x[1])$y if(iter==0) structure(cal - predy, keepinfo=list(orig.cal=cal)) else cal - predy } fitit <- function(x, y, model, penalty.matrix=NULL, xcol=NULL, ...) { if(length(penalty.matrix) && length(xcol)) { if(model=='ol') xcol <- xcol[-1] - 1 # take off intercept position penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE] } f <- switch(model, lr = lrm.fit(x, y, penalty.matrix=penalty.matrix, tol=1e-13), ol = if(length(penalty.matrix)==0) { w <- lm.fit.qr.bare(x, y, intercept=TRUE, xpxi=TRUE) w$var <- w$xpxi * sum(w$residuals^2) / (length(y) - length(w$coefficients)) w } else lm.pfit(x, y, penalty.matrix=penalty.matrix) ) if(any(is.na(f$coefficients))) f$fail <- TRUE f } z <- predab.resample(fit, method=method, fit=fitit, measure=cal.error, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, non.slopes.in.x=model=="ol", smoother=smoother, predy=predy, model=model, kint=kint, penalty.matrix=penalty.matrix, ...) 
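## z has one row per predy grid point; its 'optimism' column is
## subtracted from the apparent curve below, and attr(z, 'keepinfo')
## carries the apparent calibration from the original fit (iter=0).
## Illustrative use (a sketch; d, y, x1, x2 are hypothetical):
##   f <- lrm(y ~ x1 + x2, data=d, x=TRUE, y=TRUE)
##   cal <- calibrate(f, B=200)
##   plot(cal)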
orig.cal <- attr(z, 'keepinfo')$orig.cal z <- cbind(predy, calibrated.orig=orig.cal, calibrated.corrected=orig.cal - z[,"optimism"], z) structure(z, class="calibrate.default", call=call, kint=kint, model=model, lev.name=lev.name, yvar.name=yvar.name, n=n, freq=fit$freq, non.slopes=ns, B=B, method=method, predicted=predicted, smoother=smoother) } print.calibrate.default <- function(x, B=Inf, ...) { at <- attributes(x) cat("\nEstimates of Calibration Accuracy by ",at$method," (B=",at$B,")\n\n", sep="") dput(at$call) if(at$model=="lr") { lab <- paste("Pr{",at$yvar.name,sep="") if(at$non.slopes==1) lab <- paste(lab,"=",at$lev.name,"}",sep="") else lab <- paste(lab,">=",at$lev.name,"}",sep="") } else lab <- at$yvar.name cat("\nPrediction of",lab,"\n\n") predicted <- at$predicted if(length(predicted)) { ## for downward compatibility s <- !is.na(x[,'predy'] + x[,'calibrated.corrected']) err <- predicted - approx(x[s,'predy'],x[s,'calibrated.corrected'], xout=predicted, ties=mean)$y cat('\nn=',length(err), ' Mean absolute error=', round(mean(abs(err),na.rm=TRUE),3),' Mean squared error=', round(mean(err^2,na.rm=TRUE),5), '\n0.9 Quantile of absolute error=', round(quantile(abs(err),.9,na.rm=TRUE),3), '\n\n',sep='') } print.default(x) kept <- at$kept if(length(kept)) { cat("\nFactors Retained in Backwards Elimination\n\n") varin <- ifelse(kept, '*', ' ') print(varin[1:min(nrow(varin), B),], quote=FALSE) cat("\nFrequencies of Numbers of Factors Retained\n\n") nkept <- apply(kept, 1, sum) tkept <- table(nkept) names(dimnames(tkept)) <- NULL print(tkept) } invisible() } plot.calibrate.default <- function(x, xlab, ylab, xlim, ylim, legend=TRUE, subtitles=TRUE, cex.subtitles=.75, riskdist=TRUE, scat1d.opts=list(nhistSpike=200), ...) { at <- attributes(x) if(missing(ylab)) ylab <- if(at$model=="lr") "Actual Probability" else paste("Observed", at$yvar.name) if(missing(xlab)) { if(at$model=="lr") { xlab <- paste("Predicted Pr{",at$yvar.name,sep="") if(at$non.slopes==1) { xlab <- if(at$lev.name=="TRUE") paste(xlab, "}", sep="") else paste(xlab,"=", at$lev.name, "}", sep="") } else xlab <- paste(xlab,">=", at$lev.name, "}", sep="") } else xlab <- paste("Predicted", at$yvar.name) } p <- x[,"predy"] p.app <- x[,"calibrated.orig"] p.cal <- x[,"calibrated.corrected"] if(missing(xlim) & missing(ylim)) xlim <- ylim <- range(c(p, p.app, p.cal), na.rm=TRUE) else { if(missing(xlim)) xlim <- range(p) if(missing(ylim)) ylim <- range(c(p.app, p.cal, na.rm=TRUE)) } plot(p, p.app, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type="n", ...) 
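## Blank axes set up above; the code below overlays the apparent (lty=3)
## and bias-corrected (lty=1) calibration curves, the ideal 45-degree
## line (lty=2), and optionally a spike histogram of the predicted
## values via scat1d.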
predicted <- at$predicted err <- NULL if(length(predicted)) { ## for downward compatibility s <- !is.na(p + p.cal) err <- predicted - approx(p[s], p.cal[s], xout=predicted, ties=mean)$y cat('\nn=',n <- length(err), ' Mean absolute error=', round(mae <- mean(abs(err), na.rm=TRUE),3),' Mean squared error=', round(mean(err^2, na.rm=TRUE),5), '\n0.9 Quantile of absolute error=', round(quantile(abs(err), .9, na.rm=TRUE),3), '\n\n', sep='') if(subtitles) title(sub=paste('Mean absolute error=', round(mae,3), ' n=', n, sep=''), cex.sub=cex.subtitles, adj=1) if(riskdist) do.call('scat1d', c(list(x=predicted), scat1d.opts)) } lines(p, p.app, lty=3) lines(p, p.cal, lty=1) abline(a=0, b=1, lty=2) if(subtitles) title(sub=paste("B=", at$B, "repetitions,", at$method), cex.sub=cex.subtitles, adj=0) if(!(is.logical(legend) && !legend)) { if(is.logical(legend)) legend <- list(x=xlim[1] + .55*diff(xlim), y=ylim[1] + .32*diff(ylim)) legend(legend, c("Apparent", "Bias-corrected", "Ideal"), lty=c(3,1,2), bty="n") } invisible(err) } rms/R/datadist.s0000644000176200001440000001372012434363047013246 0ustar liggesusersdatadist <- function(..., data, q.display, q.effect=c(.25,.75), adjto.cat=c('mode','first'), n.unique=10) { adjto.cat <- match.arg(adjto.cat) X <- list(...) argnames <- as.character(sys.call())[-1] if(inherits(x <- X[[1]],"datadist")) { Limits <- x$limits Values <- x$values X[[1]] <- NULL argnames <- argnames[-1] } else { Limits <- list() Values <- list() } if(is.data.frame(X[[1]])) { if(length(X) > 1) stop('when the first argument is a data frame, no other variables may be specified') X <- X[[1]] } else if(is.recursive(X[[1]]) && length(Terms <- X[[1]]$terms) && length(D <- attr(Terms,"Design"))) { n <- D$name[D$assume != "interaction"] X <- list() if(missing(data)) for(nm in n) X[[nm]] <- eval.parent(nm) else if(length(names(data))) { j <- match(n, names(data), 0) if(any(j == 0)) stop(paste("variable(s)", paste(n[j == 0],collapse=" "), "in model not found on data=, \nwhich has variables", paste(names(data),collapse=" "))) for(nm in n) X[[nm]] <- data[[nm]] } else for(nm in n) X[[nm]] <- get(nm, data) } else { if(length(X) & !length(names(X))) names(X) <- argnames[1 : length(X)] ### NEED TO FIX: R has no database.object if(!missing(data)) { ## This duplicative code is for efficiency for large data frames stop('program logic error') if(length(X)) { ## if(is.numeric(data)) X <- c(X,database.object(data)) ## else X <- c(X, data) } else { ## if(is.numeric(data)) X <- database.object(data) ## else X <- data } } } nam <- names(X) p <- length(nam) if(p == 0) stop("you must specify individual variables or a data frame") maxl <- 0 for(i in 1 : p) { values <- NULL x <- X[[i]] if(is.character(x)) x <- as.factor(x) lx <- length(x) lev <- levels(x) ll <- length(lev) limits <- rep(NA, 5) if(is.matrix(x) | (i > 1 && lx != maxl)) warning(paste(nam[i],"is a matrix or has incorrect length; ignored")) else { if(ll && (ll < length(x))) values <- lev # if # levels=length(x) is ID variable ## First look for ordered variable with numeric levels (scored() var) if(is.ordered(x) && all.is.numeric(lev)) { levx <- sort(as.numeric(lev)) limits <- c(levx[1],levx[(ll+1)/2],levx[ll],levx[1],levx[ll], levx[1],levx[ll]) values <- levx } else if(ll) { adjto <- if(adjto.cat == 'first') lev[1] else { tab <- table(x) (names(tab)[tab == max(tab)])[1] } limits <- factor(c(NA,adjto,NA,lev[1],lev[ll],lev[1],lev[ll]), levels=lev) ## non-ordered categorical } else { # regular numeric variable clx <- setdiff(class(x), c('integer', 'numeric')) 
## Above prevents rounding of quantiles to integers y <- x[!is.na(x)] n <- length(y) if(n < 2) stop(paste("fewer than 2 non-missing observations for",nam[i])) values <- sort(unique(y)) names(values) <- NULL nunique <- length(values) if(nunique < 2) { warning(paste(nam[i],"is constant")) limits <- rep(y[1], 7) } else { r <- range(values) limits[6 : 7] <- r if(nunique<4) q <- r else { if(missing(q.display)) { q.display <- 10 / max(n, 200) q.display <- c(q.display, 1 - q.display) } q <- quantile(unclass(y), q.display) } #chron obj. not work here limits[4] <- q[1]; limits[5] <- q[2] ## check for very poorly distributed categorical numeric variable if(limits[4] == limits[5]) limits[4 : 5] <- r ## Use low category if binary var, middle if 3-level, median otherwise if(nunique < 3) limits[2] <- values[1] else if(nunique == 3) limits[2] <- values[2] else limits[2] <- median(unclass(y)) if(nunique < 4) q <- r else q <- quantile(unclass(y), q.effect) limits[1] <- q[1]; limits[3] <- q[2] if(limits[1] == limits[3]) limits[c(1,3)] <- r if(nunique > n.unique) values <- NULL class(limits) <- clx } } Limits[[nam[i]]] <- limits if(length(values)) Values[[nam[i]]] <- values maxl <- max(maxl, lx) } } Limits <- structure(Limits, class="data.frame", row.names=c("Low:effect","Adjust to", "High:effect","Low:prediction", "High:prediction","Low","High")) ##data.frame(Limits) gives error with chron objects d <- list(limits=Limits, values=Values) class(d) <- "datadist" d } print.datadist <- function(x, ...) { lim <- x$limits for(n in names(lim)) { z <- lim[[n]] if(inherits(z,"dates") | inherits(z,"times")) lim[[n]] <- factor(format(z)) } if(length(lim)) print(lim) ##print.data.frame doesn't print chron objects correctly if(length(V <- x$values)) { cat("\nValues:\n\n") wid <- .Options$width for(n in names(V)) { v <- V[[n]] if(length(v) == 0) next # for gendata if(is.character(v) && length(v) > 80) v <- c(v[1 : 20], paste("+", length(v), "others")) w <- if(is.character(v)) v else format(v) nc <- nchar(paste(w, collapse=" ")) if(nc+nchar(n) + 4 > wid) {cat(n,":\n"); print(v, quote=FALSE)} else cat(n,":",w,"\n") } } invisible() } rms/R/hazard.ratio.plot.s0000644000176200001440000001040213065740424015005 0ustar liggesusershazard.ratio.plot <- function(x, Srv, which, times, e=30, subset, conf.int=.95, legendloc=NULL, smooth=TRUE, pr=FALSE, pl=TRUE, add=FALSE, ylim,cex=.5, xlab="t",ylab, antilog=FALSE, ...) { if(missing(ylab)) ylab <- if(antilog)"Hazard Ratio" else "Log Hazard Ratio" trans <- if(antilog) function(x) exp(x) else function(x) x if(is.matrix(x)) { nam <- dimnames(x)[[2]] if(! length(nam)) nam <- paste("x[",1:ncol(x),"]",sep="") } else { nam <- label(x) x <- as.matrix(unclass(x)) if(! length(nam)) nam <- "" } y <- Srv[,1]; event <- Srv[,2] if(length(y) != nrow(x))stop("number of rows in x must be length of y") nx <- ncol(x) if(missing(which)) which <- 1:nx labele <- label(Srv, type='event') if(! length(labele)) labele <- "" isna <- is.na(matxv(x,rep(1,nx)) + y + event) if(! missing(subset))isna <- isna | (! subset) x <- x[! isna,,drop=FALSE] if(length(dimnames(x)[[2]])==0) dimnames(x) <- list(NULL,paste("x",1:nx,sep="")) y <- y[! isna] event <- event[! isna] if(! 
missing(times))uft<-c(0,sort(times),1000000) else { nblock <- max(round(sum(event) / e), 2) uft<-c(0, quantile(y[event == 1], seq(0, 1, length=nblock + 1))[2 : nblock], 1000000) uft <- unique(uft) } thr <- NULL lhr <- NULL se <- NULL for(i in seq(length(uft)-1)) { s<-y>=uft[i] tt<-pmin(y[s],uft[i+1]) ev<-event[s] & (y[s]<=uft[i+1]) if(sum(ev)>nx) { cox <- coxphFit(x[s,,drop=FALSE], cbind(tt,ev), iter.max=10, eps=.0001, method="efron", type=attr(Srv, 'type')) if(! is.character(cox)) { if(pr) { r <- range(tt) cat(paste("Time interval:",format(r[1]),"-", format(r[2])," At Risk:",sum(s), " Events:",sum(ev),"\n")) k <- cbind(cox$coefficients,sqrt(diag(cox$var))) dimnames(k) <- list(names(cox$coefficients), c("Coef","S.E.")) print(k) } tmid <- mean(y[y>=uft[i] & y<=uft[i+1]]) thr <- c(thr,tmid) lhr <- cbind(lhr,cox$coef) se <- cbind(se,sqrt(diag(cox$var))) } } } if(! pl) return(list(time=thr,log.hazard.ratio=lhr,se=se)) zcrit<-qnorm((1+conf.int)/2) for(j in which) { lhrj <- lhr[j,] sej <- se[j,] labelx <- nam[j] if(missing(ylim)) ylim <- trans(range(c(lhrj+zcrit*sej,lhrj-zcrit*sej))) if(! add) { oldpar <- par(c('err','mar')) on.exit(par(oldpar)) oldmar <- oldpar$mar if(labelx != "" & labele != "") oldmar[1] <- oldmar[1] + 1 par(err=-1,mar=oldmar) plot(thr, trans(lhrj), xlab=xlab, ylim=ylim, ylab=ylab,...) } else points(thr,trans(lhrj)) lines(thr,trans(lhrj)) lines(thr,trans(lhrj+zcrit*sej),lty=2) lines(thr,trans(lhrj-zcrit*sej),lty=2) leg <- c("Subset Estimate",paste(format(conf.int),"C.L.")) ltype <- 1:2 if(smooth & length(thr)>3) { lines(supsmu(thr, trans(lhrj)), lty=3) leg <- c(leg,"Smoothed") ltype <- c(ltype,3) } if(! add) { labels <- "" if(labelx != "")labels <- paste("Predictor:",labelx,"\n",sep="") if(labele != "")labels <- paste(labels,"Event:",labele,sep="") title(sub=labels,adj=1,cex=cex) if(! interactive() && ! length(legendloc)) legendloc <- "ll" if(! length(legendloc)) { cat("Click left mouse button at upper left corner for legend\n") z <- locator(1) legendloc <- "l" } else if(legendloc[1] != "none") { if(legendloc[1] == "ll") z <- list(x=par("usr")[1],y=par("usr")[3]) else z <- list(x=legendloc[1],y=legendloc[2]) } if(legendloc[1] != "none") legend(z,leg,lty=ltype,cex=cex,bty="n") } } list(time=thr,log.hazard.ratio=lhr,se=se) } rms/R/plotp.Predict.s0000644000176200001440000002015114014235266014171 0ustar liggesusersplotp.Predict <- function(data, subset, xlim, ylim, xlab, ylab, rdata=NULL, nlevels=3, vnames=c('labels', 'names'), histSpike.opts=list(frac=function(f) 0.01 + 0.02 * sqrt(f - 1)/sqrt(max(f, 2) - 1), side=1, nint=100), ncols=3, width=800, ...) { varypred <- ('.set.' %in% names(data)) && ('.predictor.' %nin% names(data)) if(varypred) { data$.predictor. <- data$.set. data$.set. <- NULL } predpres <- length(data$.predictor.) > 0 vnames <- match.arg(vnames) dohist <- function(...) { so <- histSpike.opts do.call('histSpikeg', c(list(...), so)) } info <- attr(data, 'info') at <- info$Design label <- at$label units <- at$units adjust <- info$adjust varying <- setdiff(info$varying, '.set.') if(predpres && identical(sort(unique(data$.predictor.)), sort(varying))) varying <- NULL conf.int <- info$conf.int if(length(varying) > 2) stop('more than 2 varying variables not allowed') pmlabel <- character(length(label)) names(pmlabel) <- names(label) for(i in 1 : length(label)) pmlabel[i] <-markupSpecs$html$varlabel(label[i], units[i]) if(predpres) data$.Predictor. <- switch(vnames, names = data$.predictor., labels = pmlabel[as.character(data$.predictor.)] ) if(! 
missing(subset)) { subset <- eval(substitute(subset), data) data <- data[subset,, drop=FALSE] } if(missing(ylab)) ylab <- info$ylabhtml if(! length(data$lower)) conf.int <- FALSE cllab <- if(conf.int) paste(conf.int, 'C.L.') if(missing(ylim)) ylim <- if(conf.int) with(data, c(min(c(yhat, lower), na.rm=TRUE), max(c(yhat, upper), na.rm=TRUE))) else range(data$yhat, na.rm=TRUE) adjto <- paste0('Adjusted to:<br>
', adjust) if(predpres) names(adjto) <- unique(data$.predictor.) fm <- function(x) format(x, digits=4) if(predpres) { ## User did not specify which predictors to plot; all plotted data$.predictor. <- factor(data$.predictor., unique(data$.predictor.)) ## Determine which predictors are discrete isdiscrete <- function(z) is.factor(z) || is.character(z) || length(unique(z[!is.na(z)])) <= nlevels lp <- levels(data$.predictor.) isdis <- sapply(data[lp], isdiscrete) ## Do all continuous predictors vcon <- lp[! isdis] ncont <- 0 cont <- list() height <- 400 * ceiling(length(vcon) / ncols) for(v in vcon) { ncont <- ncont + 1 dat <- data[data$.predictor. == v,, drop=FALSE] dat$.x. <- dat[[v]] xlab <- pmlabel[v] ht <- if(conf.int) with(dat, paste0(v, '=', fm(.x.), '
<br>', fm(yhat), ' [', fm(lower), ',', fm(upper), ']')) else with(dat, paste0(v, '=', fm(.x.), '
<br>', fm(yhat))) if(length(varying) != 2) { ht[1] <- paste0(ht[1], '<br>
', adjto[v]) dat$.ht. <- ht a <- plotly::plot_ly(dat, height=height, width=width) a <- plotly::add_lines(a, x=~.x., y=~yhat, text=~.ht., color=I('black'), hoverinfo='text', name='Estimate', legendgroup='Estimate', showlegend=ncont == 1) if(conf.int) a <- plotly::add_ribbons(a, x=~.x., ymin=~lower, ymax=~upper, color=I('lightgray'), hoverinfo='none', name=cllab, legendgroup=cllab, showlegend=ncont == 1) if(length(rdata) && v %in% names(rdata)) { form <- as.formula(paste('yhat ~', v)) a <- histSpikeg(form, data=rdata, predictions=dat, ylim=ylim, plotly=a, showlegend=ncont == 1) } } else { # a second variable (for superpositioning) is varying w <- varying[2] dat$.g. <- dat[[w]] j <- which(dat$.x. == min(dat$.x.)) ht[j] <- paste0(ht[j], '<br>
', adjto[v]) dat$.ht. <- ht a <- plotly::plot_ly(dat, height=height, width=width) a <- plotly::add_lines(a, x=~.x., y=~yhat, text=~.ht., color=~.g., hoverinfo='text', name='Estimate', legendgroup='Estimate', showlegend=ncont == 1) if(conf.int) a <- plotly::add_ribbons(a, x=~.x., ymin=~lower, ymax=~upper, color=~.g., hoverinfo='none', name=cllab, legendgroup=cllab, showlegend=ncont == 1) if(length(rdata) && all(c(v, w) %in% names(rdata))) { form <- as.formula(paste('yhat ~', v, '+', w)) a <- histSpikeg(form, data=rdata, predictions=dat, ylim=ylim, plotly=a, showlegend=ncont == 1) } } a <- plotly::layout(a, xaxis=list(title=xlab), yaxis=list(title=ylab)) cont[[ncont]] <- a } if(ncont > 0) { if(ncont == 1) cont <- cont[[1]] else { nrows <- ceiling(ncont / ncols) cont <- plotly::subplot(cont, nrows=nrows, shareY=TRUE, titleX=TRUE) } } ## Do all categorical predictors if(sum(isdis) == 0) return(cont) vcat <- lp[isdis] ncat <- 0 catg <- list() nlev <- integer(length(vcat)) major <- minor <- character(0) X <- Lower <- Upper <- numeric(0) for(v in vcat) { ncat <- ncat + 1 dat <- data[data$.predictor. == v,, drop=FALSE] dat$.x. <- dat[[v]] xlab <- pmlabel[v] X <- c(X, dat$yhat) if(conf.int) { Lower <- c(Lower, dat$lower) Upper <- c(Upper, dat$upper) } minor <- c(minor, as.character(dat[[v]])) major <- c(major, rep(xlab, nrow(dat))) } catg <- dotchartpl(X, major, minor, lower=Lower, upper=Upper, htext=format(X, digits=4), xlab=ylab, tracename='Estimate', limitstracename=cllab, width=width) return(list(Continuous=cont, Categorical=catg)) } ## .predictor. not present; assume one plot v <- varying[1] data$.x. <- data[[v]] if(length(varying) > 1) { w <- varying[2] data$.g. <- data[[w]] } ht <- with(data, paste0(v, '=', fm(data$.x.), '
<br>', fm(yhat))) if(conf.int) ht <- paste0(ht, ' [', fm(data$lower), ', ', fm(data$upper), ']') j <- which(data$.x. == min(data$.x.)) ht[j] <- paste0(ht[j], '<br>
', adjto) data$.ht. <- ht a <- plotly::plot_ly(data) if(length(varying) == 1) { a <- plotly::add_lines(a, x=~.x., y=~yhat, color=I('black'), text=~.ht., hoverinfo='text', name='Estimate') if(conf.int) a <- plotly::add_ribbons(a, x=~.x., ymin=~lower, ymax=~upper, hoverinfo='none', name=cllab, color=I('lightgray')) if(length(rdata) && varying %in% names(rdata)) { form <- as.formula(paste('yhat ~', v)) a <- histSpikeg(form, predictions=data, data=rdata, plotly=a, ylim=ylim) } } else { # superpositioning (grouping) variable also present a <- plotly::add_lines(a, x=~.x., y=~yhat, color=~.g., text=~.ht., hoverinfo='text') if(conf.int) a <- plotly::add_ribbons(a, x=~.x., ymin=~lower, ymax=~upper, color=~.g., hoverinfo='none') if(length(rdata) && all(varying %in% names(rdata))) { form <- as.formula(paste('yhat ~', v, '+', w)) a <- histSpikeg(form, predictions=data, data=rdata, plotly=a, ylim=ylim) } } if(missing(xlab)) xlab <- pmlabel[v] if(missing(xlim)) xlim <- NULL #range(data$.x.) plotly::layout(a, xaxis=list(title=xlab, range=xlim), yaxis=list(title=ylab, range=ylim)) } rms/R/validate.cph.s0000644000176200001440000001136414024422774014015 0ustar liggesusersvalidate.cph <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, dxy=TRUE, u, tol=1e-9, ...) { atr <- fit$Design need.surv <- dxy & any(atr$assume.code == 8) if(need.surv & missing(u)) stop("Presence of strata -> survival estimates needed for dxy; u omitted") modtype <- fit$method discrim <- function(x, y, strata, fit, iter, evalfit=FALSE, dxy=TRUE, need.surv=FALSE, u, modtype, pr=FALSE, ...) { n <- nrow(y) if(! length(x) || length(unique(x)) == 1) { Dxy <- 0 slope <- 1 D <- 0 U <- 0 R2 <- 0 } else { x <- as.matrix(x) dimnames(x) <- list(as.character(1:nrow(x)),as.character(1:ncol(x))) if(evalfit) { #Fit was for training sample lr <- -2 * (fit$loglik[1] - fit$loglik[2]) ll0 <- -2 * fit$loglik[1] slope <- 1 D <- (lr - 1)/ll0 U <- -2/ll0 R2.max <- 1 - exp(-ll0/n) R2 <- (1 - exp(-lr/n))/R2.max g <- GiniMd(x) } else { type <- attr(y, "type") storage.mode(x) <- "double" f <- coxphFit(x=x, y=y, strata=strata, iter.max=10, eps=.0001, method=modtype, type=type) if(f$fail) stop('fit failure in discrim,coxphFit') ##x is x*beta from training sample lr <- -2 * (f$loglik[1]-f$loglik[2]) ll0 <- -2 * f$loglik[1] slope <- f$coef[1] D <- (lr - 1)/ll0 R2.max <- 1 - exp(-ll0/n) R2 <- (1 - exp(-lr/n))/R2.max f.frozen <- coxphFit(x=x, y=y, strata=strata, iter.max=0, eps=.0001, method=modtype, init=1, type=type) if(f.frozen$fail) stop('fit failure in discrim for f.frozen') U <- -2 * (f.frozen$loglik[2] - f$loglik[2]) / ll0 g <- GiniMd(slope*x) } } Q <- D - U z <- c(R2, slope, D, U, Q, g) nam <- c("R2","Slope", "D", "U", "Q", "g") if(dxy) { if(need.surv) { attr(x, "strata") <- strata x <- survest(fit, linear.predictors=x, times=u, conf.int=FALSE)$surv dxytype <- 'time' } else dxytype <- 'hazard' Dxy <- dxy.cens(x, y, type=dxytype)["Dxy"] z <- c(Dxy, z) nam <- c("Dxy", nam) } names(z) <- nam z } cox.fit <- function(x, y, strata, u, need.surv=FALSE, modtype, tol=1e-9, ...) { if(! length(x)) return(list(fail=FALSE,coefficients=numeric(0))) if(! need.surv) u <- 0 ## coxph(x,y,e,pr=F,surv=need.surv) if(! 
need.surv) { type <- attr(y, 'type') storage.mode(x) <- "double" x <- as.matrix(x) dimnames(x) <- list(as.character(1:nrow(x)),as.character(1:ncol(x))) f <- coxphFit(x=x, y=y, strata=strata, iter.max=10, eps=.0001, method=modtype, toler.chol=tol, type=type) if(f$fail) return(f) if(any(is.na(f$coef))) { cat('Singularity in coxph.fit. Coefficients:\n'); print(f$coef) return(list(fail=TRUE)) } return(f) } x <- x #Don't want lazy evaluation of complex expression f <- if(length(strata)) cph(y ~ x + strat(strata), surv=TRUE, method=modtype) else cph(y ~ x, surv=TRUE, method=modtype) f$non.slopes <- f$assume.code <- f$assign <- f$name <- f$assume <- NULL ##Don't fool fastbw called from predab.resample f } predab.resample(fit, method=method, fit=cox.fit, measure=discrim, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, dxy=dxy, u=u, need.surv=need.surv, modtype=modtype,tol=tol, ...) } dxy.cens <- function(x, y, type=c('time','hazard')) { type <- match.arg(type) negate <- type == 'hazard' if(! is.Surv(y)) y <- Surv(y) else { stype <- attr(y, 'type') if(length(stype) == 1 && stype == 'left') { y <- Surv(y[,1], y[,2]) # right censored negate <- ! negate } } i <- is.na(x) | is.na(y) if(any(i)) { x <- x[! i] y <- y[! i,] } k <- suppressWarnings(survConcordance.fit(y, x)) cindex <- (k[1] + k[3] / 2) / sum(k[1:3]) cindex <- 1 - cindex # survConcordance c=larger risk score, shorter T se <- k[5] / (2 * sum(k[1 : 3])) dxy <- 2 * (cindex - .5) se <- 2 * se if(negate) dxy <- - dxy structure(c(dxy=dxy, se=se), names=c('Dxy','se')) } rms/R/latex.ols.s0000644000176200001440000000256313020562305013353 0ustar liggesuserslatex.ols <- function(object, title, file='', append=FALSE, which, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', ...) { f <- object md <- prType() %in% c('html', 'md', 'markdown') w <- if(length(caption)) { if(md) paste('
<div align=center><strong>', caption, '</strong></div>
', sep='') else paste('\\begin{center} \\bf', caption,'\\end{center}') } if(missing(which) & !inline) { Y <- paste("{\\rm ",as.character(attr(f$terms,"formula")[2]),"}",sep="") w <- c(w, paste("\\[{\\rm E(",Y, "}) = X\\beta, {\\rm \\ \\ where} \\\\ \\]", sep="")) } at <- f$Design if(missing(which)) which <- 1:length(at$name) if(missing(varnames)) varnames <- at$name[at$assume.code!=9] if(! md) cat(w, file=file, sep=if(length(w)) "\n" else "", append=append) z <- latexrms(f, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix="X\\hat{\\beta}", inline=inline, pretrans=pretrans, digits=digits, size=size) if(md) htmltools::HTML(c(paste0(w, '\n'), as.character(z))) else z } rms/R/cr.setup.s0000644000176200001440000000134612250460457013214 0ustar liggesuserscr.setup <- function(y) { yname <- as.character(substitute(y)) if(!is.factor(y)) y <- factor(y, exclude=NA) y <- unclass(y) # in case is.factor ylevels <- levels(y) kint <- length(ylevels) - 1 y <- as.integer(y-1) reps <- ifelse(is.na(y), 1, ifelse(y < kint-1, y+1, kint)) subs <- rep(1:length(y), reps) cuts <- vector('list',kint+2) cuts[[1]] <- NA for(j in 0:kint) cuts[[j+2]] <- if(j < kint-1) 0:j else 0:(kint-1) cuts <- unlist(cuts[ifelse(is.na(y),1,y+2)]) y <- rep(y, reps) Y <- 1*(y==cuts) labels <- c('all', paste(yname,'>=',ylevels[2:kint],sep='')) cohort <- factor(cuts, levels=0:(kint-1), labels=labels) list(y=Y, cohort=cohort, subs=subs, reps=reps) } rms/R/residuals.lrm.s0000644000176200001440000003252113662101075014230 0ustar liggesusersresiduals.lrm <- function(object, type=c("li.shepherd", "ordinary","score","score.binary","pearson", "deviance","pseudo.dep","partial", "dfbeta","dfbetas","dffit","dffits","hat","gof","lp1"), pl=FALSE, xlim, ylim, kint=1, label.curves=TRUE, which, ...) { gotsupsmu <- FALSE type <- match.arg(type) dopl <- (is.logical(pl) && pl) || is.character(pl) ylabpr <- NULL # y-axis label for partial residuals k <- object$non.slopes L <- object$linear.predictors isorm <- inherits(object, 'orm') trans <- object$trans cumprob <- if(length(trans)) trans$cumprob else plogis deriv <- if(length(trans)) trans$deriv else function(x, f) f * (1 - f) if(length(L) == 0) stop('you did not use linear.predictors=TRUE for the fit') if(kint < 1 | kint > k) stop(paste('kint must be from 1-', k, sep='')) cof <- object$coefficients ordone <- type %in% c('li.shepherd','partial','gof','score','score.binary') ## residuals explicitly handled for ordinal model if(ordone && !missing(kint)) stop('may not specify kint for li.shepherd, partial, score, score.binary, or gof') if(isorm) L <- L - cof[attr(L, 'intercepts')] + cof[1] if(k > 1 && kint != 1 && ! ordone) L <- L - cof[1] + cof[kint] P <- cumprob(L) if(length(Y <- object$y) == 0) stop("you did not specify y=TRUE in the fit") rnam <- names(Y) cnam <- names(cof) if(!is.factor(Y)) Y <- factor(Y) lev <- levels(Y) lev2 <- names(cof)[1:k] Y <- unclass(Y) - 1 if(! 
ordone && k > 1) Y <- Y >= kint if(k > 1 && missing(kint) && !ordone) warning(paste('using first intercept and ', lev2[kint], ' to compute residuals or test GOF', sep='')) if(type=="gof") { if(length(X <- object$x)==0) stop("you did not use x=TRUE in the fit") stats <- matrix(NA, nrow=k, ncol=5, dimnames=list(if(k > 1) lev2, c("Sum of squared errors", "Expected value|H0", "SD", "Z", "P"))) X <- cbind(1,X) for(j in 1:k) { y <- Y >= j p <- cumprob(L - cof[1] + cof[j]) sse <- sum((y - p)^2) wt <- p * (1 - p) d <- 1 - 2 * p z <- lm.wfit(X, d, wt, method='qr') ## res <- summary(lm.wfit(X, d, wt, method="qr"))$residuals 11Apr02 res <- z$residuals * sqrt(z$weights) sd <- sqrt(sum(res^2)) ev <- sum(wt) z <- (sse-ev)/sd P <- 2 * pnorm(- abs(z)) stats[j,] <- c(sse, ev, sd, z, P) } return(drop(stats)) } naa <- object$na.action if(type=="ordinary") return(naresid(naa, Y - cumprob(L))) if(type %in% c('score','score.binary','partial')) { nc <- length(cof) if(missing(which)) which <- if(type == 'score') 1:nc else 1:(nc - k) else if(type=='score') which <- k + which } if(type=='score' || type=='score.binary') plotit <- function(w, ylim, xlab, ylab, lev=names(w)) { statsum <- function(x) { n <- length(x) xbar <- sum(x) / n if(n < 2) {low <- hi <- NA} else { se <- 1.959964 * sqrt(sum((x - xbar)^2) / (n - 1) / n) low <- xbar - se; hi <- xbar + se } c(mean=xbar, lower=low, upper=hi) } k <- length(w) w <- lapply(w, statsum) plot(c(1,k), c(0,0), xlab=xlab, ylab=ylab, ylim=if(length(ylim)==0) range(unlist(w)) else ylim, type='n', axes=FALSE) mgp.axis(2) mgp.axis(1, at=1:k, labels=lev) abline(h=0, lty=2, lwd=1) ii <- 0 for(ww in w) { ii <- ii+1 points(ii, ww[1]) errbar(ii, ww[1], ww[3], ww[2], add=TRUE) } } if(type=='score.binary') { if(k==1) stop('score.binary only applies to ordinal models') if(!dopl) stop('score.binary only applies if you are plotting') if(!length(X <- unclass(object$x))) stop('you did not specify x=TRUE for the fit') xname <- dimnames(X)[[2]] yname <- as.character(formula(object))[2] for(i in which) { xi <- X[,i] r <- vector('list',k) names(r) <- lev[-1] for(j in 1:k) r[[j]] <- xi * ((Y >= j) - cumprob(L - cof[1] + cof[j])) if(pl!='boxplot') plotit(r, ylim=if(missing(ylim)) NULL else ylim, xlab=yname, ylab=xname[i]) else boxplot(r, varwidth=TRUE, notch=TRUE, err=-1, ylim=if(missing(ylim)) quantile(unlist(r), c(.1, .9)) else ylim, ...) title(xlab=yname, ylab=xname[i]) } invisible() } if(type=="score") { if(! 
length(X <- unclass(object$x))) stop("you did not specify x=TRUE for the fit") if(k == 1) return(naresid(naa, cbind(1, X) * (Y - P))) # only one intercept # z <- function(i, k, L, coef) # cumprob(coef[pmin(pmax(i, 1), k)] + L) ## Mainly need the pmax - 0 subscript will drop element from vector ## z$k <- k; z$L <- L-cof[1]; z$coef <- cof # formals(z) <- list(i=NA, k=k, L=L-cof[1], coef=cof) ## set defaults in fctn def'n u <- matrix(NA, nrow=length(L), ncol=length(which), dimnames=list(names(L), names(cof)[which])) ## Compute probabilities of being in observed cells # pc <- ifelse(Y==0, 1 - z(1), ifelse(Y == k, z(k), z(Y) - z(Y + 1)) ) xname <- dimnames(X)[[2]] yname <- as.character(formula(object))[2] ints <- c(1e100, cof[1 : k], -1e100) L <- L - cof[1] Ly <- L + ints[Y + 1] Ly1 <- L + ints[Y + 2] cumy <- cumprob(Ly) cumy1 <- cumprob(Ly1) ## Compute probabilities of being in observed cells # pc <- ifelse(Y==0, 1 - z(1), ifelse(Y == k, z(k), z(Y) - z(Y + 1)) ) pc <- cumy - cumy1 derivy <- deriv(Ly , cumy) derivy1 <- deriv(Ly1, cumy1) ii <- 0 for(i in which) { ii <- ii + 1 di <- if(i <= k) ifelse(Y == 0, i == 1, Y == i) else X[, i - k] di1 <- if(i <= k) ifelse(Y == k, 0, (Y + 1) == i) else X[, i - k] # di <- if(i <= k) ifelse(Y==0, if(i==1) 1 else 0, Y==i) else X[,i - k] # di1 <- if(i <= k) ifelse(Y==0 | Y==k, 0, (Y + 1) == i) else X[,i - k] ui <- (derivy * di - derivy1 * di1) / pc # ui <- ifelse(Y == 0, -z(1) * di, # ifelse(Y == k, (1 - z(k)) * di, # (deriv(L + ints[Y + 1L], z(Y)) * di - # deriv(L + ints[Y + 2L], z(Y + 1)) * di1) / pc ) ) u[,ii] <- ui if(dopl && i > k) { if(pl=='boxplot') { boxplot(split(ui, Y), varwidth=TRUE, notch=TRUE, names=lev, err=-1, ylim=if(missing(ylim)) quantile(ui, c(.1, .9)) else ylim, ...) title(xlab=yname, ylab=paste('Score Residual for', xname[i - k])) } else plotit(split(ui,Y), ylim=if(missing(ylim)) NULL else ylim, lev=lev, xlab=yname, ylab=paste('Score Residual for', xname[i-k])) } } return(if(dopl) invisible(naresid(naa, u)) else naresid(naa, u)) } if(type == "li.shepherd") { if(length(X <- object$x)==0) stop("you did not use x=TRUE in the fit") N <- length(Y) px <- 1 - cumprob(outer(cof[1:k], as.vector(X %*% cof[- (1:k)]), "+")) low.x = rbind(0, px)[cbind(Y + 1L, 1:N)] hi.x = 1 - rbind(px, 1)[cbind(Y + 1L, 1:N)] return(low.x - hi.x) } if(type=="pearson") return(naresid(naa, (Y - P) / sqrt(P * (1 - P)))) if(type=="deviance") { r <- ifelse(Y==0,-sqrt(2 * abs(log(1 - P))), sqrt(2 * abs(logb(P)))) return(naresid(naa, r)) } if(type=="pseudo.dep") { r <- L + (Y - P) / P / (1 - P) return(naresid(naa, r)) } if(type=="partial") { if(!length(X <- unclass(object$x))) stop("you did not specify x=TRUE in the fit") cof.int <- cof[1 : k] cof <- cof[- (1 : k)] if(!missing(which)) { X <- X[, which, drop=FALSE] cof <- cof[which] } atx <- attributes(X) dx <- atx$dim if(k==1) r <- X * matrix(cof, nrow=dx[1], ncol=dx[2], byrow=TRUE) + (Y - P) / P / (1 - P) else { r <- X * matrix(cof, nrow=dx[1], ncol=dx[2], byrow=TRUE) R <- array(NA, dim=c(dx, k), dimnames=c(atx$dimnames, list(lev2))) for(j in 1:k) { y <- Y >= j p <- cumprob(L - cof.int[1] + cof.int[j]) R[,,j] <- r + (y - p) / p / (1 - p) } } if(dopl) { xname <- atx$dimnames[[2]]; X <- unclass(X) for(i in 1:dx[2]) { if(pl == "loess") { if(k > 1) stop('pl="loess" not implemented for ordinal response') xi <- X[,i] ri <- r[,i] ddf <- data.frame(xi,ri) plot(xi, ri, xlim=if(missing(xlim)) range(xi) else xlim, ylim=if(missing(ylim)) range(ri) else ylim, xlab=xname[i], ylab=ylabpr) lines(lowess(xi,ri)) } else if(k==1) { xi <- X[,i]; 
ri <- r[,i] plot(xi, ri, xlab=xname[i], ylab="Partial Residual", xlim=if(missing(xlim))range(xi) else xlim, ylim=if(missing(ylim))range(ri) else ylim) if(pl=="lowess") lines(lowess(xi, ri, iter=0, ...)) else lines(supsmu(xi, ri, ...)) } else { xi <- X[,i] ri <- R[,i,,drop=TRUE] smoothed <- vector('list',k) ymin <- 1e30; ymax <- -1e30 for(j in 1:k) { w <- if(pl!='supsmu') lowess(xi, ri[,j], iter=0, ...) else supsmu(xi, ri[,j], ...) ymin <- min(ymin,w$y) ymax <- max(ymax,w$y) smoothed[[j]] <- w } plot(0, 0, xlab=xname[i], ylab=ylabpr, xlim=if(missing(xlim))range(xi) else xlim, ylim=if(missing(ylim))range(pretty(c(ymin,ymax))) else ylim, type='n') us <- par('usr')[1:2] for(j in 1:k) { w <- smoothed[[j]] lines(w, lty=j) if(is.character(label.curves)) { xcoord <- us[1]+(us[2]-us[1])*j/(k+1) text(xcoord, approx(w, xout=xcoord, rule=2, ties=mean)$y, lev2[j]) } } if(is.list(label.curves) || (is.logical(label.curves) && label.curves)) labcurve(smoothed, lev2, opts=label.curves) } } return(invisible(if(k==1)naresid(naa,r) else R)) } return(if(k==1) naresid(naa,r) else R) } ##if(type=='convexity') { ## if(missing(p.convexity)) { ## pq <- quantile(P, c(.01, .99)) ## if(pq[1]==pq[2]) pq <- range(P) ## p.convexity <- seq(pq[1], pq[2], length=100) ## } ## lcp <- length(p.convexity) ## cp <- single(lcp) ## for(i in 1:lcp) { ## p <- p.convexity[i] ## cp[i] <- mean(((p/P)^Y)*(((1-p)/(1-P))^(1-Y))) ## } ## if(pl) plot(p.convexity, cp, xlab='p', ylab='C(p)', type='l') ## return(invisible(cp)) ##} if(type %in% c("dfbeta", "dfbetas", "dffit", "dffits", "hat", "lp1")) { if(length(X <- unclass(object$x)) == 0) stop("you did not specify x=TRUE for the fit") v <- P * (1 - P) g <- lm(L + (Y - P) / v ~ X, weights=v) infl <- lm.influence(g) dfb <- coef(infl) ## R already computed differences dimnames(dfb) <- list(rnam, c(cnam[kint], cnam[-(1:k)])) if(type=="dfbeta") return(naresid(naa, dfb)) if(type=="dfbetas") { ## i <- c(kint, (k+1):length(cof)) vv <- vcov(object, intercepts=1) return(naresid(naa, sweep(dfb, 2, diag(vv)^.5,"/"))) ## was diag(object$var[i, i]) } if(type=="hat") return(naresid(naa, infl$hat)) if(type=="dffit") return(naresid(naa, (infl$hat * g$residuals)/(1 - infl$hat))) if(type=="dffits") return(naresid(naa, (infl$hat^.5)*g$residuals/(infl$sigma * (1 - infl$hat)) )) if(type=="lp1") return(naresid(naa, L - (infl$hat * g$residuals) / (1 - infl$hat))) } } residuals.orm <- function(object, type=c("li.shepherd", "ordinary","score","score.binary","pearson", "deviance","pseudo.dep","partial", "dfbeta","dfbetas","dffit","dffits","hat","gof","lp1"), pl=FALSE, xlim, ylim, kint, label.curves=TRUE, which, ...) { type <- match.arg(type) args <- list(object=object, type=type, pl=pl, label.curves=label.curves, ...) if(!missing(kint)) args$kint <- kint if(!missing(xlim)) args$xlim <- xlim if(!missing(ylim)) args$ylim <- ylim if(!missing(which)) args$which <- which do.call('residuals.lrm', args) } plot.lrm.partial <- function(..., labels, center=FALSE, ylim) { dotlist <- list(...) 
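## Overlays lowess-smoothed partial residual curves, one per fit, for
## each predictor shared by the fits. Illustrative use (a sketch; f1 and
## f2 are hypothetical binary lrm fits made with x=TRUE):
##   plot.lrm.partial(f1, f2, center=TRUE)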
nfit <- length(dotlist) if(missing(labels)) labels <- (as.character(sys.call())[-1])[1:nfit] vname <- dimnames(dotlist[[1]]$x)[[2]] nv <- length(vname) if(nv==0) stop('you did not specify x=TRUE on the fit') r <- vector('list', nv) for(i in 1:nfit) r[[i]] <- resid(dotlist[[i]], 'partial') for(i in 1:nv) { curves <- vector('list',nfit) ymin <- 1e10; ymax <- -1e10 for(j in 1:nfit) { xx <- dotlist[[j]]$x[,vname[i]] yy <- r[[j]][,vname[i]] if(center)yy <- yy - mean(yy) curves[[j]] <- lowess(xx, yy, iter=0) ymin <- min(ymin, curves[[j]]$y) ymax <- max(ymax, curves[[j]]$y) } for(j in 1:nfit) { if(j==1) plot(curves[[1]], xlab=vname[i], ylab=NULL, ylim=if(missing(ylim)) c(ymin, ymax) else ylim, type='l') else lines(curves[[j]], lty=j) } if(nfit>1) labcurve(curves, labels) } invisible() } rms/R/latex.cph.s0000644000176200001440000000770113215527455013344 0ustar liggesuserslatex.cph <- function(object, title, file='', append=FALSE, surv=TRUE, maxt=FALSE, which=NULL, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", dec=3, pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', ...) { md <- prType() %in% c('html', 'md', 'markdown') f <- object whichThere <- length(which) atr <- f$Design lev <- names(f$freq) Intercept <- -f$center strata <- levels(f$strata) ## was f$strata w <- if(length(caption)) { if(md) paste('
<div align=center><strong>', caption, '</strong></div>
') else paste('\\begin{center} \\bf',caption,'\\end{center}') } if(!length(which) & !inline) { if(length(strata)==0) { w <- c(w,paste("\\[{\\rm Prob}\\{T\\geq t\\} = S_{0}(t)^{{\\textstyle e}^{X\\beta}}, {\\rm \\ \\ where} \\\\ \\]",sep="")) } else { sname <- atr$name[atr$assume.code==8] strata.sub <- letters[8+(1:length(sname))] s <- paste("{\\rm ",sname,"}=",strata.sub,sep="") s <- paste(s, collapse=",") w <- c(w,paste("\\[{\\rm Prob}\\{T\\geq t\\ |\\ ",s,"\\}=S_{", paste(strata.sub,collapse=""), "}(t)^{{\\textstyle e}^{X\\beta}}, {\\rm \\ \\ where} \\\\ \\]", sep="")) } } if(!length(which)) which <- 1:length(atr$name) if(missing(varnames)) varnames <- atr$name[atr$assume.code!=9] if(! md) cat(w, sep=if(length(w))"\n" else "", file=file, append=append) Z <- latexrms(f, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix=if(!whichThere)"X\\hat{\\beta}" else NULL, intercept=Intercept, inline=inline, pretrans=pretrans, digits=digits, size=size) if(md) Z <- c(paste0(w, '\n'), as.character(z)) if(inline) return(if(md) htmltools::HTML(Z) else Z) ss <- f$surv.summary if(surv && length(ss)) { fs <- levels(f$strata) # was f$strata nstrat <- 0; if(length(fs)) nstrat <- length(fs) times <- as.numeric(dimnames(ss)[[1]]) maxtime <- f$maxtime if(max(times)>=maxtime) maxt <- FALSE if(nstrat==0) { s <- matrix(ss[, , 1], ncol=1) if(maxt) { s <- cbind(s, f$surv[L <- length(f$surv)]) times <- c(times, f$time[L]) } dimnames(s) <- list(format(times), "$S_{0}(t)$") if(md) { z <- htmlTable::txtRound(s, digits=dec) z <- htmlTable::htmlTable(z, rowlabel='$t$', escape.html=FALSE, css.cell='min-width: 9em;') Z <- c(Z, as.character(z)) } else latex(s, file=file, append=TRUE, rowlabel="$t$", rowlabel.just="r", dec=dec, table.env=FALSE) } else { ## Change . to ,blank n <- sedit(paste(fs,',',sep=''), '.', ', ') ## Change sname=*, to *, n <- sedit(n, paste(sname,'=*,',sep=''), rep('*, ', length(sname))) n <- substring(n, 1, nchar(n) - sum(atr$assume.code == 8) - 1) s <- ss[, , 1] if(maxt) { smax <- rep(NA, nstrat) for(i in 1 : nstrat) smax[i] <- f$surv[[i]][abs(f$time[[i]]-maxtime) < 0.001] s <- rbind(s, smax) times <- c(times, maxtime) } dimnames(s) <- list(format(times), paste("$S_{", n, "}(t)$", sep="")) if(md) { z <- htmlTable::txtRound(s, digits=dec) Z <- c(Z, as.character( htmlTable::htmlTable(z, rowlabel='$t$', escape.html=FALSE, css.cell='min-width: 9em;'))) } else latex(s, file=file, append=TRUE, rowlabel="$t$", rowlabel.just="r", dec=dec, table.env=FALSE) } } if(md) htmltools::HTML(Z) } rms/R/pentrace.s0000644000176200001440000003051213105326322013237 0ustar liggesuserspentrace <- function(fit, penalty, penalty.matrix, method=c('grid', 'optimize'), which=c('aic.c', 'aic', 'bic'), target.df=NULL, fitter, pr=FALSE, tol=1e-7, keep.coef=FALSE, complex.more=TRUE, verbose=FALSE, maxit=12, subset, noaddzero=FALSE) { ## Need to check Strata for cph method <- match.arg(method) which <- match.arg(which) tdf <- length(target.df) if(tdf) method <- 'optimize' if(! length(X <- fit$x) || ! 
length(Y <- as.matrix(fit$y))) stop("you did not specify x=TRUE and y=TRUE in the fit") fit$x <- fit$y <- NULL ## if(length(pn <- fit$penalty) > 0 && max(unlist(pn)) != 0) ## warning('you did not specify penalty= in fit so that unpenalized model can be a candidate for the best model') sc.pres <- match("parms", names(fit), 0) > 0 fixed <- NULL dist <- fit$dist parms <- fit$parms clas <- class(fit)[1] isols <- clas=='ols' if(!(isols || inherits(fit, 'lrm'))) stop("at present pentrace only works for lrm or ols") if(missing(fitter)) fitter <- switch(clas, ols=function(x, y, maxit, ...) lm.pfit(x, y, ...), lrm=function(x, y, maxit=12, ...) lrm.fit(x, y, maxit=maxit, ...), cph=function(x, y, maxit=12, ...) coxphFit(x, y, strata=Strata, iter.max=maxit, eps=.0001, method="efron", toler.chol=tol), psm=function(x, y, maxit=12,...) survreg.fit2(x, y, dist=dist, parms=parms, fixed=fixed, offset=NULL, init=NULL, maxiter=maxit)) if(!length(fitter))stop("fitter not valid") Strata <- fit$strata if(!missing(subset)) { Y <- Y[subset,, drop=FALSE] X <- X[subset,, drop=FALSE] Strata <- Strata[subset, drop=FALSE] # NULL[] is NULL } n <- nrow(Y) atr <- fit$Design if(missing(penalty.matrix)) penalty.matrix <- Penalty.matrix(atr, X) obj.best <- -1e10 ns <- num.intercepts(fit) islist <- is.list(penalty) if(islist) { penalty <- expand.grid(penalty) if(complex.more && ncol(penalty) > 1 && nrow(penalty) > 1) { ikeep <- NULL for(i in 1:nrow(penalty)) { ok <- TRUE peni <- penalty[i,] for(j in 2:length(peni)) if(peni[[j]] < peni[[j-1]]) ok <- FALSE if(ok) ikeep <- c(ikeep, i) } penalty <- penalty[ikeep,,drop=FALSE] } np <- nrow(penalty) } else { if(method == 'grid' && ! noaddzero) penalty <- c(0, penalty[penalty > 0]) np <- length(penalty) } if(method=='optimize') { stop('method="optimize" not yet implemented in R') if((islist && nrow(penalty) > 1) || (!islist && length(penalty) > 1)) stop('may not specify multiple potential penalties when method="optimize"') objective <- function(pen, X, Y, z) { ##Problem with sending so many auxiliary parameters to nlminb - ##nlminb's internal parameters got shifted somehow n <- z$n; penalty.matrix <- z$penalty.matrix; pennames <- z$pennames isols <- z$isols; islist <- z$islist; tol <- z$tol; maxit <- z$maxit ns <- z$ns; fitter <- z$fitter; pr <- z$pr; atr <- z$atr; tdf <- length(z$target.df) if(length(pen) > 1) { pen <- structure(as.list(pen), names=pennames) penfact <- Penalty.setup(atr, pen)$multiplier } else penfact <- pen if(length(penfact)==1 || !islist) pm <- penfact*penalty.matrix else { a <- diag(sqrt(penfact)) pm <- a %*% penalty.matrix %*% a } f <- fitter(X, Y, penalty.matrix=pm, tol=tol, maxit=maxit) if(length(f$fail) && f$fail) stop('fitter failed. Try changing maxit or tol') if(isols) { ## ols (from lm.pfit) already stored correct LR chisq and effective df stats <- f$stats df <- stats['d.f.'] lr <- stats['Model L.R.'] dag <- f$effective.df.diagonal } else { v <- f$var #Later: vcov(f) f.nopenalty <- fitter(X, Y, initial=f$coef, maxit=1, tol=tol) if(length(f.nopenalty$fail) && f.nopenalty$fail) stop('fitter failed. 
Try changing tol') info.matrix.unpenalized <- if(length(f.nopenalty$info.matrix)) f.nopenalty$info.matrix else solvet(f.nopenalty$var, tol=tol) # -> vcov dag <- diag(info.matrix.unpenalized %*% v) df <- if(ns==0)sum(dag) else sum(dag[-(1:ns)]) lr <- f.nopenalty$stats["Model L.R."] } obj <- switch(z$which, aic.c <- lr - 2*df*(1 + (df + 1) / (n - df - 1)), aic <- lr - 2 * df, bic <- lr - df * logb(n)) if(tdf) obj <- abs(df - z$target.df) if(pr) { w <- if(tdf) df else obj names(w) <- NULL pp <- if(islist) unlist(pen) else c(Penalty=pen) print(c(pp, Objective=w)) } if(!tdf) obj <- -obj else attr(obj,'df') <- df obj } res <- nlminb(unlist(penalty), objective, lower=0, X=X, Y=Y, z=list(n=n, penalty.matrix=penalty.matrix, pennames=names(penalty), isols=isols, islist=islist, tol=tol, maxit=maxit, ns=ns, fitter=fitter, atr=atr, pr=pr, which=which, target.df=target.df), control=list(abs.tol=.00001, rel.tol=if(tdf)1e-6 else .00001)) return(list(penalty=if(islist) structure(as.list(res$parameters),names=names(penalty)) else res$parameters, objective=if(tdf)res$aux$df else -res$objective)) } df <- aic <- bic <- aic.c <- if(islist) double(length(penalty[[1]])) else double(length(penalty)) for(i in 1 : np) { if(islist) { pen <- penalty[i,] penfact <- Penalty.setup(atr, pen)$multiplier } else { pen <- penalty[i] penfact <- pen } unpenalized <- all(penfact==0) if(i==1) Coef <- if(keep.coef) matrix(NA,ncol=length(fit$coef),nrow=np) else NULL if(unpenalized) f <- fit else { if(length(penfact) == 1 || !islist) pm <- penfact * penalty.matrix else { a <- diag(sqrt(penfact)) pm <- a %*% penalty.matrix %*% a } f <- fitter(X, Y, penalty.matrix=pm, tol=tol, maxit=maxit) if(length(f$fail) && f$fail) stop('fitter failed. Try changing maxit or tol') } if(keep.coef) Coef[i,] <- f$coef if(unpenalized || isols) { ## ols (from lm.pfit) already stored correct LR chisq and effective df stats <- f$stats df[i] <- stats['d.f.'] lr <- stats['Model L.R.'] dag <- if(unpenalized) rep(1, length(df[i])) else f$effective.df.diagonal } else { v <- f$var #Later: vcov(f, regcoef.only=T) f.nopenalty <- fitter(X, Y, initial=f$coef, maxit=1, tol=tol) if(length(f.nopenalty$fail) && f.nopenalty$fail) stop('fitter failed. 
Try changing tol') info.matrix.unpenalized <- if(length(f.nopenalty$info.matrix)) f.nopenalty$info.matrix else solvet(f.nopenalty$var, tol=tol) # -> vcov dag <- diag(info.matrix.unpenalized %*% v) df[i] <- if(ns == 0)sum(dag) else sum(dag[- (1 : ns)]) lr <- f.nopenalty$stats["Model L.R."] if(verbose) { cat('non slopes',ns,'\neffective.df.diagonal:\n') print(dag) } } aic[i] <- lr - 2 * df[i] bic[i] <- lr - df[i] * logb(n) aic.c[i] <- lr - 2 * df[i] * (1 + (df[i] + 1) / (n - df[i] - 1)) obj <- switch(which, aic.c=aic.c[i], aic=aic[i], bic=bic[i]) if(obj > obj.best) { pen.best <- pen df.best <- df[i] obj.best <- obj f.best <- f var.adj.best <- if(unpenalized || isols) f$var else v %*% info.matrix.unpenalized %*% v diag.best <- dag } if(pr) { d <- if(islist) as.data.frame(pen, row.names='') else data.frame(penalty=pen, row.names='') d$df <- df[i] d$aic <- aic[i] d$bic <- bic[i] d$aic.c <- aic.c[i] cat('\n'); print(d) } } mat <- if(islist) as.data.frame(penalty) else data.frame(penalty=penalty) mat$df <- df mat$aic <- aic mat$bic <- bic mat$aic.c <- aic.c structure(list(penalty=pen.best, df=df.best, objective=obj.best, fit=f.best, var.adj=var.adj.best, diag=diag.best, results.all=mat, Coefficients=Coef), class="pentrace") } plot.pentrace <- function(x, method=c('points', 'image'), which=c('effective.df', 'aic', 'aic.c', 'bic'), pch=2, add=FALSE, ylim, ...) { method <- match.arg(method) x <- x$results.all penalty <- x[[1]] effective.df <- x$df aic <- x$aic bic <- x$bic aic.c <- x$aic.c if(length(x) == 5) { ## only one variable given to penalty= if('effective.df' %in% which) { if(add) lines(penalty, effective.df) else plot(penalty, effective.df, xlab="Penalty", ylab="Effective d.f.", type="l", ...) if(length(which) == 1) return(invisible()) } if(!add) plot(penalty, aic, ylim=if(missing(ylim)) range(c(aic, bic)) else ylim, xlab="Penalty", ylab=expression(paste("Information Criterion (", chi^2, " scale)")), type=if('aic' %in% which)"l" else "n", lty=3, ...) else if('aic' %in% which) lines(penalty, aic, lty=3, ...) if('bic' %in% which) lines(penalty, bic, lty=2, ...) if('aic.c' %in% which) lines(penalty, aic.c, lty=1, ...) if(!add && length(setdiff(which, 'effective.df')) > 1) title(sub=paste(if('aic.c' %in% which) "Solid: AIC_c", if('aic' %in% which) "Dotted: AIC", if('bic' %in% which) "Dashed: BIC",sep=' '), adj=0,cex=.75) return(invisible()) } ## At least two penalty factors if(add) stop('add=TRUE not implemented for >=2 penalty factors') X1 <- x[[1]] X2 <- x[[2]] nam <- names(x) x1 <- sort(unique(X1)) x2 <- sort(unique(X2)) n1 <- length(x1) n2 <- length(x2) aic.r <- rank(aic); aic.r <- aic.r/max(aic.r) if(method=='points') { plot(0, 0, xlim=c(1,n1), ylim=c(1,n2), xlab=nam[1], ylab=nam[2], type='n', axes=FALSE, ...) mgp.axis(1, at=1:n1, labels=format(x1)) mgp.axis(2, at=1:n2, labels=format(x2)) ix <- match(X1, x1) iy <- match(X2, x2) for(i in 1:length(aic)) points(ix[i], iy[i], pch=pch, cex=(.1+aic.r[i])*3) return(invisible(aic.r)) } z <- matrix(NA,nrow=n1,ncol=n2) for(i in 1:n1) for(j in 1:n2) z[i,j] <- aic.r[X1==x1[i] & X2==x2[j]] image(x1, x2, z, xlab=nam[1], ylab=nam[2], zlim=range(aic.r), ...) invisible(aic.r) } print.pentrace <- function(x, ...) 
{ cat('\nBest penalty:\n\n') pen <- if(is.list(x$penalty)) as.data.frame(x$penalty,row.names='') else data.frame(penalty=x$penalty, row.names='') pen$df <- x$df pen$aic <- x$aic print(pen) cat('\n') if(is.data.frame(x$results.all)) print(x$results.all, row.names=FALSE) else print(as.data.frame(x$results.all,), row.names=FALSE) # row.names=rep('',length(x$results.all[[1]])))) invisible() } effective.df <- function(fit, object) { atr <- fit$Design dag <- if(missing(object)) fit$effective.df.diagonal else object$diag if(length(dag)==0) stop('object not given or fit was not penalized') ia.or.nonlin <- param.order(atr, 2) nonlin <- param.order(atr, 3) ia <- param.order(atr, 4) ia.nonlin <- param.order(atr, 5) ns <- num.intercepts(fit) if(ns > 0) dag <- dag[-(1:ns)] z <- rbind(c(length(dag), sum(dag)), c(sum(!ia.or.nonlin), sum(dag[!ia.or.nonlin])), c(sum(ia.or.nonlin), sum(dag[ia.or.nonlin])), c(sum(nonlin), sum(dag[nonlin])), c(sum(ia), sum(dag[ia])), c(sum(ia.nonlin), sum(dag[ia.nonlin]))) dimnames(z) <- list(c('All','Simple Terms','Interaction or Nonlinear', 'Nonlinear', 'Interaction','Nonlinear Interaction'), c('Original','Penalized')) cat('\nOriginal and Effective Degrees of Freedom\n\n') print(round(z,2)) invisible(z) } rms/R/cph.s0000644000176200001440000005160313734135020012215 0ustar liggesusers## This is a modification of the R survival package's coxph function ## written by Terry Therneau and ported to R by Thomas Lumley cph <- function(formula = formula(data), data = environment(formula), weights, subset, na.action = na.delete, method =c("efron", "breslow", "exact", "model.frame", "model.matrix"), singular.ok = FALSE, robust = FALSE, model = FALSE, x = FALSE, y = FALSE, se.fit = FALSE, linear.predictors = TRUE, residuals = TRUE, nonames = FALSE, eps = 1e-4, init, iter.max = 10, tol = 1e-9, surv = FALSE, time.inc, type = NULL, vartype = NULL, debug = FALSE, ...) { method <- match.arg(method) call <- match.call() if (! inherits(formula,"formula")) { ## I allow a formula with no right hand side ## The dummy function stops an annoying warning message "Looking for ## 'formula' of mode function, ignored one of mode ..." if (inherits(formula, "Surv")) { xx <- function(x) formula(x) formula <- xx(paste(deparse(substitute(formula)), 1, sep="~")) } else stop("Invalid formula") } callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! 
missing(subset )) eval(substitute(subset), data, callenv) data <- modelData(data, formula, weights=weights, subset=subset, na.action=na.action, dotexpand=FALSE, callenv=callenv) nstrata <- 0 Strata <- NULL odb <- .Options$debug if(length(odb) && is.logical(odb) && odb) debug <- TRUE if(length(z <- attr(terms(formula, allowDotAsName=TRUE), "term.labels")) > 0 && any(z !=".")) { #X's present X <- Design(data, formula, specials=c('strat', 'strata')) atrx <- attributes(X) atr <- atrx$Design nact <- atrx$na.action sformula <- atrx$sformula mmcolnames <- atr$mmcolnames if(method == "model.frame") return(X) Terms <- terms(sformula, specials=c('strat', 'strata'), data=data) asm <- atr$assume.code name <- atr$name specials <- attr(Terms, 'specials') if(length(specials$strata)) stop('cph supports strat(), not strata()') stra <- specials$strat cluster <- attr(X, 'cluster') if(length(cluster)) { if(missing(robust)) robust <- TRUE attr(X, 'cluster') <- NULL } Terms.ns <- Terms if(length(stra)) { temp <- untangle.specials(Terms.ns, "strat", 1) Terms.ns <- Terms.ns[- temp$terms] #uses [.terms function Strata <- list() strataname <- attr(Terms, 'term.labels')[stra - 1] j <- 0 for(i in (1 : length(asm))[asm == 8]) { nstrata <- nstrata + 1 xi <- X[[i + 1]] levels(xi) <- paste(name[i], "=", levels(xi), sep="") Strata[[nstrata]] <- xi } Strata <- interaction(as.data.frame(Strata), drop=TRUE) } xpres <- length(asm) && any(asm != 8) Y <- model.extract(X, 'response') if(! inherits(Y, "Surv")) stop("response variable should be a Surv object") n <- nrow(Y) weights <- model.extract(X, 'weights') offset <- attr(X, 'offset') ## Cox ph fitter routines expect null if no offset ##No mf if only strata factors if(! xpres) { X <- matrix(nrow=0, ncol=0) assign <- NULL } else { X <- model.matrix(sformula, X) ## Handle special case where model was fitted using previous fit$x alt <- attr(mmcolnames, 'alt') if(debug) { print(cbind('colnames(X)'=colnames(X)[-1], mmcolnames=mmcolnames, 'Design colnames'=atr$colnames, alt=alt)) } # prn(colnames(X)); prn(mmcolnames); prn(alt)} if(! all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt X <- X[, mmcolnames, drop=FALSE] assign <- attr(X, "assign") assign[[1]] <- NULL # remove intercept position, renumber } nullmod <- FALSE } else { ## model with no right-hand side X <- NULL Y <- data[[1]] sformula <- formula mmcolnames <- '' weights <- if('(weights)' %in% names(data)) data[['(weights)']] atr <- atrx <- NULL Terms <- terms(formula, allowDotAsName=TRUE) if(! inherits(Y, "Surv")) stop("response variable should be a Surv object") Y <- Y[! is.na(Y)] assign <- NULL xpres <- FALSE nullmod <- TRUE nact <- NULL } ny <- ncol(Y) maxtime <- max(Y[, ny - 1]) rnam <- if(! nonames) dimnames(Y)[[1]] if(xpres) dimnames(X) <- list(rnam, atr$colnames) if(method == "model.matrix") return(X) time.units <- units(Y) if(! 
length(time.units) || time.units == '') time.units <- "Day" if(missing(time.inc)) { time.inc <- switch(time.units, Day = 30, Month = 1, Year = 1, maxtime / 10) if(time.inc >= maxtime | maxtime / time.inc > 25) time.inc <- max(pretty(c(0, maxtime))) / 10 } ytype <- attr(Y, 'type') if(nullmod) f <- NULL else { fitter <- if( method == "breslow" || method == "efron") { if (ytype == 'right') coxph.fit else agreg.fit } else if (method == 'exact') { if(ytype == 'right') getFromNamespace('coxexact.fit', 'survival') else agexact.fit } else stop(paste ("Unknown method", method)) if (missing(init)) init <- NULL f <- fitter(X, Y, strata=Strata, offset=offset, weights=weights, init=init, method=method, rownames=rnam, control=coxph.control(eps=eps, toler.chol=tol, toler.inf=1, iter.max=iter.max)) } if (is.character(f)) { cat("Failure in cph:\n", f, "\n") return(structure(list(fail=TRUE), class="cph")) } else { if(length(f$coefficients) && any(is.na(f$coefficients))) { vars <- names(f$coefficients)[is.na(f$coefficients)] msg <- paste("X matrix deemed to be singular; variable", paste(vars, collapse=" ")) if(singular.ok) warning(msg) else { cat(msg,"\n") return(structure(list(fail=TRUE), class="cph")) } } } f$terms <- Terms f$sformula <- sformula f$mmcolnames <- mmcolnames if(robust) { f$naive.var <- f$var ## Terry gets a little tricky here, calling resid before adding ## na.action method to avoid re-inserting NAs. Also makes sure ## X and Y are there if(! length(cluster)) cluster <- FALSE fit2 <- c(f, list(x=X, y=Y, weights=weights, method=method)) if(length(stra)) fit2$strata <- Strata r <- getS3method('residuals', 'coxph')(fit2, type='dfbeta', collapse=cluster, weighted=TRUE) f$var <- t(r) %*% r } nvar <- length(f$coefficients) ev <- factor(Y[, ny], levels=0 : 1, labels=c("No Event", "Event")) n.table <- { if(! length(Strata)) table(ev, dnn='Status') else table(Strata, ev, dnn=c('Stratum', 'Status')) } f$n <- n.table nevent <- sum(Y[, ny]) if(xpres) { logtest <- -2 * (f$loglik[1] - f$loglik[2]) R2.max <- 1 - exp(2 * f$loglik[1] / n) R2 <- (1 - exp(- logtest / n)) / R2.max P <- 1 - pchisq(logtest,nvar) gindex <- GiniMd(f$linear.predictors) dxy <- dxy.cens(f$linear.predictors, Y, type='hazard')['Dxy'] stats <- c(n, nevent, logtest, nvar, P, f$score, 1-pchisq(f$score,nvar), R2, dxy, gindex, exp(gindex)) names(stats) <- c("Obs", "Events", "Model L.R.", "d.f.", "P", "Score", "Score P", "R2", "Dxy", "g", "gr") } else { stats <- c(n, nevent) names(stats) <- c("Obs", "Events") } f$method <- NULL if(xpres) dimnames(f$var) <- list(atr$colnames, atr$colnames) f <- c(f, list(call=call, Design=atr, assign=DesignAssign(atr, 0, atrx$terms), na.action=nact, fail = FALSE, non.slopes = 0, stats = stats, method=method, maxtime = maxtime, time.inc = time.inc, units = time.units)) if(xpres) { f$center <- sum(f$means * f$coefficients) f$scale.pred <- c("log Relative Hazard", "Hazard Ratio") attr(f$linear.predictors,"strata") <- Strata names(f$linear.predictors) <- rnam if(se.fit) { XX <- X - rep(f$means, rep.int(n, nvar)) # see scale() function ## XX <- sweep(X, 2, f$means) # center (slower;so is scale) se.fit <- drop(((XX %*% f$var) * XX) %*% rep(1,ncol(XX)))^.5 names(se.fit) <- rnam f$se.fit <- se.fit } } if(model) f$model <- data if(is.character(surv) || surv) { if(length(Strata)) { iStrata <- as.character(Strata) slev <- levels(Strata) nstr <- length(slev) } else nstr <- 1 srv <- NULL tim <- NULL s.e. 
<- NULL
      timepts <- seq(0, maxtime, by=time.inc)
      s.sum <- array(double(1), c(length(timepts), nstr, 3),
                     list(format(timepts), paste("Stratum", 1 : nstr),
                          c("Survival", "n.risk", "std.err")))
      g <- list(n=sum(f$n), coefficients=f$coefficients,
                linear.predictors=f$linear.predictors, method=f$method,
                type=type, means=f$means, var=f$var, x=X, y=Y,
                strata=Strata, offset=offset, weights=weights,
                terms=Terms, call=call)
      g <- survfit.cph(g, se.fit=is.character(surv) || surv, type=type,
                       vartype=vartype, conf.type='log')
      strt <- if(nstr > 1) rep(names(g$strata), g$strata)
      for(k in 1 : nstr) {
        j  <- if(nstr == 1) TRUE else strt == slev[k]
        yy <- Y[if(nstr == 1) TRUE else iStrata == slev[k], ny - 1]
        maxt <- max(yy)
        ## n.risk from surv.fit does not have usual meaning if not Kaplan-Meier
        tt <- c(0,  g$time[j])
        su <- c(1,  g$surv[j])
        se <- c(NA, g$std.err[j])
        if(maxt > tt[length(tt)]) {
          tt <- c(tt, maxt)
          su <- c(su, su[length(su)])
          se <- c(se, NA)
        }
        kk <- 0
        for(tp in timepts) {
          kk <- kk + 1
          t.choice <- max((1 : length(tt))[tt <= tp+1e-6])
          if(tp > max(tt) + 1e-6 & su[length(su)] > 0) {
            Su <- NA
            Se <- NA
          } else {
            Su <- su[t.choice]
            Se <- se[t.choice]
          }
          n.risk <- sum(yy >= tp)
          s.sum[kk, k, 1 : 3] <- c(Su, n.risk, Se)
        }
        if(! is.character(surv)) {
          if(nstr == 1) {
            tim  <- tt
            srv  <- su
            s.e. <- se
          } else {
            tim  <- c(tim,  list(tt))
            srv  <- c(srv,  list(su))
            s.e. <- c(s.e., list(se))
          }
        }
      }
      if(is.character(surv)) f$surv.summary <- s.sum
      else {
        if(nstr > 1) {
          names(srv) <- names(tim) <- names(s.e.) <- levels(Strata)  ###
        }
        f <- c(f, list(time=tim, surv=srv, std.err=s.e.,
                       surv.summary=s.sum))
      }
    }
  f$strata <- Strata   ### was $Strata
  if(x) f$x <- X
  if(y) f$y <- Y
  f$weights <- weights
  f$offset  <- offset
  if(! linear.predictors) f$linear.predictors <- NULL
  if(! residuals)         f$residuals         <- NULL
  class(f) <- c("cph", "rms", "coxph")
  f
}

coxphFit <- function(..., method, strata=NULL, rownames=NULL, offset=NULL,
                     init=NULL, toler.chol=1e-9, eps=.0001, iter.max=10,
                     type)
{
  fitter <-
    if( method == "breslow" || method == "efron") {
      if (type == 'right') coxph.fit else agreg.fit
    } else if (method == 'exact') {
      if(type == 'right') getFromNamespace('coxexact.fit', 'survival')
      else agexact.fit
    } else stop("Unknown method ", method)

  res <- fitter(..., strata=strata, rownames=rownames, offset=offset,
                init=init, method=method,
                control=coxph.control(toler.chol=toler.chol, toler.inf=1,
                                      eps=eps, iter.max=iter.max))
  if(is.character(res)) return(list(fail=TRUE))
  if(iter.max > 1 && res$iter >= iter.max) return(list(fail=TRUE))
  res$fail <- FALSE
  res
}

Survival.cph <- function(object, ...)
{
  if(! length(object$time) || !
length(object$surv)) stop("did not specify surv=T with cph") f <- function(times, lp=0, stratum=1, type=c("step","polygon"), time, surv) { type <- match.arg(type) if(length(stratum) > 1) stop("does not handle vector stratum") if(length(times) == 0) { if(length(lp) > 1) stop("lp must be of length 1 if times=NULL") return(surv[[stratum]] ^ exp(lp)) } s <- matrix(NA, nrow=length(lp), ncol=length(times), dimnames=list(names(lp), format(times))) if(is.list(time)) {time <- time[[stratum]]; surv <- surv[[stratum]]} if(type == "polygon") { if(length(lp) > 1 && length(times) > 1) stop('may not have length(lp)>1 & length(times>1) when type="polygon"') su <- approx(time, surv, times, ties=mean)$y return(su ^ exp(lp)) } for(i in 1 : length(times)) { tm <- max((1 : length(time))[time <= times[i] + 1e-6]) su <- surv[tm] if(times[i] > max(time) + 1e-6) su <- NA s[,i] <- su ^ exp(lp) } drop(s) } formals(f) <- list(times=NULL, lp=0, stratum=1, type=c("step","polygon"), time=object$time, surv=object$surv) f } Quantile.cph <- function(object, ...) { if(! length(object$time) || ! length(object$surv)) stop("did not specify surv=T with cph") f <- function(q=.5, lp=0, stratum=1, type=c("step","polygon"), time, surv) { type <- match.arg(type) if(length(stratum)>1) stop("does not handle vector stratum") if(is.list(time)) {time <- time[[stratum]]; surv <- surv[[stratum]]} Q <- matrix(NA, nrow=length(lp), ncol=length(q), dimnames=list(names(lp), format(q))) for(j in 1 : length(lp)) { s <- surv^exp(lp[j]) if(type == "polygon") Q[j,] <- approx(s, time, q, ties=mean)$y else for(i in 1 : length(q)) if(any(s <= q[i])) Q[j,i] <- min(time[s <= q[i]]) #is NA if none } drop(Q) } formals(f) <- list(q=.5, lp=0, stratum=1, type=c('step','polygon'), time=object$time, surv=object$surv) f } Mean.cph <- function(object, method=c("exact","approximate"), type=c("step","polygon"), n=75, tmax=NULL, ...) { method <- match.arg(method) type <- match.arg(type) if(! length(object$time) || ! length(object$surv)) stop("did not specify surv=TRUE with cph") if(method == "exact") { f <- function(lp=0, stratum=1, type=c("step","polygon"), tmax=NULL, time, surv) { type <- match.arg(type) if(length(stratum) > 1) stop("does not handle vector stratum") if(is.list(time)) {time <- time[[stratum]]; surv <- surv[[stratum]]} Q <- lp if(! length(tmax)) { if(min(surv) > 1e-3) warning(paste("Computing mean when survival curve only defined down to", format(min(surv)), "\n Mean is only a lower limit")) k <- rep(TRUE, length(time)) } else { if(tmax > max(time)) stop(paste("tmax=", format(tmax), "> max follow-up time=", format(max(time)))) k <- (1 : length(time))[time <= tmax] } for(j in 1 : length(lp)) { s <- surv ^ exp(lp[j]) Q[j] <- if(type == "step") sum(c(diff(time[k]), 0) * s[k]) else trap.rule(time[k], s[k]) } Q } formals(f) <- list(lp=0, stratum=1, type=c("step","polygon"), tmax=tmax, time=object$time, surv=object$surv) return(f) } else { lp <- object$linear.predictors lp.seq <- if(length(lp)) lp.seq <- seq(min(lp), max(lp), length=n) else 0 time <- object$time surv <- object$surv nstrat <- if(is.list(time)) length(time) else 1 areas <- list() for(is in 1 : nstrat) { tim <- if(nstrat == 1) time else time[[is]] srv <- if(nstrat == 1) surv else surv[[is]] if(! 
length(tmax)) { if(min(srv) > 1e-3) warning(paste("Computing mean when survival curve only defined down to", format(min(srv)), "\n Mean is only a lower limit")) k <- rep(TRUE, length(tim)) } else { if(tmax > max(tim)) stop(paste("tmax=",format(tmax), "> max follow-up time=", format(max(tim)))) k <- (1 : length(tim))[tim <= tmax] } ymean <- lp.seq for(j in 1 : length(lp.seq)) { s <- srv ^ exp(lp.seq[j]) ymean[j] <- if(type == "step") sum(c(diff(tim[k]),0) * s[k]) else trap.rule(tim[k], s[k]) } areas[[is]] <- ymean } if(nstrat > 1) names(areas) <- names(time) ff <- function(lp=0, stratum=1, lp.seq, areas) { if(length(stratum) > 1) stop("does not handle vector stratum") area <- areas[[stratum]] if(length(lp.seq) == 1 && all(lp == lp.seq)) ymean <- rep(area, length(lp)) else ymean <- approx(lp.seq, area, xout=lp, ties=mean)$y if(any(is.na(ymean))) warning("means requested for linear predictor values outside range of linear\npredictor values in original fit") names(ymean) <- names(lp) ymean } formals(ff) <- list(lp=0, stratum=1, lp.seq=lp.seq, areas=areas) } ff } predict.cph <- function(object, newdata=NULL, type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean','individual','simultaneous'), kint=1, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ...) { type <- match.arg(type) predictrms(object, newdata, type, se.fit, conf.int, conf.type, kint, na.action, expand.na, center.terms, ...) } print.cph <- function(x, digits=4, table=TRUE, conf.int=FALSE, coefs=TRUE, title='Cox Proportional Hazards Model', ...) { k <- 0 z <- list() if(length(zz <- x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(zz)[1], sep='.'), list(zz)) } if(table && length(x$n) && is.matrix(x$n)) { k <- k + 1 z[[k]] <- list(type='print', list(x$n)) } if(length(x$coef)) { stats <- x$stats ci <- x$clusterInfo misc <- reListclean(Obs =stats['Obs'], Events=stats['Events'], 'Cluster on' = ci$name, Clusters = ci$n, Center = round(x$center, digits)) lr <- reListclean('LR chi2' = stats['Model L.R.'], 'd.f.' = stats['d.f.'], 'Pr(> chi2)' = stats['P'], 'Score chi2' = stats['Score'], 'Pr(> chi2)' = stats['Score P']) disc <- reListclean(R2 = stats['R2'], Dxy = stats['Dxy'], g = stats['g'], gr = stats['gr']) k <- k + 1 headings <- c('', 'Model Tests', 'Discrimination\nIndexes') data <- list(misc, c(lr, c(2,NA,4,2,4)), c(disc, 3)) z[[k]] <- list(type='stats', list(headings=headings, data=data)) beta <- x$coef se <- sqrt(diag(x$var)) k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef = x$coef, se = sqrt(diag(x$var)))) if(conf.int) { zcrit <- qnorm((1 + conf.int)/2) tmp <- cbind(exp(beta), exp( - beta), exp(beta - zcrit * se), exp(beta + zcrit * se)) dimnames(tmp) <- list(names(beta), c("exp(coef)", "exp(-coef)", paste("lower .", round(100 * conf.int, 2), sep = ""), paste("upper .", round(100 * conf.int, 2), sep = ""))) k <- k + 1 z[[k]] <- list(type='print', list(tmp, digits=digits)) } } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) 
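  ## Worked illustration of the conf.int arithmetic above (comments only;
  ## the numbers are hypothetical): with conf.int=.95,
  ## zcrit = qnorm(.975) = 1.96, so a coefficient beta = 0.5 with
  ## se = 0.2 is displayed as exp(coef) = exp(0.5) = 1.65 with
  ## .95 limits exp(0.5 - 1.96*0.2) = 1.11 and exp(0.5 + 1.96*0.2) = 2.44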
} rms/R/Predict.s0000644000176200001440000003242414024430465013041 0ustar liggesusersPredict <- function(object, ..., fun=NULL, funint=TRUE, type=c("predictions","model.frame","x"), np=200, conf.int=.95, conf.type=c('mean', 'individual', 'simultaneous'), usebootcoef=TRUE, boot.type=c('percentile', 'bca', 'basic'), posterior.summary=c('mean', 'median', 'mode'), adj.zero=FALSE, ref.zero=FALSE, kint=NULL, ycut=NULL, time=NULL, loglog=FALSE, digits=4, name, factors=NULL, offset=NULL) { fit <- object type <- match.arg(type) conf.type <- match.arg(conf.type) boot.type <- match.arg(boot.type) posterior.summary <- match.arg(posterior.summary) draws <- fit$draws bayes <- length(draws) > 0 if(bayes && conf.type == 'simultaneous') stop('conf.type simultaneous does not work for Bayesian models') oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) cl <- class(fit) isblrm <- 'blrm' %in% cl isorm <- 'orm' %in% cl islrm <- 'lrm' %in% cl Center <- 0. if('cph' %in% cl) Center <- fit$center kintgiven <- length(kint) > 0 if(! length(kint)) { kint <- fit$interceptRef if(! length(kint)) kint <- 1 } pppo <- fit$pppo if(! isblrm) pppo <- 0 partialpo <- pppo > 0L if(isblrm) cppo <- fit$cppo if(partialpo && ! length(cppo)) stop('only implemented for constrained partial PO models') ylevels <- if(isblrm) fit$ylevels else fit$yunique if(islrm || isorm || isblrm) { if(kintgiven && ! length(ycut)) ycut <- ylevels[kint + 1] if(length(ycut) && ! kintgiven) kint <- if(all.is.numeric(ylevels)) which(ylevels == ycut) - 1 else max((1 : length(ylevels))[ylevels <= ycut]) - 1 } Pred <- function(type='lp') { if(type == 'x') { if(isblrm) predict(fit, settings, type='x', kint=kint, ycut=ycut, zcppo=FALSE) else predictrms(fit, settings, type='x') } else { if(isblrm) predict(fit, settings, type='lp', fun=fun, funint=funint, kint=kint, ycut=ycut, posterior.summary=posterior.summary, cint=conf.int) else predictrms(fit, settings, kint=kint, ref.zero=ref.zero, type='lp', conf.int=conf.int, conf.type=conf.type) } } dotlist <- if(length(factors)) factors else rmsArgs(substitute(list(...))) fname <- if(missing(name)) '' else name at <- fit$Design assume <- at$assume.code name <- at$name ##interactions are placed at end by design ioff <- attr(fit$terms, 'offset') if(length(ioff)) { offsetExpression <- rownames(attr(fit$terms, 'factors'))[ioff] offsetVariableName <- all.vars(as.formula(paste('~', offsetExpression))) if(! length(offset)) stop('model has offset term but offset=list(...) not given to Predict') if(length(offset) > 1) stop('offset may only contain one variable') if(length(offset[[1]]) != 1) stop('offset variable must contain 1 value') if(names(offset) != offsetVariableName) stop(paste('offset does not have correct variable name (', offsetVariableName, ')', sep='')) } if('time' %in% name) { dotlist$time <- time time <- NULL } if(length(fname) > 1 || (length(dotlist) == 0 && fname == '')) { m <- match.call(expand.dots=FALSE) m[[1]] <- as.name('Predict') nams <- if(length(fname) > 1) fname else name[assume != 9] res <- vector('list', length(nams)) names(res) <- nams i <- 0L info <- NULL # handles case where nams is empty, when no predictors ## For each predictor that "move" call Predict separately, and rbind results callenv <- parent.frame() for(nam in nams) { i <- i + 1L m$name <- nam lv <- eval(m, callenv) j <- attr(lv, 'info') if(i == 1L) info <- j else { info$varying <- c(info$varying, j$varying) info$adjust <- c(info$adjust, j$adjust) } attr(lv, 'info') <- NULL lv$.predictor. 
<- nam res[[nam]] <- lv } lv <- do.call('rbind.data.frame', res) class(lv) <- c('Predict', 'data.frame') attr(lv, 'info') <- info return(lv) } f <- sum(assume != 9) ##limit list to main effects factors parms <- at$parms label <- at$label values <- at$values yunits <- fit$units units <- at$units scale <- fit$scale.pred if(! length(scale)) scale <- "X * Beta" if(! length(fun)) { ylab <- scale[1] if(length(time)) ylab <- ylabPlotmath <- ylabhtml <- if(loglog) paste("log[-log S(", format(time), ")]", sep="") else paste(format(time), yunits, "Survival Probability") else if(scale[1] == 'X * Beta') { ylabPlotmath <- expression(X*hat(beta)) ylabhtml <- 'X&\beta;' } else ylabPlotmath <- ylabhtml <- ylab } else ylab <- ylabPlotmath <- ylabhtml <- '' if(ref.zero & length(time)) stop("ref.zero=TRUE does not make sense with time given") if(fname == '') factors <- dotlist ## name not an argument else { factors <- list() for(g in fname) factors[[g]] <- NA } nf <- length(factors) fnam <- names(factors) if(nf < 1) stop("must specify predictors to vary") which <- charmatch(fnam, name, 0L) if(any(which == 0L)) stop(paste("predictors(s) not in model:", paste(names(factors)[which == 0L], collapse=" "))) if(any(assume[which] == 9L)) stop("cannot plot interaction terms") lim <- Getlim(at, allow.null=TRUE, need.all=FALSE) fnam <- names(factors) nf <- length(factors) xadjdf <- lim$limits[2L, , drop=FALSE] xadj <- unclass(xadjdf) varying <- NULL if(nf == 0L) return(as.data.frame(xadj)) if(nf < f) { ## number of arguments < total number of predictors ## Some non-varying predictors settings <- xadj if(adj.zero) for(x in names(settings)) { i <- match(x, name) settings[[x]] <- if(assume[i] %in% c(5L, 8L)) parms[[i]][1] else if(length(V <- lim$values[[x]]) & is.character(V)) V[1] else 0L } for(n in fnam) settings[[n]] <- factors[[n]] } else settings <- factors for(i in 1L : nf) { n <- fnam[i] v <- settings[[n]] lv <- length(v) if(lv == 0L) stop('a predictor has zero length') if(lv == 1L && is.na(v)) settings[[n]] <- value.chk(at, which(name == n), NA, np, lim) if(length(settings[[n]]) > 1L) varying <- c(varying, n) } if(prod(sapply(settings,length)) > 1e5) stop('it is not sensible to predict more than 100,000 combinations') settings <- expand.grid(settings) if(length(ioff)) settings[[offsetVariableName]] <- offset[[1]] adjust <- NULL for(n in name[assume != 9L & name %nin% fnam]) adjust <- paste(adjust, n, "=", if(is.factor(xadj[[n]])) as.character(xadj[[n]]) else format(xadj[[n]]), " ", sep="") j <- assume != 9L label <- label[j] units <- units[j] assume <- assume[j] names(label) <- names(units) <- names(assume) <- name[j] at <- list(label=label, units=units, assume.code=assume) info <- list(varying=varying, adjust=adjust, Design=at, ylabPlotmath=ylabPlotmath, ylabhtml=ylabhtml, ylab=ylab, yunits=yunits, ref.zero=ref.zero, adj.zero=adj.zero, time=time, conf.int=conf.int) if(type == 'model.frame') { attr(settings, 'info') <- info return(settings) } ## Number of non-slopes nrp <- num.intercepts(fit) nrpcoef <- num.intercepts(fit, 'coef') if(nrp > 0L && (kint < 1L || kint > nrp)) stop('illegal intercept number for kint') beta <- fit$coefficients bootdone <- length(boot.Coef <- fit$boot.Coef) && usebootcoef if(bootdone && conf.type %in% c('individual','simultaneous')) { warning('bootstrap estimates ignored when conf.type="simultaneous" or "individual"') bootdone <- FALSE } isMean <- length(fun) && ! is.function(fun) && fun == 'mean' if(isMean && ! bootdone & conf.int > 0 & ! 
bayes) stop('specifying fun="mean" with conf.int > 0 does not make sense when not running bootcov (with coef.reps=TRUE)') if(isMean && isorm && conf.int > 0) stop("fun='mean' not implemented for orm models when confidence intervals are requested") if(! length(time)) { xx <- Pred() if(length(attr(xx, "strata")) && any(is.na(attr(xx, "strata")))) warning("Computed stratum NA. Requested stratum may not\nexist or reference values may be illegal strata combination\n") if(length(xx) == 0L) stop("model has no covariables and survival not plotted") xb <- if(is.list(xx)) xx$linear.predictors else xx if(isMean) { m <- Mean(fit) xb <- m(xb) } if(bootdone && conf.int > 0) { X <- Pred(type='x') pred <- t(matxv(X, boot.Coef, kint=kint, bmat=TRUE)) if(isMean) { for(k in 1L : nrow(pred)) pred[k,] <- m(pred[k,], intercepts=boot.Coef[k, 1L : nrp]) } lim <- bootBCa(xb, pred, type=boot.type, n=nobs(fit), seed=fit$seed, conf.int=conf.int) if(! is.matrix(lim)) lim <- as.matrix(lim) xx$lower <- lim[1L, ] xx$upper <- lim[2L, ] } # end if(bootdone) } # if(! length(time)) else { # time specified if(bootdone) stop('time may not be specified if bootcov was used with coef.reps=TRUE') xx <- survest(fit, settings, times=time, loglog=loglog, conf.int=conf.int) xb <- as.vector(xx$surv) } # end time specified if(conf.int > 0) { lower <- as.vector(xx$lower) upper <- as.vector(xx$upper) } if(! isblrm && length(fun) && is.function(fun)) { ## If fun is for example the result of Mean.lrm or Quantile.orm ## and confidence limits are requested, must use entire design matrix ## to get variances. Note that conf.int must also have been requested ## when calling Mean/Quantile xb <- if(conf.int > 0 && all(c('X', 'conf.int') %in% names(formals(fun)))) { X <- Pred(type='x') fun(xb, X=X, conf.int=conf.int) } else fun(xb) if(conf.int > 0 && length(lims <- attr(xb, 'limits'))) { lower <- lims$lower upper <- lims$upper } else if(conf.int > 0) { lower <- fun(lower) upper <- fun(upper) } } settings$yhat <- xb if(conf.int > 0) { settings$lower <- lower settings$upper <- upper } class(settings) <- c('Predict', 'data.frame') attr(settings, 'info') <- info settings } print.Predict <- function(x, ...) { print.data.frame(x) info <- attr(x, 'info') cat('\nResponse variable (y):', info$ylab,'\n') if(length(info$adjust) == 1) cat('\nAdjust to:',info$adjust,'\n') ci <- info$conf.int if(ci > 0) cat('\nLimits are', ci, 'confidence limits\n') invisible() } perimeter <- function(x, y, xinc=diff(range(x))/10., n=10., lowess.=TRUE) { s <- ! is.na(x+y) x <- x[s] y <- y[s] m <- length(x) if(m < n) stop("number of non-NA x must be >= n") i <- order(x) x <- x[i] y <- y[i] s <- n : (m - n + 1L) x <- x[s] y <- y[s] x <- round(x / xinc) * xinc g <- function(y, n) { y <- sort(y) m <- length(y) if(n > (m - n + 1L)) c(NA, NA) else c(y[n], y[m - n + 1L]) } r <- unlist(tapply(y, x, g, n=n)) i <- seq(1L, length(r), by=2) rlo <- r[i] rhi <- r[-i] s <- ! is.na(rlo + rhi) if(! any(s)) stop("no intervals had sufficient y observations") x <- sort(unique(x))[s] rlo <- rlo[s] rhi <- rhi[s] if(lowess.) { rlo <- lowess(x, rlo)$y rhi <- lowess(x, rhi)$y } structure(cbind(x, rlo, rhi), dimnames=list(NULL, c("x","ymin","ymax")), class='perimeter') } rbind.Predict <- function(..., rename) { d <- list(...) ns <- length(d) if(ns == 1) return(d[[1]]) info <- attr(d[[1L]], 'info') if(! 
missing(rename)) { trans <- function(input, rename) { k <- input %in% names(rename) if(any(k)) input[k] <- rename[input[k]] input } info$varying <- trans(info$varying, rename) names(info$Design$label) <- trans(names(info$Design$label), rename) names(info$Design$units) <- trans(names(info$Design$units), rename) names(info$Design$assume.code) <- trans(names(info$Design$assume.code), rename) } info$Design$label <- c(info$Design$label, .set.='Set') info$Design$units <- c(info$Design$units, .set.='') info$varying <- c(info$varying, '.set.') sets <- names(d) if(! length(sets)) sets <- paste('Set', 1 : ns) obs.each.set <- sapply(d, function(x) length(x[[1]])) .set. <- rep(sets, obs.each.set) .set. <- factor(.set., levels=unique(.set.)) info$adjust <- sapply(d, function(x) attr(x, 'info')$adjust) ## If first varying variable is not always the same but the second ## is, take varying[1] to be ".x." ## What in the heck is this for??? if(FALSE) { first <- sapply(d, function(x) attr(x, 'info')$varying[1]) second <- sapply(d, function(x) { y <- attr(x, 'info')$varying if(length(y) < 2) '' else y[2] } ) if((length(unique(first)) > 1) && (all(second == second[1]))) info$varying[1] <- '.x.' } if(! missing(rename)) for(i in 1L : ns) names(d[[i]]) <- trans(names(d[[i]]), rename) result <- do.call('rbind.data.frame', d) result$.set. <- .set. attr(result, 'info') <- info class(result) <- c('Predict', 'data.frame') result } rms/R/rms.s0000644000176200001440000005545114024261703012252 0ustar liggesusers# Design FEH 1Aug90, re-written 21Oct91 # # Augments S formula language to include: # # name - name[i] = name of ith original variable in x # label - label[i] = label of ith original variable (=name if none) # assume - assume(original x) # assume.code - coded version of assume (1-11, 9=added interaction) # parms - parms(original x) # for interaction effects parms[[i]] is a matrix with dim # 3 x (1+# interaction terms). First element in pair # is 1 if first factor is represented as an expanded # non-linear term, 0 otherwise (this applies to polynomial, # lspline, rcspline, scored). Second element applies to # second factor in interaction effect. Third element # applies to third factor (0 if second order interaction) # First column contains factor numbers involved in interaction. # limits - limits(original x) # values - For continuous variables with <=10 unique values, is # vector of values. NULL otherwise. # interactions - 3 x k matrix of factor numbers # # Cannot have interactions between two stratification factors. # # Design <- function(mf, formula=NULL, specials=NULL, allow.offset=TRUE, intercept=1) { debug <- length(.Options$rmsdebug) && .Options$rmsdebug if(debug) { cat('--------------------------------------------------\n') prn(list(names(mf), formula, specials)) } if(length(formula)) { Terms <- terms(formula, specials=specials, data=mf) attr(mf, 'terms') <- Terms } else Terms <- attr(mf, 'terms') Terms.orig <- Terms Term.labels <- attr(Terms, 'term.labels') ## offsets are not included anywhere in terms even though they are ## in the model frame response.pres <- attr(Terms, 'response') > 0 ## Function to construct the colname names that model.matrix will ## create. 
This is primarily used to subset the columns of the model ## matrix to get rid of terms involving strat main effects and to get ## rid of interaction terms involving non-reference values mmnames <- function(assume.code, rmstrans.names, term.label, iaspecial, class) { if(debug) { prn(assume.code); prn(rmstrans.names); prn(term.label); prn(iaspecial); prn(class) } ## Don't let >=i be translated to >i: rmst <- gsub('>=', '>>', rmstrans.names) ## Don't let <=i be translated to = 10) # was == 10 if(length(rmst) > 1) gsub('\\[', '', gsub('\\]', '', rmst)) else term.label else paste0(term.label, rmst) w <- gsub('>>', '>=', w) w <- gsub('<<', '<=', w) w <- gsub('@EQ@', '==', w) alt <- if(assume.code >= 10) # was == 10 if(length(rmst) > 1) paste0(term.label, rmstrans.names) else term.label else w ## Alternate names to try - handles case where model is fitted on a ## previous fit$x matrix attr(w, 'alt') <- alt w } offs <- model.offset(mf) iscluster <- if(length(Term.labels)) substring(Term.labels, 1, 8) == 'cluster(' else FALSE istime <- if(length(Term.labels)) substring(Term.labels, 1, 6) == 'aTime(' else FALSE ## Handle cluster() and aTime() ## Save right hand side of formula less cluster() and time() terms sformula <- formula(Terms) if(any(iscluster)) sformula <- removeFormulaTerms(sformula, 'cluster') if(any(istime)) sformula <- removeFormulaTerms(sformula, 'aTime') if(any(iscluster)) { clustername <- Term.labels[iscluster] cluster <- mf[[clustername]] mf[[clustername]] <- NULL Terms <- Terms[! iscluster] Term.labels <- Term.labels[! iscluster] if(any(istime)) istime <- if(length(Term.labels)) substring(Term.labels, 1, 6) == 'aTime(' else FALSE } else {cluster <- clustername <- NULL} if(any(istime)) { timename <- Term.labels[istime] time <- mf[[timename]] mf[[timename]] <- NULL Terms <- Terms[! istime] Term.labels <- Term.labels[! istime] } else {time <- timename <- NULL} ioffset <- integer(0) if(length(offs)) { if(! allow.offset) stop("offset variable not allowed in formula") ## first case below is with offset= in fit call, 2nd with offset(var) ioffset <- which(names(mf) == '(offset)' | substring(names(mf), 1, 7) == 'offset(') if(! any(ioffset)) stop('program logic error 1') } ## For some reason, model frame sometimes has a blank name if using %ia% namx <- names(mf) if(any(namx == "")) { namx <- names(mf) <- c(namx[1], Term.labels) dimnames(mf)[[2]] <- namx dimnames(attr(Terms, "factors"))[[1]] <- namx } wts <- if(any(namx == '(weights)'))(1 : length(namx))[namx == '(weights)'] else 0 if(debug) prn(llist(names(mf), ioffset, response.pres, wts)) coluse <- setdiff(1 : ncol(mf), c(ioffset, 1 * response.pres, wts)) inner.name <- if(length(Terms) > 0) unique(var.inner(Terms)) ## Handles case where a function has two arguments that are names, ## e.g. 
rcs(x,knots) -> want x only ## Note: these exclude interaction terms and %ia% terms factors <- attr(Terms, "factors") if(length(factors) && response.pres) factors <- factors[-1, , drop=FALSE] attr(Terms, "intercept") <- intercept ## name: nice version of design matrix column names constructed here ## mmname: column names that model.matrix will later create ## Used for later keeping only those columns that don't pertain ## to strat main effects or to strat interactions involving ## non-reference categories fname <- flabel <- name <- mmname <- strt <- asm <- len <- fname.incl.dup <- ia <- funits <- NULL parm <- nonlinear <- limits <- values <- list() scol <- 1 colnam <- mmcolnam <- mmcolnamalt <- list() Altcolnam <- NULL XDATADIST <- .Options$datadist if(length(XDATADIST)) { if(inherits(XDATADIST, 'datadist')) datadist <- XDATADIST else { if(! exists(XDATADIST)) stop(paste("dataset", XDATADIST, "not found for options(datadist=)")) datadist <- eval(as.name(XDATADIST)) } Limits <- datadist$limits Limnames <- dimnames(Limits)[[2]] } nc <- 0 options(Design.attr=NULL, TEMPORARY=FALSE) ##Used internally by asis, rcs, ... anyfactors <- length(coluse) > 0 i1.noia <- 0 if(length(Term.labels) < length(coluse)) stop(paste('program logic error tl\nTerm.labels:', paste(Term.labels, collapse=', '), '\ncoluse:', paste(coluse, collapse=', '))) it <- 0 if(anyfactors) for(i in coluse) { if(i != wts) { i1 <- i - response.pres xi <- mf[[i]] cls <- rev(class(xi))[1] z <- attributes(xi) assu <- z$assume.code if(! length(assu) || assu != 9) i1.noia <- i1.noia + 1 if(! length(assu)) { ## Not processed w/asis,et nam <- inner.name[i1.noia] lab <- attr(xi, "label") ord <- is.ordered(xi) && all.is.numeric(levels(xi)) if(! length(lab) || lab == "") lab <- nam if(ord) { xi <- scored(xi, name=nam, label=lab) attr(mf[, i], "contrasts") <- attr(xi, "contrasts") } else if(is.character(xi) | is.factor(xi)) { if(is.ordered(xi) && .Options$contrasts[2] != 'contr.treatment') stop(paste('Variable', nam, 'is an ordered factor with non-numeric levels.\n', 'You should set options(contrasts=c("contr.treatment", "contr.treatment"))\nor rms will not work properly.')) xi <- catg(xi, name=nam, label=lab) } else if(is.matrix(xi)) xi <- matrx(xi, name=nam, label=lab) else xi <- asis(xi, name=nam, label=lab) z <- c(z, attributes(xi)) } za <- z$assume.code zname <- z$name fname.incl.dup <- c(fname.incl.dup, zname) if(! length(fname) || ! any(fname == zname)) { # unique factor nc <- nc + 1 fname <- c(fname, zname) flabel <- c(flabel, z$label) asm <- c(asm, za) colnam[[i1]] <- z$colnames it <- it + 1 mmn <- mmnames(za, colnam[[i1]], Term.labels[it], z$iaspecial, cls) mmcolnam[[i1]] <- mmn alt <- attr(mmn, 'alt') mmcolnamalt[[i1]] <- alt if(debug) prn(c(mmn, alt)) if(za != 8 && length(colnam)) { name <- c(name, colnam [[i1]]) mmname <- c(mmname, mmcolnam[[i1]]) Altcolnam <- c(Altcolnam, alt) } if(za != 9) { funits <- c(funits, if(length(z$units))z$units else '') if(length(z$parms)) parm[[zname]] <- z$parms if(length(XDATADIST)) { limits[[zname]] <- if(any(Limnames == zname)) { j <- match(zname, Limnames, 0) #require EXACT match Limits[, j[j > 0]] } else rep(NA, 7) j <- match(zname, names(datadist$values), 0) if(j > 0) { values[[zname]] <- datadist$values[[j]] l1 <- levels(xi); l2 <- datadist$values[[j]] if(length(l1) && ((length(l1) != length(l2)) || any(sort(l1) != sort(l2)))) warning(paste('Variable', zname, 'has levels', paste(l1, collapse=' '), 'which do not match levels given to datadist (', paste(l2, collapse=' '), '). 
datadist values ignored.')) values[[zname]] <- l1 } } } if(length(nonl <- z$nonlinear)) nonlinear[[zname]] <- nonl if(za == 9) { iia <- match(z$ia, fname) if(any(is.na(iia)))stop(paste(paste(z$ia, collapse=" "), "cannot be used in %ia% since not listed as main effect")) ia <- cbind(ia, c(iia, 0)) parms <- rbind(z$parms, 0) parms[, 1] <- c(iia, 0) if(length(parms)) parm[[zname]] <- parms } } nrows <- if(is.matrix(xi))nrow(xi) else length(xi) } } ##Save list of which factors where %ia% interactions ## (before adding automatic ias) which.ia <- (1 : length(asm))[asm == 9] ##Add automatically created interaction terms if(anyfactors) { nrf <- if(! length(factors)) 0 else nrow(factors) if(length(factors)) for(i in 1 : ncol(factors)) { f <- factors[, i] j <- (1 : length(f))[f > 0] nia <- length(j) if(nia > 1) { fn <- fname.incl.dup[j] jf <- match(fn, fname.incl.dup) if(any(is.na(jf))) stop("program logic error 2") nc <- nc + 1 asm <- c(asm, 9) if(nia == 2) ialab <- paste(fn[1], "*", fn[2]) else if(nia == 3)ialab <- paste(fn[1], "*", fn[2], "*", fn[3]) else stop("interaction term not second or third order") fname <- c(fname, ialab) flabel <- c(flabel, ialab) if(sum(asm[jf] == 8) > 1) stop("cannot have interaction between two strata factors") nn <- mmnn <- mmnnalt <- list() for(k in 1 : nia) { if(asm[jf[k]] == 5 | asm[jf[k]] == 8) nn[[k]] <- paste0(fn[k], "=", parm[[fname[jf[k]]]][-1]) else if(asm[jf[k]] == 7) { nn[[k]] <- c(fn[k], paste0(fn[k], "=", parm[[fname[jf[k]]]][c(-1, -2)])) } else nn[[k]] <- colnam[[jf[k]]] mmnn[[k]] <- mmcolnam[[jf[k]]] mmnnalt[[k]] <- mmcolnamalt[[jf[k]]] } if(nia == 2) {nn[[3]] <- mmnn[[3]] <- mmnnalt[[3]] <- ""} parms <- jf if(length(jf) == 2) parms <- c(parms, 0) nonlin <- NULL nl1 <- nonlinear[[fname[jf[1]]]] nl2 <- nonlinear[[fname[jf[2]]]] ## Strata factors don't have nonlinear duplicated for # levels - 1 if(asm[jf[1]] == 8) nl1 <- rep(FALSE, length(parm[[fname[jf[1]]]]) - 1) if(asm[jf[2]] == 8) nl2 <- rep(FALSE, length(parm[[fname[jf[2]]]]) - 1) if(nia == 2) nl3 <- FALSE else if(asm[jf[3]] == 8) nl3 <- rep(FALSE, length(parm[[fname[jf[3]]]]) - 1) else nl3 <- nonlinear[[fname[jf[3]]]] n1 <- nn[[1]] n2 <- nn[[2]] n3 <- nn[[3]] mmn1 <- mmnn[[1]] mmn2 <- mmnn[[2]] mmn3 <- mmnn[[3]] mmnalt1 <- mmnnalt[[1]] mmnalt2 <- mmnnalt[[2]] mmnalt3 <- mmnnalt[[3]] ## model.matrix makes auto-products move first variable fastest, etc. 
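          ## Illustration of that ordering (comments only; a and b are
          ## hypothetical factors): for model.matrix(~ a*b), with a
          ## contributing dummy columns a2,a3 and b contributing b2,b3,
          ## the product columns appear as
          ##   a2:b2, a3:b2, a2:b3, a3:b3
          ## i.e. the first factor's index moves fastest.  The nested
          ## loops below reproduce that order by running j1 (first
          ## factor) innermost and j3 outermost.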
for(j3 in 1 : length(n3)) { for(j2 in 1 : length(n2)) { for(j1 in 1 : length(n1)) { parms <- cbind(parms, c(nl1[j1], nl2[j2], nl3[j3])) nonlin <- c(nonlin, nl1[j1] | nl2[j2] | nl3[j3]) name <- c(name, if(nia == 2) paste(n1[j1], "*", n2[j2]) else paste(n1[j1], "*", n2[j2], "*", n3[j3])) mmname <- c(mmname, if(nia == 2) paste0(mmn1[j1], ':', mmn2[j2]) else paste0(mmn1[j1], ':', mmn2[j2], ':', mmn3[j3])) Altcolnam <- c(Altcolnam, if(nia == 2) paste0(mmnalt1[j1], ':', mmnalt2[j2]) else paste0(mmnalt1[j1], ':', mmnalt2[j2], ':', mmnalt3[j3])) } } } ## If was 2-way interaction and one of the factors was restricted %ia%, ## adjust indicators k <- match(jf, which.ia, 0) if(any(k > 0)) { if(nia == 3) stop("cannot have 2-way interaction with an %ia% interaction") k <- jf[k > 0] wparm <- parms[, 1] == k; wparm[3] <- TRUE parms[wparm,] <- parm[[fname[k]]][1 : 2,, drop=FALSE] jf <- parms[, 1] nonlin <- apply(parms, 2, any)[-1] } if(length(jf) == 2) {jf <- c(jf, 0); parms[3, ] <- 0} ia <- cbind(ia, jf) if(length(parms)) parm[[ialab]] <- parms if(length(nonlin)) nonlinear[[ialab]] <- nonlin } } } if(anyfactors) { if(length(XDATADIST)) limits <- structure(limits, row.names=c("Low:effect", "Adjust to", "High:effect", "Low:prediction", "High:prediction", "Low", "High"), class="data.frame") ##data.frame converts variables always NA to factor! if(length(funits) != sum(asm != 9)) warning('program logic warning 1') else names(funits) <- fname[asm != 9] attr(mmname, 'alt') <- if(! all(Altcolnam == mmname)) Altcolnam if(any(duplicated(mmname))) stop(paste0('duplicated column name in design matrix:', paste(mmname[duplicated(mmname)], collapse=' '), '\nMost likely caused by a variable name concatenated to a factor level\nbeing the same is the name of another variable.')) atr <- list(name=fname, label=flabel, units=funits, colnames=name, mmcolnames=mmname, assume=c("asis", "polynomial", "lspline", "rcspline", "category", "","scored", "strata", "interaction", "matrix", "gTrans")[asm], assume.code=as.integer(asm), parms=parm, limits=limits, values=values, nonlinear=nonlinear, interactions=if(length(ia)) structure(ia, dimnames=NULL)) nact <- attr(mf, 'na.action') if(length(nact) && length(nmiss <- nact$nmiss)) { jia <- grep('%ia%', names(nmiss), fixed=TRUE) if(length(jia)) nmiss <- nmiss[-jia] jz <- which(names(nmiss) != '(weights)' & ! grepl('offset\\(', names(nmiss)) & names(nmiss) != '(offset)' & ! grepl('cluster\\(', names(nmiss)) & ! grepl('aTime\\(', names(nmiss))) if(response.pres) jz <- jz[jz > 1] names(nmiss)[jz] <- fname[asm != 9] attr(mf, 'na.action')$nmiss <- nmiss } } else atr <- list(name=NULL, assume=NULL, assume.code=NULL, parms=NULL) attr(mf, 'Design') <- atr attr(mf, 'terms') <- Terms attr(mf, 'sformula') <- sformula if(length(cluster)) { attr(mf, 'cluster') <- cluster attr(mf, 'clustername') <- var.inner(as.formula(paste0('~', clustername))) } if(length(time)) { attr(mf, 'time') <- time attr(mf, 'timename') <- var.inner(as.formula(paste0('~', timename))) } if(length(offs)) attr(mf, 'offset') <- offs mf } modelData <- function(data=environment(formula), formula, formula2=NULL, weights=NULL, subset=NULL, na.action=na.delete, dotexpand=TRUE, callenv=parent.frame(n=2)) { ## calibrate.cph etc. uses a matrix, even if only one column ismat <- function(z) { cl <- class(z) ('matrix' %in% cl) && ('rms' %nin% cl) ## && ncol(z) > 1 } ## Get a list of all variables in either formula ## This is for innermost variables, e.g. 
Surv(a,b) will produce a,b v1 <- all.vars(formula) v2 <- all.vars(formula2) V <- unique(c(v1, v2)) edata <- is.environment(data) rhsdot <- length(v1) == 2 && v1[2] == '.' if(rhsdot && edata) stop('may not specify ~ . in formula when data= is absent') if(edata) { env <- data data <- list() for(v in V) { xv <- env[[v]] if(is.factor(xv)) xv <- xv[, drop=TRUE] ## Note: Surv() has class 'Surv' without class 'matrix' ## This keeps columns together by calling as.data.frame.rms if(ismat(xv)) class(xv) <- unique(c('rms', class(xv))) data[[v]] <- xv } ## Any variables whose length is not equal to the maximum length over ## all variables mentioned in the formulas remain in the original ## environment and will be found in the later eval() ## E.g. rcs(x, knots) where knots is a separate variable n <- sapply(data, NROW) if(! length(n)) stop('no data found') if(diff(range(n)) != 0) data <- data[which(n == max(n))] ## Watch out: if a variable in data has dimnames[[2]], as.data.frame ## uses that as the new variable name even if the variable already ## had a name in the list. This is why a 1-column matrix is kept ## as a matrix in the ismat function above data <- as.data.frame(data) } # end if(edata) ## Can't do else data[V] here as formula may have e.g. Surv(time,event) ## and hasn't been evaluated yet, where data has time and event if(length(weights)) data$`(weights)` <- weights if(length(subset)) data <- data[subset, ] ## Make sure that the second formula doesn't create any NAs on ## observations that didn't already have an NA for variables in main formula if(length(formula2)) { i <- ! complete.cases(data[intersect(names(data), v1)]) j <- ! complete.cases(data[intersect(names(data), v2)]) if(any(j & ! i)) stop('A variable in the second formula was missing on an observation that was not missing on any variable in main formula') } noexpand <- rhsdot & ! dotexpand deparse2 <- function(x) # from stats paste(deparse(x, width.cutoff = 500L, backtick = !is.symbol(x) && is.language(x)), collapse = " ") processdata <- function(formula, data) { if(noexpand) { # no RHS variables to be used predvars <- formula[[2]] varnames <- deparse(predvars) if(length(weights)) { predvars[[2]] <- as.name('(weights)') varnames <- c(varnames, '(weights)') } } else { Terms <- terms(formula, data=data, specials=NULL) vars <- attr(Terms, 'variables') predvars <- attr(Terms, 'predvars') if( ! length(predvars)) predvars <- vars if(length(weights)) predvars[[length(predvars) + 1]] <- as.name('(weights)') } varnames <- vapply(predvars, deparse2, " ")[-1L] data <- if(edata) eval(predvars, data, env) else eval(predvars, data, callenv) if(is.matrix(data)) data <- data.frame(data) # e.g. 
Surv() object names(data) <- varnames ## Any remaining matrices not of class 'rms' must be given class rms ## so that as.data.frame will not split up their variables ism <- sapply(data, ismat) if(any(ism)) for(i in which(ism)) class(data[[i]]) <- unique(c('rms', class(data[[i]]))) ## Since subsetting was completed earlier, now drop unused factor levels ## NOTE: strat() variables are also factors; don't drop their attributes isf <- sapply(data, is.factor) if(any(isf)) for(i in which(isf)) { di <- data[[i]] at <- attributes(di) di <- di[, drop=TRUE] if(length(at$assume.code) && at$assume.code == 8) { at$levels <- at$parms <- levels(di) at$colnames <- paste0(at$name, '=', levels(di)[-1]) attributes(di) <- at[c('class', 'name', 'label', 'assume', 'assume.code', 'parms', 'nonlinear', 'colnames','levels')] data[[i]] <- di } } ## If any variables are less than the maximum length, these must ## have come from the parent environment and did not have subset applied len <- sapply(data, NROW) if(min(len) != max(len)) { if(! length(subset)) stop('program logic error: variables vary in length but subset= was not given') for(i in which(len > min(len))) { x <- data[[i]] data[[i]] <- if(is.matrix(x)) x[subset,,drop=FALSE] else x[subset] } len <- sapply(data, NROW) if(min(len) != max(len)) stop('program logic error in variable lengths') } data <- as.data.frame(data, check.names=FALSE) data <- na.action(data) nac <- attr(data, 'na.action') attr(data, 'na.action') <- nac data } dat <- processdata(formula, data) if(length(formula2)) { omit <- attr(dat, 'na.action')$omit if(length(omit)) data <- data[-omit, , drop=FALSE] dat2 <- processdata(formula2, data) attr(dat, 'data2') <- dat2 } dat } ## Handle spline and other variables with rms class as.data.frame.rms <- function(x, row.names = NULL, optional = FALSE, ...) { nrows <- NROW(x) row.names <- if(optional) character(nrows) else as.character(1:nrows) value <- list(x) if(! optional) names(value) <- deparse(substitute(x))[[1]] structure(value, row.names=row.names, class='data.frame') } rms/R/val.prob.s0000644000176200001440000002764313005641266013202 0ustar liggesusers#Compute various measures of reliability and discrimination for a set #of predicted probabilities p or predicted logits logit. #If pl=T, the following apply: # Plots reliability curve, for which xlab is optional label. # If smooth=T and pl=T, plots lowess(p,y,iter=0) # lim is x-axis and y-axis range, default=c(0,1) # If m or g is specified, also computes and plots proportions of y=1 # by quantile groups of p (or 1/(1+exp(-logit))). If m is given, # groups are constructed to have m observations each on the average. # Otherwise, if g is given, g quantile groups will be constructed. # If instead cuts is given, proportions will be computed based on the # cut points in the vector cuts, e.g. cuts<-seq(0,1,by=.2). # If legendloc is given, a legend will be plotted there # Otherwise, it is placed at (.6, .38) # Use legendloc=locator(1) to use the mouse for legend positioning. # Use legendloc="none" to suppress legend. # If statloc is given, some statistics will be plotted there # Use statloc=locator(1) to use the mouse. This is done after the legend. # legendloc and statloc can be lists as returned by locator() or they # can be vectors, e.g. c(x,y). 
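# Usage sketch (added illustration, not from the original documentation;
# the data are simulated):
#   library(rms)
#   set.seed(1)
#   p <- runif(200)
#   y <- rbinom(200, 1, p)           # outcomes generated from p
#   val.prob(p, y)                   # calibration plot + statistics
#   s <- val.prob(p, y, pl=FALSE)    # statistics only
#   round(s[c('Dxy','C (ROC)','Brier','Intercept','Slope')], 3)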
# #Frank Harrell 1 Jun 91 # val.prob <- function(p, y, logit, group, weights=rep(1,length(y)), normwt=FALSE, pl=TRUE, smooth=TRUE, logistic.cal=TRUE, xlab="Predicted Probability", ylab="Actual Probability", lim=c(0,1), m, g, cuts, emax.lim=c(0,1), legendloc=lim[1] + c(.55 * diff(lim), .27 * diff(lim)), statloc=c(0,.99), riskdist=c("predicted", "calibrated"), cex=.7, mkh=.02, connect.group=FALSE, connect.smooth=TRUE, g.group=4, evaluate=100, nmin=0) { if(missing(p)) p <- plogis(logit) else logit <- qlogis(p) if(length(p) != length(y)) stop("lengths of p or logit and y do not agree") names(p) <- names(y) <- names(logit) <- NULL riskdist <- match.arg(riskdist) Spi <- function(p, y) { z <- sum((y - p)*(1 - 2*p)) / sqrt(sum((1 - 2 * p) * (1 - 2 * p) * p * (1-p))) P <- 2 * pnorm(- abs(z)) c(Z=z, P=P) } if(! missing(group)) { if(length(group)==1 && is.logical(group) && group) group <- rep('', length(y)) if(! is.factor(group)) group <- if(is.logical(group) || is.character(group)) as.factor(group) else cut2(group, g=g.group) names(group) <- NULL nma <- ! (is.na(p + y + weights) | is.na(group)) ng <- length(levels(group)) } else { nma <- ! is.na(p + y + weights) ng <- 0 } logit <- logit[nma] y <- y[nma] p <- p[nma] if(ng > 0) { group <- group[nma] weights <- weights[nma] return(val.probg(p, y, group, evaluate, weights, normwt, nmin) ) } if(length(unique(p)) == 1) { P <- mean(y) Intc <- qlogis(P) n <- length(y) D <- -1 / n L01 <- -2 * sum(y * logit - logb(1 + exp(logit)), na.rm=TRUE) L.cal <- -2 * sum(y * Intc - logb(1 + exp(Intc)), na.rm=TRUE) U.chisq <- L01 - L.cal U.p <- 1 - pchisq(U.chisq, 1) U <- (U.chisq - 1) / n Q <- D - U spi <- unname(Spi(p, y)) stats <- c(0, .5, 0, D, 0, 1, U, U.chisq, U.p, Q, mean((y - p[1]) ^ 2), Intc, 0, 0, 0, rep(abs(p[1] - P), 2), spi) names(stats) <- c("Dxy","C (ROC)", "R2","D","D:Chi-sq","D:p","U","U:Chi-sq","U:p","Q", "Brier","Intercept","Slope","Emax","E90","Eavg", "S:z", "S:p") return(stats) } i <- ! is.infinite(logit) nm <- sum(! i) if(nm > 0) warning(paste(nm, "observations deleted from logistic calibration due to probs. of 0 or 1")) f.fixed <- lrm.fit(logit[i], y[i], initial=c(0., 1.), maxit=1L) f.recal <- lrm.fit(logit[i], y[i]) stats <- f.fixed$stats n <- stats["Obs"] predprob <- seq(emax.lim[1], emax.lim[2], by=.0005) Sm <- lowess(p, y, iter=0) cal.smooth <- approx(Sm, xout=p, ties=mean)$y er <- abs(p - cal.smooth) eavg <- mean(er) emax <- max(er) e90 <- unname(quantile(er, 0.9)) if(pl) { plot(.5, .5, xlim=lim, ylim=lim, type="n", xlab=xlab, ylab=ylab) abline(0, 1, lwd=6, col=gray(.85)) lt <- 1; leg <- "Ideal"; marks <- -1; lwd <- 6; col <- gray(.85) if(logistic.cal) { lt <- c(lt, 1); leg <- c(leg, "Logistic calibration") lwd <- c(lwd, 1); col <- c(col, 'black') marks <- c(marks, -1) } if(smooth) { if(connect.smooth) { lines(Sm, lty=3) lt <- c(lt, 3) lwd <- c(lwd, 1); col <- c(col, 'black') marks <- c(marks, -1) } else { points(Sm) lt <- c(lt, 0) lwd <- c(lwd, 1); col <- c(col, 'black') marks <- c(marks, 1) } leg <- c(leg, "Nonparametric") } if(! missing(m) | ! missing(g) | ! missing(cuts)) { if(! missing(m)) q <- cut2(p, m=m, levels.mean=TRUE, digits=7) else if(! missing(g)) q <- cut2(p, g=g, levels.mean=TRUE, digits=7) else if(! 
missing(cuts)) q <- cut2(p, cuts=cuts, levels.mean=TRUE, digits=7) means <- as.numeric(levels(q)) prop <- tapply(y, q, function(x) mean(x, na.rm=TRUE)) points(means, prop, pch=2) if(connect.group) {lines(means, prop); lt <- c(lt, 1)} else lt <- c(lt, 0) leg <- c(leg, "Grouped observations") col <- c(col, 'black'); lwd <- c(lwd, 1) marks <- c(marks, 2) } } lr <- stats["Model L.R."] p.lr <- stats["P"] D <- (lr - 1) / n L01 <- -2 * sum(y * logit - logb(1 + exp(logit)), na.rm=TRUE) U.chisq <- L01 - f.recal$deviance[2] p.U <- 1 - pchisq(U.chisq, 2) U <- (U.chisq - 2)/n Q <- D - U Dxy <- stats["Dxy"] C <- stats["C"] R2 <- stats["R2"] B <- mean((p - y) ^ 2) spi <- unname(Spi(p, y)) stats <- c(Dxy, C, R2, D, lr, p.lr, U, U.chisq, p.U, Q, B, f.recal$coef, emax, e90, eavg, spi) names(stats) <- c("Dxy","C (ROC)", "R2","D","D:Chi-sq","D:p","U","U:Chi-sq","U:p","Q", "Brier","Intercept","Slope","Emax","E90","Eavg","S:z","S:p") if(pl) { logit <- seq(-7, 7, length=200) prob <- plogis(logit) pred.prob <- f.recal$coef[1] + f.recal$coef[2] * logit pred.prob <- plogis(pred.prob) if(logistic.cal) lines(prob, pred.prob, lty=1) lp <- legendloc if(! is.logical(lp)) { if(! is.list(lp)) lp <- list(x=lp[1],y=lp[2]) legend(lp, leg, lty=lt, pch=marks, cex=cex, lwd=lwd, col=col, bty="n") } if(! is.logical(statloc)) { dostats <- c("Dxy", "C (ROC)", "R2", "D", "U", "Q", "Brier", "Intercept", "Slope", "Emax", "E90", "Eavg", "S:z", "S:p") leg <- format(names(stats)[dostats]) #constant length leg <- paste(leg, ":", format(stats[dostats]),sep="") if(! is.list(statloc)) statloc <- list(x=statloc[1], y=statloc[2]) text(statloc,paste(format(names(stats[dostats])), collapse="\n"), adj=c(0, 1), cex=cex) text(statloc$x + .225 * diff(lim), statloc$y, paste(format(round(stats[dostats], 3)), collapse="\n"), adj=c(1,1), cex=cex) } if(is.character(riskdist)) { if(riskdist=="calibrated") { x <- f.recal$coef[1] + f.recal$coef[2] * qlogis(p) x <- plogis(x) x[p == 0] <- 0; x[p == 1] <- 1 } else x <- p bins <- seq(lim[1], lim[2], length=101) x <- x[x >= lim[1] & x <= lim[2]] f <- table(cut(x, bins)) j <- f > 0 bins <- (bins[-101])[j] f <- f[j] f <- lim[1] + .15 * diff(lim) * f / max(f) segments(bins, 0, bins, f) } } stats } val.probg <- function(p, y, group, evaluate=100, weights, normwt, nmin) { if(normwt) weights <- length(y)*weights/sum(weights) ng <- length(lg <- levels(group)) if(ng==1) {ng <- 0; lg <- character(0)} stats <- matrix(NA, nrow=ng+1, ncol=12, dimnames=list(nn <- c(lg,'Overall'), c('n','Pavg','Obs','ChiSq','ChiSq2','Eavg', 'Eavg/P90','Med OR','C','B','B ChiSq','B cal'))) curves <- vector('list',ng+1) names(curves) <- nn q.limits <- c(.01,.025,.05,.1,.25,.5,.75,.9,.95,.975,.99) limits <- matrix(NA, nrow=ng+1, ncol=length(q.limits), dimnames=list(nn, as.character(q.limits))) for(i in 1:(ng+1)) { s <- if(i==(ng+1)) 1:length(p) else group==lg[i] P <- p[s] Y <- y[s] wt <- weights[s] lims <- wtd.quantile(P, wt, q.limits, na.rm=FALSE, normwt=FALSE) limits[i,] <- lims n <- sum(wt) n1 <- sum(wt[Y == 1]) c.index <- (mean(wtd.rank(P, wt, na.rm=FALSE, normwt=FALSE)[Y == 1]) - (n1 + 1)/2)/(n - n1) ## c.index <- somers2(P, Y, wt, normwt=FALSE, na.rm=FALSE)['C'] sm <- wtd.loess.noiter(P, Y, wt, na.rm=FALSE, type='all') ##all -> return all points curve <- if(length(sm$x) > evaluate) approx(sm, xout=seq(min(P), max(P), length=evaluate), ties=mean) else { o <- order(sm$x) nd <- ! 
duplicated(sm$x[o]) list(x=sm$x[o][nd], y=sm$y[o][nd]) } if(nmin > 0) { cuts <- wtd.quantile(P, wt, c(nmin, n-nmin)/n, normwt=FALSE, na.rm=FALSE) keep <- curve$x >= cuts[1] & curve$x <= cuts[2] curve <- list(x=curve$x[keep], y=curve$y[keep]) } curves[[i]] <- curve cal.smooth <- sm$y eavg <- sum(wt * abs(P - cal.smooth))/n b <- sum(wt * ((P - Y)^2))/n E0b <- sum(wt * P * (1 - P))/n Vb <- sum(wt * ((1 - 2 * P)^2) * P * (1 - P))/n/n bchisq <- (b - E0b)^2 / Vb b.cal <- sum(wt * ((cal.smooth - Y)^2))/n pred <- sum(wt * P)/n obs <- sum(wt * Y)/n L <- ifelse(P==0 | P==1, NA, qlogis(P)) w <- ! is.na(L) del <- matrix(c(sum((wt*(Y-P))[w]),sum((wt*L*(Y-P))[w])),ncol=2) v <- rbind(c(sum((wt*P*(1-P))[w]), sum((wt*L*P*(1-P))[w])), c(NA, sum((wt*L*L*P*(1-P))[w]))) v[2,1] <- v[1,2] chisq <- (sum(wt * (P - Y))^2) / sum(wt * P * (1 - P)) chisq2 <- del %*% solve(v) %*% t(del) p90 <- diff(lims[c(3,9)]) Lcal <- ifelse(cal.smooth <= 0 | cal.smooth >= 1, NA, qlogis(cal.smooth)) or <- exp(wtd.quantile(abs(L - Lcal), wt, .5, na.rm=TRUE, normwt=FALSE)) stats[i,] <- c(n, pred, obs, chisq, chisq2, eavg, eavg/p90, or, c.index, b, bchisq, b.cal) } structure(list(stats=stats, cal.curves=curves, quantiles=limits), class='val.prob') } print.val.prob <- function(x, ...) { print(round(x$stats,3)) cat('\nQuantiles of Predicted Probabilities\n\n') print(round(x$quantiles,3)) invisible() } plot.val.prob <- function(x, xlab="Predicted Probability", ylab="Actual Probability", lim=c(0,1), statloc=lim, stats=1:12, cex=.5, lwd.overall=4, quantiles=c(0.05,0.95), flag=function(stats) ifelse( stats[,'ChiSq2'] > qchisq(.99,2) | stats[,'B ChiSq'] > qchisq(.99,1),'*',' '), ...) { stats <- x$stats[,stats,drop=FALSE] lwd <- rep(par('lwd'), nrow(stats)) lwd[dimnames(stats)[[1]]=='Overall'] <- lwd.overall curves <- x$cal.curves labcurve(curves, pl=TRUE, xlim=lim, ylim=lim, xlab=xlab, ylab=ylab, cex=cex, lwd=lwd, ...) abline(a=0, b=1, lwd=6, col=gray(.86)) if(is.logical(statloc) && ! statloc) return(invisible()) if(length(quantiles)) { limits <- x$quantiles quant <- round(as.numeric(dimnames(limits)[[2]]),3) w <- quant %in% round(quantiles,3) if(any(w)) for(j in 1:nrow(limits)) { qu <- limits[j,w] scat1d(qu, y=approx(curves[[j]], xout=qu, ties=mean)$y) } } xx <- statloc[1]; y <- statloc[2] for(i in 0:ncol(stats)) { column.text <- if(i==0) c('Group', paste(flag(stats),dimnames(stats)[[1]],sep='')) else c(dimnames(stats)[[2]][i], format(round(stats[,i], if(i %in% c(4:5,11))1 else 3))) cat(column.text, '\n') text(xx, y, paste(column.text, collapse='\n'), adj=0, cex=cex) xx <- xx + (1 + .8 * max(nchar(column.text))) * cex * par('cxy')[1] } invisible() } rms/R/latex.psm.s0000644000176200001440000000265213020562270013355 0ustar liggesuserslatex.psm <- function(object, title, file='', append=FALSE, which=NULL, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', ...) { md <- prType() %in% c('html', 'md', 'markdown') f <- object whichNot <- length(which)==0 w <- if(length(caption)) { if(md) paste('
<div align=center><strong>', caption, '</strong></div>
', sep='') else paste('\\begin{center} \\bf',caption,'\\end{center}') } if(whichNot & !inline) { dist <- f$dist w <- c(w, paste("\\[{\\rm Prob}\\{T\\geq t\\} = ", survreg.auxinfo[[dist]]$latex(f$scale), "{\\rm \\ \\ where} \\\\ \\]",sep="")) } atr <- f$Design if(whichNot) which <- 1:length(atr$name) if(missing(varnames)) varnames <- atr$name[atr$assume.code!=9] if(! md) cat(w, sep=if(length(w)) "\n" else "", file=file, append=append) z <- latexrms(f, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix=if(whichNot)"X\\hat{\\beta}" else NULL, inline=inline,pretrans=pretrans, digits=digits, size=size) if(md) htmltools::HTML(c(paste0(w, '\n'), as.character(z))) else z } rms/R/nomogram.s0000644000176200001440000004022714024265237013271 0ustar liggesusersnomogram <- function(fit, ..., adj.to, lp=TRUE, lp.at=NULL, fun=NULL, fun.at=NULL, fun.lp.at=NULL, funlabel="Predicted Value", interact=NULL, kint=NULL, conf.int=FALSE, conf.lp=c("representative", "all", "none"), est.all=TRUE, posterior.summary=c('mean', 'median', 'mode'), abbrev=FALSE, minlength=4, maxscale=100, nint=10, vnames=c("labels","names"), varname.label=TRUE, varname.label.sep="=", omit=NULL, verbose=FALSE) { conf.lp <- match.arg(conf.lp) vnames <- match.arg(vnames) posterior.summary <- match.arg(posterior.summary) if(length(fit$pppo) && fit$pppo > 0) stop('nomogram will not work for partial proportional odds models') Format <- function(x) { # like format but does individually f <- character(l <- length(x)) for(i in 1:l) f[i] <- format(x[i]) f } abb <- (is.logical(abbrev) && abbrev) || is.character(abbrev) if(is.logical(conf.int) && conf.int) conf.int <- c(.7,.9) draws <- fit$draws bayes <- length(draws) > 0 se <- any(conf.int > 0) if(bayes) se <- FALSE nfun <- if(!length(fun)) 0 else if(is.list(fun)) length(fun) else 1 if(nfun>1 && length(funlabel) == 1) funlabel <- rep(funlabel, nfun) if(nfun>0 && is.list(fun) && length(names(fun))) funlabel <- names(fun) if(length(fun.at) && !is.list(fun.at)) fun.at <- rep(list(fun.at), nfun) if(length(fun.lp.at) && !is.list(fun.lp.at)) fun.lp.at <- rep(list(fun.lp.at), nfun) at <- fit$Design assume <- at$assume.code if(any(assume >= 10)) warning("does not currently work with matrix or gTrans factors in model") name <- at$name names(assume) <- name parms <- at$parms label <- if(vnames == "labels") at$label else name if(any(d <- duplicated(name))) stop(paste("duplicated variable names:", paste(name[d],collapse=" "))) label <- name if(vnames == "labels") { label <- at$label if(any(d <- duplicated(label))) stop(paste("duplicated variable labels:", paste(label[d],collapse=" "))) } ia <- at$interactions factors <- rmsArgs(substitute(list(...))) nf <- length(factors) which <- if(est.all) (1:length(assume))[assume != 8] else (1:length(assume))[assume != 8 & assume != 9] if(nf > 0) { jw <- charmatch(names(factors), name, 0) if(any(jw == 0)) stop(paste("factor name(s) not in the design:", paste(names(factors)[jw == 0], collapse=" "))) if(!est.all) which <- jw } Limval <- Getlim(at, allow.null=TRUE, need.all=FALSE) values <- Limval$values lims <- Limval$limits[c(6, 2, 7),, drop=FALSE] ## Keep character variables intact lims <- unclass(lims) for(i in 1:length(lims)) if(is.factor(lims[[i]])) lims[[i]] <- as.character(lims[[i]]) attr(lims, 'class') <- 'data.frame' # so can subscript later ## Find underlying categorical variables ucat <- rep(FALSE, length(assume)) names(ucat) <- name for(i in (1:length(assume))[assume != 5 & assume < 8]) { ucat[i] <- 
!is.null(V <- values[[name[i]]]) # did add && is.character(V) if(ucat[i]) parms[[name[i]]] <- V } discrete <- assume == 5 | assume == 8 | ucat names(discrete) <- name ## Number of non-slopes: nrp <- if(bayes) num.intercepts(fit) else num.intercepts(fit, 'coef') ir <- fit$interceptRef if(!length(ir)) ir <- 1 if(!length(kint)) kint <- ir coefs <- if(bayes) coef(fit, stat=posterior.summary) else fit$coefficients Intercept <- if(nrp > 0) coefs[kint] else if(length(fit$center)) (- fit$center ) else 0 intercept.offset <- coefs[kint] - coefs[ir] beta0 <- Intercept settings <- list() for(i in which[assume[which] < 9]) { ni <- name[i] z <- factors[[ni]] lz <- length(z) if(lz < 2) settings[[ni]] <- value.chk(at, i, NA, -nint, Limval, type.range="full") else if(lz > 0 && any(is.na(z))) stop("may not specify NA as a variable value") if(lz == 1) lims[2,i] <- z else if(lz > 1) { settings[[ni]] <- z if(is.null(lims[[ni]]) || is.na(lims[2, ni])) { lims[[ni]] <- c(NA, z[1], NA) warning(paste("adjustment values for ",ni, " not defined in datadist; taken to be first value specified (", z[1], ")" ,sep="")) } } } adj <- lims[2,, drop=FALSE] if(!missing(adj.to)) for(nn in names(adj.to)) adj[[nn]] <- adj.to[[nn]] isna <- sapply(adj, is.na) if(any(isna)) stop( paste("adjustment values not defined here or with datadist for", paste(name[assume != 9][isna],collapse=" "))) num.lines <- 0 entities <- 0 main.space.used <- ia.space.used <- 0 set <- list() nset <- character(0) iset <- 0 start <- len <- NULL end <- 0 ## Sort to do continuous factors first if any interactions present main.effects <- which[assume[which] < 8] ## this logic not handle strata w/intera. if(any(assume == 9)) main.effects <- main.effects[order(10 * discrete[main.effects] + (name[main.effects] %in% names(interact)))] ## For each predictor, get vector of predictor numbers directly or ## indirectly associated with it rel <- related.predictors(at) # Function in rmsMisc.s already.done <- structure(rep(FALSE,length(name)), names=name) for(i in main.effects) { nam <- name[i] if(already.done[nam] || (nam %in% omit)) next r <- if(length(rel[[nam]])) sort(rel[[nam]]) else NULL if(length(r) == 0) { #main effect not contained in any interactions num.lines <- num.lines + 1 main.space.used <- main.space.used + 1 entities <- entities + 1 x <- list() x[[nam]] <- settings[[nam]] iset <- iset + 1 attr(x,'info') <- list(nfun=nfun, predictor=nam, effect.name=nam, type='main') set[[iset]] <- x nset <- c(nset, label[i]) start <- c(start, end + 1) n <- length(settings[[nam]]) len <- c(len, n) end <- end + n } else { namo <- name[r] s <- !(name[r] %in% names(interact)) if(any(s)) { if(!length(interact)) interact <- list() for(j in r[s]) { nj <- name[j] if(discrete[j]) interact[[nj]] <- parms[[nj]] } s <- !(name[r] %in% names(interact)) } if(any(s)) stop(paste("factors not defined in interact=list(...):", paste(name[r[s]], collapse=","))) combo <- expand.grid(interact[namo]) #list[vector] gets sublist class(combo) <- NULL ## so combo[[n]] <- as.character will really work acombo <- combo if(abb) for(n in if(is.character(abbrev)) abbrev else names(acombo)) { if(discrete[n]) { acombo[[n]] <- abbreviate(parms[[n]], minlength=if(minlength == 1) 4 else minlength)[combo[[n]]] ## lucky that abbreviate function names its result } } for(n in names(combo)) if(is.factor(combo[[n]])) { combo[[n]] <- as.character(combo[[n]]) ## so row insertion will work xadj acombo[[n]] <- as.character(acombo[[n]]) #so format() will work } entities <- entities + 1 already.done[namo] <- TRUE for(k in 
1:length(combo[[1]])) { num.lines <- num.lines + 1 if(k == 1) main.space.used <- main.space.used + 1 else ia.space.used <- ia.space.used + 1 x <- list() x[[nam]] <- settings[[nam]] #store fastest first for(nm in namo) x[[nm]] <- combo[[nm]][k] iset <- iset + 1 set.name <- paste(nam, " (", sep="") for(j in 1:length(acombo)) { set.name <- paste(set.name, if(varname.label) paste(namo[j], varname.label.sep, sep="") else "", format(acombo[[j]][k]), sep="") if(j < length(acombo)) set.name <- paste(set.name," ",sep="") } set.name <- paste(set.name, ")", sep="") ## Make list of all terms needing inclusion in calculation ## Include interation term names - interactions.containing in rmsMisc.s ia.names <- NULL for(j in r) ia.names <- c(ia.names, name[interactions.containing(at, j)]) ia.names <- unique(ia.names) attr(x,'info') <- list(predictor=nam, effect.name=c(nam, namo[assume[namo] != 8], ia.names), type=if(k == 1) "first" else "continuation") set[[iset]] <- x nset <- c(nset, set.name) ## Don't include strata main effects start <- c(start, end + 1) n <- length(settings[[nam]]) len <- c(len, n) end <- end + n } } } xadj <- unclass(rms.levels(adj, at)) for(k in 1:length(xadj)) xadj[[k]] <- rep(xadj[[k]], sum(len)) j <- 0 for(S in set) { j <- j + 1 ns <- names(S) nam <- names(S) for(k in 1:length(nam)) xadj[[nam[k]]][start[j] : (start[j] + len[j]-1)] <- S[[k]] } xadj <- structure(xadj, class='data.frame', row.names=as.character(1 : sum(len))) xx <- predictrms(fit, newdata=xadj, type="terms", center.terms=FALSE, se.fit=FALSE, kint=kint) if(any(is.infinite(xx))) stop("variable limits and transformations are such that an infinite axis value has resulted.\nRe-run specifying your own limits to variables.") if(se) xse <- predictrms(fit, newdata=xadj, se.fit=TRUE, kint=kint) R <- matrix(NA, nrow=2, ncol=length(main.effects), dimnames=list(NULL,name[main.effects])) R[1,] <- 1e30 R[2,] <- -1e30 ## R <- apply(xx, 2, range) - does not work since some effects are for ## variable combinations that were never used in constructing axes for(i in 1:num.lines) { is <- start[i]; ie <- is + len[i]-1 s <- set[[i]] setinfo <- attr(s, 'info') nam <- setinfo$effect.name xt <- xx[is : ie, nam] if(length(nam) > 1) xt <- apply(xt, 1, sum) # add all terms involved set[[i]]$Xbeta <- xt r <- range(xt) pname <- setinfo$predictor R[1,pname] <- min(R[1,pname], r[1]) R[2,pname] <- max(R[2,pname], r[2]) if(se) { set[[i]]$Xbeta.whole <- xse$linear.predictors[is:ie] #note-has right interc. 
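## Xbeta.whole keeps the intercept-included linear predictor so that
## confidence limits for the linear predictor axis can be computed on the
## original scale by the conf.lp code further below.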
set[[i]]$se.fit <- xse$se.fit[is:ie] } } R <- R[,R[1,] < 1e30, drop=FALSE] sc <- maxscale / max(R[2,] - R[1,]) Intercept <- Intercept + sum(R[1,]) ###if(missing(naxes)) naxes <- ### if(total.sep.page) max(space.used + 1, nfun + lp + 1) else ### space.used + 1 + nfun + lp + 1 i <- 0 names(set) <- nset ns <- names(set) Abbrev <- list() qualForce <- character() for(S in set) { i <- i + 1 setinfo <- attr(S,'info') type <- setinfo$type x <- S[[1]] nam <- names(S)[1] #stored with fastest first fx <- if(is.character(x)) x else sedit(Format(x)," ","") #axis not like bl - was translate() if(abb && discrete[nam] && (is.logical(abbrev) || nam %in% abbrev)) { old.text <- fx fx <- if(abb && minlength == 1)letters[1:length(fx)] else abbreviate(fx, minlength=minlength) Abbrev[[nam]] <- list(abbrev=fx, full=old.text) } j <- match(nam, name, 0) if(any(j == 0)) stop("program logic error 1") is <- start[i] ie <- is + len[i] - 1 xt <- (S$Xbeta - R[1,nam]) * sc set[[i]]$points <- xt ## Find flat pieces and combine their labels r <- rle(xt) if(any(r$length > 1)) { is <- 1 for(j in r$length) { ie <- is + j - 1 if(j > 1) { fx[ie] <- if(discrete[nam] || ie < length(xt)) paste(fx[is], "-", fx[ie], sep="") else paste(fx[is], '+', sep='') fx[is:(ie-1)] <- "" xt[is:(ie-1)] <- NA } is <- ie + 1 } fx <- fx[!is.na(xt)] xt <- xt[!is.na(xt)] } } if(! length(lp.at)) { xb <- if(bayes) { X <- fit[['x']] if(! length(X)) stop('when lp.at is not specified you must specify x=TRUE in the fit') jj <- if(nrp == 0) 1 : length(coefs) else (nrp + 1) : length(coefs) beta0 + (X %*% coefs[jj]) } else fit$linear.predictors if(!length(xb)) xb <- fit$fitted.values if(!length(xb)) xb <- fit$fitted if(!length(xb)) stop("lp.at not given and fit did not store linear.predictors or fitted.values") if(nrp > 1) xb <- xb + intercept.offset lp.at <- pretty(range(xb), n=nint) } sum.max <- if(entities == 1) maxscale else max(maxscale, sc * max(lp.at - Intercept)) x <- pretty(c(0, sum.max), n=nint) new.max <- max(x) iset <- iset + 1 nset <- c(nset, 'total.points') set[[iset]] <- list(x=x) if(lp) { x2 <- seq(lp.at[1], max(lp.at), by=(lp.at[2] - lp.at[1]) / 2) scaled.x <- (lp.at - Intercept) * sc iset <- iset + 1 nset <- c(nset, 'lp') if(se && conf.lp != 'none') { xxb <- NULL xse <- NULL for(S in set) { xxb <- c(xxb, S$Xbeta.whole) xse <- c(xse, S$se.fit) } i <- order(xxb) if(length(xxb)<16 | conf.lp == "representative") {nlev <- 4; w <- 1} else {nlev <- 8; w <- 2} if(conf.lp == "representative") { deciles <- cut2(xxb[i], g=10) mean.xxb <- tapply(xxb[i], deciles, mean) median.se <- tapply(xse[i], deciles, median) xc <- (mean.xxb - Intercept)*sc sec <- sc*median.se } else { xc <- (xxb[i]-Intercept)*sc sec <- sc*xse[i] } set[[iset]] <- list(x=scaled.x, x.real=lp.at, conf=list(x=xc, se=sec, w=w, nlev=nlev)) } else set[[iset]] <- list(x=scaled.x, x.real=lp.at) } if(nfun > 0) { if(!is.list(fun)) fun <- list(fun) i <- 0 for(func in fun) { i <- i + 1 ## Now get good approximation to inverse of fun evaluated at fat ## Unless inverse function given explicitly if(!missing(fun.lp.at)) { xseq <- fun.lp.at[[i]] fat <- func(xseq) w <- xseq } else { if(missing(fun.at)) fat <- pretty(func(range(lp.at)), n=nint) else fat <- fun.at[[i]] if(verbose) { cat('Function',i,'values at which to place tick marks:\n') print(fat) } xseq <- seq(min(lp.at), max(lp.at), length=1000) fu <- func(xseq) s <- !is.na(fu) w <- approx(fu[s], xseq[s], fat, ties=mean)$y if(verbose) { cat('Estimated inverse function values (lp):\n') print(w) } } s <- !(is.na(w) | is.na(fat)) w <- w[s] fat <- fat[s] 
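## w now holds the linear-predictor positions for the surviving tick marks
## in fat; pairs with NA (e.g. requested ticks outside the range of func
## over lp.at) were just dropped.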
fat.orig <- fat fat <- if(is.factor(fat)) as.character(fat) else Format(fat) scaled <- (w - Intercept) * sc iset <- iset + 1 nset <- c(nset, funlabel[i]) set[[iset]] <- list(x=scaled, x.real=fat.orig, fat=fat, which=s) } } names(set) <- nset attr(set, 'info') <- list(fun=fun, lp=lp, lp.at=lp.at, discrete=discrete, funlabel=funlabel, fun.at=fun.at, fun.lp.at=fun.lp.at, Abbrev=Abbrev, minlength=minlength, conf.int=conf.int, R=R, sc=sc, maxscale=maxscale, Intercept=Intercept, nint=nint, space.used=c(main=main.space.used, ia=ia.space.used)) class(set) <- "nomogram" set } print.nomogram <- function(x, dec=0, ...) { obj <- x w <- diff(range(obj$lp$x)) / diff(range(obj$lp$x.real)) cat('Points per unit of linear predictor:', format(w), '\nLinear predictor units per point :', format(1 / w), '\n\n') fun <- FALSE for(x in names(obj)) { k <- x == 'total.points' || x == 'lp' || x == 'abbrev' if(k) { fun <- TRUE; next } y <- obj[[x]] if(fun) { z <- cbind(round(y[[1]],dec), y$x.real) dimnames(z) <- list(rep('',nrow(z)), c('Total Points',x)) } else { z <- cbind(format(y[[1]]), format(round(y$points,dec))) dimnames(z) <- list(rep('',length(y$points)), c(x, 'Points')) ## didn't use data.frame since wanted blank row names } cat('\n') print(z, quote=FALSE) cat('\n') } invisible() } rms/R/lrm.fit.strat.s0000644000176200001440000001733313163533540014161 0ustar liggesuserslrm.fit.strat <- function(x, y, strata, offset=0, initial, maxit=25, eps=.025, tol=1E-7, trace=FALSE, penalty.matrix=NULL, strata.penalty=0, weights=NULL, normwt) { cal <- match.call() opts <- double(12) len.penmat <- length(penalty.matrix) lev <- levels(strata) nstrat <- length(lev) strata <- unclass(strata) n <- length(y) if(!length(weights)) { normwt <- FALSE weights <- rep(1,n) } if(length(weights) != n) stop('weights and y must be same length') storage.mode(weights) <- 'double' opts[12] <- normwt ## weights not implemented for stratified models yet initial.there <- !missing(initial) if(missing(x) || length(x)==0) { nx <- 0 xname <- NULL x <- 0 } else { if(!is.matrix(x)) x <- as.matrix(x) storage.mode(x) <- "double" dx <- dim(x) nx <- dx[2] if(dx[1]!=n)stop("x and y must have same length") xname <- dimnames(x)[[2]] if(length(xname)==0) xname <- paste("x[",1:nx,"]",sep="") } nxin <- nx if(!is.factor(y)) y <- as.factor(y) y <- unclass(y) # in case is.factor ylevels <- levels(y) if(n < 3)stop("must have >=3 non-missing observations") kint <- as.integer(length(ylevels)-1) if(kint != 1) stop('only works for binary y') ftable <- integer(5001*(kint+1)) levels(y) <- ylevels numy <- table(y) y <- as.integer(y-1) nvi <- as.integer(nxin+kint+nstrat-1) if(missing(initial)) { ncum <- rev(cumsum(rev(numy)))[2:(kint+1)] pp <- ncum/n initial <-logb(pp/(1-pp)) initial <- initial - mean(offset) } if(length(initial) < nvi) initial <- c(initial,rep(0,nvi-length(initial))) storage.mode(initial) <- "double" loglik <- -2 * sum(numy * logb(numy/n)) if(nxin > 0) { if(len.penmat==0) penalty.matrix <- matrix(0,nrow=nx,ncol=nx) if(nrow(penalty.matrix)!=nx || ncol(penalty.matrix)!=nx) stop(paste("penalty.matrix does not have",nx,"rows and columns")) penmat <- rbind( matrix(0,ncol=kint+nx,nrow=kint), cbind(matrix(0,ncol=kint,nrow=nx),penalty.matrix)) } else penmat <- matrix(0, ncol=kint, nrow=kint) storage.mode(penmat) <- 'double' ofpres <- !all(offset == 0) storage.mode(offset) <- 'double' if(nxin==0 & !ofpres) { loglik <- rep(loglik,2) z <- list(coef=initial,u=rep(0,kint),opts=c(rep(0,7),.5,0,0,0)) } if(ofpres) { ##Fit model with only intercept(s) and offset z <- 
.Fortran("lrmfit",coef=initial,as.integer(0),0,x,y,offset, u=double(kint), double(kint*(kint+1)/2),loglik=double(1),n,as.integer(0), numy,kint, v=double(kint*kint),double(kint),double(kint), double(kint),pivot=integer(kint),opts=opts,ftable, penmat,weights, PACKAGE="rms") loglik <- c(loglik,z$loglik) if(z$opts[6] | z$opts[7] maxit) return(list(fail=TRUE, class='lrm')) xname <- c(xname, lev[-1]) if(kint==1) name <- "Intercept" else name <- paste("y>=",ylevels[2:(kint+1)],sep="") name <- c(name, xname) theta <- drop(theta) names(theta) <- name loglik <- c(loglik, obj) dimnames(AA) <- list(name[1:nns],name[1:nns]) dimnames(BB) <- dimnames(BCi) <- list(name[1:nns],name[(nns+1):nvi]) names(BCi) <- NULL llnull <- loglik[length(loglik)-1] model.lr <- llnull-loglik[length(loglik)] model.df <- nvi - kint if(initial.there) model.p <- NA else { if(model.df>0) model.p <- 1-pchisq(model.lr,model.df) else model.p <- 1 } r2 <- 1-exp(-model.lr/n) r2.max <- 1-exp(-llnull/n) r2 <- r2/r2.max Brier <- mean((pred - (y>0))^2) stats <- c(n,max(abs(u)),model.lr,model.df,model.p, ## z$opts[8],z$opts[9],z$opts[10], z$opts[11], r2, Brier) nam <- c("Obs","Max Deriv", "Model L.R.","d.f.","P", ##"C","Dxy","Gamma","Tau-a", "R2","Brier") names(stats) <- nam vcov <- function(fit, which=c('strata.var','var','strata.var.diag')) { which <- match.arg(which) strata.penalty <- fit$strata.penalty v <- 1 / (fit$strata.unpen.diag.info + strata.penalty) nstrat <- fit$nstrat k <- (strata.penalty/nstrat)/(1 - (strata.penalty/nstrat)*sum(v)) sname <- fit$strata.levels[-1] CC <- diag(v) + k * v %*% t(v) -t(fit$cov.nonstrata.strata) %*% fit$BCi switch(which, strata.var = structure(CC, dimnames=list(sname,sname)), strata.var.diag = structure(diag(CC), names=sname), var = structure(rbind(cbind(fit$var,fit$cov.nonstrata.strata), cbind(t(fit$cov.nonstrata.strata),CC)), dimnames=list(nn <- names(fit$coef),nn))) } retlist <- list(call=cal,freq=numy, stats=stats,fail=FALSE,coefficients=theta[1:nns], non.slopes=1,est=1:(nvi-kint), var=AA,u=u, deviance=loglik, linear.predictors=logit, penalty.matrix=if(nxin>0 && any(penalty.matrix!=0)) penalty.matrix else NULL, nstrat=nstrat, strata.levels=lev, strata.coefficients=theta[(nns+1):nvi], strata.penalty=strata.penalty, strata.unpen.diag.info=dd, cov.nonstrata.strata=BB, BCi=BCi, vcov=vcov, ## info.matrix=rbind(cbind(A,B),cbind(t(B),diag(dd)))) info.matrix=A) class(retlist) <- c("lrm","lm") retlist } rms/R/robcov.s0000644000176200001440000000505213101622310012721 0ustar liggesusersrobcov <- function(fit, cluster, method=c('huber','efron')) { method <- match.arg(method) var <- vcov(fit, intercepts='all') vname <- dimnames(var)[[1]] if(inherits(fit, "ols")) var <- fit$df.residual * var / sum(fit$residuals ^ 2) #back to X'X else if(method=='efron') stop('method="efron" only works for ols fits') X <- as.matrix(residuals(fit, type=if(method=='huber')"score" else "hscore")) n <- nrow(X) if(missing(cluster)) { clusterInfo <- NULL cluster <- 1 : n } else { if(any(is.na(cluster))) stop("cluster contains NAs") clusterInfo <- list(name=deparse(substitute(cluster))) } if(length(cluster) != n) stop("length of cluster does not match number of observations used in fit") cluster <- as.factor(cluster) p <- ncol(var) j <- is.na(X %*% rep(1, ncol(X))) if(any(j)) { X <- X[! j,, drop=FALSE] cluster <- cluster[! 
j, drop=TRUE] n <- length(cluster) } j <- order(cluster) X <- X[j, , drop=FALSE] clus.size <- table(cluster) if(length(clusterInfo)) clusterInfo$n <- length(clus.size) clus.start <- c(1, 1 + cumsum(clus.size)) nc <- length(levels(cluster)) clus.start <- clus.start[- (nc + 1)] storage.mode(clus.start) <- "integer" W <- matrix(.Fortran(F_robcovf, n, p, nc, clus.start, clus.size, X, double(p), double(p * p), w=double(p * p))$w, nrow=p) ##The following has a small bug but comes close to reproducing what robcovf does ##W <- tapply(X,list(cluster[row(X)],col(X)),sum) ##W <- t(W) %*% W ##The following logic will also do it, also at great cost in CPU time ##for(j in levels(cluster)) { ## s <- cluster==j ## if(sum(s)==1) sx <- X[s,,drop=F] ## else {sx <- apply(X[s,,drop=F], 2, sum); dim(sx) <- c(1,p)} ## ## sc <- sc + t(sx) %*% sx ## ## } adjvar <- var %*% W %*% var ##var.new <- diag(adjvar) ##deff <- var.new/var.orig; names(deff) <- vname ##eff.n <- n/exp(mean(log(deff))) ##if(pr) { ## v <- cbind(var.orig, var.new, deff) ## dimnames(v) <- list(vname, c("Original Variance","Adjusted Variance", ## "Design Effect")) ## .Options$digits <- 4 ## cat("\n\nEffect of Adjustment for Cluster Sampling on Variances of Parameter #Estimates\n\n") ## print(v) ## cat("\nEffective sample size:",format(round(eff.n,1)),"\n\n") ## nn <- n^2/sum(clus.size^2) ## cat("\nN^2/[sum of Ni^2] :",format(round(nn,1)),"\n\n") ## } fit$orig.var <- var fit$var <- adjvar fit$clusterInfo <- clusterInfo ##fit$design.effects <- deff ##fit$effective.n <- eff.n fit } rms/R/latex.rms.s0000644000176200001440000005065213211131314013353 0ustar liggesuserslatexrms <- function(object, file="", append=FALSE, which=1:p, varnames, columns=65, prefix=NULL, inline=FALSE, before=if(inline)"" else "& &", after="", intercept, pretrans=TRUE, digits=.Options$digits, size='') { html <- prType() == 'html' ## Break character for non-math mode: brchar <- if(html) '
<br>
' else '\\\\' f <- object at <- f$Design name <- at$name ac <- at$assume.code p <- length(name) nrp <- num.intercepts(f) ## f$term.labels does not include strat TL <- attr(terms(f),"term.labels") tl <- TL ##Get inner transformations from <- c('asis(*)','pol(*)','lsp(*)','rcs(*)','catg(*)','scored(*)', 'strat(*)','matrx(*)','I(*)') to <- rep('*',9) TLi <- paste0("h(",sedit(TL, from, to),")") ## change wrapping function to h() h <- function(x,...) deparse(substitute(x)) for(i in (1:p)[ac != 9]) TLi[i] <- eval(parse(text=TLi[i])) TLi <- ifelse(TLi == name | ac == 1 | ac == 9, "", TLi) anytr <- any(TLi != "") if(! missing(varnames)) { if(length(varnames) != sum(ac != 9)) stop("varnames is wrong length") vn <- name vn[ac != 9] <- varnames varnames <- vn tl <- sedit(tl, name, varnames, wild.literal=TRUE) if(anytr) TLi <- sedit(TLi, name, varnames, wild.literal=TRUE) } else varnames <- name lnam <- nchar(varnames) ## digits at end of name -> subscript, change font ## used to be {\\mit *} vnames <- sedit(varnames, '*$', '_{*}', test=all.digits) if(is.character(which)) { wh <- charmatch(which, name, 0) if(any(wh == 0))stop(paste("variable name not in model:", paste(which[wh == 0], collapse=" "))) } interaction <- at$interactions if(length(interaction) == 0) interaction <- 0 parms <- at$parms ##If any interactions to be printed, make sure all main effects are included ia <- ac[which] == 9 if(length(which) < p & any(ia)) { for(i in which[ia]) which <- c(which,parms[[name[i]]][,1]) which <- which[which>0] which <- sort(unique(which)) } from <- c('sqrt(*)', 'log(', 'I(*)', '1/(*)', 'pmin(', 'pmax(') to <- c('\\sqrt{*}','\\log(','[*]', '(*)^{-1}','\\min(','\\max(') tl <- sedit(tl, from, to) tl <- sedit(tl, varnames, vnames, wild.literal=TRUE) ltl <- nchar(tl) tl <- paste0("{\\rm ", tl, "}") if(anytr) { TLi <- sedit(TLi, from, to) TLi <- sedit(TLi, varnames, vnames, wild.literal=TRUE) TLi <- ifelse(TLi == "", "", paste0("{\\rm ", TLi, "}")) } varnames <- paste0("{\\rm ", vnames, "}") Two.Way <- function(prm,Nam,nam.coef,lNam,cof,coef,f,columns,lcof,varnames, lnam, at, digits=digits) { i1 <- prm[1,1] i2 <- prm[2,1] num.nl <- any(prm[1,-1] != 0)+any(prm[2,-1] != 0) ##If single factor with nonlinear terms, get it as second factor ##Otherwise, put factor with most # terms as second factor rev <- FALSE if((num.nl == 1 & any(prm[1,-1] != 0)) || (length(Nam[[i1]]) > length(Nam[[i2]]))) { i1 <- i2 i2 <- prm[1,1] rev <- TRUE } N1 <- Nam[[i1]]; N2 <- Nam[[i2]] n1 <- nam.coef[[i1]]; n2 <- nam.coef[[i2]] q <- NULL; cur <- ""; m <- 0 for(j1 in 1:length(N1)) { nam1 <- nam.coef[[i1]][j1] l1 <- lNam[[i1]][j1] lN2 <- length(N2) cnam <- if(rev) paste(nam.coef[[i2]], "*", nam1) else paste(nam1, "*", nam.coef[[i2]]) mnam <- match(cnam, names(cof), nomatch=0) act <- mnam[mnam>0] lN2.act <- length(act) ##Check if restricted interaction between a rcs and another nonlinear ##var, i.e. 
>1 2nd term possible, only 1 (linear) there, and at first ##nonlinear term of rcs if(lN2.act == 1 & lN2>1 & at$assume.code[i1] == 4 & j1 == 2) { if(cur != "") { q <- c(q, cur) m <- 0 cur <- "" } v <- paste0("+", N2[1], "[") n <- lNam[[i2]][1] if(m + n > columns) { q <- c(q, cur) cur <- "" m <- 0 } cur <- paste0(cur, v) m <- m+n cnam <- paste(nam.coef[[if(rev)i2 else i1]][1], "*", nam.coef[[if(rev)i1 else i2]][-1]) v <- rcspline.restate(at$parms[[at$name[i1]]], c(0, coef[cnam]), x=varnames[i1], lx=lnam[i1], columns=columns, before="", after="", begin=cur, nbegin=m, digits=digits) m <- attr(v, "columns.used")+1 #+1 for "]" v <- attr(v, "latex") j <- length(v) if(j>1) q <- c(q, v[-j]) cur <- paste(v[j], "]") break } else if(lN2.act == 1) { v <- paste0(cof[act],"\\:",N1[j1],"\\:\\times\\:", N2[mnam>0]) n <- l1+lNam[[i2]][mnam > 0] + 2 if(m + n > columns) { q <- c(q, cur) cur <- "" m <- 0 } cur <- paste0(cur, v) m <- m + n } else if(lN2.act>0) { if(cur != "") { q <- c(q, cur) m <- 0 cur <- "" } v <- paste0("+", N1[j1], "[") n <- l1 + 1 if(m + n > columns) { q <- c(q, cur) cur <- "" m <- 0 } cur <- paste0(cur, v) m <- m + n if(at$assume.code[i2] == 4 & ! any(mnam == 0)) { ##rcspline, interaction not restricted v <- rcspline.restate(at$parms[[at$name[i2]]], coef[act], x=varnames[i2], lx=lnam[i2], columns=columns, before="", after="", begin=cur, nbegin=m, digits=digits) m <- attr(v, "columns.used") + 1 #1 for "]" v <- attr(v, "latex") j <- length(v) if(j>1) q <- c(q, v[-j]) cur <- paste(v[j],"]") } else { for(j2 in 1:lN2) { l <- mnam[j2] if(l>0) { #not a restricted-out nonlinear term if(j2 == 1 && substring(cof[l],1,1) == "+") cof[l] <- substring(cof[l],2) v <- paste0(cof[l], "\\:", N2[j2]) n <- lcof[l] + lNam[[i2]][j2] if(m + n > columns) { q <- c(q, cur) cur <- "" m <- 0 } cur <- paste0(cur, v) m <- m + n } } cur <- paste(cur, "]") } } } if(cur != "") q <- c(q, cur) attr(q, "columns.used") <- m q } Three.Way <- function(prm,Nam,nam.coef,lNam,cof,coef,f,columns,lcof,at) { i1 <- prm[1,1]; i2 <- prm[2,1]; i3 <- prm[3,1] N1 <- Nam[[i1]]; N2 <- Nam[[i2]]; N3 <- Nam[[i3]] q <- NULL cur <- "" m <- 0 l <- 0 for(j3 in 1:length(N3)) { for(j2 in 1:length(N2)) { for(j1 in 1:length(N1)) { l <- l + 1 v <- paste0(cof[l], "\\:", N1[j1], "\\:\\times\\:", N2[j2], "\\:\\times\\:", N3[j3]) n <- lcof[l] + lNam[[i1]][j1] + lNam[[i2]][j2] + lNam[[i3]][j3] + 3 if(m + n > columns) { q <- c(q, cur) cur <- "" m <- 0 } cur <- paste0(cur, v) m <- m + n } } } q <- c(q, cur) attr(q, "columns.used") <- m q } if(! inline) { tex <- "\\begin{eqnarray*}" if(size != '') tex <- c(tex, paste0('\\', size)) if(length(prefix)) tex <- c(tex, if(html) paste0(prefix, '= & & \\\\') else paste0("\\lefteqn{", prefix, "=}\\\\")) } else tex <- NULL cur <- "" cols <- 0 Coef <- f$coef if((length(which) == p)&& (nrp == 1 | ! missing(intercept))) { cof <- if(missing(intercept)) format(Coef[1], digits=digits) else format(intercept, digits=digits) cur <- cof cols <- nchar(cof) } anyivar <- anyplus <- FALSE # anyivar = any indicator variable Nam <- lNam <- nam.coef <- list() for(i in (1:p)[which]) { ass <- ac[i] nam <- varnames[i] prm <- at$parms[[at$name[i]]] if(ass %in% c(5,7,8)) { if(ass == 7) prm <- format(prm) oprm <- prm lprm <- nchar(prm) z <- substring(prm,1,1) == "[" u <- ! 
z & ass == 7 prm <- sedit(prm, c(' ','&','%'), c('\\ ','\\&','\\%')) prm <- ifelse(z | u, prm, paste0("{\\rm ", prm, "}")) prm <- ifelse(z,paste(nam,"\\in ",prm),prm) prm <- ifelse(u,paste(nam,"=",prm),prm) lprm <- lprm + (z | u)*(lnam[i]+1) prm <- paste0("[", prm, "]") anyivar <- TRUE } if(ass != 8) { k <- f$assign[[TL[i]]] coef <- Coef[k] nam.coef[[i]] <- names(coef) cof <- formatSep(coef, digits=digits) lcof <- nchar(cof) cof <- latexSN(cof) cof <- ifelse(coef<=0, cof, paste0("+", cof)) cof.sp <- cof if(ass == 2 | ass == 10) { r <- grep("times",cof) r <- if(length(r) == 0) 1:length(cof) else -r cof.sp[r] <- paste0(cof.sp[r], "\\:") } else if(length(grep("time",cof[1])) == 0) cof.sp[1] <- paste0(cof[1], "\\:") ## medium space between constant and variable names if constant ## does not end in 10^x } newline <- FALSE switch(ass, { # 1 - asis (linear) nam <- tl[i] Nam[[i]] <- nam lNam[[i]] <- ltl[i] q <- paste0(cof.sp, nam) m <- ltl[i]+lcof }, { # 2 - pol q <- "" m <- 0 pow <- 1:prm nams <- ifelse(pow == 1,nam, paste0(nam, "^{", pow, "}")) Nam[[i]] <- nams; lNam[[i]] <- rep(lnam[i],prm) for(j in pow) q <- paste0(q,cof.sp[j], nams[j]) m <- prm * lnam[i] + sum(lcof) }, { # 3 - lsp if(cols>0) { tex <- c(tex, cur) cur <-"" cols <- 0 } anyplus <- TRUE q <- paste0(cof.sp[1], nam) m <- lcof[1]+lnam[i] nams <- nam; lnams <- lnam[i] kn <- format(-prm) lkn <- nchar(kn) for(j in 1:length(prm)) { z <- paste0("(", nam, if(prm[j]<0) "+" else NULL, if(prm[j] != 0) kn[j] else NULL, ")_{+}") nams <- c(nams, z) u <- lnam[i]+lkn[j]+2 lnams <- c(lnams,u) v <- paste0(cof[j+1], z) n <- lcof[j+1]+u if(m + n > columns) { cur <- paste(cur, q) tex <- c(tex, cur) cur <- "" cols <- 0 q <- "" m <- 0 } q <- paste0(q, v) m <- m + n } Nam[[i]] <- nams; lNam[[i]] <- lnams }, { # 4 - rcs q <- rcspline.restate(prm, coef, x=nam, lx=lnam[i], columns=columns, before="",after="",digits=digits) anyplus <- TRUE m <- attr(q, "columns.used") nn <- nam; ln <- lnam[i] for(j in 1:(length(prm)-2)) { nam <- paste0(nam, "'") nn <- c(nn, nam) ln <- c(ln, lnam[i]+j) } Nam[[i]] <- nn #Two.Way only needs first name lNam[[i]] <- ln #for 2nd-order ia with 1 d.f. 
(restr ia) ##Three.Way needs original design matrix q <- attr(q, "latex") if(substring(sedit(q[1]," ",""),1,1) != "-") q[1] <- paste0("+", q[1]) j <- length(q) if(cur != "") { tex <- c(tex,cur) cur <- "" cols <- 0 } if(j>1) { tex <- c(tex, q[-j]) q <- q[j] } } , { # 5 - catg Nam[[i]] <- prm[-1] lNam[[i]] <- lprm[-1] if(cols>0) { tex <- c(tex,cur) cur <- "" cols <- 0 } q <- "" m <- 0 for(j in 2:length(prm)) { v <- paste0(cof[j-1], prm[j]) n <- lcof[j-1]+lprm[j] if(m + n > columns) { cur <- paste(cur,q) tex <- c(tex, cur) cur <- "" cols <- 0 q <- "" m <- 0 } q <- paste0(q, v) m <- m + n } }, q <- "", { # 7 - scored if(cols>0) { tex <- c(tex,cur) cur <- "" cols <- 0 } q <- paste0(cof.sp[1], nam) m <- nchar(q) nams <- nam lnams <- lnam[i] for(j in 3:length(prm)) { z <- prm[j] v <- paste0(cof[j-1], z) u <- lprm[j]+lnam[i]+3 n <- lcof[j-1]+u nams <- c(nams, z) lnams <- c(lnams,u) if(m + n > columns) { cur <- paste(cur, q) tex <- c(tex, cur) cur <- "" cols <- 0 q <- "" m <- 0 } q <- paste0(q, v) m <- m + n } Nam[[i]] <- nams; lNam[[i]] <- lnams }, ##Strat factor doesn't exist as main effect, but keep variable ##names and their lengths if they will appear in interactions later { # 8 - strat ## if(length(Nam[[i]]) == 0 && any(interaction == i)) 22Nov10 if(any(interaction == i)) { nam.coef[[i]] <- paste0(name[i], "=", oprm[-1]) Nam[[i]] <- prm[-1] lNam[[i]] <- lprm[-1] } q <- "" }, { if(prm[3,1] == 0) q <- Two.Way(prm,Nam,nam.coef,lNam,cof,coef,f,columns,lcof, varnames,lnam,at,digits=digits) else q <- Three.Way(prm,Nam,nam.coef,lNam,cof,coef,f, columns,lcof,at) m <- attr(q, "columns.used") j <- length(q) if(cur != "") { tex <- c(tex,cur) cur <- "" cols <- 0 } if(j>1) { tex <- c(tex,q[-j]) q <- q[j] } }, { # 10 - matrx nam <- names(coef) if(cols>0) { tex <- c(tex,cur) cur <- "" cols <- 0 } q <- "" m <- 0 lnam <- nchar(nam) nam <- paste0("{\\rm ", nam, "}") Nam[[i]] <- nam; lNam[[i]] <- lnam for(j in 1:length(prm)) { v <- paste0(cof.sp[j], nam[j]) n <- lcof[j]+lnam[j] if(m + n > columns) { cur <- paste(cur, q) tex <- c(tex, cur) cur <- "" cols <- 0 q <- "" m <- 0 } q <- paste0(q, v) m <- m + n } } ) if(length(q) && q != "") { if(cols+m > columns) { tex <- c(tex, cur) cur <- "" cols <- 0 } cur <- paste(cur, q) cols <- cols + m } } if(cur != "") tex <- c(tex, cur) if(inline) { if(before != '') tex <- c(before, tex) if(size != '') tex <- c(paste0('{\\', size), tex) if(after != '') tex <- c(tex, after) if(size != '') tex <- c(tex, '}') if(html) return(htmltools::HTML(paste0(tex, '\n'))) cat(tex, sep="\n", file=file, append=append) return(structure(list(file=file,style=NULL), class='latex')) } tex <- c(tex, "\\end{eqnarray*}") tex <- ifelse(tex == paste0(prefix, '= & & \\\\') | substring(tex,1,1) == "\\", tex, paste(before, tex, "\\\\")) if(anyivar | anyplus) { s <- if(length(which) == p) "and " else "where " if(anyivar) s <- paste0(s, "\\([c]=1\\) if subject is in group \\(c\\), 0 otherwise") ## Had trouble with Rmarkdown recognizing math mode with $...$ if(anyivar && anyplus) s <- paste0(s, '; ') if(anyplus) s <- paste0(s, "\\((x)_{+}=x\\) if \\(x > 0\\), 0 otherwise", brchar) tex <- c(tex, s) } if(anytr & pretrans) { i <- TLi != "" if(sum(i) == 1) tr <- paste0("\\(", varnames[i], "\\) is pre--transformed as \\(", TLi[i], "\\).") else { tr <- if(html) { z <- cbind(Variable=paste0('\\(', varnames, '\\)'), Transformation=paste0('\\(', TLi, '\\)')) as.character(htmlTable::htmlTable(z, caption='Pre-transformations', css.cell='min-width: 9em;', align='|l|l|', align.header='|c|c|', escape.html=FALSE)) # 
sep='\n') } else c("\\vspace{0.5ex}\\begin{center}{\\bf Pre--Transformations}\\\\", "\\vspace{1.5ex}\\begin{tabular}{|l|l|} \\hline", "\\multicolumn{1}{|c|}{Variable} & \\multicolumn{1}{c|}{Transformation} \\\\ \\hline", paste0("\\(",varnames[i],"\\) & \\(",TLi[i],"\\) \\\\"), "\\hline", "\\end{tabular}\\end{center}") } tex <- c(tex, tr) } if(html) return(htmltools::HTML(paste0(tex, '\n'))) cat(tex, sep="\n", file=file, append=append) structure(list(file=file, style=NULL), class='latex') } rms/R/npsurv.s0000644000176200001440000000252513562106745013012 0ustar liggesusersnpsurv <- function(formula, data, subset, na.action, ...) { M <- match.call() m <- M m[[1]] <- as.name('model.frame') m[names(m) %nin% c('', 'formula', 'data', 'subset', 'na.action')] <- NULL g <- eval(m, sys.parent()) Y <- model.extract(g, 'response') m <- M m[[1]] <- as.name('survfit') m$formula <- formula f <- eval(m, sys.parent()) f$maxtime <- max(f$time) f$units <- units(Y) f$time.label <- label(Y, type='time') f$event.label <- label(Y, type='event') strat <- rep('', NROW(Y)) if(length(f$strata)) { X <- g[-1] nx <- ncol(X) for(j in 1 : nx) strat <- paste(strat, names(X)[j], '=', as.character(X[[j]]), if(j < nx) ', ', sep='') } f$numevents <- if(inherits(f, 'survfitms')) { ## competing risk data; survfit.formula forgot to compute ## number of events for each state states <- attr(Y, 'states') state <- factor(Y[, 'status'], 0 : length(states), attr(Y, 'inputAttributes')$event$levels) # c('censor', states)) table(strat, state) } else tapply(Y[, 'status'], strat, sum, na.rm=TRUE) ## Compute person-time of exposure while we're at it f$exposure <- tapply(Y[, 1], strat, sum, na.rm=TRUE) f$call <- match.call() class(f) <- c('npsurv', class(f)) f } rms/R/anova.rms.s0000644000176200001440000007030413702174150013350 0ustar liggesusers#main.effect=F to suppress printing main effects when the factor in #question is involved in any interaction. anova.rms <- function(object, ..., main.effect=FALSE, tol=1e-9, test=c('F','Chisq'), india=TRUE, indnl=TRUE, ss=TRUE, vnames=c('names', 'labels'), posterior.summary=c('mean', 'median', 'mode'), ns=500, cint=0.95) { ava <- function(idx) { chisq <- coef[idx] %*% solvet(cov[idx, idx], coef[idx], tol=tol) c(chisq, length(idx)) } eEV <- function(test=integer()) { coef <- if(length(test)) draws[, test, drop=FALSE] else draws co <- if(length(test)) cov[test, test, drop=FALSE] else cov m <- nrow(coef) chisq <- numeric(m) for(i in 1 : m) chisq[i] <- coef[i,, drop=FALSE] %*% solvet(co, t(coef[i,, drop=FALSE]), tol=tol) if(! length(test)) return(chisq) # Assumes stored in chisqT ## variance explained by a variable/set of variables is ## approximated by the Wald chi-square ## pev = partial explained variation = chisq/(chisq for full model) pev <- chisq / chisqT ## Overall pev is the pev at the posterior mean/median beta (last element) ## Also compute HPD interval. ci <- rmsb::HPDint(pev[-m], cint) c(REV=pev[m], Lower=ci[1], Upper=ci[2], d.f.=length(test)) } obj.name <- as.character(sys.call())[2] itype <- 1 #Wald stats. 
Later sense score stats from object$est misstest <- missing(test) test <- match.arg(test) vnames <- match.arg(vnames) posterior.summary <- match.arg(posterior.summary) is.ols <- inherits(object,'ols') nrp <- num.intercepts(object) cov <- vcov(object, regcoef.only=TRUE, intercepts='none') draws <- object$draws bayes <- length(draws) > 0 chisqBayes <- NULL if(bayes) { if(nrp > 0) draws <- draws[, -(1 : nrp), drop=FALSE] betaSummary <- rmsb::getParamCoef(object, posterior.summary) if(nrp > 0) betaSummary <- betaSummary[-(1 : nrp)] X <- object$x if(! length(X)) stop('x=TRUE must have been specified to fit') nc <- ncol(X) ndraws <- nrow(draws) ns <- min(ndraws, ns) if(ns < ndraws) { j <- sample(1 : ndraws, ns, replace=FALSE) draws <- draws[j,, drop=FALSE] } ## Augment draws with a last row with posterior central tendency draws <- rbind(draws, posteriorSummary=betaSummary) ## Compute variances of linear predictor without omitting variables chisqT <- eEV() m <- length(chisqT) ci <- rmsb::HPDint(chisqT[-m], cint) chisqBayes <- c(chisqT[m], ci) names(chisqBayes) <- c('Central', 'Lower', 'Upper') } if(misstest) test <- if(is.ols) 'F' else 'Chisq' if(!is.ols && test=='F') stop('F-test not allowed for this type of model') if(bayes) test <- 'Chisq' if(!is.ols) ss <- FALSE at <- object$Design assign <- object$assign name <- at$name labels <- at$label nama <- names(assign)[1] asso <- 1*(nama=="(Intercept)" | nama=="Intercept") names(assign)[-asso] <- name namelab <- if(vnames == 'names') name else labels ia <- at$interactions nia <- if(!length(ia)) 0 else ncol(ia) assume <- at$assume.code parms <- at$parms f <- length(assume) ## If using labels instead of names, substitute labels in interaction labels, ## e.g. change x1 * x2 to label(x1) * label(x2) if(vnames == 'labels' && any(assume == 9)) { for(i in which(assume == 9)) { parmi <- parms[[name[i]]] parmi <- parmi[, 1][parmi[, 1] > 0] namelab[i] <- paste(labels[parmi], collapse=' * ') } } ncall <- names(sys.call())[-(1 : 2)] alist <- as.character(sys.call())[-(1 : 2)] if(length(alist) && length(ncall)) alist <- alist[ncall == ''] which <- if(length(alist)) { jw <- charmatch(alist, name, 0) if(any(jw == 0)) stop(paste("factor names not in design: ", paste(alist[jw == 0], collapse=" "))) jw } else 1 : f if(! 
bayes) { if(length(object$est) && !length(object$u)) stop("est in fit indicates score statistics but no u in fit") if(itype == 1) { if(!length(object$coefficients)) stop("estimates not available for Wald statistics") coef <- object$coefficients cik <- attr(coef, 'intercepts') } else { if(!length(object$u)) stop("score statistics not available") coef <- object$u } } cov <- vcov(object, regcoef.only=TRUE, intercepts='none') if(bayes) for(j in 1:length(assign)) assign[[j]] <- assign[[j]] - nrp else { ## Omit row/col for scale parameters ## Compute # intercepts nrpcoef to skip in testing nrpcoef <- num.intercepts(object, 'coef') if(nrpcoef > 0) { coef <- coef[-(1 : nrpcoef)] for(j in 1:length(assign)) assign[[j]] <- assign[[j]] - nrpcoef } if(itype == 2 & nrp != 0) stop("fit score statistics and x are incompatible") nc <- length(coef) } dos <- if(bayes) eEV else ava stats <- NULL lab <- NULL W <- vinfo <- list() s <- 0 all.slopes <- rep(FALSE, nc) all.ia <- rep(FALSE, nc) all.nonlin <- rep(FALSE, nc) num.ia <- 0 num.nonlin <- 0 issue.warn <- FALSE for(i in which) { j <- assume[i] parmi <- parms[[name[i]]] low.fact <- if(j != 9) i else (parmi[,1])[parmi[,1] > 0] nl <- if(!length(names(at$nonlinear))) at$nonlinear[[i]] else at$nonlinear[[name[i]]] if(!length(nl)) nl <- rep(FALSE, length(assign[[name[i]]])) ## Factor no. according to model matrix is 1 + number of non-strata factors ## before this factor if(j != 8) { ##ignore strata jfact <- if(i==1) 1 else 1 + sum(assume[1 : (i - 1)] != 8) main.index <- assign[[jfact + asso]] nonlin.ia.index <- NULL #Should not have to be here. Bug in S? all.slopes[main.index] <- TRUE ni <- if(nia == 0) 0 else sum(ia == i) if(nia==0) ni <- 0 else for(k in 1:ncol(ia)) ni <- ni + !any(is.na(match(low.fact, ia[,k]))) if(ni==0 | main.effect) { w <- dos(main.index) s <- s+1; W[[s]] <- main.index stats <- rbind(stats, w) lab <- c(lab, namelab[i]) vinfo[[s]] <- list(name=name[i], type='main effect') } ## If term is involved in any higher order effect, get pooled test ## by adding in all high-order effects containing this term ## For 2nd order interaction, look for 3rd order interactions ## containing both factors ## nonlin.ia.index <- NULL #Used to be here. Bug in S? 
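## When factor i participates in interactions (ni > 0), the code below pools
## its main effect with all higher-order terms containing it to form the
## "Factor+Higher Order Factors" chunk test.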
if(ni > 0) { ia.index <- NULL mm <- (1:f)[assume == 9] mm <- mm[mm != i] for(k in mm) { parmk <- parms[[name[k]]] hi.fact <- parmk[,1] m <- match(low.fact, hi.fact) if(!any(is.na(m))) { kfact <- if(k==1) 1 else 1 + sum(assume[1:(k-1)] != 8) idx <- assign[[kfact + asso]] ia.index <- c(ia.index, idx) if(ncol(parmk)>1) for(jj in 1:length(m)) nonlin.ia.index <- c(nonlin.ia.index, idx[parmk[m[jj],-1] == 1]) nonlin.ia.index <- if(length(nonlin.ia.index)) unique(nonlin.ia.index) else NULL ##Highest order can be counted twice } } idx <- c(main.index, ia.index) all.slopes[idx] <- TRUE w <- dos(idx) s <- s + 1; W[[s]] <- idx stats <- rbind(stats, w) lab <- c(lab, paste(namelab[i], " (Factor+Higher Order Factors)")) vinfo[[s]] <- list(name=name[low.fact], type=if(j==9) 'interaction' else 'combined effect') ## If factor i in >1 interaction, print summary ## Otherwise, will be printed later if(india && (j != 9 & ni > 1)) { w <- dos(ia.index) s <- s + 1; W[[s]] <- ia.index stats <- rbind(stats, w) lab <- c(lab, " All Interactions") vinfo[[s]] <- list(name=name[low.fact], type='combined interactions') } } if(any(nl) && any(!nl)) { ## Tests of adequacy of linear relationship idx <- c(main.index[nl], nonlin.ia.index) num.nonlin <- num.nonlin+1 all.nonlin[idx] <- TRUE if(indnl) { w <- dos(idx) s <- s + 1; W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, if(!length(nonlin.ia.index))" Nonlinear" else " Nonlinear (Factor+Higher Order Factors)") vinfo[[s]] <- list(name=name[low.fact], type=if(j==9) 'nonlinear interaction' else 'nonlinear') } } ## If interaction factor involves a non-linear term from an ## expanded polynomial, lspline, rcspline, or scored factor, ## do tests to see if a simplification (linear interaction) is ## adequate. Do for second order only. if(j == 9) { num.ia <- num.ia+1 all.ia[main.index] <- TRUE if(parmi[3,1] > 0) issue.warn <- TRUE if(parmi[3,1] == 0 && ncol(parmi) > 1) { nonlin.x <- as.logical(parmi[1,2:ncol(parmi)]) nonlin.y <- as.logical(parmi[2,2:ncol(parmi)]) nonlin.xy <- nonlin.x | nonlin.y nonlin.xandy <- nonlin.x & nonlin.y idx <- main.index[nonlin.xy] li <- length(idx) if(li > 0) { num.nonlin <- num.nonlin+1 all.nonlin[idx] <- TRUE if(indnl) { w <- dos(idx) s <- s + 1 W[[s]] <- idx stats <- rbind(stats, w) lab <- c(lab," Nonlinear Interaction : f(A,B) vs. AB") vinfo[[s]] <- list(name=name[low.fact], type='nonlinear interaction') } idx <- main.index[nonlin.xandy] li <- length(idx) if(indnl && li > 0) { w <- dos(idx) s <- s + 1 W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, " f(A,B) vs. Af(B) + Bg(A)") vinfo[[s]] <- list(name=name[low.fact], type='doubly nonlinear interaction') } idx <- main.index[nonlin.x] li <- length(idx) if(indnl && (li > 0 & any(nonlin.x != nonlin.xy))) { w <- dos(idx) s <- s+1 W[[s]] <- idx stats <- rbind(stats, w) lab <- c(lab, paste(" Nonlinear Interaction in", namelab[parmi[1,1]],"vs. Af(B)")) vinfo[[s]] <- list(name=name[low.fact], type='nonlinear interaction in first variable') } idx <- main.index[nonlin.y] li <- length(idx) if(indnl && (li > 0 & any(nonlin.y != nonlin.xy))) { w <- dos(idx) s <- s + 1 W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, paste(" Nonlinear Interaction in", namelab[parmi[2,1]],"vs. 
Bg(A)")) vinfo[[s]] <- list(name=name[low.fact], type='nonlinear interaction in second variable') } } } } } } ## If all lines so far had (Factor +Higher Order Factors) in them, ## remove this redundancy if(length(grep('\\(Factor\\+Higher Order Factors\\)', lab)) == length(lab)) lab <- gsub('\\(Factor\\+Higher Order Factors\\)', '', lab) ## If >1 test of adequacy, print pooled test of all nonlinear effects if(num.nonlin > 1 || (num.nonlin==1 & !indnl)) { idx <- (1:nc)[all.nonlin] li <- length(idx) w <- dos(idx) s <- s + 1; W[[s]] <- idx stats <- rbind(stats, w) lab <- c(lab, "TOTAL NONLINEAR") vinfo[[s]] <- list(type='total nonlinear') } ## If >1 test of interaction, print pooled test of all interactions in list if(num.ia > 1 || (num.ia==1 & !india)) { idx <- (1:nc)[all.ia] li <- length(idx) w <- dos(idx) s <- s+1 W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, "TOTAL INTERACTION") vinfo[[s]] <- list(type='total interaction') } ## If >0 test of adequacy and >0 test of interaction, print pooled test of ## all nonlinear and interaction terms if(num.nonlin>0 & num.ia>0) { idx <- (1:nc)[all.nonlin | all.ia] li <- length(idx) w <- dos(idx) s <- s + 1 W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, "TOTAL NONLINEAR + INTERACTION") vinfo[[s]] <- list(type='complexity') } ## Get total test for all factors listed idx <- (1:nc)[all.slopes | all.ia] w <- dos(idx) s <- s + 1; W[[s]] <- idx stats <- rbind(stats,w) lab <- c(lab, "TOTAL") vinfo[[s]] <- list(type='global') statnam <- if(bayes) c('REV', 'Lower', 'Upper', 'd.f.') else c('Chi-Square','d.f.') if(! bayes) { if(is.ols) { sigma2 <- object$stats['Sigma']^2 dfe <- object$df.residual } if(ss) { stats <- cbind(stats[,2], stats[,1]*sigma2, stats[,1]*sigma2/stats[,2], stats[,1]) statnam <- c('d.f.', 'Partial SS', 'MS', 'Chi-Square') stats <- rbind(stats, Error=c(dfe, sigma2*dfe, sigma2, NA)) s <- s + 1; W[[s]] <- NA lab <- c(lab, 'ERROR') vinfo[[s]] <- list(type='error') } j <- statnam == 'Chi-Square' dfreg <- stats[,statnam=='d.f.'] if(test=='F') { stats[,j] <- stats[,j] / dfreg statnam[j] <- 'F' stats <- cbind(stats, P=1 - pf(stats[,j], dfreg, dfe)) attr(stats,'df.residual') <- dfe } else stats <- cbind(stats,1 - pchisq(stats[,j], dfreg)) statnam <- c(statnam, 'P') } dimnames(stats) <- list(lab, statnam) attr(stats,'formula') <- formula(object) attr(stats,"obj.name") <- obj.name attr(stats,"class") <- c("anova.rms","matrix") names(W) <- lab attr(stats,"which") <- W if(! bayes) attr(stats,"coef.names") <- names(coef) attr(stats,"non.slopes") <- nrp attr(stats,"vinfo") <- vinfo attr(stats,"chisqBayes") <- chisqBayes if(issue.warn) warning("tests of nonlinear interaction with respect to single component \nvariables ignore 3-way interactions") stats } print.anova.rms <- function(x, which=c('none','subscripts', 'names','dots'), table.env=FALSE, ...) 
{ lang <- prType() if(lang != 'plain') return(latex.anova.rms(x, file='', table.env=table.env, ...)) stats <- x digits <- c('Chi-Square'=2, F=2, 'd.f.'=0, 'Partial SS'=15, MS=15, P=4, REV=3, Lower=3, Upper=3) cstats <- matrix('', nrow=nrow(stats), ncol=ncol(stats), dimnames=dimnames(stats)) bchi <- attr(stats, 'chisqBayes') which <- match.arg(which) do.which <- which!='none' && length(W <- attr(stats,'which')) if(do.which) { if(which=='subscripts') simplifyr <- function(x) { x <- sort(unique(x)) n <- length(x) ranges <- character(n) m <- 0 s <- x while(length(s) > 0) { j <- s == s[1] + (1:length(s))-1 m <- m+1 ranges[m] <- if(sum(j)>1) paste(range(s[j]),collapse='-') else s[1] s <- s[!j] } ranges[1:m] } k <- length(W) w <- character(k) coef.names <- attr(stats,'coef.names') for(i in 1:k) { z <- W[[i]] if(all(is.na(z))) w[i] <- '' else { z <- sort(z) w[i] <- switch(which, subscripts=paste(simplifyr(z), collapse=','), names=paste(coef.names[z],collapse=','), dots={ dots <- rep(' ',length(coef.names)) dots[z] <- '.' paste(dots,collapse='') }) } } } sn <- colnames(cstats) for(j in 1:ncol(cstats)) cstats[,j] <- format(round(stats[,j], digits[sn[j]])) cstats[is.na(stats)] <- '' j <- sn=='P' cstats[stats[,j] < 0.00005,j] <- '<.0001' cstats <- cbind(rownames(stats), cstats) dimnames(cstats) <- list(rep("",nrow(stats)), c("Factor ",colnames(stats))) heading <- if(length(bchi)) paste(' Relative Explained Variation Response:', as.character(attr(stats, "formula")[2]), sep = "") else paste(" ", if(any(colnames(stats) == 'F')) "Analysis of Variance" else "Wald Statistics", " Response: ", as.character(attr(stats, "formula")[2]), sep = "") cat(heading,"\n\n") if(any(sn=='MS')) cstats[cstats[,1]=='TOTAL',1] <- 'REGRESSION' if(do.which) cstats <- cbind(cstats, Tested=w) print(cstats,quote=FALSE) if(do.which && which!='names') { cat('\nSubscripts correspond to:\n') print(coef.names, quote=FALSE) } if(!any(sn=='MS') && length(dfe <- attr(stats,'df.residual'))) cat('\nError d.f.:', dfe, '\n') if(length(bchi)) { bchi <- round(bchi, 1) cat('\nApproximate total model Wald total chi-square used in denominators of REV:\n', bchi['Central'], ' [', bchi['Lower'], ', ', bchi['Upper'], ']\n', sep='') } invisible() } latex.anova.rms <- function(object, title=paste('anova', attr(object, 'obj.name'), sep='.'), dec.chisq=2, dec.F=2, dec.ss=NA, dec.ms=NA, dec.P=4, dec.REV=3, table.env=TRUE, caption=NULL, ...) { lang <- prType() html <- lang == 'html' sn <- colnames(object) rowl <- rownames(object) if(any(sn=='MS')) rowl[rowl=='TOTAL'] <- 'REGRESSION' if(! html) rowl <- latexTranslate(rowl) specs <- markupSpecs[[lang]] bold <- specs$bold math <- specs$math ## Translate interaction symbol (*) to times symbol ## rowl <- gsub('\\*', specs$times, rowl) # changed * to $times$ rowl <- gsub('*', specs$times, rowl, fixed=TRUE) ## Put TOTAL rows in boldface rowl <- ifelse(substring(rowl, 1, 5) %in% c("REGRE", "ERROR"), bold(rowl), rowl) rowl <- ifelse(substring(rowl, 1, 1) == " ", paste0(specs$lspace, specs$italics(substring(rowl,2)), sep=""), rowl) # preserve leading blank P <- object[,3] dstats <- as.data.frame(object) attr(dstats, 'row.names') <- rowl digits <- c('Chi-Square'=dec.chisq, F=dec.F, 'd.f.'=0, 'Partial SS'=dec.ss, MS=dec.ms, P=dec.P, REV=dec.REV, Lower=dec.REV, Upper=dec.REV) dig <- digits[sn] sn[sn=='Chi-Square'] <- specs$chisq(add='') names(dstats) <- ifelse(sn %nin% c('d.f.','MS','Partial SS'), math(sn), sn) resp <- as.character(attr(object, 'formula')[2]) if(! 
html) resp <- latexTranslate(resp) bchi <- attr(object, 'chisqBayes') wl <- if(length(bchi)) 'Relative Explained Variation' else if(any(sn == 'F')) 'Analysis of Variance' else 'Wald Statistics' if(! length(caption)) caption <- paste0(wl, " for ", specs$code(resp)) i <- 0 for(nn in names(dstats)) { i <- i + 1 dstats[[nn]] <- formatNP(dstats[[nn]], digits=dig[i], lang = lang, pvalue = nn == math('P')) } if(length(bchi)) { bchi <- round(bchi, 1) w <- paste0('Approximate total model Wald ', specs$math(specs$chisq(add='')), ' used in denominators of REV:', bchi['Central'], ' [', bchi['Lower'], ', ', bchi['Upper'], '].') caption <- paste0(caption, '. ', w) } if(html) { al <- rep('r', length(sn)) w <- htmlTable::htmlTable(dstats, caption=caption, css.cell=rep('padding-left:3ex;', ncol(dstats)), align=al, align.header=al, rowlabel='', escape.html=FALSE) htmltools::HTML(as.character(paste0(w, '\n'))) } else latex(dstats, title=title, caption = if(table.env) caption else NULL, insert.top = if(length(caption) && ! table.env) paste0('\\Needspace{2in}\n', caption), rowlabel="", col.just=rep('r',length(sn)), table.env=table.env, ...) } html.anova.rms <- function(object, ...) latex.anova.rms(object, ...) plot.anova.rms <- function(x, what=c("chisqminusdf","chisq","aic", "P","partial R2","remaining R2", "proportion R2", "proportion chisq"), xlab=NULL, pch=16, rm.totals=TRUE, rm.ia=FALSE, rm.other=NULL, newnames, sort=c("descending","ascending","none"), margin=c('chisq', 'P'), pl=TRUE, trans=NULL, ntrans=40, height=NULL, width=NULL, ...) { what <- match.arg(what) sort <- match.arg(sort) isbase <- Hmisc::grType() == 'base' htmlSpecs <- markupSpecs$html schisq <- htmlSpecs$chisq() nbsp <- htmlSpecial('nbsp') if(! length(xlab)) { xlab <- if(isbase) switch(what, chisq=expression(chi^2), "proportion chisq"=expression(paste("Proportion of Overall", ~chi^2)), chisqminusdf=expression(chi^2~-~df), aic="Akaike Information Criterion", P="P-value", "partial R2"=expression(paste("Partial",~R^2)), "remaining R2"=expression(paste("Remaining~",R^2, "~After Removing Variable")), "proportion R2"=expression(paste("Proportion of Overall", ~R^2))) else switch(what, chisq = schisq, "proportion chisq" = paste('Proportion of Overall', schisq), chisqminusdf = paste0(schisq, nbsp, '-', nbsp, 'df'), aic = "Akaike Information Criterion", P = "P-value", "partial R2" = 'Partial R2', "remaining R2" = 'Remaining R2 After Removing Variable', "proportion R2"='Proportion of Overall R2') } rm <- c(if(rm.totals) c("TOTAL NONLINEAR","TOTAL NONLINEAR + INTERACTION", "TOTAL INTERACTION","TOTAL"), " Nonlinear"," All Interactions", "ERROR", " f(A,B) vs. Af(B) + Bg(A)", rm.other) rn <- rownames(x) rm <- c(rm, rn[substring(rn, 2, 10) == "Nonlinear"]) k <- !(rn %in% rm) if(rm.ia) k[grep("\\*", rn)] <- FALSE an <- x[k,, drop=FALSE] if(! isbase && ! length(height)) height <- plotlyParm$heightDotchart(length(w)) if('REV' %in% colnames(x)) { # Bayesian xlab <- 'Relative Explained Variation' i <- switch(sort, none = 1 : nrow(an), descending = order(an[, 'REV'], decreasing=TRUE), ascending = order(an[, 'REV'])) an <- an[i,, drop=FALSE] rownames(an) <- sub(' (Factor+Higher Order Factors)', '', rownames(an), fixed=TRUE) if(isbase) { xlim <- range(an[, 1:3]) dotchart2(an[, 'REV'], xlab=xlab, pch=pch, xlim=xlim, ...) 
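## pch=91 and pch=93 below are the ASCII codes for '[' and ']', so the HPD
## interval limits render as bracket glyphs around each REV point estimate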
dotchart2(an[, 'Lower'], pch=91, add=TRUE) dotchart2(an[, 'Upper'], pch=93, add=TRUE) return(invisible(an)) } p <- dotchartpl(an[, 'REV'], major=rownames(an), lower=an[,'Lower'], upper=an[,'Upper'], xlab=xlab, limitstracename='HPD Interval', width=width, height=height) return(p) } if(what %in% c("partial R2", "remaining R2", "proportion R2")) { if("Partial SS" %nin% colnames(x)) stop('to plot R2 you must have an ols model and must not have specified ss=FALSE to anova') sse <- x ['ERROR', 'Partial SS'] ssr <- x ['TOTAL', 'Partial SS'] pss <- an[, 'Partial SS'] sst <- sse + ssr } dof <- an[, 'd.f.'] P <- an[, 'P'] if(any(colnames(an) == 'F')) { chisq <- an[, 'F'] * dof totchisq <- x['TOTAL', 'F'] * x['TOTAL', 'd.f.'] } else { chisq <- an[, 'Chi-Square'] totchisq <- x['TOTAL', 'Chi-Square'] } w <- switch(what, chisq = chisq, chisqminusdf = chisq - dof, aic = chisq - 2 * dof, P = P, "partial R2" = pss / sst, "remaining R2" = (ssr - pss) / sst, "proportion R2" = pss / ssr, "proportion chisq" = chisq / totchisq) if(missing(newnames)) newnames <- sedit(names(w)," (Factor+Higher Order Factors)", "") names(w) <- newnames is <- switch(sort, descending = order(-w), ascending = order( w), none = 1 : length(w)) w <- w [is] an <- an[is,, drop=FALSE ] chisq <- chisq[is] dof <- dof[is] P <- P[is] if(pl) { auxtitle <- auxdata <- NULL fn <- function(x, right) { m <- max(abs(x), na.rm=TRUE) left <- max(floor(log10(m)) + 1, 1) nFm(x, left, right) } if(any(c('partial R2', 'remaining R2') %in% margin)) { if("Partial SS" %nin% colnames(x)) stop('to show R2 you must have an ols model and must not have specified ss=FALSE to anova') sse <- x['ERROR', 'Partial SS'] ssr <- x['TOTAL', 'Partial SS'] sst <- sse + ssr pss <- an[, 'Partial SS'] } if(length(margin)) for(marg in margin) { aux <- if(isbase) switch(marg, chisq = list('chi^2', fn(chisq, 1)), 'proportion chisq' = list('Proportion~chi^2', fn(chisq / totchisq, 2)), 'd.f.' = list('d.f.', fn(dof, 0)), P = list('P', fn(P, 4)), 'partial R2' = list('Partial~R^2', fn(pss / sst, 2)), 'proportion R2' = list('Proportion~R^2', fn(pss / ssr, 2))) else switch(marg, chisq = paste(htmlSpecs$chisq(dof), fn(chisq, 1)), 'proportion chisq' = paste0('Proportion ', schisq, '=', fn(chisq / totchisq, 2)), 'd.f.' = paste('d.f.=', fn(dof, 0)), P = paste('P=', fn(P, 4)), 'partial R2' = paste('Partial R2=', fn(pss / sst, 2)), 'proportion R2' = paste('Proportion R2=', fn(pss / ssr, 2))) if(isbase) { if(length(auxtitle)) auxtitle <- paste(auxtitle, aux[[1]], sep='~~') else auxtitle <- aux[[1]] if(length(auxdata)) auxdata <- paste(auxdata, aux[[2]], sep=' ') else auxdata <- aux[[2]] } else auxdata <- if(length(auxdata)) paste(auxdata, aux, sep=paste0(nbsp,nbsp)) else aux } ## convert to expression if not using plotly if(length(auxtitle) && isbase) auxtitle <- parse(text = auxtitle) dc <- if(isbase) dotchart3 else dotchartp if(length(trans)) { nan <- names(w) w <- pmax(0, w) pan <- pretty(w, n=ntrans) tan <- trans(w); names(tan) <- nan p <- dc(tan, xlab=xlab, pch=pch, axisat=trans(pan), axislabels=pan, auxtitle=auxtitle, auxdata=auxdata, auxwhere='hover', height=height, width=width, ...) } else p <- dc(w, xlab=xlab, pch=pch, auxtitle=auxtitle, auxdata=auxdata, auxwhere='hover', height=height, width=width, ...) 
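## When trans= is supplied, statistics are plotted on the transformed scale
## (axisat=trans(pan)) while tick labels stay on the original scale
## (axislabels=pan). Hedged example (not run; f is a previously fitted rms model):
##   plot(anova(f), trans=sqrt, ntrans=30)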
} if(isbase) invisible(w) else p } rms/R/survfit.cph.s0000644000176200001440000001450312773070356013730 0ustar liggesuserssurvfit.cph <- function(formula, newdata, se.fit=TRUE, conf.int=.95, individual=FALSE, type=NULL, vartype=NULL, conf.type=c('log', 'log-log', 'plain', 'none'), id, ...) { object <- formula Call <- match.call() Call[[1]] <- as.name("survfit") ## nicer output for the user censor <- TRUE ftype <- object$type if (! length(ftype)) { ## Use the appropriate one from the model w <- c("exact", "breslow", "efron") survtype <- match(object$method, w) } else { w <- c("kalbfleisch-prentice", "aalen", "efron", "kaplan-meier", "breslow", "fleming-harrington", "greenwood", "tsiatis", "exact") survtype <- match(match.arg(type, w), w) survtype <- c(1,2,3,1,2,3,2,2,1)[survtype] } vartype <- if(! length(vartype)) survtype else { w <- c("greenwood", "aalen", "efron", "tsiatis") vt <- match(match.arg(vartype, w), w) if(vt == 4) 2 else vt } if (! se.fit) conf.type <- "none" else conf.type <- match.arg(conf.type) xpres <- length(object$means) > 0 y <- object[['y']] # require exact name match type <- attr(y, 'type') if(! length(y)) stop('must use y=TRUE with fit') if(xpres) { X <- object[['x']] if(! length(X)) stop('must use x=TRUE with fit') n <- nrow(X) xcenter <- object$means X <- X - rep(xcenter, rep.int(n, ncol(X))) } else { n <- nrow(y) X <- matrix(0, nrow=n, ncol=1) } strata <- object$strata ### strata.pres <- length(strata) > 0 if(! length(strata)) strata <- rep(0, n) offset <- object$offset if(! length(offset)) offset <- rep(0., n) weights <- object$weights if(! length(weights)) weights <- rep(1., n) missid <- missing(id) if (! missid) individual <- TRUE else if (missid && individual) id <- rep(0, n) else id <- NULL if (individual && type != "counting") stop("The individual option is only valid for start-stop data") ## Compute confidence limits for survival based on -log survival, ## constraining to be in [0,1]; d = std.error of cum hazard * z value ciupper <- function(surv, d) ifelse(surv==0, 0, pmin(1, surv*exp(d))) cilower <- function(surv, d) ifelse(surv==0, 0, surv*exp(-d)) risk <- rep(exp(object$linear.predictors), length=n) ## need to center offset?? ## coxph.fit centered offset inside linear predictors if(missing(newdata)) { X2 <- if(xpres) matrix(0., nrow=1, ncol=ncol(X)) else matrix(0., nrow=1, ncol=1) rq <- ro <- NULL newrisk <- 1 } else { if (length(object$frail)) stop("The newdata argument is not supported for sparse frailty terms") X2 <- predictrms(object, newdata, type='x', expand.na=FALSE) ## result with type='x' has attributes strata and offset which may be NULL rq <- attr(X2, 'strata') ro <- attr(X2, 'offset') n2 <- nrow(X2) if(length(rq) && any(levels(rq) %nin% levels(strata))) stop('new dataset has strata levels not found in the original') if(! length(rq)) rq <- rep(1, n2) ro <- if(length(ro)) ro - mean(offset) else rep(0., n2) X2 <- X2 - rep(xcenter, rep.int(n2, ncol(X2))) newrisk <- exp(matxv(X2, object$coefficients) + ro) } y2 <- NULL if (individual) { if(missing(newdata)) stop("The newdata argument must be present when individual=TRUE") isS <- sapply(newdata, is.Surv) if(sum(isS) != 1) stop("newdata must contain exactly one Surv object when individual=TRUE") y2 <- newdata[[which(isS)]] warning('some aspects of individual=TRUE not yet implemented. 
Try survfit.coxph.') } g <- survfitcoxph.fit(y, X, weights, X2, risk, newrisk, strata, se.fit, survtype, vartype, if(length(object$var)) object$var else matrix(0, nrow=1, ncol=1), id=id, y2=y2, strata2=rq) if(strata.pres) { if (is.matrix(g$surv)) { nr <- nrow(g$surv) #a vector if newdata had only 1 row indx1 <- split(1:nr, rep(1:length(g$strata), g$strata)) rows <- indx1[as.numeric(rq)] #the rows for each curve indx2 <- unlist(rows) #index for time, n.risk, n.event, n.censor indx3 <- as.integer(rq) #index for n and strata for(i in 2:length(rows)) rows[[i]] <- rows[[i]]+ (i-1)*nr #linear subscript indx4 <- unlist(rows) #index for surv and std.err temp <- g$strata[indx3] names(temp) <- row.names(X2) #row.names(mf2) new <- list(n = g$n[indx3], time= g$time[indx2], n.risk= g$n.risk[indx2], n.event=g$n.event[indx2], n.censor=g$n.censor[indx2], strata = temp, surv= g$surv[indx4], cumhaz = g$cumhaz[indx4]) if (se.fit) new$std.err <- g$std.err[indx4] g <- new } } ## Insert type so that survfit.cph produces object like survfit.coxph g$type <- type if (! censor) { kfun <- function(x, keep) { if (is.matrix(x)) x[keep,, drop=FALSE] else if (length(x) == length(keep)) x[keep] else x } keep <- g$n.event > 0 if(length(g$strata)) { w <- factor(rep(names(g$strata), g$strata), levels=names(g$strata)) g$strata <- c(table(w[keep])) } g <- lapply(g, kfun, keep) } if (se.fit) { zval <- qnorm(1 - (1 - conf.int)/2, 0, 1) if (conf.type=='plain') { u <- g$surv + zval* g$std.err * g$surv z <- g$surv - zval* g$std.err * g$surv g <- c(g, list(upper=pmin(u,1), lower=pmax(z,0), conf.type='plain', conf.int=conf.int)) } if (conf.type=='log') g <- c(g, list(upper=ciupper(g$surv, zval * g$std.err), lower=cilower(g$surv, zval * g$std.err), conf.type='log', conf.int=conf.int)) if (conf.type=='log-log') { who <- (g$surv==0 | g$surv==1) #special cases xx <- ifelse(who, .1, g$surv) #avoid some "log(0)" messages u <- exp(-exp(log(-log(xx)) + zval * g$std.err/log(xx))) u <- ifelse(who, g$surv + 0 * g$std.err, u) z <- exp(-exp(log(-log(xx)) - zval*g$std.err/log(xx))) z <- ifelse(who, g$surv + 0 * g$std.err, z) g <- c(g, list(upper=u, lower=z, conf.type='log-log', conf.int=conf.int)) } } g$requested.strata <- rq g$call <- Call class(g) <- c('survfit.cph', 'survfit.cox', 'survfit') g } rms/R/which.influence.s0000644000176200001440000000613513654557730014535 0ustar liggesuserswhich.influence <- function(fit, cutoff=.2) { cox <- inherits(fit,"cph") stats <- resid(fit, "dfbetas") rnam <- which(! is.na(stats[,1])) stats <- stats[rnam,, drop=FALSE] ##delete rows added back due to NAs d <- dimnames(stats)[[1]] if(length(d)) rnam <- d at <- fit$Design w <- list() namw <- NULL k <- 0 oldopt <- options('warn') options(warn=-1) on.exit(options(oldopt)) if(! 
cox) { ww <- rnam[abs(stats[, 1]) >= cutoff] if(length(ww)) { k <- k + 1 w[[k]] <- ww namw <- "Intercept" } } Assign <- fit$assign nrp <- num.intercepts(fit) assadj <- if(nrp > 1) nrp - 1 else 0 nm <- names(Assign)[1] if(nm=="Intercept" | nm=="(Intercept)") Assign[[1]] <- NULL ##remove and re-number j <- 0 for(i in (1 : length(at$name))[at$assume.code != 8]) { j <- j + 1 as <- Assign[[j]] - assadj if(length(as) == 1) ww <- rnam[abs(stats[, as]) >= cutoff] else { z <- rep(FALSE, length(rnam)) for(r in as) z <- z | abs(stats[, r]) >= cutoff ww <- rnam[z] } if(length(ww)) { k <- k+1 w[[k]] <- ww namw <- c(namw, at$name[i]) } } if(length(w)) names(w) <- namw w } ##show.influence was written by: ##Jens Oehlschlaegel-Akiyoshi ##oehl@psyres-stuttgart.de ##Center for Psychotherapy Research ##Christian-Belser-Strasse 79a ##D-70597 Stuttgart Germany show.influence <- function(object, dframe, report=NULL, sig=NULL, id=NULL) { who <- unlist(object) nam <- names(object) ## In future parse out interaction components in case main effects ## not already selected ia <- grep('\\*', nam) # remove interactions if(length(ia)) nam <- nam[-ia] nam <- nam[nam %nin% 'Intercept'] # remove Intercept rnam <- dimnames(dframe)[[1]] if(! length(rnam)) rnam <- 1:nrow(dframe) if (length(report)) col <- c(nam, dimnames(dframe[,report,drop=FALSE])[[2]] ) else col <- nam row <- rnam %in% who if(any(col %nin% names(dframe))) stop(paste('needed variables not in dframe:', paste(col[col %nin% names(dframe)],collapse=' '))) dframe <- dframe[row,col,drop=FALSE] rnam <- rnam[row] Count <- table(who) Count <- as.vector(Count[match(rnam,names(Count))]) for (i in 1 : length(nam)) { ni <- nam[i] val <- dframe[,ni] if (length(sig) && is.numeric(val)) val <- signif(val, sig) else val <- format(val) dframe[,ni] <- paste(ifelse(rnam %in% object[[ni]],"*",""), val, sep = "") ## In future change i to also find any object containing the ## variable (e.g., interaction) was object[[i]] dframe[,i] 24Nov00 } if (length(sig) && length(report)) for (i in (length(nam) + 1) : dim(dframe)[2]) if(is.numeric(dframe[, i])) dframe[,i] <- signif(dframe[, i], sig) dframe <- data.frame(Count,dframe) if(length(id)) row.names(dframe) <- id[as.numeric(row.names(dframe))] print(dframe, quote=FALSE) invisible(dframe) } rms/R/validate.Rq.s0000644000176200001440000000411712577352067013633 0ustar liggesusersvalidate.Rq <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, u=NULL, rel=">", tolerance=1e-7, ...) { Rqfit <- RqFit(fit, wallow=FALSE) rqfit <- if(bw) function(x, y, ...) { # need covariance matrix if(length(colnames(x)) && colnames(x)[1]=='Intercept') x <- x[, -1, drop=FALSE] w <- Rq(if(length(x)) y ~ x else y ~ 1, tau=fit$tau, method=fit$method, se=fit$se, hs=fit$hs) w$fail <- FALSE w } else function(...) { w <- Rqfit(...) w$fail <- FALSE w } fit.orig <- fit fit.orig$fail <- FALSE discrim <- function(x, y, fit, iter, evalfit=FALSE, u=NULL, rel=NULL, pr=FALSE, ...) 
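## discrim computes the indexes that validate.Rq resamples: mean absolute
## error (MAD), Spearman rho, g, and a calibration intercept/slope from
## refitting y on X*beta (forced to 0/1 when evalfit=TRUE, i.e. when the fit
## is scored on its own training sample); an optional Dxy is added when u is given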
{ resid <- if(evalfit) fit$residuals else y - x mad <- mean(abs(resid)) if(evalfit) { #Fit being examined on sample used to fit intercept <- 0 slope <- 1 } else { if(length(fit$coef)==1) {intercept <- median(y)-mean(x); slope <- 1} else { cof <- Rqfit(x, y)$coefficients ##Note x is really x*beta from other fit intercept <- cof[1] slope <- cof[2] } } z <- c(mad, if(diff(range(x)) > 0) cor(x, y, method='spearman') else 0, GiniMd(slope*x), intercept, slope) nam <- c("MAD", "rho", "g", "Intercept", "Slope") if(length(u)) { yy <- if(rel==">") ifelse(y > u, 1, 0) else if(rel==">=") ifelse(y >= u, 1, 0) else if(rel=="<") ifelse(y < u, 1, 0) else ifelse(y <= u, 1, 0) z <- c(z, somers2(x,yy)["Dxy"]) nam <- c(nam, paste("Dxy Y", rel, format(u), sep='')) } names(z) <- nam z } predab.resample(fit.orig, method=method, fit=rqfit, measure=discrim, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, tolerance=tolerance, backward=bw, u=u, rel=rel, ...) } rms/R/Gls.s0000644000176200001440000003471513215527015012200 0ustar liggesusers## This is a modification of the gls function in the nlme package. ## gls is authored by Jose Pinheiro, Douglas Bates, Saikat DebRoy, ## Deepayan Sarkar, and R-core Gls <- function (model, data = sys.frame(sys.parent()), correlation = NULL, weights = NULL, subset, method = c("REML", "ML"), na.action = na.omit, control = list(), verbose = FALSE, B=0, dupCluster=FALSE, pr=FALSE, x=FALSE) { Call <- match.call() controlvals <- glsControl() if (!missing(control)) { if (!is.null(control$nlmStepMax) && control$nlmStepMax < 0) { warning("Negative control$nlmStepMax - using default value") control$nlmStepMax <- NULL } controlvals[names(control)] <- control } if (!inherits(model, "formula") || length(model) != 3L) stop("\nModel must be a formula of the form \"resp ~ pred\"") method <- match.arg(method) REML <- method == "REML" if (! is.null(correlation)) groups <- getGroupsFormula(correlation) else groups <- NULL glsSt <- glsStruct(corStruct = correlation, varStruct = varFunc(weights)) model <- terms(model, data=data) ## new mfArgs <- list(formula = asOneFormula(formula(glsSt), model, groups), data = data, na.action = na.action) if (!missing(subset)) mfArgs[["subset"]] <- asOneSidedFormula(Call[["subset"]])[[2L]] mfArgs$drop.unused.levels <- TRUE dataMod <- do.call("model.frame", mfArgs) rn <- origOrder <- row.names(dataMod) ## rn FEH 6apr03 if (length(groups)) { groups <- eval(parse(text = paste("~1", deparse(groups[[2L]]), sep = "|"))) grps <- getGroups(dataMod, groups, level = length(getGroupsFormula(groups, asList = TRUE))) ord <- order(grps) grps <- grps[ord] dataMod <- dataMod[ord, , drop = FALSE] rn <- rn[ord] revOrder <- match(origOrder, rn) } else grps <- NULL X <- model.frame(model, dataMod) dul <- .Options$drop.unused.levels if(!length(dul) || dul) { on.exit(options(drop.unused.levels=dul)) options(drop.unused.levels=FALSE) } X <- Design(X) atrx <- attributes(X) sformula <- atrx$sformula desatr <- atrx$Design mt <- atrx$terms mmcolnames <- desatr$mmcolnames attr(X,'Design') <- NULL contr <- lapply(X, function(el) if (inherits(el, "factor")) contrasts(el)) contr <- contr[!unlist(lapply(contr, is.null))] X <- model.matrix(model, X) parAssign <- attr(X, "assign") fixedSigma <- controlvals$sigma > 0 alt <- attr(mmcolnames, 'alt') if(! 
all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt X <- X[, c('(Intercept)', mmcolnames), drop=FALSE] colnames(X) <- cn <- c('Intercept', desatr$colnames) y <- eval(model[[2L]], dataMod) N <- nrow(X) p <- ncol(X) fTerms <- terms(as.formula(model)) namTerms <- attr(fTerms, "term.labels") if (attr(fTerms, "intercept") > 0) namTerms <- c("Intercept", namTerms) namTerms <- factor(parAssign, labels = namTerms) parAssign <- split(order(parAssign), namTerms) ## Start FEH 4apr03 if(B > 0) { bootcoef <- matrix(NA, nrow=B, ncol=p, dimnames=list(NULL,cn)) Nboot <- integer(B) if(length(grps)) { obsno <- split(1L : N,grps) levg <- levels(grps) ng <- length(levg) if(!length(levg)) stop('program logic error') } else { obsno <- 1L : N levg <- NULL ng <- N } } for(j in 0 : B) { if(j == 0) s <- 1L : N else { if(ng == N) { s <- sample(1L : N, N, replace=TRUE) dataMods <- dataMod[s,] } else { grps.sampled <- sample(levg, ng, replace=TRUE) s <- unlist(obsno[grps.sampled]) dataMods <- dataMod[s,] if(!dupCluster) { grp.freqs <- table(grps) newgrps <- factor(rep(paste('C',1L : ng,sep=''), table(grps)[grps.sampled])) dataMods$id <- newgrps } } Nboot[j] <- Nb <- length(s) if(pr) cat(j,'\r') } attr(glsSt, "conLin") <- if(j==0) list(Xy = array(c(X, y), c(N, p + 1L), list(rn, c(cn, deparse(model[[2L]])))), dims = list(N = N, p = p, REML = as.integer(REML)), logLik = 0, sigma=controlvals$sigma, fixedSigma=fixedSigma) else list(Xy = array(c(X[s,,drop=FALSE], y[s]), c(Nb, p + 1L), list(rn[s], c(cn, deparse(model[[2L]])))), dims = list(N = Nb, p = p, REML = as.integer(REML)), logLik = 0, sigma=controlvals$sigma, fixedSigma=fixedSigma) ## FEH colnames(X) -> cn, ncol(X) -> p, j>0 case above glsEstControl <- controlvals[c("singular.ok", "qrTol")] ## qrTol above not in gls glsSt <- Initialize(glsSt, if(j==0) dataMod else dataMods, glsEstControl) parMap <- attr(glsSt, "pmap") numIter <- numIter0 <- 0 repeat { co <- c(coef(glsSt)) ## FEH oldPars <- c(attr(glsSt, "glsFit")[["beta"]], co) if (length(co)) { optRes <- if(controlvals$opt == 'nlminb') { nlminb(co, function(glsPars) -logLik(glsSt, glsPars), control = list(trace = controlvals$msVerbose, iter.max = controlvals$msMaxIter)) } else { optim(co, function(glsPars) -logLik(glsSt, glsPars), method = controlvals$optimMethod, control = list(trace = controlvals$msVerbose, maxit = controlvals$msMaxIter, reltol = if (numIter == 0) controlvals$msTol else 100 * .Machine$double.eps)) } coef(glsSt) <- optRes$par } else optRes <- list(convergence = 0) attr(glsSt, "glsFit") <- glsEstimate(glsSt, control = glsEstControl) if (!needUpdate(glsSt)) { if (optRes$convergence) stop(optRes$message) break } numIter <- numIter + 1L glsSt <- update(glsSt, if(j==0) dataMod else dataMods) ## FEH aConv <- c(attr(glsSt, "glsFit")[["beta"]], coef(glsSt)) conv <- abs((oldPars - aConv)/ifelse(aConv == 0, 1L, aConv)) aConv <- c(beta = max(conv[1L : p])) conv <- conv[-(1L : p)] for (i in names(glsSt)) { if (any(parMap[, i])) { aConv <- c(aConv, max(conv[parMap[, i]])) names(aConv)[length(aConv)] <- i } } if (verbose) { cat("\nIteration:", numIter) ## cat("\nObjective:", format(aNlm$value), "\n") ## ERROR: aNlm doesn't exist. Need to fix. 
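## A hedged sketch of a repair for the commented-out report above (untested):
## optRes is in scope here, nlminb() returns the minimized criterion in
## $objective and optim() returns it in $value (both are the negated logLik), so
##   obj <- if(controlvals$opt == 'nlminb') optRes$objective else optRes$value
##   cat("\nObjective:", format(obj), "\n")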
print(glsSt) cat("\nConvergence:\n") print(aConv) } if (max(aConv) <= controlvals$tolerance) break if (numIter > controlvals$maxIter) stop("Maximum number of iterations reached without convergence.") } if(j > 0) { bootcoef[j,] <- attr(glsSt, "glsFit")[["beta"]] bootc <- coef(glsSt$corStruct, unconstrained=FALSE) if(j == 1L) { ncb <- ncol(bootc) if(!length(ncb)) ncb <- length(bootc) bootcorr <- matrix(NA, nrow=B, ncol=ncb, dimnames=list(NULL, names(bootc))) } bootcorr[j,] <- bootc } if(j==0) glsSt0 <- glsSt ## FEH 4apr03
} ## end bootstrap reps
if(pr && B > 0) cat('\n') glsSt <- glsSt0 ## FEH
glsFit <- attr(glsSt, "glsFit") namBeta <- names(glsFit$beta) attr(parAssign, "varBetaFact") <- varBeta <- glsFit$sigma * glsFit$varBeta * sqrt((N - REML * p)/(N - p)) varBeta <- crossprod(varBeta) dimnames(varBeta) <- list(namBeta, namBeta) Fitted <- fitted(glsSt) if (length(grps)) { grps <- grps[revOrder] Fitted <- Fitted[revOrder] Resid <- y[revOrder] - Fitted attr(Resid, "std") <- glsFit$sigma/(varWeights(glsSt)[revOrder]) } else { Resid <- y - Fitted attr(Resid, "std") <- glsFit$sigma/(varWeights(glsSt)) } names(Resid) <- names(Fitted) <- origOrder attr(Resid, 'label') <- 'Residuals' cr <- class(Resid) if(length(cr) && any(cr == 'labelled')) { if(length(cr) == 1L) Resid <- unclass(Resid) else class(Resid) <- setdiff(cr, 'labelled') } apVar <- if (controlvals$apVar && FALSE) glsApVar(glsSt, glsFit$sigma, .relStep = controlvals[[".relStep"]], minAbsPar = controlvals[["minAbsParApVar"]], natural = controlvals[["natural"]]) else "Approximate variance-covariance matrix not available" dims <- attr(glsSt, "conLin")[["dims"]] dims[["p"]] <- p attr(glsSt, "conLin") <- NULL attr(glsSt, "glsFit") <- NULL attr(glsSt, 'fixedSigma') <- fixedSigma grpDta <- inherits(data, 'groupedData') estOut <- list(modelStruct = glsSt, dims = dims, contrasts = contr, coefficients = glsFit[["beta"]], varBeta = varBeta, sigma = if(fixedSigma) controlvals$sigma else glsFit$sigma, g=GiniMd(Fitted), apVar = apVar, logLik = glsFit$logLik, numIter = if(needUpdate(glsSt)) numIter else numIter0, groups = grps, call = Call, method = method, fitted = Fitted, residuals = Resid, parAssign = parAssign, Design=desatr, assign=DesignAssign(desatr, 1L, mt), formula=model, sformula=sformula, terms=fTerms, B=B, boot.Coef=if(B > 0) bootcoef, boot.Corr=if(B > 0) bootcorr, Nboot=if(B > 0) Nboot, var=if(B > 0) var(bootcoef), x=if(x) X[, -1L, drop=FALSE]) ## Last 2 lines FEH 29mar03
if(grpDta) { attr(estOut, "units") <- attr(data, "units") attr(estOut, "labels") <- attr(data, "labels") } attr(estOut, "namBetaFull") <- colnames(X) class(estOut) <- c('Gls','rms','gls') estOut }

print.Gls <- function(x, digits=4, coefs=TRUE, title, ...) { ## Following taken from print.gls with changes marked FEH
summary.gls <- getS3method('summary', 'gls') k <- 0 z <- list() dd <- x$dims errordf <- dd$N - dd$p mCall <- x$call if(missing(title)) title <- if (inherits(x, "gnls")) "Generalized Nonlinear Least Squares Fit" else paste("Generalized Least Squares Fit by", ifelse(x$method == "REML", "REML", "Maximum Likelihood")) ltype <- if (inherits(x, "gnls")) 'Log-likelihood' else paste('Log-', ifelse(x$method == "REML", "restricted-", ""), 'likelihood', sep='') if(prType() == 'latex') ltype <- paste(ltype, ' ', sep='') misc <- reListclean(Obs=dd$N, Clusters=if(length(x$groups)) length(unique(x$groups)) else dd$N, g=x$g) llike <- reListclean(ll=x$logLik, 'Model d.f.' = dd$p - 1L, sigma = x$sigma, 'd.f.' = errordf) names(llike)[1L] <- ltype k <- k + 1L z[[k]] <- list(type='stats', list( headings = c('', ''), data = list(c(misc, c(NA,NA,3L)), c(llike, c(2L,NA,digits,NA))))) if(any(names(x)=='var') && length(x$var)) { se <- sqrt(diag(x$var)) beta <- coef(x) k <- k + 1L z[[k]] <- list(type='coefmatrix', list(coef = beta, se= se), title='Using bootstrap variance estimates') } else { ## summary.gls calls BIC which tries to use logLik.rms. ## Make it use logLik.gls instead
class(x) <- 'gls' s <- summary.gls(x)$tTable k <- k + 1L z[[k]] <- list(type='coefmatrix', list(coef = s[,'Value'], se = s[,'Std.Error'], errordf = errordf)) } if (length(x$modelStruct) > 0) { k <- k + 1L z[[k]] <- list(type='print', list(summary(x$modelStruct))) } if(x$B > 0) { k <- k + 1L z[[k]] <- list(type='cat', list('Bootstrap repetitions:',x$B)) tn <- table(x$Nboot) if(length(tn) > 1L) { k <- k + 1L z[[k]] <- list(type='print', list(tn), title = 'Table of Sample Sizes used in Bootstraps') } else { k <- k + 1L z[[k]] <- list(type='cat', list('Bootstraps were all balanced with respect to clusters')) } dr <- diag(x$varBeta)/diag(x$var) k <- k + 1L z[[k]] <- list(type='print', list(round(dr, 2L)), title = 'Ratio of Original Variances to Bootstrap Variances') k <- k + 1L r <- round(t(apply(x$boot.Corr, 2L, quantile, probs=c(.025,.975))), 3L) colnames(r) <- c('Lower','Upper') z[[k]] <- list(type='print', list(r), title = 'Bootstrap Nonparametric 0.95 Confidence Limits for Correlation Parameters') } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) }

vcov.Gls <- function(object, intercepts='all', ...) { v <- if(any(names(object)=='var') && length(object$var)) object$var else object$varBeta if(length(intercepts) == 1L && intercepts == 'none') v <- v[-1L, -1L, drop=FALSE] v }

predict.Gls <- function(object, newdata, type=c("lp","x","data.frame","terms","cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean','individual','simultaneous'), kint=1L, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ...) { type <- match.arg(type) predictrms(object, newdata, type, se.fit, conf.int, conf.type, kint=kint, na.action, expand.na, center.terms, ...) }

latex.Gls <- function(...) latexrms(...)
rms/R/Glm.r0000644000176200001440000001702313704356450012170 0ustar liggesusers#' rms Version of glm
#'
#' This function saves `rms` attributes with the fit object so that
#' `anova.rms`, `Predict`, etc. can be used just as with `ols`
#' and other fits. No `validate` or `calibrate` methods exist for
#' `Glm` though.
#'
#' For the `print` method, format of output is controlled by the user
#' previously running `options(prType="lang")` where `lang` is
#' `"plain"` (the default), `"latex"`, or `"html"`.
#'
#'
#' @aliases Glm
#' @param
#' formula,family,data,weights,subset,na.action,start,offset,control,model,method,x,y,contrasts
#' see [stats::glm()]; for `print` `x` is the result of `Glm`
#' @param ... ignored
#' @param coefs specify `coefs=FALSE` to suppress printing the table of
#' model coefficients, standard errors, etc. Specify `coefs=n` to print
#' only the first `n` regression coefficients in the model.
#' @return a fit object like that produced by [stats::glm()] but with
#' `rms` attributes and a `class` of `"rms"`, `"Glm"`,
#' `"glm"`, and `"lm"`. The `g` element of the fit object is
#' the \eqn{g}-index.
#' @seealso [stats::glm()],[Hmisc::GiniMd()], [prModFit()], [stats::residuals.glm] #' @keywords models regression #' @md #' @examples #' #' ## Dobson (1990) Page 93: Randomized Controlled Trial : #' counts <- c(18,17,15,20,10,20,25,13,12) #' outcome <- gl(3,1,9) #' treatment <- gl(3,3) #' f <- glm(counts ~ outcome + treatment, family=poisson()) #' f #' anova(f) #' summary(f) #' f <- Glm(counts ~ outcome + treatment, family=poisson()) #' # could have had rcs( ) etc. if there were continuous predictors #' f #' anova(f) #' summary(f, outcome=c('1','2','3'), treatment=c('1','2','3')) #' Glm <- function(formula, family = gaussian, data = environment(formula), weights, subset, na.action = na.delete, start = NULL, offset = NULL, control = glm.control(...), model = TRUE, method = "glm.fit", x = FALSE, y = TRUE, contrasts = NULL, ...) { call <- match.call() if (is.character(family)) family <- get(family) if (is.function(family)) family <- family() if (!length(family$family)) { print(family) stop("`family' not recognized") } mt <- terms(formula, dta=data) callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) mf <- modelData(data, formula, subset = subset, weights=weights, na.action=na.action, callenv=callenv) mf <- Design(mf, formula=formula) at <- attributes(mf) desatr <- at$Design attr(mf, 'Design') <- NULL nact <- attr(mf, 'na.action') sformula <- at$sformula mmcolnames <- desatr$mmcolnames switch(method, model.frame = return(mf), glm.fit = 1, stop(paste("invalid `method':", method))) xvars <- as.character(attr(mt, "variables"))[-1] if ((yvar <- attr(mt, "response")) > 0) xvars <- xvars[-yvar] xlev <- if (length(xvars) > 0) { xlev <- lapply(mf[xvars], levels) xlev[!sapply(xlev, is.null)] } X <- if(! is.empty.model(mt)) model.matrix(mt, mf, contrasts) intcpt <- if(attr(mt, 'intercept') > 0) '(Intercept)' alt <- attr(mmcolnames, 'alt') if(! 
all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt X <- X[, c(intcpt, mmcolnames), drop=FALSE] colnames(X) <- c(if(length(intcpt)) 'Intercept', desatr$colnames) # colnames(X) <- if(attr(mt, 'intercept') > 0) # c('Intercept', desatr$colnames) # else desatr$colnames Y <- model.response(mf, "numeric") weights <- model.weights(mf) offset <- attr(mf, 'offset') if(!length(offset)) offset <- 0 if (length(weights) && any(weights < 0)) stop("Negative wts not allowed") if (length(offset) > 1 && length(offset) != NROW(Y)) stop(paste("Number of offsets is", length(offset), ", should equal", NROW(Y), "(number of observations)")) fit <- glm.fit(x = X, y = Y, weights = weights, start = start, offset = offset, family = family, control = control, intercept = attr(mt, "intercept") > 0) if (length(offset) && attr(mt, "intercept") > 0) { fit$null.deviance <- if(is.empty.model(mt)) fit$deviance else glm.fit(x = X[, "Intercept", drop = FALSE], y = Y, weights = weights, start = start, offset = offset, family = family, control = control, intercept = TRUE)$deviance } if (model) fit$model <- mf if (x) fit$x <- X[, -1, drop=FALSE] if (!y) fit$y <- NULL fit <- c(fit, list(call = call, formula = formula, sformula=sformula, terms = mt, data = data, offset = offset, control = control, method = method, contrasts = attr(X, "contrasts"), xlevels = xlev, Design=desatr, na.action=nact, assign=DesignAssign(desatr,1,mt), g=GiniMd(fit$linear.predictors))) class(fit) <- c('Glm', 'rms', 'glm', 'lm') fit } ##' Print a `Glm` Object ##' ##' Prints a `Glm` object, optionally in LaTeX or html ##' @title print.glm ##' @param x `Glm` object ##' @param digits number of significant digits to print ##' @param coefs specify `coefs=FALSE` to suppress printing the table of ##' @param title a character string title to be passed to `prModFit` ##' @param ... ignored ##' @author Frank Harrell print.Glm <- function(x, digits=4, coefs=TRUE, title='General Linear Model', ...) { k <- 0 z <- list() if(length(zz <- x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(zz)[1], sep='.'), list(zz)) } cof <- coef(x) lr <- x$null.deviance - x$deviance dof <- x$rank - (names(cof)[1]=='Intercept') pval <- 1 - pchisq(lr, dof) ci <- x$clusterInfo misc <- reListclean(Obs=length(x$residuals), 'Residual d.f.'=x$df.residual, 'Cluster on'=ci$name, Clusters=ci$n, g = x$g) lr <- reListclean('LR chi2' = lr, 'd.f.' = dof, 'Pr(> chi2)' = pval) headings <- c('', 'Model Likelihood\nRatio Test') data <- list(c(misc, c(NA,NA,NA,NA,3)), c(lr, c(2, NA,-4))) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) se <- sqrt(diag(vcov(x))) k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef=cof, se=se)) prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } summary.Glm <- function(...) summary.rms(...) vcov.Glm <- function(object, regcoef.only=TRUE, intercepts='all', ...) { v <- object$var if(!length(v)) v <- getS3method('vcov', 'glm')(object, ...) ns <- num.intercepts(object, 'var') if(ns > 0 && length(intercepts)==1 && intercepts=='none') v <- v[-(1 : ns), -(1 : ns), drop=FALSE] v } # Varcov.glm <- function(object, ...) #{ # if(length(object$var)) # return(object$var) ## for Glm # # s <- summary.glm(object) # s$cov.unscaled * s$dispersion #} # residuals.Glm <- function(object, ...) residuals.glm(object, ...) 
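## Hedged usage sketch (not run; mirrors the roxygen example near the top of
## this file, with counts, outcome, treatment as defined there):
##   f <- Glm(counts ~ outcome + treatment, family=poisson(), x=TRUE, y=TRUE)
##   predict(f, type='lp')    # linear predictors
##   anova(f)                 # Wald statistics via the rms machinery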
predict.Glm <- function(object, newdata, type=c("lp","x","data.frame","terms","cterms","ccterms","adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean','individual','simultaneous'), kint=1, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ...) { type <- match.arg(type) predictrms(object, newdata, type, se.fit, conf.int, conf.type, kint, na.action, expand.na, center.terms, ...) } latex.Glm <- function(...) latexrms(...) rms/R/survest.psm.s0000644000176200001440000001170412161464664013766 0ustar liggesuserssurvest <- function(fit, ...) UseMethod("survest") ##Use x= if input is a design matrix, newdata= if a data frame or data matrix #or vector. Can specify (centered) linear predictor values instead (linear.predictors). #Strata is attached to linear.predictors or x as "strata" attribute. #data matrix assumes that categorical variables are coded with integer codes survest.psm <- function(fit, newdata, linear.predictors, x, times, fun, loglog=FALSE, conf.int=.95, what=c("survival","hazard","parallel"), ...) { # ... so survplot will work what <- match.arg(what) if(what=='parallel') conf.int <- FALSE trans <- switch(what, survival=Survival(fit), hazard=Hazard(fit), parallel=Survival(fit)) if(missing(fun)) fun <- if(loglog) function(x) logb(ifelse(x==0|x==1,NA,x)) else function(x) x if(what=="hazard" & conf.int>0) { warning('conf.int ignored for what="hazard"') conf.int <- FALSE } if(conf.int > 0) { cov <- vcov(fit, regcoef.only=TRUE) # ignore scale if(!missing(linear.predictors)) { warning("conf.int set to 0 since linear.predictors specified") conf.int <- 0 } } if(any(attr(fit,'class')=='pphsm')) stop("fit should not have been passed thru pphsm") nvar <- length(fit$coef) - num.intercepts(fit) if(missing(linear.predictors)) { if(nvar > 0 && missing(x) && missing(newdata)) { linear.predictors <- fit$linear.predictors if(conf.int > 0) stop("may not specify conf.int unless x or newdata given") rnam <- names(linear.predictors) } else { if(nvar==0) { x <- as.matrix(1) # no predictors linear.predictors <- fit$coef[1] } else { if(missing(x)) x <- cbind(Intercept=1, predict(fit, newdata, type="x")) linear.predictors <- matxv(x, fit$coef) } if(conf.int > 0) { g1 <- drop(((x %*% cov) * x) %*% rep(1, ncol(x))) last <- { nscale <- length(fit$icoef) - 1 ncol(fit$var) - (1 : nscale) + 1 } g2 <- drop(x %*% fit$var[-last, last, drop=FALSE]) } rnam <- dimnames(x)[[1]] } } else rnam <- names(linear.predictors) if(what == 'parallel') { if(length(times)>1 && (length(times) != length(linear.predictors))) stop('length of times must = 1 or number of subjects when what="parallel"') return(trans(times, linear.predictors)) } if(missing(times)) times <- seq(0, fit$maxtime, length=200) nt <- length(times) n <- length(linear.predictors) if(n > 1 & missing(times)) warning("should specify times if getting predictions for >1 obs.") if(conf.int>0) zcrit <- qnorm((conf.int + 1) / 2) comp <- function(a, b, Trans) Trans(b, a) surv <- drop(outer(linear.predictors, times, FUN=comp, Trans=trans)) if(conf.int > 0 && (nt==1 || n==1)) { dist <- fit$dist link <- survreg.distributions[[dist]]$trans z <- if(length(link)) link(times) else times sc <- fit$scale ## TODO: generalize for vector of scale params logtxb <- outer(linear.predictors, z, function(a,b) b - a) se <- sqrt(g1 + logtxb * (2 * g2 + logtxb * fit$var[last, last])) / sc prm <- 0 tm <- if(length(link)) 1 else 0 lower <- trans(tm,-drop(logtxb / sc + zcrit * se), parms=prm) upper <- trans(tm,-drop(logtxb / sc - zcrit * se), 
parms=prm) if(what=='survival') { lower[times == 0] <- 1 upper[times == 0] <- 1 } std.err <- drop(se) } if(nt==1 | n==1) { surv <- fun(surv); surv[is.infinite(surv)] <- NA if(conf.int > 0) { lower <- fun(lower); lower[is.infinite(lower)] <- NA upper <- fun(upper); upper[is.infinite(upper)] <- NA retlist <- list(time=times,surv=surv, lower=lower,upper=upper, std.err=std.err, linear.predictors=linear.predictors) } else retlist <- list(time=times,surv=surv, linear.predictors=linear.predictors) retlist <- structure(c(retlist, list(conf.int=conf.int, units=fit$units, n.risk=fit$stats["Obs"], n.event=fit$stats["Events"], what=what)), class='survest.psm') return(retlist) } if(n==1) names(surv) <- format(times) else { if(is.matrix(surv)) dimnames(surv) <- list(rnam, format(times)) else names(surv) <- rnam } surv } print.survest.psm <- function(x, ...) { cat('\nN:',x$n.risk,'\tEvents:',x$n.event) z <- if(length(unique(x$time)) > 1) data.frame(Time=x$time) else { cat('\tTime:',x$time[1],' ',x$units,'s',sep='') data.frame(LinearPredictor=x$linear.predictors) } cat('\n\n') z$whatever <- x$surv names(z)[2] <- x$what if(x$conf.int) { z$Lower <- x$lower z$Upper <- x$upper z$SE <- x$std.err } print.data.frame(z) invisible() } rms/R/residuals.cph.s0000644000176200001440000000133612773171345014221 0ustar liggesusersresiduals.cph <- function(object, type = c("martingale", "deviance", "score", "schoenfeld", "dfbeta", "dfbetas", "scaledsch","partial"), ...) { type <- match.arg(type) x <- object[['x']] y <- object[['y']] if(type != 'martingale' && !length(x)) stop('you must specify x=TRUE in the fit') if(type %nin% c('deviance','martingale') && !length(y)) stop('you must specify y=TRUE in the fit') strata <- object$strata if(length(strata)) { object$strata <- strata terms <- terms(object) attr(terms,'specials')$strata <- attr(terms,'specials')$strat object$terms <- terms } getS3method('residuals', 'coxph')(object, type=type, ...) } rms/R/ia.operator.s0000644000176200001440000000600312250461004013654 0ustar liggesusers#ia.operator.s - restricted interaction operators for use with rms #F. 
Harrell 8 Nov 91 #Set up proper attributes for a restricted interaction for a model #such as y ~ rcs(x1) + rcs(x2) + x1 %ia% x2 or x1 %ia% rcs(x2) #or rcs(x1) %ia% x2 "%ia%" <- function(x1, x2) { a1 <- attributes(x1) a2 <- attributes(x2) nam <- as.character(sys.call())[-1] redo <- function(x,nam) { if(is.null(attr(x,"assume.code"))) { if(!is.null(class(x)) && class(x)[1]=="ordered") x <- scored(x, name=nam) else if(is.character(x) | is.factor(x)) x <- catg(x, name=nam) else if(is.matrix(x)) x <- matrx(x, name=nam) else x <- asis(x, name=nam) } ass <- attr(x,"assume.code") nam <- attr(x,"name") if(ass==5) { colnames <- attr(x,"colnames") len <- length(attr(x,"parms"))-1 } else if(ass==8) { prm <- attr(x,"parms") colnames <- paste(nam,"=",prm[-1],sep="") len <- length(prm)-1 } else if(ass==7) { prm <- attr(x,"parms") colnames <- c(nam,paste(nam,"=",prm[-(1:2)],sep="")) len <- length(prm)-1 } else { if(is.null(ncol(x))) { len <- 1 colnames <- nam } else { colnames <- dimnames(x)[[2]] len <- ncol(x) } } attr(x,"colnames") <- colnames attr(x,"len") <- len if(ass==8) attr(x,"nonlinear") <- rep(FALSE, len) x } x1 <- redo(x1,nam[1]) x2 <- redo(x2,nam[2]) a1 <- attributes(x1) a2 <- attributes(x2) n1 <- a1$colnames n2 <- a2$colnames nl1 <- a1$nonlinear nl2 <- a2$nonlinear as1 <- a1$assume.code as2 <- a2$assume.code l1 <- a1$len l2 <- a2$len if(any(nl1) & any(nl2)) nc <- l1+l2-1 else nc <- l1*l2 if(is.matrix(x1)) nr <- nrow(x1) else nr <- length(x1) x <- matrix(single(1),nrow=nr,ncol=nc) name <- character(nc) parms <- matrix(integer(1),nrow=2,ncol=nc+1) nonlinear <- logical(nc) k <- 0 if(!is.factor(x1)) x1 <- as.matrix(x1) if(!is.factor(x2)) x2 <- as.matrix(x2) for(i in 1:l1) { if(as1==5 | as1==8) x1i <- unclass(x1)==(i+1) else x1i <- x1[,i] for(j in 1:l2) { ##Remove doubly nonlinear terms if(nl1[i] & nl2[j]) break k <- k + 1 if(as2==5 | as2==8) x2j <- unclass(x2)==(j+1) else x2j <- x2[,j] x[,k] <- x1i * x2j name[k] <- paste(n1[i],"*",n2[j]) parms[,k+1] <- c(nl1[i],nl2[j]) nonlinear[k] <- nl1[i] | nl2[j] } } dimnames(x) <- list(NULL, name) attr(x,"ia") <- c(a1$name, a2$name) attr(x,"parms") <- parms attr(x,"nonlinear") <- nonlinear attr(x,"assume.code") <- 9 attr(x,"name") <- paste(a1$name,"*",a2$name) attr(x,"label") <- attr(x,"name") attr(x,"colnames") <- name attr(x,"class") <- "rms" x } rms/R/specs.rms.s0000644000176200001440000001074514024502610013356 0ustar liggesusers#Print description of specifications. Can come from individual variable #created by dx, complete design created by Design(), or complete design #carried forward in fit specs <- function(fit, ...) UseMethod('specs') specs.rms <- function(fit, long=FALSE, ...) { Call <- if(length(fit$call)) fit$call else if(length(attr(fit,'call'))) attr(fit,'call') else attr(fit, 'formula') tl <- attr(fit$terms, "term.labels") if(!length(tl)) tl <- attr(terms(formula(fit)), 'term.labels') ass <- fit$assign strata <- levels(fit$strata) if(is.null(fit$assume)) { d <- fit$Design fit <- d } assume <- fit$assume if(is.null(assume)) stop("fit does not have design information") parms <- fit$parms name <- fit$name lim <- fit$limits ia.order <- fit$ia.order label <- fit$label units <- fit$units if(length(ass)) { if(names(ass)[1] %in% c("(Intercept)", "Intercept")) ass[[1]] <- NULL names(ass) <- name[assume != "strata"] } f <- length(assume) d <- matrix("", nrow=f, ncol=3) d[,1] <- assume iint <- 0 jfact <- 0 trans <- rep("", f) # Pick off inner transformation of variable. 
To complete, need to # evaluate h function # from <- c("asis","pol","lsp","rcs","catg","scored","strat","matrx","I") # from <- paste(from,"(\\(.*\\))",sep="") # tl <- translate(tl, from, "\\1") # tl <- paste("h(",tl,")",sep="") from <- c('asis(*)', 'pol(*)', 'lsp(*)', 'rcs(*)', 'catg(*)', 'scored(*)', 'strat(*)', 'matrx(*)', 'gTrans(*)', 'I(*)') to <- rep('*', 10) tl <- paste("h(", sedit(tl, from, to), ")", sep="") ##change wrapping function to h() h <- function(x, ...) deparse(substitute(x)) for(i in 1 : f) { if(assume[i] == "interaction") iint <- iint+1 else { tr <- eval(parse(text = tl[i])) if(tr != name[i]) trans[i] <- tr } len <- if(assume[i] == "strata") 0 else length(ass[[name[i]]]) d[i,3] <- as.character(len) parmi <- parms[[name[i]]] if(d[i,1] == "transform") d[i,2] <- "function" else { if(length(parmi)) { if(d[i,1] == "interaction") { i1 <- parmi[1, -1] != 0 i2 <- parmi[2, -1] != 0 i3 <- parmi[3, -1] != 0 if(parmi[3,1] == 0) { #2nd order interaction iao <- 1 * (any(i1) & !any(i2)) + 2 * (! any(i1) & any(i2)) + 3 * (any(i1) & any(i2) & ! any(i1 & i2)) + 4 * any(i1 & i2) d[i,2] <- c("linear x linear - AB", "nonlinear x linear - f(A)B", "linear x nonlinear - Ag(B)", "Af(B) + Bg(A)", "f(A,B) - all cross-products")[iao+1] } else #3rd order d[i,2] <- paste(if(any(i1)) "nonlinear" else "linear", "x", if(any(i2)) "nonlinear" else "linear", "x", if(any(i3)) "nonlinear" else "linear") if(ncol(parmi) == 1) d[i,2] <- " " } else { lab <- "" if(assume[i] == 'gTrans') parmi <- '' for(z in parmi) if(is.character(z)) lab <- paste(lab, z) else lab <- paste(lab, signif(z, 5)) d[i,2] <- lab } } } } collab <- c("Assumption", "Parameters", "d.f.") if(any(trans != "")) { collab <- c("Transformation", collab) d <- cbind(trans, d) } if(any(name != label)) { collab <- c("Label", collab) d <- cbind(label, d) } if(length(units) && any(units != '')) { collab <- c('Units', collab) unitsb <- rep('', length(assume)) unitsb[assume != 'interaction'] <- units d <- cbind(unitsb, d) } dimnames(d) <- list(name, collab) structure(list(call=Call, how.modeled=d, limits=if(long) lim, strata=strata), class='specs.rms') } print.specs.rms <- function(x, ...) { dput(x$call) cat('\n') print(x$how.modeled, quote=FALSE) if(length(x$limits)) {cat('\n'); print(x$limits)} if(length(x$strata)) { cat("\n Strata\n\n") print(x$strata,quote=FALSE) } invisible() } rms/R/validate.ols.s0000644000176200001440000002352413214625107014034 0ustar liggesusersvalidate <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE,...) UseMethod("validate") #Resampling optimism of discrimination and reliability of an ols regression #B: # reps #bw=T to incorporate backward stepdown (using fastbw) with params rule,type,sls #pr=T to print results of each bootstrap rep #Requires: predab.resample, fastbw, ols #Frank Harrell 11 June 91 validate.ols <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, u=NULL, rel=">", tolerance=1e-7, ...) { fit.orig <- fit penalty.matrix <- fit.orig$penalty.matrix discrim <- function(x, y, fit, iter, evalfit=FALSE, u=NULL, rel=NULL, pr=FALSE, ...) 
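## discrim computes the indexes that validate.ols resamples: R-square, MSE,
## g, and the calibration intercept/slope of y regressed on X*beta (forced
## to 0 and 1 when evalfit=TRUE, i.e. when the model is scored on its own
## training sample); optional Dxy and R2 at cutoff u are added when u is specified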
{ resid <- if(evalfit) fit$residuals else y - x n <- length(resid) sst <- (n - 1) * var(y) # sum(y^2) - (sum(y)^2)/n mse <- sum(resid^2) rsquare <- 1 - mse / sst mse <- mse / n if(evalfit) { #Fit being examined on sample used to fit intercept <- 0 slope <- 1 } else { if(length(fit$coef)==1) {intercept <- mean(y)-mean(x); slope <- 1} else { coef <- lsfit(x, y)$coef #Note x is really x*beta from other fit intercept <- coef[1] slope <- coef[2] } } z <- c(rsquare, mse, GiniMd(slope*x), intercept, slope) nam <- c("R-square", "MSE", "g", "Intercept", "Slope") if(length(u)) { yy <- if(rel==">") ifelse(y>u, 1, 0) else if(rel==">=") ifelse(y>=u, 1, 0) else if(rel=="<") ifelse(y"|rel==">=") P <- pnorm(- (u - x) / sqrt(mse)) else P <- pnorm((u - x) / sqrt(mse)) P0 <- sum(yy) / n L <- -2*sum(yy * logb(P) + (1 - yy) * logb(1 - P )) L0<- -2*sum(yy * logb(P0) + (1 - yy) * logb(1 - P0)) R2 <- (1 - exp(-(L0 - L) / n)) / (1 - exp(-L0 / n)) z <- c(z, R2) nam <- c(nam, paste("R2 Y", rel, format(u), sep="")) } names(z) <- nam z } ols.fit <- function(x, y, tolerance=1e-7, backward, penalty.matrix=NULL, xcol=NULL, ...) { fail <- FALSE if(!length(x)) { ybar <- mean(y) n <- length(y) residuals <- y - ybar v <- sum(residuals ^ 2) / (n - 1) return(list(coef=ybar, var=v / n, residuals=residuals, fail=fail)) } if(length(penalty.matrix) > 0) { if(length(xcol)) { xcol <- xcol[-1] - 1 # remove position for intercept penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE] } fit <- lm.pfit(x, y, penalty.matrix=penalty.matrix, tol=tolerance) if(any(is.na(fit$coefficients))) fail <- TRUE } else { fit <- lm.fit.qr.bare(x, as.vector(y), tolerance=tolerance, intercept=TRUE, xpxi=TRUE) if(any(is.na(fit$coefficients))) fail <- TRUE if(backward) fit$var <- sum(fit$residuals^2) * fit$xpxi/ (length(y) - length(fit$coefficients)) } c(fit, fail=fail) } predab.resample(fit.orig, method=method, fit=ols.fit, measure=discrim, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, tolerance=tolerance, backward=bw,u=u, penalty.matrix=penalty.matrix, rel=rel, ...) } print.validate <- function(x, digits=4, B=Inf, ...) { kept <- attr(x, 'kept'); attr(x, 'kept') <- NULL print(round(unclass(x), digits), ...) if(length(kept) && B > 0) { cat("\nFactors Retained in Backwards Elimination\n\n") varin <- ifelse(kept, '*', ' ') print(varin[1:min(nrow(varin), B),], quote=FALSE) cat("\nFrequencies of Numbers of Factors Retained\n\n") nkept <- apply(kept, 1, sum) tkept <- table(nkept) names(dimnames(tkept)) <- NULL print(tkept) } } latex.validate <- function(object, digits=4, B=Inf, file='', append=FALSE, title=first.word(deparse(substitute(x))), caption=NULL, table.env=FALSE, size='normalsize', extracolsize=size, ...) 
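## latex.validate typesets the matrix returned by validate(): digits controls
## rounding, and B caps how many bootstrap rows of the variables-retained
## table are shown (B=0 suppresses that table entirely)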
{ chg <- function(x, old, new) { names(new) <- old tx <- new[x] ifelse(is.na(tx), x, tx) } x <- object kept <- attr(x, 'kept'); attr(x, 'kept') <- NULL cn <- colnames(x) cn <- chg(cn, c('index.orig', 'training', 'test', 'optimism', 'index.corrected', 'n'), c('Original\nSample', 'Training\nSample', 'Test\nSample', 'Optimism', 'Corrected\nIndex', '$n$')) rn <- rownames(x) rn <- chg(rn, c('Dxy','R2','Emax','D','U','Q','B','g','gp','gr','rho','pdm'), c('$D_{xy}$','$R^{2}$','$E_{\\max}$','$D$','$U$', '$Q$','$B$','$g$','$g_{p}$','$g_{r}$','$\\rho$', '$|\\overline{\\mathrm{Pr}(Y\\geq Y_{0.5})-\\frac{1}{2}}|$')) dimnames(x) <- list(rn, cn) cat('\\Needspace{2in}\n', file=file, append=append) cat('\\begin{center}\\', size, '\n', sep='', file=file, append=append) if(length(caption) && !table.env) cat(caption, '\n\n', sep='', file=file, append=TRUE) cdec <- ifelse(cn == '$n$', 0, digits) latex(unclass(x), cdec=cdec, rowlabel='Index', title=title, caption=if(table.env) caption, table.env=table.env, file=file, append=TRUE, center='none', extracolsize=extracolsize, ...) cat('\\end{center}\n', file=file, append=TRUE) if(length(kept) && B > 0) { varin <- ifelse(kept, '$\\bullet$', ' ') nr <- nrow(varin) varin <- varin[1:min(nrow(varin), B),, drop=FALSE] cat('\\Needspace{2in}\n', sep='', file=file, append=append) cat('\\begin{center}\\', size, '\n', sep='', file=file, append=TRUE) if(table.env) { cap <- paste(caption, '. Factors retained in backwards elimination', sep='') if(nr > B) cap <- paste(cap, paste(' (first', B, 'resamples).', sep='')) } else { cap <- 'Factors Retained in Backwards Elimination' if(nr > B) cap <- c(cap, paste('First', B, 'Resamples')) cap <- paste(cap, collapse='\\\\') cat(cap, '\n\n', file=file, append=TRUE) } latex(varin, ..., caption=if(table.env) cap, title=paste(title,'retained', sep='-'), rowname=NULL, file=file, append=TRUE, table.env=table.env, center='none', extracolsize=extracolsize) if(!table.env) cat('\\end{center}\n', file=file, append=TRUE) cap <- if(table.env) paste(caption, '. Frequencies of numbers of factors retained', sep='') else { cap <- 'Frequencies of Numbers of Factors Retained' cat('\\Needspace{1in}\n', sep='', file=file, append=append) cat('\\begin{center}\\', size, '\n', sep='', file=file, append=TRUE) cat(cap, '\n\n', file=file, append=TRUE) } nkept <- apply(kept, 1, sum) tkept <- t(as.matrix(table(nkept))) latex(tkept, ..., caption=if(table.env) cap, title=paste(title, 'freq', sep='-'), rowname=NULL, file=file, append=TRUE, table.env=table.env, center='none', extracolsize=extracolsize) if(!table.env) cat('\\end{center}\n', file=file, append=TRUE) } } html.validate <- function(object, digits=4, B=Inf, caption=NULL, ...) { chg <- function(x, old, new) { names(new) <- old tx <- new[x] ifelse(is.na(tx), x, tx) } x <- object kept <- attr(x, 'kept'); attr(x, 'kept') <- NULL cn <- colnames(x) cn <- chg(cn, c('index.orig', 'training', 'test', 'optimism', 'index.corrected', 'n'), c('Original
Sample', 'Training<br>Sample', 'Test<br>Sample', 'Optimism', 'Corrected<br>Index', '$n$')) rn <- rownames(x) rn <- chg(rn, c('Dxy','R2','Emax','D','U','Q','B','g','gp','gr', 'rho','pdm'), c('$D_{xy}$','$R^{2}$','$E_{\\max}$','$D$','$U$', '$Q$','$B$','$g$','$g_{p}$','$g_{r}$','$\\rho$', '$|\\overline{\\mathrm{Pr}(Y\\geq Y_{0.5})-\\frac{1}{2}}|$')) dimnames(x) <- list(rn, cn) cdec <- ifelse(cn == '$n$', 0, digits) ## Bug in htmlTable::txtRound for vector digits
z <- unclass(x) for(i in 1 : length(cdec)) z[, i] <- round(z[, i], cdec[i]) cat(htmlTable::htmlTable(z, rowlabel='Index', caption=caption, css.cell = c('', rep('padding-left:3ex;', ncol(z) - 1)), escape.html=FALSE), sep='\n') cat('<br><br>
\n') if(length(kept) && B > 0) { varin <- ifelse(kept, htmlSpecial('mediumsmallwhitecircle'), ' ') nr <- nrow(varin) varin <- varin[1:min(nrow(varin), B),, drop=FALSE] cap <- 'Factors Retained in Backwards Elimination' if(nr > B) cap <- c(cap, paste('First', B, 'Resamples')) cat(htmlTable::htmlTable(varin, caption=cap, rnames=FALSE, css.cell = c('', rep('padding-left:3ex;', ncol(varin) - 1)), escape.html=FALSE), sep='\n') cap <- 'Frequencies of Numbers of Factors Retained' nkept <- apply(kept, 1, sum) tkept <- t(as.matrix(table(nkept))) cat(htmlTable::htmlTable(tkept, caption=cap, rnames=FALSE, css.cell = c('', rep('padding-left:3ex;', length(nkept) - 1)), escape.html=FALSE)) } } rms/R/matinv.s0000644000176200001440000000306413101622231012730 0ustar liggesusers#Uses matinv Fortran function, which uses ginv and sweep #Returns matrix inverse with attributes rank (integer rank of x) # and swept (logical - whether or not ith variable has been swept) #Input matrix should set its swept attribute before the first invocation # of matinv for that matrix. If swept isn't set, it defaults to all F. # #Inverse is with respect to diagonal elements which[1],which[2],... #For collinearities, the appropriate rows and columns of inv are set to 0 #Caller must negate matrix when finished with all partial inversions if # negate is false. The default is to automatically negate the which # portion of the inverse, i.e., to assume that no further operations are # to be done on the matrix # #Eps is singularity criterion, like 1-Rsquare # #F. Harrell 1 Aug 90 matinv <- function(a, which, negate=TRUE, eps=1E-12) { swept <- attr(a,"swept") if(!is.matrix(a)) a <- as.matrix(a) storage.mode(a) <- "double" m<-nrow(a) if(missing(which))which <- 1:m else { rw <- range(which) if(rw[1] < 1 | rw[2] > m) stop("illegal elements to invert") } storage.mode(which) <- "integer" if(!length(swept))swept <- rep(FALSE, m) if(m!=ncol(a))stop("matrix must be square") y <- .Fortran(F_matinv,x = a, as.integer(m), as.integer(length(which)),which, swept=swept, logical(m), double(m*(m+1)/2), double(m), rank = integer(1), as.double(eps), as.logical(negate)) x <- y$x attr(x,"rank") <- y$rank attr(x,"swept") <- y$swept dimnames(x) <- dimnames(a) x } rms/R/val.surv.s0000644000176200001440000002015213703327703013225 0ustar liggesusersval.surv <- function(fit, newdata, S, est.surv, censor, u, fun, lim, evaluate=100, pred, maxdim=5, ...) { usehare <- ! missing(u) if(usehare) { if(missing(fun)) { if(missing(fit)) stop('must specify fit if u is specified and fun is not') fun <- if(inherits(fit, 'coxph')) function(p) log(-log(p)) else if(length(it <- survreg.distributions[[fit$dist]]$quantile)) it else if(length(it <- survreg.auxinfo[[fit$dist]]$quantile)) it else stop('fun is not given and it cannot be inferred') } } if(missing(S)) { S <- fit$y units <- fit$units if(! length(S)) stop('when S is omitted you must use y=TRUE in the fit') } else units <- attr(S, 'units') if('Surv' %nin% attr(S, 'class')) stop('S must be a Surv object') if(ncol(S) != 2) stop('S must be a right-censored Surv object') if(missing(est.surv)) est.surv <- if(usehare) { if(missing(newdata)) survest(fit, times=u)$surv else survest(fit, newdata, times=u)$surv } else { if(missing(newdata)) survest(fit, times=S[,1], what='parallel') else survest(fit, newdata, times=S[,1], what='parallel') } if(usehare) { i <- ! 
is.na(est.surv + S[,1] + S[,2]) est.surv <- est.surv[i] S <- S[i,] curtail <- function(x) pmin(.9999, pmax(x, .0001)) f <- polspline::hare(S[,1], S[,2], fun(curtail(est.surv)), maxdim=maxdim, ...) if(missing(pred)) { if(missing(lim)) lim <- datadist(est.surv)$limits[c('Low:prediction','High:prediction'),] pseq <- seq(lim[1], lim[2], length=evaluate) } else pseq <- pred actual <- 1 - polspline::phare(u, fun(curtail(est.surv)), f) actualseq <- 1 - polspline::phare(u, fun(curtail(pseq)), f) w <- structure(list(harefit=f, p=est.surv, actual=actual, pseq=pseq, actualseq=actualseq, u=u, fun=fun, n=nrow(S), d=sum(S[,2]), units=units), class='val.survh') return(w) } n <- nrow(S) nac <- if(! missing(fit)) fit$na.action if(! missing(censor) && length(censor) > 1 && ! missing(fit)) { if(length(censor) > n && length(nac)) { ## Missing observations were deleted during fit j <- ! is.na(naresid(nac, censor)) censor <- censor[j] } if(length(censor) != n) stop("length of censor does not match # rows used in fit") } est.surv.censor <- lp <- NULL if(! missing(censor)) { if(missing(fit)) stop('fit must be specified when censor is specified') est.surv.censor <- if(missing(newdata)) survest(fit, times=censor, what='parallel') else survest(fit, newdata, times=censor, what='parallel') if(mc <- sum(is.na(est.surv.censor))) warning(paste(mc, 'observations had missing survival estimates at censoring time')) lp <- if(missing(newdata)) predict(fit, type='lp') else predict(fit, newdata, type='lp') } if(length(est.surv) != n) stop('length of est.surv must equal number of rows in S') structure(list(S=S, est.surv=est.surv, censor.est.surv=if(length(est.surv.censor)) est.surv.censor, lp=if(length(lp))lp, na.action=nac), class='val.surv') } print.val.survh <- function(x, ...) { cat('\nValidation of Predicted Survival at Time=', format(x$u), '\tn=', x$n, ', events=', x$d, '\n\n') cat('hare fit:\n\n') print(x$harefit) cat('\nFunction used to transform predictions:\n') cat(paste(format(x$fun), collapse=' ')) error <- abs(x$p - x$actual) er <- c(mean(error, na.rm=TRUE), quantile(error, .9, na.rm=TRUE)) erf <- format(round(er, 4)) cat('\n\nMean absolute error in predicted probabilities:', erf[1],'\n') cat('0.9 Quantile of absolute errors :', erf[2], '\n') er } plot.val.survh <- function(x, lim, xlab, ylab, riskdist=TRUE, add=FALSE, scat1d.opts=list(nhistSpike=200), ...) { if(missing(lim)) lim <- range(c(x$pseq, x$actualseq), na.rm=TRUE) units <- if(x$u == 1) x$units else paste(x$units, 's', sep='') lab <- paste('Probability of Surviving ', format(x$u), ' ', units, sep='') if(add) lines(x$pseq, x$actualseq, ...) else plot(x$pseq, x$actualseq, type='l', xlim=lim, ylim=lim, xlab=if(missing(xlab)) paste('Predicted', lab) else xlab, ylab=if(missing(ylab)) paste('Actual', lab) else ylab) abline(a=0, b=1, lty=2) if(riskdist) do.call('scat1d', c(list(x=x$p), scat1d.opts)) invisible() } plot.val.surv <- function(x, group, g.group=4, what=c('difference','ratio'), type=c('l','b','p'), xlab, ylab, xlim, ylim, datadensity=TRUE, ...) { S <- x$S est.surv <- x$est.surv censor.est.surv <- x$censor.est.surv what <- match.arg(what) type <- match.arg(type) n <- length(est.surv) nac <- x$na.action if(! missing(group)) { if(length(group) > n && length(nac)) { ## Missing observations were deleted during fit j <- ! is.na(naresid(nac, est.surv)) group <- group[j] } if(length(group) != n) stop("length of group does not match # rows used in fit") if(! 
is.factor(group)) group <- if(is.logical(group) || is.character(group)) as.factor(group) else cut2(group, g=g.group) } if(length(censor.est.surv)) { if(missing(group)) group <- rep(1, length(censor.est.surv)) i <- S[,2]==1 group <- group[i] if(sum(i)<2) stop('fewer than 2 uncensored observations') y <- switch(what, difference=1-est.surv - .5*(1-censor.est.surv), ratio=(1 - est.surv) / (.5 * (1 - censor.est.surv))) meanF <- tapply(1 - est.surv[i], group, mean, na.rm=TRUE) meanE <- tapply(.5*(1-censor.est.surv[i]), group, mean, na.rm=TRUE) res <- matrix(cbind(meanF,meanE), ncol=2, dimnames=list(levels(group), c('Mean F(T|T<=C)', 'Mean E{F(C|X)/2}')))
## NOTE: a span of the original source was lost here to HTML-tag stripping:
## the remainder of plot.val.surv, the rms/R/summary.rms.s file header, and
## the opening of summary.rms. The lines down to 'bayes <- length(draws) > 0'
## are an assumed reconstruction patterned on the rms package source, not verbatim.
return(res) } invisible() }
summary.rms <- function(object, ..., ycut=NULL, est.all=TRUE, antilog, conf.int=.95, abbrev=FALSE, vnames=c('names','labels'), conf.type=c('individual','simultaneous'), usebootcoef=TRUE, boot.type=c('percentile','bca','basic'), posterior.summary=c('mean','median','mode'), verbose=FALSE) { obj.name <- as.character(sys.call())[2] at <- object$Design labels <- at$label vnames <- match.arg(vnames) conf.type <- match.arg(conf.type) boot.type <- match.arg(boot.type) posterior.summary <- match.arg(posterior.summary) blabel <- switch(boot.type, percentile='bootstrap nonparametric percentile', bca='bootstrap BCa', basic='basic bootstrap') alp <- (1. - conf.int) / 2 draws <- object$draws bayes <- length(draws) > 0 if(bayes && conf.type == 'simultaneous') stop('conf.type simultaneous does not apply to Bayesian model fits') isblrm <- inherits(object, 'blrm') partialpo <- isblrm && object$pppo > 0 if(partialpo & (length(ycut) != 1)) stop('must specify a single value of ycut for partial prop. odds model') pred <- function(d) if(isblrm) predict(object, d, type='x', ycut=ycut) else predict(object, d, type='x') assume <- at$assume.code if(is.null(assume)) stop("fit does not have design information") if(any(assume == 10)) warning("summary.rms does not currently work with matrix factors in model") name <- at$name parms <- at$parms scale <- object$scale.pred if(missing(antilog)) antilog <- length(scale)==2 if(antilog & length(scale) < 2) scale <- c("","Antilog") factors <- rmsArgs(substitute(list(...))) nf <- length(factors) if(est.all) which <- (1 : length(assume))[assume != 9] if(nf > 0) { jw <- charmatch(names(factors), name, 0) if(any(jw == 0)) stop(paste("factor name(s) not in the design:", paste(names(factors)[jw == 0], collapse=" "))) if(!est.all) which <- jw if(any(assume[which] == 9)) stop("cannot estimate effects for interaction terms alone") } Limval <- Getlim(at, allow.null=TRUE, need.all=FALSE) values <- Limval$values ## The next statement (9Jun98) makes limits[1:3,] keep all levels of ## factors. Problem is that [.data.frame does not pass drop to [] ## when first subscripts are specified oldopt <- options('drop.factor.levels') options(drop.factor.levels=FALSE) on.exit(options(oldopt)) lims <- Limval$limits[1 : 3 , , drop=FALSE] ##Find underlying categorical variables ucat <- rep(FALSE, length(assume)) for(i in (1:length(assume))[assume != 5 & assume < 8]) ucat[i] <- name[i] %in% names(values) && length(V <- values[[name[i]]]) && is.character(V) stats <- lab <- NULL beta <- if(bayes) coef(object, stat=posterior.summary) else object$coef lc <- length(beta) ## Number of non-slopes: nrp <- if(bayes) num.intercepts(object) else num.intercepts(object, 'coef') nrp1 <- nrp + 1 ## Exclude non slopes j <- nrp1 : lc beta <- beta[j] if(bayes) draws <- draws[, j, drop=FALSE] var <- vcov(object, regcoef.only=TRUE, intercepts='none') zcrit <- if(length(idf <- object$df.residual)) qt(1. - alp, idf) else qnorm(1.
- alp) cll <- paste(signif(conf.int, 3)) bcoef <- if(usebootcoef) object$boot.Coef if(length(bcoef)) bcoef <- bcoef[, nrp1 : lc, drop=FALSE] jf <- 0 if(nf > 0) for(i in jw) { jf <- jf + 1 z <- value.chk(at, i, factors[[jf]], 0, Limval) lz <- length(z) if(lz == 1 && !is.na(z)) lims[2, i] <- z if(lz == 2) { if(!is.na(z[1])) lims[1, i] <- z[1] if(!is.na(z[2])) lims[3, i] <- z[2] } else if(lz == 3) lims[!is.na(z), i] <- z[!is.na(z)] if(lz < 1 | lz > 3) stop("must specify 1,2, or 3 values for a factor") } adj <- lims[2,, drop=FALSE] isna <- sapply(adj, is.na) if(any(isna)) stop(paste("adjustment values not defined here or with datadist for", paste(name[assume != 9][isna], collapse=" "))) k <- which[assume[which] %nin% c(8, 5, 10) & ! ucat[which]] m <- length(k) if(m) { isna <- is.na(lims[1, name[k], drop=FALSE] + lims[3, name[k], drop=FALSE]) ##note char. excluded from k if(any(isna)) stop(paste("ranges not defined here or with datadist for", paste(name[k[isna]], collapse=" "))) } xadj <- unclass(rms.levels(adj, at)) if(m) { adj <- xadj M <- 2 * m odd <- seq(1, M, by=2) even <- seq(2, M, by=2) ##Extend data frame for(i in 1:length(adj)) adj[[i]] <- rep(adj[[i]], M) i <- 0 for(l in k) { i <- i + 1 adj[[name[l]]][(2 * i - 1) : (2 * i)] <- lims[c(1, 3), name[l]] } xx <- pred(adj) xd <- matrix(xx[even,] - xx[odd,], nrow=m) xb <- xd %*% beta se <- drop((((xd %*% var) * xd) %*% rep(1, ncol(xd)))^.5) if(conf.type == 'simultaneous' && length(xb) > 1) { if(verbose) { cat('Confidence intervals are simultaneous for these estimates:\n') print(as.vector(xb)) } u <- confint(multcomp::glht(object, cbind(matrix(0, nrow=nrow(xd), ncol=nrp), xd), df=if(length(idf)) idf else 0), level=conf.int)$confint low <- u[, 'lwr'] up <- u[, 'upr'] } else if(length(bcoef)) { best <- t(xd %*% t(bcoef)) lim <- bootBCa(xb, best, type=boot.type, n=nobs(object), seed=object$seed, conf.int=conf.int) if(is.matrix(lim)) { low <- lim[1,] up <- lim[2,] } else { low <- lim[1] up <- lim[2] } } else if(bayes) { best <- t(xd %*% t(draws)) lim <- apply(best, 2, rmsb::HPDint, prob=conf.int) low <- lim[1, ] up <- lim[2, ] } else { low <- xb - zcrit*se up <- xb + zcrit*se } lm <- as.matrix(lims[, name[k], drop=FALSE]) stats <- cbind(lm[1,], lm[3,], lm[3,] - lm[1,], xb, se, low, up, 1) lab <- if(vnames=='names') name[k] else labels[k] if(antilog) { stats <- rbind(stats, cbind(stats[, 1 : 3,drop=FALSE], exp(xb), NA, exp(low), exp(up), 2)) lab <- c(lab, rep(paste("", scale[2]), m)) w <- integer(M) w[odd] <- 1 : m w[even] <- m + (1 : m) stats <- stats[w,] lab <- lab[w] } } for(j in 1 : length(xadj)) xadj[[j]] <- rep(xadj[[j]], 2) for(i in which[assume[which] == 5 | ucat[which]]) { ## All comparisons with reference category parmi <- if(ucat[i]) values[[name[i]]] else parms[[name[i]]] parmi.a <- if(abbrev) abbreviate(parmi) else parmi iref <- as.character(xadj[[name[i]]][1]) ki <- match(iref, parmi) for(j in parmi) { if(j != iref) { kj <- match(j, parmi) adj <- xadj adj[[name[i]]] <- c(iref, j) adj <- as.data.frame(adj) xx <- pred(adj) xd <- matrix(xx[2,] - xx[1,], nrow=1) xb <- xd %*% beta se <- sqrt((xd %*% var) %*% t(xd)) if(conf.type == 'simultaneous' && length(xb) > 1) { if(verbose) { cat('Confidence intervals are simultaneous for these estimates:\n') print(as.vector(xb)) } u <- confint(multcomp::glht(object, cbind(matrix(0, nrow=nrow(xd), ncol=nrp), xd), df=if(length(idf)) idf else 0), level=conf.int)$confint low <- u[,'lwr'] up <- u[,'upr'] } else if(length(bcoef)) { best <- t(xd %*% t(bcoef)) lim <- bootBCa(xb, best, type=boot.type, 
n=nobs(object), seed=object$seed, conf.int=conf.int) if(is.matrix(lim)) { low <- lim[1,] up <- lim[2,] } else { low <- lim[1] up <- lim[2] } } else if(bayes) { best <- t(xd %*% t(draws)) lim <- apply(best, 2, rmsb::HPDint, prob=conf.int) low <- lim[1, ] up <- lim[2, ] } else { low <- xb - zcrit*se up <- xb + zcrit*se } stats <- rbind(stats,cbind(ki, kj, NA, xb, se, low, up, 1)) lab <-c(lab, paste(if(vnames=='names') name[i] else labels[i], " - ", parmi.a[kj], ":", parmi.a[ki], sep="")) if(antilog) { stats <- rbind(stats,cbind(ki, kj, NA, exp(xb), NA, exp(low), exp(up), 2)) lab <- c(lab, paste("", scale[2]))} } } } dimnames(stats) <- list(lab, c("Low", "High", "Diff.", "Effect", "S.E.", paste("Lower", cll), paste("Upper", cll), "Type")) attr(stats, "heading") <- paste(" Effects Response : ", as.character(formula(object))[2], sep='') attr(stats,"class") <- c("summary.rms", "matrix") attr(stats,"scale") <- scale attr(stats,"obj.name") <- obj.name interact <- at$interactions adjust <- "" if(length(interact)) { interact <- sort(unique(interact[interact > 0])) nam <- name[which[match(which, interact, 0) > 0]] if(length(nam)) for(nm in nam) adjust <- paste(adjust, nm, "=", if(is.factor(xadj[[nm]])) as.character(xadj[[nm]])[1] else format(xadj[[nm]][1]), " ", sep="") } attr(stats, "adjust") <- adjust attr(stats, "conf.type") <- if(length(bcoef)) blabel else if(bayes) 'HPD' else 'z' stats } print.summary.rms <- function(x, ..., table.env=FALSE) { switch(prType(), latex = latex.summary.rms(x, ..., file='', table.env=table.env), html = return(html.summary.rms(x, ...)), plain = { cstats <- dimnames(x)[[1]] for(i in 1 : 7) cstats <- cbind(cstats, format(signif(x[, i], 5))) dimnames(cstats) <- list(rep("", nrow(cstats)), c("Factor", dimnames(x)[[2]][1 : 7])) cat(attr(x,"heading"), "\n\n") print(cstats, quote=FALSE) if((A <- attr(x, "adjust")) != "") cat("\nAdjusted to:", A, "\n") blab <- switch(attr(x, 'conf.type'), 'bootstrap nonparametric percentile' = 'Bootstrap nonparametric percentile confidence intervals', 'bootstrap BCa' = 'Bootstrap BCa confidence intervals', 'basic bootstrap' = 'Basic bootstrap confidence intervals', HPD = 'Bayesian highest posterior density intervals', '') if(blab != '') cat('\n', blab, '\n', sep='') cat('\n') } ) invisible() } latex.summary.rms <- function(object, title=paste('summary', attr(object, 'obj.name'), sep='.'), table.env=TRUE, ...) { title <- title # because of lazy evaluation caption <- latexTranslate(attr(object, "heading")) scale <- attr(object, "scale") object <- object[, -8, drop=FALSE] rowl <- latexTranslate(dimnames(object)[[1]]) rowl <- ifelse(substring(rowl, 1, 1) == " ", paste("~~{\\it ", substring(rowl,2), "}", sep=""), rowl) # preserve leading blank rowl <- sedit(rowl, "-", "---") cstats <- matrix("", nrow=nrow(object), ncol=ncol(object), dimnames=dimnames(object)) for(i in 1 : 7) cstats[,i] <- format(signif(object[, i], 5)) ## for(i in 4 : 7) cstats[,i] <- format(round(object[, i], 2)) cstats[is.na(object)] <- "" caption <- sedit(caption, " Response","~~~~~~Response") cstats <- as.data.frame(cstats) attr(cstats,"row.names") <- rowl names(cstats)[3] <- "$\\Delta$" latex(cstats, caption=if(table.env) caption else NULL, title=title, rowlabel="", col.just=rep("r", 7), table.env=table.env, ...) } html.summary.rms <- function(object, digits=4, dec=NULL,...) 
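## Example of how the summary.rms -> print/latex/html rendering chain above is
## typically driven (a hedged sketch, not code from this package; the data set,
## fit, and variable names are hypothetical):
##   d <- datadist(mydata); options(datadist='d')
##   f <- lrm(death ~ rcs(age, 4) + sex, data=mydata)
##   s <- summary(f, age=c(40, 60))  # effect of changing age from 40 to 60
##   s                               # print.summary.rms; antilog rows are odds ratios
##   options(prType='html')          # later prints route through html.summary.rms below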
{ caption <- attr(object, "heading") ## scale <- attr(object, "scale") object <- object[, -8, drop=FALSE] rowl <- dimnames(object)[[1]] rowl <- ifelse(substring(rowl, 1, 1) == " ", paste(" ", substring(rowl, 2), "", sep=""), rowl) # preserve leading blank rowl <- sedit(rowl, "-", "---") cstats <- matrix("", nrow=nrow(object), ncol=ncol(object), dimnames=dimnames(object)) for(i in 1 : 7) cstats[,i] <- if(length(dec)) format(round(object[, i], dec)) else format(signif(object[, i], digits)) cstats[is.na(object)] <- "" caption <- sub('^ *', '', caption) ## htmlTable creates invalid html if start caption with blank caption <- sub(' Response : ', '  Response: ', caption) caption <- paste0(caption, '') cstats <- as.data.frame(cstats) attr(cstats,"row.names") <- rowl names(cstats)[3] <- "Δ" htmltools::HTML(paste0( htmlTable::htmlTable(cstats, caption=caption, ## css.cell = 'min-width: 6em;', css.cell=c('', rep('padding-left:4ex;', ncol(cstats))), rowlabel='', align='r', align.header='r', escape.html=FALSE))) } ## plot is not using bootstrap percentile or Bayesian HPD ## intervals but is using SE-based CLs # was q=c(.7, .8, .9, .95, .99) plot.summary.rms <- function(x, at, log=FALSE, q=c(0.9, 0.95, 0.99), xlim, nbar, cex=1, nint=10, cex.main=1, clip=c(-1e30, 1e30), main, col=rgb(red=.1, green=.1, blue=.8, alpha=c(.1,.4,.7)), col.points=rgb(red=.1, green=.1, blue=.8, alpha=1), pch=17, lwd=if(length(q) == 1) 3 else 2 : (length(q) + 1), digits=4, ...) { isbase <- Hmisc::grType() == 'base' pp <- plotlyParm # in Hmisc scale <- attr(x, "scale") adjust <- attr(x, "adjust") if(adjust != '') adjust <- paste("Adjusted to:", adjust, sep="") Type <- x[, "Type"] x <- x[Type==1,, drop=FALSE] lab <- dimnames(x)[[1]] effect <- x[, "Effect"] se <- x[, "S.E."] cond <- if(isbase) ! log && any(Type == 2) else any(Type == 2) if(cond) { fun <- exp tlab <- scale[2] } else { fun <- function(x) x if(log) { if(length(scale) == 2) tlab <- scale[2] else tlab <- paste("exp(", scale[1], ")", sep="") } else tlab <- scale[1] } if(!length(scale)) tlab <- '' ## mainly for Glm fits if(!missing(main)) tlab <- main fmt <- function(k) { m <- length(k) f <- character(m) for(i in 1 : m) f[i] <- format(k[i]) f } sep <- if(isbase) ' - ' else '
<br>' dif <- x[, 'Diff.'] ## Reformat for factor predictors if(any(is.na(dif))) lab[is.na(dif)] <- sub(' - ', sep, lab[is.na(dif)]) lb <- ifelse(is.na(x[, 'Diff.']), lab, paste(lab, sep, fmt(x[, 'High']), ':', fmt(x[, 'Low']), sep='')) if(isbase) { confbar <- function(y, est, se, q, col, col.points, pch=17, lwd=rep(3, length(q)), clip=c(-1e30, 1e30), fun = function(x) x, qfun = function(x) ifelse(x==.5, qnorm(x), ifelse(x < .5, qnorm(x / 2), qnorm((1 + x) / 2)))) { n <- length(q) q <- c(1 - rev(q), .5, q) a <- fun(est) points(a, y, col=col.points, pch=pch) a <- fun(est + se * qfun(q)) a[a < clip[1]] <- NA; a[a > clip[2]] <- NA m <- length(q) segments(c(a[1], a[m]), y, c(a[2], a[m - 1]), y, col=col[1], lwd=lwd[1]) if(n > 1) segments(c(a[2], a[m - 1]), y, c(a[3], a[m - 2]), col=col[2], lwd=lwd[2]) if(n > 2) segments(c(a[3], a[m - 2]), y, c(a[4], a[m - 3]), col=col[3], lwd=lwd[3]) names(a) <- format(q) invisible(a) } augment <- if(log | any(Type == 2)) c(.1, .5, .75, 1) else 0 n <- length(effect) out <- qnorm((max(q) + 1) / 2) if(missing(xlim) && !missing(at)) xlim <- range(if(log) logb(at) else at) else if(missing(xlim)) { xlim <- fun(range(c(effect - out * se, effect + out * se))) xlim[1] <- max(xlim[1], clip[1]) xlim[2] <- min(xlim[2], clip[2]) } else augment <- c(augment, if(log) exp(xlim) else xlim) plot.new(); par(new=TRUE) mxlb <- .1 + max(strwidth(lb, units='inches', cex=cex)) tmai <- par('mai') on.exit(par(mai=tmai)) par(mai=c(tmai[1], mxlb, 1.5*tmai[3], tmai[4])) outer.widths <- fun(effect + out * se) - fun(effect - out * se) if(missing(nbar)) nbar <- n npage <- ceiling(n/nbar) is <- 1 for(p in 1 : npage) { ie <- min(is + nbar - 1, n) plot(1:nbar, rep(0,nbar), xlim=xlim, ylim=c(1,nbar), type="n", axes=FALSE, xlab="", ylab="") if(cex.main > 0) title(tlab, cex=cex.main) lines(fun(c(0, 0)), c(nbar - (ie - is), nbar), lty=2) if(log) { pxlim <- pretty(exp(xlim), n=nint) pxlim <- sort(unique(c(pxlim, augment))) ## For some weird reason, sometimes duplicates (at xlim[2]) ## still remain pxlim <- pxlim[pxlim >= exp(xlim[1])] if(!missing(at)) pxlim <- at axis(3, logb(pxlim), labels=format(pxlim)) } else { pxlim <- pretty(xlim, n=nint) pxlim <- sort(unique(c(pxlim, augment))) pxlim <- pxlim[pxlim >= xlim[1]] if(!missing(at)) pxlim <- at axis(3, pxlim) } imax <- (is : ie)[outer.widths[is : ie] == max(outer.widths[is : ie])][1] for(i in is : ie) { confbar(nbar - (i - is + 1) + 1, effect[i], se[i], q=q, col=col, col.points=col.points, fun=fun, clip=clip, lwd=lwd, pch=pch) mtext(lb[i], 2, 0, at=nbar - (i - is + 1) + 1, cex=cex, adj=1, las=1) } if(adjust != "") { xx <- par('usr')[2] if(nbar > ie) text(xx, nbar - (ie - is + 1), adjust, adj=1, cex=cex) else title(sub=adjust, adj=1, cex=cex) } is <- ie + 1 } return(invisible()) } ## Use plotly instead qfun <- function(x) ifelse(x == 0.5, qnorm(x), ifelse(x < 0.5, qnorm(x / 2), qnorm((1 + x) / 2))) ## ??? don't we need a different qfun for ols using t dist? n <- length(q) feffect <- fun(effect) hte <- format(feffect, digits=digits) if(adjust != '') hte <- paste(hte, adjust, sep='
<br>') p <- plotly::plot_ly(x=~ feffect, y=~ lb, text = ~ hte, type = 'scatter', mode='markers', hoverinfo='text', name = 'Estimate', height = pp$heightDotchart(length(lb))) for(i in 1 : n) { lower <- fun(effect + se * qfun(1. - q[i])) upper <- fun(effect + se * qfun(q[i])) ## Interrupt line segments with NA m <- 2 * length(effect) x <- rep(NA, m) x[seq(1, m, by=2)] <- lower x[seq(2, m, by=2)] <- upper ycl <- rep(lb, each=2) ht <- ifelse(is.na(x), '', format(x, digits=digits)) cl95 <- which(abs(q - 0.95) < 0.000001) vis <- ! length(cl95) || i %in% cl95 dat <- data.frame(x, ycl, ht) p <- plotly::add_markers(p, x=~ x, y=~ ycl, text=~ ht, data=dat, marker = list(symbol='line-ns-open'), hoverinfo = 'text', name = paste(format(q)[i], 'CI'), visible = if(vis) TRUE else 'legendonly') } plotly::layout(p, xaxis = list(type = if(log) 'log' else 'linear', zeroline=FALSE, title=tlab), yaxis = list(title='', autorange='reversed'), margin = list(l=pp$lrmargin(lb)), shapes = list( list(type = "line", line = list(color = "lightgray"), x0 = fun(0), x1 = fun(0), xref = "x", y0 = 0, y1=length(lb), yref='y')) ) } rms/R/validate.rpart.s0000644000176200001440000001303012713363356014366 0ustar liggesusersvalidate.rpart <- function(fit, method, B, bw, rule, type, sls, aics, force, estimates, pr=TRUE, k, rand, xval = 10, FUN, ...) { if(missing(FUN)) FUN <- function(..., k) rpart::prune(..., cp=k) act <- (fit$call)$na.action if(! length(act)) act <- function(x) x m <- model.frame(fit, na.action = act) if(! is.data.frame(m)) stop('you must specify model=T in the fit') y <- model.extract(m, 'response') ytype <- if(inherits(y, 'Surv')) 'Surv' else if(is.logical(y) || ((length(un <- sort(unique(y[! is.na(y)]))) == 2) && un[1] == 0 && un[2] == 1)) 'binary' else 'other' if(ytype == 'binary' && is.factor(y)) y <- as.numeric(y) - 1 dxyf <- switch(ytype, binary = function(x, y) somers2(x, y)['Dxy'], Surv = function(x, y) - dxy.cens(x, y)['Dxy'], other = function(x, y) dxy.cens(x, y)['Dxy']) call <- match.call() method <- call$method size <- NULL if(missing(k)) { k <- fit$cptable[, 'CP'] size <- fit$cptable[, 'nsplit'] } if(missing(rand)) rand <- sample(xval, NROW(m[[1]]), replace = TRUE) which <- unique(rand) pdyx.app <- pdyx.val <- pb.app <- pb.val <- double(length(k)) l <- 0 for(kk in k) { l <- l + 1 dyx.val <- dyx.app <- b.val <- b.app <- double(length(which)) j <- 0 for(i in which) { j <- j + 1 s <- rand != i tlearn <- rpart::rpart(model=m[s, ]) papp <- if(kk == 0) tlearn else FUN(tlearn, k = kk, ...)
if(nrow(papp$frame) == 1) { dyx.app[j] <- dyx.val[j] <- 0 #no splits if(ytype != 'Surv') b.app[j] <- b.val[j] <- mean((y - mean(y))^2, na.rm = TRUE) } else { yhat <- predict(papp, newdata = m[s, ]) if(is.matrix(yhat) && ncol(yhat) > 1) yhat <- yhat[, ncol(yhat), drop=TRUE] ysub <- if(ytype == 'Surv') y[s, ] else y[s] ## tree with factor binary y if(ytype != 'Surv') b.app[j] <- mean((yhat - ysub)^2) dyx.app[j] <- dxyf(yhat, ysub) s <- rand == i yhat <- predict(papp, newdata = m[s, ]) ysub <- if(ytype == 'Surv') y[s, ] else y[s] if(ytype != 'Surv') b.val[j] <- mean((yhat - ysub)^2) dyx.val[j] <- dxyf(yhat, ysub) } } pdyx.app[l] <- mean(dyx.app) pdyx.val[l] <- mean(dyx.val) pb.app[l] <- mean(b.app) pb.val[l] <- mean(b.val) if(pr) { dyx.app <- c(dyx.app, pdyx.app[l]) dyx.val <- c(dyx.val, pdyx.val[l]) b.app <- c(b.app, pb.app[l]) b.val <- c(b.val, pb.val[l]) cat("\n\nk=", format(kk), ":\n\n") rnam <- c(as.character(1 : j), "Mean") if(ytype == 'Surv') { dyx <- cbind(dyx.app, dyx.val) dimnames(dyx) <- list(rnam, c('Dxy Training', 'Dxy Test')) } else { dyx <- cbind(dyx.app, dyx.val, b.app, b.val) dimnames(dyx) <- list(rnam, c("Dxy Training", "Dxy Test", "MSE Training", "MSE Test")) } print(dyx) } } if(ytype == 'Surv') pb.app <- pb.val <- NULL structure(list(k = k, size = size, dxy.app = pdyx.app, dxy.val = pdyx.val, mse.app = pb.app, mse.val = pb.val, ytype = ytype, xval = xval), class = "validate.rpart") } print.validate.rpart <- function(x, ...) { cat(x$xval, "-fold cross-validation\n\n", sep = "") w <- cbind(k = x$k, size = x$size, Dxy.apparent = x$dxy.app, Dxy.val = x$dxy.val, MSE.apparent = x$mse.app, MSE.val = x$mse.val) if(x$ytype == 'binary') dimnames(w) <- list(NULL, c("k", if(length(x$size)) "size", "Dxy.apparent", "Dxy.val", "Brier.apparent", "Brier.val")) invisible(print(w)) } plot.validate.rpart <- function(x, what = c("mse", "dxy"), legendloc = locator, ...) { if(! missing(what) && x$ytype == 'Surv' && 'mse' %in% what) stop('may not specify what="mse" for survival trees') if(x$ytype == 'Surv') what <- 'dxy' obj <- x if(length(obj$size)) { x <- obj$size xlab <- "Number of Nodes" } else { x <- obj$k xlab <- "Cost/Complexity Parameter" } if("mse" %in% what) { blab <- if(obj$ytype == 'binary') "Brier Score" else "Mean Squared Error" ylim <- range(c(obj$mse.app, obj$mse.val)) plot(x, obj$mse.app, xlab = xlab, ylab = blab, ylim = ylim, type = "n") lines(x, obj$mse.app, lty = 3) lines(x, obj$mse.val, lty = 1) title(sub = paste(obj$xval, "-fold cross-validation", sep = ""), adj = 0) if(is.function(legendloc)) legend(legendloc(1), c("Apparent", "Cross-validated"), lty = c(3, 1), bty = "n") else { legend(grconvertX(legendloc[1], from='npc'), grconvertY(legendloc[2], from='npc'), c("Apparent", "Cross-validated"), lty = c(3, 1), bty = "n") } } if("dxy" %in% what) { ylim <- range(c(obj$dxy.app, obj$dxy.val)) plot(x, obj$dxy.app, xlab = xlab, ylab = "Somers' Dxy", ylim = ylim, type = "n") lines(x, obj$dxy.app, lty = 3) lines(x, obj$dxy.val, lty = 1) title(sub = paste(obj$xval, "-fold cross-validation", sep = ""), adj = 0) if(is.function(legendloc)) legend(legendloc(1), c("Apparent", "Cross-validated"), lty = c(3, 1), bty = "n") else { par(usr=c(0,1,0,1)) legend(legendloc[1],legendloc[2], c("Apparent", "Cross-validated"), lty = c(3, 1), bty = "n") } } invisible() } rms/R/plot.xmean.ordinaly.s0000644000176200001440000000602012250461106015340 0ustar liggesusersplot.xmean.ordinaly <- function(x, data, subset, na.action, subn=TRUE, cr=FALSE, topcats=1, cex.points=.75, ...)
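## Example call for plot.xmean.ordinaly (a hedged sketch; the data set and
## variable names are hypothetical): for each predictor, plot stratified means
## of X given each level of an ordinal Y against the means expected under the
## proportional odds model (and the continuation ratio model when cr=TRUE):
##   plot.xmean.ordinaly(y ~ age + bp + sex, data=mydata, cr=TRUE, topcats=2)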
{ X <- match.call(expand.dots=FALSE) X$subn <- X$cr <- X$topcats <- X$cex.points <- X$... <- NULL if(missing(na.action)) X$na.action <- na.keep Terms <- if(missing(data)) terms(x) else terms(x, data=data) X$formula <- Terms X[[1]] <- as.name('model.frame') X <- eval.parent(X) resp <- attr(Terms, 'response') if(resp==0) stop('must have a response variable') nx <- ncol(X) - 1 Y <- X[[resp]] nam <- as.character(attr(Terms, 'variables')) nam <- nam[-1] dopl <- function(x, y, cr, xname, yname) { s <- !is.na(unclass(Y)+x) y <- y[s] x <- x[s] n <- length(x) f <- lrm.fit(x, y) fy <- f$freq/n ##Following is pulled out of predict.lrm ns <- length(fy) - 1 # number of intercepts k <- ns + 1 intcept <- f$coef[1:ns] xb <- f$linear.predictors - intcept[1] xb <- sapply(intcept, '+', xb) P <- 1/(1+exp(-xb)) P <- matrix(P, ncol=ns) P <- cbind(1, P) - cbind(P, 0) #one column per prob(Y=j) xmean.y <- tapply(x, y, mean) xp <- x*P/n xmean.y.po <- apply(xp, 2, sum)/fy yy <- 1:length(fy) rr <- c(xmean.y, xmean.y.po) if(cr) { u <- cr.setup(y) s <- u$subs yc <- u$y xc <- x[s] cohort <- u$cohort xcohort <- matrix(0, nrow=length(xc), ncol=length(levels(cohort))-1) xcohort[col(xcohort)==unclass(cohort)-1] <- 1 # generate dummies cof <- lrm.fit(cbind(xcohort, xc), yc)$coefficients cumprob <- rep(1, n) for(j in 1:k) { P[,j] <- cumprob* (if(j==k) 1 else plogis(cof[1] + (if(j>1) cof[j] else 0) + cof[k]*x)) cumprob <- cumprob - P[,j] } xp <- x*P/n xmean.y.cr <- apply(xp, 2, sum)/fy rr <- c(rr, xmean.y.cr) } plot(yy, xmean.y, type='b', ylim=range(rr), axes=FALSE, xlab=yname, ylab=xname, ...) mgp.axis(1, at=yy, labels=names(fy)) mgp.axis(2) lines(yy, xmean.y.po, lty=2, ...) if(cr) points(yy, xmean.y.cr, pch='C', cex=cex.points) if(subn) title(sub=paste('n=',n,sep=''),adj=0) } for(i in 1:nx) { x <- X[[resp+i]] if(is.factor(x)) { f <- table(x) ncat <- length(f) if(ncat < 2) { warning(paste('predictor', nam[resp+i],'only has one level and is ignored')) next } nc <- min(ncat-1, topcats) cats <- (names(f)[order(-f)])[1:nc] for(wcat in cats) { xx <- 1*(x==wcat) xname <- paste(nam[resp+i], wcat, sep='=') dopl(xx, Y, cr, xname, nam[resp]) } } else dopl(x, Y, cr, nam[resp+i], nam[resp]) } invisible() } rms/R/poma.r0000644000176200001440000001450113722734567012414 0ustar liggesusers#' Examine proportional odds and parallelism assumptions of `orm` and `lrm` model fits. #' #' Based on codes and strategies from Frank Harrell's canonical `Regression Modeling Strategies` text #' #' Strategy 1: Apply different link functions to Prob of Binary Ys (defined by cutval). Regress transformed outcome on combined X and assess constancy of slopes (betas) across cut-points \cr #' Strategy 2: Generate score residual plot for each predictor (for response variable with <10 unique levels) \cr #' Strategy 3: Assess parallelism of link function transformed inverse CDFs curves for different XBeta levels (for response variables with >=10 unique levels) #' #' @param mod.orm Model fit of class `orm` or `lrm`. For `fit.mult.impute` objects, `poma` will refit model on a singly-imputed data-set #' @param cutval Numeric vector; sequence of observed values to cut outcome #' #' @author Yong Hao Pua #' #' @import rms #' #' @export #' #' @seealso Harrell FE. *Regression Modeling Strategies: with applications to linear models, #' logistic and ordinal regression, and survival analysis.* New York: Springer Science, LLC, 2015. 
#' #' @examples #' #'## orm model (response variable has fewer than 10 unique levels) #'mod.orm <- orm(carb ~ cyl + hp , x=TRUE, y=TRUE, data = mtcars) #'poma(mod.orm) #' #' #'## orm model (response variable has >=10 unique levels) #'mod.orm <- orm(mpg ~ cyl + hp , x=TRUE, y=TRUE, data = mtcars) #'poma(mod.orm) #' #' #' ## orm model using imputation #' dat <- mtcars #' ## introduce NAs #' dat[sample(rownames(dat), 10), "cyl"] <- NA #' im <- aregImpute(~ cyl + wt + mpg + am, data = dat) #' aa <- fit.mult.impute(mpg ~ cyl + wt , xtrans = im, data = dat, fitter = orm) #' poma(aa) poma <- function(mod.orm, cutval) { ### Ensure that lrm and orm objects are supplied if(!any(class(mod.orm) %in% Cs(lrm, orm))) { stop('rms object must be of class lrm or orm', call. = FALSE) } ## (Re-)create mod.orm from a singly-imputed dataset if(any(class(mod.orm) %in% Cs(fit.mult.impute) )) { cat("Refitting model on a singly-imputed dataset \n") fit_mult_call <- as.character(mod.orm$call) myformula <- fit_mult_call[[2]] myfitter <- fit_mult_call[[3]] myaregimpute <- fit_mult_call[[4]] mydata <- fit_mult_call[[5]] # extract imputed values imputed <- impute.transcan(x = get(myaregimpute), imputation = 1, data = get(mydata), list.out = TRUE, pr = FALSE) # create one imputed dataset imputed_df <- get(mydata) imputed_df[names(imputed)] <- imputed # recreate model mod.orm <- eval(parse(text = sprintf(" %s(%s, x = T, y = T, data = imputed_df)", myfitter, myformula))) } ## Generate dataset with no missing values data = mod.orm$call$data data = eval (data) [ , all.vars(mod.orm$sformula)] data <- data [complete.cases(data), ] ### Convert DV into numeric vector when factor DV is supplied mydv <- eval (data) [ , all.vars(mod.orm$sformula)[1] ] cat("Unique values of Y:", unique(sort(mydv)), "\n") ### Compute combined predictor (X) values if(any(class(mydv) %in% "factor") ) { aa <- paste0("as.numeric(", mod.orm$terms[[2]], ") ~") rhs <- mod.orm$terms[[3]] bb <- paste(deparse(rhs), collapse = "") newformula <- paste(aa, bb) cat("Formula used with non-numeric DV:", newformula, "\n") cat("Cut-point for factor DV refers to the jth levels - not observed Y values \n") mod.ols <- ols(as.formula(newformula) , x=TRUE, y=TRUE, data=eval(data)) } else { cat("Cut-point for continuous DV refers to observed Y values \n") mod.ols <- ols(formula(mod.orm), x=TRUE, y=TRUE, data=eval(data)) } combined_x <- fitted(mod.ols) ### Set cutpoint values for Y ### for factor DV: cutpoints = 2 to max no. of levels (jth levels) ### for continuous DV: cutpoints = y unique values (quartiles for truly continuous response var) if (missing(cutval)) { if (any(class(mydv) %in% "factor")) cutval <- seq(2, length(unique(mydv))) else if( length(unique(mydv)) <= 10 ) cutval <- unique(sort(mydv))[-1] else cutval <- quantile(unique(mydv), c(0.25, 0.5, 0.75), na.rm = TRUE) ## quartiles as cutpoints for continuous DV } ### Apply link functions to Prob of Binary Y (defined by cutval) ### Regress transformed outcome as a function of combined X. 
### Check for constancy of slopes ### Codes taken from rms p368 r <- NULL for (link in c("logit", "probit", "cloglog")) { for (k in cutval) { co <- coef(glm(mod.ols$y < k ~ combined_x, data=eval(data), family=binomial (link))) r <- rbind(r, data.frame (link=link, cut.value=k, slope =round(co[2],2))) } } cat("rms-368: glm cloglog on Prob[Y<j] \n") print(r, row.names=FALSE) ### Graphical Assessment if(length(unique(mod.orm$y)) < 10) { par(ask=TRUE) ## Generate Score residual plot for each predictor/terms ## Adjust par(mfrow) settings based on number of terms (codes are a little unwieldy) numpred <- dim(mod.orm$x)[[2]] if(numpred >= 9 ) par(mfrow = c(3,3)) else if (numpred >= 5) par(mfrow = c(2,3)) else if (numpred >= 3) par(mfrow = c(2,2)) else if (numpred >=2) par(mfrow = c(1,2)) else par(mfrow = c(1,1)) resid(mod.orm, "score.binary", pl=TRUE) par(ask=FALSE) par(mfrow = c(1,1)) } else { ## Assess parallelism of link function transformed inverse CDFs curves ## Codes to generate curves are from Harrell's rms book p368-369 p <- function (fun, row, col) { f <- substitute (fun) g <- function (F) eval(f) ## Number of groups (2 to 5) based on sample size ecdfgroups = pmax(2, pmin(5, round( dim(mod.orm$x)[[1]]/20))) z <- Ecdf (~ mod.ols$y, groups = cut2(combined_x, g = ecdfgroups), fun = function (F) g(1 - F), xlab = all.vars(mod.ols$sformula)[[1]], ylab = as.expression (f) , xlim = c(quantile(mod.orm$y, 0.10, na.rm= TRUE), quantile(mod.orm$y, 0.85, na.rm= TRUE)), label.curve= FALSE) print (z, split =c(col , row , 2, 1) , more = row < 2 | col < 2) } p (fun = log (F/(1-F)), 1, 1) p( fun = -log ( -log (F)), 1, 2) } } rms/R/survplot.rms.s0000644000176200001440000002553012773171476014146 0ustar liggesuserssurvplot <- function(fit, ...) UseMethod("survplot") survplot.rms <- function(fit, ..., xlim, ylim=if(loglog) c(-5,1.5) else if(what == "survival" & missing(fun)) c(0,1), xlab, ylab, time.inc, what=c("survival","hazard"), type=c("tsiatis","kaplan-meier"), conf.type=c("log","log-log","plain","none"), conf.int=FALSE, conf=c("bands","bars"), mylim=NULL, add=FALSE, label.curves=TRUE, abbrev.label=FALSE, levels.only=FALSE, lty, lwd=par('lwd'), col=1, col.fill=gray(seq(.95, .75, length=5)), adj.subtitle=TRUE, loglog=FALSE, fun, n.risk=FALSE, logt=FALSE, dots=FALSE, dotsize=.003, grid=NULL, srt.n.risk=0, sep.n.risk=.056, adj.n.risk=1, y.n.risk, cex.n.risk=.6, cex.xlab=par('cex.lab'), cex.ylab=cex.xlab, pr=FALSE) { what <- match.arg(what) polyg <- ordGridFun(grid=FALSE)$polygon ylim <- ylim ## before R changes missing(fun) type <- match.arg(type) conf.type <- match.arg(conf.type) conf <- match.arg(conf) opar <- par(c('mar', 'xpd')) on.exit(par(opar)) cylim <- function(ylim) if(length(mylim)) c(min(ylim[1], mylim[1]), max(ylim[2], mylim[2])) else ylim psmfit <- inherits(fit,'psm') if(what == "hazard" && !psmfit) stop('what="hazard" may only be used for fits from psm') if(what == "hazard" & conf.int > 0) { warning('conf.int may only be used with what="survival"') conf.int <- FALSE } if(loglog) { fun <- function(x) logb(-logb(ifelse(x == 0 | x == 1, NA, x))) use.fun <- TRUE } else if(!missing(fun)) { use.fun <- TRUE if(loglog) stop("cannot specify loglog=T with fun") } else { fun <- function(x) x use.fun <- FALSE } if(what == "hazard" & loglog) stop('may not specify loglog=T with what="hazard"') if(use.fun | logt | what == "hazard") { dots <- FALSE; grid <- NULL } cox <- inherits(fit,"cph") if(cox) { if(n.risk | conf.int>0) surv.sum <- fit$surv.summary exactci <- !(is.null(fit$x)|is.null(fit$y)) ltype <- "s" #step 
functions for cph } else { if(n.risk) stop("the n.risk option applies only to fits from cph") exactci <- TRUE ltype <- "l" } par(xpd=NA) ## Compute confidence limits for survival based on -log survival, ## constraining to be in [0,1]; d = std.error of cum hazard * z value ciupper <- function(surv, d) ifelse(surv == 0, 0, pmin(1, surv*exp(d))) cilower <- function(surv, d) ifelse(surv == 0, 0, surv*exp(-d)) labelc <- is.list(label.curves) || label.curves units <- fit$units if(missing(ylab)) { if(loglog) ylab <- "log(-log Survival Probability)" else if(use.fun) ylab <- "" else if(what == "hazard") ylab <- "Hazard Function" else ylab <- "Survival Probability" } if(missing(xlab)) { if(logt) xlab <- paste("log Survival Time in ",units,"s",sep="") else xlab <- paste(units,"s",sep="") } maxtime <- fit$maxtime maxtime <- max(pretty(c(0,maxtime))) if(missing(time.inc)) time.inc <- fit$time.inc if(missing(xlim)) xlim <- if(logt)logb(c(maxtime/100,maxtime)) else c(0,maxtime) if(length(grid) && is.logical(grid)) grid <- if(grid) gray(.8) else NULL if(is.logical(conf.int)) { if(conf.int) conf.int <- .95 else conf.int <- 0 } zcrit <- qnorm((1+conf.int)/2) xadj <- Predict(fit, type='model.frame', np=5, factors=rmsArgs(substitute(list(...)))) info <- attr(xadj, 'info') varying <- info$varying if(length(varying) > 1) stop('cannot vary more than one predictor') adjust <- if(adj.subtitle) info$adjust else NULL if(length(xadj)) { nc <- nrow(xadj) covpres <- TRUE } else { nc <- 1 covpres <- FALSE } y <- if(length(varying)) xadj[[varying]] else '' curve.labels <- NULL xd <- xlim[2] - xlim[1] if(n.risk & !add) { mar <- opar$mar if(mar[4] < 4) { mar[4] <- mar[4] + 2 par(mar=mar) } } ## One curve for each value of y, excl style used for C.L. lty <- if(missing(lty)) seq(nc+1)[-2] else rep(lty, length=nc) col <- rep(col, length=nc) lwd <- rep(lwd, length=nc) i <- 0 if(levels.only) y <- gsub('.*=', '', y) abbrevy <- if(abbrev.label) abbreviate(y) else y abbrevy <- if(is.factor(abbrevy)) as.character(abbrevy) else format(abbrevy) if(labelc || conf == 'bands') curves <- vector('list',nc) for(i in 1:nc) { ci <- conf.int ay <- if(length(varying)) xadj[[varying]] else '' if(covpres) { adj <- xadj[i,,drop=FALSE] w <- survest(fit, newdata=adj, fun=fun, what=what, conf.int=ci, type=type, conf.type=conf.type) } else w <- survest(fit, fun=fun, what=what, conf.int=ci, type=type, conf.type=conf.type) time <- w$time if(logt) time <- logb(time) s <- !is.na(time) & (time>=xlim[1]) surv <- w$surv if(is.null(ylim)) ylim <- cylim(range(surv, na.rm=TRUE)) stratum <- w$strata if(is.null(stratum)) stratum <- 1 if(!is.na(stratum)) { ##can be NA if illegal strata combinations requested cl <- if(is.factor(ay)) as.character(ay) else format(ay) curve.labels <- c(curve.labels, abbrevy[i]) if(i == 1 & !add) { plot(time, surv, xlab='', xlim=xlim, ylab='', ylim=ylim, type="n", axes=FALSE) mgp.axis(1, at=if(logt)pretty(xlim) else seq(xlim[1], max(pretty(xlim)), time.inc), labels=TRUE, axistitle=xlab, cex.lab=cex.xlab) mgp.axis(2, at=pretty(ylim), axistitle=ylab, cex.lab=cex.ylab) if(!logt & (dots || length(grid))) { xlm <- pretty(xlim) xlm <- c(xlm[1], xlm[length(xlm)]) xp <- seq(xlm[1], xlm[2],by=time.inc) yd <- ylim[2] - ylim[1] if(yd <= .1) yi <- .01 else if(yd <= .2) yi <- .025 else if(yd <= .4) yi <- .05 else yi <- .1 yp <- seq(ylim[2], ylim[1] + if(n.risk && missing(y.n.risk)) yi else 0, by=- yi) if(dots) for(tt in xp)symbols(rep(tt, length(yp)), yp, circles=rep(dotsize, length(yp)), inches=dotsize, add=TRUE) else abline(h=yp, v=xp, col=grid, 
xpd=FALSE) } } tim <- time[s]; srv <- surv[s] if(conf.int > 0 && conf == 'bands') { blower <- w$lower[s] bupper <- w$upper[s] } if(max(tim) > xlim[2]) { if(ltype == "s") { ##Get estimate at last permissible point to plot ## s.last <- min(srv[tim<=xlim[2]+1e-6]) #not work with function s.last <- srv[tim <= xlim[2] + 1e-6] s.last <- s.last[length(s.last)] k <- tim < xlim[2] tim <- c(tim[k], xlim[2]); srv <- c(srv[k], s.last) if(conf.int > 0 && conf == 'bands') { low.last <- blower[time <= xlim[2] + 1e-6] low.last <- low.last[length(low.last)] up.last <- bupper[time <= xlim[2] + 1e-6] up.last <- up.last[length(up.last)] blower <- c(blower[k],low.last) bupper <- c(bupper[k],up.last) } } else tim[tim > xlim[2]] <- NA } ##don't let step function go beyond x-axis - ##this cuts it off but allows step to proceed axis end if(conf != 'bands') lines(tim, srv, type=ltype, lty=lty[i], col=col[i], lwd=lwd[i]) if(labelc || conf == 'bands') curves[[i]] <- list(tim, srv) if(pr) { zest <- rbind(tim,srv) dimnames(zest) <- list(c("Time","Survival"), rep("",length(srv))) cat("\nEstimates for ", cl,"\n\n") print(zest, digits=3) } if(conf.int > 0) { if(conf == "bands") { polyg(x = c(tim, rev(tim)), y = c(blower, rev(bupper)), col = col.fill[i], type=ltype) } else { if(exactci) { # not from cph(surv=T) tt <- seq(0, maxtime, time.inc) v <- survest(fit, newdata=adj, times=tt, what=what, fun=fun, conf.int=ci, type=type, conf.type=conf.type) tt <- v$time #may not get predictions at all t ss <- v$surv lower <- v$lower upper <- v$upper if(!length(ylim)) ylim <- cylim(range(ss, na.rm=TRUE)) if(logt) tt <- logb(ifelse(tt == 0, NA, tt)) } else { tt <- as.numeric(dimnames(surv.sum)[[1]]) if(logt) tt <- logb(tt) ss <- surv.sum[,stratum,'Survival']^ exp(w$linear.predictors) se <- surv.sum[,stratum,'std.err'] ss <- fun(ss) lower <- fun(cilower(ss, zcrit*se)) upper <- fun(ciupper(ss, zcrit*se)) ss[is.infinite(ss)] <- NA lower[is.infinite(lower)] <- NA upper[is.infinite(upper)] <- NA } tt <- tt + xd*(i-1)*.01 errbar(tt, ss, upper, lower, add=TRUE, lty=lty[i], col=col[i]) } } if(n.risk) { if(length(Y <- fit$y)) { tt <- seq(max(0,xlim[1]),min(maxtime,xlim[2]),by=time.inc) ny <- ncol(Y) if(!length(str <- fit$strata)) Y <- Y[,ny-1] else Y <- Y[unclass(str) == unclass(stratum), ny - 1] nrisk <- rev(cumsum(table( cut(-Y,sort(unique(-c(tt,range(Y)+ c(-1,1))))) )[-length(tt)-1])) } else { if(is.null(surv.sum)) stop("you must use surv=T or y=T in fit to use n.risk=T") tt <- as.numeric(dimnames(surv.sum)[[1]]) l <- (tt >= xlim[1]) & (tt <= xlim[2]) tt <- tt[l] nrisk <- surv.sum[l,stratum,2] } tt[1] <- xlim[1] #was xd*.015, .030, .035 yd <- ylim[2] - ylim[1] if(missing(y.n.risk)) y.n.risk <- ylim[1] if(y.n.risk == 'auto') y.n.risk <- - diff(ylim) / 3 yy <- y.n.risk + yd*(nc-i)*sep.n.risk #was .029, .038, .049 nri <- nrisk nri[tt > xlim[2]] <- NA text(tt[1], yy, nri[1], cex=cex.n.risk, adj=adj.n.risk, srt=srt.n.risk) if (length(nri) > 1) text(tt[-1], yy, nri[-1], cex=cex.n.risk, adj=1) text(xlim[2]+xd*.025, yy, adj=0, curve.labels[i], cex=cex.n.risk) } } } ## to keep bands from covering up lines plot lines last if(conf == 'bands') for(i in 1:length(y)) lines(curves[[i]][[1]], curves[[i]][[2]], type=ltype, lty=lty[i], col=col[i], lwd=lwd[i]) if(labelc) labcurve(curves, curve.labels, type=ltype, lty=lty, col.=col, lwd=lwd, opts=label.curves) if(length(adjust)) title(sub=paste("Adjusted to:",adjust), adj=0, cex=.6) invisible(list(adjust=adjust, curve.labels=curve.labels)) } rms/R/bj.s0000644000176200001440000003247113704356256012055 0ustar 
liggesusersbj <- function(formula, data=environment(formula), subset, na.action=na.delete, link="log", control=NULL, method='fit', x=FALSE, y=FALSE, time.inc) { call <- match.call() callenv <- parent.frame() # don't delay these evaluations subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) X <- modelData(data, formula, subset = subset, na.action=na.action, callenv=callenv) X <- Design(X, formula=formula) if(method=='model.frame') return(X) atrx <- attributes(X) nact <- atrx$na.action Terms <- atrx$terms atr <- atrx$Design sformula <- atrx$sformula mmcolnames <- atr$mmcolnames lnames <- c("logit","probit","cloglog","identity","log","sqrt", "1/mu^2","inverse") link <- pmatch(link, lnames, 0) if(link==0) stop("invalid link function") link <- lnames[link] Y <- model.extract(X, "response") atY <- attributes(Y) ncy <- ncol(Y) maxtime <- max(Y[,-ncy]) nnn <- c(nrow(Y),sum(Y[,ncy])) if (! inherits(Y, "Surv")) stop("Response must be a survival object") type <- attr(Y, "type") linkfun <- make.link(link)$linkfun if (type != 'right') stop("Surv type must be 'right' censored") Y <- cbind(linkfun(Y[,1]), Y[,2]) X <- model.matrix(sformula, X) alt <- attr(mmcolnames, 'alt') if(! all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt X <- X[, mmcolnames, drop=FALSE] assgn <- DesignAssign(atr, 1, Terms) if(method == 'model.matrix') return(X) time.units <- units(Y) if(is.null(time.units) || time.units=='') time.units <- "Day" if(missing(time.inc)) { time.inc <- switch(time.units, Day=30, Month=1, Year=1, maxtime / 10) if(time.inc >= maxtime | maxtime / time.inc > 25) time.inc <- max(pretty(c(0, maxtime))) / 10 } rnam <- dimnames(Y)[[1]] dimnames(X) <- list(rnam, atr$colnames) n <- nrow(X) nvar <- ncol(X) fit <- bj.fit(X, Y, control=control) if(link == 'log') fit$stats <- c(fit$stats, gr=unname(exp(fit$stats['g']))) if(fit$fail) { cat("Failure in bj.fit\n") return(fit) } if (length(nact)) fit$na.action <- nact fit <- c(fit, list(maxtime=maxtime, units=time.units, time.inc=time.inc, non.slopes=1, assign=assgn)) class(fit) <- c("bj", "rms") fit$sformula <- sformula fit$terms <- Terms fit$formula <- as.vector(attr(Terms, "formula")) fit$call <- call fit$Design <- atr if (x) fit$x <- X if (y) { class(Y) <- 'Surv' attr(Y,'type') <- atY$type fit$y <- Y } scale.pred <- if(link=="log") c("log(T)","Survival Time Ratio") else "T" fit$scale.pred <- scale.pred fit$link <- link fit } bj.fit <- function(x, y, control = NULL) { if(ncol(y) != 2) stop("y is not a right-censored Surv object") status <- y[, 2] yy <- y[, 1] iter.max <- control$iter.max eps <- control$eps trace <- control$trace tol <- control$tol max.cycle <- control$max.cycle if(length(iter.max) == 0) iter.max <- 20 if(length(eps) == 0) eps <- 0.001 if(length(trace) == 0) trace <- FALSE if(length(tol) == 0) tol <- 1e-007 if(length(max.cycle) == 0) max.cycle <- 30 x <- as.matrix(x) if(all(x[, 1] == 1)) x <- x[, -1, drop = FALSE] d <- dim(x) nvar <- d[2] if(length(nvar) == 0) nvar <- 0 N <- length(yy) if(nvar > 0) { xbar <- apply(x, 2, mean) xm <- x - rep(xbar, rep(N, nvar)) } else xm <- 0 timeorig <- yy order.orig <- 1:N dummystrat <- factor(rep(1, N)) betahat <- rep(0, max(nvar, 1)) betamatrix <- NULL sse <- 0 n <- 0 ## ## new stuff nonconv <- FALSE ## repeat { oldbeta <- betahat oldsse <- sse if(nvar == 0) ypred <- 0 else { betahat <- solvet(t(xm) %*% xm, t(xm) %*% yy, tol = tol) ypred <- x %*% betahat } alphahat <- mean(yy - ypred) sse <- sum((yy - ypred)^2) razlika <- oldsse/sse if(trace) cat("iteration = ", n, " sse ratio = 
", format(razlika), "\n") n <- n + 1 if(trace) cat(" alpha = ", format(alphahat), " beta = ", format(betahat), "\n\n") ehat <- timeorig - ypred if(! nonconv) { if(abs(razlika - 1) <= eps) break else if(n > iter.max) { cyclesse <- NULL cycleperiod <- 0 nonconv <- TRUE firstsse <- sse } } else { betamatrix <- cbind(betamatrix, c(alphahat, betahat)) cyclesse <- c(cyclesse, sse) cycleperiod <- cycleperiod + 1 if(any(abs(firstsse - cyclesse) < 1e-007)) { cat("\nCycle period = ", cycleperiod, "\n") meanbeta <- apply(betamatrix, 1, mean) alphahat <- meanbeta[1] betahat <- meanbeta[2:length(meanbeta)] ypred <- x %*% betahat ehat <- timeorig - ypred break } else if(cycleperiod >= max.cycle) break } state <- status state[ehat == max(ehat)] <- 1 S <- structure(cbind(ehat, state), class = "Surv", type = "right") KM.ehat <- survfitKM(dummystrat, S, conf.type = "none", se.fit = FALSE) n.risk <- KM.ehat$n.risk surv <- KM.ehat$surv repeats <- c(diff( - n.risk), n.risk[length(n.risk)]) surv <- rep(surv, repeats) w <- - diff(c(1, surv)) m <- order(ehat, - status) bla <- cumsum((w * ehat[m])) bla <- (bla[length(bla)] - bla)/(surv + state[m]) ## Put bla back into original order bl <- bla bl[(1 : N)[m]] <- bla yhat <- if(nvar == 0) bl else x %*% betahat + bl yy[state == 0] <- yhat[state == 0] } n <- n - 1 if(nonconv) { if(cycleperiod < max.cycle) cat("\nNo convergence in", n, "steps, but cycle found - average beta returned\n") else { cat("\nNo convergence in", n, "steps\n") return(list(fail = TRUE)) } } f <- list(fail = FALSE, iter = n) cof <- if(nvar == 0) alphahat else c(alphahat, betahat) dx <- dimnames(x)[[2]] if(length(dx) == 0 && nvar > 0) dx <- paste("x", 1:nvar, sep = "") names(cof) <- c("Intercept", dx) f$coefficients <- cof ehat.u <- ehat[status == 1] edf <- sum(status) - nvar - 1 sigma <- sqrt(sum((ehat.u - mean(ehat.u))^2)/edf) if(nvar > 0) { x <- cbind(Intercept = 1, x)[status == 1, , drop = FALSE] f$var <- solvet(t(x) %*% x, tol = tol) * sigma * sigma } else f$var <- (sigma * sigma)/N f$linear.predictors <- alphahat + as.vector(ypred) g <- GiniMd(f$linear.predictors) stats <- c(N, sum(status), nvar, edf, sigma, g) names(stats) <- c("Obs", "Events", "d.f.", "error d.f.", "sigma", "g") f$stats <- stats if(any(status == 0)) yy <- structure(yy, class = "impute", imputed = (1:N)[status == 0]) f$y.imputed <- yy f } bjplot <- function(fit, which=1:dim(X)[[2]]) { if(!all(c('x','y') %in% names(fit))) stop('must specify x=TRUE,y=TRUE to bj to use bjplot') X <- fit$x Y <- fit$y xnam <- dimnames(X)[[2]] yy <- fit$y.imputed imp <- is.imputed(yy) trans <- if(fit$link=='identity') '' else fit$link ## Do Hillis plot first N <- length(fit$y[, 1]) dummystrat <- factor(rep(1, N)) S <- resid(fit) S[S[, 1] == max(S[, 1]), 2] <- 1 m <- order(fit$y[, 1], - fit$y[, 2]) resd <- S[m, 1] cens <- S[m, 2] KM.ehat <- survfitKM(dummystrat, S, conf.type = "none", se.fit = FALSE) repeats <- c(diff( - KM.ehat$n.risk), KM.ehat$n.risk[length(KM.ehat$n.risk)]) if(length(KM.ehat$time) != N) { time <- rep(KM.ehat$time, repeats) surv <- rep(KM.ehat$surv, repeats) } else { time <- KM.ehat$time surv <- KM.ehat$surv } u <- runif(N-1, 0, surv[1:(N - 1)]) w <- approx(surv, time, xout=u, method='constant', f=0) t.i <- c(w$y, max(time)) surv.i <- c(w$x, min(surv)) residnew <- resd residnew[cens == 0] <- t.i[cens == 0] retlist <- list(predictor = fit$linear.predictor[m], x = fit$x[m, ], res.cens = resd, hillis = residnew, cens = cens) predictor <- fit$linear.predictor[m] plot(predictor, resd, type = "n", xlab = "Linear Predictor", ylab = 
"Residuals") points(predictor[cens == 0], resd[cens == 0], pch = 1) points(predictor[cens == 1], resd[cens == 1], pch = 16) plot(predictor, residnew, type = "n", xlab = "Linear Predictor", ylab = "Residuals") points(predictor[cens == 0], residnew[cens == 0], pch = 1) points(predictor[cens == 1], residnew[cens == 1], pch = 16) for(i in which) { xi <- X[,i] ry <- range(yy,Y) plot(xi, Y[,1], xlab=xnam[i], ylab=paste('Observed',trans,'Time'), type='n', ylim=ry) points(xi[!imp], Y[!imp,1], pch=16) if(any(imp)) { points(xi[imp], Y[imp,1], pch=1) plot(xi, yy, xlab = xnam[i], ylab=paste('Imputed',trans,'Time'), type = "n", ylim=ry) points(xi[imp], yy[imp], pch = 1) segments(xi[imp], Y[imp,1], xi[imp], yy[imp]) points(xi[!imp], yy[!imp], pch = 16) plot(xi, yy, xlab=xnam[i], ylab=paste('Observed or Imputed',trans,'Time'), type='n', ylim=ry) points(xi[!imp], yy[!imp], pch=16) points(xi[imp], yy[imp], pch=1) } } invisible(retlist) } print.bj <- function(x, digits=4, long=FALSE, coefs=TRUE, title="Buckley-James Censored Data Regression", ...) { k <- 0 z <- list() if(length(zz <- x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(zz)[1], sep='.'), list(zz)) } stats <- x$stats ci <- x$clusterInfo misc <- reListclean(Obs = stats['Obs'], Events = stats['Events'], 'Cluster on' = ci$name, 'Clusters' = ci$n) dfstat <- reListclean('Regression d.f.' = stats['d.f.'], sigma=stats['sigma'], 'd.f.'=stats['error d.f.']) disc <- reListclean(g = stats['g'], gr = stats['gr']) k <- k + 1 z[[k]] <- list(type='stats', list(headings=c('', '', 'Discrimination\nIndexes'), data=list(misc, c(dfstat,c(NA,digits,NA)), c(disc, 3)))) cof <- x$coefficients se <- sqrt(diag(x$var)) k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef = cof, se = se)) p <- length(cof) if(long && p > 1) { ss <- diag(1/se) correl <- ss %*% x$var %*% ss dimnames(correl) <- list(names(cof), names(cof)) ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits=max(digits-2,2))) correl[!ll] <- "" k <- k + 1 z[[k]] <- list(type='print', list(correl[-1, - p, drop = FALSE], quote = FALSE), title='Correlation Matrix for Parameter Estimates') } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } predict.bj <- function(object, newdata, type=c("lp","x","data.frame","terms","cterms","ccterms","adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean','individual','simultaneous'), kint=1, na.action=na.keep, expand.na=TRUE, center.terms=type=="terms", ...) { type <- match.arg(type) predictrms(object, newdata, type, se.fit, conf.int, conf.type, kint, na.action, expand.na, center.terms, ...) } residuals.bj <- function(object, type = c("censored","censored.normalized"), ...) { type <- match.arg(type) y <- object$y aty <- attributes(y) if('y' %nin% names(object)) stop('did not use y=TRUE with fit') ncy <- ncol(y) r <- y[, - ncy, drop=FALSE] - object$linear.predictors if(type=='censored.normalized') r <- r / object$stats['sigma'] label(r) <- if(type=='censored') 'Residual' else 'Normalized Residual' ev <- y[, ncy] label(ev) <- label(y) units(r) <- units(y) r <- Surv(r, ev) attr(r,'type') <- aty$type class(r) <- c('residuals.bj', 'Surv') if (length(object$na.action)) naresid(object$na.action, r) else r } validate.bj <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, tol=1e-7, rel.tolerance=1e-3, maxiter=15, ...) 
{ if(!(length(fit$x) && length(fit$y))) stop('you must specify x=TRUE and y=TRUE to bj') xb <- fit$linear.predictors ny <- dim(fit$y) nevents <- sum(fit$y[,ny[2]]) ##Note: fit$y already has been transformed by the link function by bj distance <- function(x,y,fit,iter,evalfit=FALSE,fit.orig, maxiter=15, tol=1e-7, rel.tolerance=1e-3, ...) { ##Assumes y is matrix with 1st col=time, 2nd=event indicator dxy.cens(x, y)["Dxy"] } predab.resample(fit, method=method, fit=bj.fit2, measure=distance, pr=pr, B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics, force=force, estimates=estimates, maxiter=maxiter, tol=tol, rel.tolerance=rel.tolerance, ...) } bj.fit2 <- function(x, y, iter=0, maxiter=15, init=NULL, rel.tolerance=1e-3, tol=1e-7, ...) { e <- y[, 2] if(sum(e) < 1) return(list(fail=TRUE)) x <- x #Get around lazy evaluation creating complex expression f <- bj.fit(as.matrix(x), y, control=list(iter.max=maxiter, eps=rel.tolerance, tol=tol)) if(f$fail) warning('bj.fit failed') f } latex.bj <- function(...) latexrms(...) rms/R/gIndex.s0000644000176200001440000000635612761051054012667 0ustar liggesusersgIndex <- function(object, partials=TRUE, type=c('ccterms', 'cterms', 'terms'), lplabel=if(length(object$scale) && is.character(object$scale)) object$scale[1] else 'X*Beta', fun, funlabel=if(missing(fun)) character(0) else deparse(substitute(fun)), postfun=if(length(object$scale)==2) exp else NULL, postlabel=if(length(postfun)) ifelse(missing(postfun), if((length(object$scale) > 1) && is.character(object$scale)) object$scale[2] else 'Anti-log', deparse(substitute(postfun))) else character(0), ...) { obj.name <- as.character(sys.call())[2] type <- match.arg(type) labels <- attr(object, 'Design')$label lp <- predict(object, ...) if(partials) { terms <- predict(object, type=type) if(nrow(terms) != length(lp)) warning('expected predicted linear predictors and terms to have same no. of rows') p <- ncol(terms) g <- matrix(0, nrow=p, ncol=1 + (length(postfun) > 0), dimnames=list(colnames(terms), c(lplabel, postlabel))) for(i in 1:p) { gmd <- GiniMd(terms[,i], na.rm=TRUE) g[i,] <- c(gmd, if(length(postfun)) postfun(gmd)) } } gmd <- GiniMd(lp, na.rm=TRUE) Total <- matrix(c(gmd, if(length(postfun)) postfun(gmd)), nrow=1, ncol=1 + (length(postfun) > 0), dimnames=list('Total', c(lplabel, postlabel))) g <- if(partials) rbind(g, Total) else Total gtrans <- NULL if(!missing(fun)) { gtrans <- GiniMd(fun(lp), na.rm=TRUE) names(gtrans) <- funlabel } structure(g, gtrans=gtrans, class='gIndex', lplabel=lplabel, funlabel=funlabel, postlabel=postlabel, partials=partials, labels=c(labels, Total='Total'), type=type, formula=formula(object)) } print.gIndex <- function(x, digits=4, abbrev=FALSE, vnames=c("names","labels"), ...) { vnames <- match.arg(vnames) at <- attributes(x) if(vnames == 'labels') { lab <- at$labels[rownames(x)] rownames(x) <- if(abbrev) abbreviate(lab) else lab } cat('\ng Index: ', format(at$formula), '\n\n') x <- matrix(x, nrow=nrow(x), dimnames=list(rownames(x), c(at$lplabel, at$postlabel))) print(x, digits=digits) if(length(gtrans <- at$gtrans)) cat('\ng Index on transformed linear predictors (', names(gtrans), '): ', format(gtrans, digits=digits), '\n', sep='') cat('\n') invisible() } plot.gIndex <- function(x, what=c('pre', 'post'), xlab=NULL, pch=16, rm.totals=FALSE, sort=c('descending', 'ascending', 'none'), ...)
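## Example for gIndex/plot.gIndex (a hedged sketch; the fit and label are
## hypothetical). gIndex computes Gini's mean difference of the linear
## predictor, overall and (with partials=TRUE) for each predictor's terms;
## this plot method dot-charts the result:
##   f <- lrm(death ~ rcs(age, 4) + sex, data=mydata)
##   g <- gIndex(f, fun=plogis, funlabel='Prob[death]')
##   print(g)
##   plot(g, what='pre', sort='descending')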
{ what <- match.arg(what) sort <- match.arg(sort) at <- attributes(x) if(!length(xlab)) xlab <- paste('g Index:', if(what=='pre') at$lplabel else at$postlabel) x <- if(what=='pre') x[, 1] else x[, 2] if(rm.totals) x <- x[-length(x)] x <- switch(sort, descending=-sort(-x), ascending=sort(x), none=x) dotchart3(x, xlab=xlab, pch=pch, ...) invisible(x) } rms/R/fastbw.s0000644000176200001440000002152313146300522012725 0ustar liggesusers# Fast backward elimination using a slow but numerically stable version # of the Lawless-Singhal method (Biometrics 1978), used in the SAS # PHGLM and LOGIST procedures # Uses function solvet, a slightly edited version of solve that passes # the tol argument to qr. # Modified 12Oct92 - if scale parameter present, ignore last row and col of cov # Modified 22Sep93 - new storage format for design attributes # Modified 1Mar94 - add k.aic # Modified 4Mar96 - use S commands instead of avia if not under UNIX # Modified 19Feb11 - added force argument # # F. Harrell 18Jan91 fastbw <- function(fit, rule=c("aic", "p"), type=c("residual","individual","total"), sls=.05, aics=0, eps=1e-9, k.aic=2, force=NULL) { rule <- match.arg(rule) type <- match.arg(type) ns <- num.intercepts(fit) if(length(force)) force <- force + ns L <- if(ns==0) NULL else 1:ns pt <- length(fit$coef) p <- pt - ns atr <- fit$Design assume <- atr$assume.code if(!length(assume)) stop("fit does not have design information") assign <- fit$assign nama <- names(assign)[1] asso <- 1*(nama=="(Intercept)" | nama=="Intercept") f <- sum(assume != 8) strt <- integer(f) len <- strt j <- 0 for(i in 1:length(assume)) { if(assume[i] != 8) { j <- j+1 aj <- assign[[j + asso]] strt[j] <- min(aj) len[j] <- length(aj) } } name <- atr$name[assume != 8] ed <- as.integer(strt + len - 1) if(type == 'total') type <- 'residual' if(length(force) && type != 'individual') warning('force probably does not work unless type="individual"') factors.in <- 1:f parms.in <- 1:pt ## Not needed if using solve() instead of avia ## Allocate work areas for avia ## s1 <- double(pt) ## s2 <- s1 ## s3 <- double(2*pt) ## s4 <- s1 ## vsub <- double(pt*pt) ## pivot <- integer(pt) factors.del <- integer(f) chisq.del <- double(f) df.del <- integer(f) resid.del <- double(f) df.resid <- integer(f) beta <- fit$coef Cov <- vcov(fit, regcoef.only=TRUE, intercepts='all') ## Above ignores scale parameters; 'all' for orm fits cov <- Cov Coef <- matrix(NA, nrow=f, ncol=pt, dimnames=list(NULL, names(beta))) d <- 0 dor2 <- inherits(fit, 'ols') && (length(fit$y) || (length(fit$fitted.values) && length(fit$residuals))) if(dor2) { ## X <- fit$x Y <- if(length(fit$y))fit$y else fit$fitted.values + fit$residuals r2 <- double(f) sst <- sum((Y-mean(Y))^2) sigma2 <- fit$stats['Sigma']^2 ## Get X'Y using b=(X'X)^-1 X'Y, X'X^-1 = var matrix / sigma2 xpy <- matrix(solvet(Cov, beta, tol=eps)*sigma2, ncol=1) ypy <- sum(Y^2) } for(i in 1:f) { fi <- length(factors.in) ln <- len[factors.in] st <- as.integer(ns + c(1, 1 + cumsum(ln[-fi]))[1 : fi]) en <- as.integer(st + ln - 1) if(any(en > nrow(cov))) stop('program logic error') crit.min <- 1e10 chisq.crit.min <- 1e10 jmin <- 0 dfmin <- 0 k <- 0 factors.in.loop <- factors.in #indirect reference prob in S 3.1 for(j in factors.in.loop) { k <- k + 1 ## can't get this to work in R - CHECK: ## z <- if(.R.) 
## .Fortran("avia",beta,cov,chisq=double(1),length(beta), ## st[k]:en[k], ## ln[k],df=integer(1),eps,vsub,s1,s2,s3,s4,pivot,NAOK=TRUE, ## PACKAGE="Design") else ## .Fortran("avia",beta,cov,chisq=double(1),length(beta), ## st[k]:en[k], ## ln[k],df=integer(1),eps,vsub,s1,s2,s3,s4,pivot,NAOK=TRUE) ## chisq <- z$chisq ## df <- z$df ##replace previous 5 statements with following 3 to use slow method q <- st[k] : en[k] chisq <- if(any(q %in% force)) Inf else beta[q] %*% solvet(cov[q,q], beta[q], tol=eps) df <- length(q) crit <- switch(rule, aic=chisq-k.aic * df, p=pchisq(chisq, df)) if(crit < crit.min) { jmin <- j crit.min <- crit chisq.crit.min <- chisq df.min <- df } } factors.in <- factors.in[factors.in != jmin] parms.in <- parms.in[parms.in < strt[jmin] | parms.in > ed[jmin]] if(length(parms.in)==0) q <- 1:pt else q <- (1:pt)[-parms.in] ## if(under.unix && !.R.) { ## z <- if(.R.) ## .Fortran("avia",fit$coef,Cov,chisq=double(1), ## pt,q,as.integer(pt-length(parms.in)), ## df=integer(1),eps,vsub,s1,s2,s3,s4,pivot,NAOK=TRUE, ## PACKAGE="Design") else ## .Fortran("avia",fit$coef,Cov,chisq=double(1), ## pt,q,as.integer(pt-length(parms.in)), ## df=integer(1),eps,vsub,s1,s2,s3,s4,pivot,NAOK=TRUE) ## resid <- z$chisq ## resid.df <- z$df ##} ##replace previous 5 statements with following 2 to use slow method resid <- fit$coef[q] %*% solvet(Cov[q,q], fit$coef[q], tol=eps) resid.df <- length(q) del <- switch(type, residual = switch(rule, aic=resid - k.aic*resid.df <= aics, p=1 - pchisq(resid,resid.df) > sls), individual = switch(rule, aic = crit.min <= aics, p = 1 - crit.min > sls) ) if(del) { d <- d + 1 factors.del[d] <- jmin chisq.del [d] <- chisq.crit.min df.del [d] <- df.min resid.del [d] <- resid df.resid [d] <- resid.df if(length(parms.in)) { cov.rm.inv <- solvet(Cov[-parms.in, -parms.in], tol=eps) cov.cross <- Cov[parms.in, -parms.in, drop=FALSE] w <- cov.cross %*% cov.rm.inv beta <- fit$coef[parms.in] - w %*% fit$coef[-parms.in] cov <- Cov[parms.in, parms.in] - w %*% t(cov.cross) cof <- rep(0, pt) cof[parms.in] <- beta Coef[d,] <- cof if(dor2) { ## yhat <- matxv(X[,parms.in,drop=F], beta) ## r2[d] <- 1 - sum((yhat-Y)^2)/sst ## sse = Y'(I - H)Y, where H = X*inv(X'X)*X' ## = Y'Y - Y'X*inv(X'X)*X'Y ## = Y'Y - Y'Xb sse <- ypy - t(xpy[parms.in, , drop=FALSE])%*%beta r2[d] <- 1 - sse/sst } } else { beta <- NULL; cov <- NULL if(dor2) r2[d] <- 0 } } else break } if(d > 0) { u <- 1:d fd <- factors.del[u] if(dor2) { r2 <- r2[u] Coef <- Coef[u,, drop=FALSE] } res <- cbind(chisq.del[u], df.del[u], 1 - pchisq(chisq.del[u], df.del[u]), resid.del[u], df.resid[u], 1 - pchisq(resid.del[u], df.resid[u]), resid.del[u] - k.aic * df.resid[u]) labs <- c("Chi-Sq", "d.f.", "P", "Residual", "d.f.", "P", "AIC") dimnames(res) <- list(name[fd], labs) if(length(fd)==f) fk <- NULL else fk <- (1:f)[-fd] } else { fd <- NULL res <- NULL fk <- 1:f } nf <- name[fk] pd <- NULL if(d > 0) for(i in 1:d) pd <- c(pd, (strt[fd[i]] : ed[fd[i]])) if(length(fd) == f) fk <- NULL else if(d==0) fk <- 1:f else fk <- (1:f)[-fd] if(length(pd)==p) pk <- L else if(d==0) pk <- 1:pt else pk <- (1:pt)[-pd] if(length(pd) != p) { beta <- as.vector(beta) names(beta) <- names(fit$coef)[pk] dimnames(cov) <- list(names(beta),names(beta)) } if(dor2) res <- cbind(res, R2=r2) r <- list(result=res, names.kept=nf, factors.kept=fk, factors.deleted=fd, parms.kept=pk, parms.deleted=pd, coefficients=beta, var=cov, Coefficients=Coef, force=if(length(force)) names(fit$coef)[force]) class(r) <- "fastbw" r } print.fastbw <- function(x, digits=4, estimates=TRUE,...) 
{ res <- x$result fd <- x$factors.deleted if(length(fd)) { cres <- cbind(dimnames(res)[[1]], format(round(res[,1], 2)), format(res[,2]), format(round(res[,3], 4)), format(round(res[,4], 2)), format(res[,5]), format(round(res[,6], 4)), format(round(res[,7], 2)), if(ncol(res) > 7) format(round(res[,8], 3))) dimnames(cres) <- list(rep("", nrow(cres)), c("Deleted", dimnames(res)[[2]])) cat("\n") if(length(x$force)) cat('Parameters forced into all models:\n', paste(x$force, collapse=', '), '\n\n') print(cres, quote=FALSE) if(estimates && length(x$coef)) { cat("\nApproximate Estimates after Deleting Factors\n\n") cof <- coef(x) vv <- if(length(cof)>1) diag(x$var) else x$var z <- cof/sqrt(vv) stats <- cbind(cof, sqrt(vv), z, 1 - pchisq(z^2,1)) dimnames(stats) <- list(names(cof), c("Coef","S.E.","Wald Z","P")) print(stats, digits=digits) } } else cat("\nNo Factors Deleted\n") cat("\nFactors in Final Model\n\n") nk <- x$names.kept if(length(nk)) print(nk, quote=FALSE) else cat("None\n") } rms/R/plot.contrast.r0000644000176200001440000001115213702620505014251 0ustar liggesusers##' Plot Bayesian Contrast Posterior Densities ##' ##' If there are exactly two contrasts and `bivar=TRUE`, plots an elliptical or kernel (depending on `bivarmethod`) posterior density contour with coverage probability `prob`. Otherwise plots a series of posterior densities of contrasts along with HPD intervals, posterior means, and medians. When the result being plotted comes from `contrast` with `fun=` specified, both of the individual estimates and their difference are plotted. ##' @title plot.contrast.rms ##' @param x the result of `contrast.rms` ##' @param bivar set to `TRUE` to plot 2-d posterior density contour ##' @param bivarmethod see [rmsb::pdensityContour()] ##' @param prob posterior coverage probability for HPD interval or 2-d contour ##' @param which applies when plotting the result of `contrast(..., fun=)`, defaulting to showing the posterior density of both estimates plus their difference. Set to `"ind"` to only show the two individual densities or `"diff"` to only show the posterior density for the differences. ##' @param nrow for [ggplot2::facet_wrap()] ##' @param ncol likewise ##' ##' @param ... unused ##' @return `ggplot2` object ##' @author Frank Harrell ##' @md plot.contrast.rms <- function(x, bivar=FALSE, bivarmethod=c('ellipse', 'kernel'), prob=0.95, which=c('both', 'diff', 'ind'), nrow=NULL, ncol=NULL, ...)
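## A minimal usage sketch, assuming a Bayesian fit from rmsb::blrm
## (variable names hypothetical):
##   f  <- rmsb::blrm(y ~ trt + rcs(age, 4))
##   k  <- contrast(f, list(trt='B'), list(trt='A'))
##   plot(k)              # posterior density, mean/median, 0.95 HPD interval
##   k2 <- contrast(f, list(trt='B', age=c(40, 60)),
##                     list(trt='A', age=c(40, 60)))
##   plot(k2, bivar=TRUE) # joint 0.95 posterior density contour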
{ bivarmethod <- match.arg(bivarmethod) which <- match.arg(which) if('esta' %in% names(x)) { # Handle output pertaining to fun= result differently esta <- x$esta estb <- x$estb w <- function(draws, what) { nd <- nrow(draws) theta <- as.vector(draws) contr <- colnames(draws) if(length(contr) == 1 && contr == '1') contr <- '' cont <- factor(rep(contr, each=nd), contr) d <- data.frame(what, contr=cont, theta) f <- function(x) { hpd <- rmsb::HPDint(x, prob) r <- c(mean(x), median(x), hpd) names(r) <- c('Mean', 'Median', 'Lower', 'Upper') r } est <- apply(draws, 2, f) stat <- rownames(est) stat <- ifelse(stat %in% c('Lower', 'Upper'), paste(prob, 'HPDI'), stat) eparam <- factor(rep(contr, each=nrow(est)), contr) stat <- rep(stat, length(contr)) est <- as.vector(est) de <- data.frame(what, contr=eparam, est, stat) list(d=d, de=de) } w1 <- w(esta, 'First') w2 <- w(estb, 'Second') w3 <- w(esta - estb, 'First - Second') d <- rbind(w1$d, w2$d, w3$d) de <- rbind(w1$de, w2$de, w3$de) lev <- c('First', 'Second', 'First - Second') d$what <- factor(d$what, lev) de$what <- factor(de$what, lev) if(which == 'diff') { d <- subset(d, what == 'First - Second') de <- subset(de, what == 'First - Second') } else if(which == 'ind') { d <- subset(d, what != 'First - Second') de <- subset(de, what != 'First - Second') } g <- ggplot(d, aes(x=theta)) + geom_density() + geom_vline(data=de, aes(xintercept=est, color=stat, alpha=I(0.4))) + facet_grid(what ~ contr) + guides(color=guide_legend(title='')) + xlab('') + ylab('') return(g) } cdraws <- x$cdraws if(! length(cdraws)) stop('plot method for contrast.rms objects implemented only for Bayesian models') nd <- nrow(cdraws) cn <- colnames(cdraws) if(all(cn == as.character(1 : ncol(cdraws)))) cn <- paste('Contrast', cn) colnames(cdraws) <- cn if(ncol(cdraws) == 2 && bivar) { g <- rmsb::pdensityContour(cdraws[, 1], cdraws[, 2], prob=prob, pl=TRUE, method=bivarmethod) g <- g + xlab(cn[1]) + ylab(cn[2]) return(g) } hpd <- apply(cdraws, 2, rmsb::HPDint, prob=prob) draws <- as.vector(cdraws) which <- colnames(cdraws) param <- factor(rep(which, each=nd), which) g <- function(x) c(mean=mean(x), median=median(x)) est <- apply(cdraws, 2, g) est <- rbind(est, hpd) stat <- rownames(est) stat <- ifelse(stat %in% c('Lower', 'Upper'), paste(prob, 'HPDI'), stat) eparam <- factor(rep(which, each=nrow(est)), which) stat <- rep(stat, length(which)) est <- as.vector(est) d <- data.frame(param, draws) de <- data.frame(param=eparam, est, stat) g <- ggplot(d, aes(x=draws)) + geom_density() + geom_vline(data=de, aes(xintercept=est, color=stat, alpha=I(0.4))) + facet_wrap(~ param, scales='free', nrow=nrow, ncol=ncol) + guides(color=guide_legend(title='')) + xlab('') + ylab('') g } utils::globalVariables(c('theta', 'what')) rms/R/calibrate.cph.s0000644000176200001440000001321613703355523014150 0ustar liggesusers#Resampling optimism of reliability of a Cox survival model #For predicting survival at a fixed time u, getting grouped K-M estimates #with avg. of m subjects in a group, or using cutpoints cuts if present, #e.g. cuts=c(0,.2,.4,.6,.8,1). 
#B: # reps method=see predab.resample #bw=T to incorporate backward stepdown (using fastbw) with params rule,type,sls #pr=T to print results of each rep #what="observed" to get optimism in observed (Kaplan-Meier) survival for #groups by predicted survival #what="observed-predicted" to get optimism in KM - Cox - more suitable if #distributions of predicted survival vary greatly within quantile groups #defined from original sample's predicted survival calibrate.cph <- function(fit, cmethod=c('hare', 'KM'), method="boot", u, m=150, pred, cuts, B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, what="observed-predicted", tol=1e-12, maxdim=5, ...) { call <- match.call() cmethod <- match.arg(cmethod) oldopt <- options('digits') options(digits=3) on.exit(options(oldopt)) unit <- fit$units if(unit=="") unit <- "Day" ssum <- fit$surv.summary if(!length(ssum)) stop('did not use surv=TRUE for cph( )') cat("Using Cox survival estimates at ", dimnames(ssum)[[1]][2], " ", unit, "s\n", sep="") surv.by.strata <- ssum[2, , 1] #2nd time= at u, all strata xb <- fit$linear.predictors if(length(stra <- fit$strata)) surv.by.strata <- surv.by.strata[stra] survival <- as.vector(surv.by.strata ^ exp(xb)) if(cmethod=='hare' && missing(pred)) { lim <- datadist(survival)$limits[c('Low:prediction','High:prediction'),] pred <- seq(lim[1], lim[2], length=100) } if(cmethod=='KM' && missing(cuts)) { g <- max(1, floor(length(xb) / m)) cuts <- unique(quantile(c(0, 1, survival), seq(0, 1, length=g + 1), na.rm=TRUE)) } if(cmethod=='hare') cuts <- NULL else pred <- NULL distance <- function(x, y, strata, fit, iter, u, fit.orig, what="observed", pred, orig.cuts, maxdim, ...) { ## Assumes y is matrix with 1st col=time, 2nd=event indicator if(sum(y[, 2]) < 5) return(NA) surv.by.strata <- fit$surv.summary[2, , 1] ##2 means to use estimate at first time past t=0 (i.e., at u) if(length(strata)) surv.by.strata <- surv.by.strata[strata] #Get for each stratum in data cox <- as.vector(surv.by.strata ^ exp(x - fit$center)) ##Assumes x really= x * beta if(length(orig.cuts)) { pred.obs <- groupkm(cox, Surv(y[,1], y[,2]), u=u, cuts=orig.cuts) dist <- if(what == "observed") pred.obs[, "KM"] else pred.obs[, "KM"] - pred.obs[, "x"] } else { pred.obs <- val.surv(fit, S=Surv(y[, 1], y[, 2]), u=u, est.surv=cox, pred=pred, maxdim=maxdim) dist <- if(what=='observed') pred.obs$actualseq else pred.obs$actualseq - pred } if(iter == 0 && pr) print(pred.obs) if(iter == 0) structure(dist, keepinfo=list(pred.obs=pred.obs)) else dist } coxfit <- function(x, y, strata, u, iter=0, ...) { etime <- y[,1] e <- y[,2] if(sum(e) < 5) return(list(fail=TRUE)) x <- x #Get around lazy evaluation creating complex expression f <- if(length(x)) { if(length(strata)) cph(Surv(etime,e) ~ x + strat(strata), surv=TRUE, time.inc=u) else cph(Surv(etime,e) ~ x, surv=TRUE, time.inc=u) } else cph(Surv(etime,e) ~ strat(strata), surv=TRUE, time.inc=u) ## Gets predicted survival at times 0, u, 2u, 3u, ... attr(f$terms, "Design") <- NULL ## Don't fool fastbw called from predab.resample f } reliability <- predab.resample(fit, method=method, fit=coxfit, measure=distance, pr=pr, B=B, bw=bw, rule=rule, type=type, u=u, m=m, what=what, sls=sls, aics=aics, force=force, estimates=estimates, pred=pred, orig.cuts=cuts, tol=tol, maxdim=maxdim, ...)
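## predab.resample returns a matrix with one row per calibration point and
## columns index.orig, training, test, optimism, index.corrected, and n (the
## number of usable resamples); its attributes 'kept' (variables retained by
## fastbw when bw=TRUE) and 'keepinfo' (iter=0 predicted/observed estimates)
## are picked off below.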
kept <- attr(reliability, 'kept') keepinfo <- attr(reliability, 'keepinfo') n <- reliability[, "n"] rel <- reliability[, "index.corrected"] opt <- reliability[, "optimism"] rel <- cbind(mean.optimism=opt, mean.corrected=rel, n=n) e <- fit$y[, 2] pred.obs <- keepinfo$pred.obs if(cmethod == 'KM') { mean.predicted <- pred.obs[,"x"] KM <- pred.obs[,"KM"] obs.corrected <- KM - opt structure(cbind(reliability[,c("index.orig","training","test"), drop=FALSE], rel, mean.predicted=mean.predicted, KM=KM, KM.corrected=obs.corrected, std.err=pred.obs[, "std.err", drop=FALSE]), predicted=survival, kept=kept, class="calibrate", u=u, units=unit, n=length(e), d=sum(e), p=length(fit$coefficients), m=m, B=B, what=what, call=call) } else { calibrated <- pred.obs$actualseq calibrated.corrected <- calibrated - opt structure(cbind(pred=pred, reliability[, c("index.orig", "training", "test"), drop=FALSE], rel, calibrated=calibrated, calibrated.corrected=calibrated.corrected), predicted=survival, kept=kept, class="calibrate", u=u, units=unit, n=length(e), d=sum(e), p=length(fit$coefficients), m=m, B=B, what=what, call=call) } } rms/R/calibrate.psm.s0000644000176200001440000001223113654066603014174 0ustar liggesuserscalibrate.psm <- function(fit, cmethod=c('hare', 'KM'), method="boot", u, m=150, pred, cuts, B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, what="observed-predicted", tol=1e-12, maxiter=15, rel.tolerance=1e-5, maxdim=5, ...) { call <- match.call() cmethod <- match.arg(cmethod) ## if(cmethod=='hare') ## { ## require('polspline') || ## { ## cat('polspline package not installed. Reverting to cmethod="KM"\n') ## cmethod <- 'KM' ## } ## } if(! length(fit$y)) stop("fit did not store y") oldopt <- options('digits') options(digits=3) on.exit(options(oldopt)) unit <- fit$units if(unit=="") unit <- "Day" ny <- dim(fit$y) nevents <- sum(fit$y[, ny[2]]) survival <- survest.psm(fit, times=u, conf.int=FALSE)$surv if(cmethod=='hare' && missing(pred)) { lim <- datadist(survival)$limits[c('Low:prediction','High:prediction'),] pred <- seq(lim[1], lim[2], length=100) } if(cmethod=='KM' && missing(cuts)) { g <- max(1, floor(ny[1]/m)) cuts <- quantile(c(0, 1, survival), seq(0, 1, length=g+1), na.rm=TRUE) } if(cmethod=='hare') cuts <- NULL else pred <- NULL dist <- fit$dist parms <- fit$parms distance <- function(x, y, fit, iter, u, fit.orig, what="observed", pred, orig.cuts, maxdim, ...) 
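## distance() is the accuracy measure handed to predab.resample: x contains
## linear predictors from the current (possibly refitted) model and y the
## Surv matrix; it returns observed (or observed - predicted) survival at
## time u, over the hare prediction grid `pred` or at the original KM cuts.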
{ ##Assumes y is matrix with 1st col=time, 2nd=event indicator if(sum(y[,2]) < 5) return(NA) class(fit) <- 'psm' # for survest.psm which uses Survival.psm fit$dist <- fit.orig$dist psurv <- survest.psm(fit, linear.predictors=x, times=u, conf.int=FALSE)$surv ##Assumes x really= x * beta if(length(orig.cuts)) { pred.obs <- groupkm(psurv, y, u=u, cuts=orig.cuts) dist <- if(what=="observed") pred.obs[,"KM"] else pred.obs[,"KM"] - pred.obs[,"x"] } else { pred.obs <- val.surv(fit, S=y, u=u, est.surv=psurv, pred=pred, maxdim=maxdim) dist <- if(what=='observed') pred.obs$actualseq else pred.obs$actualseq - pred } if(iter == 0) structure(dist, keepinfo=list(pred.obs=pred.obs)) else dist } b <- min(10, B) overall.reps <- max(1, round(B/b)) ## Bug in S prevents>10 loops in predab.resample if(pr) cat("\nAveraging ", overall.reps," repetitions of B=",b,"\n\n") rel <- 0 opt <- 0 nrel <- 0 B <- 0 for(i in 1:overall.reps) { reliability <- predab.resample(fit, method=method, fit=survreg.fit2, measure=distance, pr=pr, B=b, bw=bw, rule=rule, type=type, u=u, m=m, what=what, dist=dist, parms=parms, family=family, sls=sls, aics=aics, force=force, estimates=estimates, strata=FALSE, tol=tol, pred=pred, orig.cuts=cuts, maxiter=maxiter, rel.tolerance=rel.tolerance, maxdim=maxdim, ...) kept <- attr(reliability, 'kept') # TODO: accumulate over reps keepinfo <- attr(reliability, 'keepinfo') n <- reliability[,"n"] rel <- rel + n * reliability[,"index.corrected"] opt <- opt + n * reliability[,"optimism"] nrel <- nrel + n B <- B + max(n) if(pr) print(reliability) } mean.corrected <- rel/nrel mean.opt <- opt/nrel rel <- cbind(mean.optimism=mean.opt, mean.corrected=mean.corrected, n=nrel) if(pr) { cat("\nMean over ",overall.reps," overall replications\n\n") print(rel) } pred.obs <- keepinfo$pred.obs if(cmethod=='KM') { pred <- pred.obs[,"x"] KM <- pred.obs[,"KM"] se <- pred.obs[,"std.err"] obs.corrected <- KM - mean.opt structure(cbind(reliability[,c("index.orig","training","test"), drop=FALSE], rel,mean.predicted=pred, KM=KM, KM.corrected=obs.corrected, std.err=se), predicted=survival, kept=kept, class="calibrate", u=u, units=unit, n=ny[1], d=nevents, p=length(fit$coefficients)-1, m=m, B=B, what=what, call=call) } else { calibrated <- pred.obs$actualseq calibrated.corrected <- calibrated - mean.opt structure(cbind(pred=pred, reliability[,c("index.orig","training","test"), drop=FALSE], rel, calibrated=calibrated, calibrated.corrected=calibrated.corrected), predicted=survival, kept=kept, class="calibrate", u=u, units=unit, n=ny[1], d=nevents, p=length(fit$coefficients)-1, m=m, B=B, what=what, call=call) } } rms/R/predab.resample.s0000644000176200001440000003054113703217746014521 0ustar liggesuserspredab.resample <- function(fit.orig, fit, measure, method=c("boot","crossvalidation",".632","randomization"), bw=FALSE, B=50, pr=FALSE, prmodsel=TRUE, rule="aic", type="residual", sls=.05, aics=0, tol=1e-12, force=NULL, estimates=TRUE, non.slopes.in.x=TRUE, kint=1, cluster, subset, group=NULL, allow.varying.intercepts=FALSE, debug=FALSE, ...) { method <- match.arg(method) oldopt <- options('digits') options(digits=4) on.exit(options(oldopt)) efit <- function(...) 
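## efit is used as the tryCatch error handler around each resample's model
## fit: any fitting error is converted to list(fail=TRUE) so the sample is
## counted as a divergence/singularity and skipped instead of aborting the
## whole resampling loop.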
list(fail=TRUE) ## Following logic prevents having to load a copy of a large x object if(any(match(c("x", "y"), names(fit.orig), 0) == 0)) stop("must have specified x=T and y=T on original fit") fparms <- fit.orig[c("terms", "Design")] oassign <- fit.orig$assign non.slopes <- num.intercepts(fit.orig, 'coef') # x.index <- if(non.slopes==0 || non.slopes.in.x) function(i,...) i # else # function(i, ns) { # if(any(i > ns)) i[i > ns] - ns # else NULL # } x.index <- function(i, ns) if(ns == 0) i else setdiff(i, 1 : ns) - ns Xb <- function(x, b, non.slopes, n, kint=1) { if(length(x)) matxv(x, b, kint=kint) else if(non.slopes == 0 || ! length(kint)) rep(0, n) else rep(b[kint], n) } # if(length(x)) { # if(non.slopes == 0 || non.slopes.in.x) x %*% b # else b[kint] + x %*% b[-(1 : non.slopes)] # } # else { # if(non.slopes==0) rep(0, n) # else # rep(b[kint], n) # } # } nac <- fit.orig$na.action x <- as.matrix(fit.orig$x) n <- nrow(x) ## Remove model.matrix class for subset operations later attr(x,'class') <- NULL y <- fit.orig$y if(is.factor(y)) y <- unclass(y) if(! is.Surv(y)) y <- as.matrix(y) ## some subjects have multiple records now multi <- ! missing(cluster) if(length(group)) { if(multi || method != 'boot') stop('group is currently allowed only when method="boot" and cluster is not given') if(length(group) > n) { ## Missing observations were deleted during fit if(length(nac)) j <- ! is.na(naresid(nac, y) %*% rep(1, ncol(y))) group <- group[j] } if(length(group) != n) stop('length of group does not match # rows used in fit') group.inds <- split(1 : n, group) # see bootstrap() ngroup <- length(group.inds) } else ngroup <- 0 if(multi) { if(method != 'boot') stop('cluster only implemented for method="boot"') if(length(cluster) > n) { ## Missing observations were deleted during fit if(length(nac)) { j <- ! is.na(naresid(nac, y) %*% rep(1, ncol(y))) cluster <- cluster[j] } } if(length(cluster) != n) stop('length of cluster does not match # rows used in fit') if(any(is.na(cluster))) stop('cluster has NAs') n.orig <- length(unique(cluster)) cl.samp <- split(1 : n, cluster) } else n.orig <- n if(! missing(subset)) { if(length(subset) > n && length(nac)) { j <- ! is.na(naresid(nac, y) %*% rep(1, ncol(y))) subset <- subset[j] } if(length(subset) != n && all(subset >= 0)) stop('length of subset does not match # rows used in fit') if(any(is.na(subset))) stop('subset has NAs') if(! is.logical(subset)) { subset2 <- rep(FALSE, n) subset2[subset] <- TRUE subset <- subset2 subset2 <- NULL } } stra <- fit.orig$strata if(bw) { if(fit.orig$fail) return() cat("\n Backwards Step-down - Original Model\n") fbw <- fastbw(fit.orig, rule=rule, type=type, sls=sls, aics=aics, eps=tol, force=force) if(prmodsel) print(fbw, estimates=estimates) orig.col.kept <- fbw$parms.kept if(! length(orig.col.kept)) stop("no variables kept in original model") ## Check that x.index works if allow.varying.intercepts xcol <- x.index(orig.col.kept, non.slopes) ## Refit subset of predictors on whole sample fit.orig <- fit(x[, xcol, drop=FALSE], y, strata=stra, iter=0, tol=tol, xcol=xcol, ...) } else orig.col.kept <- seq(along=fit.orig$coef) b <- fit.orig$coef xcol <- x.index(orig.col.kept, non.slopes) xb <- Xb(x[, xcol, drop=FALSE], b, non.slopes, n, kint=kint) index.orig <- if(missing(subset)) measure(xb, y, strata=stra, fit=fit.orig, iter=0, evalfit=TRUE, fit.orig=fit.orig, kint=kint, ...) else measure(xb[subset], y[subset,,drop=FALSE], strata=stra[subset], fit=fit.orig, iter=0, evalfit=FALSE, fit.orig=fit.orig, kint=kint, ...) 
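## measure() evaluated at iter=0 may attach a 'keepinfo' attribute (e.g.,
## observed vs. predicted values from the original fit); it is carried
## through to the final result.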
keepinfo <- attr(index.orig, 'keepinfo') test.stat <- double(length(index.orig)) train.stat <- test.stat name <- fparms$Design$name if(bw) varin <- matrix(FALSE, nrow=B, ncol=length(name)) j <- 0 num <- 0 if(method == "crossvalidation") { per.group <- n / B if(per.group < 2) { stop("B > n/2") } sb <- sample(n, replace=FALSE) } ##Cross-val keeps using same random set of indexes, without replacement ntest <- 0 #Used in getting weighted average for .632 estimator if(method == ".632") { ## Must do assignments ahead of time so can weight estimates ## according to representation in bootstrap samples S <- matrix(integer(1), nrow=n, ncol=B) W <- matrix(TRUE, nrow=n, ncol=B) for(i in 1 : B) { S[, i] <- s <- sample(n, replace=TRUE) W[s, i] <- FALSE #now these obs are NOT omitted } nomit <- drop(W %*% rep(1,ncol(W))) #no. boot samples omitting each obs if(min(nomit) == 0) stop("not every observation omitted at least once ", "in bootstrap samples.\nRe--run with larger B") W <- apply(W / nomit, 2, sum) / n if(pr) { cat("\n\nWeights for .632 method (ordinary bootstrap weights ", format(1 / B), ")\n", sep="") print(summary(W)) } } pb <- setPb(B, type=if(method == 'crossvalidation') 'Cross' else 'Boot', onlytk=! pr, every=1*(B < 20) + 5*(B >= 20 & B < 50) + 10*(B >= 50 & B < 100) + 20*(B >= 100 & B < 1000) + 50*(B >= 1000)) for(i in 1 : B) { pb(i) switch(method, crossvalidation = { is <- 1 + round((i - 1) * per.group) ie <- min(n, round(is + per.group - 1)) test <- sb[is : ie] train <- -test }, #cross-val boot = { if(ngroup) { train <- integer(n.orig) for(si in 1 : ngroup) { gi <- group.inds[[si]] lgi <- length(gi) train[gi] <- if(lgi == 1) gi else { ## sample behaves differently when first arg is ## a single integer sample(gi, lgi, replace=TRUE) } } } else { train <- sample(n.orig, replace=TRUE) if(multi) train <- unlist(cl.samp[train]) } test <- 1 : n }, #boot ".632" = { train <- S[, i] test <- -train }, #boot .632 randomization = { train <- sample(n, replace=FALSE) test <- 1 : n } ) #randomization xtrain <- if(method == "randomization") 1 : n else train if(debug) { cat('\nSubscripts of training sample:\n') print(train) cat('\nSubscripts of test sample:\n') print(test) } f <- tryCatch(fit(x[xtrain, , drop=FALSE], y[train, , drop=FALSE], strata=stra[train], iter=i, tol=tol, ...), error=efit) if(! length(f$fail)) f$fail <- FALSE f$assign <- NULL #Some programs put a NULL assign (e.g. ols.val fit) ni <- num.intercepts(f) fail <- f$fail if(! fail) { ## Following if..stop was before f$assign above if(! allow.varying.intercepts && ni != non.slopes) { stop('A training sample has a different number of intercepts (', ni ,')\n', 'than the original model fit (', non.slopes, ').\n', 'You probably fit an ordinal model with sparse cells and a re-sample\n', 'did not select at least one observation for each value of Y.\n', 'Add the argument group=y where y is the response variable.\n', 'This will force balanced sampling on levels of y.') } clf <- attr(f, "class") # class is removed by c() below f[names(fparms)] <- fparms assign <- oassign ## Slopes are shifted to the left when fewer unique values of Y ## occur (especially for orm models) resulting in fewer intercepts if(non.slopes != ni) for(z in 1 : length(assign)) assign[[z]] <- assign[[z]] - (non.slopes - ni) f$assign <- assign attr(f, "class") <- clf if(! 
bw) { coef <- f$coef col.kept <- seq(along=coef) } else { f <- fastbw(f, rule=rule, type=type, sls=sls, aics=aics, eps=tol, force=force) if(pr && prmodsel) print(f, estimates=estimates) varin[j + 1, f$factors.kept] <- TRUE col.kept <- f$parms.kept if(! length(col.kept)) f <- tryCatch(fit(NULL, y[train,, drop=FALSE], stra=stra[xtrain], iter=i, tol=tol,...), error=efit) else { xcol <- x.index(col.kept, ni) f <- tryCatch(fit(x[xtrain, xcol, drop=FALSE], strata=stra[xtrain], y[train,, drop=FALSE], iter=i, tol=tol, xcol=xcol, ...), error=efit) } if(length(f$fail) && f$fail) fail <- TRUE else coef <- f$coef } } if(! fail) { j <- j + 1 xcol <- x.index(col.kept, ni) xb <- Xb(x[,xcol,drop=FALSE], coef, ni, n, kint=kint) if(missing(subset)) { train.statj <- measure(xb[xtrain], y[train,,drop=FALSE], strata=stra[xtrain], fit=f, iter=i, fit.orig=fit.orig, evalfit=TRUE, kint=kint, ...) test.statj <- measure(xb[test], y[test,,drop=FALSE], strata=stra[test], fit=f, iter=i, fit.orig=fit.orig, evalfit=FALSE, kint=kint, ...) } else { ii <- xtrain if(any(ii < 0)) ii <- (1 : n)[ii] ii <- ii[subset[ii]] train.statj <- measure(xb[ii], y[ii,,drop=FALSE], strata=stra[ii], fit=f, iter=i, fit.orig=fit.orig, evalfit=FALSE, kint=kint, ...) ii <- test if(any(ii < 0)) ii <- (1 : n)[ii] ii <- ii[subset[ii]] test.statj <- measure(xb[ii], y[ii,,drop=FALSE], fit=f, iter=i, strata=stra[ii], fit.orig=fit.orig, evalfit=FALSE, kint=kint, ...) } na <- is.na(train.statj + test.statj) num <- num + ! na if(pr) print(cbind(training=train.statj, test=test.statj)) train.statj[na] <- 0 test.statj[na] <- 0 if(method == ".632") { wt <- W[i] if(any(na)) warning('method=".632" does not properly handle missing summary indexes') } else wt <- 1 train.stat <- train.stat + train.statj test.stat <- test.stat + test.statj * wt ntest <- ntest + 1 } } if(pr) cat("\n\n") if(j != B) cat("\nDivergence or singularity in", B - j, "samples\n") train.stat <- train.stat / num if(method != ".632") { test.stat <- test.stat / num optimism <- train.stat - test.stat } else optimism <- .632 * (index.orig - test.stat) res <- cbind(index.orig=index.orig, training=train.stat, test=test.stat, optimism=optimism, index.corrected=index.orig-optimism, n=num) if(bw) { varin <- varin[1 : j, , drop=FALSE] dimnames(varin) <- list(rep("", j), name) } structure(res, class='validate', kept=if(bw) varin, keepinfo=keepinfo) } rms/R/lrm.s0000644000176200001440000002343013704356012012235 0ustar liggesuserslrm <- function(formula, data=environment(formula), subset, na.action=na.delete, method="lrm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, strata.penalty=0, var.penalty=c('simple','sandwich'), weights, normwt=FALSE, scale=FALSE, ...) { call <- match.call() var.penalty <- match.arg(var.penalty) callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! 
missing(subset )) eval(substitute(subset), data, callenv) data <- modelData(data, formula, weights=weights, subset=subset, na.action=na.action, callenv=callenv) tform <- terms(formula, specials='strat', data=data) nstrata <- 1 if(length(atl <- attr(tform, "term.labels")) && any(atl!=".")) { ##X's present X <- Design(data, formula=formula, specials='strat') atrx <- attributes(X) sformula <- atrx$sformula nact <- atrx$na.action if(method=="model.frame") return(X) Terms <- atrx$terms attr(Terms, "formula") <- formula atr <- atrx$Design mmcolnames <- atr$mmcolnames Y <- model.extract(X, 'response') offs <- atrx$offset if(!length(offs)) offs <- 0 weights <- wt <- model.extract(X, 'weights') if(length(weights)) warning('currently weights are ignored in model validation and bootstrapping lrm fits') if(model) m <- X stra <- attr(tform, 'specials')$strat Strata <- NULL Terms.ns <- Terms if(length(stra)) { temp <- untangle.specials(Terms.ns, 'strat', 1) Terms.ns <- Terms.ns[-temp$terms] attr(Terms, "factors") <- pmin(attr(Terms,"factors"),1) attr(Terms.ns,"factors") <- pmin(attr(Terms.ns,"factors"),1) Strata <- X[[stra]] nstrata <- length(levels(Strata)) } X <- model.matrix(Terms.ns, X) alt <- attr(mmcolnames, 'alt') if(! all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt ## prn(colnames(X)); prn(mmcolnames) X <- X[, mmcolnames, drop=FALSE] colnames(X) <- atr$colnames xpres <- length(X) > 0 p <- length(atr$colnames) n <- length(Y) penpres <- !(missing(penalty) && missing(penalty.matrix)) if(penpres && missing(var.penalty)) warning('default for var.penalty has changed to "simple"') if(!penpres) penalty.matrix <- matrix(0,ncol=p,nrow=p) else { if(missing(penalty.matrix)) penalty.matrix <- Penalty.matrix(atr, X) else if(nrow(penalty.matrix)!=p || ncol(penalty.matrix)!=p) stop( paste("penalty.matrix does not have",p,"rows and columns")) psetup <- Penalty.setup(atr, penalty) penalty <- psetup$penalty multiplier <- psetup$multiplier if(length(multiplier)==1) penalty.matrix <- multiplier * penalty.matrix else { a <- diag(sqrt(multiplier)) penalty.matrix <- a %*% penalty.matrix %*% a } } } else { # X <- eval.parent(m) X <- Design(data, formula=formula, specials='strat') offs <- model.offset(X) if(! length(offs)) offs <- 0 Y <- model.extract(X, 'response') Y <- Y[!is.na(Y)] Terms <- X <- NULL xpres <- FALSE penpres <- FALSE penalty.matrix <- NULL } ##Model: y~. without data= -> no predictors if(method == "model.matrix") return(X) if(! is.factor(Y)) Y <- as.vector(Y) # in case Y is a matrix if(nstrata > 1) { if(scale) stop('scale=TRUE not implemented for stratified model') f <- lrm.fit.strat(X, Y, Strata, offset=offs, penalty.matrix=penalty.matrix, strata.penalty=strata.penalty, tol=tol, weights=weights, normwt=normwt, ...) } else { if(existsFunction(method)) { fitter <- getFunction(method) f <- fitter(X, Y, offset=offs, penalty.matrix=penalty.matrix, tol=tol, weights=weights, normwt=normwt, scale=scale, ...) 
} else stop(paste("unimplemented method:", method)) } if(f$fail) { warning("Unable to fit model using ", dQuote(method)) return(f) } f$call <- NULL if(model) f$model <- m if(x) { f$x <- X f$strata <- Strata } if(y) f$y <- Y nrp <- f$non.slopes if(penpres) { f$penalty <- penalty if(nstrata == 1) { ## Get improved covariance matrix v <- f$var if(var.penalty=='sandwich') f$var.from.info.matrix <- v f.nopenalty <- fitter(X, Y, offset=offs, initial=f$coef, maxit=1, tol=tol, scale=scale) ## info.matrix.unpenalized <- solvet(f.nopenalty$var, tol=tol) info.matrix.unpenalized <- f.nopenalty$info.matrix dag <- diag(info.matrix.unpenalized %*% v) f$effective.df.diagonal <- dag f$var <- if(var.penalty == 'simple') v else v %*% info.matrix.unpenalized %*% v df <- sum(dag[-(1:nrp)]) lr <- f.nopenalty$stats["Model L.R."] pval <- 1 - pchisq(lr, df) f$stats[c('d.f.','Model L.R.','P')] <- c(df, lr, pval) } } ass <- if(xpres) DesignAssign(atr, nrp, Terms) else list() if(xpres) { if(linear.predictors) names(f$linear.predictors) <- names(Y) else f$linear.predictors <- NULL if(se.fit) { if(nstrata > 1) stop('se.fit=T not implemented for strat') xint <- matrix(0, nrow=length(Y), ncol=f$non.slopes) xint[,1] <- 1 X <- cbind(xint, X) se <- drop((((X %*% f$var) * X) %*% rep(1, ncol(X)))^.5) names(se) <- names(Y) f$se.fit <- se } } f <- c(f, list(call=call, Design=if(xpres)atr, scale.pred=c("log odds","Odds Ratio"), terms=Terms, assign=ass, na.action=nact, fail=FALSE, interceptRef=1, nstrata=nstrata, sformula=sformula)) class(f) <- c("lrm","rms","glm") f } print.lrm <- function(x, digits=4, strata.coefs=FALSE, coefs=TRUE, title='Logistic Regression Model', ...) { latex <- prType() == 'latex' z <- list() k <- 0 if(length(x$freq) > 3) { k <- k + 1 z[[k]] <- list(type='print', list(x$freq), title='Frequencies of Responses') } if(length(x$sumwty)) { k <- k + 1 z[[k]] <- list(type='print', list(x$sumwty), title='Sum of Weights by Response Category') } if(!is.null(x$nmiss)) { ## for backward compatibility k <- k + 1 z[[k]] <- list(type='print', list(x$nmiss), title='Frequencies of Missing Values Due to Each Variable') } else if(length(x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint',class(x$na.action),sep='.'), list(x$na.action)) } ns <- x$non.slopes nstrata <- x$nstrata if(!length(nstrata)) nstrata <- 1 pm <- x$penalty.matrix penaltyFactor <- NULL if(length(pm)) { psc <- if(length(pm)==1) sqrt(pm) else sqrt(diag(pm)) penalty.scale <- c(rep(0,ns),psc) cof <- matrix(x$coef[-(1:ns)], ncol=1) k <- k + 1 z[[k]] <- list(type='print', list(as.data.frame(x$penalty, row.names='')), title='Penalty factors') penaltyFactor <- as.vector(t(cof) %*% pm %*% cof) } ## ?ok to have uncommented next 3 lines? 
est.exp <- 1:ns if(length(x$est)) est.exp <- c(est.exp, ns + x$est[x$est + ns <= length(x$coefficients)]) vv <- diag(x$var) cof <- x$coef if(strata.coefs) { cof <- c(cof, x$strata.coef) vv <- c(vv, vcov(x)) ## TODO: implement in vcov: ## vv <- c(vv, vcov(x, which='strata.var.diag')) if(length(pm)) penalty.scale <- c(penalty.scale, rep(NA, x$nstrata - 1)) } score.there <- nstrata==1 && (length(x$est) < length(x$coef) - ns) stats <- x$stats maxd <- stats['Max Deriv'] ci <- x$clusterInfo misc <- reListclean(Obs =stats['Obs'], 'Sum of weights'=stats['Sum of Weights'], Strata=if(nstrata > 1) nstrata, 'Cluster on' = ci$name, 'Clusters' = ci$n, 'max |deriv|' = maxd) if(length(x$freq) < 4) { names(x$freq) <- paste(if(latex)'~~' else ' ', names(x$freq), sep='') misc <- c(misc[1], x$freq, misc[-1]) } lr <- reListclean('LR chi2' = stats['Model L.R.'], 'd.f.' = round(stats['d.f.'], 3), 'Pr(> chi2)' = stats['P'], Penalty = penaltyFactor) disc <- reListclean(R2 = stats['R2'], g = stats['g'], gr=stats['gr'], gp = stats['gp'], Brier=stats['Brier']) discr <-reListclean(C = stats['C'], Dxy = stats['Dxy'], gamma = stats['Gamma'], 'tau-a' = stats['Tau-a']) headings <- c('','Model Likelihood\nRatio Test', 'Discrimination\nIndexes', 'Rank Discrim.\nIndexes') data <- list(misc, c(lr, c(2,NA,-4,2)), c(disc,3), c(discr,3)) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) if(coefs) { k <- k + 1 z[[k]] <- list(type='coefmatrix', list(coef=cof, se=sqrt(vv), aux=if(length(pm)) penalty.scale, auxname='Penalty Scale')) } if(score.there) { q <- (1:length(cof))[-est.exp] if(length(q)==1) vv <- x$var[q,q] else vv <- diag(x$var[q,q]) Z <- x$u[q]/sqrt(vv) stats <- cbind(format(Z,digits=2), format(1-pchisq(Z^2,1),digits=4)) dimnames(stats) <- list(names(cof[q]),c("Score Z","P")) k <- k + 1 z[[k]] <- list(type='print', list(stats)) } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } rms/R/residuals.ols.s0000644000176200001440000000505312161462344014235 0ustar liggesusersresiduals.ols <- function(object, type=c("ordinary","score","dfbeta","dfbetas","dffit","dffits","hat", "hscore"), ...) 
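## A minimal usage sketch (hypothetical data; x=TRUE on the fit is required
## for all but type='ordinary'):
##   f <- ols(y ~ x1 + x2, x=TRUE)
##   residuals(f, type='dfbetas')   # scaled change in each beta per case
##   h <- residuals(f, type='hat')  # leverages, from ols.influence below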
{ type <- match.arg(type) naa <- object$na.action if(type=="ordinary") return(naresid(naa, object$residuals)) if(!length(object$x))stop("did not specify x=TRUE in fit") X <- cbind(Intercept=1, object$x) if(type=="score") return(naresid(naa, X * object$residuals)) infl <- ols.influence(object) if(type=="hscore") return(naresid(naa, X * (object$residuals / (1 - infl$hat)))) if(type=="dfbeta" | type=="dfbetas") { r <- t(coef(object) - t(coef(infl))) if(type=="dfbetas") r <- sweep(r, 2, diag(object$var)^.5, "/") } else if(type=="dffit") r <- (infl$hat * object$residuals)/(1 - infl$hat) else if(type=="dffits") r <- (infl$hat^.5)*object$residuals / (infl$sigma * (1 - infl$hat)) else if(type=="hat") r <- infl$hat naresid(naa, r) } ## lm.influence used to work but now it re-computes X for unknown ## reasons 24Nov00 ols.influence <- function(lm, x) { GET <- function(x, what) { ## eventually, x[[what, exact=TRUE]] if(is.na(n <- match(what, names(x)))) NULL else x[[n]] } wt <- GET(lm, "weights") ## should really test for < 1/BIG if machine pars available e <- lm$residuals n <- length(e) if(length(wt)) e <- e * sqrt(wt) beta <- lm$coef if(is.matrix(beta)) { beta <- beta[, 1] e <- e[, 1] warning("multivariate response, only first y variable used") } na <- is.na(beta) beta <- beta[!na] p <- GET(lm, "rank") if(!length(p)) p <- sum(!na) R <- lm$qr$qr if(p < max(dim(R))) R <- R[1:p, 1:p] qr <- GET(lm, "qr") if(!length(qr)) { lm.x <- cbind(Intercept=1, GET(lm, "x")) if(length(wt)) lm.x <- lm.x * sqrt(wt) if(any(na)) lm.x <- lm.x[, !na, drop = FALSE] stop('not implemented') # left.solve doesn't exist in R ## Q <- left.solve(R, lm.x) } else { if(length(wt) && any(zero <- wt == 0)) { Q <- matrix(0., n, p) dimnames(Q) <- list(names(e), names(beta)) Q[!zero, ] <- qr.Q(qr)[, 1:p, drop = FALSE] } else { Q <- qr.Q(qr) if(p < ncol(Q)) Q <- Q[, 1:p, drop = FALSE] } } h <- as.vector((Q^2 %*% array(1, c(p, 1)))) h.res <- (1 - h) z <- e/h.res v1 <- e^2 z <- t(Q * z) v.res <- sum(v1) v1 <- (v.res - v1/h.res)/(n - p - 1) ## BKW (2.8) dbeta <- backsolve(R, z) list(coefficients = t(beta - dbeta), sigma = sqrt(v1), hat = h) } rms/R/bplot.s0000644000176200001440000000706313715036004012565 0ustar liggesusersbplot <- function(x, formula, lfun=lattice::levelplot, xlab, ylab, zlab, adj.subtitle=!info$ref.zero, cex.adj=.75, cex.lab=1, perim, showperim=FALSE, zlim=range(yhat, na.rm=TRUE), scales=list(arrows=FALSE), xlabrot, ylabrot, zlabrot=90, ...) 
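## A minimal usage sketch (hypothetical fit; requires a datadist so Predict
## can vary two predictors over their default ranges):
##   dd <- datadist(d); options(datadist='dd')
##   f <- ols(y ~ rcs(age, 4) * rcs(bp, 4), data=d)
##   p <- Predict(f, age, bp)           # vary two predictors
##   bplot(p)                           # levelplot of predicted values
##   bplot(p, lfun=lattice::wireframe)  # or a 3-D perspective plot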
{ lfunname <- deparse(substitute(lfun)) if(missing(xlabrot)) xlabrot <- switch(lfunname, wireframe=30, contourplot=0, levelplot=0, 0) if(missing(ylabrot)) ylabrot <- switch(lfunname, wireframe=-40, contourplot=90, levelplot=90, 0) info <- attr(x, 'info') varying <- info$varying if(length(varying) < 2) stop('should vary at least two variables') if(missing(formula)) { nx <- varying[1] ny <- varying[2] formula <- paste('yhat ~', nx, '*', ny) if(length(varying) > 2) formula <- paste(formula, '|', paste(varying[-(1:2)], collapse='*')) formula <- as.formula(formula) } else { ter <- attributes(terms(formula)) vars <- ter$term.labels nx <- vars[1] ny <- vars[2] if(!ter$response) formula <- as.formula(paste('yhat', format(formula))) } data <- x yhat <- x$yhat y <- x[[ny]] x <- x[[nx]] at <- info$Design label <- at$label units <- at$units if(missing(xlab)) xlab <- labelPlotmath(label[nx], units[nx]) xlab <- list(label=xlab, rot=xlabrot, cex=cex.lab) if(missing(ylab)) ylab <- labelPlotmath(label[ny], units[ny]) ylab <- list(label=ylab, rot=ylabrot, cex=cex.lab) if(missing(zlab)) zlab <- info$ylabPlotmath zlab <- list(label=zlab, rot=zlabrot, cex=cex.lab) adjust <- info$adjust if(!missing(perim)) { Ylo <- approx(perim[,1], perim[,2], x, ties=mean)$y Yhi <- approx(perim[,1], perim[,3], x, ties=mean)$y Ylo[is.na(Ylo)] <- 1e30 Yhi[is.na(Yhi)] <- -1e30 yhat[y < Ylo] <- NA yhat[y > Yhi] <- NA data$yhat <- yhat } else if(showperim) stop('cannot request showperim without specifying perim') sub <- if(adj.subtitle && length(info$adjust)) list(label=paste('Adjusted to:', info$adjust), cex=cex.adj) else NULL pan <- function(...) { fname <- paste('lattice::panel', gsub('lattice::', '', lfunname), sep='.') f <- eval(parse(text = fname)) do.call(f, list(...)) if(showperim) { lattice::llines(perim[,'x'], perim[,'ymin'], col=gray(.85)) lattice::llines(perim[,'x'], perim[,'ymax'], col=gray(.85)) } } lfun(formula, panel=pan, scales=scales, zlim=zlim, ..., data=data, xlab=xlab, ylab=ylab, zlab=zlab, sub=sub) } perimeter <- function(x, y, xinc=diff(range(x))/10, n=10, lowess.=TRUE) { s <- !is.na(x + y) x <- x[s] y <- y[s] m <- length(x) if(m < n) stop("number of non-NA x must be >= n") i <- order(x) x <- x[i] y <- y[i] s <- n:(m-n+1) x <- x[s] y <- y[s] x <- round(x/xinc)*xinc g <- function(y, n) { y <- sort(y) m <- length(y) if(n > (m - n + 1)) c(NA, NA) else c(y[n], y[m-n+1]) } r <- unlist(tapply(y, x, g, n=n)) i <- seq(1, length(r), by=2) rlo <- r[i] rhi <- r[-i] s <- !is.na(rlo + rhi) if(!any(s)) stop("no intervals had sufficient y observations") x <- sort(unique(x))[s] rlo <- rlo[s] rhi <- rhi[s] if(lowess.) { rlo <- lowess(x, rlo)$y rhi <- lowess(x, rhi)$y } structure(cbind(x, rlo, rhi), dimnames=list(NULL, c("x","ymin","ymax")), class='perimeter') } rms/R/contrast.s0000644000176200001440000002736014006114244013301 0ustar liggesuserscontrast <- function(fit, ...) UseMethod("contrast") contrast.rms <- function(fit, a, b, a2, b2, ycut=NULL, cnames=NULL, fun=NULL, funint=TRUE, type=c('individual','average','joint'), conf.type=c('individual','simultaneous'), usebootcoef=TRUE, boot.type=c('percentile','bca','basic'), posterior.summary=c('mean', 'median', 'mode'), weights='equal', conf.int=0.95, tol=1e-7, expand=TRUE, ...) 
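## A minimal usage sketch (hypothetical variables): estimate a treatment
## contrast at chosen covariate settings, or test several contrasts jointly.
##   f <- lrm(y ~ trt * rcs(age, 4), data=d)   # assumed data
##   contrast(f, list(trt='B', age=30), list(trt='A', age=30))
##   contrast(f, list(trt='B', age=c(30, 50, 70)),
##               list(trt='A', age=c(30, 50, 70)), type='joint')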
{ type <- match.arg(type) conf.type <- match.arg(conf.type) boot.type <- match.arg(boot.type) posterior.summary <- match.arg(posterior.summary) draws <- fit$draws bayes <- length(draws) > 0 if(bayes & (type == 'joint' || conf.type == 'simultaneous')) stop('type=joint or conf.type=simultaneous not allowed for Bayesian models') zcrit <- if(length(idf <- fit$df.residual)) qt((1 + conf.int) / 2, idf) else qnorm((1 + conf.int) / 2) bcoef <- if(usebootcoef) fit$boot.Coef pmode <- function(x) { dens <- density(x) dens$x[which.max(dens$y)[1]] } if(! bayes) betas <- coef(fit) fite <- fit if(inherits(fit, 'orm')) { nrp <- 1 ## Note: is 1 for orm because vcov defaults to intercepts='mid' w <- c(fit$interceptRef, (num.intercepts(fit) + 1) : length(betas)) betas <- betas[w] fite$coefficients <- betas # for simult confint if(usebootcoef) bcoef <- bcoef[, w, drop=FALSE] } else nrp <- num.intercepts(fit, 'var') if(length(bcoef) && conf.type != 'simultaneous') conf.type <- switch(boot.type, percentile = 'bootstrap nonparametric percentile', bca = 'bootstrap BCa', basic = 'basic bootstrap') partialpo <- inherits(fit, 'blrm') && fit$pppo > 0 if(partialpo & ! length(ycut)) stop('must specify ycut for partial prop. odds model') cppo <- fit$cppo if(partialpo && ! length(cppo)) stop('only implemented for constrained partial PO models') pred <- function(d) { ## predict.blrm duplicates rows of design matrix for partial PO models ## if ycut has length > 1 and only one observation is being predicted if(partialpo) predict(fit, d, type='x', ycut=ycut) else predict(fit, d, type='x') } da <- do.call('gendata', list(fit, factors=a, expand=expand)) xa <- pred(da) if(! missing(b)) { db <- do.call('gendata', list(fit, factors=b, expand=expand)) xb <- pred(db) } ma <- nrow(xa) if(missing(b)) { xb <- 0 * xa db <- da } mb <- nrow(xb) if(! missing(a2)) { if(missing(b) || missing(b2)) stop('b and b2 must be given if a2 is given') da2 <- do.call('gendata', list(fit, factors=a2, expand=expand)) xa2 <- pred(da2) ma2 <- nrow(xa2) db2 <- do.call('gendata', list(fit, factors=b2, expand=expand)) xb2 <- pred(db2) mb2 <- nrow(xb2) } allsame <- function(x) diff(range(x)) == 0 vary <- NULL mall <- c(ma, mb) ncols <- c(ncol(da), ncol(db)) if(! missing(a2)) { mall <- c(mall, ma2, mb2) ncols <- c(ncols, ncol(da2), ncol(db2)) } if(allsame(mall) && ! allsame(ncols)) stop('program logic error') if(any(sort(names(da)) != sort(names(db)))) stop('program logic error') if(! missing(a2) && (any(sort(names(da)) != sort(names(da2))) || any(sort(names(da)) != sort(names(db2))))) stop('program logic error') if(type != 'average' && ! length(cnames)) { ## If all lists have same length, label contrasts by any variable ## that has the same length and values in all lists k <- integer(0) nam <- names(da) for(j in 1 : length(da)) { w <- nam[j] eq <- all(as.character(da[[w]]) == as.character(db[[w]])) if(! missing(a2)) eq <- eq & all(as.character(da[[w]]) == as.character(da2[[w]])) & all(as.character(da[[2]]) == as.character(db2[[w]])) if(eq) k <- c(k, j) } if(length(k)) vary <- da[k] } else if(max(mall) > 1) { ## Label contrasts by values of longest variable in list if ## it has the same length as the expanded design matrix d <- if(ma > 1) a else b if(! missing(a2) && (max(ma2, mb2) > max(ma, mb))) d <- if(ma2 > 1) a2 else b2 l <- sapply(d, length) vary <- if(sum(l == max(mall)) == 1) d[l == max(mall)] } if(sum(mall > 1) > 1 && ! 
allsame(mall[mall > 1])) stop('lists of settings with more than one row must all have the same # rows') mm <- max(mall) if(mm > 1 && any(mall == 1)) { if(ma == 1) xa <- matrix(xa, nrow=mm, ncol=ncol(xa), byrow=TRUE) if(mb == 1) xb <- matrix(xb, nrow=mm, ncol=ncol(xb), byrow=TRUE) if(! missing(a2)) { if(ma2 == 1) xa2 <- matrix(xa2, nrow=mm, ncol=ncol(xa2), byrow=TRUE) if(mb2 == 1) xb2 <- matrix(xb2, nrow=mm, ncol=ncol(xb2), byrow=TRUE) } } if(bayes && length(fun) && inherits(fit, 'blrm')) { if(! missing(a2)) stop('fun= is only implemented for blrm fits') if(missing(b)) stop('b must be specified when fun= is given') if(!missing(ycut)) stop('ycut not used with fun=') pa <- predict(fit, da, fun=fun, funint=funint, posterior.summary='all') pb <- predict(fit, db, fun=fun, funint=funint, posterior.summary='all') if(length(cnames)) colnames(pa) <- colnames(pb) <- cnames # If fun has an intercepts argument, the intecept vector must be # updated for each draw if(! length(cnames)) cnames <- if(length(vary)) rep('', ncol(pa)) else as.character(1 : ncol(pa)) colnames(pa) <- colnames(pb) <- cnames res <- list(esta=pa, estb=pb, Xa=xa, Xb=xb, nvary=length(vary)) return(structure(res, class='contrast.rms')) } # end if bayes & length(fun) ... X <- xa - xb if(! missing(a2)) X <- X - (xa2 - xb2) m <- nrow(X) if(nrp > 0) X <- cbind(matrix(0., nrow=m, ncol=nrp), X) if(is.character(weights)) { if(weights != 'equal') stop('weights must be "equal" or a numeric vector') weights <- rep(1, m) } else if(length(weights) > 1 && type != 'average') stop('can specify more than one weight only for type="average"') else if(length(weights) != m) stop(paste('there must be', m, 'weights')) weights <- as.vector(weights) if(m > 1 && type=='average') X <- matrix(apply(weights*X, 2, sum) / sum(weights), nrow=1, dimnames=list(NULL, dimnames(X)[[2]])) cdraws <- NULL if(bayes) { cdraws <- draws %*% t(X) if(length(cnames)) colnames(cdraws) <- cnames v <- var(cdraws) ndf <- if(is.matrix(v)) nrow(v) else 1 ci <- apply(cdraws, 2, rmsb::HPDint, prob=conf.int) lower <- ci[1, ] upper <- ci[2, ] PP <- apply(cdraws, 2, function(u) mean(u > 0)) se <- apply(cdraws, 2, sd) est <- switch(posterior.summary, mode = apply(cdraws, 2, pmode), mean = colMeans(cdraws), median = apply(cdraws, 2, median)) P <- Z <- NULL } else { est <- matxv(X, betas) v <- X %*% vcov(fit, regcoef.only=TRUE) %*% t(X) ndf <- if(is.matrix(v)) nrow(v) else 1 se <- as.vector(if(ndf == 1) sqrt(v) else sqrt(diag(v))) Z <- est / se P <- if(length(idf)) 2 * pt(- abs(Z), idf) else 2 * pnorm(- abs(Z)) if(conf.type != 'simultaneous') { if(length(bcoef)) { best <- t(matxv(X, bcoef, bmat=TRUE)) lim <- bootBCa(est, best, type=boot.type, n=nobs(fit), seed=fit$seed, conf.int=conf.int) if(is.matrix(lim)) { lower <- lim[1,] upper <- lim[2,] } else { lower <- lim[1] upper <- lim[2] } } else { lower <- est - zcrit*se upper <- est + zcrit*se } } else { u <- confint(multcomp::glht(fite, X, df=if(length(idf)) idf else 0), level=conf.int)$confint lower <- u[,'lwr'] upper <- u[,'upr'] } PP <- NULL; posterior.summary='' } if(type != 'average' && length(ycut)) cnames <- paste0(cnames, ' ', fit$yname, '=', ycut) res <- list(Contrast=est, SE=se, Lower=lower, Upper=upper, Z=Z, Pvalue=P, PP=PP, var=v, df.residual=idf, X=X, ycut=ycut, yname=if(length(ycut)) fit$yname, cnames=if(type=='average') NULL else cnames, nvary=length(vary), conf.type=conf.type, conf.int=conf.int, posterior.summary=posterior.summary, cdraws = cdraws) if(type != 'average') res <- c(vary, res) r <- qr(v, tol=tol) nonred <- r$pivot[1 
: r$rank] # non-redundant contrasts redundant <- (1 : length(est)) %nin% nonred res$redundant <- redundant if(type=='joint') { est <- est[! redundant] v <- v[! redundant, ! redundant, drop=FALSE] res$jointstat <- as.vector(est %*% solve(v, tol=tol) %*% est) } structure(res, class='contrast.rms') } print.contrast.rms <- function(x, X=FALSE, fun=function(u) u, jointonly=FALSE, prob=0.95, ...) { # See if a result of fun= on a Bayesian fit if('esta' %in% names(x)) { esta <- x$esta estb <- x$estb f <- function(x) { hpd <- rmsb::HPDint(x, prob) r <- c(mean(x), median(x), hpd) names(r) <- c('Posterior Mean', 'Posterior Median', paste(c('Lower', 'Upper'), prob, 'HPD')) r } cat('\nPosterior Summaries for First X Settings\n\n') print(t(apply(esta, 2, f))) cat('\nPosterior Summaries for Second X Settings\n\n') print(t(apply(estb, 2, f))) cat('\nPosterior Summaries of First - Second\n\n') print(t(apply(esta - estb, 2, f))) return(invisible()) } edf <- x$df.residual sn <- if(length(edf)) 't' else 'Z' pn <- if(length(edf)) 'Pr(>|t|)' else 'Pr(>|z|)' w <- x[1 : (x$nvary + 7)] isn <- sapply(w, is.null) w <- w[! isn] if(length(w$Z)) w$Z <- round(w$Z, 2) if(length(w$Pvalue)) w$Pvalue <- round(w$Pvalue, 4) if(length(w$PP)) w$PP <- round(w$PP, 4) if(length(w$PP)) pn <- 'Pr(Contrast>0)' no <- names(w) no[no=='SE'] <- 'S.E.' no[no=='Z'] <- sn no[no %in% c('Pvalue', 'PP')] <- pn cnames <- x$cnames if(! length(cnames)) cnames <- if(x$nvary) rep('', length(x[[1]])) else as.character(1 : length(x[[1]])) if(any(x$redundant)) cnames <- paste(ifelse(x$redundant, '*', ' '), cnames) w <- data.frame(w, row.names=paste(format(1:length(cnames)), cnames, sep='')) if(length(x$y)) { w$.y. <- x$y names(w)[names(w) == '.y.'] <- x$yname } w$Contrast <- fun(w$Contrast) if(! all(1:10 == fun(1:10))) w$SE <- rep(NA, length(w$SE)) w$Lower <- fun(w$Lower) w$Upper <- fun(w$Upper) # Assign modified names to w names(w) <- no # Print w if(!jointonly) { ## print(as.matrix(w), quote=FALSE) print(w, ...) if(any(x$redundant)) cat('\nRedundant contrasts are denoted by *\n') } jstat <- x$jointstat if(length(jstat)) { cat('\nJoint test for all contrasts=0:\n\n') ndf <- sum(!x$redundant) if(length(edf)) { Fstat <- jstat / ndf Pval <- 1 - pf(Fstat, ndf, edf) cat('F(', ndf, ',', edf, ')=', round(Fstat,3),', P=', round(Pval,4), '\n', sep='') } else { Pval <- 1 - pchisq(jstat, ndf) cat('Chi-square=', round(jstat, 2),' with ', ndf, ' d.f. P=', round(Pval, 4),'\n', sep='') } } if(!jointonly && length(edf))cat('\nError d.f.=',edf,'\n') if(x$posterior.summary == '') cat('\nConfidence intervals are', x$conf.int, x$conf.type, 'intervals\n') else { cat('\nIntervals are', x$conf.int, 'highest posterior density intervals\n') cat('Contrast is the posterior', x$posterior.summary, '\n') } if(X) { cat('\nDesign Matrix for Contrasts\n\n') if(is.matrix(x$X)) dimnames(x$X) <- list(cnames, dimnames(x$X)[[2]]) print(x$X) } invisible() } rms/R/orm.s0000644000176200001440000004265213760500204012243 0ustar liggesusersorm <- function(formula, data=environment(formula), subset, na.action=na.delete, method="orm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, eps=0.005, var.penalty=c('simple','sandwich'), scale=FALSE, ...) { call <- match.call() var.penalty <- match.arg(var.penalty) if(!missing(penalty) || !missing(penalty.matrix)) stop('penalty not yet implemented') nact <- NULL tform <- terms(formula, data=data) if(! 
missing(data) || ( length(atl <- attr(tform,"term.labels")) && any(atl!="."))) { ##X's present callenv <- parent.frame() # don't delay these evaluations subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) X <- modelData(data, formula, subset = subset, na.action=na.action, callenv=callenv) X <- Design(X, formula=formula) atrx <- attributes(X) sformula <- atrx$sformula nact <- atrx$na.action if(method == "model.frame") return(X) Terms <- atrx$terms attr(Terms, "formula") <- formula atr <- atrx$Design mmcolnames <- atr$mmcolnames Y <- model.extract(X, 'response') offs <- atrx$offset if(!length(offs)) offs <- 0 if(model) m <- X X <- model.matrix(sformula, X) alt <- attr(mmcolnames, 'alt') if(! all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt X <- X[, mmcolnames, drop=FALSE] colnames(X) <- atr$colnames xpres <- length(X) > 0 p <- length(atr$colnames) n <- length(Y) penpres <- !(missing(penalty) && missing(penalty.matrix)) if(penpres && missing(var.penalty)) warning('default for var.penalty has changed to "simple"') if(!penpres) penalty.matrix <- matrix(0,ncol=p,nrow=p) else { if(missing(penalty.matrix)) penalty.matrix <- Penalty.matrix(atr, X) else if(nrow(penalty.matrix) != p || ncol(penalty.matrix) != p) stop( paste("penalty.matrix does not have",p,"rows and columns")) psetup <- Penalty.setup(atr, penalty) penalty <- psetup$penalty multiplier <- psetup$multiplier if(length(multiplier)==1) penalty.matrix <- multiplier*penalty.matrix else { a <- diag(sqrt(multiplier)) penalty.matrix <- a %*% penalty.matrix %*% a } } } else { X <- eval.parent(m) offs <- model.offset(X) if(!length(offs)) offs <- 0 Y <- model.extract(X, 'response') Y <- Y[!is.na(Y)] Terms <- X <- NULL xpres <- FALSE penpres <- FALSE penalty.matrix <- NULL } ##Model: y~. without data= -> no predictors if(method=="model.matrix") return(X) if(existsFunction(method)) { fitter <- getFunction(method) f <- fitter(X, Y, offset=offs, penalty.matrix=penalty.matrix, tol=tol, eps=eps, scale=scale, ...) } else stop(paste("unimplemented method:", method)) if(f$fail) { cat("Unable to fit model using ", dQuote(method), '\n') return(f) } f$call <- NULL f$sformula <- sformula if(model) f$model <- m if(x) f$x <- X if(y) f$y <- Y nrp <- f$non.slopes if(penpres) { f$penalty <- penalty ## Get improved covariance matrix v <- f$var if(var.penalty == 'sandwich') f$var.from.info.matrix <- v f.nopenalty <- fitter(X, Y, offset=offs, initial=f$coef, maxit=1, tol=tol) ## info.matrix.unpenalized <- solvet(f.nopenalty$var, tol=tol) info.matrix.unpenalized <- f.nopenalty$info.matrix dag <- diag(info.matrix.unpenalized %*% v) f$effective.df.diagonal <- dag f$var <- if(var.penalty == 'simple') v else v %*% info.matrix.unpenalized %*% v df <- sum(dag[-(1:nrp)]) lr <- f.nopenalty$stats["Model L.R."] pval <- 1 - pchisq(lr, df) f$stats[c('d.f.','Model L.R.','P')] <- c(df, lr, pval) } ass <- if(xpres) DesignAssign(atr, nrp, Terms) else list() if(xpres) { if(linear.predictors) names(f$linear.predictors) <- names(Y) else f$linear.predictors <- NULL if(se.fit) { X <- cbind(1, X) se <- drop((((X %*% f$var) * X) %*% rep(1, ncol(X)))^.5) names(se) <- names(Y) f$se.fit <- se } } f <- c(f, list(call=call, Design=if(xpres)atr, scale.pred=if(f$family=='logistic') c("log odds","Odds Ratio") else if(f$family=='loglog') c("log hazard", "Hazard Ratio"), terms=Terms, assign=ass, na.action=nact)) class(f) <- c("orm","rms") f } print.orm <- function(x, digits=4, coefs=TRUE, intercepts=x$non.slopes < 10, title, ...) 
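## Note: by default the intercepts (one per distinct Y value, less one) are
## omitted from the printed coefficient table when there are 10 or more;
## call print(f, intercepts=TRUE) to show them all.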
{ if(missing(title)) { title <- switch(x$family, logistic = 'Logistic (Proportional Odds)', probit = 'Probit', cauchit = 'Cauchy', loglog = '-log-log', cloglog = 'Complementary log-log') title <- paste(title, 'Ordinal Regression Model') } z <- list() k <- 0 lf <- length(x$freq) if(lf > 3 && lf <= 20) { k <- k + 1 z[[k]] <- list(type='print', list(x$freq), title='Frequencies of Responses') } if(length(x$nmiss)) { ## for backward compatibility k <- k + 1 z[[k]] <- list(type='print', list(x$nmiss), title='Frequencies of Missing Values Due to Each Variable') } else if(length(x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(x$na.action), sep='.'), list(x$na.action)) } ns <- x$non.slopes ## coefficient intercepts kept: (fit.mult.impute) cik <- attr(x$coef, 'intercepts') # esp. for fit.mult.impute if(length(cik) && intercepts) { warning('intercepts=TRUE not implemented for fit.mult.impute objects') intercepts <- FALSE } pm <- x$penalty.matrix penaltyFactor <- NULL if(length(pm)) { psc <- if(length(pm) == 1) sqrt(pm) else sqrt(diag(pm)) penalty.scale <- c(rep(0, ns), psc) cof <- matrix(x$coef[-(1 : ns)], ncol=1) ## This logic does not handle fit.mult.impute objects k <- k + 1 z[[k]] <- list(type='print', list(as.data.frame(x$penalty, row.names='')), title='Penalty factors') penaltyFactor <- as.vector(t(cof) %*% pm %*% cof) } vv <- diag(vcov(x, intercepts=if(intercepts) 'all' else 'none')) if(!intercepts) { nints <- if(!length(cik)) ns else { if(length(cik) == 1 && cik ==0) 0 else length(cik) } ints.to.delete <- if(ns == 0 || nints == 0) integer(0) else 1:nints vv <- c(rep(NA, nints), vv) } cof <- x$coef stats <- x$stats maxd <- stats['Max Deriv'] ci <- x$clusterInfo misc <- reListclean(Obs = stats['Obs'], 'Distinct Y' = stats['Distinct Y'], 'Cluster on' = ci$name, Clusters = ci$n, 'Median Y' = stats['Median Y'], 'max |deriv|' = maxd) if(length(x$freq) < 4) { names(x$freq) <- paste(if(prType() == 'latex') '~~' else ' ', names(x$freq), sep='') misc <- c(misc[1], x$freq, misc[-1]) } lr <- reListclean('LR chi2' = stats['Model L.R.'], 'd.f.' = round(stats['d.f.'],3), 'Pr(> chi2)' = stats['P'], 'Score chi2' = stats['Score'], 'Pr(> chi2)' = stats['Score P'], Penalty = penaltyFactor) disc <- reListclean(R2=stats['R2'], g=stats['g'], gr=stats['gr'], '|Pr(Y>=median)-0.5|'=stats['pdm']) discr <-reListclean(rho=stats['rho']) headings <- c('', 'Model Likelihood\nRatio Test', 'Discrimination\n Indexes', 'Rank Discrim.\nIndexes') data <- list(misc, c(lr, c(2,NA,-4,2,-4,2)), c(disc,3), c(discr,3)) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) if(coefs) { k <- k + 1 if(!intercepts) { j <- - ints.to.delete cof <- cof[j] vv <- vv[j] if(length(pm)) penalty.scale <- penalty.scale[j] } z[[k]] <- list(type='coefmatrix', list(coef=cof, se=sqrt(vv), aux=if(length(pm)) penalty.scale, auxname='Penalty Scale')) } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } Mean.orm <- function(object, codes=FALSE, ...) Mean.lrm(object, codes=codes, ...) Quantile.orm <- function(object, codes=FALSE, ...) 
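## Quantile.orm returns a *function* whose formals capture the fitted
## intercepts, slopes, and information matrix, so quantiles of Y can be
## computed later from linear predictors or a design matrix. Sketch
## (hypothetical orm fit f):
##   qu <- Quantile(f); qu(0.5, lp=predict(f))   # per-observation medians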
{ ns <- object$non.slopes if(ns < 2) stop('using this function only makes sense for >2 ordered response categories') if(codes) vals <- 1:length(object$freq) else { vals <- object$yunique if(!length(vals)) vals <- names(object$freq) vals <- as.numeric(vals) if(any(is.na(vals))) stop('values of response levels must be numeric for codes=FALSE') } f <- function(q=numeric(0), lp=numeric(0), X=numeric(0), intercepts=numeric(0), slopes=numeric(0), info=numeric(0), values=numeric(0), interceptRef=integer(0), trans=trans, conf.int=0, method=c('interpolated', 'discrete')) { inverse <- trans$inverse cumprob <- trans$cumprob deriv <- trans$deriv ns <- length(intercepts) method <- match.arg(method) lp <- if(length(lp)) lp - intercepts[interceptRef] else matxv(X, slopes) lb <- matrix(sapply(intercepts, '+', lp), ncol = ns) if(method == 'interpolated'){ m.yvals <- matrix(NA, nrow = nrow(lb), ncol = ns + 2) cp <- cbind(cumprob(lb), 0) for(j in 1:nrow(lb)){ ws <- c(0, (cp[j, -ns-1] - cp[j, 1]) / (cp[j, ns] - cp[j, 1]), 1) m.yvals[j,] <- (1 - ws) * c(values[1], values) + ws * c(values, values[ns + 1]) } z <- sapply(1:nrow(lb), function(i) approx(c(1, cp[i,]), m.yvals[i,], xout = cumprob(inverse(1 - q)), rule = 2)$y) } if(method == 'discrete'){ m.cdf <- cbind(1 - cumprob(lb), 1) id <- apply(m.cdf, 1, FUN=function(x) {min(which(x >= q))[1]}) z <- values[id] } names(z) <- names(lp) if(conf.int) { if(! length(X)) stop('when conf.int > 0 must provide X') lb.se <- matrix(NA, ncol = ns, nrow = nrow(X)) info.inverse <- as.matrix(solve(info)) idx <- which(names(c(intercepts, slopes)) %in% colnames(X)) dlb.dtheta <- as.matrix(cbind(1, X)) for(i in 1:ns){ v.i <- info.inverse[c(i, idx), c(i, idx)] lb.se[, i] <- sqrt(diag(dlb.dtheta %*% v.i %*% t(dlb.dtheta))) } w <- qnorm((1 + conf.int) / 2) ci.ub <- matrix(sapply(1:ns, FUN=function(i) {1 - cumprob(lb[, i] - w * lb.se[, i])}), ncol = ns) ci.lb <- matrix(sapply(1:ns, FUN=function(i) {1 - cumprob(lb[, i] + w * lb.se[, i])}), ncol = ns) if(method == 'interpolated'){ z.ub <- sapply(1:nrow(lb), function(i) approx(c(1, 1 - ci.lb[i,], 0), m.yvals[i,], xout = cumprob(inverse(1 - q)), rule = 2)$y) z.lb <- sapply(1:nrow(lb), function(i) approx(c(1, 1 - ci.ub[i,], 0), m.yvals[i,], xout = cumprob(inverse(1 - q)), rule = 2)$y) } if(method == 'discrete'){ id <- apply(cbind(ci.lb, 1), 1, FUN=function(x) {min(which(x >= q))[1]}) z.ub <- values[id] id <- apply(cbind(ci.ub, 1), 1, FUN=function(x) {min(which(x >= q))[1]}) z.lb <- values[id] } attr(z, 'limits') <- list(lower = z.lb, upper = z.ub) } z } ## Re-write first derivative so that it doesn't need the f argument if(object$family == "logistic") object$trans$deriv <- function(x) {p <- plogis(x); p * (1. - p)} trans <- object$trans if(! length(trans)) trans <- probabilityFamilies$logistic ir <- object$interceptRef if(! length(ir)) ir <- 1 formals(f) <- list(q=numeric(0), lp=numeric(0), X=numeric(0), intercepts=object$coef[1:ns], slopes=object$coef[-(1 : ns)], info=object$info.matrix, values=vals, interceptRef=ir, trans=trans, conf.int=0, method=c('interpolated', 'discrete')) f } ExProb <- function(object, ...) UseMethod("ExProb") ExProb.orm <- function(object, codes=FALSE, ...) 
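## ExProb.orm follows the same function-generator pattern, returning a
## function that computes exceedance probabilities Prob(Y >= y).  Hedged
## sketch (not run; same hypothetical fit f):
##   ep <- ExProb(f)
##   ep(lp=f$linear.predictors)         # Prob(Y >= y) at every distinct y
##   ep(lp=f$linear.predictors, y=10)   # at a single (hypothetical) cutoff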
{ ns <- object$non.slopes if(codes) vals <- 1:length(object$freq) else { vals <- as.numeric(object$yunique) if(any(is.na(vals))) stop('values of response levels must be numeric for codes=FALSE') } f <- function(lp=numeric(0), X=numeric(0), y=NULL, intercepts=numeric(0), slopes=numeric(0), info=numeric(0), values=numeric(0), interceptRef=integer(0), trans=trans, yname=NULL, conf.int=0) { cumprob <- trans$cumprob lp <- if(length(lp)) lp - intercepts[interceptRef] else matxv(X, slopes) prob <- cumprob(sapply(c(1e30, intercepts), '+', lp)) dim(prob) <- c(length(lp), length(values)) if(! length(y)) { colnames(prob) <- paste('Prob(Y>=', values, ')', sep='') y <- values result <- structure(list(y=values, prob=prob, yname=yname), class='ExProb') } else { p <- t(apply(prob, 1, function(probs) { pa <- approx(values, probs, xout=y, f=1, method='constant')$y pa[y < min(values)] <- 1. pa[y > max(values)] <- 0. pa } ) ) if(length(y) > 1) { colnames(p) <- paste('Prob(Y>=', y, ')', sep='') p <- list(y=y, prob=p) } result <- structure(drop(p), class='ExProb') } if(conf.int){ if(! length(X)) stop('must specify X if conf.int > 0') index <- sapply(y, FUN=function(x) {if(x <= min(values)) result <- 1 else if(x >= max(values)) result <- length(values) else which(x <= values)[1] - 1}) info.inverse <- as.matrix(solve(info)) idx <- which(names(c(intercepts, slopes)) %in% colnames(X)) dlb.dtheta <- as.matrix(cbind(1, X)) lb.se <- sapply(1:length(y), function(i) {diag(dlb.dtheta %*% info.inverse[c(index[i], idx), c(index[i], idx)] %*% t(dlb.dtheta)) }) lb.se <- matrix(sqrt(lb.se), ncol = length(y)) m.alpha <- c(intercepts, slopes)[index] lb <- matrix(sapply(m.alpha, '+', lp), ncol = length(y)) ci.ub <- matrix(sapply(1:length(y), FUN=function(i) {cumprob(lb[, i] + qnorm((1 + conf.int) / 2) * lb.se[, i])}), ncol = length(y)) ci.lb <- matrix(sapply(1:length(y), FUN=function(i) {cumprob(lb[, i] - qnorm((1 + conf.int) / 2) * lb.se[, i])}), ncol = length(y)) ci.ub[, which(y <= min(values))] <- ci.lb[, which(y <= min(values))] <- 1 ci.ub[, which(y >= max(values))] <- ci.lb[, which(y >= max(values))] <- 0 if(length(y) > 1) colnames(ci.ub) <- colnames(ci.lb) <- colnames(result$prob) attr(result, 'limits') <- list(lower = ci.lb, upper = ci.ub) } result } trans <- object$trans formals(f) <- list(lp=numeric(0), X=numeric(0), y=NULL, intercepts=object$coef[1:ns], slopes=object$coef[-(1 : ns)], info=object$info.matrix, values=vals, interceptRef=object$interceptRef, trans=trans, yname=all.vars(object$terms)[1], conf.int=0) f } plot.ExProb <- function(x, ..., data=NULL, xlim=NULL, xlab=x$yname, ylab=expression(Prob(Y>=y)), col=par('col'), col.vert='gray85', pch=20, pch.data=21, lwd=par('lwd'), lwd.data=lwd, lty.data=2, key=TRUE) { xlab <- xlab Y <- x[[2]] x <- x[[1]] if(!length(xlim)) xlim <- range(x) plot(0, 0, xlim=xlim, ylim=c(0,1), type='n', xlab=xlab, ylab=ylab, ...) if(!is.matrix(Y)) Y <- matrix(Y, nrow=1) xpts <- ypts <- numeric(0) for(i in 1:nrow(Y)) { y <- Y[i,] segments(x, y, x, c(y[-1], 0), col=col.vert) segments(x[-length(x)], y[-1], x[-1], y[-1], col=i, lwd=lwd) points(x, y, pch=pch, col=i) if(length(data)) { xpts <- c(xpts, x); ypts <- c(ypts, y) } } if(!length(data)) return(invisible()) if(!is.list(data)) { groups <- rep(' ', length(data)) Y <- nomiss(data) } else { if(!is.list(data) || length(data) < 2) stop('inappropriate data=') data <- nomiss(data) groups <- data[[1]] Y <- data[[2]] } i <- 0 for(g in unique(groups)) { i <- i + 1 s <- groups == g y <- nomiss(Y[s]) x <- sort(unique(y)) f <- c(1., 1. 
- cumsum(table(y)) / length(y)) if(x[1] > min(Y)) { x <- c(min(Y), x) f <- c(1., f) } y <- f[-length(f)] segments(x, y, x, c(y[-1], 0), col=col.vert, lty=lty.data) segments(x[-length(x)], y[-1], x[-1], y[-1], col=i, lty=lty.data, lwd=lwd.data) points(x, y, pch=pch.data, col=i) xpts <- c(xpts, x); ypts <- c(ypts, y) } if(key && is.list(data)) putKeyEmpty(xpts, ypts, labels=unique(groups), col=1:i, xlim=xlim, grid=FALSE) invisible() } rms/R/Function.rms.s0000644000176200001440000003210513662732631014037 0ustar liggesusersFunction.rms <- function(object, intercept=NULL, digits=max(8,.Options$digits), posterior.summary=c('mean', 'median', 'mode'), ...) { posterior.summary <- match.arg(posterior.summary) oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) at <- object$Design name <- at$name ac <- at$assume.code p <- length(name) nrp <- num.intercepts(object) name.main <- name[ac!=9] #non-intercepts pm <- length(name.main) adj.to <- Getlim(at, allow.null=TRUE, need.all=TRUE)$limits['Adjust to',] draws <- object$draws # uses coef.rmsb if Bayesian Coef <- if(length(draws)) coef(object, stat=posterior.summary) else object$coef chr <- function(y, digits) if(is.factor(y) || is.character(y)) paste('"',as.character(y),'"',sep='') else formatSep(y, digits) adj.to <- unlist(lapply(adj.to,chr,digits=digits)) z <- paste('function(',paste(name.main,'=',adj.to,collapse=','), ') {', sep='') ##f$term.labels does not include strat TL <- attr(terms(object),"term.labels") ##Get inner transformations ##from <- c("asis","pol","lsp","rcs","catg","scored","strat","matrx","I") ##from <- paste(from,"(\\(.*\\))",sep="") from <- c('asis(*)','pol(*)','lsp(*)','rcs(*)','catg(*)','scored(*)', 'strat(*)','matrx(*)','I(*)') to <- rep('*',9) ##trans <- paste("h(",translate(TL[ac!=9], from, "\\1"),")",sep="") trans <- paste("h(",sedit(TL[ac!=9], from, to),")",sep="") ##change wrapping function to h() h <- function(x,...) deparse(substitute(x)) for(i in (1:pm)) trans[i] <- eval(parse(text=trans[i])) j <- trans != name.main if(any(j)) z <- paste(z, paste(name.main[j],'<-',trans[j],collapse=';'), ';',sep='') interaction <- at$interactions if(length(interaction) == 0) interaction <- 0 parms <- at$parms Two.Way <- function(prm,Nam,nam.coef,cof,coef,f,varnames,at,digits) { i1 <- prm[1,1]; i2 <- prm[2,1] num.nl <- any(prm[1,-1] != 0)+any(prm[2,-1] != 0) ##If single factor with nonlinear terms, get it as second factor ##Otherwise, put factor with most # terms as second factor rev <- FALSE if((num.nl==1 & any(prm[1,-1] != 0)) || (length(Nam[[i1]]) > length(Nam[[i2]]))) { i1 <- i2; i2 <- prm[1,1]; rev <- TRUE } N1 <- Nam[[i1]]; N2 <- Nam[[i2]] n1 <- nam.coef[[i1]]; n2 <- nam.coef[[i2]] v <- "" for(j1 in 1:length(N1)) { nam1 <- nam.coef[[i1]][j1] lN2 <- length(N2) cnam <- if(rev) paste(nam.coef[[i2]],"*",nam1) else paste(nam1, "*", nam.coef[[i2]]) mnam <- match(cnam, names(cof), nomatch=0) act <- mnam[mnam>0] lN2.act <- length(act) ##Check if restricted interaction between a rcs and another nonlinear ##var, i.e. 
>1 2nd term possible, only 1 (linear) there, and at first ##nonlinear term of rcs if(lN2.act==1 & lN2>1 & at$assume.code[i1]==4 & j1==2) { v <- paste(v,"+",N2[1],"*(",sep="") cnam <- paste(nam.coef[[if(rev)i2 else i1]][1], "*", nam.coef[[if(rev)i1 else i2]][-1]) vv <- attr(rcspline.restate(at$parms[[at$name[i1]]], c(0, coef[cnam]), x=varnames[i1], digits=digits), 'function.text') v <- paste(v, vv, ')', sep='') break } else if(lN2.act==1) { vv <- paste(cof[act],"*",N1[j1],"*", N2[mnam>0], sep="") v <- paste(v, vv, sep='') } else if(lN2.act>0) { vv <- paste("+",N1[j1],"*(",sep="") v <- paste(v, vv, sep='') if(at$assume.code[i2]==4 & !any(mnam==0)) { ##rcspline, interaction not restricted vv <- attr(rcspline.restate(at$parms[[at$name[i2]]], coef[act], x=varnames[i2], digits=digits), 'function.text') v <- paste(v, vv, ')', sep='') } else { for(j2 in 1:lN2) { l <- mnam[j2] if(l>0) { #not a restricted-out nonlinear term if(j2==1 && substring(cof[l],1,1)=="+") cof[l] <- substring(cof[l],2) vv <- paste(cof[l],"*",N2[j2],sep="") v <- paste(v, vv, sep='') } } v <- paste(v, ")", sep='') } } } v } Three.Way <- function(prm,Nam,nam.coef,cof,coef,f,at,digits) { i1 <- prm[1,1]; i2 <- prm[2,1]; i3 <- prm[3,1] N1 <- Nam[[i1]]; N2 <- Nam[[i2]]; N3 <- Nam[[i3]] v <- ""; l <- 0 for(j3 in 1:length(N3)) { for(j2 in 1:length(N2)) { for(j1 in 1:length(N1)) { l <- l+1 v <- paste(v,cof[l], "*", N1[j1], "*", N2[j2], "*", N3[j3], sep="") } } } v } if(nrp==1 | length(intercept)) { cof <- if(! length(intercept)) formatSep(Coef[1], digits) else formatSep(intercept, digits) z <- paste(z, cof, sep='') } Nam <- list(); nam.coef <- list() assig <- object$assign for(i in (1:p)) { ass <- ac[i] nam <- name[i] prm <- at$parms[[nam]] if(any(ass==c(5,7,8))) prm <- chr(at$parms[[nam]],digits=digits) k <- assig[[TL[i]]] coef <- Coef[k] nam.coef[[i]] <- names(coef) cof <- formatSep(coef,digits) cof <- ifelse(coef<=0, cof, paste("+", cof, sep="")) switch(ass, { nam <- name[i]; Nam[[i]] <- nam q <- paste(cof, '*', nam, sep="") }, { q <- ""; pow <- 1:prm nams <- ifelse(pow==1,nam,paste(nam,"^",pow,"",sep="")) Nam[[i]] <- nams for(j in pow) q <- paste(q, cof[j], "*", nams[j], sep="") }, { q <- paste(cof[1], "*", nam, sep="") nams <- nam kn <- formatSep(-prm,digits) for(j in 1:length(prm)) { zz <- paste("pmax(", nam, if(prm[j]<0) "+" else NULL, if(prm[j]!=0) kn[j] else NULL, ",0)", sep="") nams <- c(nams, zz) q <- paste(q, cof[j+1], "*", zz, sep="") } Nam[[i]] <- nams }, { q <- attr(rcspline.restate(prm, coef, x=nam, digits=digits), 'function.text') if(coef[1]>=0) q <- paste('+',q,sep='') nn <- nam for(j in 1:(length(prm)-2)) { nam <- paste(nam, "'", sep=""); nn <- c(nn, nam) } Nam[[i]] <- nn #Two.Way only needs first name #for 2nd-order ia with 1 d.f. (restr ia) #Three.Way needs original design matrix } , { nn <- paste('(',nam,'==',prm[-1],')',sep='') Nam[[i]] <- nn q <- '' for(j in 1:(length(prm)-1)) { vv <- paste(cof[j], nn[j], sep="*") q <- paste(q, vv, sep="") } }, q <- '', { q <- paste(cof[1], "*", nam, sep="") nams <- nam for(j in 3:length(prm)) { zz <- prm[j] vv <- paste(cof[j-1], "*(", nam, "==", zz, ")", sep="") nams <- c(nams, zz) q <- paste(q, vv, sep="") } Nam[[i]] <- nams }, ##Strat factor doesn't exist as main effect, but keep variable ##names and their lengths if they will appear in interactions later { ## was if(!length(Nam[[i]]) && any... 
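## (prm[-1] holds the strat factor's level labels; the "name=level"
## strings stored in nam.coef are matched later against coefficient
## names when Two.Way/Three.Way assemble interaction terms.)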
if(any(interaction==i)) { nam.coef[[i]] <- paste(name[i], "=", prm[-1], sep="") Nam[[i]] <- prm[-1] } q <- "" }, { if(prm[3,1] == 0) q <- Two.Way(prm,Nam,nam.coef,cof,coef,object, name, at, digits) else q <- Three.Way(prm,Nam,nam.coef,cof,coef, object,at, digits) }, { nam <- names(coef) q <- "" nam <- paste("(", nam, ")", sep="") Nam[[i]] <- nam for(j in 1:length(prm)) { vv <- paste(cof[j], '*', nam[j], sep="") q <- paste(q, vv, sep="") } }) z <- paste(z, q, sep='') } z <- paste(z, '}') eval(parse(text=z)) } Function.cph <- function(object, intercept=-object$center, ...) Function.rms(object, intercept=intercept, ...) sascode <- function(object, file="", append=FALSE) { chr <- function(y) if(is.factor(y) || is.character(y)) paste('"',as.character(y),'"',sep='') else as.character(y) n <- names(object)[names(object)!=''] for(i in n) if(file=='') cat(i,'=',chr(object[[i]]),';\n') else cat(i,'=',chr(object[[i]]),';\n',file=file, append=append|i>1) tf <- tempfile() dput(object, file=tf) object <- scan(file=tf, what='', sep='\n', quiet=TRUE) object <- paste(paste(object[3:(length(object)-1)],collapse='\n'),';',sep='') ##com <- 'sed -e "s/pmax/max/g" -e "s/pmin/min/g" -e "s/==/=/g" ##-e "s/<-/=/g" -e "s/\\^/\*\*/g"' ##w <- sys(com, w) object <- sedit(object, c('pmax','pmin','==','<-','^'), c('max','min','=','=','**'), wild.literal=TRUE) if(file=='') cat(object, sep='\n') else cat(object, sep="\n", file=file, append=TRUE) invisible() } perlcode <- function(object) { group_translate <- function(expr) { result <- vector("list", length(expr) - 1) for (i in 2:length(expr)) { result[[i-1]] <- convert(expr[[i]]) } paste(result, collapse=";\n ") } simple_translate <- function(expr) { paste(convert(expr[[2]]), as.character(expr[[1]]), convert(expr[[3]])) } exp_translate <- function(expr) { expr[[1]] <- "**" simple_translate(expr) } pmax_pmin_translate <- function(expr) { result <- vector("list", length(expr) - 1) for (i in 2:length(expr)) { result[[i-1]] <- convert(expr[[i]]) } name <- substr(as.character(expr[[1]]), 2, 4) paste(name, "((", paste(result, collapse=", "), "))", sep="") } equal_translate <- function(expr) { perlop <- if (is.character(expr[[2]]) || is.character(expr[[3]])) "eq" else "==" lhs <- convert(expr[[2]]) rhs <- convert(expr[[3]]) sprintf("(%s %s %s) ? 
1 : 0", lhs, perlop, rhs) } parenthesis_translate <- function(expr) { sprintf("(%s)", convert(expr[[2]])) } assign_translate <- function(expr) { expr[[1]] <- "=" simple_translate(expr) } log_translate <- function(expr) { paste("log(", convert(expr[[2]]), ")", sep="") } R_to_perl <- list( "{" = group_translate, "-" = simple_translate, "+" = simple_translate, "*" = simple_translate, "/" = simple_translate, "^" = exp_translate, "==" = equal_translate, "(" = parenthesis_translate, "<-" = assign_translate, "pmax" = pmax_pmin_translate, "pmin" = pmax_pmin_translate, "log" = log_translate ) variable_translate <- function(v) { sprintf("$%s", gsub("\\.", "_", v)) } convert <- function(expr) { if (length(expr) == 1) { x <- as.character(expr) if (typeof(expr) == "symbol") { variable_translate(x) } else { if (is.character(expr)) { sprintf('"%s"', x) } else { x } } } else { op <- as.character(expr[[1]]) if (typeof(expr[[1]]) == "symbol" && op %in% names(R_to_perl)) { f <- R_to_perl[[op]] f(expr) } else { stop("don't know how to convert operator: ", op) } } } f <- object if (typeof(f) != "closure") { stop("argument must be a function") } fargs <- formals(f) fbody <- body(f) function_name <- as.character(match.call()[[2]]) if (length(function_name) > 1) { function_name <- "f" } result <- list(sprintf("use List::Util 'max', 'min';\nsub %s {", function_name)) for (i in 1:length(names(fargs))) { v <- names(fargs)[[i]] result <- c(result, sprintf("my %s = $_[%d];", variable_translate(v), i-1)) } result <- c(result, convert(fbody)) paste(paste(result, collapse="\n "), "}", sep="\n") } rms/R/ols.s0000644000176200001440000002572714024531417012253 0ustar liggesusersols <- function(formula, data=environment(formula), weights, subset, na.action=na.delete, method = "qr", model = FALSE, x = FALSE, y = FALSE, se.fit=FALSE, linear.predictors=TRUE, penalty=0, penalty.matrix, tol=1e-7, sigma=NULL, var.penalty=c('simple','sandwich'), ...) { call <- match.call() var.penalty <- match.arg(var.penalty) # X's present w <- terms(formula, data=data) if(length(attr(w, "term.labels"))) { callenv <- parent.frame() # don't delay these evaluations weights <- if(! missing(weights)) eval(substitute(weights), data, callenv) subset <- if(! missing(subset )) eval(substitute(subset), data, callenv) X <- modelData(data, formula, subset = subset, weights=weights, na.action=na.action, callenv=callenv) X <- Design(X, formula=formula) offset <- attr(X, 'offset') atrx <- attributes(X) sformula <- atrx$sformula atr <- atrx$Design nact <- atrx$na.action Terms <- atrx$terms assig <- DesignAssign(atr, 1, Terms) mmcolnames <- atr$mmcolnames penpres <- FALSE if(! missing(penalty) && any(unlist(penalty) != 0)) penpres <- TRUE if(! missing(penalty.matrix) && any(penalty.matrix != 0)) penpres <- TRUE if(penpres && missing(var.penalty)) warning('default for var.penalty has changed to "simple"') if(method == "model.frame") return(X) scale <- as.character(formula[2]) attr(Terms, "formula") <- formula weights <- model.extract(X, 'weights') if(length(weights) && penpres) stop('may not specify penalty with weights') Y <- model.extract(X, 'response') ## For some reason integer class being attached to Y if labelled class(Y) <- setdiff(class(Y), 'integer') n <- length(Y) if(model) m <- X X <- model.matrix(Terms, X) alt <- attr(mmcolnames, 'alt') if(! 
all(mmcolnames %in% colnames(X)) && length(alt)) mmcolnames <- alt ## prn(mmcolnames); prn(colnames(X)) X <- X[, c('(Intercept)', mmcolnames), drop=FALSE] colnames(X) <- c('Intercept', atr$colnames) #if(length(atr$colnames)) # dimnames(X)[[2]] <- c("Intercept", atr$colnames) #else dimnames(X)[[2]] <- c("Intercept", dimnames(X)[[2]][-1]) if(method == "model.matrix") return(X) } ##Model with no covariables: else { if(length(weights)) stop('weights not implemented when no covariables are present') assig <- NULL yy <- attr(terms(formula), "variables")[1] Y <- eval(yy, sys.parent(2)) nmiss <- sum(is.na(Y)) if(nmiss==0) nmiss <- NULL else names(nmiss) <- as.character(yy) Y <- Y[! is.na(Y)] yest <- mean(Y) coef <- yest n <- length(Y) if(! length(sigma)) sigma <- sqrt(sum((Y - yest) ^ 2) / (n - 1)) cov <- matrix(sigma * sigma / n, nrow=1, ncol=1, dimnames=list("Intercept","Intercept")) fit <- list(coefficients=coef, var=cov, non.slopes=1, fail=FALSE, residuals=Y - yest, df.residual=n - 1, intercept=TRUE, sformula=sformula) if(linear.predictors) { fit$linear.predictors <- rep(yest, n); names(fit$linear.predictors) <- names(Y) } if(model) fit$model <- m if(x) fit$x <- NULL #matrix(1, ncol=1, nrow=n, ## dimnames=list(NULL,"Intercept")) if(y) fit$y <- Y class(fit) <- c("ols","rms","lm") return(fit) } if(! penpres) { fit <- if(length(weights)) lm.wfit(X, Y, weights, method=method, offset=offset, tol=tol, ...) else lm.fit (X, Y, method=method, offset=offset, tol=tol, ...) cov.unscaled <- chol2inv(fit$qr$qr) ## For some reason when Y was labelled, fit functions are making ## residuals and fitted.values class integer fit$fitted.values <- unclass(fit$fitted.values) fit$residuals <- unclass(fit$residuals) r <- fit$residuals yhat <- Y - r if(length(weights)) { ## see summary.lm sse <- sum(weights * r^2) m <- sum(weights * yhat / sum(weights)) ssr <- sum(weights * (yhat - m)^2) r2 <- ssr / (ssr + sse) if(!length(sigma)) sigma <- sqrt(sse / fit$df.residual) } else { sse <- sum(r ^ 2) if(!length(sigma)) sigma <- sqrt(sse / fit$df.residual) r2 <- 1 - sse/sum((Y - mean(Y)) ^ 2) } fit$var <- sigma * sigma * cov.unscaled cnam <- dimnames(X)[[2]] dimnames(fit$var) <- list(cnam, cnam) fit$stats <- c(n=n,'Model L.R.'= - n * logb(1. 
- r2), 'd.f.'=length(fit$coef) - 1, R2=r2, g=GiniMd(yhat), Sigma=sigma) } else { p <- length(atr$colnames) if(missing(penalty.matrix)) penalty.matrix <- Penalty.matrix(atr, X) if(nrow(penalty.matrix) != p || ncol(penalty.matrix) != p) stop('penalty matrix does not have', p, 'rows and columns') psetup <- Penalty.setup(atr, penalty) penalty <- psetup$penalty multiplier <- psetup$multiplier if(length(multiplier) == 1) penalty.matrix <- multiplier * penalty.matrix else { a <- diag(sqrt(multiplier)) penalty.matrix <- a %*% penalty.matrix %*% a } fit <- lm.pfit(X[, -1, drop=FALSE], Y, offset=offset, penalty.matrix=penalty.matrix, tol=tol, var.penalty=var.penalty) fit$fitted.values <- unclass(fit$fitted.values) fit$residuals <- unclass(fit$residuals) fit$penalty <- penalty } if(model) fit$model <- m if(linear.predictors) { fit$linear.predictors <- Y - fit$residuals if(length(offset)) fit$linear.predictors <- fit$linear.predictors + offset names(fit$linear.predictors) <- names(Y) } if(y) fit$y <- Y if(se.fit) { se <- drop((((X %*% fit$var) * X) %*% rep(1, ncol(X))) ^ 0.5) names(se) <- names(Y) fit$se.fit <- se } if(x) fit$x <- X[, -1, drop=FALSE] fit <- c(fit, list(call=call, terms=Terms, Design=atr, non.slopes=1, na.action=nact, scale.pred=scale, fail=FALSE)) fit$assign <- assig fit$sformula <- sformula class(fit) <- c("ols", "rms", "lm") fit } lm.pfit <- function(X, Y, offset=NULL, penalty.matrix, tol=1e-7, regcoef.only=FALSE, var.penalty=c('simple', 'sandwich')) { if(length(offset)) Y <- Y - offset var.penalty <- match.arg(var.penalty) X <- cbind(Intercept=1, X) p <- ncol(X) - 1 pm <- rbind(matrix(0, ncol=p + 1, nrow=1), # was ncol=p+1 cbind(matrix(0, ncol=1, nrow=p), penalty.matrix)) xpx <- t(X) %*% X Z <- solvet(xpx + pm, tol=tol) coef <- Z %*% t(X) %*% Y if(regcoef.only) return(list(coefficients=coef)) yhat <- drop(X %*% coef) res <- Y - yhat n <- length(Y) sse <- sum(res^2) s2 <- drop( (sse + t(coef) %*% pm %*% coef) / n ) var <- if(var.penalty=='simple') s2 * Z else s2 * Z %*% xpx %*% Z cnam <- dimnames(X)[[2]] dimnames(var) <- list(cnam, cnam) sst <- (n - 1) * var(Y) lr <- n*(1 + logb(sst / n)) - n * logb(s2) - sse / s2 s2.unpen <- sse / n dag <- diag((xpx / s2.unpen) %*% (s2 * Z)) df <- sum(dag) - 1 stats <- c(n=n, 'Model L.R.'=lr, 'd.f.'=df, R2=1 - sse / sst, g=GiniMd(yhat), Sigma=sqrt(s2)) list(coefficients=drop(coef), var=var, residuals=res, df.residual=n - df - 1, penalty.matrix=penalty.matrix, stats=stats, effective.df.diagonal=dag) } predict.ols <- function(object, newdata, type=c("lp", "x", "data.frame", "terms", "cterms", "ccterms", "adjto", "adjto.data.frame", "model.frame"), se.fit=FALSE, conf.int=FALSE, conf.type=c('mean', 'individual', 'simultaneous'), kint=1, na.action=na.keep, expand.na=TRUE, center.terms=type == "terms", ...) { type <- match.arg(type) predictrms(object, newdata, type=type, se.fit=se.fit, conf.int=conf.int, conf.type=conf.type, kint=kint, na.action=na.action, expand.na=expand.na, center.terms=center.terms, ...) } print.ols <- function(x, digits=4, long=FALSE, coefs=TRUE, title="Linear Regression Model", ...) 
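## lm.pfit above computes the penalized least squares solution
##   beta-hat = (X'X + P)^(-1) X'Y
## where P is the penalty matrix bordered by zeros for the intercept.
## Hedged sketch of a direct call (not run; lm.pfit is normally invoked
## by ols when penalty is given; x, y hypothetical):
##   fp <- lm.pfit(x, y, penalty.matrix=diag(ncol(x)))
##   fp$stats['d.f.']   # trace-based effective degrees of freedom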
{ latex <- prType() == 'latex' k <- 0 z <- list() if(length(zz <- x$na.action)) { k <- k + 1 z[[k]] <- list(type=paste('naprint', class(zz)[1], sep='.'), list(zz)) } stats <- x$stats pen <- length(x$penalty.matrix) > 0 resid <- x$residuals n <- length(resid) p <- length(x$coef) - (names(x$coef)[1] == "Intercept") if(length(stats)==0) cat("n=", n," p=", p, "\n\n", sep="") ndf <- stats['d.f.'] df <- c(ndf, n - ndf - 1, ndf) r2 <- stats['R2'] sigma <- stats['Sigma'] rdf <- df[2] rsqa <- 1 - (1 - r2) * (n - 1) / rdf lrchisq <- stats['Model L.R.'] ci <- x$clusterInfo if(lst <- length(stats)) { misc <- reListclean(Obs=stats['n'], sigma=sigma, 'd.f.'=df[2], 'Cluster on'=ci$name, Clusters=ci$n) lr <- reListclean('LR chi2' = lrchisq, 'd.f.' = ndf, 'Pr(> chi2)' = 1 - pchisq(lrchisq, ndf)) disc <- reListclean(R2=r2, 'R2 adj'=rsqa, g=stats['g']) headings <- c('', 'Model Likelihood\nRatio Test', 'Discrimination\nIndexes') data <- list(c(misc, c(NA,digits,NA,NA,NA)), c(lr, c(2,NA,4)), c(disc,3)) k <- k + 1 z[[k]] <- list(type='stats', list(headings=headings, data=data)) } if(rdf > 5) { if(length(dim(resid)) == 2) { rq <- apply(t(resid), 1, quantile) dimnames(rq) <- list(c("Min", "1Q", "Median", "3Q", "Max"), dimnames(resid)[[2]]) } else { rq <- quantile(resid) names(rq) <- c("Min", "1Q", "Median", "3Q", "Max") } k <- k + 1 z[[k]] <- list(type=if(latex)'latexVector' else 'print', list(rq, digits=digits), tex=latex, title='Residuals') } else if(rdf > 0) { k <- k + 1 z[[k]] <- list(type=if(latex)'latexVector' else 'print', list(resid, digits=digits), tex=latex, title='Residuals') } if(nsingular <- df[3] - df[1]) { k <- k + 1 z[[k]] <- list(type='cat', paste(nsingular, 'coefficients not defined because of singularities')) } k <- k + 1 se <- sqrt(diag(x$var)) z[[k]] <- list(type='coefmatrix', list(coef = x$coefficients, se = se, errordf = rdf)) if(!pen) { if(long && p > 0) { correl <- diag(1/se) %*% x$var %*% diag(1/se) dimnames(correl) <- dimnames(x$var) cat("\nCorrelation of Coefficients:\n") ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits), ...) correl[!ll] <- "" k <- k + 1 z[[k]] <- list(type='print', list(correl[-1, - (p + 1), drop = FALSE], quote=FALSE, digits = digits)) } } prModFit(x, title=title, z, digits=digits, coefs=coefs, ...) } rms/R/pphsm.s0000644000176200001440000000260212470210740012564 0ustar liggesuserspphsm <- function(fit) { warning("at present, pphsm does not return the correct covariance matrix") clas <- class(fit)[1] if(clas %nin% c('psm', 'survreg')) stop("fit must be created by psm or survreg") if(fit$dist %nin% c('exponential','weibull')) stop("fit must have used dist='weibull' or 'exponential'") fit$coefficients <- -fit$coefficients/fit$scale fit$scale.pred <- c("log Relative Hazard","Hazard Ratio") class(fit) <- c("pphsm", class(fit)) fit } print.pphsm <- function(x, digits = max(options()$digits - 4, 3), correlation = TRUE, ...) { if (length(f <- x$fail) && f) stop(" Survreg failed. 
No summary provided") cat("Parametric Survival Model Converted to PH Form\n\n") stats <- x$stats stats[3] <- round(stats[3],2) stats[5] <- round(stats[5],4) stats[6] <- round(stats[6],2) print(formatSep(stats),quote=FALSE) cat("\n") print(c(x$coef, x$icoef[2]), digits=digits) correl <- x$correl if (correlation && !is.null(x$correl)) { ## FEH p <- dim(correl)[2] if (p > 1) { cat("\nCorrelation of Coefficients:\n") ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits = digits)) correl[!ll] <- "" print(correl[-1, -p, drop = FALSE], quote = FALSE) } } cat("\n") invisible() } vcov.pphsm <- function(object, ...) .NotYetImplemented() rms/R/plot.Predict.s0000644000176200001440000003574413163741776014043 0ustar liggesusersplot.Predict <- function(x, formula, groups=NULL, cond=NULL, varypred=FALSE, subset, xlim, ylim, xlab, ylab, data=NULL, subdata, anova=NULL, pval=FALSE, cex.anova=.85, col.fill=gray(seq(.825, .55, length=5)), adj.subtitle, cex.adj, cex.axis, perim=NULL, digits=4, nlevels=3, nlines=FALSE, addpanel, scat1d.opts=list(frac=0.025, lwd=0.3), type=NULL, yscale=NULL, scaletrans=function(z) z, ...) { isbase <- TRUE ## plotly does not apply for lattice graphics if(! isbase && length(anova)) stop('anova not yet implemented for grType plotly') if(varypred) { x$.predictor. <- x$.set. x$.set. <- NULL } predpres <- length(x$.predictor.) if(missing(addpanel)) addpanel <- function(...) {} subdatapres <- !missing(subdata) if(subdatapres) subdata <- substitute(subdata) doscat1d <- function(x, y, col) { so <- scat1d.opts if(!length(so$col)) so$col <- col do.call('scat1d', c(list(x=x, y=y), so, grid=TRUE)) } info <- attr(x, 'info') at <- info$Design label <- at$label units <- at$units values <- info$values adjust <- info$adjust yunits <- info$yunits varying <- info$varying conf.int <- info$conf.int dotlist <- list(...) gname <- groups if(length(gname)) { if(length(gname) > 1 || !is.character(gname) || gname %nin% names(x)) stop('groups must be a single predictor name') } if(length(cond)) { if(length(cond) > 1 || !is.character(cond) || cond %nin% names(x)) stop('cond must be a single predictor name') } if(missing(ylab)) ylab <- if(isbase) info$ylabPlotmath else info$ylabhtml if(!length(x$lower)) conf.int <- FALSE if(missing(ylim)) ylim <- range(pretty( if(conf.int) c(x$yhat, x$lower, x$upper) else x$yhat), na.rm=TRUE) if(missing(adj.subtitle)) adj.subtitle <- length(adjust) > 0 sub <- if(adj.subtitle && length(adjust)==1) paste('Adjusted to:', adjust, sep='') else NULL cex <- par('cex') if(missing(cex.adj)) cex.adj <- .75*cex if(length(sub)) sub <- list(sub, cex=cex.adj, just=c('center','bottom')) subset <- if(missing(subset)) TRUE else eval(substitute(subset),x) oldopt <- options('digits') options(digits=digits) on.exit(options(oldopt)) tanova <- if(length(anova)) function(name, x, y) annotateAnova(name, plotmathAnova(anova, pval), x, y, cex=cex.anova) else function(...) {} if(predpres) { if(! missing(formula)) stop('formula may not be given when multiple predictors were varied separately') p <- as.factor(x$.predictor.) 
xp <- rep(NA, length(p)) levs <- at <- labels <- limits <- list() lp <- levels(p) np <- length(lp) groups <- gr <- if(length(gname)) as.factor(x[[gname]]) cond <- co <- if(length(cond)) as.factor(x[[cond]]) perhapsAbbrev <- function(k) { len <- nchar(k) if(sum(len) > 30 | max(len) > 12) abbreviate(k, minlength=max(3, round(17 / length(k)))) else k } for(w in lp) { i <- p == w z <- x[[w]] l <- levels(z) ll <- length(l) levs[[w]] <- if(ll) l else character(0) xp[i] <- as.numeric(z[i]) if(length(groups)) gr[i] <- groups[i] if(length(cond)) co[i] <- cond[i] at[[w]] <- if(ll) 1 : ll else pretty(z[i]) labels[[w]] <- if(ll) perhapsAbbrev(l) else format(at[[w]]) limits[[w]] <- if(ll) c(2 / 3, ll + 1 / 3) else range(z[i]) } if(length(cond)) { nuc <- length(levels(cond)) at <- at[rep(seq(1, length(at)), each=nuc)] labels <- labels[rep(seq(1, length(labels)), each=nuc)] limits <- limits[rep(seq(1, length(limits)), each=nuc)] levs <- levs[rep(seq(1, length(levs)), each=nuc)] formula <- if(!conf.int) x$yhat ~ xp | cond*p else Cbind(x$yhat, x$lower, x$upper) ~ xp | cond*p } else { formula <- if(!conf.int) x$yhat ~ xp | p else Cbind(x$yhat, x$lower, x$upper) ~ xp | p } panpred <- function(x, y, ...) { pn <- lattice::panel.number() lev <- levs[[pn]] col <- lattice::trellis.par.get('superpose.line')$col if(!length(lev) && length(unique(x[!is.na(x)])) > nlevels) { # continuous x yy <- y if(length(perim)) { j <- perim(x, NULL) yy[j] <- NA if(length(attr(yy, 'other'))) attr(yy, 'other')[j, ] <- NA } panel.xYplot(x, yy, ...) tanova(names(levs)[pn], x, yy) if(length(data) && length(xd <- data[[names(levs)[pn]]])) { xd <- xd[!is.na(xd)] doscat1d(xd, approx(x, y, xout=xd, rule=2, ties=mean)$y, col=col[1]) } } else { # discrete x lattice::panel.points(x, y, pch=19) yoth <- attr(y, 'other') yo <- length(yoth) if(yo) for(u in unique(x)) lattice::llines(c(u, u), yoth[x==u, ]) tanova(names(levs)[pn], if(yo) c(x, x, x) else x, if(yo) c(y, yoth[, 1], yoth[, 2]) else y) } addpanel(x, y, ...) } scales <- list(x=list(relation='free', limits=limits, at=at, labels=labels)) if(!missing(cex.axis)) scales$x$cex <- cex.axis if(length(yscale)) scales$y <- yscale r <- list(formula=formula, groups=gr, subset=subset, type=if(length(type))type else 'l', method=if(conf.int & (!length(type) || type != 'p')) 'filled bands' else 'bars', col.fill=col.fill, xlab='', ylab=ylab, ylim=ylim, panel=panpred, scales=scaletrans(scales), between=list(x=.5)) if(length(dotlist)) r <- c(r, dotlist) if(length(sub )) r$sub <- sub } else { # predictor not listed v <- character(0) bar <- '' f <- if(!missing(formula)) gsub(' .*','',as.character(formula)[2]) else varying[1] iv <- var.inner(as.formula(paste('~', f))) if(missing(xlab)) xlab <- labelPlotmath(label[iv], units[iv], html=! 
isbase) if(missing(formula)) { xvar <- varying[1] ## change formula like ~x|foo to x if(length(varying) > 1) { v <- varying[-1] if(length(gname)) { groups <- x[[gname]] v <- setdiff(v, gname) } else { nu <- sapply(x[v], function(u) length(unique(u))) if(!predpres && any(nu <= nlevels)) { i <- which.min(nu) gname <- v[i] groups <- x[[gname]] v <- setdiff(v, gname) } } if(length(v)) { bar <- paste(v, collapse='*') for(z in v) { if(z %in% names(x) && is.numeric(x[[z]])) { x[[z]] <- factor(format(x[[z]])) levels(x[[z]]) <- paste(z,':',levels(x[[z]]), sep='') } } } } xv <- x[[xvar]] xdiscrete <- is.factor(xv) || is.character(xv) || length(unique(xv[!is.na(xv)])) <= nlevels if(xdiscrete) { f <- paste(xvar, if(conf.int) 'Cbind(yhat,lower,upper)' else 'yhat', sep='~') if(bar != '') f <- paste(f, bar, sep='|') formula <- eval(parse(text=f)) if(length(v)) for(z in v) { if(z %in% names(x) && is.numeric(x[[z]])) { x[[z]] <- factor(format(x[[z]])) levels(x[[z]]) <- paste(z,':',levels(x[[z]]), sep='') } } r <- Dotplot(formula, groups=groups, subset=subset, xlim=ylim, xlab=ylab, ylab=xlab, sub=sub, data=x, between=list(x=.5), ...) return(r) } if(bar != '') f <- paste(f, '|', bar) } else { # formula given f <- as.character(formula)[2] xvar <- gsub(' .*', '', f) if(length(grep('\\|', f))) { g <- gsub(' ', '', f) g <- gsub('.*\\|', '', g) v <- strsplit(g, '\\*')[[1]] } if(length(v)) for(z in v) { if(z %in% names(x) && is.numeric(x[[z]])) { x[[z]] <- factor(format(x[[z]])) levels(x[[z]]) <- paste(z,':',levels(x[[z]]), sep='') } } } f <- paste(if(conf.int) 'Cbind(yhat,lower,upper)' else 'yhat', f, sep='~') formula <- eval(parse(text=f)) xv <- x[[xvar]] xscale <- NULL xdiscrete <- (is.factor(xv) || is.character(xv)) && nlines if(xdiscrete) { xv <- as.factor(xv) xlev <- levels(xv) xscale <- list(x=list(at=1:length(xlev), labels=xlev)) if(!missing(cex.axis)) xscale$x$cex <- cex.axis x[[xvar]] <- as.integer(xv) } ## Continuing: no predpres case pannopred <- function(x, y, groups=NULL, subscripts, ...) { ogroups <- groups if(length(groups)) groups <- groups[subscripts] yy <- y if(length(perim)) { if(! length(groups)) { j <- ! perim(x, NULL) yy[j] <- NA if(length(attr(yy, 'other'))) attr(yy, 'other')[j, ] <- NA } else { ## perim and groups specified for(w in if(is.factor(groups)) levels(groups) else unique(groups)) { i <- which(groups == w) j <- ! perim(x[i], w) yy[i[j]] <- NA if(length(attr(yy, 'other'))) attr(yy, 'other')[i[j], ] <- NA } } } panel.xYplot(x, yy, groups=ogroups, subscripts=subscripts, ...) tanova(xvar, x, yy) col <- lattice::trellis.par.get('superpose.line')$col xd <- data[[xvar]] use <- TRUE if(length(xd) && subdatapres) { use <- eval(subdata, data) if(length(use) != nrow(data)) stop('subdata must evaluate to a length of nrow(data)') } if(length(groups) && length(gd <- data[[gname]]) && length(xd)) { g <- groups[subscripts] j <- 0 for(w in levels(g)) { j <- j + 1 z <- g==w xg <- x[z] yg <- y[z] x1 <- xd[use & gd==w] x1 <- x1[!is.na(x1)] doscat1d(x1, approx(xg, yg, xout=x1, rule=2, ties=mean)$y, col=col[j]) } } else if(length(xd)) { xd <- xd[use & !is.na(xd)] doscat1d(xd, approx(x, y, xout=xd, rule=2, ties=mean)$y, col=col[1]) } addpanel(x, y, groups=NULL, subscripts=subscripts, ...) 
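## (addpanel is the user hook, called last so that custom annotations
## draw on top of the fitted curves, anova annotation, and data rugs.)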
} r <- list(formula=formula, data=x, subset=subset, type=if(length(type)) type else if(xdiscrete) 'b' else 'l', method=if(conf.int & (!length(type) || type!='p')) 'filled bands' else 'bars', col.fill=col.fill, xlab=xlab, ylab=ylab, ylim=ylim, panel=pannopred, between=list(x=.5)) scales <- NULL if(length(xscale)) scales <- xscale if(length(yscale)) scales$y <- yscale r$scales <- scaletrans(scales) if(!missing(xlim)) r$xlim <- xlim if(!conf.int) r$method <- NULL if(length(gname)) r$groups <- x[[gname]] if(length(sub)) r$sub <- sub if(length(dotlist)) r <- c(r, dotlist) } do.call('xYplot', r) } pantext <- function(object, x, y, cex=.5, adj=0, fontfamily='Courier', lattice=TRUE) { k <- paste(capture.output(object), collapse='\n') fam <- fontfamily if(lattice) { za <- function(x, y, ..., xx, yy, text, cex, adj, family) lattice::ltext(xx, yy, text, cex=cex, adj=adj, fontfamily=family) formals(za) <- eval(substitute(alist(x=, y=, ...=, xx=xx, yy=yy, text=k, cex=cex, adj=adj, family=fam), list(xx=x, yy=y, k=k, cex=cex, adj=adj, fam=fam))) za } else { zb <- function(x, y, text, cex, adj, family, ...) text(x, y, text, adj=adj, cex=cex, family=family, ...) formals(zb) <- eval(substitute(alist(x=x, y=y, text=k, cex=cex, adj=adj, family=fam, ...=), list(x=x, y=y, k=k, cex=cex, adj=adj, fam=fam))) zb } } plotmathAnova <- function(anova, pval) { vi <- attr(anova, 'vinfo') aname <- sapply(vi, function(x) paste(x$name, collapse=',')) atype <- sapply(vi, function(x) x$type) wanova <- atype %in% c('main effect', 'combined effect') test <- if('F' %in% colnames(anova)) 'F' else 'Chi-Square' stat <- round(anova[wanova, test], 1) pstat <- anova[wanova, 'P'] dof <- anova[wanova, 'd.f.'] stat <- if(test == 'Chi-Square') paste('chi[', dof, ']^2 == ', stat, sep='') else paste('F[paste(', dof, ',",",', anova['ERROR', 'd.f.'], ')] == ', stat, sep='') if(pval) { pval <- formatNP(pstat, digits=3, pvalue=TRUE) pval <- ifelse(grepl('<', pval), paste('P', pval, sep=''), paste('P==', pval, sep='')) stat <- paste(stat, pval, sep='~~') } names(stat) <- aname[wanova] stat } ## stat is result of plotmathAnova ## xlim and ylim must be specified if ggplot=TRUE annotateAnova <- function(name, stat, x, y, ggplot=FALSE, xlim=NULL, ylim=NULL, cex, size=4, flip=FALSE, empty=FALSE, dataOnly=FALSE) { x <- as.numeric(x) y <- as.numeric(y) if(flip) { yorig <- y y <- x x <- yorig ylimorig <- ylim ylim <- xlim xlim <- ylimorig } ## size is for ggplot2 only; is in mm ## See if an area is available near the top or bottom of the ## current panel if(! ggplot) { cpl <- lattice::current.panel.limits() xlim <- cpl$xlim ylim <- cpl$ylim } else if(! length(xlim) || ! length(ylim)) stop('xlim and ylim must be given if ggplot=TRUE') dy <- diff(ylim) if(! empty && !any(y > ylim[2] - dy / 7)) { z <- list(x = mean(xlim), y = ylim[2] - .075 * dy) # was -.025 adj <- c(.5, 1) } else if(! empty && !any(y < ylim[1] + dy / 7)) { z <- list(x = mean(xlim), y = ylim[1] + .075 * dy) # was .025 adj <- c(.5, 0) } else { z <- if(! length(xlim) || ! 
length(ylim)) largest.empty(x, y, grid=TRUE, method='exhaustive') else largest.empty(x, y, grid=TRUE, method='exhaustive', xlim=xlim, ylim=ylim) adj <- c(if(z$x > mean(xlim)) 1 else .5, if(z$y > mean(ylim)) 1 else 0) } if(flip) { zyorig <- z$y z$y <- z$x z$x <- zyorig adj <- rev(adj) } ## parse=TRUE: treat stat[name] as an expression if(dataOnly) return(list(x=z$x, y=z$y, label=stat[name], hjust=adj[1], vjust=adj[2])) if(ggplot) annotate('text', x=z$x, y=z$y, label=stat[name], parse=TRUE, size=size, hjust=adj[1], vjust=adj[2]) else lattice::ltext(z$x, z$y, parse(text=stat[name]), cex=cex, adj=adj) } rms/R/latex.lrm.s0000644000176200001440000001106213020562255013346 0ustar liggesuserslatex.lrm <- function(object, title, file='', append=FALSE, which, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', ...) { f <- object md <- prType() %in% c('html', 'md', 'markdown') if(missing(which) & !inline) { Y <- paste("{\\rm ",as.character(attr(f$terms,"formula")[2]),"}",sep="") lev <- names(f$freq) nrp <- f$non.slopes w <- '\\[' j <- if(lev[2]=="TRUE") "" else paste("=",lev[2],sep="") if(nrp==1) w <- paste(w,"{\\rm Prob}\\{",Y, j, "\\} = \\frac{1}{1+\\exp(-X\\beta)}", sep="") else w <- paste(w,"{\\rm Prob}\\{", Y, "\\geq j\\} = \\frac{1}{1+\\exp(-\\alpha_{j}-X\\beta)}", sep="") w <- paste(w, ", {\\rm \\ \\ where} \\\\ \\]", sep="") if(length(caption)) { if(md) w <- c(paste('

<div align=center><strong>', caption, '</strong></div>
'), w) else w <- c(paste('\\begin{center} \\bf',caption, '\\end{center}'), w) } if(nrp > 1) { w <- c(w,"\\begin{eqnarray*}") cof <- format(f$coef[1:nrp]) for(i in 1:nrp) w <- c(w, paste("\\hat{\\alpha}_{\\rm ", lev[i+1],"} &=&",cof[i],"\\\\",sep="")) w <- c(w,"\\end{eqnarray*}",sep="") } } else w <- NULL if(missing(which) | missing(varnames)) at <- f$Design if(missing(which)) which <- 1:length(at$name) if(missing(varnames)) varnames <- at$name[at$assume.code!=9] if(! md) cat(w, file=file, append=append, sep=if(length(w))"\n" else "") z <- latexrms(f, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix="X\\hat{\\beta}", inline=inline, pretrans=pretrans, digits=digits, size=size) if(md) htmltools::HTML(c(paste0(w, '\n'), as.character(z))) else z } latex.orm <- function(object, title, file='', append=FALSE, which, varnames, columns=65, inline=FALSE, before=if(inline)"" else "& &", after="", pretrans=TRUE, caption=NULL, digits=.Options$digits, size='', intercepts=nrp < 10, ...) { f <- object md <- prType() %in% c('html', 'md', 'markdown') if(missing(which) & !inline) { Y <- paste("{\\rm ",as.character(attr(f$terms,"formula")[2]),"}",sep="") lev <- names(f$freq) nrp <- f$non.slopes z <- '\\alpha_{y} + X\\beta' zm <- '- \\alpha_{y} - X\\beta' dist <- switch(f$family, logistic = paste('\\frac{1}{1+\\exp(', zm, ')}', sep=''), probit = paste('\\Phi(', z, ')', sep=''), cauchit = paste('\\frac{1}{\\pi}\\tan^{-1}(', z, ') + \\frac{1}{2}', sep=''), loglog = paste('\\exp(-\\exp(', zm, '))', sep=''), cloglog = paste('1 - \\exp(-\\exp(', z, ')', sep='')) w <- '\\[' w <- paste(w, "{\\rm Prob}\\{", Y, "\\geq y | X\\} = ", dist, sep='') w <- paste(w, ", {\\rm \\ \\ where} \\\\ \\]", sep="") if(length(caption)) { if(md) w <- c(paste('
<div align=center><strong>', caption, '</strong></div>
'), w) else w <- c(paste('\\begin{center} \\bf',caption, '\\end{center}'), w) } if(intercepts) { nl <- as.numeric(lev) if(!any(is.na(nl))) lev <- format(nl, digits=digits) w <- c(w,"\\begin{eqnarray*}") cof <- format(f$coef[1:nrp], digits=digits) for(i in 1:nrp) w <- c(w, paste("\\hat{\\alpha}_{\\rm ", lev[i+1],"} &=&",cof[i],"\\\\",sep="")) w <- c(w, "\\end{eqnarray*}", sep="") } } else w <- NULL if(missing(which) | missing(varnames)) at <- f$Design if(missing(which)) which <- 1:length(at$name) if(missing(varnames)) varnames <- at$name[at$assume.code!=9] if(! md) cat(w, file=file, append=append, sep=if(length(w))"\n" else "") z <- latexrms(f, file=file, append=TRUE, which=which, varnames=varnames, columns=columns, before=before, after=after, prefix="X\\hat{\\beta}", inline=inline, pretrans=pretrans, digits=digits, size=size) if(md) htmltools::HTML(c(paste0(w, '\n'), as.character(z))) else z } rms/R/lrm.fit.s0000644000176200001440000002415313703374031013020 0ustar liggesusers#If est is specified and it is not 1:ncol(x), user will have to negate # $var[est,est] before running matinv on non.slopes+(1:nx)[-est] in # obtaining score # statistics. # Use est=NULL to compute score stat components for all vars # fit$non.slopes = # intercepts # trace to print loglik at each iteration # Set tol=0 to turn off singularity checking # tol is only used during iterations, not for final inversion (since # solve does not pass the tolerance argument). #Mod 1-2-91 : change !is.matrix to is.vector #Sent to statlib : ??/??/?? #Mod 5-24-91: if maxiter=1, does not compute p-values #Mod 6-11-91: model with no covariables return proper fail, added is.null to # missing(x), is.vector to !is.matrix(x) #Mod 10-8-91: Changed missing data routines to tstna, naset (see na.fortran.f), # added specialsok=T to .Fortran calls #Mod 10-11-91:Added class attribute "logist" to fit object, improved call #Mod 10-30-91:Changed to lrm.fit for use with lrm function, # removed subset, print.iter->trace,maxiter->maxit,dlike->eps, # eps->tol, f$coef->f$coefficients # Remove attributes(x) from fit object #Mod 3-5-92 :Use solvet instead of solve, to pass tol argument # 6-9-92 :Change to Nagelkerke R2 # 9-27-92 :Remove dyn load commands (using .First.lib now) # 5-23-94 :Check for 0 length as well as NULL # 11-28-94 :added Brier score, # return linear predictor, get rid of "nused", improve NA logic # 1-17-95 :added penalty, penalty.matrix # 9-30-95 :changed penalty matrix to be self-contained # 5-06-96 :return information matrix # 6-06-02 :added back weights, normwt like SAS PROC LOGIST # 1-17-03 :made all versions use weights, double precision for x,y # 5-13-10 :change B to use middle intercept; added g-index # 8-17-14 :added scale lrm.fit <- function(x, y, offset=0, initial, est, maxit=12, eps=.025, tol=1E-7, trace=FALSE, penalty.matrix=NULL, weights=NULL, normwt=FALSE, scale=FALSE) { cal <- match.call() opts <- double(12) opts[1:4] <- c(tol, eps, maxit, trace) len.penmat <- length(penalty.matrix) n <- length(y) wtpres <- TRUE if(!length(weights)) { wtpres <- FALSE normwt <- FALSE weights <- rep(1, n) } if(length(weights) != n) stop('length of wt must equal length of y') if(normwt) weights <- weights * n / sum(weights) storage.mode(weights) <- 'double' opts[12] <- normwt initial.there <- !missing(initial) if(missing(x) || length(x) == 0) { nx <- 0 xname <- NULL if(!missing(est))stop("est was given without x") est <- NULL x <- 0 } else { if(! 
is.matrix(x)) x <- as.matrix(x) dx <- dim(x) nx <- dx[2] if(dx[1] != n)stop("x and y must have same length") if(scale) { x <- scale(x) scinfo <- attributes(x)[c('scaled:center', 'scaled:scale')] xbar <- as.matrix(scinfo[[1]]) xsd <- as.matrix(scinfo[[2]]) } storage.mode(x) <- "double" if(missing(est)) est <- 1:nx else if(length(est)) { estr <- range(est) if(estr[1] < 1 | estr[2] > nx) stop("est has illegal column number for x") if(anyDuplicated(est)) stop("est has duplicates") storage.mode(est) <- "integer" } xname <- dimnames(x)[[2]] if(length(xname)==0) xname <- paste("x[",1:nx,"]",sep="") } nxin <- length(est) if(! is.factor(y)) y <- as.factor(y) y <- unclass(y) # in case is.factor ylevels <- levels(y) ofpres <- !all(offset == 0) opts[5] <- ofpres if(ofpres && length(offset) != n) stop("offset and y must have same length") storage.mode(offset) <- "double" if(n < 3) stop("must have >=3 non-missing observations") kint <- as.integer(length(ylevels) - 1) ftable <- integer(5001 * (kint + 1)) levels(y) <- ylevels numy <- table(y) names(numy) <- ylevels y <- as.integer(y - 1) nvi <- as.integer(nxin + kint) sumwty <- tapply(weights, y, sum) sumwt <- sum(sumwty) if(!wtpres && any(numy != sumwty)) stop('program logic error 1') sumw <- if(normwt) numy else as.integer(round(sumwty)) if(missing(initial)) { ncum <- rev(cumsum(rev(sumwty)))[2 : (kint + 1)] pp <- ncum/sumwt initial <- log(pp / (1 - pp)) if(ofpres) initial <- initial - mean(offset) } if(length(initial) < nvi) initial <- c(initial, rep(0, nvi - length(initial))) storage.mode(initial) <- "double" loglik <- -2 * sum(sumwty*logb(sumwty/sum(sumwty))) ## loglik <- -2 * sum(numy * logb(numy/n)) if(nxin > 0) { if(len.penmat == 0) penalty.matrix <- matrix(0, nrow=nx, ncol=nx) if(nrow(penalty.matrix) != nx || ncol(penalty.matrix) != nx) stop(paste("penalty.matrix does not have", nx, "rows and columns")) penmat <- rbind( matrix(0, ncol=kint+nx, nrow=kint), cbind(matrix(0, ncol=kint, nrow=nx), penalty.matrix)) } else penmat <- matrix(0, ncol=kint, nrow=kint) storage.mode(penmat) <- 'double' if(nxin == 0 & ! ofpres) { loglik <- rep(loglik,2) z <- list(coef=initial, u=rep(0,kint), opts=as.double(c(rep(0,7), .5, 0, 0, 0, 0))) } if(ofpres) { ##Fit model with only intercept(s) and offset z <- .Fortran(F_lrmfit, coef=initial, as.integer(0), as.integer(0), x, y, offset, u=double(kint), double(kint*(kint+1)/2),loglik=double(1), n, as.integer(0), sumw, kint, v=double(kint*kint), double(kint), double(2*kint), double(kint), pivot=integer(kint), opts=opts, ftable, penmat, weights) loglik <- c(loglik,z$loglik) if(z$opts[6] | z$opts[7] < kint) return(structure(list(fail=TRUE), class="lrm")) initial <- z$coef } if(nxin > 0) { ##Fit model with intercept(s), offset, and any fitted covariables z <- .Fortran(F_lrmfit, coef=initial, nxin, est, x, y, offset, u=double(nvi), double(nvi*(nvi+1)/2), loglik=double(1), n, nx, sumw, nvi, v=double(nvi*nvi), double(nvi), double(2*nvi), double(nvi), pivot=integer(nvi), opts=opts, ftable, penmat, weights) irank <- z$opts[7] if(irank < nvi) { cat("singular information matrix in lrm.fit (rank=",irank, "). 
Offending variable(s):\n") cat(paste(xname[est[z$pivot[nvi : (irank + 1)] - kint]], collapse=" "),"\n") return(structure(list(fail=TRUE), class="lrm")) } loglik <- c(loglik, z$loglik) } dvrg <- z$opts[6] > 0 if(nxin != nx) { ##Set up for score statistics - last model is not refitted but derivatives ##with respect to all other columns of x are evaluated initial <- rep(0,nx) if(length(est)) initial[est] <- z$coef[(kint + 1) : nvi] initial <- c(z$coef[1 : kint], initial) nvi <- as.integer(kint + nx) opts[3] <- 1 #Max no. iterations z <- .Fortran(F_lrmfit, coef=initial, nx, 1:nx, x, y, offset, u=double(nvi), double(nvi*(nvi+1)), double(1), n, nx, sumw, nvi, v=double(nvi*nvi), double(nvi), double(2*nvi), double(nvi), integer(nvi), opts=opts, ftable, penmat, weights) } ##Invert v with respect to fitted variables if(nxin == 0) elements <- 1 : kint else elements <- c(1 : kint, kint + est) if(nx == 0 && !ofpres) { v <- NULL; info.matrix <- NULL irank <- kint } else { if(nxin == nx) { info.matrix <- matrix(z$v, nrow=nvi, ncol=nvi) v <- solvet(info.matrix, tol=tol) irank <- nvi } else { info.matrix <- matrix(z$v, nrow=nvi, ncol=nvi) v <- matinv(info.matrix, elements, negate=TRUE, eps=tol) info.matrix <- info.matrix[elements, elements] usc <- z$u[-elements] resid.chi2 <- usc %*% solve(v[-elements, -elements], tol=tol) %*% usc resid.df <- nx - nxin irank <- attr(v,"rank") attr(v,"rank") <- NULL } } if(kint == 1) name <- "Intercept" else name <- paste("y>=", ylevels[2 : (kint + 1)], sep="") name <- c(name, xname) kof <- z$coef ## Compute linear predictor before unscaling beta, as x is scaled lp <- if(nxin > 0) matxv(x, kof, kint=1) else rep(kof[1], n) if(scale && nx > 0) { trans <- rbind(cbind(diag(kint), matrix(0, nrow=kint, ncol=nx)), cbind(-matrix(rep(xbar/xsd, kint), ncol=kint), diag(1 / as.vector(xsd)))) v <- t(trans) %*% v %*% trans kof <- (kof %*% trans)[,, drop=TRUE] } names(kof) <- name names(z$u) <- name if(length(v)) dimnames(v) <- list(name, name) llnull <- loglik[length(loglik)-1] model.lr <- llnull - loglik[length(loglik)] model.df <- irank - kint model.p <- if(initial.there) NA else if(model.df > 0) 1 - pchisq(model.lr, model.df) else 1 r2 <- 1 - exp(- model.lr / sumwt) r2.max <- 1 - exp(- llnull / sumwt) r2 <- r2 / r2.max kmid <- floor((kint + 1) / 2) lpmid <- lp - kof[1] + kof[kmid] prob <- plogis(lpmid) event <- y > (kmid - 1) ## B <- mean((prob - event)^2) B <- sum(weights*(prob - event)^2) / sum(weights) g <- GiniMd(lpmid) gp <- GiniMd(prob) stats <- c(n, max(abs(z$u[elements])), model.lr, model.df, model.p, z$opts[8], z$opts[9], z$opts[10], z$opts[11], r2, B, g, exp(g), gp) nam <- c("Obs", "Max Deriv", "Model L.R.", "d.f.", "P", "C", "Dxy", "Gamma", "Tau-a", "R2", "Brier", "g", "gr", "gp") if(nxin != nx) { stats <- c(stats, resid.chi2, resid.df, 1 - pchisq(resid.chi2, resid.df)) nam <- c(nam, "Residual Score", "d.f.", "P") } names(stats) <- nam if(wtpres) stats <- c(stats, 'Sum of Weights'=sumwt) retlist <- list(call=cal, freq=numy, sumwty=if(wtpres)sumwty else NULL, stats=stats, fail=dvrg, coefficients=kof, var=v, u=z$u, deviance=loglik, est=est, non.slopes=kint, linear.predictors=lp, penalty.matrix=if(nxin>0 && any(penalty.matrix!=0)) penalty.matrix else NULL, info.matrix=info.matrix, weights=if(wtpres) weights else NULL) class(retlist) <- 'lrm' retlist } rms/R/validate.psm.s0000644000176200001440000001117113654066176014046 0ustar liggesusersvalidate.psm <- function(fit, method="boot", B=40, bw=FALSE, rule="aic", type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE, 
pr=FALSE, dxy=TRUE, tol=1e-12, rel.tolerance=1e-5, maxiter=15, ...) { xb <- fit$linear.predictors ny <- dim(fit$y) nevents <- sum(fit$y[, ny[2]]) ##Note: fit$y already has been transformed by the link function by psm dist <- fit$dist scale <- fit$scale parms <- fit$parms ## inverse <- survreg.distributions[[dist]]$itrans distance <- function(x, y, fit, iter, evalfit=FALSE, fit.orig, dxy=TRUE, dist,parms, tol=1e-12, maxiter=15, rel.tolerance=1e-5, ...) { ##Assumes y is matrix with 1st col=time, 2nd=event indicator if(evalfit) { #Fit was for training sample lr <- 2 * diff(fit$loglik) ll0 <- -2 * fit$loglik[1] R2.max <- 1 - exp(-ll0 / length(x)) R2 <- (1 - exp(-lr / length(x))) / R2.max intercept <- 0 slope <- 1 D <- (lr - 1) / ll0 U <- -2 / ll0 gindex <- GiniMd(x) } else { f <- survreg.fit2(x, y, iter=iter, dist=dist, parms=parms, tol=tol, maxiter=maxiter, rel.tolerance=rel.tolerance) if(f$fail) stop("survreg.fit2 failed in distance") lr <- 2 * diff(f$loglik) ll0 <- -2 * f$loglik[1] R2.max <- 1 - exp(-ll0 / length(x)) R2 <- (1 - exp(-lr / length(x))) / R2.max intercept <- f$coefficients[1] slope <- f$coefficients[2] D <- (lr - 1) / ll0 init <- c(0, 1, if(length(f$scale)) log(f$scale) else NULL) f.frozen <- survreg.fit2(x, y, dist=dist, parms=parms, tol=tol, maxiter=0, init=init) if(f.frozen$fail) stop('survreg.fit2 failed for frozen coefficient re-fit') ll0 <- -2 * f.frozen$loglik[1] frozen.lr <- 2 * diff(f.frozen$loglik) U <- (frozen.lr - lr) / ll0 gindex <- GiniMd(slope * x) } Q <- D - U z <- c(R2, intercept, slope, D, U, Q, gindex) nam <- c("R2", "Intercept", "Slope", "D", "U", "Q", "g") if(dxy) { Dxy <- dxy.cens(x,y)["Dxy"] z <- c(Dxy, z) nam <- c("Dxy", nam) } names(z) <- nam z } predab.resample(fit, method=method, fit=survreg.fit2, measure=distance, pr=pr, B=B, bw=bw, rule=rule, type=type, dxy=dxy, dist=dist, parms=parms, sls=sls, aics=aics, force=force, estimates=estimates, strata=FALSE, tol=tol, maxiter=maxiter, rel.tolerance=rel.tolerance, ...) } survreg.fit2 <- function(x, y, iter=0, dist, parms=NULL, tol, maxiter=15, init=NULL, rel.tolerance=1e-5, fixed=NULL, ...) 
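## survreg.fit2 below is the bare-bones refitter that predab.resample
## calls on each resample for validate.psm above.  Hedged sketch of the
## public interface (not run; hypothetical psm fit requiring x=TRUE, y=TRUE):
##   f <- psm(Surv(d.time, death) ~ rcs(age, 4), dist='weibull',
##            x=TRUE, y=TRUE)
##   validate(f, B=150)   # optimism-corrected Dxy, R2, slope, D, U, Q, g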
{ e <- y[, 2] if(sum(e) < 5) return(list(fail=TRUE)) x <- x #Get around lazy evaluation creating complex expression dlist <- survreg.distributions[[dist]] logcorrect <- 0 trans <- dlist$trans if (length(trans)) { if(ncol(y) != 2) stop('only implemented for 2-column Surv object') y[, 1] <- trans(y[, 1]) exactsurv <- y[, ncol(y)] == 1 if(any(exactsurv)) { ## survreg now stores y on original scale ## ytrans <- if(length(dlist$itrans)) dlist$itrans(y[exactsurv,1]) else ## y[exactsurv,1] ytrans <- trans(y[exactsurv, 1]) logcorrect <- sum(logb(dlist$dtrans(y[exactsurv, 1]))) } } if (length(dlist$dist)) dlist <- survreg.distributions[[dlist$dist]] f <- survreg.fit(cbind(Intercept=1, x), y, dist=dlist, parms=parms, controlvals=survreg.control(maxiter=maxiter, rel.tolerance=rel.tolerance), offset=rep(0, length(e)), init=init) if(is.character(f)) { warning(f); return(list(fail=TRUE)) } f$fail <- FALSE ## TODO: fetch scale properly if fixed nstrata <- length(f$icoef) - 1 if (nstrata > 0) { nvar <- length(f$coef) - nstrata f$scale <- exp(f$coef[-(1:nvar)]) names(f$scale) <- NULL # get rid of log( ) in names f$coefficients <- f$coefficients[1:nvar] } else f$scale <- scale f$loglik <- f$loglik + logcorrect # f$var <- solvet(f$imat, tol=tol) # sd <- survreg.distributions[[dist]] # f$deviance <- sum(sd$deviance(y,f$parms, f$deriv[,1])) # f$null.deviance <- f$deviance + 2*(f$loglik[2] - f$ndev[2]) # f$loglik <- c(f$ndev[2], f$loglik[2]) f } rms/R/ie.setup.s0000644000176200001440000000321312257363077013207 0ustar liggesusersie.setup <- function(failure.time, event, ie.time, break.ties=FALSE) { s <- !is.na(ie.time) if(all(s)) warning('every subject had an intervening event') if(!any(s)) stop('no intervening events') if(any(ie.time[s] > failure.time[s])) stop('an ie.time was after a failure.time') if(break.ties) { mindif <- min(diff(sort(unique(failure.time[!is.na(failure.time)])))) ## 8Nov01 Thanks: Josh Betcher k <- s & (ie.time==failure.time) if(sum(k)==0) warning('break.times=T but no ties found') ie.time[k] <- ie.time[k] - runif(sum(k),0,mindif) } if(any(ie.time[s]==failure.time[s])) stop('ie.times not allowed to equal failure.times') n <- length(failure.time) reps <- ifelse(is.na(ie.time), 1, 2) subs <- rep(1:n, reps) start <- end <- ev <- ie.status <- vector('list', n) start[] <- 0 end[] <- failure.time ev[] <- event ie.status[] <- 0 for(i in seq(along=s)[s]) { start[[i]] <- c(0, ie.time[i]) end[[i]] <- c(ie.time[i], failure.time[i]) ev[[i]] <- c(0, event[i]) ie.status[[i]] <- c(0, 1) } start <- unlist(start) end <- unlist(end) ev <- unlist(ev) ie.status <- unlist(ie.status) u <- units(failure.time) units(end) <- if(u=='')'Day' else u s <- !is.na(start+end) & (end <= start) if(any(s)) { cat('stop time <= start time:\n') print(cbind(start=start[s], end=end[s])) stop() } S <- Surv(start, end, ev) list(S=S, ie.status=ie.status, subs=subs, reps=reps) } rms/MD50000644000176200001440000003212414024574032011366 0ustar liggesusersac11f48efd221e9c6e33297c20755016 *DESCRIPTION e53027552bdbdc4d8afdaa0af1450754 *NAMESPACE 3ac6d0f551fb6b956a3bcc394836c5e6 *NEWS a5263ced794350451f27b08a43049789 *R/Function.rms.s 199bcb3cdc16592748f2bc6777937ae2 *R/Glm.r bb679c037dd774df497420c28a00ded9 *R/Gls.s 6c1b7f496955f7fb876c76b8243972c0 *R/Predict.s 374ff6efb148e22024b63d5b6dec1c70 *R/Rq.s 2bfae01957b15261721e33db09fa7aa6 *R/anova.rms.s f8400fb96d4d1bbc63d021ee57661625 *R/bj.s a684122d37546a33ee8a827018e88f20 *R/bootcov.s 3facce56093ac3e4e904d8f640e6046f *R/bplot.s 59713401df36aacb6c11dc05a79bfd84 *R/calibrate.cph.s 
253f9532dfb924f49cedf04a67a3752e *R/calibrate.default.s f868e8ca1885d191cbd50983e796c851 *R/calibrate.psm.s b669693cb946283723841f9836f5fca5 *R/calibrate.s 78ba114cbde3e92d71f27edc140544ec *R/contrast.s 744ac0848e45561dea4dcb70a4273eac *R/cph.s 707e17e96143dc6256fe44d671f43963 *R/cr.setup.s 4bcb6d69d3c683eca38eaf28fc37b019 *R/datadist.s 3c0fca0e0221ccc08a0536c8fc8d170d *R/fastbw.s 07800d51720e07511fca35baa694143f *R/gIndex.s c4108b6a3c4d35f3b55b5207cd238563 *R/gendata.s 9b4b4cfa30911d6417df3ecab9d80347 *R/ggplot.Predict.s 842d64b40e1837283bef2cd41952bc72 *R/groupkm.s 9a2d001d398705fbd7588aa6591b6942 *R/hazard.ratio.plot.s 020407ed74c1c5d884f44ddcfccf6f7c *R/ia.operator.s 0c9042c90e269e41bb589b4e7807a356 *R/ie.setup.s 1b7186005f70c0cc8aa9ffff66a3ff6f *R/latex.cph.s 68486fb68f058962fe96a12b191f5611 *R/latex.lrm.s 30b997920c57c5e219d4518305535b06 *R/latex.ols.s 47deda7fdf6457f32f18eef08fdac084 *R/latex.pphsm.s 6703b6b709c779699ba9426f09aad843 *R/latex.psm.s 232909048428a0ff67d916b184f4c9a7 *R/latex.rms.s b099703e617636428296362178e6be0a *R/lrm.fit.bare.r 05c483658cc00de9882cdc4547896045 *R/lrm.fit.s 1b2865fba391dbf3c72801a41fd403ed *R/lrm.fit.strat.s 0597122bee6f23a9129f4293bd9a4814 *R/lrm.s b6eef10f1355b1fa4f609f8e8fd7270a *R/matinv.s cbf9015d531734000689cf7f5c3e0011 *R/nomogram.s aa5a4de34751f9d209023211294c3ee8 *R/npsurv.s ec6acfbfbff26a60e5bbd7bdf0b9c1e0 *R/ols.s 6bcea95c16658ff05511b3a3577034c6 *R/orm.fit.s af4ab6cec57cf1c6f217e20b56607147 *R/orm.s f764acd51c355b1a0aa7250cadab35f2 *R/pentrace.s 58d4ce516aa760f8dd295e9e99fc0925 *R/plot.Predict.s 6d498522f4f4be25b648e950f03cddab *R/plot.contrast.r 181837dc6972d157884f57239e4f944e *R/plot.nomogram.s 13e3c07f0bef461881b7f3e2688c8502 *R/plot.xmean.ordinaly.s f860d692adcf86ebf891de9681e822ae *R/plotp.Predict.s 1586444dca29c1789abc64eff6bcd3eb *R/poma.r 5863a81bcc7e54c317961a96567734de *R/pphsm.s a1e93d71c8675459207d05c891c17f89 *R/predab.resample.s ceaeb947f35e70aab2b14c4de546a6ef *R/predict.lrm.s be391af0dea65ac1f5aeeb75bf0c2ba1 *R/predictrms.s 2cd80fa82463bbcae153072dafc09c5c *R/print.psm.s 90702873d63e006fde1cb586bb8b7e5e *R/psm.s 489aa87bc081c9beb220dbc832db9f97 *R/residuals.cph.s 9cbd5896ef27eb326283c5d40058fa89 *R/residuals.lrm.s b68bfa76a0838cec25be23f352234d82 *R/residuals.ols.s c5815c42fc53acbeeebc6a01b20ee22f *R/rms.s 30025db433b69547337bf3dcac8174ac *R/rms.trans.s e9080936826a7b4a22927e29e2c1f3b7 *R/rmsMisc.s e9d38f83f574873c3bfe7bb3989c25f7 *R/robcov.s 72c85f219fc0d0570ddb2f9c64e6f4f3 *R/sensuc.s 3c9e9590656fca662c6a6744b3a83328 *R/specs.rms.s 7546ebf206e97ba179861d56dd4b0972 *R/summary.rms.s 199f066cfd4820002b9c554ad90a8f33 *R/survest.cph.s 0beb2e9b0b69a54cbb9706466991232e *R/survest.psm.s b9b6b0b7aaa6475625958a7488af2158 *R/survfit.cph.s 4433b9ff6b74c70f68bd6956adfdee89 *R/survplot.npsurv.s ee251131b9dc4f6ef7c54cb8b191a0b6 *R/survplot.rms.s 94b0cd6acc6de1604482af353cf18919 *R/survplotp.npsurv.s 042d1cab4966f0c9c484b072266526f2 *R/survreg.distributions.s 25e3b5e119ebe51f1dce5f74eba4e675 *R/val.prob.s 0ebeb8a12e6d6579b4d058b99df2fca4 *R/val.surv.s 5af5237b40d2f105e0a895aa6cebd334 *R/validate.Rq.s e3e918384346e3160e9684174db7425e *R/validate.cph.s e442db117a13ffd5520af7ad36104305 *R/validate.lrm.s cf225a529e2b8f149b9458591a03acda *R/validate.ols.s 4f82e4f8d9b972ca287c47f755ce1236 *R/validate.psm.s 0a23e640e49d1020d9204dad743cb00f *R/validate.rpart.s a4d4fe03f275ad1a1c9c0880cdf345e7 *R/which.influence.s 6c73ac8f909c50913c05096d9cbf9b32 *README.md 80ddce7f0a779bd8f7c8c6cb50e3d403 *demo/00Index c661b1cce7d38348936c3106c2f14dae 
*demo/all.R 4eaccebb6b43bc42d655bd2b9837c1b6 *inst/tests/Glm.s eb5d7efa982e780027925dbf866f2f01 *inst/tests/Gls.s 21712e000a56691c56210e3307c565c5 *inst/tests/Predict.s bc8095c330b65347f9dcb17fcc34cf42 *inst/tests/Rq.s 082f3cf01ab5420275641230b0d2e995 *inst/tests/Rq2.s fd1cad5cf21ccf1ce675578fd8567221 *inst/tests/anova-ols-mult-impute.r 5abe2db59951c6dd6d3e4a69090208d5 *inst/tests/bj.r 45afa87c2c721d77eedd8ad556a1d4b2 *inst/tests/bootcov.r 3709c798ba6427dfc28b9e8c47b165e8 *inst/tests/boys.rda 05b06a767be863c4330341502f364bc5 *inst/tests/bplot.r c9ea8348d445668da8987f0fa5283130 *inst/tests/calibrate.cph.r c31bea1aab8ec112a1d25a339f21fd0a *inst/tests/calibrate.r 09026b645083392f61d6f7c7799a1a5b *inst/tests/contrast.r 85c9fc15158676d48b6339fe98250926 *inst/tests/contrast2.r adbe7641f0122e3401acd626dcc572eb *inst/tests/cph.s 6f8c28b595474f6575af2305c392b37d *inst/tests/cph2.s 8df570ced5278d86a1567cf69b4a11f4 *inst/tests/cph3.r 85318c956101d2a9598bbc01005b7a3f *inst/tests/cph4.r 35afcf92ebf18ff3460d07683a1aca25 *inst/tests/cph5.r 9f652d134601800ec339f912db13a830 *inst/tests/cphtdc.r 09fb3226572971aa603f5f97ea9b7fdf *inst/tests/examples.Rmd e4f5e070c649726d2392af7f63e39e9b *inst/tests/gTrans.r 27f517ba6ad9743756bc9495ce7ed4cd *inst/tests/ggplot.Predict.Rd.timing.r 614dbbecfaeb31d079ebd5ed37d27948 *inst/tests/ggplot2-without-ggplot.Predict.r 805ca8322aeef7fb0b6f17bc1f43e157 *inst/tests/ggplot2.r bb77e2753144a23478f115d6d35fe5b4 *inst/tests/ggplot2b.r ccca2ddaebe4ff659b2afb172ae68d28 *inst/tests/ggplot3.r 78c2e6879f6f52799942c55cd0a4b702 *inst/tests/ggplotly.Rmd bc665e8f1017b48f9c0fff93c20a3274 *inst/tests/latex.r b544031ca70e347c3e10c9029a1416e8 *inst/tests/lrm.ols.penalty.r 153fdee9973f5fb16eae2078fcfcfa9f *inst/tests/lrm.ordinal.s 604b862d13e71cb6029bd98f712b35c3 *inst/tests/lrm.s 31563fbc00ac91ab4eaafbfdcd64231e *inst/tests/lrm2.r 39f9ce73f10ef0bdb614859e1efa31f2 *inst/tests/lrmMean.s 0bfea8bedf36aaad4f651db178b61380 *inst/tests/mice.r 695c5f9aaaca8d02696bc338f14c5294 *inst/tests/model.matrix.s 7e1799a119c3ebb49f917e05367d2bc4 *inst/tests/modelData.r fc47b29381f0569c0453956c3fa66b4b *inst/tests/nomogram.r 87a4b2b30e186a88d025038e361cb67f *inst/tests/nomogram2.r ca4c22c089aa43a2ddba99e90a9c107a *inst/tests/nomogram2.rda 7d8263b574355eff308757294b871ddf *inst/tests/npsurv.r 1ce8ce55212efbaa3bd0c8bf6cef27ce *inst/tests/offset.r d94fe4e9c9f884bf11933fb64b7bde0b *inst/tests/ols.penalty.r 8f54e66d3bfb24cb79a191aaf263a85a *inst/tests/ols.r c3e3dcb1e9fbb6f9616df31598628a02 *inst/tests/orm-Mean.r ef182974715b68ffbf98ed6ef2b95d75 *inst/tests/orm-bootcov.r c65c6af36bd5f45e024a7d6124d3b487 *inst/tests/orm-example.r 802e3dcb0e399d85443741111e7bd313 *inst/tests/orm-profile.r eac2d98874615668431a60f6e0e7fa16 *inst/tests/orm-residuals.r 3852333978005ddc5ea290ea143c5ed5 *inst/tests/orm-simult.r 87f9fd6bf33ebfbc6caf5095b3abfb6a *inst/tests/orm.s e877fdb6c10ec5187f09a3aa1d9790e2 *inst/tests/orm2.s 2e3d6c8e679b251fd2af288fad9959f7 *inst/tests/orm3.r 7792ed341fe1b3fe9731ae107263ee73 *inst/tests/orm4.r 2aa7b3661a6ae74194ec9f95bbc3eaa9 *inst/tests/orm5.r 2c86a7ae3df88984bc58cd7fe8e1ee27 *inst/tests/pakpahan.dta a5775dcc1798405805eee86e3298b7dc *inst/tests/pentrace.s 69b043a0be51ad0b4de8c3a4cc9a02a2 *inst/tests/perlcode.s 01bb632b0c7bbf05df816b87cdaebb84 *inst/tests/plot.Predict.s c4d07b10b33c09476c5e36d9052e3946 *inst/tests/plotly-Predict.r 003ea0bb57c65c09ad9e70e36cf06945 *inst/tests/predictrms.s 25d9b1a1f75fe50e1489df3b34813864 *inst/tests/psm.s d02db66f5d4051ad57d42b66c6ae65d1 *inst/tests/psm2.s 
1f42f45a8ae0cfd473cd93ae972f9211 *inst/tests/psm3.s 39eb77b8feecabe1d30a3ed68a8aa1d0 *inst/tests/qt50.rda 9e632d6adca8c8cd913cd6594b0ac764 *inst/tests/rcs.r 0f9966600de1adea550c3f1c4df50ef9 *inst/tests/rms.r 9a3297de7c30c85b4507000d8beb258e *inst/tests/rms2.r 47a0a4f75d74265a2d00c4fbc17eb0c6 *inst/tests/robcov.r dfa258ab4a69a4dded0da98c9bc1e5cf *inst/tests/robcov2.r 4a96f3b1c46b7b00aaa738436a4528b8 *inst/tests/robcov3.r e470d35ccfdd8bbb4d88c1a9fe2c166b *inst/tests/robcov_Yuqi.r 8dbb2cc72806004b13683a4f2a278fcd *inst/tests/sampledf.rda 27c039706d73f61ac6c182fcb67b0a02 *inst/tests/scale.r de76d469e88cbf53beace54bb8d62408 *inst/tests/simult.s f81aebc7eba0959cb9ad82e5a2ae0f5e *inst/tests/strat.model.matrix.r 21f2b7b8c1b5af38125b3429eb315521 *inst/tests/summary.r 586af52b1c14ecebdf848cbfc1f09750 *inst/tests/survest.r b13d53f26a50e79abb38cdb49579bf93 *inst/tests/survfit.cph.s 4709d22be7f0899bd955effe65cd5121 *inst/tests/survfit.timedep.s 4d7ab5f4a89a260232ed41413d52f761 *inst/tests/survplot.s 19ff3254ee3aa74a494f39b7f39547b1 *inst/tests/survplot2.r 64374db349a30cf366d552765930cea0 *inst/tests/survplotCompete.r a254d76230b0ae83cd5ed65337a95607 *inst/tests/survplotp.r f3c4b978f17dc7e6f0bc082971268370 *inst/tests/val.prob.r c3373ee5586af5c9ccf2e142b4a1169d *inst/tests/val.surv.data.txt f5e765656b7e801655fd37daaa52d5a1 *inst/tests/val.surv.s 77abedef52b847550d164a570bdfede3 *inst/tests/validate.cph.s 042369d3dc4216bdccc38b8499c5944a *inst/tests/validate.ols.r 2c89fe9a97b84c8f611e78038c8cfed5 *inst/tests/validate.rpart.r cef54868b06ae129a62eb72eea010da0 *inst/tests/which.influence.r d5aa55e05c59436e851a67e5d9957311 *man/ExProb.Rd 9088283fff73be21474b3a85dfc3e90b *man/Function.Rd b5a57f7887c62c8ca5376d0fcb289c28 *man/Glm.Rd 25208cb2439f5e5e198090dd70bf70a1 *man/Gls.Rd 8c9baa7c155e10adfc5786ff206a6bf4 *man/Predict.Rd 43788eb03113dc2e48593be5074c14b5 *man/Rq.Rd 247defa81fa6aa801c33633898809bc2 *man/anova.rms.Rd 0aa048ee04ef2893aa752f10b7fb3a05 *man/bj.Rd 8429775bf40e40c26f56e73a41c78d2e *man/bootBCa.Rd 6fa6ea554fdf2ada897678ffa7eb73cd *man/bootcov.Rd eaac0c933c3bb9ddb4ceff4cf77d8302 *man/bplot.Rd a245a4ee37f19f0330a774e07b9b5a67 *man/calibrate.Rd adbb0913b05c5bfef0b424a943b08c1a *man/contrast.Rd cdca0d212058a9aaecc9ab1c8f00d1ee *man/cph.Rd f9d4642191c52890918fef04bafb7ea5 *man/cr.setup.Rd 12d5866a7a96857fcd37ceef78e62539 *man/datadist.Rd 8f635900c3a357237df2332ab6e2321c *man/fastbw.Rd d06702b3048e30129cca0c9b0f81ef51 *man/gIndex.Rd a0bdb35af0b8ae4f10c7a9201310b0b5 *man/gendata.Rd ec54dfa535a97b795695f395a5b26999 *man/ggplot.Predict.Rd ff83596fa5313d3026cd655433246cbf *man/groupkm.Rd 31c5028eee2cdd75a2122b3f4e623580 *man/hazard.ratio.plot.Rd 95a227697d8e064d916ad91f0ef0d580 *man/ie.setup.Rd 4ca640181f708e2bc6b2e0f43fc4c05b *man/latex.cph.Rd 2b617c22826f7922db01c3feef5af00b *man/latexrms.Rd c409c5e6a594d2a7f1ed8be59cd91261 *man/lrm.Rd 1ea7f8b12bfbf9579c673fc6526f61eb *man/lrm.fit.Rd e248571a34ef545d192de74206c6828d *man/lrm.fit.bare.Rd 75ea79a8d4c9c93306325f86197f20d1 *man/matinv.Rd 419988c0bdc509aa244fdd370f5ae567 *man/nomogram.Rd bc52dc0397eb9a52797855930b20db99 *man/npsurv.Rd f9f1dcebe31d7bf587fda7e4af6d8fc6 *man/ols.Rd bcef7db27f7aa9c9fff580dd38860426 *man/orm.Rd ee5a9a921a140c2354221a26413c6c3a *man/orm.fit.Rd 7362322a6e52f0dd2cd3fa916969b1a9 *man/pentrace.Rd 546777f9d7b91949d12caf4d4c4614b9 *man/plot.Predict.Rd 1c66176050732b0a3dac7604acfa0f1b *man/plot.contrast.rms.Rd a3835c805896d06db576b8297d4444be *man/plot.xmean.ordinaly.Rd 2a398c222ac1241ce53cc6777cb753a1 *man/plotp.Predict.Rd 
4a7342b024ea20b4aa13b7de8f7d65ab *man/poma.Rd 5f8df09e0816c68308abc6f8fed9d5d6 *man/pphsm.Rd ecce341cf72edf9b459708dc96642857 *man/predab.resample.Rd cb512ac324ec973049721caf280ab695 *man/predict.lrm.Rd f85d09ab515feeeb95e592a12ea156d9 *man/predictrms.Rd 08b70019441032decb9db8919f890263 *man/print.Glm.Rd 7bd0efa882e4d74a0b7f9fb12322a24a *man/print.cph.Rd a654014fa45a77cbf637b4bd75e524ca *man/print.ols.Rd 78af23be528a74970587acb6eb49a2fd *man/psm.Rd 7309d3819f51a633678d88699054602e *man/residuals.cph.Rd 891791f0e4d33850b4137258bc3f72d2 *man/residuals.lrm.Rd ae2bb7879844a3166f7c3d86cbcc448f *man/residuals.ols.Rd d8d8b14dc2053bf3d3be30f931fdfe8f *man/rms-internal.Rd 00fe972d3690ad3678982cfc18f0ff35 *man/rms.Rd c49d2f618abf77992adc74d7ed0cfbc7 *man/rms.trans.Rd d99145bbbdea062db0a4cb727ed28f0d *man/rmsMisc.Rd 1c8c30350e443bcf835339fc1f4b49fe *man/robcov.Rd 8b12e6662c14d699431df17954aea84c *man/sensuc.Rd 729edbf9d97a12c48b5b62e6c1a0d761 *man/setPb.Rd 05290f6bddf74c426aaaa5b0a505fb4d *man/specs.rms.Rd 1a023f7b35db5736324e2e921bc3f64c *man/summary.rms.Rd 9bb0a981f7f20751384b85a1d24179af *man/survest.cph.Rd 3ebe8b10eb94e6ec0b96152ce90b194f *man/survest.psm.Rd d5f219707c45a642e6a23a24aaa8ea28 *man/survfit.cph.Rd caf14a77811303e8e61be464ed16f5f7 *man/survplot.Rd 267ea36071ef9183d6e39d3ce547bbf5 *man/val.prob.Rd 0a37a868941038040641760513d97390 *man/val.surv.Rd 8e97e62870f33aac34ed7420760a6f1e *man/validate.Rd ae89a4b05e4a158b4e15156376bcfd61 *man/validate.Rq.Rd 3f9073fc024c9ddd76f588c88cbf8cfb *man/validate.cph.Rd 1d4dc21b1bc0757b933ca0e58c0b5760 *man/validate.lrm.Rd a227454802f07153a0cb64b3c7c0cbf7 *man/validate.ols.Rd 4354f28b31c1b35694d600c35973ef33 *man/validate.rpart.Rd a4abe6c8b9f87f6e7d2be022b4ec2e1d *man/vif.Rd 9a99f2660179334c51d4700ed6c46add *man/which.influence.Rd d3766ab6de08fc928c7253c043e013d5 *man/zzzrmsOverview.Rd 88fcae3360fb4e5e92d60d78002a9bc5 *src/init.c 00d319c508c47356a6b281cbe701cdcc *src/lrmfit.f ce0bcaf5e15b9792ebb9f634476f90d7 *src/mlmats.f 2ae77be06ac3f9ba6267485431a27cd8 *src/ormuv.f 3dcf4cf19fb5328393d19f0657d8e485 *src/ratfor/ormuv.r 47bdb0615589c0725e92bd818f2a01e9 *src/ratfor/robcovf.r b8312681c3d1e4b7998852669b8163af *src/robcovf.f rms/inst/0000755000176200001440000000000013555351205012034 5ustar liggesusersrms/inst/tests/0000755000176200001440000000000014024425074013174 5ustar liggesusersrms/inst/tests/survplotp.r0000644000176200001440000000226212773330167015446 0ustar liggesusersrequire(rms) y <- 1 : 100 units(y) <- 'year' S <- Surv(y) g <- rep('a', 100) g[seq(1, 100, by=5)] <- 'b' tapply(1:100, g, function(x) sum(x >= 50)) # a=41 b=10 tapply(1:100, g, function(x) length(x) / sum(x)) fs <- survfit(S ~ g) fs i <- fs$time %in% c(46, 50) fs$n.risk[i] # a=41 b=11 z <- qnorm(.975) sur <- fs$surv[i] seS <- fs$std.err[i] * sur # se on S(t) scale instead of log S(t) with(fs, cbind(n.risk[i], surv[i], lower[i], upper[i], std.err[i], seS, sur - z * seS, sur + z * seS)) # Last 2 columns not supposed to agree with summary.survfit since summary use log S(t) # as basis for CLs # summary(f, times=1:100)$time s <- summary(fs, times=50) s # a=41 b=10 # Manually compute lower and upper half CL at t=50 mean(sur) + c(-1, 1) * 0.5 * z * sqrt(seS[1]^2 +seS[2]^2) # .3774 .6224 # Compare to width of CL for smallest stratum above # ,2898 .7191 f <- npsurv(S ~ g) survplot(f) survplot(f, conf='diffbands') survdiffplot(f) # modern art survplotp(f) survplotp(f, aehaz=TRUE) survplotp(f, times=c(50,60)) survplotp(f, aehaz=TRUE, times=c(5,60)) h <- function(y) 1 - y survplotp(f, fun=h, 
ylab='Cumulative Incidence') survplotp(f, fun=h, aehaz=TRUE, times=c(5, 60)) rms/inst/tests/perlcode.s0000644000176200001440000000205511731252013015150 0ustar liggesusersrequire(rms) n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) cholesterol[1:3] <- NA # 3 missings, at random ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) g <- Function(fit) cat(perlcode(g), '\n') rms/inst/tests/mice.r0000644000176200001440000000131613662733614014306 0ustar liggesusersif(! require(mice)) quit(save='no') require(rms) set.seed(1) n <- 50 d <- data.frame(x1=runif(n), x2=sample(c('a','b','c'), n, TRUE), x3=sample(c('A','B','C','D'), n, TRUE), x4=sample(0:1, n, TRUE), y=runif(n)) d$x1[1:5] <- NA d$x2[3:9] <- NA d$x3[7:14] <- NA a <- aregImpute(~ x1 + x2 + x3 + x4 + y, data=d) ols(y ~ x1 + x2 + x3 + x4, data=d) fit.mult.impute(y ~ x1 + x2 + x3 + x4, ols, a, data=d) # works m <- mice(d) d1 <- complete(m, 1) ## ols(y ~ x1 + x2 + x3 + x4, data=d1) # fails w <- d1 attr(w$x2, 'contrasts') <- NULL attr(w$x3, 'contrasts') <- NULL ols(y ~ x1 + x2 + x3 + x4, data=w) # works fit.mult.impute(y ~ x1 + x2 + x3 + x4, ols, m, data=d) rms/inst/tests/orm-simult.r0000644000176200001440000000133513247050261015467 0ustar liggesusers# From Matthew Shun-Shin 2018-01-14 require(rms) set.seed(1) m <- 50 d <- expand.grid(arm=c('a','b','c'), i=1 : m) d$x <- runif(nrow(d)) d$y <- rnorm(nrow(d)) dd <- datadist(d) options(datadist="dd") f <- ols(y ~ x + arm, data=d) summary(f, verbose=TRUE) summary(f, conf.type='simult', verbose=TRUE) # simult ignored #Works contrast(f, list(arm=c('c','b')), list(arm='a')) contrast(f, list(arm=c('c','b')), list(arm="a"), conf.type='simultaneous') g <- orm(y ~ x + arm, data=d) summary(g, verbose=TRUE) summary(g, conf.type='simultaneous', verbose=TRUE) # simult ignored contrast(g, list(arm=c('b','c')), list(arm='a')) contrast(g, list(arm=c('b','c')), list(arm='a'), conf.type='simult') rms/inst/tests/bplot.r0000644000176200001440000000046412356322100014473 0ustar liggesusersrequire(rms) x1 <- runif(100) x2 <- runif(100) y <- x1 + 2 * x2 + 3 * runif(100) dd <- datadist(x1, x2); options(datadist='dd') f <- ols(y ~ x1 * x2) f p <- Predict(f, x1, x2, np=20) bplot(p, lfun=wireframe, col='red') bplot(p, lfun=wireframe, col='red', xlab='Age (days)', xlabrot=-10, cex.lab=1.4) rms/inst/tests/orm-Mean.r0000644000176200001440000000061213751560004015030 0ustar liggesusersrequire(rms) n <- 1e6 set.seed(8) y <- sample(1:5, n, prob=c(.1, .2, .35, .35, .05), replace=TRUE) table(y) x <- sample(0:1, n, replace=TRUE) means <- tapply(y, x, mean) means dd <- datadist(x); options(datadist='dd') f <- orm(y ~ x) M <- Mean(f) lp <- Predict(f) # same as Predict(f, x=0:1) here lp lp <- lp$yhat lp M(lp) X <- predict(f, data.frame(x=0:1), type='x') M(lp, X, 
conf.int=0.95)
rms/inst/tests/qt50.rda0000644000176200001440000004267712365507240014473 0ustar liggesusers[binary R data file (compressed .rda); contents omitted]
rms/inst/tests/survplotCompete.r0000644000176200001440000000257212765623133016602 0ustar liggesusersrequire(rms)
#
# Time to progression/death for patients with monoclonal gammopathy
# Competing risk curves (cumulative incidence)
# status variable must be a factor with first level denoting right censoring
m <- upData(mgus1, stop = stop / 365.25,
            units=c(stop='years'),
            labels=c(stop='Follow-up Time'), subset=start == 0)
f <- npsurv(Surv(stop, event) ~ sex, data=m)
levels(m$event)
f$n.risk
## Compute long-hand for checking
times <- f$time
sex <- c(rep('female', f$strata['sex=female']),
         rep('male',   f$strata['sex=male']))
n <- length(times)
nrisk <- numeric(n)
fu <- m$stop
for(i in 1 : n) nrisk[i] <- sum(fu >= times[i] - 1e-7 & m$sex == sex[i])
w <- data.frame(sex, times, nrisk, f$n.risk)
w
## xless(w)
times <- seq(0, 36, by=4)
g <- summary(f, times=times, print.it=FALSE)
## unclass(g)
sex <- as.character(g$strata)
n <- length(times)
nrisk <- matrix(0, nrow=length(times), ncol=2)
colnames(nrisk) <- c('female', 'male')
for(sx in c('female', 'male'))
  for(i in 1 : n) nrisk[i, sx] <- sum(fu >= times[i] - 1e-7 & m$sex == sx)
nrisk
par(mar=c(8, 4, 1, 1))
survplot(f, state='pcm', n.risk=TRUE, xlim=c(0, 20), ylim=c(0, .5), col=1:2,
         y.n.risk=-.15)
survplotp(f, state='pcm', xlim=c(0, 20), ylim=c(0, .5))
survplot(f, state='death', add=TRUE, col=3)
f <- npsurv(Surv(stop, event) ~ sex, data=m)
survplot(f, state='death', n.risk=TRUE, conf='diffbands')
rms/inst/tests/simult.s0000644000176200001440000000240612700011055014666 0ustar liggesusersrequire(rms)
require(multcomp)
set.seed(13)
n <- 200
x1 <- runif(n)
y <- ifelse(runif(n) <= plogis(2*(x1-.5)), 1, 0)
lrm(y ~ x1)
f <- lrm(y ~ rcs(x1,4), x=TRUE, y=TRUE)
g <- bootcov(f, B=1000, coef.reps=TRUE)
anova(f)
specs(f)
# Get simultaneous confidence intervals for estimates at 3 x's
pd <- function(xs) cbind(1, predict(f, data.frame(x1=xs), type='x'))
X <- pd(c(0.05, 0.50, 0.7))
confint(glht(f, X))
# Add a redundant point that does not involve new parameters
X <- pd(c(0.05, 0.50, 0.51, 0.7))
confint(glht(f, X))  # some differences, but slight
# Add a point in a new X space (beyond outer knot)
X <- pd(c(.05, 0.5, 0.51, 0.7, 1))
confint(glht(f, X))
# Add a long sequence of redundant interior points
X <- pd(c(.05, seq(.5, .6, length=100), .7, 1))
confint(glht(f, X))
dd <- datadist(x1); options(datadist='dd')
xs <- seq(0, 1, by=0.02)
i <- Predict(f, x1=xs)
s <- Predict(f, x1=xs, conf.type='simultaneous')
boot <- Predict(g, x1=xs)
b <- rbind(simultaneous=s, individual=i, bootstrap=boot)
plot(b, ~ x1 | .set.)
xYplot(Cbind(yhat,lower,upper) ~ x1, groups=.set., data=b, method='bands', type='l', label.curves=list(keys='lines'), keyloc=list(x=.1,y=1.5)) contrast(f, list(x1=.2), list(x1=.6)) contrast(f, list(x1=.2), list(x1=c(.6,.8)), conf.type='simult') rms/inst/tests/psm3.s0000644000176200001440000000066312246773012014251 0ustar liggesusers# From IM Nolte require(rms) set.seed(1) n <- 1000 v <- rbinom(n, 2, 0.2) time <- rnorm(n, v / 10 + 2, 0.5) c <- ifelse(time < 0.5, 2, ifelse(time > 3.5, 0, ifelse(time > 2.5, 3, 1))) time[c==2] <- 0.5 time[c==0] <- 3.5 time2 <- time + 0.1 time2[c==3] <- time[c==3] + runif(sum(c==3), 0.1, 0.5) S <- Surv(time, time2, c, type="interval") survreg(S ~ v, dist='gaussian') psm(S ~ v, dist='gaussian') rms/inst/tests/Glm.s0000644000176200001440000000255212577522560014114 0ustar liggesusersrequire(rms) counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) f <- Glm(counts ~ outcome + treatment, family=poisson(), x=TRUE, y=TRUE) g <- bootcov(f,B=100) f g diag(vcov(g))/diag(vcov(f)) x <- runif(1000) y <- ifelse(runif(1000) < 0.5, 1, 0) f <- Glm(y ~ x, family=binomial(), x=TRUE, y=TRUE) g <- bootcov(f, B=100) diag(vcov(f))/diag(vcov(g)) ########################################################### ## Test offset() ## From rfunction.com/archives/223 and Max Gordon # Setup some variables suited for poisson regression Y <- c(15, 7, 36, 4, 16, 12, 41, 15) N <- c(4949, 3534, 12210, 344, 6178, 4883, 11256, 7125) x1 <- c(-0.1, 0, 0.2, 0, 1, 1.1, 1.1, 1) x2 <- c(2.2, 1.5, 4.5, 7.2, 4.5, 3.2, 9.1, 5.2) # Setup the rms environment ddist <- datadist(Y, N, x1, x2) options(datadist="ddist") ############################# # Poisson regression # ############################# form <- Y ~ offset(log(N)) + x1 + x2 a <- Glm(form, family=poisson) b <- glm(form, family=poisson) cbind(coef(a), coef(b)) nd <- data.frame(x1=1, x2=1.5, N=c(1, 1000)) cbind(predict(a, nd), predict(b, nd)) Predict(a, x1=1, x2=1.5, offset=list(N=1000)) ## Try with lm and ols a <- ols(form) b <- lm(form) cbind(coef(a), coef(b)) cbind(predict(a, nd), predict(b, nd)) Predict(a, x1=1, x2=1.5, offset=list(N=1000)) cbind(fitted(a), fitted(b)) cbind(resid(a), resid(b)) rms/inst/tests/ols.penalty.r0000644000176200001440000000166212615512220015626 0ustar liggesusers## See http://stats.stackexchange.com/questions/104889/k-fold-or-hold-out-cross-validation-for-ridge-regression-using-r/105453?noredirect=1#comment203976_105453 require(rms) #random population of 200 subjects with 1000 variables M <- matrix(rep(0, 200 * 100), 200, 1000) for (i in 1 : 200) { set.seed(i) M[i,] <- ifelse(runif(1000) < 0.5, -1, 1) } rownames(M) <- 1:200 ##random yvars set.seed(1234) u <- rnorm(1000) g <- as.vector(crossprod(t(M), u)) h2 <- 0.5 set.seed(234) y <- g + rnorm(200, mean=0, sd=sqrt((1 - h2) / h2 * var(g))) myd <- data.frame(y=y, M) training.id <- sample(1 : nrow(myd), round(nrow(myd) / 2, 0), replace = FALSE) test.id <- setdiff(1 : nrow(myd), training.id) myd_train <- myd[training.id,] myd_test <- myd[test.id,] frm <- as.formula(paste("y~", paste(names(myd_train)[2:100], collapse="+"))) f <- ols(frm, data = myd_train, x=TRUE, y=TRUE) p <- pentrace(f, seq(.05, 5, by=.05), noaddzero=TRUE) plot(p) rms/inst/tests/Rq.s0000644000176200001440000000331712472463526013757 0ustar liggesusers########################################################################### ### Purpose: Compare predictions for quantile regression between rq() from ### the quantreg package and wrapper Rq() from the rms package ### Author: Ben Saville ### 
Date: 7/26/12 ########################################################################### library(quantreg) library(rms) ### Simulate data set.seed(1) y = rnorm(1000,50,5) age = sample(5:15,size=1000,replace=TRUE) gender = as.factor(sample(c("male","female"),size=1000,replace=TRUE)) mydat = data.frame(y,age,gender) #################### Using rq() ## Fit model with rq k <- attr(rcs(age,4), 'parms') rq.test = rq(y ~ rcs(age, k) + gender + rcs(age, k)*gender, tau=0.50, data=mydat) ## Create dataset for predictions p.age = rep(5:15,2) p.gender = as.factor(rep(c("male","female"),each=11)) p.data = data.frame(p.age,p.gender) names(p.data) = c("age","gender") ## Predictions using predict() rq.preds = cbind(p.data, predict(rq.test, newdata=p.data)) ## Predictions using X %*% Beta p.gender.num = as.numeric(p.gender)-1 X.p = cbind(1, rcs(p.age, k), p.gender.num, rcs(p.age, k)*p.gender.num ) rq.preds.XB = X.p %*% rq.test$coefficients ## These match! cbind(rq.preds,rq.preds.XB) ################## Using Rq() ## Fit model with Rq Rq.test = Rq(y~ rcs(age, k) + gender + rcs(age, k)*gender, tau=0.5, data=mydat) ## prediction using Predict() Rq.preds = Predict(Rq.test, age=5:15, gender=c("male","female"),conf.int=FALSE) ## Note predict(Rq.test, newdata=p.data) gives the same values as Predict() ## Using X %*% Beta Rq.preds.XB = X.p %*% Rq.test$coefficients ## These don't match! cbind(Rq.preds, Rq.preds.XB) rms/inst/tests/predictrms.s0000644000176200001440000000464112677772753015566 0ustar liggesusers## David van Klaveren ## Erasmus MC, Department of Public Health, room Ae-110 ## E-mail d.vanklaveren.1@erasmusmc.nl ## se.fit comparisons before predictrms changed to use original covariate ## means instead of "adjust to" values for cph require(rms) n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c("Male","Female"), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=="Female")) dt <- -log(runif(n))/h label(dt) <- "Follow-up Time" e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" dd <- datadist(age, sex) options(datadist="dd") Srv <- Surv(dt,e) f <- cph(Srv ~ sex + age , x=TRUE, y=TRUE, se.fit=TRUE) ## skipped splines to keep example simple p <- predict(f,newdata=data.frame(sex,age), se.fit=TRUE) ## predict with newdata = original data ## linear predictors are equal for cph and predict: sum((f$linear.predictors-p$linear.predictors)^2) ## and so are se after predictrms fixed sum((f$se.fit-p$se.fit)^2) ### Reconstruction of difference X <- f$x beta <- f$coef cov <- f$var X.center.mean <- sweep(X, 2, c(mean(sex=="Male"), mean(age))) X.center.median <- sweep(X, 2, c(median(sex=="Male"),median(age))) lp.center.mean <- X.center.mean%*%beta se.center.mean <- drop(sqrt(((X.center.mean %*% cov) * X.center.mean) %*% rep(1, ncol(X.center.mean)))) se.center.median <- drop(sqrt(((X.center.median %*% cov) * X.center.median) %*% rep(1, ncol(X.center.median)))) ## linear predictors are equal for fit/predict and mean centered lp: sum((f$linear.predictors-lp.center.mean)^2) ## cph$se.fit is equal to mean centered se sum((f$se.fit-se.center.mean)^2) ## predict$.se.fit is no longer equal to median centered se sum((p$se.fit-se.center.median)^2) ## Check ref.zero=TRUE set.seed(1) n <- 30 x1 <- 100 + rnorm(n) x2 <- 5 + rnorm(n) x3 <- c(rep('a', 5), rep('b', 5), rep('c', 20)) dd <- datadist(x1, x2, x3); options(datadist='dd'); dd resid <- rnorm(n) y <- x1 + x2 + .5*(x3 == 'b') + 1*(x3 == 'c') + resid f <- ols(y ~ pol(x1, 2) + x2 + x3) f w <- 
data.frame(x1=100.4, x2=4.5, x3='c')
predict(f, w, conf.int=.95)
predict(f, w, type='adjto')
c(median(x1), median(x1)^2, median(x2))
k <- coef(f)
ycenter <- k[1] + k[2]*median(x1) + k[3]*median(x1)^2 + k[4]*median(x2) + k[6]
ycenter
predict(f, w, conf.int=.95, ref.zero=TRUE)
k[1] + k[2]*100.4 + k[3]*100.4^2 + k[4]*4.5 + k[6] - ycenter
rms/inst/tests/nomogram.r0000644000176200001440000000305712761333576015202 0ustar liggesusers# From Andy Bush
require(rms)
set.seed(20)
x1<-10*runif(20,0,1)
y1<-c(rep(0,10),rep(1,10))
y2<-5*rnorm(20,0,1)
d<-data.frame(cbind(y1,y2,x1))
dd<-datadist(d)
options(datadist='dd')
flrm<-lrm(y1~x1,x=T,y=T,model=T)
nomlrm<-nomogram(flrm)
plot(nomlrm,xfac=.45)
fols<-ols(y2~x1,x=T,y=T,model=T)
nomols<-nomogram(fols)
plot(nomols,xfac=.45)
## From Zongheng Zhang zh_zhang1984@hotmail.com
n <- 1000 # sample size
set.seed(88) # set seed for replication
age<- rnorm(n, 65, 11)
lac<- round(abs(rnorm(n, 3, 1)),1)
sex<- factor(sample(1:2,n,prob=c(0.6,0.4),TRUE), labels=c('male','female'))
shock<-factor(sample(1:4,n,prob=c(0.3,0.3,0.25,0.15),TRUE),
              labels=c('no','mild','moderate','severe'))
z<- 0.2*age + 3*lac* as.numeric(sex)+ 5*as.numeric(shock) -rnorm(n,36,15) ## linear combination with a bias
y <- ifelse(runif(n) <= plogis(z), 1, 0)
library(rms)
ddist <- datadist(age, lac, shock, sex)
options(datadist='ddist')
mod <- lrm(y ~ shock+lac*sex+age)
nom <- nomogram(mod, lp.at=seq(-3,4,by=0.5),
                fun=plogis,
                fun.at=c(.001,.01,.05,seq(.1,.9,by=.1),.95,.99,.999),
                funlabel="Risk of Death",
                conf.int=c(0.1, 0.7),
                abbrev=TRUE, #had not been working for shock
                minlength=1)
plot(nom, lplabel="Linear Predictor",
     fun.side=c(3,3,1,1,3,1,3,1,1,1,1,1,3),
     col.conf=c('red','green'),
     conf.space=c(0.1,0.5),
     label.every=3,
     col.grid = gray(c(0.8, 0.95)),
     which="shock")
legend.nomabbrev(nom, which='shock', x=.5, y=.5)
rms/inst/tests/psm.s0000644000176200001440000000331013662734063014164 0ustar liggesusersintr <- FALSE # set to TRUE if running interactively so xless will run
require(survival)
n <- 400
set.seed(1)
age <- rnorm(n, 50, 12)
sex <- factor(sample(c('Female','Male'),n,TRUE))
# Population hazard function:
h <- .02*exp(.06*(age-50)+.8*(sex=='Female'))
d.time <- -log(runif(n))/h
cens <- 15*runif(n)
death <- ifelse(d.time <= cens,1,0)
d.time <- pmin(d.time, cens)
pol <- function(x, d) cbind(x, x^2)
g <- survreg(Surv(d.time,death) ~ sex*pol(age,2), dist='lognormal')
rg <- residuals(g, type='matrix')[,'dg']
require(rms)
h <- survreg(Surv(d.time,death) ~ sex*pol(age,2), dist='lognormal', x=TRUE)
# lognormal is bad fit for these data
rbind(coef(g), coef(h))
rm(pol)
f <- psm(Surv(d.time,death) ~ sex*pol(age,2), dist='lognormal',
         x=TRUE, y=TRUE) #, control=survreg.control())
rbind(coef(h), coef(f))
v <- vcov(f, regcoef.only=FALSE)
diag(vcov(h)) / diag(v)
r <- residuals(f, type='matrix')[,'dg']
if(intr) xless(cbind(rg, r))
if(intr) xless(residuals(f, type='score'))
fr <- robcov(f)
diag(vcov(f)) / diag(vcov(fr))
r <- residuals(f)
g <- npsurv(r ~ 
sex) survplot(g) lines(r) title(dist) } rms/inst/tests/orm-residuals.r0000644000176200001440000000204012462010211016124 0ustar liggesusersrequire(rms) set.seed(1) n <- 100 y <- sample(1 : 10, n, TRUE) x1 <- runif(n) x2 <- runif(n) f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001) g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001) max(abs(coef(f) - coef(g))) max(abs(vcov(f) - vcov(g, intercepts='all'))) options(digits=4) dm <- function(x) if(length(dim(x))) dim(x) else length(x) for(type in c('li.shepherd', 'ordinary', 'score', 'pearson', 'deviance', 'pseudo.dep', 'partial', 'dfbeta', 'dfbetas', 'dffit', 'dffits', 'hat', 'gof', 'lp1')) { cat(type) rf <- resid(f, type=type) cat(' lrm dim', dm(rf)) rg <- resid(g, type=type) cat(' orm dim', dm(rg)) cat(' max |difference|', max(abs(rf - rg)), '\n') } options(digits=7) diag(vcov(f)) / diag(vcov(g, intercepts='all')) diag(vcov(robcov(f))) / diag(vcov(robcov(g), intercepts='all')) rf <- robcov(f) rg <- robcov(g) max(abs(rf$var - rg$var)) max(abs(vcov(rf, intercepts='all') - vcov(rg, intercepts='all'))) vcov(rf, regcoef.only=TRUE, intercepts='none') vcov(rg, regcoef.only=TRUE, intercepts='none') rms/inst/tests/anova-ols-mult-impute.r0000644000176200001440000000103512610173411017527 0ustar liggesusersrequire(rms) set.seed(1) x1 <- rnorm(100) x2 <- rnorm(100) y <- x1 + x2 + rnorm(100) x1[1:10] <- NA a <- aregImpute(~ y + x1 + x2) f <- fit.mult.impute(y ~ x1 + x2, ols, a, data=data.frame(x1,x2,y), n.impute=3, fit.reps=TRUE) ## Show how fit.mult.impute estimates sigma^2 s <- 0 for(i in 1 : 3) s <- s + f$fits[[i]]$stats['Sigma'] c(s / 3, f$stats['Sigma']) anova(f, test='Chisq') ## Make sure the chi-squares and sums of squares are not from one of the models for(i in 1 : 3) print(anova(f$fits[[i]], test='Chisq')) rms/inst/tests/val.prob.r0000644000176200001440000000201212665733254015110 0ustar liggesusers# Thanks: Kai Chen. M.D. # Resident # Dept of Breast surgery, # Breast Tumor Center; # Sun Yat-sen Memorial Hospital # chenkai23@mail.sysu.edu.cn # Fit logistic model on 100 observations simulated from the actual # model given by Prob(Y=1 given X1, X2, X3) = 1/(1+exp[-(-1 + 2X1)]), # where X1 is a random uniform [0,1] variable. Hence X2 and X3 are # irrelevant. After fitting a linear additive model in X1, X2, # and X3, the coefficients are used to predict Prob(Y=1) on a # separate sample of 100 observations. Note that data splitting is # an inefficient validation method unless n > 20,000. require(rms) set.seed(1) n <- 200 x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) logit <- 2*(x1-.5) P <- 1/(1+exp(-logit)) y <- ifelse(runif(n)<=P, 1, 0) d <- data.frame(x1,x2,x3,y) f <- lrm(y ~ x1 + x2 + x3, subset=1:100) pred.logit <- predict(f, d[101:200,]) phat <- 1/(1+exp(-pred.logit)) val.prob(phat, y[101:200], m=20, cex=.5) # subgroups of 20 obs. val.prob(x2[101:200], y[101:200], m=20, cex=.5) # subgroups of 20 obs. 
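## A hedged illustrative sketch (added, not from the original test file):
## the comment above notes that data splitting is inefficient unless n is
## very large.  The usual rms alternative is bootstrap optimism correction
## with validate(), reusing the simulated variables defined in this file.
f.full <- lrm(y ~ x1 + x2 + x3, x=TRUE, y=TRUE)   # fit on all n=200 obs
validate(f.full, method='boot', B=200)            # optimism-corrected Dxy, R2, slope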
rms/inst/tests/gTrans.r0000644000176200001440000000420714024536057014624 0ustar liggesusersrequire(rms) n <- 40 set.seed(1) y <- runif(n) x1 <- runif(n) x2 <- rnorm(n) g <- sample(letters[1:4], n, TRUE) dd <- datadist(x1, x2, g); options(datadist='dd') f <- ols(y ~ pol(x1, 2) * g + x2) pol2 <- function(x) { z <- cbind(x, xsq=x^2) attr(z, 'nonlinear') <- 2 z } h <- ols(y ~ gTrans(x1, pol2) * g + x2) specs(h, long=TRUE) rbind(coef(f), coef(h)) summary(f) summary(h) ggplot(Predict(f)) ggplot(Predict(h)) k1 <- list(x1=c(.2, .4), g='b') k2 <- list(x1=c(.2, .4), g='d') contrast(f, k1) contrast(h, k1) contrast(f, k1, k2) contrast(h, k1, k2) f <- ols(y ~ lsp(x1, c(.2, .4))) # Duplicate lsp but give custom names for columns lspline <- function(x) { z <- cbind(x, x.2=pmax(x - .2, 0), x.4=pmax(x - .4, 0)) attr(z, 'nonlinear') <- 2:3 z } h <- ols(y ~ gTrans(x1, lspline)) rbind(coef(f), coef(h)) ggplot(Predict(f)) ggplot(Predict(h)) anova(f) anova(h) yl <- c(-0.25, 1.25) # Fit a straight line from x1=0.1 on, but force a flat relationship for x1 in [0, 0.1] # First do it forcing continuity at x1=0.1 h <- ols(y ~ pmax(x1, 0.1)) xseq <- c(0, 0.099, 1, 0.101, seq(0.2, .8, by=0.1)) ggplot(Predict(h, x1=xseq)) # Now allow discontinuity without a slope change flin <- function(x) cbind(x < 0.1, x) h <- ols(y ~ gTrans(x1, flin)) ggplot(Predict(h, x1=xseq), ylim=yl) + geom_point(aes(x=x1, y=y), data=data.frame(x1, y)) # Now have a discontinuity with a slope change flin <- function(x) cbind(x < 0.1, pmax(x - 0.1, 0)) h <- ols(y ~ gTrans(x1, flin)) ggplot(Predict(h, x1=xseq), ylim=yl) + geom_point(aes(x=x1, y=y), data=data.frame(x1, y)) # Discontinuous linear spline dlsp <- function(x) { z <- cbind(x, x >= 0.2, pmax(x - .2, 0), pmax(x - .4, 0)) attr(z, 'nonlinear') <- 2:4 z } h <- ols(y ~ gTrans(x1, dlsp)) ggplot(Predict(h), ylim=yl) ggplot(Predict(h, x1=c(.1, .199, .2, .201, .3, .4, 1)), ylim=yl) dlsp <- function(x) { z <- cbind(x, x >= 0.2, pmax(pmin(x, 0.6) - .2, 0), pmax(pmin(x, 0.6) - .4, 0)) attr(z, 'nonlinear') <- 2:4 z } h <- ols(y ~ gTrans(x1, dlsp)) ggplot(Predict(h), ylim=yl) # Try on a categorical predictor gr <- function(x) cbind(bc=x %in% c('b','c'), d=x == 'd') h <- ols(y ~ gTrans(g, gr)) ggplot(Predict(h, g)) rms/inst/tests/modelData.r0000644000176200001440000000227213715473522015263 0ustar liggesusersrequire(rms) x <- runif(20) x2 <- runif(20) X <- cbind(x, x2) y <- sample(0:1, 20, TRUE) m <- model.frame(y ~ X) names(m) mft <- attr(m, 'terms') mft attr(mft, 'term.labels') p <- terms(y ~ X) attr(p, 'term.labels') all.vars(y ~ X) d <- data.frame(y, I(X)) names(d) Xr <- X; class(Xr) <- c('rms', class(Xr)) ms <- modelData(formula=y ~ X) names(ms) # f <- lrm(y ~ X, x=TRUE, y=TRUE) X <- pol(x, 2) ms <- modelData(formula=y ~ X) names(ms) k <- 4 x <- 1:20 d <- data.frame(x) modelData(d, formula= ~ rcs(x,k)) d <- list(x=x, k=6) modelData(d, ~ rcs(x, k)) b <- 1:8 a <- c(1, 1, 2, 2, 3, 4, 7, 7) rmsb::Ocens(a, b) d <- data.frame(a, b) x <- runif(8) m <- modelData(d, rmsb::Ocens(a, b) ~ x, subset=1:4) attributes(m[[1]]) x <- c(rep('a', 10), rep('b', 11), rep('c', 12)) x <- factor(x, c('a', 'b', 'c', 'd')) table(x) y <- runif(length(x)) d <- data.frame(x, y) m <- modelData(d, y ~ x) attributes(m$x) ## LCAextend package example like this failed g <- function() { d <- data.frame(x=runif(20), y=sample(0:1, 20,TRUE)) w <- (1:20)/20 # d$w <- (1:20)/100 will take precedence # return(model.frame(y ~ x, weights=as.vector(w), data=d)) # works lrm(y ~ x, weights=as.vector(w), data=d) } g() 
rms/inst/tests/summary.r0000644000176200001440000000234312416251140015050 0ustar liggesusers# From Pedro Emmanuel Alvarenga Americano do Brasil emmanuel.brasil@gmail.com require(rms) set.seed(1) n <- 400 n1 <- 300; n2 <- 100 data <- data.frame(outcome=c(rnorm(n1, mean = .052, sd = .005), rnorm(n2, mean = .06, sd = .01)), v2=sample(seq(20,80,5),n,T), v3=sample(seq(60,150,1),n,T), v4=c(rnorm(n1, mean = 80, sd = 10), rnorm(n2, mean = 60, sd = 15)), v5=sample(c('M','M','F'),n,T), v6=c(rnorm(n1, mean = 80, sd = 10), rnorm(n2, mean = 120, sd = 30))) # checking data head(data) # setting datadist dd <- datadist(data); options(datadist="dd") # generating missings m <- function() sample(1:n, 20, FALSE) data$v2[m()] <- NA data$v3[m()] <- NA data$v4[m()] <- NA data$v5[m()] <- NA data$v6[m()] <- NA plot(naclus(data)) # Imputing imp <- aregImpute(~ outcome + v2 + v3 + v4 + v5 + v6, data, n.impute=10) # fitting f <- fit.mult.impute(outcome ~ v6 + v2 + rcs(v3) + v5 * rcs(v4), ols, imp, data, fit.reps=TRUE) coef(f) w <- NULL for(i in 1 : 10) w <- rbind(w, coef(f$fits[[i]])) w s <- summary(f) s unclass(s) # Effects are non-zero but small plot(s) rms/inst/tests/orm-profile.r0000644000176200001440000000201013760503445015610 0ustar liggesusersif(FALSE) { require(rms) set.seed(1) n <- 5000 x1 <- runif(n); x2 <- runif(n); x3 <- runif(n); x4 <- runif(n); x5 <- runif(n) y <- round(400 * runif(n)) fm <- y ~ x1 + x2 + x3 + x4 + x5 print(system.time(f <- lrm(fm))) print(system.time(f <- orm(fm))) ti <- numeric(0) rs <- c(5,10,20,40,80,120,160,200,250,seq(300, 1000, by=100),1500,2000,2500,3000) for(r in rs) { cat(r, '\n') y <- round(r * runif(n)) ti <- c(ti, system.time(orm(fm))['elapsed']) } plot(rs, ti) # linear in no. of intercepts! y <- round(1000 * runif(n)) print(system.time(f <- orm(fm, x=TRUE, y=TRUE))) print(system.time(validate(f, B=10))) # 15x longer vs. 10x Rprof() # for(i in 1 : 10) f <- orm(fm) print(validate(f, B=10)) Rprof(NULL) # s <- summaryRprof() if(require(proftools)) { tmp.dot <- tempfile() tmp.pdf <- tempfile() pd <- readProfileData() profileCallGraph2Dot(pd, filename = tmp.dot) system(sprintf("dot -Tpdf -o %s %s", tmp.pdf, tmp.dot)) browseURL(sprintf("file://%s", tmp.pdf)) unlink(tmp.dot) unlink(tmp.pdf) } } rms/inst/tests/orm4.r0000644000176200001440000000257212661605732014254 0ustar liggesusers# From http://stats.stackexchange.com/questions/195198 require(rms) d1 <- data.frame(cohort='one', sex='male', y=c(.476, .84, 1.419, 0.4295, 0.083, 2.9595, 4.20125, 1.6605, 3.493, 5.57225, 0.076, 3.4585)) d2 <- data.frame(cohort='one', sex='female', y=c(4.548333, 4.591, 3.138, 2.699, 6.622, 6.8795, 5.5925, 1.6715, 4.92775, 6.68525, 4.25775, 8.677)) d3 <- data.frame(cohort='two', sex='male', y=c(7.9645, 16.252, 15.30175, 8.66325, 15.6935, 16.214, 4.056, 8.316, 17.95725, 13.644, 15.76475)) d4 <- data.frame(cohort='two', sex='female', y=c(11.2865, 22.22775, 18.00466667, 12.80925, 16.15425, 14.88133333, 12.0895, 16.5335, 17.68925, 15.00425, 12.149)) d <- rbind(d1, d2, d3, d4) dd <- datadist(d); options(datadist='dd') # Fit the default ordinal model (prop. odds) f <- orm(y ~ cohort * sex, data=d) f anova(f) # Show intercepts as a function of y to estimate the underlying # conditional distribution. 
Result: more uniform than Gaussian alphas <- coef(f)[1 : num.intercepts(f)] yunique <- f$yunique[-1] par(mfrow=c(1,2)) plot(yunique, alphas) # Compare to distribution of residuals plot(ecdf(resid(ols(y ~ cohort * sex, data=d))), main='') M <- Mean(f) # Confidence intervals for means are approximate # Confidence intervals for odds ratios or exceedance probabilities # are correct for ordinal models Predict(f, cohort, sex, fun=M) with(d, summarize(y, llist(cohort, sex), smean.cl.normal)) rms/inst/tests/lrm2.r0000644000176200001440000000132313343003272014225 0ustar liggesusers# https://github.com/harrelfe/rms/issues/55 require(rms) set.seed(1) n <- 20 X2 <- factor(c(rep(0, n/2), rep(1, n/2))) X21 <- rep(1 : (n/2), 2) y <- rep(0 : 1, n/2) options(rmsdebug=TRUE) f <- lrm(y ~ X2 + X21, method='model.frame') attributes(f)$Design$mmcolnames # Problem is inherent to R colnames(model.matrix(~ X2 + X21)) ## https://github.com/harrelfe/rms/issues/29#issuecomment-417901353 d <- data.frame( X = sample(1:700), Y = sample(c("yes", "no"),700, replace = TRUE), Z = sample (c("Back pain", "Leg Pain", "Back pain = Leg pain"),700, replace = TRUE) ) options(rmsdebug=TRUE) lrm(Y~X+Z, data=d) d$Z <- sample(c("Back pain", "Leg Pain", "Back pain = Leg pain"),700, replace = TRUE) lrm(Y~X+Z, data=d) rms/inst/tests/latex.r0000644000176200001440000000074412716715171014507 0ustar liggesusersrequire(rms) set.seed(1) x <- runif(100) y <- abs(x - 0.5) + runif(100) f <- ols(y ~ rcs(x, 5)) latex(f, file='') require(rms) x1 <- runif(200); x2 <- runif(200) y <- sample(0:1, 200, TRUE) f <- lrm(y ~ rcs(x1) + rcs(x2)) cat('\\documentclass{article}\\begin{document}\\usepackage{longtable}\n', file='/tmp/e.tex') lat <- latex(f, file='/tmp/e.tex', append=TRUE) sink('/tmp/e.tex', append=TRUE) print(f, latex=TRUE) sink() cat('\\end{document}\n', file='/tmp/e.tex', append=TRUE) rms/inst/tests/validate.cph.s0000644000176200001440000000542312700015170015715 0ustar liggesusers## From Vikki require(rms) n <- 1000 set.seed(110222) data <- matrix(rep(0, 5000), ncol=5) data[, 1] <- sample(1:3, n, rep=TRUE, prob=c(.32, .30, .38)) for (i in 1:1000) { if (data[i, 1] == 1) data[i, 2] <- sample(1:3, 1, prob=c(.76, .18, .06)) if (data[i, 1] == 2) data[i, 2] <- sample(1:3, 1, prob=c(.67, .24, .09)) if (data[i, 1] == 3) data[i, 2] <- sample(1:3, 1, prob=c(.47, .37, .16))} for (i in 1:1000) { if (data[i, 1] == 1) data[i, 3] <- sample(1:4, 1, prob=c(.70, .19, .03, .08)) if (data[i, 1] == 2) data[i, 3] <- sample(1:4, 1, prob=c(.42, .28, .12, .18)) if (data[i, 1] == 3) data[i, 3] <- sample(1:4, 1, prob=c(.11, .29, .30, .30))} for (i in 1:1000) { if (data[i, 3] == 1) data[i, 4] <- 12*rgamma(1000, rate=0.4, shape=1.7)[c(sample(26:975, 1, prob=c(rep(1/950, 950))))] if (data[i, 3] == 2) data[i, 4] <- 12*rgamma(1000, rate=0.9, shape=1.7)[c(sample(26:975, 1, prob=c(rep(1/950, 950))))] if (data[i, 3] == 3) data[i, 4] <- 12*rgamma(1000, rate=1.2, shape=0.6)[c(sample(26:975, 1, prob=c(rep(1/950, 950))))] if (data[i, 3] == 4) data[i, 4] <- 12*rgamma(1000, rate=1.5, shape=0.7)[c(sample(26:975, 1, prob=c(rep(1/950, 950))))]} for (i in 1:1000) { if (data[i, 3] == 1) data[i, 5] <- sample(c(0, 1), 1, prob=c(.53, .47)) if (data[i, 3] == 2) data[i, 5] <- sample(c(0, 1), 1, prob=c(.17, .83)) if (data[i, 3] == 3) data[i, 5] <- sample(c(0, 1), 1, prob=c(.05, .95)) if (data[i, 3] == 4) data[i, 5] <- sample(c(0, 1), 1, prob=c(.06, .94))} d <- data.frame(tumor=factor(data[,1]), ecog=factor(data[,2]), rx=factor(data[,3]), os=data[,4], censor=data[,5]) S <- with(d, Surv(os, 
censor)) ## Check collinearity of rx with other predictors lrm(rx ~ tumor*ecog, data=d) ## What is the marginal strength of rx (assuming PH)? cph(S ~ rx, data=d) ## What is partial effect of rx (assuming PH)? anova(cph(S ~ tumor + ecog + rx, data=d)) ## What is combined partial effect of tumor and ecog adjusting for rx? anova(cph(S ~ tumor + ecog + strat(rx), data=d), tumor, ecog) ## nothing but noise ## What is their effect not adjusting for rx cph(S ~ tumor + ecog, data=d) ## huge f <- cph(S ~ tumor + ecog, x=TRUE, y=TRUE, surv=TRUE, data=d) set.seed(1) validate(f, B=100, dxy=TRUE) w <- rep(1, 1000) # only one stratum, doesn't change model ## model.matrix no longer works with one stratum if(FALSE) { f <- cph(S ~ tumor + ecog + strat(w), x=TRUE, y=TRUE, surv=TRUE, data=d) set.seed(1) validate(f, B=100, dxy=TRUE, u=60) ## identical to last validate except for -Dxy } f <- cph(S ~ tumor + ecog + strat(rx), x=TRUE, y=TRUE, surv=TRUE, time.inc=60, data=d) set.seed(1) validate(f, B=100, u=60) ## no predictive ability set.seed(1) validate(f, B=100, dxy=TRUE, u=60) ## Only Dxy indicates some predictive information; large in abs. value ## than model ignoring rx (0.3842 vs. 0.3177) rms/inst/tests/bootcov.r0000644000176200001440000000055412201156615015033 0ustar liggesusers# From Max Gordon require(rms) set.seed(1) center <- factor(sample(letters[1:8],500,TRUE)) treat <- factor(sample(c('a','b'), 500,TRUE)) y <- 8*(treat=='b') + rnorm(500,100,20) f <- ols(y ~ treat*center, x=TRUE, y=TRUE) g <- bootcov(f, B=50) lc <- levels(center) contrast(f, list(treat='b', center=lc), list(treat='a', center=lc)) rms/inst/tests/contrast2.r0000644000176200001440000000220313472310125015267 0ustar liggesusersrequire(rms) #Get a dataset/keep a few columns load('boys.rda') # originally from mice package d <- boys[,c("age", "bmi", "reg")] i <- with(d, is.na(bmi) | is.na(reg)) length(unique(d$age)); length(unique(d$age[! 
i])) #sum(is.na(dat$age)) #0 ####Models ##1) Complete case #Set datadist # dat_naomit <- na.omit(dat) # dd <- datadist(dat_naomit) # options(datadist = "dd") dd <- datadist(d); options(datadist='dd') #Run model f <- orm(age ~ bmi + reg, data = d) #Run a simple contrast contrast(f, list(bmi = 20), list(bmi = 19)) summary(f, bmi=19:20, est.all=FALSE) ##2) Multiple imputation (default settings) #Fit imputation model # imp_mod <- mice(dat, m = 5) #Happens with ‘aregImpute’ as well #Fit same orm model with imputed datasets a <- aregImpute(~ age + bmi + reg, data=d, n.impute=5) g <- fit.mult.impute( formula = age ~ bmi + reg, fitter = orm, xtrans = a, data = d ) dim(vcov(f, regcoef.only=TRUE)) dim(vcov(g, regcoef.only=TRUE)) summary(g, bmi=19:20, est.all=FALSE) #Try the same contrast contrast(g, list(bmi = 20), list(bmi = 19)) #Non-conformable dimension for matrix multiplication
rms/inst/tests/rms.r0000644000176200001440000000303613155550513014163 0ustar liggesusersrequire(rms) set.seed(1) n <- 20 x <- as.matrix(1:n) #x <- cbind(1:n, (1:n)^2) #colnames(x) <- 'age' y <- sample(0:1, n, TRUE) f <- lrm(y ~ x) N <- 100 set.seed(1) time <- rexp(N) status <- sample(0:1, N, replace = TRUE) S <- Surv(time, status) x1 <- gl(2, 50) x2 <- runif(N) x3 <- sample(1:3, N, replace=TRUE) ols(time ~ x1) ols(time ~ scored(x3)) ols(time ~ catg(x3)) # Makes last colname x1 %ia% x2 which is really inconsistent: model.matrix(~ x1 + rcs(x2) + x1 %ia% x2) x3 <- c(rep('A', 33), rep('B', 33), rep('C', 34)) x4 <- runif(N) > 0.5 # Makes last 2 colnames x3 %ia% x2x3=B * x2, x3 %ia% x2x3=C * x2 model.matrix(~ x3 + rcs(x2) + x3 %ia% x2) cph(S ~ x3 + rcs(x2) + x3 %ia% x2) ols(time ~ x1 + rcs(x2) + x1 %ia% x2) lrm(status ~ x1 + rcs(x2) + x1 %ia% x2) options(debug=TRUE,width=110) cph(S ~ x1 + rcs(x2) + x1 %ia% rcs(x2)) cph(S ~ x1 + rcs(x2) + x1 %ia% x2) cph(S ~ x1 * rcs(x2)) ols(time ~ x1 + x4) cph(S ~ x1 + x4) colnames(model.matrix(~ x1 + x4 + x1 %ia% x4)) cph(S ~ x1 + x4 + x1 %ia% x4) ## From https://github.com/harrelfe/rms/issues/29#issuecomment-303423887 ## https://github.com/harrelfe/rms/issues/29#issuecomment-328581864 d <- expand.grid( X1 = factor(c('05: X1 <= 178','01: X1 <= 6', '03: X1 <= 52', '05: X1 <= 178')), X2 = factor(c('04: X2 <= 75','01: X2 <= 6', '05: X2 > 75', '05: X2 > 75')), X3 = factor(c('04: X3 <= 552','01: X3 <= 1', '04: X3 <= 552', '06: X3 > 1313')), rep = 1 : 100) set.seed(1) d$TARGET <- sample(0 : 1, nrow(d), replace=TRUE) lrm(TARGET ~ ., data = d) options(debug=TRUE) cph(Surv(TARGET) ~ ., data=d)
rms/inst/tests/pakpahan.dta0000644000176200001440000022117313472601612015460 0ustar liggesusers [binary Stata dataset written by R, with variables data.dftime, data.demfu, data.age, data.countryid; binary contents not representable as text, omitted]
rms/inst/tests/survest.r0000644000176200001440000000560412773075020015100 0ustar liggesusers# Survival time with stratification. Thanks: Cathy Jenkins require(rms) Load(sampledf) S <- with(sampledf, Surv(fu, death)) dd <- datadist(sampledf); options(datadist='dd') f <- cph(S ~ rcs(age, c(1, 2, 4)) + rcs(sqrt(cd4), sqrt(c(210,475,875))) + strat(site), data=sampledf, x=TRUE, y=TRUE, iter.max=30, eps=1e-10) g <- coxph(S ~ rcs(age, c(1, 2, 4)) + rcs(sqrt(cd4), sqrt(c(210,475,875))) + strata(site), data=sampledf, x=TRUE, y=TRUE, control=coxph.control(eps=1e-10, iter.max=30)) # -------------------------- # # Survival probabilities # # at 1 year # # for fixed ages/CD4 counts # # -------------------------- # Pd <- expand.grid(age=c(1,2,6,9,15), cd4=c(100,200,350,500), site=levels(sampledf$site)) pd <- Pd[1, ] a <- survfit(f, newdata=pd); a$strata b <- survfit(g, newdata=pd); b$strata h <- function(a, b, chkeq=FALSE) { a <- sapply(a, length) b <- sapply(b, length) k <- unique(c(names(a), names(b))) z <- matrix(NA, nrow=length(k), ncol=2, dimnames=list(k, c('a', 'b'))) z[names(a), 'a'] <- a z[names(b), 'b'] <- b print(z) if(chkeq) { k <- intersect(names(a), names(b)) for(n in k) cat(n, ' equal:', all.equal(a[[n]], b[[n]]), '\n') } } h(a, b) a <- survfit(f, newdata=Pd) b <- survfit(g, newdata=Pd) h(a, b, chkeq=TRUE) z <- summary(survfit(g, newdata=Pd[1:1,]), times=5) z <- survest(f, newdata=Pd[33,], times=5) comp <- function(a, b, ntimes=1, time=1, ib=TRUE) { b$std.err <- b$std.err / b$surv for(n in c('time', 'surv', 'std.err', 'lower', 'upper', if(length(a$strata)) 'strata')) { x <- a[[n]] y <- b[[n]][ib] if(n %nin% c('time', 'strata') && ntimes > 1) { x <- x[, time] y <- y[seq(time, length(y), by=ntimes)] } cat(n, ' equal:', if(length(x) == length(y)) all.equal(x, y) else paste('lengths:', length(x), length(y)), '\n') } } chk <- function(f, g, strat=FALSE) { a <- survest(f, newdata=Pd[33,], times=5) b <- summary(survfit(g, newdata=Pd[33,]), times=5) cat('-------------------------- newdata 1 row, 1 time\n') comp(a, b, ib=if(strat) 2 else TRUE) a <- survest(f, newdata=Pd, times=5) b <- summary(survfit(g, newdata=Pd), times=5) cat('-------------------------- newdata all, 1 time\n') comp(a, b) a <- survest(f, newdata=Pd, times=5:6) b <- summary(survfit(g, newdata=Pd), times=5:6) cat('-------------------------- newdata all, 2 times\n') comp(a, b, ntimes=2, time=1) } chk(f, g, strat=TRUE) ## Try with no strata f <- cph(S ~ rcs(age, c(1, 2, 4)) + rcs(sqrt(cd4), sqrt(c(210,475,875))) + site, data=sampledf, x=TRUE, y=TRUE, iter.max=30, eps=1e-10) g <- coxph(S ~ rcs(age, c(1, 2, 4)) + rcs(sqrt(cd4), sqrt(c(210,475,875))) + site, data=sampledf, x=TRUE, y=TRUE, control=coxph.control(eps=1e-10, iter.max=30)) cbind(coef(f), coef(g)) chk(f, g)
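## Added spot check (not part of the original test; a minimal sketch using the
## unstratified fits f and g and the prediction grid Pd defined above):
## survest() and summary(survfit()) should return the same survival
## probability for one subject at one time, up to numerical tolerance.
a1 <- survest(f, newdata=Pd[1, ], times=5)
b1 <- summary(survfit(g, newdata=Pd[1, ]), times=5)
all.equal(as.numeric(a1$surv), as.numeric(b1$surv))  # expect TRUE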
rms/inst/tests/survplot.s0000644000176200001440000000126312700014435015252 0ustar liggesusersrequire(rms) n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('male','female'), n, TRUE)) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='female')) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" sex2 <- factor(as.vector(sex),levels=c('male','female')) dd <- datadist(age, sex, sex2) options(datadist='dd') S <- Surv(dt,e) f <- npsurv(S ~ sex) survplot(f, n.risk=TRUE) f2 <- npsurv(S ~ sex2) survplot(f2, n.risk=TRUE) f <- cph(S ~ strat(sex2), surv=TRUE) survplot(f, n.risk=TRUE, conf.int=.95) f <- cph(S ~ sex2, surv=TRUE) survplot(f, n.risk=TRUE, conf.int=.95)
rms/inst/tests/examples.Rmd0000644000176200001440000000570613350451175015470 0ustar liggesusers--- title: "Examples for rms Package" author: "FE Harrell" date: '`r Sys.Date()`' output: html_document: toc: yes toc_depth: 3 number_sections: true toc_float: collapsed: false code_folding: hide theme: cerulean --- # Introduction ## Markdown This is an R Markdown html document using the template that is [here](http://biostat.mc.vanderbilt.edu/KnitrHtmlTemplate). Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>. ```{r, results='hide'} require(rms) knitrSet(lang='markdown') ``` The following (`r hidingTOC(buttonLabel="Outline")`) uses the Hmisc `hidingTOC` function to define HTML styles related to a floating table of contents that can be minimized or collapsed to major outline levels. For more details see [this](http://biostat.mc.vanderbilt.edu/KnitrHtmlTemplate). `r hidingTOC(buttonLabel="Outline")` # Data {.tabset} ## Setup ```{r t3} getHdata(titanic3) # Get the dataset from the VU DataSets page mu <- markupSpecs$html # markupSpecs is in Hmisc subtext <- mu$subtext code <- mu$code ``` ## Data Dictionary ```{r ddict} html(contents(titanic3), maxlevels=10, levelType='table') ``` ## Descriptive Statistics`r subtext('for the', code('titanic3'), 'dataset')` ```{r t3d, height=150} # Set graphics type so that Hmisc and rms packages use plotly # Chunk header height=150 is in pixels # For certain print methods set to use html options(grType='plotly', prType='html') s <- summaryM(age + pclass ~ sex, data=titanic3) html(s) plot(s) d <- describe(titanic3) plot(d) ``` The following doesn't work because it overlays two different legends 
```{r sub,height=600,eval=FALSE} # Try combining two plots into one p <- plot(d) plotly::subplot(p[[1]], p[[2]], nrows=2, heights=c(.3, .7), which_layout=1) ``` # Logistic Regression Model ```{r lrmt,results='asis'} dd <- datadist(titanic3); options(datadist='dd') f <- lrm(survived ~ rcs(sqrt(age),5) * sex, data=titanic3) print(f) latex(f) a <- anova(f) print(a) plot(a) ``` ```{r summary} s <- summary(f, age=c(2, 21)) plot(s, log=TRUE) print(s, dec=2) ``` ```{r ggp,fig.height=5,fig.width=6} ggplot(Predict(f, age, sex), height=500, width=650) # uses ggplotly() plotp(Predict(f, age, sex)) # uses plotly directly plot(nomogram(f, fun=plogis, funlabel='Prob(survive)')) ``` # Survival Plots for `r mu$code('pbc')` Dataset Hover over the curves to see particular probability estimates and numbers at risk. Click on legend components to show/hide components.
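As a hedged aside (added; not part of the original template): an interactive `plotly` widget such as the survival plot built in the next chunk can typically be saved as a standalone HTML page with `htmlwidgets::saveWidget()`. A minimal sketch, assuming `survplotp` returns a `plotly` object captured in `w`: ```{r savewidget, eval=FALSE} # Hypothetical example; survplotp() is assumed to return a plotly widget w <- survplotp(f, time.inc=1, times=c(5, 10), fun=function(y) 1 - y) htmlwidgets::saveWidget(w, 'pbc-survival.html') ```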
```{r pbc,fig.height=6,fig.width=7} getHdata(pbc) pbc <- upData(pbc, fu.yrs = fu.days / 365.25, units = c(fu.yrs = 'year')) f <- npsurv(Surv(fu.yrs, status) ~ spiders, data=pbc) survplotp(f, time.inc=1, times=c(5, 10), fun=function(y) 1 - y) ``` # Computing Environment `r mu$session()`
rms/inst/tests/bj.r0000644000176200001440000000026712660462466013765 0ustar liggesusers## From Nicholas Stroustrup nicholas_stroustrup@hms.harvard.edu require(rms) deaths = Surv(c(1,2,3,4,5,6),c(1,0,1,1,0,1)) cg = as.factor(as.character(c(1,1,1,0,0,0))) bj(deaths ~ cg)
rms/inst/tests/nomogram2.rda0000644000176200001440000000443013526566541015601 0ustar liggesusers [binary R data file; contents not representable as text, omitted]
rms/inst/tests/pentrace.s0000644000176200001440000000073312215620207015157 0ustar liggesusers# From Yong Hao Pua require(rms) n<-20 set.seed(88) age <- rnorm(n, 50, 10) height <- rnorm(n, 1.7, 0.5) cholesterol <- rnorm(n, 200, 25) ch <- cut2(cholesterol, g=40, levels.mean=TRUE) sex <- factor(sample(c("female","male"), n,TRUE)) dbase= data.frame(sex, age, height, cholesterol, ch) dbase.dd <- datadist(dbase) options(datadist = "dbase.dd") fit <- ols (cholesterol ~ sex + height + age, x=T, y=T, data=dbase) pentrace(fit, seq(0, 20, by = 0.1))
rms/inst/tests/cph.s0000644000176200001440000000331612677746205014143 0ustar liggesusersrequire(rms) n <- 2000 set.seed(1) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- 1 + (runif(n)<=.4) sex <- factor(sample(c('Male','Female'), n, replace=TRUE, prob=c(.6,.4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex==2)) ft <- -log(runif(n))/h e <- ifelse(ft<=cens,1,0) print(table(e)) ft <- pmin(ft, cens) units(ft) <- "Year" Srv <- Surv(ft, e) dd <- datadist(age, sex) options(datadist="dd") f <- cph(Srv ~ rcs(age,4)+offset(1*(sex=="Male")), eps=1e-9) g <- coxph(Srv ~ rcs(age,4)+offset(1*(sex=="Male"))) f; g summary(f) # Make sure surv summary works f <- cph(Srv ~ age, surv='summary') f$surv.summary # Check relationship between R2 measure and censoring n <- 2000 set.seed(3) age <- 50 + 12*rnorm(n) sex <- factor(sample(c('female','male'), n, TRUE)) cens <- 15*runif(n) h <- .02 * exp(.1 * (age - 50) + .8 * (sex == 'male')) t.uncens <- -log(runif(n))/h e <- ifelse(t.uncens <= cens, 1, 0)
print(table(e)) ft <- pmin(t.uncens, cens) f <- cph(Surv(ft, e) ~ age + sex, x=TRUE) f S <- var(f$x) cens <- 40*runif(n) e <- ifelse(t.uncens <= cens, 1, 0) print(table(e)) ft <- pmin(t.uncens, cens) g <- cph(Surv(ft, e) ~ age + sex) g cens <- 5*runif(n) e <- ifelse(t.uncens <= cens, 1, 0) print(table(e)) ft <- pmin(t.uncens, cens) i <- cph(Surv(ft, e) ~ age + sex) i cens <- 2*runif(n) e <- ifelse(t.uncens <= cens, 1, 0) print(table(e)) ft <- pmin(t.uncens, cens) j <- cph(Surv(ft, e) ~ age + sex) j # Compute Kent and O'Quigley rho squared W,A tilde ko <- function(fit, S) { cof <- coef(fit) rho <- t(cof) %*% S %*% cof prn(rho) drop(rho / (rho + 1)) } ko(f, S); ko(g, S); ko(i, S); ko(j, S) ## Compare with OLS R^2 y <- log(h) + rnorm(n) f <- ols(y ~ age + sex, x=TRUE) S <- var(cbind(1, f$x)) ko(f, S)
rms/inst/tests/sampledf.rda0000644000176200001440000002714312771754302015470 0ustar liggesusers [binary R data file; contents not representable as text, omitted]
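## Added worked example (not part of the original archive), illustrating the
## Kent-O'Quigley measure computed by the ko() helper in cph.s above:
## rho^2 = b'Sb / (b'Sb + 1).  With a single predictor having variance 4 and
## coefficient 0.5, b'Sb = 0.5 * 4 * 0.5 = 1, so rho^2 = 1/(1 + 1) = 0.5:
b  <- 0.5
S1 <- matrix(4, 1, 1)
drop(t(b) %*% S1 %*% b / (t(b) %*% S1 %*% b + 1))  # 0.5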
[remainder of binary data omitted; the tar header and opening lines of the following test script were lost to corruption, and the recoverable fragment resumes here] .5, c('yhat','lower','upper')] <- NA # or p[with(p, x2=='b' & x1 > .5), 3:5] <- NA plot(p) p <- Predict(f, x1, x2, np=10) plot(p, subset=x2=='a' | (x2=='b' & x1 < .5)) ggplot(Predict(f, name='x1'), anova=anova(f), pval=TRUE)
rms/inst/tests/plotly-Predict.r0000644000176200001440000001332313030577463016300 0ustar liggesusersrequire(rms) require(plotly) n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) sex <- factor(sample(c('female','male'), n, TRUE)) country <- factor(sample(c('US', 'Canada'), n, TRUE)) i <- sex == 'female' cholesterol <- numeric(n) cholesterol[i] <- rnorm(sum(i), 170, 15) cholesterol[! i] <- rnorm(sum(! i), 200, 25) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) cholesterol[1:3] <- NA # 3 missings, at random d <- data.frame(y, blood.pressure, age, cholesterol, sex, country) rm(y, blood.pressure, age, cholesterol, sex, country) dd <- datadist(d); options(datadist='dd') f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)) + country, data=d) p <- Predict(f, cholesterol, sex) source('~/R/Hmisc/R/histSpikeg.s') source('~/R/rms/R/plotp.Predict.s') source('~/R/Hmisc/R/scat1d.s') # plotp(p, rdata=d, ylim=c(-1,2)) i <- attr(p, 'info') cllab <- if(i$conf.int) paste0(i$conf.int, ' C.L.') class(p) <- setdiff(class(p), 'Predict') fm <- function(x) format(x, digits=4) # pm <- subset(p, sex == 'male') a <- i$Design bpl <- labelPlotmath(a$label['blood.pressure'], a$units['blood.pressure'], html=TRUE) chl <- labelPlotmath(a$label['cholesterol'], a$units['cholesterol'], html=TRUE) agl <- labelPlotmath(a$label['age'], a$units['age'], html=TRUE) a <- plot_ly() ht <- with(p, paste0('cholesterol=', fm(cholesterol), '<br>', fm(yhat), ' [', fm(lower), ',', fm(upper), ']')) j <- which(p$cholesterol == min(p$cholesterol)) ht[j] <- paste0(ht[j], '<br>Adjusted to:<br>', i$adjust[1]) a <- add_lines(a, data=p, x=~cholesterol, y=~yhat, color=~sex, text=~ht, hoverinfo='text') a <- add_ribbons(a, data=p, x=~cholesterol, ymin=~lower, ymax=~upper, color=~sex, hoverinfo='none') source('~/R/Hmisc/R/histSpikeg.s') a <- histSpikeg(yhat ~ cholesterol + sex, predictions=p, data=d, plotly=a, ylim=c(-1, 2)) layout(a, xaxis=list(title=chl), yaxis=list(title=i$ylabhtml, range=c(-1, 2))) p <- Predict(f) # w <- plotp(p, rdata=d) # w$Continuous # w$Categorical i <- attr(p, 'info') ylim <- range(c(p$lower, p$upper, p$yhat), na.rm=TRUE) p <- subset(p, .predictor. %nin% c('sex', 'country')) class(p) <- 'data.frame' r <- subset(p, .predictor. == 'age') r$ht <- with(r, paste0('age=', fm(age), '<br>', fm(yhat), ' [', fm(lower), ',', fm(upper), ']')) r$ht[1] <- paste0(r$ht[1], '<br>Adjusted to:<br>', i$adjust[3]) a <- plot_ly(r) a <- add_lines(a, x=~age, y=~yhat, text=~ht, color=I('black'), hoverinfo='text', name='yhat', legendgroup='yhat') a <- add_ribbons(a, x=~age, ymin=~lower, ymax=~upper, color=I('lightgray'), hoverinfo='none', name=cllab, legendgroup=cllab) source('~/R/Hmisc/R/histSpikeg.s') a <- histSpikeg(yhat ~ age, data=d, predictions=r, ylim=ylim, plotly=a) #aa <- histSpikep(a, x=d$age, y=approx(r$age, r$yhat, xout=d$age)$y, z=1) ex <- function(x, delta=0) { r <- range(x, na.rm=TRUE) if(delta == 0) return(r) c(r[1] - delta * diff(r), r[2] + delta * diff(r)) } a <- plotly::layout(a, xaxis=list(title=agl, range=ex(d$age))) r <- subset(p, .predictor. == 'cholesterol') r$ht <- with(r, paste0('cholesterol=', fm(cholesterol), '<br>', fm(yhat), ' [', fm(lower), ',', fm(upper), ']')) r$ht[1] <- paste0(r$ht[1], '<br>Adjusted to:<br>', i$adjust[4]) b <- plot_ly(r) b <- add_lines(b, x=~cholesterol, y=~yhat, text=~ht, color=I('black'), hoverinfo='text', name='yhat', showlegend=FALSE, legendgroup='yhat') b <- add_ribbons(b, x=~cholesterol, ymin=~lower, ymax=~upper, color=I('lightgray'), hoverinfo='none', name=cllab, showlegend=FALSE, legendgroup=cllab) b <- histSpikeg(yhat ~ cholesterol, data=d, predictions=r, ylim=ylim, plotly=b, showlegend=FALSE) b <- layout(b, xaxis=list(title='cholesterol', range=ex(d$cholesterol))) plotly::subplot(a, b, nrows=1, shareY=TRUE, titleX=TRUE) p <- Predict(f) r <- subset(p, .predictor. == 'sex') a <- plot_ly(r, color=I('black'), height=plotlyParm$heightDotchart(2)) a <- add_segments(a, y=~sex, x=~lower, yend=~sex, xend=~upper, color=I('lightgray'), name=cllab, legendgroup=cllab) a <- add_markers(a, y=~sex, x=~yhat, name='Estimate', legendgroup='Estimate') #lm <- plotlyParm$lrmargin('female') a <- layout(a, xaxis=list(title=i$ylabhtml), yaxis=list(title='Sex', titlefont=list(size=10))) r <- subset(p, .predictor. == 'country') b <- plot_ly(r, color=I('black'), height=plotlyParm$heightDotchart(2)) b <- add_segments(b, y=~country, x=~lower, yend=~country, xend=~upper, color=I('lightgray'), name=cllab, legendgroup=cllab, showlegend=FALSE) b <- add_markers(b, y=~country, x=~yhat, name='Estimate', legendgroup='Estimate', showlegend=FALSE) #lm <- plotlyParm$lrmargin('Canada') b <- layout(b, xaxis=list(title=i$ylabhtml), yaxis=list(title='Country', titlefont=list(size=10))) plotly::subplot(a, b, shareX=TRUE, titleY=TRUE, nrows=2, heights=c(2, 2) / sum(c(2, 2))) p <- Predict(f, sex) class(p) <- setdiff(class(p), 'Predict') p <- Predict(f, age, sex, blood.pressure=c(120,140,160), cholesterol=c(180,200,215))
rms/inst/tests/validate.ols.r0000644000176200001440000000114112365513274015747 0ustar liggesusers## From Shane McIntosh library(rms) # Prep data Load(qt50) dd <- datadist(qt50); options(datadist = "dd") with(qt50, table(round(entropy, 5))) # Find smallest model that fails f <- ols(log1p(post_bugs) ~ entropy, data=qt50, x=T, y=T) X <- f$x n <- nrow(X) y <- f$y set.seed(1) # Wild estimates on 22nd resample for(i in 1 : 22) { j <- sample(1 : n, replace=TRUE) g <- lm.fit.qr.bare(X[j,], y[j]) print(coef(g)) } plot(X[j,], y[j], xlim=range(qt50$entropy)) abline(coef(g)) with(qt50, scat1d(entropy)) # Only 1 value of entropy > .0002, this bootstrap sample had none set.seed(1) validate(f, B=100)
rms/inst/tests/ggplotly.Rmd0000644000176200001440000000301712745711270015506 0ustar liggesusers--- title: "R Notebook" output: html_notebook --- ```{r} require(rms) require(plotly) set.seed(1) # so can reproduce x1 <- runif(100) x2 <- runif(100) x3 <- sample(c('a','b'), 100, TRUE) x4 <- sample(c('k','l','m'), 100, TRUE) y <- runif(100) dd <- datadist(x1, x2, x3, x4); options(datadist='dd') f <- ols(y ~ x1 + x2 + x3 + x4) ``` # Using `ggplotly` on `ggplot.Predict` object This method works fine. ```{r} options(grType=NULL) g <- ggplot(Predict(f,x1)) ggplotly(g) ``` ```{r} z <- 900 g <- ggplot(Predict(f,x1)) ggplotly(g, height=z, width=z) ``` This ```{r} ggplot(Predict(f,x1)) ``` That # Using `plotly_build` on `ggplot.Predict` object You see that when a plot is large, it covers the next output area.
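(Added note, a minimal sketch rather than part of the original notebook.) `plotly_build()` returns the figure specification as a plain list, which is why the helper `b()` defined in the next chunk can resize a plot simply by assigning to `$layout` fields before printing: ```{r, eval=FALSE} # Equivalent in spirit to b(g, h=500, w=500) defined below a <- plotly_build(g); a$layout$height <- 500; a$layout$width <- 500; a ```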
```{r} b <- function(g, h=NULL, w=NULL) { a <- plotly_build(g) if(length(h)) { a$layout$height <- h a$layout$width <- w } a } g <- ggplot(Predict(f,x1)) b(g) ``` ```{r} z <- 900 g <- ggplot(Predict(f,x1)) b(g, h=z, w=z) ``` This ```{r} ggplot(Predict(f,x1)) ``` That # Using `ggplotly` within `ggplot.Predict` This worked fine. ```{r} options(grType='plotly') ggplot(Predict(f,x1)) ``` ```{r} z <- 1200 ggplot(Predict(f,x1), height=z, width=z) ``` This ```{r} ggplot(Predict(f,x1)) ``` That # Repeat for more complex ggplot output 1200 at 72 dpi is 16.67 inches. Tex and histogram were cut off if figure size was omitted from chunk header. ```{r tt,fig.width=17,fig.height=17} ggplot(Predict(f), sepdiscrete='vertical', height=z, width=z) ``` And here is ... More stuff .... ```{r} hist(rnorm(1000)) ``` rms/inst/tests/scale.r0000644000176200001440000000103512375214712014447 0ustar liggesusers## Test center option for lrm require(rms) x <- rnorm(30) y <- sample(0:4, 30, TRUE) f <- lrm(y ~ pol(x, 2), scale=FALSE) g <- lrm(y ~ pol(x, 2), scale=TRUE) coef(f) - coef(g) d <- vcov(f) - vcov(g) d max(abs(d)) f <- orm(y ~ pol(x, 2), scale=FALSE) g <- orm(y ~ pol(x, 2), scale=TRUE) coef(f) coef(g) f$var / g$var vcov(f)/vcov(g) vcov(f, regcoef.only=FALSE)/vcov(g, regcoef.only=FALSE) vcov(f, intercepts='all') / vcov(g, intercepts='all') vcov(f, intercepts=1)/vcov(g, intercepts=1) vcov(f, intercepts=c(1,3))/vcov(g, intercepts=c(1,3)) rms/inst/tests/orm2.s0000644000176200001440000000127012210164407014232 0ustar liggesusers## Compare log-log ordinal model fit with continuation ratio model ## See Biometrika 72:206-7, 1985 set.seed(171) type <- 1 n <- 300 y <- sample(0:50, n, rep=TRUE) sex <- factor(sample(c("f","m"), n, rep=TRUE)) age <- runif(n, 20, 50) sexo <- sex; ageo <- age require(rms) f <- orm(y ~ age + sex, family=loglog) g <- orm(y ~ age + sex, family=cloglog) h <- orm(-y ~ age + sex, family=loglog) i <- orm(-y ~ age + sex, family=cloglog) p <- function(fit) coef(fit)[c('age','sex=m')] p(f); p(g); p(h); p(i) for(type in 1:2) { u <- cr.setup(if(type==1) y else -y) Y <- u$y cohort <- u$cohort s <- u$subs sex <- sexo[s] age <- ageo[s] j <- lrm(Y ~ cohort + age + sex) print(p(j)) } rms/inst/tests/ggplot2-without-ggplot.Predict.r0000644000176200001440000001614312677767352021352 0ustar liggesusersrequire(rms) require(ggplot2) n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) sex <- factor(sample(c('female','male'), n, TRUE)) i <- sex == 'female' cholesterol <- numeric(n) cholesterol[i] <- rnorm(sum(i), 170, 15) cholesterol[! i] <- rnorm(sum(! 
i), 200, 25) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) cholesterol[1:3] <- NA # 3 missings, at random d <- data.frame(y, blood.pressure, age, cholesterol, sex) rm(y, blood.pressure, age, cholesterol, sex) dd <- datadist(d); options(datadist='dd') f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), data=d) p <- Predict(f, cholesterol) class(p) <- setdiff(class(p), 'Predict') a <- attributes(p)$info$Design g <- ggplot(p, aes(x=cholesterol, y=yhat)) + geom_line() xl <- labelPlotmath(a$label['blood.pressure'], a$units['blood.pressure']) xl2 <- labelPlotmath(a$label['cholesterol'], a$units['cholesterol']) g <- g + xlab(xl) g <- g + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0) g g + histSpikeg(yhat ~ cholesterol, p, d, ylim=c(-1, 1.25)) g + histSpikeg(yhat ~ cholesterol, data=d, ylim=c(-1, 1.25)) g + histSpikeg(yhat ~ cholesterol, data=d, ylim=c(-1, 1.25), side=3) p <- Predict(f, cholesterol, sex) class(p) <- setdiff(class(p), 'Predict') g <- ggplot(p, aes(x=cholesterol, y=yhat, color=sex)) + geom_line() + xlab(xl2) + ylim(-1, 1) # show.legend=FALSE gets rid of slash in legend boxes # See http://stackoverflow.com/questions/10660775/ggplot-legend-slashes g <- g + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) g g + histSpikeg(yhat ~ cholesterol + sex, p, d, ylim=c(-1, 1.25)) p <- Predict(f, sex) class(p) <- setdiff(class(p), 'Predict') ggplot(p, aes(x=sex, y=yhat)) + coord_flip() + geom_point() + geom_errorbar(aes(ymin=lower, ymax=upper), width=0) p <- Predict(f) a <- attributes(p)$info yl <- a$ylabPlotmath xlabs <- a$Design$label unts <- a$Design$units ylim <- range(pretty( if(TRUE) c(p$yhat, p$lower, p$upper) else p$yhat), na.rm=TRUE) grid::grid.newpage() grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 2))) nr <- 1; nc <- 0 for(w in unique(p$.predictor.)) { nc <- nc + 1 if(nc > 2) {nr <- nr + 1; nc <- 1} i <- p$.predictor. 
== w z <- p[i, w] yhat <- p[i, 'yhat'] l <- levels(z) ll <- length(l) xl <- labelPlotmath(xlabs[w], unts[w]) zz <- data.frame(z, yhat) g <- ggplot(zz, aes(x=z, y=yhat)) + ylim(ylim) + theme(plot.margin = unit(rep(.2, 4), 'cm')) g <- g + if(ll) geom_point() else geom_line() g <- g + xlab(xl) + ylab(yl) g <- g + if(ll) geom_errorbar(data=p[i,], aes(ymin=lower, ymax=upper), width=0) else geom_ribbon(data=p[i,], aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) print(g, vp = grid::viewport(layout.pos.row = nr, layout.pos.col = nc)) } # Change y scale to be uniform # try to narrow gaps p <- Predict(f, age, sex, blood.pressure=c(120,140,160), cholesterol=c(180,200,215)) class(p) <- setdiff(class(p), 'Predict') g <- ggplot(p, aes(x=age, y=yhat, color=sex)) + geom_line() g <- g + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) g + facet_grid(blood.pressure ~ cholesterol) g + facet_grid(cholesterol ~ blood.pressure) eval(parse(text='g + facet_grid(cholesterol ~ blood.pressure)')) # attr(p, 'info')$varying shows 4 predictors varying in order: age bp ch sex g <- ggplot(p, aes(x=age, y=yhat)) + geom_line() g <- g + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) g + facet_grid(blood.pressure ~ cholesterol*sex) g + facet_grid(cholesterol*sex ~ blood.pressure) # Add superposition g <- ggplot(p, aes(x=age, y=yhat, color=blood.pressure)) + geom_line() g <- g + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) g=g + facet_grid(sex ~ blood.pressure) if(FALSE) { # doesn't work - where is .predictor.? p <- as.data.frame(p) g <- ggplot(p, aes(y=yhat)) + facet_wrap(~ .predictor., scales='free_x') + xlab(NULL) require(plyr) pa <- subset(p, .predictor. == 'age') pc <- subset(p, .predictor. == 'cholesterol') g <- g + geom_line(subset=.(.predictor.=='age'), aes(x=age)) + geom_ribbon(subset=.(.predictor.=='age'), aes(x=age, ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) + geom_line(subset=.(.predictor.=='cholesterol'), aes(x=cholesterol)) + geom_ribbon(subset=.(.predictor.=='cholesterol'), aes(x=cholesterol, ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) g g + geom_point(subset=.(.predictor.=='sex'), aes(x=as.numeric(sex))) + geom_errorbar(subset=.(.predictor.=='sex'), aes(x=as.numeric(sex), ymin=lower, ymax=upper), width=0) ## Will not work: ## g + geom_point(subset=.(.predictor.=='sex'), aes(x=sex)) + ## geom_errorbar(subset=.(.predictor.=='sex'), ## aes(x=sex, ymin=lower, ymax=upper), width=0) ## Error: Discrete value supplied to continuous scale xx <- NULL pred <- p$.predictor. for(i in unique(pred)) xx <- c(xx, p[pred == i, i]) p$xx <- xx z <- ggplot(p, aes(x=xx, y=yhat)) + facet_wrap(~ .predictor., scales='free_x') + xlab(NULL) + geom_line() + geom_ribbon(aes(x=xx, ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) z } ## From http://stackoverflow.com/questions/11979017/changing-facet-label-to-math-formula-in-ggplot2 facet_wrap_labeller <- function(gg.plot, labels=NULL) { require(gridExtra) g <- ggplotGrob(gg.plot) gg <- g$grobs strips <- grep("strip_t", names(gg)) for(ii in seq_along(labels)) { modgrob <- getGrob(gg[[strips[ii]]], "strip.text", grep=TRUE, global=TRUE) gg[[strips[ii]]]$children[[modgrob$name]] <- editGrob(modgrob,label=labels[ii]) } g$grobs <- gg class(g) = c("arrange", "ggplot", class(g)) g } if(FALSE) { pold <- p p$.predictor. 
<- factor(p$.predictor., names(a$label)) pmlabels <- vector('expression', length(a$label)) names(pmlabels) <- levels(p$.predictor.) for(v in names(a$label)) pmlabels[v] <- labelPlotmath(a$label[v], a$units[v]) ## Re-order panels by original model specification z <- ggplot(p, aes(x=xx, y=yhat)) + facet_wrap(~ .predictor., scales='free_x', ncol=3) + xlab(NULL) + geom_line() + geom_ribbon(aes(x=xx, ymin=lower, ymax=upper), alpha=0.2, linetype=0, show.legend=FALSE) facet_wrap_labeller(z, pmlabels) } rms/inst/tests/survfit.cph.s0000644000176200001440000001546312700015445015640 0ustar liggesusers## Compare SE of log survival probability from survest and survfit require(rms) set.seed(123) n <- 200 age <- rnorm(n, 50, 10) x <- 50*runif(n) ct <- round(365*runif(n)) h <- .003*exp(.005*age+0.008*x) ft <- round(-log(runif(n))/h) status <- ifelse(ft <= ct,1,0) ft <- pmin(ft, ct) S <- Surv(ft, status) fit <- cph(S ~ age + x, x=TRUE, y=TRUE) d <- data.frame(age=mean(age), x=mean(x)) s <- survest(fit, d, times=56) prn(with(s, cbind(time, surv, std.err, lower, upper)), 'survest') s <- survfit(fit, d) k <- which(s$time == 56) prn(with(s, cbind(time, surv, std.err, lower, upper)[k,]), 'survfit') fit <- cph(S ~ age + x, x=TRUE, y=TRUE, surv=TRUE, time.inc=56) k <- which(fit$time == 56) prn(fit$surv.summary, 'cph surv=T') s <- survest(fit, d, times=56) prn(with(s, cbind(time, surv, std.err, lower, upper)), 'survest from cph surv=T') s <- survfit(fit, d) k <- which(s$time == 56) prn(with(s, cbind(time, surv, std.err, lower, upper)[k,]), 'survfit from cph surv=T') survest(fit, data.frame(age=40, x=25), times=56) pp <- rms:::survfit.cph(fit, data.frame(age=40, x=25),se.fit=TRUE) cbind(pp$std.err, pp$lower,pp$upper)[pp$time==56] ##-------------------------------------------------------------- require(survival) plots <- TRUE topdf <- TRUE testrms <- FALSE testDesign <- FALSE additive <- FALSE roundt <- FALSE chkpts <- FALSE ## Simulate a small example to compare results with survival package nfemale <- 100 nmale <- 9*nfemale n <- nfemale + nmale set.seed(1) age <- 50 + 12*rnorm(n) sex <- as.factor(c(rep('Male', nmale), rep('Female', nfemale))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+1.5*(sex=='Female')) dt <- -log(runif(n))/h e <- ifelse(dt <= cens, 1, 0) dt <- if(roundt) round(pmin(dt, cens)) else pmin(dt, cens) dtmax <- tapply(dt, sex, max) Srv <- Surv(dt, e) f <- coxph(if(additive) Srv ~ age + strata(sex) else Srv ~ age*strata(sex)) levels(sex) new <- expand.grid(age=50, sex=levels(sex)) new1 <- new[1,] new2 <- new[2,] if(plots) { if(topdf) pdf('/tmp/z.pdf') gr <- function(col=gray(.9), lwd=1) abline(h=seq(.2,.8,by=.2), col=col, lwd=lwd) par(mfrow=c(2,2)) s <- survfit(f, new1, censor=FALSE) plot(s, conf.int=TRUE, main=paste(new1$sex, 'coxph survfit newdata new1')) lines(s, col='red') gr() s <- survfit(f, new2, censor=FALSE) plot(s, conf.int=TRUE, main=paste(new2$sex, 'coxph survfit newdata new2')) lines(s, col='red') gr() } s <- survfit(f, new, censor=FALSE) plot(s, main='coxph combined newdata plot.survfit') gr() z <- with(s, data.frame(time, surv, std.err, lower, upper, se=std.err, strata=c(rep('Female', s$strata[1]), rep('Male', s$strata[2]))) ) z options(digits=3) if(FALSE && plots) { with(subset(z, strata=='Female'), { plot(c(time,dtmax['Female']), c(surv.1, min(surv.1)), type='s', col='red', xlim=c(0,15), ylim=c(0,1), xlab='', ylab='') if(chkpts) points(time, surv.1, pch='f', col='red') lines(time, lower.1, type='s', col='red') lines(time, upper.1, type='s', col='red') }) with(subset(z, 
strata=='Male'), { lines(c(time,dtmax['Male']), c(surv.2, min(surv.2)), type='s', col='green') if(chkpts) points(time, surv.2, pch='m', col='green') lines(time, lower.2, type='s', col='green') lines(time, upper.2, type='s', col='green') }) title('coxph combined newdata manual') gr() } if(testrms) { require(rms) system('cat ~/R/rms/pkg/R/*.s > /tmp/rms.s') source('/tmp/rms.s') dd <- datadist(age,sex); options(datadist='dd') Srv <- Surv(dt, e) g <- cph(if(additive) Srv ~ age + strat(sex) else Srv ~ age*strat(sex), surv=TRUE) for(sx in levels(sex)) { k <- survest(g, data.frame(age=50, sex=sx)) cat(sx, '\t', 'survest surv=TRUE\n') print(with(k, data.frame(time=time, surv=surv, std.err=std.err, lower=lower, upper=upper))) } if(plots) { survplot(g, sex, age=50, conf.int=TRUE) w <-survest(g, data.frame(age=50, sex='Female')) if(chkpts) points(w$time, w$surv, pch='f') w <- survest(g, data.frame(age=50, sex='Male')) if(chkpts) points(w$time, w$surv, pch='m') title('rms survplot + survest surv=T') gr() } h <- cph(if(additive) Srv ~ age + strat(sex) else Srv ~ age*strat(sex), x=TRUE, y=TRUE) s <- survfit(h, new) unclass(s) st <- rep(names(s$strata), s$strata) i <- 0 for(sx in levels(sex)) { i <- i + 1 cat(sx, '\t', 'survfit.cph surv=F\n') j <- st==paste('sex=', sx, sep='') z <- with(s, data.frame(time=time[j], surv=surv[j,i], std.err=std.err[j,i], lower=lower[j,i], upper=upper[j,i])) print(z) k <- survest(h, data.frame(age=50, sex=sx)) cat(sx, '\t', 'survest surv=F\n') print(with(k, data.frame(time=time, surv=surv, std.err=std.err, lower=lower, upper=upper))) } ## i <- s2$strata ## with(s2, data.frame(time=time[,i], surv=surv[,i], se=std.err[,i], ## lower=lower[,i], upper=upper[,i])) if(plots) { survplot(h, sex, age=50, conf.int=TRUE) w <- survest(h, data.frame(age=50, sex='Female')) if(chkpts) points(w$time, w$surv, pch='f') w <- survest(h, data.frame(age=50, sex='Male')) if(chkpts) points(w$time, w$surv, pch='m') title('rms survplot + survest x=T y=T') gr() } } if(testDesign) { ## To compare with Design require(Design) Srv <- Surv(dt, e) new <- expand.grid(age=50, sex=levels(sex)) dd <- datadist(age,sex); options(datadist='dd') g <- cph(if(additive) Srv ~ age + strat(sex) else Srv ~ age*strat(sex), surv=TRUE) if(plots) { survplot(g, sex=NA, age=50, conf.int=TRUE, conf='bands') gr() title('Design survplot surv=T') } options(digits=3) for(sx in levels(sex)) { k <- survest(g, data.frame(age=50, sex=sx)) cat(sx, '\t', 'survest Design surv=TRUE\n') print(with(k, data.frame(time=time, surv=surv, std.err=std.err, lower=lower, upper=upper))) } g <- cph(if(additive) Srv ~ age + strat(sex) else Srv ~ age*strat(sex), x=TRUE, y=TRUE) cat('cph x=T y=T survfit\n') print(unclass(survfit(g, new, conf.type='log'))) if(plots) { survplot(g, sex=NA, age=50, conf.int=TRUE, conf='bands') title('Design survplot x=T y=T') gr() } } if(topdf) dev.off() rms/inst/tests/lrm.ordinal.s0000644000176200001440000000110111651566431015600 0ustar liggesusersrequire(rms) set.seed(1) n <- 20 y <- sample(1:4, n, replace=TRUE) x1 <- runif(n) x2 <- runif(n) d <- data.frame(x1, x2) f <- lrm(y ~ x1 + x2) s <- 1:4 f$linear.predictors[s] options(digits=3) predict(f)[s] # kint=1 predict(f, d)[s] # kint=1 xb <- as.vector(cbind(x1, x2) %*% as.matrix(coef(f)[c('x1','x2')])) coef(f)[1] + xb[s] # kint=1 predict(f, type='fitted.ind')[s,] # get Prob(Y=4) plogis(coef(f)[3] + xb)[s] g <- lrm(y ~ x1 + x2, x=TRUE, linear.predictors=FALSE) predict(g)[s] # correct; kint=1 predict(g, d)[s] # agrees (kint=1) predict(g, type='fitted.ind')[s,] 
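## Added sketch (not part of lrm.ordinal.s): how the kint=1 predictions
## above relate to the other intercepts of the ordinal fit. Uses only the
## objects f, d, s, and xb already created in this test.
k <- coef(f)                          # intercepts y>=2, y>=3, y>=4, then x1, x2
xb1 <- predict(f, d)                  # kint=1, i.e. k[1] + X beta
p.ge3 <- plogis(xb1 - k[1] + k[2])    # shift to 2nd intercept: Prob(Y >= 3)
p.ge3[s]                              # same as plogis(k[2] + xb)[s]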
rms/inst/tests/ols.r0000644000176200001440000000014713000010213014130 0ustar liggesusersrequire(rms) n = 800 x = rnorm(n) x[4:6] = NA y = x + rnorm(n) fit = ols(y ~ x) print(fit, latex=TRUE) rms/inst/tests/lrm.ols.penalty.r0000644000176200001440000000175612252367420016432 0ustar liggesusersrequire(rms) # Example of penalty from help page using lrm: n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) p <- pentrace(f, seq(.2,1,by=.05)) plot(p) p$diag # may learn something about fractional effective d.f. # for each original parameter update(f,penalty=.02) # Example modified for ols: fols <- ols(blood.pressure ~sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) pols <- pentrace(fols, seq(0,10,by=.5)) plot(pols) pols$diag update(fols,penalty=10) rms/inst/tests/cph5.r0000644000176200001440000000264213546131600014217 0ustar liggesusers## Check that the median is NA when there is lots of censoring require(rms) n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h label(dt) <- 'Follow-up Time' e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) units(dt) <- "Year" dd <- datadist(age, sex) options(datadist='dd') S <- Surv(dt,e) f <- cph(S ~ age + strat(sex), surv=TRUE) g <- cph(S ~ age + sex, surv=TRUE) h <- f # set to f for strat(sex), g for covariate adj for sex par(mfrow=c(2,2)) for(a in c(45, 55, 65, 75)) { survplot(h, sex, age=a) title(paste0('Age=', a)) abline(h=0.5, col=gray(.85)) } ggplot(Predict(h, age=20:80, sex, time=11)) ggplot(Predict(h, age=20:80, sex, time=12)) ggplot(Predict(h, age=20:80, sex, time=13)) ggplot(Predict(h, age=20:80, sex, time=14)) quan <- Quantile(h) med <- function(x) quan(lp=x, stratum=2) ages <- 70:80 Predict(h, age=ages, sex='Male')$yhat lp <- predict(h, data.frame(sex='Male', age=ages)) data.frame(age=ages, lp=lp, median=med(lp)) ## Estimate survival curves at these medians if(length(h$strata)) { times <- h$time[['sex=Male']] surv <- h$surv[['sex=Male']] } else { times <- h$time surv <- h$surv } for(l in lp) print(approx(times, surv ^ exp(l), xout=med(l))$y) p <- Predict(h, age=ages, fun=med) plot(p) rms/inst/tests/ggplot.Predict.Rd.timing.r0000644000176200001440000001125312677767420020120 0ustar liggesusersrequire(rms) tt <- function() { dp <- function(...) { pdf('/tmp/z.pdf') print(...) 
dev.off() } n <- 350 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) + .01 * (blood.pressure - 120) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <<- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) an <- anova(fit) # Plot effects in two vertical sub-panels with continuous predictors on top # ggplot(Predict(fit), sepdiscrete='vertical') # Plot effects of all 4 predictors with test statistics from anova, and P dp(ggplot(Predict(fit), anova=an, pval=TRUE)) # ggplot(Predict(fit), rdata=llist(blood.pressure, age)) # spike histogram plot for two of the predictors # p <- Predict(fit, name=c('age','cholesterol')) # Make 2 plots # ggplot(p) # p <- Predict(fit, age=seq(20,80,length=100), sex, conf.int=FALSE) # # Plot relationship between age and log # odds, separate curve for each sex, # ggplot(p, subset=sex=='female' | age > 30) # No confidence interval, suppress estimates for males <= 30 # p <- Predict(fit, age, sex) # ggplot(p, rdata=llist(age,sex)) # rdata= allows rug plots (1-dimensional scatterplots) # on each sex's curve, with sex- # specific density of age # If data were in data frame could have used that # p <- Predict(fit, age=seq(20,80,length=100), sex='male', fun=plogis) # works if datadist not used # ggplot(p, ylab=expression(hat(P))) # plot predicted probability in place of log odds # per <- function(x, y) x >= 30 # ggplot(p, perim=per) # suppress output for age < 30 but leave scale alone # Do ggplot2 faceting a few different ways p <- Predict(fit, age, sex, blood.pressure=c(120,140,160), cholesterol=c(180,200,215)) # ggplot(p) dp(ggplot(p, cholesterol ~ blood.pressure)) # ggplot(p, ~ cholesterol + blood.pressure) # color for sex, line type for blood.pressure: dp(ggplot(p, groups=c('sex', 'blood.pressure'))) # Add legend.position='top' to allow wider plot # Map blood.pressure to line thickness instead of line type: # ggplot(p, groups=c('sex', 'blood.pressure'), aestype=c('color', 'size')) # Plot the age effect as an odds ratio # comparing the age shown on the x-axis to age=30 years # ddist$limits$age[2] <- 30 # make 30 the reference value for age # Could also do: ddist$limits["Adjust to","age"] <- 30 # fit <- update(fit) # make new reference value take effect # p <- Predict(fit, age, ref.zero=TRUE, fun=exp) # ggplot(p, ylab='Age=x:Age=30 Odds Ratio', # addlayer=geom_hline(yintercept=1, col=gray(.8)) + # geom_vline(xintercept=30, col=gray(.8)) + # scale_y_continuous(trans='log', # breaks=c(.5, 1, 2, 4, 8)))) # Compute predictions for three predictors, with superpositioning or # conditioning on sex, combined into one graph p1 <- Predict(fit, age, sex) p2 <- Predict(fit, cholesterol, sex) p3 <- Predict(fit, blood.pressure, sex) p <- rbind(age=p1, cholesterol=p2, blood.pressure=p3) dp(ggplot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE)) # ggplot(p, groups='sex', 
varypred=TRUE, adj.subtitle=FALSE, sepdiscrete='vert') # Make an 'interaction plot', forcing the x-axis variable to be # plotted at integer values but labeled with category levels n <- 100 set.seed(1) gender <- c(rep('male', n), rep('female',n)) m <- sample(c('a','b'), 2*n, TRUE) d <<- datadist(gender, m); options(datadist='d') anxiety <- runif(2*n) + .2*(gender=='female') + .4*(gender=='female' & m=='b') tapply(anxiety, llist(gender,m), mean) f <- ols(anxiety ~ gender*m) p <- Predict(f, gender, m) # ggplot(p) # horizontal dot chart; usually preferred for categorical predictors # ggplot(p, flipxdiscrete=FALSE) # back to vertical dp(ggplot(p, groups='gender')) dp(ggplot(p, ~ m, groups=FALSE, flipxdiscrete=FALSE)) } print(system.time(tt())) rms/inst/tests/Gls.s0000644000176200001440000000027311747513145014115 0ustar liggesuserslibrary(rms) library(nlme) set.seed(1) d <- data.frame(x = rnorm(50), y = rnorm(50)) gls(y ~ x, data=d, correlation = corARMA(p=2)) Gls(y ~ x, data=d, correlation = corARMA(p=2), B=10) rms/inst/tests/survplot2.r0000644000176200001440000000336012712432415015340 0ustar liggesusers## From John.Stickley@bch.nhs.uk require(rms) test_data <- structure( list(group = structure( c(1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor"), survival = c(17.46772065, 98.92209528,78.29864249, 9.822413669, 79.55050186, 82.36595474, 1.42766417, 71.48805748, 61.33571345, 84.62631825, 93.03022837, 44.04354499, 81.06711649, 26.19891261, 68.64477557, 52.2160246, 17.780942, 4.515968877, 95.46066172, 73.63010059, 40.13833451, 20.39467002, 50.80529216, 70.23087236, 23.89309088, 53.86527662, 3.422234859, 35.30675488, 50.07307746, 4.68602929, 86.04636345, 72.98976535, 33.18048902, 37.94566436, 83.17678398, 16.95356411, 80.5844794, 8.599290846, 46.06581857, 1.644574571, 34.81582745, 49.96017595, 11.74200883, 60.07697075, 80.40946019, 55.00705828, 17.75483404, 98.69523629, 68.15668013, 4.959304343), outcome = c(0L, 0L, 0L,0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = c("group", "survival", "outcome"), class = "data.frame", row.names = c(NA, -50L)) table(test_data$group) s <- npsurv(Surv(survival, outcome) ~ group, data = test_data) s$n.risk[c(1, s$strata['group=a'] + 1)] survplot(s, conf='none', lwd=2, lty=c(1,1,1), n.risk=TRUE, time.inc=5, label.curves=FALSE, cex.lab=1.75) rms/inst/tests/psm2.s0000644000176200001440000000047212700015363014237 0ustar liggesusers# Thanks: Chris Andrews require(survival) left <- c(1, 3, 5, NA) right <-c(2, 3, NA, 4) Surv(left, right, type='interval2') survreg(Surv(left, right, type='interval2') ~ 1) require(rms) Surv(left, right, type='interval2') # err args(Surv) psm(Surv(left, right, type='interval2') ~ 1) rms/inst/tests/val.surv.data.txt0000644000176200001440000000766013653677051016451 0ustar liggesusersSEX CURSMOKE BMI AGE DEATH TIMEDTH 2 0 29.76 54 0 8766 1 1 27.61 48 1 2157 2 1 25.89 43 0 8766 1 1 30.85 38 0 8766 2 0 18.14 61 1 7436 1 1 26.56 51 0 8766 2 1 25.34 44 1 8470 2 0 25.41 65 1 8738 1 0 25.91 39 0 8766 1 1 30.12 52 1 2682 2 0 25.45 57 1 7819 2 1 20.02 37 1 6691 2 0 24.8 52 1 3833 1 0 31.06 41 0 8766 2 1 22.35 43 0 8766 2 0 29.55 53 1 4537 2 1 27.96 50 0 8766 1 0 24.54 47 0 8766 2 0 39.4 59 1 6201 2 0 20.94 49 0 8766 2 
1 23.07 58 0 8766 1 1 29.6 38 0 8766 1 1 21.18 52 0 8766 2 0 23.65 40 0 8766 2 1 21.33 44 1 2141 2 0 24.82 49 0 8766 2 0 28.58 51 0 8766 1 1 34.53 64 1 7015 2 0 25.12 65 1 3998 1 0 25.95 44 0 8766 1 1 25.63 38 0 8766 1 1 24.07 57 1 424 2 0 28.68 55 1 178 2 0 21.1 38 0 8766 2 0 24.81 51 1 4854 2 0 22.82 61 1 2531 2 0 24.04 44 0 8766 2 0 29.35 49 0 8766 2 0 20.82 43 0 8766 1 0 22.73 52 0 8766 2 0 23.03 62 0 8766 2 0 19.98 46 0 8766 2 0 21.27 44 1 4585 1 1 28.94 42 1 1240 1 1 22.85 40 1 6808 1 1 23.65 55 0 8766 2 1 23.54 40 0 8766 1 0 33.03 52 0 8766 1 1 24.01 40 0 8766 1 1 22.33 45 0 8766 1 1 28.44 39 0 8766 2 0 26.45 63 0 8766 1 1 25.12 44 0 8766 1 1 23.87 37 0 8766 2 1 22.66 60 0 8766 2 0 25.38 42 0 8766 1 1 29.35 53 0 8766 2 0 31.48 39 0 8766 2 0 25.27 57 0 8766 2 1 21.57 61 1 7774 2 1 24.12 39 0 8766 2 1 24.25 52 0 8766 2 0 24.94 62 1 1748 1 1 24.86 60 1 3077 1 1 28.7 35 0 8766 1 1 21.03 45 0 8766 2 1 23.48 45 0 8766 2 0 27.73 56 1 8577 2 0 26.39 48 1 6531 1 0 29.82 53 1 1622 2 1 24.22 39 0 8766 2 1 21.4 46 0 8766 1 1 25.8 41 0 8766 2 1 32.82 44 0 8766 2 1 24.91 57 1 3573 1 0 26.22 64 1 1492 2 0 29.87 61 0 8766 2 1 26.21 55 0 8766 2 0 22.19 44 0 8766 2 0 23.72 56 1 3873 1 0 23.75 42 1 8627 2 0 30.28 59 0 8766 1 1 28.34 39 0 8766 2 1 20.52 44 1 4028 1 1 20.72 48 1 7560 2 1 25.2 46 1 5755 2 0 30.18 64 1 7406 2 1 27.23 55 1 5272 2 0 28.35 49 0 8766 2 0 35.19 57 1 7306 2 1 28.18 52 1 310 1 0 40.11 52 1 6269 1 1 28.89 42 0 8766 1 0 25.14 44 0 8766 1 0 29.35 52 0 8766 1 1 21.03 35 0 8766 2 0 26.83 48 0 8766 2 1 23.06 40 0 8766 2 0 25.04 49 0 8766 2 1 19.09 41 0 8766 2 1 28.41 37 0 8766 2 1 16.59 52 0 8766 2 0 23.87 65 1 7661 2 0 29.97 64 0 8766 1 0 28.53 36 0 8766 2 1 27.26 38 0 8766 1 1 26.25 47 0 8766 2 1 24.07 50 0 8766 2 0 27.53 64 0 8766 2 0 23.06 50 0 8766 2 0 25.69 38 0 8766 2 1 22.36 44 0 8766 2 1 27.06 38 1 7667 1 1 22.16 61 1 4542 2 1 27.91 63 1 7256 2 1 23.72 44 0 8766 2 1 29.29 44 0 8766 2 0 25.03 41 0 8766 2 1 22.65 42 1 2960 2 0 22.19 60 1 8452 2 0 30.1 53 0 8766 2 1 18.09 57 1 8031 1 0 22.12 48 0 8766 2 0 26.13 57 1 753 1 1 21.45 48 0 8766 1 1 26.55 46 1 7223 2 0 25.31 61 0 8766 2 1 21.51 39 0 8766 1 0 21.96 42 0 8766 2 1 23.24 44 0 8766 2 0 23.41 57 1 1003 1 0 28.68 41 0 8766 2 0 33.11 54 1 5321 1 0 25.29 66 1 3790 1 0 24.38 61 1 5876 2 0 22.57 55 1 2719 1 1 25.68 36 0 8766 2 0 31.44 62 0 8766 2 1 24.63 67 1 4162 1 1 22.16 37 0 8766 2 0 26.51 64 1 7583 1 1 24.83 52 0 8766 2 1 27.3 56 0 8766 1 0 26.33 36 0 8766 2 1 18.16 39 0 8766 2 0 28.59 45 0 8766 1 1 28.3 42 0 8766 2 0 28.66 62 0 8766 2 1 20.49 66 1 4341 1 1 23.58 56 0 8766 2 0 22.02 61 0 8766 2 0 23.29 49 0 8766 1 1 25.34 45 1 7448 1 0 22.49 58 1 5733 1 1 24.04 41 0 8766 2 1 19.74 65 1 526 2 0 26.58 68 1 5589 2 0 31.12 62 0 8766 2 0 23.86 49 0 8766 1 0 29.63 58 1 1400 1 0 22.73 64 0 8766 2 0 21.97 61 0 8766 1 1 24.67 38 0 8766 2 0 23.17 58 0 8766 2 1 27.42 47 0 8766 1 0 32.33 66 1 2391 1 1 18.64 60 1 3573 2 0 32.51 63 1 3510 2 0 25.83 65 0 8766 1 1 25.55 65 1 4338 2 0 20.86 57 1 537 1 0 23.77 60 1 8020 2 0 29.45 49 0 8766 2 0 24.33 50 0 8766 1 1 33.49 63 1 6700 1 1 23.72 42 0 8766 2 1 24.53 48 0 8766 2 0 25.21 53 0 8766 2 1 21.64 37 0 8766 2 0 23.44 47 0 8766 1 0 29.46 50 0 8766 1 0 25.88 40 0 8766 2 0 25.69 57 0 8766 2 1 23.65 53 0 8766 1 0 23.98 63 1 2364 2 0 30.91 48 0 8766 2 1 23.48 40 0 8766 1 1 29.47 65 1 7317 2 1 22.19 39 0 8766 1 1 25.7 51 0 8766 1 0 17.17 62 1 3568 2 1 23.18 43 0 8766 1 1 25.38 39 0 8766 2 0 25.98 64 0 8766 1 0 29.51 60 1 8084 1 0 27.99 62 1 6997 2 0 20.31 53 0 8766 1 1 27.1 57 1 7507 1 0 24.04 65 
1 8516 2 1 20.13 49 0 8766 rms/inst/tests/cph4.r0000644000176200001440000000064713321445530014222 0ustar liggesusersrequire(rms) options(debug=TRUE) set.seed(1) n <- 100 x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) x4 <- sample(0:4, n, TRUE) x5 <- ordered(sample(1:3, n, TRUE), levels=1:3, labels=c('I','II','III')) S <- Surv(runif(n)) # f <- cph(S ~ x1 + pol(x2, 2) + rcs(x3, 4) + scored(x4) + x5) # FAILS options(contrasts=c('contr.treatment', 'contr.treatment')) f <- cph(S ~ x1 + pol(x2, 2) + rcs(x3, 4) + scored(x4) + x5) f rms/inst/tests/model.matrix.s0000644000176200001440000000132512245215301015755 0ustar liggesusersd <- data.frame(x1=sample(c('a','b','c'), 20, TRUE), x2=sample(c('A','B'), 20, TRUE), y=1:20) f <- y ~ x1 * strat(x2) strat <- function(x) x Terms <- terms(f, specials='strat', data=d) specials <- attr(Terms, 'specials') stra <- specials$strat require(survival) # version 2.37-4; has untangle.specials z <- untangle.specials(Terms, 'strat', 1) Terms.ns <- Terms[-z$terms] X <- model.frame(f, data=d) colnames(model.matrix(Terms.ns, X)) # If don't remove specials before model.matrix, try to do afterward x <- model.matrix(Terms, X) colnames(x) asg <- attr(x, 'assign') i <- colnames(x) == '(Intercept)' | (grepl('strat\\(', colnames(x)) & !grepl(':', colnames(x))) x <- x[, !i] # How to reconstruct asg? rms/inst/tests/boys.rda0000644000176200001440000002016413416657163014651 0ustar liggesusers[binary xz-compressed R data file; contents omitted]
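## Added sketch (an assumption, not in model.matrix.s) answering the
## "How to reconstruct asg?" question in model.matrix.s above: keep the
## assign entries for the retained columns, then renumber the surviving
## terms consecutively.
asg2 <- asg[!i]                      # drop entries for removed columns
asg2 <- match(asg2, unique(asg2))    # renumber remaining terms from 1
asg2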
rms/inst/tests/offset.r0000644000176200001440000000322613472345556014660 0ustar liggesusersrequire(rms) set.seed(1) n <- 1000 cvd <- data.frame(id = sample(1 : 100, n, TRUE), bmi = rnorm(n, 25, 3), wtpre = sample(c(-1, 1), n, TRUE)) t.enter <- runif(n) t.exit <- t.enter + runif(n) cens <- sample(0:1, n, TRUE) cvd$S <- Surv(t.enter, t.exit, cens) label(cvd$id) <- "Id"
label(cvd$bmi) <- "BMI" units(cvd$bmi) <- "Kg/m2" dd <- datadist(cvd); options(datadist = "dd") #S <- with(cvd, Surv(t.enter, t.exit, cens)) cph(S ~ rcs(bmi) + cluster(id) + wtpre, data=cvd) f <- cph (S ~ pol(bmi, 3) + cluster(id) + offset(wtpre), data = cvd, eps=1e-6) g <- coxph(S ~ pol(bmi, 3) + cluster(id) + offset(wtpre), data = cvd, control=coxph.control(eps=1e-6)) coef(f) - coef(g) f g Predict(f, bmi=20, offset=list(wtpre=5)) # 4.849534 d <- data.frame(bmi=20, wtpre=5) predict(f, d) # 4.849534 k <- coef(f) mp <- function(fit, bmi=20, wtpre=5) { k <- coef(fit) k0 <- if(length(fit$center)) - fit$center else { m <- fit$means k0 <- - unname(k[1] * m[1] + k[2] * m[2] + k[3] * m[3]) } k0 + k[1]*bmi + k[2] * bmi^2 + k[3] * bmi^3 + wtpre } mp(f) # 4.849534 mp(g) # " f <- cph(S ~ pol(bmi, 3) + offset(wtpre), data=cvd, eps=1e-6) g <- coxph(S ~ pol(bmi, 3) + offset(wtpre), data=cvd, control=coxph.control(eps=1e-6)) coef(f) - coef(g) mp(f) # 4.849534 mp(g) # " predict(f, d) # -.1504; ignores offset Predict(f, bmi=20, offset=list(wtpre=5)) # -.1504 p1 <- Predict(f, bmi, offset=list(wtpre=0)) p2 <- Predict(f, bmi, offset=list(wtpre=5)) plot(rbind(p1, p2)) z <- expand.grid(x1=LETTERS[1:3], x2=c('a','b','c'), reps=1:10) z$S <- Surv(runif(nrow(z))) cph(S ~ x1 * strat(x2), data=z) rms/inst/tests/ggplot2.r0000644000176200001440000002355713012415176014747 0ustar liggesusers## This runs all of the examples in the help file for ggplot2.Predict, ## removing \dontrun{} and not commenting out commands require(rms) n <- 500 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' # Specify population model for log odds that Y=1 L <- .4*(sex=='male') + .045*(age-50) + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) + .01 * (blood.pressure - 120) # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) an <- anova(fit) p <- Predict(fit, age) ggplot(p) ggplot(p, ylim=c(-.5, .5)) ggplot(p, xlim=c(40,50)) # problem reported by JoAnn Alvarez p <- Predict(fit) # Plot effects in two vertical sub-panels with continuous predictors on top ggplot(p, sepdiscrete='vertical') # Plot effects of all 4 predictors with test statistics from anova, and P ggplot(p, anova=an, pval=TRUE) ggplot(p, rdata=llist(blood.pressure, age)) # spike histogram plot for two of the predictors ggplot(p, rdata=llist(blood.pressure, age), sepdiscrete='vertical') p <- Predict(fit, name=c('age','cholesterol')) # Make 2 plots ggplot(p) p <- Predict(fit, age=seq(20,80,length=100), sex, conf.int=FALSE) # # Plot relationship between age and log # odds, separate curve for each sex, ggplot(p, subset=sex=='female' | age > 30) # No confidence interval, suppress estimates for males <= 30 p <- Predict(fit, age, sex) ggplot(p, rdata=llist(age,sex)) # rdata= allows rug plots (1-dimensional scatterplots) # on each sex's curve, with sex- # specific density of age # If data were in data frame could have used that p <- Predict(fit, age=seq(20,80,length=100), 
sex='male', fun=plogis) # works if datadist not used ggplot(p, ylab=expression(hat(P))) # plot predicted probability in place of log odds per <- function(x, y) x >= 30 ggplot(p, perim=per) # suppress output for age < 30 but leave scale alone # Do ggplot2 faceting a few different ways p <- Predict(fit, age, sex, blood.pressure=c(120,140,160), cholesterol=c(180,200,215)) ggplot(p) ggplot(p, cholesterol ~ blood.pressure) ggplot(p, ~ cholesterol + blood.pressure) # color for sex, line type for blood.pressure: ggplot(p, groups=c('sex', 'blood.pressure')) # Add legend.position='top' to allow wider plot # Map blood.pressure to line thickness instead of line type: ggplot(p, groups=c('sex', 'blood.pressure'), aestype=c('color', 'size')) # Plot the age effect as an odds ratio # comparing the age shown on the x-axis to age=30 years ddist$limits$age[2] <- 30 # make 30 the reference value for age # Could also do: ddist$limits["Adjust to","age"] <- 30 fit <- update(fit) # make new reference value take effect p <- Predict(fit, age, ref.zero=TRUE, fun=exp) # The following fails because of the 3rd element of addlayer # if ylim is not given, as y=0 is included and can't take log ggplot(p, ylab='Age=x:Age=30 Odds Ratio', ylim=c(.5, 10), addlayer=geom_hline(yintercept=1, col=gray(.7)) + geom_vline(xintercept=30, col=gray(.7)) + scale_y_continuous(trans='log', breaks=c(.5, 1, 2, 4, 8))) # Compute predictions for three predictors, with superpositioning or # conditioning on sex, combined into one graph p1 <- Predict(fit, age, sex) p2 <- Predict(fit, cholesterol, sex) p3 <- Predict(fit, blood.pressure, sex) p <- rbind(age=p1, cholesterol=p2, blood.pressure=p3) ggplot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE, colfill='blue') ggplot(p, groups='sex', varypred=TRUE, adj.subtitle=FALSE, sepdiscrete='vert', rdata=data.frame(age, cholesterol, sex)) # For males at the median blood pressure and cholesterol, plot 3 types # of confidence intervals for the probability on one plot, for varying age ages <- seq(20, 80, length=100) p1 <- Predict(fit, age=ages, sex='male', fun=plogis) # standard pointwise p2 <- Predict(fit, age=ages, sex='male', fun=plogis, conf.type='simultaneous') # simultaneous p3 <- Predict(fit, age=c(60,65,70), sex='male', fun=plogis, conf.type='simultaneous') # simultaneous 3 pts # The previous only adjusts for a multiplicity of 3 points instead of 100 f <- update(fit, x=TRUE, y=TRUE) g <- bootcov(f, B=500, coef.reps=TRUE) p4 <- Predict(g, age=ages, sex='male', fun=plogis) # bootstrap percentile p <- rbind(Pointwise=p1, 'Simultaneous 100 ages'=p2, 'Simultaneous 3 ages'=p3, 'Bootstrap nonparametric'=p4) # as.data.frame so will call built-in ggplot ggplot(as.data.frame(p), aes(x=age, y=yhat)) + geom_line() + geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, linetype=0)+ facet_wrap(~ .set., ncol=2) # Plots for a parametric survival model n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) label(age) <- "Age" sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) t <- -log(runif(n))/h label(t) <- 'Follow-up Time' e <- ifelse(t<=cens,1,0) t <- pmin(t, cens) units(t) <- "Year" ddist <- datadist(age, sex) Srv <- Surv(t,e) # Fit log-normal survival model and plot median survival time vs. 
age f <- psm(Srv ~ rcs(age), dist='lognormal') med <- Quantile(f) # Creates function to compute quantiles # (median by default) p <- Predict(f, age, fun=function(x) med(lp=x)) ggplot(p, ylab="Median Survival Time") # Note: confidence intervals from this method are approximate since # they don't take into account estimation of scale parameter # Fit an ols model to log(y) and plot the relationship between x1 # and the predicted mean(y) on the original scale without assuming # normality of residuals; use the smearing estimator # See help file for rbind.Predict for a method of showing two # types of confidence intervals simultaneously. set.seed(1) x1 <- runif(300) x2 <- runif(300) ddist <- datadist(x1,x2) y <- exp(x1+x2-1+rnorm(300)) f <- ols(log(y) ~ pol(x1,2)+x2) r <- resid(f) smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean') formals(smean) <- list(yhat=numeric(0), res=r[!is.na(r)]) #smean$res <- r[!is.na(r)] # define default res argument to function ggplot(Predict(f, x1, fun=smean), ylab='Predicted Mean on y-scale') # Make an 'interaction plot', forcing the x-axis variable to be # plotted at integer values but labeled with category levels n <- 100 set.seed(1) gender <- c(rep('male', n), rep('female',n)) m <- sample(c('a','b'), 2*n, TRUE) d <- datadist(gender, m); options(datadist='d') anxiety <- runif(2*n) + .2*(gender=='female') + .4*(gender=='female' & m=='b') tapply(anxiety, llist(gender,m), mean) f <- ols(anxiety ~ gender*m) p <- Predict(f, gender, m) ggplot(p) # horizontal dot chart; usually preferred for categorical predictors ggplot(p, flipxdiscrete=FALSE) # back to vertical ggplot(p, groups='gender') ggplot(p, ~ m, groups=FALSE, flipxdiscrete=FALSE) # Example from Yonghao Pua n <- 500 set.seed(17) age <- rnorm(n, 50, 10) # create an ordinal variable duration <- factor(sample(c('None', '10', '20', '30' ,'>30'), n,TRUE)) duration <- factor(duration, levels(duration)[c(5, 1:4)]) # arrange factor levels in ascending order levels(duration) # shows the correct order "None" "10" "20" "30" ">30" label(age) <- 'Age' label(duration) <- 'Duration' L <-.045*(age-50) +.01*(duration=='10') +.2*(duration=='20')+ .3*(duration=='30')+ .9*(duration=='>30') y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, duration) options(datadist='ddist') fit <- lrm(y ~ age + duration) p <- Predict(fit, fun=plogis) ggplot(p) ggplot(p, sepdiscrete='vertical', colfill='green', anova=anova(fit)) ## From JoAnn Alvarez 2016-10 n <- 800 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) eyecolor <- factor(sample(c('green','blue'), n,TRUE)) L <- .4*(sex=='male') + .045*(age-50) + 3*(eyecolor == 'blue')*(sex=='female') + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) y <- ifelse(runif(n) < plogis(L), 1, 0) ddist <- datadist(age, eyecolor, cholesterol, sex) options(datadist='ddist') fit <- lrm(y ~ sex * (eyecolor + age + rcs(cholesterol,4))) p <- Predict(fit, cholesterol, sex, eyecolor) ggplot(p) # Confidence bands automatically suppessed: ggplot(p, groups = c('eyecolor', 'sex'), aestype=c('color', 'linetype')) colorscale <- function(...) 
scale_color_manual(..., values=c("#000000", "#E69F00", "#56B4E9", "#009E73","#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggplot(as.data.frame(p), aes(x=cholesterol, y=yhat, color=eyecolor, linetype=sex)) + labs(x=expression(cholesterol), y="log odds", title="Adjusted to:age=50.2 ") + geom_line(data=p, mapping=aes(color=eyecolor, linetype=sex)) + colorscale(name=expression(eyecolor)) + scale_linetype_discrete(name=expression(sex)) + theme(legend.position='right') + # geom_ribbon(data=p, aes(ymin=lower, ymax=upper), alpha=0.2, # linetype=0, fill=I('black'), show.legend=FALSE) + coord_cartesian(ylim=c(-4, 6)) + theme(plot.title=element_text(size=8, hjust=1)) rms/inst/tests/cphtdc.r0000644000176200001440000001341613662733536014645 0ustar liggesusers# From Max Gordon require(rms) # Same simulated data set as used in the cph-example n <- 1000 set.seed(731) age <- 50 + 12*rnorm(n) sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) dt <- -log(runif(n))/h e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) test <- data.frame(age = age, sex = sex, Start = 0, dt = dt, e = e) dd <<- datadist(test); options(datadist='dd') f <- cph(Surv(dt,e) ~ rcs(age,4) + sex, x=TRUE, y=TRUE) cox.zph(f, "rank") # tests of PH # Now to the actual time-interaction if(! require(Epi)) quit(save='no') lxs <- Lexis(entry = list(Timeband = Start), exit = list(Timeband = dt, Age = age + dt), exit.status = e, data = test) subset(lxs, lex.id %in% 1:3) spl <- splitLexis(lxs, time.scale = "Timeband", breaks = seq(from = 0, to = ceiling(max(lxs$lex.dur)), by = .5)) subset(spl, lex.id %in% 1:3) spl$Stop <- spl$Timeband + spl$lex.dur dd <- datadist(spl) ####################### # Regular interaction # ####################### coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ # Age + sex * Timeband, data = spl) # # # coef exp(coef) se(coef) z p # Age 0.0420 1.043 0.00558 7.53 5.0e-14 # sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 # Timeband NA NA 0.00000 NA NA # sexMale:Timeband 0.0868 1.091 0.05360 1.62 1.1e-01 # # Likelihood ratio test=72.7 on 3 df, p=1.11e-15 n= 13421, number of events= 183 # Warning message: # In coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ : # X matrix deemed to be singular; variable 3 cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, data = spl) # Gives: # X matrix deemed to be singular; variable Timeband # # Model Did Not Converge. No summary provided. 
############################### # Forced singular interaction # ############################### coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeband, data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ # Age + sex + sex:Timeband, data = spl) # # # coef exp(coef) se(coef) z p # Age 0.0420 1.043 0.00558 7.53 5.0e-14 # sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 # sexFemale:Timeband -0.0868 0.917 0.05360 -1.62 1.1e-01 # sexMale:Timeband NA NA 0.00000 NA NA # # Likelihood ratio test=72.7 on 3 df, p=1.11e-15 n= 13421, number of events= 183 # Warning message: # In coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ : # X matrix deemed to be singular; variable 4 coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ # Age + sex + I((sex == "Male") * Timeband), data = spl) # # # coef exp(coef) se(coef) z p # Age 0.0420 1.043 0.00558 7.53 5.0e-14 # sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 # I((sex == "Male") * Timeband) 0.0868 1.091 0.05360 1.62 1.1e-01 cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeband, data = spl) # Gives: # X matrix deemed to be singular; variable sex=Male * NA # # Model Did Not Converge. No summary provided. cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ rcs(Age, 4) + sex + asis((sex == "Male")*Timeband), data = spl) # Gives: # Err. in limits[[zname]] <- if (any(Limnames == zname)) { : # more elements supplied than there are to replace ############# # After fix # ############# fit_coxph <- coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), data = spl) fit_cph <- cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + asis((sex == "Male")*Timeband), data = spl) # Basically the same cbind(coxph=coef(fit_coxph), cph=coef(fit_cph)) # Although numerically not equivalent expect_true(sum(abs(coef(fit_cph) - coef(fit_coxph))) < .Machine$double.eps) ############# # Needs fix # ############# Predict(fit_cph) # Err. in asis((sex == "Male") * Timeband) : object 'Timeband' not found # Not really working as expected contrast(fit_cph, a=list(sex = "Male"), b=list(sex = "Female")) # Err. in Getlimi(name[i], Limval, need.all = TRUE) : # no limits defined by datadist for variable sex_Timeband contrast(fit_cph, a=list(sex = "Male", Timeband = 0), b=list(sex = "Female", Timeband = seq(0, 10, by=.1))) # Err. in gendata(list(coefficients = c(0.0420352254526414, -0.945650117874665, : # factor(s) not in design: Timeband #Ok, thank you. 
I can get around the problem by manually generating an interaction variable - seems to work satisfactory: spl_alt <- within(spl, { Male_time_int = (sex == "Male")*Timeband }) spl_alt$lex.Cst <- NULL spl_alt$Start <- NULL dd <- datadist(spl_alt) options(datadist = "dd") model <- cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + Male_time_int, data = spl_alt) contrast(model, a = list(sex = "Male", Male_time_int = 0:5), b = list(sex = "Female", Male_time_int = 0)) rms/inst/tests/robcov.r0000644000176200001440000000060213205556702014652 0ustar liggesusers## Test handling of NAs in original data by robcov require(rms) set.seed(1) n <- 100 x1 <- runif(n) x2 <- runif(n) y <- x1 + x2 + 3 * runif(n) x1[1] <- NA x1 <- c(x1, x1) x2 <- c(x2, x2) y <- c(y, y) clus <- c(1 : n, 1 : n) f <- ols(y ~ x1 + x2, subset = 1 : n) vcov(f) g <- ols(y ~ x1 + x2, x=TRUE, y=TRUE) vcov(g) h <- robcov(g, clus) h sqrt(diag(h$var)) vcov(h) vcov(h) / vcov(f) rms/inst/tests/val.surv.s0000644000176200001440000000265013654013621015142 0ustar liggesusersrequire(rms) set.seed(123) # so can reproduce results n <- 2500 age <- 50 + 12*rnorm(n) sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) cens <- 15*runif(n) h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) t <- -log(runif(n))/h units(t) <- 'Year' label(t) <- 'Time to Event' ev <- ifelse(t <= cens, 1, 0) t <- pmin(t, cens) S <- Surv(t, ev) d <- data.frame(t, ev, sex, age) ddist <- datadist(d); options(datadist="ddist") fit <- cph(Surv(t,ev) ~ sex + age, data=d, subset=1:1500, surv=TRUE, x=TRUE, y=TRUE, time.inc=5) vd <- d[-(1:1500),] vs <- val.surv(fit, vd, S=with(vd, Surv(t, ev)), u=5) par(mfrow=c(2,1)) plot(vs) g <- survest(fit, vd, times=5) vs <- val.surv(fit, vd, S=with(vd, Surv(t,ev)), u=5, est.surv=g$surv) plot(vs) ## From Aida Eslama d <- read.csv("val.surv.data.txt", sep="") n = nrow(d) ## Choix d'un modele avec le BIC f = survreg(Surv(TIMEDTH, DEATH) ~ CURSMOKE + SEX + BMI + log(AGE), dist = "weibull", data = d, y=TRUE) f$y[1:10] AIC(f, k = log(n)); #1586.518 ## Verification des hypotheses f = psm(Surv(TIMEDTH, DEATH) ~ CURSMOKE + SEX + BMI + log(AGE), dist = "weibull", data = d, x = TRUE, y = TRUE) f$y[1:10] f$coefficients std.resid = residuals(f, type = "censored.normalized")[,1]; summary(std.resid) val.surv(f) cox.resid = -log(val.surv(f)$est.surv) summary(cox.resid) head(cox.resid, 20) rms/inst/tests/which.influence.r0000644000176200001440000000073213654560150016435 0ustar liggesusers## Fromm Yuwei Zhu require(rms) n <- 100 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) outcome <- sample(c(1,2,3), n,TRUE) sex <- factor(sample(c('female','male'), n,TRUE)) ddist <- datadist(outcome, age, blood.pressure, sex) options( datadist = 'ddist') modelform <- outcome ~ age+sex+blood.pressure f <-lrm(modelform, x = TRUE, y = TRUE) which.influence(f) rms/inst/tests/calibrate.r0000644000176200001440000000217213343244116015306 0ustar liggesuserslibrary(rms) n <- 1000 # define sample size set.seed(17) # so can reproduce the results age <- rnorm(n, 50, 10) blood.pressure <- rnorm(n, 120, 15) cholesterol <- rnorm(n, 200, 25) sex <- factor(sample(c('female','male'), n,TRUE)) label(age) <- 'Age' # label is in Hmisc label(cholesterol) <- 'Total Cholesterol' label(blood.pressure) <- 'Systolic Blood Pressure' label(sex) <- 'Sex' units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc units(blood.pressure) <- 'mmHg' L <- .4*(sex=='male') + .045*(age-50) + 
(log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) y <- ifelse(runif(n) < plogis(L), 1, 0) cholesterol[1:3] <- NA # 3 missings, at random ddist <- datadist(age, blood.pressure, cholesterol, sex) options(datadist='ddist') f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), x=TRUE, y=TRUE) cal <- calibrate(f, B=80) class(cal) plot(cal, xlim=c(0,1.0), ylim=c(0,1.0), xlab="Predicted Probability of 1y Event-Free", ylab="Actual 1y", subtitles=TRUE, riskdist=FALSE, scat1d.opts=list(nhistSpike=200)) rms/inst/tests/robcov3.r0000644000176200001440000000204413537543146014745 0ustar liggesusers# Test execution time of cluster sandwich covariance estimator with logistic models # GEE using a working independence binary logistic regression model # 1,000,000 records on 100,000 subjects, 10 covariates # Times are on a Lenovo X1 laptop running Linux require(rms) set.seed(1) n <- 1000000 subjects <- 100000 y <- sample(0:1, n, TRUE) x <- matrix(runif(10*n), ncol=10) id <- sample(1:subjects, n, TRUE) # Fit binary logistic model with working independence structure system.time(f <- lrm(y ~ x, x=TRUE, y=TRUE)) # 4s # g will have betas from f but with robust covariance matrix system.time(g <- robcov(f, id)) # 1.4s diag(vcov(f)) / diag(vcov(g)) # Check robcov's ability to ignore duplicate data m <- n / subjects set.seed(1) y <- rep(sample(0 : 1, subjects, replace=TRUE), each=m) id <- rep(1 : subjects, each=m) j <- 1 x <- matrix(NA, nrow=n, ncol=10) for(i in 1 : subjects) { x[j : (j + m - 1), ] <- matrix(rep(runif(10), each=m), nrow=m) j <- j + m } f <- lrm(y ~ x, x=TRUE, y=TRUE) g <- robcov(f, id) diag(vcov(f) / vcov(g)) rms/inst/tests/contrast.r0000644000176200001440000000273312650517145015225 0ustar liggesusers# From JoAnn Alvarez require(rms) set.seed(1) age <- rnorm(200,40,12) sex <- factor(sample(c('female','male'),200,TRUE)) logit <- (sex=='male') + (age-40)/5 y <- ifelse(runif(200) <= plogis(logit), 1, 0) f <- lrm(y ~ pol(age,2)*sex) anova(f) # Compare a 30 year old female to a 40 year old male # (with or without age x sex interaction in the model) contrast(f, list(sex='female', age=30), list(sex='male', age=40)) # Test for interaction between age and sex, duplicating anova contrast(f, list(sex='female', age=30), list(sex='male', age=30), list(sex='female', age=c(40,50)), list(sex='male', age=c(40,50)), type='joint') # Duplicate overall sex effect in anova with 3 d.f. 
contrast(f, list(sex='female', age=c(30,40,50)), list(sex='male', age=c(30,40,50)), type='joint') jim <- contrast(f, list(sex = "male", age=30), list(sex = "male", age=40)) print(jim, fun = exp) jane <- contrast(f, list(sex = c("male", "female"), age=30), list(sex = c("male", "female"), age=40)) print(jane, fun = exp) # From http://stats.stackexchange.com/questions/191063/lrm-and-orm-contrast-rms-package require(rms) set.seed(1) x <- factor(rbinom(100,2,0.6), labels = c("a","b","c"), ordered = TRUE) y <- factor(rbinom(100,1,0.5), labels=c("no","yes")) f <- lrm(x ~ y) g <- orm(x ~ y) coef(f); coef(g) print(contrast(f, list(y='no'), list(y='yes')), X=TRUE) print(contrast(g, list(y='no'), list(y='yes')), X=TRUE) rms/inst/tests/robcov2.r0000644000176200001440000000236013472373342014742 0ustar liggesusers# From Yong Hao Pua require(rms) n.subjects <- 100 # original number is 30 on the rms documentation ages <- rnorm(n.subjects, 50, 15) sexes <- factor(sample(c('female','male'), n.subjects, TRUE)) logit <- (ages - 50) / 5 prob <- plogis(logit) # true prob not related to sex id <- sample(1:n.subjects, 300, TRUE) # subjects sampled multiple times length(unique(id)) # note that don't always get n.subjects sampled length(table(id)) table(table(id)) # frequencies of number of obs/subject age <- ages[id] sex <- sexes[id] # In truth, observations within subject are independent: y <- ifelse(runif(300) <= prob[id], 1, 0) f <- lrm(y ~ lsp(age, 50) * sex, x=TRUE, y=TRUE ) g <- robcov(f, id) g$clusterInfo # From Jennifer Thompson, modified afun <- function(...) bootcov(..., B=500) # or just robcov set.seed(56) df <- data.frame(y = rnorm(n = 100), x1 = rnorm(n = 100), x2 = rnorm(mean = 5, sd = 0.5, n = 100)) cat('Error expected in solvet when nsides=2\n') for(nsites in 7:2) { cat('nsites:', nsites, '\n') df$site <- sample(LETTERS[1:nsites], size = 100, replace = TRUE) f <- ols(y ~ rcs(x1, 3) + rcs(x2, 3), data = df, x = TRUE, y = TRUE) g <- afun(f, cluster=df$site) print(anova(g)) } rms/inst/tests/orm-example.r0000644000176200001440000000451512174522762015620 0ustar liggesusers## See http://stats.stackexchange.com/questions/65548/which-model-should-i-use-to-fit-my-data-ordinal-and-non-ordinal-not-normal-an pred_1 = rep(c(10,20,50,100),30) pred_2 = rep(c('a','b','c'),40) resp = c(0.08666667, 0.04000000, 0.13333333, 0.04666667, 0.50000000, 0.04000000, 0.02666667, 0.24666667, 0.15333333, 0.04000000, 0.06666667, 0.06666667, 0.03333333, 0.04000000, 0.26000000, 0.04000000, 0.04000000, 1.00000000, 0.28666667, 0.03333333, 0.06666667, 0.15333333, 0.06666667, 0.28000000, 0.35333333, 0.06000000, 0.06000000, 0.05333333, 0.96666667, 0.06666667, 0.03333333, 0.22000000, 0.04666667, 0.04666667, 0.05333333, 0.05333333, 0.05333333, 0.08000000, 0.48666667, 0.08666667, 0.02666667, 0.21333333, 0.45333333, 0.04666667, 0.36000000, 0.06666667, 0.04000000, 0.06000000, 0.07333333, 0.06000000, 0.04000000, 0.04666667, 0.30000000, 0.08666667, 0.07333333, 0.06666667, 0.29333333, 0.36000000, 0.17333333, 0.04000000, 0.09333333, 0.11333333, 0.03333333, 0.08000000, 0.27333333, 0.08666667, 0.03333333, 0.04000000, 0.02666667, 0.07333333, 0.07333333, 0.02000000, 0.02666667, 0.08000000, 0.07333333, 0.02666667, 0.06666667, 0.07333333, 0.95333333, 0.05333333, 0.04000000, 0.11333333, 0.04000000, 0.07333333, 0.06666667, 0.05333333, 0.04000000, 0.04000000, 0.06000000, 0.12666667, 0.04666667, 0.04000000, 0.21333333, 0.05333333, 0.97333333, 0.11333333, 0.02666667, 0.04000000, 0.03333333, 0.37333333, 0.25333333, 0.06000000, 0.06000000, 0.06000000, 
0.04666667, 0.26666667, 0.98000000, 0.02000000, 0.26000000, 0.06000000, 0.05333333, 0.28000000, 0.99333333, 0.04666667, 0.02666667, 0.04000000, 0.12666667, 0.04666667, 0.18000000, 0.03333333) require(rms) row <- 0 png('/tmp/lookdist.png') for(gvar in list(pred_1, pred_2)) { row <- row + 1; col <- 0 for(fun in list(qlogis, qnorm, function(y) -log(-log(y)))) { col <- col + 1 cat(row, col, '\n') print(Ecdf(~resp, groups=gvar, fun=fun, main=paste(c('pred_1','pred_2')[row], c('logit','probit','log-log')[col])), split=c(col,row,3,2), more=row < 2 | col < 3) } } dev.off() f <- orm(resp ~ pred_1 + pred_2) f anova(f) dd <- datadist(pred_1, pred_2); options(datadist='dd') bar <- Mean(f) png('/tmp/Predict.png') plot(Predict(f, fun=bar), ylab='Predicted Mean') dev.off() png('/tmp/or.png') plot(summary(f), log=TRUE) dev.off() rms/inst/tests/strat.model.matrix.r0000644000176200001440000000071112577306230017120 0ustar liggesusersrequire(rms) d <- expand.grid(a=c('a1','a2'), b=c('b1','b2')) d$y <- Surv(c(1,3,2,4)) f <- y ~ a * strat(b) m <- model.frame(f, data=d) Terms <- terms(f, specials='strat', data=d) specials <- attr(Terms, 'specials') temp <- survival:::untangle.specials(Terms, 'strat', 1) Terms <- Terms[- temp$terms] # X <- rms:::Design(m) # atr <- attr(X, 'Design') # atr$colnames model.matrix(Terms, m) colnames(model.matrix(Terms, m)[, -1, drop=FALSE]) cph(f, data=d) rms/inst/tests/rms2.r0000644000176200001440000000142412777541375014263 0ustar liggesusers## From Jerome Asselin https://github.com/harrelfe/rms/issues/32 require(rms) df <- data.frame(y=rnorm(21), day=weekdays(Sys.Date()-(1:21), abbr=TRUE)) df$day.ordered <- with(df, factor(as.character(day), levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"), ordered=TRUE)) options(contrasts=c("contr.treatment", "contr.treatment")) fit1 <- ols(y ~ day, data=df) fit2 <- ols(y ~ day.ordered, data=df) df.char <- df df.char$day.ordered <- as.character(df.char$day.ordered) w <- cbind(orig = predict(fit1), orig.newdata = predict(fit1, newdata=df), ordered = predict(fit2), ordered.newdata = predict(fit2, newdata=df), ordered.workaround = predict(fit2, newdata=df.char)) round(w[, -1] - w[, 1], 3) rms/inst/tests/survfit.timedep.s0000644000176200001440000000256412306633773016527 0ustar liggesusersrequire(rms) for (i in unique(pbcseq$id)) { if (i == 1) { l <- length(pbcseq$id[pbcseq$id==i]) start <- pbcseq$day[pbcseq$id==i] stop <- c(pbcseq$day[pbcseq$id==i][2:l],pbcseq$futime[pbcseq$id==i][1]) event <- c(rep(0,l-1),pbcseq$status[pbcseq$id==i][1]) } else { l <- length(pbcseq$id[pbcseq$id==i]) if (l==1) { t1 <- pbcseq$day[pbcseq$id==i] t2 <- pbcseq$futime[pbcseq$id==i] e <- pbcseq$status[pbcseq$id==i] start <- c(start,t1) stop <- c(stop,t2) event <- c(event,e) } else if (l>1) { t1 <- pbcseq$day[pbcseq$id==i] t2 <- c(pbcseq$day[pbcseq$id==i][2:l],pbcseq$futime[pbcseq$id==i][1]) e <- c(rep(0,l-1),pbcseq$status[pbcseq$id==i][1]) start <- c(start,t1) stop <- c(stop,t2) event <- c(event,e) } } } pbcseq <- data.frame(pbcseq,start,stop,event) #bili is time-dependent covariate fit <- cph(Surv(start, stop, event==2) ~ sex + log(bili) + rcs(age, 4), surv=T, x=T,y=T, data=pbcseq, eps=1e-8) temp <- pbcseq[1:2,] #First id temp$S <- with(temp, Surv(start, stop, event==2)) surv1 <- survfit(fit, newdata=temp, individual=TRUE) surv2 <- survest(fit, newdata=temp, individual=TRUE) A <- with(pbcseq, rcspline.eval(age, nk=4, inclx=TRUE)) temp$A <- A[1:2, ] f <- coxph(Surv(start, stop, event==2) ~ sex + log(bili) + A, x=TRUE, y=TRUE, data=pbcseq) s1 <- survfit(f, newdata=temp, 
individual=TRUE) rms/inst/tests/npsurv.r0000644000176200001440000000032113427601557014720 0ustar liggesusersrequire(rms) d <- data.frame(time=1:500, death=sample(0:1, 500, TRUE)) f <- npsurv(Surv(time, death) ~ 1, data=d, conf.type='log-log') g <- function(y) 1 - y survplot(f, fun=g) survplot(f, fun=g, conf='bars') rms/inst/tests/cph2.s0000644000176200001440000000205212243500601014202 0ustar liggesusers# From Rob Kushler require(rms) require(MASS) data(VA) # use VA lung cancer data in MASS package for examples # add labels to the factors VA <- within(VA, { treat <- factor(treat,labels=c("std","test")) cell <- factor(cell,labels=c("Squamous","Small","Adeno","Large")) prior <- factor(prior,labels=c("No","Yes")) }) str(VA) (VAddist <- datadist(VA)) options(datadist="VAddist") # model for illustration (VA.cph2 <- cph(Surv(stime,status) ~ treat*(rcs(Karn,4)+cell+prior), VA, x=TRUE, y=TRUE)) par(mfrow=c(1,2)) survplot(VA.cph2,treat,xlim=c(0,400)) title("Karn=60 cell=Small prior=No") survplot(VA.cph2,treat,Karn=30,prior="Yes",xlim=c(0,400)) title("Karn=30 cell=Small prior=Yes") ss1 <- survest(VA.cph2) ss1 S <- with(VA, Surv(stime, status)) f <- cph(S ~ (rcs(Karn,3) + prior)^2 + treat*cell, VA) with(VA, table(treat)); with(VA, table(cell)) f <- cph(S ~ treat*strat(cell), VA) f <- cph(Surv(stime,status) ~ (treat+rcs(Karn,3)+prior)^2 + treat*strat(cell), VA, x=TRUE, y=TRUE) rms/inst/tests/rcs.r0000644000176200001440000000141212677773114014161 0ustar liggesusers## See http://stats.stackexchange.com/questions/147733/equation-of-a-fitted-smooth-spline-and-its-analytical-derivative ## This fits a natural spline (linear tail restricted) using the truncated ## power basis. Default knots are not used; instead specify 4 knots. require(rms) x <- 1:11 y <- c(0.2,0.40, 0.6, 0.75, 0.88, 0.99, 1.1, 1.15, 1.16, 1.16, 1.16 ) dd <- datadist(x); options(datadist='dd') f <- ols(y ~ rcs(x, c(3, 5, 7, 9))) f anova(f) ggplot(Predict(f)) + geom_point(aes(x=x, y=y), data=data.frame(x,y)) Function(f) ## if have latex installed can also use latex(f) ## Function re-expresses the restricted cubic spline in simplest form ## The first derivative is: ## function(x) 0.174 - 3 * 0.00279 * pmax(x - 3, 0) ^ 2 + 3 * 0.0015 * pmax(x - 5, 0) ^ 2 + ... 
rms/inst/tests/lrm.s0000644000176200001440000000422113071155771014156 0ustar liggesusersrequire(rms) n <- 400000 x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) x4 <- runif(n) x5 <- runif(n) x6 <- runif(n) x7 <- runif(n) x8 <- runif(n) x9 <- runif(n) x10 <- runif(n) X <- cbind(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10) L <- x1 + x2 + x3 - 1.5 y <- ifelse(runif(n) <= plogis(L), 1, 0) fm <- y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 system.time(f <- glm(fm, family=binomial)) print(summary(f), digits=7) system.time(g <- lrm(fm)) system.time(lrm.fit(X, y)) print(g, digits=7) coef(f) - coef(g) sqrt(diag(vcov(f)))/sqrt(diag(vcov(g))) system.time(h <- orm(fm)) system.time(i <- orm.fit(X, y)) Rprof('orm.fit.out') of <- orm.fit(X, y) Rprof(NULL) system('R CMD Rprof orm.fit.out') require(MASS) n <- 300 y <- factor(sample(0:4, n, TRUE)) x1 <- runif(n) x2 <- runif(n) x3 <- runif(n) system.time(f <- polr(y ~ x1 + x2 + x3)) print(summary(f, digits=7)) system.time(g <- lrm(y ~ x1 + x2 + x3)) print(g, digits=7) c(-f$zeta, f$coefficients) - coef(g) print( (diag(vcov(f))[c(4:7, 1:3)])/diag(vcov(g)), digits=10) w <- function(m) { x <- runif(200) if(m > 0) x[1:m] <- NA x } set.seed(1) y <- sample(0:1, 200, TRUE) x1 <- w(50) x2 <- w(1) x3 <- w(2) x4 <- w(0) x5 <- w(10) x6 <- w(11) x7 <- w(13) x8 <- w(8) x9 <- w(7) x10 <- w(6) x11 <- w(5) x12 <- w(4) x13 <- w(3) x14 <- w(7) x15 <- w(18) x16 <- w(19) x17 <- w(21) x18 <- w(23) x19 <- w(25) x20 <- w(27) f <- lrm(y ~ x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16+x17+x18+x19+x20) sink('/tmp/t.tex') cat('\\documentclass{report}\\usepackage{color,epic,longtable}\\begin{document}', sep='\n') print(f, latex=TRUE) cat('\\end{document}\n') sink() # From Ferenci Tamas set.seed(1) d <- data.frame( y = runif( 1000 ) > 0.5, x = rnorm( 1000 ), w = sample( 1:100, 1000, replace = TRUE ) ) wt <- d$w g <- d[ rep(1 : nrow(d), wt), ] wtd.mean(d$y, wt); mean(g$y) wtd.var(d$y, wt); var(g$y) wtd.mean(d$x, wt); mean(g$x) wtd.var(d$x, wt); var(g$x) # The 2 models will disagree if allow to use different knots k <- quantile(d$x, c(5,20,50,80,95) / 100) a <- lrm( y ~ rcs( x, k ), data = d, weights = w) b <- lrm( y ~ rcs( x, k ), data = g ) # xless(a); xless(b) rms/inst/tests/calibrate.cph.r0000644000176200001440000000063613703162522016062 0ustar liggesusers## Eduwin Pakpahan if(require(haven)) { require(rms) d <- read_dta("pakpahan.dta") fit <- cph(Surv(data_dftime, data_demfu) ~ data_age, method="breslow", data=d, surv=TRUE, x=T, y=T, time.inc=1200) print(fit) cal <- calibrate(fit, u=1200, B=120) plot(cal, subtitles=FALSE) cal_KM <- calibrate(fit, cmethod='KM', u=1200, m=10, B=40) plot(cal_KM, add=TRUE) } rms/inst/tests/nomogram2.r0000644000176200001440000000273513526570553015300 0ustar liggesusersrequire(rms) create <- FALSE if(create) { d <- csv.get("hongLiData.csv", lowernames=TRUE, charfactor=TRUE) d <- upData(d, labels=c(surg.discharge.days="Discharge Days", primarysite="Primary Site", facilitytype="Facility Type", insurance1="Insurance", race1="Race", region="Region", samefacility="Same Facility", port.nccn.fail="outcome")) d <- d[Cs(port.nccn.fail, surg.discharge.days, primarysite, facilitytype, insurance1, race1, samefacility, region)] save(d, file='nomogram2.rda', compress=TRUE) } else load('nomogram2.rda') ddist <- datadist(d); options(datadist='ddist') f <- lrm(port.nccn.fail ~ surg.discharge.days + primarysite + facilitytype + insurance1 + race1 + samefacility + region, data=d) for(abbrev in c(FALSE, TRUE)) { n <- nomogram(f, lp.at=seq(-2, 5, by=0.5), fun=plogis, 
rms/inst/tests/nomogram2.r0000644000176200001440000000273513526570553015275 0ustar liggesusersrequire(rms)
create <- FALSE
if(create) {
  d <- csv.get("hongLiData.csv", lowernames=TRUE, charfactor=TRUE)
  d <- upData(d, labels=c(surg.discharge.days="Discharge Days",
                          primarysite="Primary Site",
                          facilitytype="Facility Type",
                          insurance1="Insurance",
                          race1="Race",
                          region="Region",
                          samefacility="Same Facility",
                          port.nccn.fail="outcome"))
  d <- d[Cs(port.nccn.fail, surg.discharge.days, primarysite, facilitytype,
            insurance1, race1, samefacility, region)]
  save(d, file='nomogram2.rda', compress=TRUE)
} else load('nomogram2.rda')
ddist <- datadist(d); options(datadist='ddist')
f <- lrm(port.nccn.fail ~ surg.discharge.days + primarysite + facilitytype +
         insurance1 + race1 + samefacility + region, data=d)
for(abbrev in c(FALSE, TRUE)) {
  n <- nomogram(f, lp.at=seq(-2, 5, by=0.5), fun=plogis,
                fun.at=c(seq(.1, .9, by=.1), .95, .99, .999),
                funlabel="Risk of Delayed PORT Initiation",
                abbrev=abbrev, minlength=1, lp=FALSE)
  if(! abbrev) n1 <- n else n2 <- n
}
plot(n1)
plot(n2)
attr(n2, 'info')$Abbrev
# Hong Li
# The variable samefacility has two categories and region has 4 categories.
# But in the nomogram the variables samefacility and region are switched,
# i.e., samefacility is shown with 4 categories and region with 2.
# All other variables are correct.
rms/inst/tests/cph3.r0000644000176200001440000000106712434371456014227 0ustar liggesusers## Explore width of confidence intervals, checking that it is zero
## at the median of the single predictor
require(rms)
set.seed(1)
a <- 1 : 100
dd <- datadist(a); options(datadist='dd')
S <- Surv(a + 100 * runif(100))
f <- cph(S ~ a)
ab <- list(v = median(a))
plot(Predict(f), abline=ab)
f <- cph(S ~ pol(a, 2))
plot(Predict(f), abline=ab)
plot(Predict(f, ref.zero=TRUE), abline=ab)
b <- sample(1:100) + 100
dd <- datadist(a, b)
f <- cph(S ~ pol(a, 2) + b)
plot(Predict(f), abline=ab)
plot(Predict(f, ref.zero=TRUE), abline=list(v=c(median(a),median(b))))
rms/inst/tests/orm.s0000644000176200001440000001551713760503057014166 0ustar liggesusersrequire(rms); require(MASS)
set.seed(1)
n <- 100
y <- sample(1:8, n, TRUE)
#y <- runif(n)
x1 <- sample(c(-1,0,1), n, TRUE)
x2 <- sample(c(-1,0,1), n, TRUE)
system.time(f <- lrm(y ~ x1 + x2, eps=1e-5, trace=TRUE))
fkeep <- f
# xless(solve(vcov(f)))
predict(f, data.frame(x1=0,x2=0), type='fitted.ind')
h <- polr(as.factor(y) ~ x1 + x2, Hess=TRUE)
v <- solve(vcov(h))
s <- c(3:ncol(v),1:2)
v[s,s]
v[s,s] / solve(vcov(f))
g <- orm(y ~ x1 + x2, eps=1e-5, trace=TRUE)
system.time(g <- orm(y ~ x1 + x2, eps=.001))  #, trace=TRUE)
coef(g) - coef(f)
w <- vcov(g, intercepts='all') / vcov(f) - 1
max(abs(w))
m <- Mean(g)
formals(m) <- list(lp=NULL, intercepts=runif(30000), values=runif(30001),
                   conf.int=0, interceptRef=3,
                   cumprob=function(x) 1 / (1 + exp(-x)))
system.time(m(1))
system.time(m(1:100))
system.time(m(1:1000))
set.seed(1)
n <- 1000
x1 <- c(rep(0,500), rep(1,500))
y <- rnorm(n) + 3*x1
g <- orm(y ~ x1)
g
k <- coef(g)
plot(1:999, k[1:999])
h <- orm(y ~ x1, family='probit')
plot(coef(g)[1:999], coef(h)[1:999])
tapply(y, x1, mean)
m <- Mean(g)
m(w <- k[g$interceptRef] + k['x1']*c(0,1))
#mf <- Mean(f)
#k <- coef(f)
#mf(k[1] + k['x1']*c(0,1))
mh <- Mean(h)
wh <- coef(h)[h$interceptRef] + coef(h)['x1']*c(0,1)
mh(wh)
qu <- Quantile(g)
qu(.1, w)
qu(.5, w)
qu(.9, w)
tapply(y, x1, quantile, probs=c(.1,.5,.9))
set.seed(1)
n <- 1000
x1 <- c(rep(0,500), rep(1,500))
y <- exp(rnorm(n) + 3*x1)
g <- orm(y ~ x1)
g
k <- coef(g)
plot(1:999, k[1:999])
m <- Mean(g)
m(w <- k[1] + k['x1']*c(0,1))
m(w <- k[g$interceptRef] + k['x1']*c(0,1))
tapply(y, x1, mean)
qu <- Quantile(g)
qu(.1, w)
tapply(y, x1, quantile, probs=.1)
qu(.5, w)
tapply(y, x1, quantile, probs=.5)
qu(.9, w)
tapply(y, x1, quantile, probs=.9)
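## Added illustration (uses the objects defined just above; no new behavior is
## assumed beyond what these tests exercise): Mean() and Quantile() return
## functions of the linear predictor formed with the reference intercept, so
## predictions for x1 = 0, 1 can be written compactly as
lp <- k[g$interceptRef] + k['x1'] * c(0, 1)
Mean(g)(lp)           # estimated means,   cf. tapply(y, x1, mean)
Quantile(g)(.5, lp)   # estimated medians, cf. tapply(y, x1, quantile, probs=.5)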
## Check quantile calculations
qu <- Quantile(g)
## .9 = Prob(Y >= 2)  .8 = Prob(Y >= 3) etc.
## Prob Y <= j, j = 1, ..., 10 = .1, .2, ..., 1
## .1 quantile = 1, .2 quantile = 2, ..., .9 quantile = 9
formals(qu) <- list(q=.5, lp=0, intercepts=qlogis(seq(.9,.1,by=-.1)),
                    values=1:10, interceptRef=1, cumprob=plogis,
                    inverse=qlogis, conf.int=0, method='interpolated')
for(a in c(.01, seq(0, 1, by=.05), .99)) cat(a, qu(a, qlogis(.9)), '\n')
set.seed(3)
n <- 300
x1 <- runif(n)
ddist <- datadist(x1); options(datadist='ddist')
y <- x1 + runif(n)
x1[1:35] <- NA
dat <- data.frame(x1, y)
f <- orm(y ~ x1, x=TRUE, y=TRUE)
f
g <- bootcov(f, B=50)
x1s <- seq(0, 1, by=.1)
pg <- Predict(g, x1=x1s, boot.type='basic')
cof <- c(coef(f)[f$interceptRef], coef(f)[length(coef(f))])
cof
apply(g$boot.Coef, 2, mean)
sqrt(var(g$boot.Coef[,2]))
a <- aregImpute(~ x1 + y, data=dat)
h <- fit.mult.impute(y ~ x1, orm, a, data=dat)
pmi <- Predict(h, x1=x1s)
plot(Predict(f, x1=x1s),
     addpanel=function(...) {
       with(pg,  {llines(x1, lower, col='red')
                  llines(x1, upper, col='red')
                  lpoints(x1, yhat, col='red')
                  llines(x1s, cof[1] + cof[2]*x1s, col='green') })
       with(pmi, {lpoints(x1, lower, col='black')
                  lpoints(x1, upper, col='black')
                  lpoints(x1, yhat, col='black')})
     })
require(rms)
getHdata(nhgh)
w <- subset(nhgh, age >= 21 & dx==0 & tx==0, select=-c(dx,tx))
dd <- datadist(w); options(datadist='dd')
set.seed(1)
w$ghe <- w$gh + round(runif(length(w$gh), -.5, .5), 2)
Ecdf(~ gh, groups=is.na(sub), data=w)
Ecdf(~ age, groups=is.na(sub), data=w)
with(w, table(is.na(sub), gh))
length(unique(w$gh))
with(w, tapply(gh, is.na(sub), function(x) length(unique(x))))
w2 <- subset(w, !is.na(sub))
wdata <- 2   ## If substitute ghr for gh, boot problem goes away for w2
g <- orm(gh ~ age, family='loglog', data=if(wdata == 1) w else w2, x=TRUE, y=TRUE)
set.seed(2)
gb <- bootcov(g, B=100, pr=TRUE)
ages <- seq(25, 80, by=5)
bootcl <- Predict(gb, age=ages, boot.type=c('percentile','basic')[2])
bootclcov <- Predict(gb, age=ages, usebootcoef=FALSE)
X <- predict(gb, newdata=bootcl, type='x')
br <- gb$boot.Coef[,1] + X %*% t(gb$boot.Coef[,-1])
if(wdata == 1) br1 <- br else br2 <- br
z <- quantile(br[1,], c(.025,.975))
plot(Predict(g, age=ages), ylim=c(-1.5,1.5),
     addpanel=function(...) {
       lpoints(23, z, col='red')
       for(j in 1:12) lpoints(ages[j], br[j,], col=gray(.9))
       with(bootcl,    {llines(age, lower, col='blue')
                        llines(age, upper, col='blue')
                        lpoints(age, yhat, col='blue')})
       with(bootclcov, {llines(age, lower, col='red')
                        llines(age, upper, col='red')})
     })
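## Added illustration (reuses gb and ages from above; interval semantics as
## documented for Predict with bootcov fits): compare the widths of the three
## interval flavors just plotted.
w1 <- Predict(gb, age=ages)                     # percentile bootstrap CLs (default)
w2 <- Predict(gb, age=ages, boot.type='basic')  # basic bootstrap CLs
w3 <- Predict(gb, age=ages, usebootcoef=FALSE)  # Wald CLs from bootstrap covariance
cbind(percentile = w1$upper - w1$lower,
      basic      = w2$upper - w2$lower,
      wald       = w3$upper - w3$lower)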
# For age of 70 "manually" find predicted median
# was predict(f, ...) why?
p <- predict(g, newdata=data.frame(age=70), type='fitted.ind')
cumsum(p)
median(rep(g$yunique, round(1000000*p)))   # 5.6
xb <- Function(g)(age=70)
intercepts <- coef(g)[1 : num.intercepts(g)]
# Compute Prob(Y <= y) from Prob(Y >= y) by shifting one level
# Prob(Y > y) = Prob(Y >= y + epsilon)
cumprob <- g$trans$cumprob
# xless(cumprob(intercepts + xb))
names(intercepts) <- Lag(names(intercepts))
names(intercepts) <- gsub('>=', '>', names(intercepts))
intercepts
probYley <- 1 - cumprob(intercepts + xb)
names(probYley) <- gsub('>', '<=', names(probYley))
probYley   # 5.6 gives prob Y <= 5.6 = .50899.  Interpolated median 5.59
# pgty <- g$trans$cumprob(intercepts + xb)
# Prob(Y <= y) = Prob(Y < y + epsilon) = 1 - Prob(Y >= y + epsilon)
pleq <- cumprob(coef(g)[1:num.intercepts(g)] + xb)
lp <- coef(g)[g$interceptRef] + xb
## Look at bootstrap variation in median gh for both subsets
B <- 2000; meds1 <- meds2 <- numeric(B)
y1 <- w$gh; y2 <- w2$gh
n1 <- nrow(w); n2 <- nrow(w2)
pb <- setPb(B, every=50)
for(i in 1:B) {
  pb(i)
  s <- sample(1:n1, n1, replace=TRUE)
  meds1[i] <- median(y1[s])
  s <- sample(1:n2, n2, replace=TRUE)
  meds2[i] <- median(y2[s])
}
table(meds1); table(meds2)
# See how to check intercepts against linear model assumptions
require(rms)
set.seed(1)
n <- 1000
x1 <- runif(n)
y <- 30 + x1 + rnorm(n)
f <- orm(y ~ x1, family=probit)
y2 <- y + 20
f2 <- orm(y2 ~ x1, family=probit)
plot(coef(f), coef(f2))   # unaffected by shift
g <- ols(y ~ x1)
yu <- f$yunique[-1]
ns <- num.intercepts(f)
s <- g$stats['Sigma']
alphas <- coef(f)[1:ns]
plot(-yu/s, alphas, type='l', xlab=expression(-y/s), ylab=expression(alpha[y]))
co <- coef(lm.fit(cbind(1, -yu/s), alphas))
text(-32, 2, paste('Slope:', round(co[2], 4)))
abline(a=co[1], b=co[2], col='gray70')
## Compare coefficients with those from partial likelihood (Cox model)
orm(y ~ pol(x1,2), family=loglog)
cph(Surv(y) ~ pol(x1,2))
## Simulate from a linear model with normal residuals and compute
## quantiles for one x value, two ways
set.seed(7)
n <- 10000
x <- rnorm(n)
y <- round(x + rnorm(n), 2)
f <- ols(y ~ x)
k <- coef(f)
s <- f$stats['Sigma']
print(c(k, s))
k[1] + qnorm((1:3)/4) * s
g <- orm(y ~ x, family='probit')
quant <- Quantile(g)
lp <- predict(g, data.frame(x=0))
for(qu in (1:3)/4) print(quant(qu, lp))
rms/inst/tests/ggplot2b.r0000644000176200001440000000077612657457714015113 0ustar liggesusers## From John Woodill: https://github.com/harrelfe/rms/issues/19
require(rms)
dd = data.frame(x1 = 2 + (runif(200) * 6), x12 = 100 + (runif(200) * 6))
dd$y1 = rep(c(1.2, 1.4), each = 100) * dd$x1 + (runif(200) / 5)
ddist <- datadist(dd)
options("datadist" = "ddist")
g <- ols(y1 ~ x1 + x12, data = dd, x = TRUE, y = TRUE)
a <- Predict(g)
h <- ols(y1 ~ I(x1^2) + I(x12^2), data = dd, x = TRUE, y = TRUE)
b <- Predict(h)
p <- rbind(a,b)
s <- ggplot(p, group = ".set.", ggexpr=TRUE)
s
ggplot(p, group=".set.")
rms/inst/tests/ggplot3.r0000644000176200001440000000055412705010667014744 0ustar liggesusers# Test anova= in ggplot
require(rms)
set.seed(1)
x1 <- runif(100)
x2 <- runif(100)
x3 <- sample(c('a','b'), 100, TRUE)
x4 <- sample(c('k','l','m'), 100, TRUE)
y <- runif(100)
dd <- datadist(x1, x2, x3, x4); options(datadist='dd')
f <- ols(y ~ x1 + x2 + x3 + x4)
a <- anova(f)
ggplot(Predict(f), anova=a)   # ok
ggplot(Predict(f), anova=a, sepdiscrete='vertical')
rms/inst/tests/validate.rpart.r0000644000176200001440000000133012713357221016275 0ustar liggesusersrequire(rms)
require(rpart)
n <- 100
set.seed(1)
x1 <- runif(n)
x2 <- runif(n)
x3 <- runif(n)
y <- 1*(x1+x2+rnorm(n) > 1)
table(y)
f <- rpart(y ~ x1 + x2 + x3, model=TRUE)
v <- validate(f)
v   # note the poor validation
par(mfrow=c(1,2))
plot(v, legendloc=c(.2,.5))
par(mfrow=c(1,1))
## From http://stackoverflow.com/questions/37053654 Rohan Adur
set.seed(4)
dat = data.frame(X1 = sample(x = c(1,2,3,4,5), size = 100, replace=TRUE))
dat$t = rexp(100, rate=dat$X1)
dat$t = dat$t / max(dat$t)
dat$e = rbinom(n = 100, size = 1, prob = 1-dat$t )
f = rpart(Surv(t, event = e) ~ X1 , data = dat, model=TRUE,
          control=rpart.control(minsplit=30, cp=0.01))
plot(f); text(f)
v <- validate(f)
v
plot(v, legendloc=c(.6,.2))
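## Added sketch (simulated data; assumes only the documented matrix layout of
## validate() output for lrm fits): the bias-corrected index reported by
## validate() is the apparent index minus the bootstrap estimate of optimism.
require(rms)
set.seed(2)
x  <- rnorm(200)
yy <- ifelse(runif(200) <= plogis(x), 1, 0)
fit <- lrm(yy ~ x, x=TRUE, y=TRUE)
val <- validate(fit, B=100)
# should be ~ 0 by construction:
val['Dxy', 'index.corrected'] - (val['Dxy', 'index.orig'] - val['Dxy', 'optimism'])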
rms/inst/tests/orm3.r0000644000176200001440000002263312316040257014243 0ustar liggesusers# From Ahmed Hassan data2= read.csv('data/15.csv',header=T) require(rms) log.n <- c(0L, 1L, 2L, 0L, 0L, 3L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 12L, 4L, 10L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 28L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 7L, 0L, 0L, 0L, 1L, 8L, 4L, 0L, 15L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 9L, 0L, 0L, 1L, 0L, 0L, 10L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 20L, 0L, 33L, 0L, 0L, 1L, 0L, 0L, 37L, 6L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 13L, 6L, 0L, 0L, 0L, 0L, 139L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 50L, 0L, 9L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 31L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 9L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 1L, 10L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 8L, 88L, 5L, 0L, 0L, 1L, 92L, 0L, 0L, 0L, 0L, 58L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 20L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 65L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 18L, 0L, 0L, 1L, 0L, 18L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 9L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 2L, 1L, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 13L, 18L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 3L, 0L, 0L, 13L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 42L, 0L, 0L, 5L, 0L, 16L, 0L, 0L, 24L, 0L, 11L, 0L, 0L, 0L, 0L, 15L, 0L) bug <- c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 3L, 0L, 6L, 0L, 2L, 2L, 0L, 15L, 0L, 0L, 1L, 10L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 2L, 5L, 0L, 1L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 1L, 0L, 0L, 31L, 0L, 0L, 0L, 0L, 1L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 10L, 5L, 5L, 0L, 0L, 1L, 22L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 11L, 0L, 2L, 0L, 2L, 0L, 4L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 5L, 6L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 36L, 2L, 0L, 0L, 0L, 22L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 13L, 0L, 0L, 0L, 0L, 3L, 0L, 3L, 1L, 1L, 1L, 0L, 2L, 0L, 10L, 4L, 0L, 0L, 3L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 19L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 18L, 0L, 7L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 10L, 0L, 0L, 0L, 0L, 1L, 0L, 9L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 3L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 9L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 15L, 0L, 0L, 5L, 0L, 3L, 5L, 0L, 0L, 2L, 14L, 0L) pre <- c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 5L, 0L, 2L, 0L, 3L, 0L, 0L, 5L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 5L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 2L, 4L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 9L, 2L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 14L, 0L, 0L, 0L, 0L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 3L, 0L, 3L, 1L, 0L, 0L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 1L, 7L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 0L) loc <- c(18L, 51L, 219L, 23L, 12L, 49L, 474L, 164L, 11L, 106L, 73L, 45L, 79L, 8L, 32L, 40L, 571L, 186L, 780L, 5L, 231L, 148L, 18L, 592L, 49L, 59L, 154L, 903L, 3L, 72L, 246L, 79L, 245L, 26L, 458L, 42L, 88L, 132L, 179L, 118L, 2444L, 38L, 20L, 153L, 41L, 227L, 224L, 56L, 622L, 308L, 27L, 142L, 91L, 60L, 31L, 73L, 72L, 95L, 222L, 219L, 2L, 681L, 68L, 13L, 31L, 1L, 227L, 188L, 13L, 79L, 14L, 79L, 11L, 3L, 135L, 183L, 69L, 132L, 956L, 15L, 1529L, 82L, 249L, 68L, 176L, 146L, 898L, 667L, 90L, 26L, 27L, 84L, 68L, 14L, 158L, 383L, 852L, 42L, 162L, 11L, 49L, 1638L, 14L, 56L, 11L, 47L, 19L, 48L, 83L, 33L, 23L, 31L, 2434L, 23L, 101L, 38L, 6L, 78L, 128L, 104L, 5L, 368L, 34L, 24L, 128L, 6L, 57L, 51L, 212L, 317L, 52L, 82L, 70L, 19L, 118L, 86L, 35L, 41L, 15L, 76L, 1300L, 531L, 9L, 107L, 88L, 106L, 7L, 32L, 17L, 223L, 148L, 65L, 3L, 47L, 21L, 61L, 38L, 122L, 17L, 27L, 5L, 25L, 32L, 110L, 92L, 19L, 92L, 11L, 10L, 84L, 35L, 70L, 96L, 3493L, 290L, 32L, 12L, 173L, 1789L, 8L, 20L, 71L, 335L, 430L, 38L, 122L, 387L, 56L, 3L, 27L, 55L, 214L, 22L, 89L, 351L, 156L, 78L, 35L, 356L, 104L, 549L, 508L, 21L, 131L, 340L, 139L, 64L, 154L, 24L, 294L, 99L, 13L, 2290L, 140L, 140L, 6L, 32L, 30L, 222L, 35L, 96L, 227L, 81L, 128L, 290L, 3L, 68L, 490L, 872L, 117L, 12L, 229L, 23L, 702L, 62L, 125L, 199L, 368L, 83L, 91L, 94L, 39L, 45L, 21L, 31L, 347L, 3L, 83L, 29L, 312L, 346L, 471L, 30L, 202L, 69L, 48L, 30L, 318L, 23L, 74L, 94L, 123L, 19L, 970L, 87L, 82L, 454L, 41L, 334L, 6L, 99L, 557L, 3L, 25L, 20L, 52L, 395L, 119L, 14L, 15L, 408L, 10L, 366L, 120L, 74L, 195L, 68L, 1046L, 48L, 40L, 127L, 89L, 50L, 223L, 1132L, 80L, 189L, 7L, 105L, 5L, 10L, 263L, 192L, 714L, 35L, 24L, 96L, 48L, 101L, 374L, 9L, 125L, 166L, 47L, 284L, 75L, 289L, 218L, 202L, 178L, 4L, 221L, 88L, 10L, 713L, 82L, 333L, 30L, 288L, 55L, 50L, 23L, 225L, 97L, 5L, 27L, 63L, 363L, 556L, 3L, 190L, 489L, 378L, 78L, 61L, 60L, 28L, 155L, 253L, 72L, 39L, 23L, 49L, 224L, 6L, 20L, 151L, 65L, 906L, 16L, 81L, 337L, 65L, 759L, 403L, 29L, 152L, 84L, 785L, 23L) logadd <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.083333336, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.26666668, 0, 0, 0, 0, 0, 0.875, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.083333336, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.27586207, 0, 0, 0, 0, 1.117647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0, 0, 0, 0, 0, 0, 0) cochange <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.083333336, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.125, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.083333336, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.3, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03448276, 0, 0, 0, 0, 0.11764706, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) table(pre) # x <- rcs(pre,3) # will not work x <- rcs(pre,4) f = orm( bug ~ rcs(log(1+log.n), 3)) f$stats['Model L.R.'] f = orm( bug ~ rcs(log.n,3)) f$stats['Model L.R.'] f1a <- lrm(bug ~ rcs(loc,3) + logadd + cochange, eps=.001, maxit=20) f1b = orm( bug ~ rcs(loc,3)+logadd+cochange, eps=.001, maxit=20) f3 = orm( bug ~ rcs(loc,4)+logadd+cochange, maxit=15) f4 = orm( bug ~ rcs(loc,3)+logadd) f5 = orm( bug ~ rcs(loc,3)+cochange) f6 = orm( bug ~ pol(loc,2)+logadd+cochange) f7 = orm( bug ~ pol(pre,2)+logadd+cochange, maxit=15) rms/inst/tests/Rq2.s0000644000176200001440000000424212700015424014017 0ustar liggesusers## Check ability of quantile regression to estimate stratified medians require(quantreg) sm <- function(n, eps=1e-6) { y <- exp(rnorm(n)) x <- c(rep(0, n/2), rep(1, n/2)) y[x==1] <- y[x==1] * 1.4 qrmed <- matrix(NA, nrow=2, ncol=2, dimnames=list(c('br','fn'),c('x=0','x=1'))) for(m in c('br','fn')) { f <- if(m == 'br') rq(y ~ x, method=m) else rq(y ~ x, method=m, eps=eps) qrmed[m,] <- c(coef(f)[1], sum(coef(f))) } sampmed <- tapply(y, x, median) print(rbind(qrmed,sample=sampmed)) } for(i in 1:10) sm(100) for(i in 1:10) sm(1000) ## Compare standard err. of mean | x=0 with standard err. from quantile ## regression using 4 methods cse <- function(n) { y <- rnorm(n) x <- c(rep(0, n/2), rep(1, n/2)) sem <- sd(y[x==0])/sqrt(n/2) semr <- sem*sqrt(pi/2) res <- vector('numeric', 6) names(res) <- c('SEMean','Asympt SEMedian','iid','nid','ker','boot') res[1:2] <- c(sem, semr) f <- rq(y ~ x) for(m in c('iid', 'nid', 'ker', 'boot')) { # nid is default s <- coef(summary(f, se=m))['(Intercept)','Std. 
Error']
    res[m] <- s
  }
  print(t(t(round(res,3))))
}
for(i in 1:10) cse(100)
for(i in 1:10) cse(5000)
# nid does appear to work best
## Compare mean squared err. of the quantile estimator of the median of y | x=e
## in a 5-sample problem with the orm logistic-family estimator. Also include
## the sample quantile.
cmse <- function(n) {   # n = # obs per each of 5 samples
  x <- factor(rep(c('a','b','c','d','e'), n))
  y <- rnorm(5*n)
  s <- x == 'e'
  y[s] <- y[s] + 3
  sampmed <- median(y[s])
  f <- rq(y ~ x)
  qrmed <- coef(f)[1] + coef(f)['xe']
  f <- orm(y ~ x, family=probit)
  if(f$fail) return(rep(NA, 4))
  qu <- Quantile(f)
  iref <- f$interceptRef
  ormmed <- qu(.5, z <- coef(f)[iref] + coef(f)['x=e'])
  ormmean <- Mean(f)(z)
  c(sampmed=sampmed, qrmed=qrmed, ormmed=ormmed, ormmean=ormmean)
}
require(rms)
mse <- c(0, 0, 0, 0)
n <- 50
B <- 1000
m <- 0
for(i in 1:B) {
  cat(i, '\r')
  ms <- cmse(n)
  if(!is.na(ms[1])) {
    m <- m + 1
    mse <- mse + (ms - 3) ^ 2
  }
}
m
sqrt(mse/m)
# .123 .124 .126 logistic n=100
# .173 .176 .172 probit n=50
# .169 .171 .165 .139 probit n=50   .139 = rmse for mean from orm
rms/inst/tests/plot.Predict.s0000644000176200001440000000244313163756363015733 0ustar liggesusers# From question by Mike Babyak, Duke U
require(rms)
n = 30
group = factor(sample(c('a','b','c'), n, TRUE))
x1 = runif(n)
dat = data.frame(group, x1, y = as.numeric(group) + 0.2*x1 + rnorm(n) )
d <- datadist(dat) ; options(datadist="d")
f <- ols(y ~ x1 + group, data=dat)
p <- Predict(f, group)
plot(p, ~group, nlines=TRUE, type='p', ylab='fitted Y', xlab='Treatment', pch=4, lwd=3)
p <- Predict(f, x1=seq(0,1,by=.1), group)
plot(p, ~ x1, groups='group', col=3:1)
## From Ferenci Tamas
set.seed( 1 )
d <- data.frame(x1 = rnorm( 1000 ),
                x2 = sample( 1:2, 1000, replace = TRUE ),
                x3 = sample( 1:2, 1000, replace = TRUE ) )
d <- transform( d, y = x3*( x1 + x2 )+rnorm( 1000 ) )
dd <- datadist( d )
options( datadist = "dd" )
fit <- ols( y ~ x3*( x1 + x2 ), data = d )
p1 <- Predict( fit, x1, x3 )
p2 <- Predict( fit, x2, x3 )
p <- rbind(x1=p1, x2=p2)
plot(p, groups='x3', varypred=TRUE)
# Now, if you run plot( p1 ) or plot( p2 ), everything is fine. In the last
# call above, however, the panel for the continuous predictor x1 is fine (the
# same as plot( p1 )), but the panel for the categorical predictor is
# completely different and fundamentally wrong: the two groups do not even
# appear.
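## Added sketch (simulated additive model; illustrates the intended behavior of
## the rbind.Predict + varypred idiom the report above exercises; object names
## are arbitrary):
require(rms)
set.seed(2)
u1 <- rnorm(100); u2 <- rnorm(100); yy <- u1 + u2 + rnorm(100)
ddu <- datadist(u1, u2); options(datadist='ddu')
fu <- ols(yy ~ u1 + u2)
pu <- rbind(u1=Predict(fu, u1), u2=Predict(fu, u2))   # names label the sets
plot(pu, varypred=TRUE)   # one panel per predictor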
rms/inst/tests/lrmMean.s0000644000176200001440000000044013472345365014763 0ustar liggesusersrequire(rms)
set.seed(6)   # was 3
n <- 85+15
y <- sample(1:10, n, TRUE)
x1 <- runif(n)
f <- lrm(y ~ x1, x=TRUE, y=TRUE)
g <- bootcov(f, B=500, coef.reps=TRUE)
m <- Mean(f)
Predict(f, x1=c(.25, .75), fun=m)
Predict(g, x1=c(.25, .75), fun='mean')
h <- ols(y ~ x1)
Predict(h, x1=c(.25, .75))
rms/inst/tests/orm-bootcov.r0000644000176200001440000000263413662102673015636 0ustar liggesusersrequire(rms)
set.seed(1)
n <- 100
y <- sample(1 : 10, n, TRUE)
x1 <- runif(n)
x2 <- runif(n)
f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001)
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001)
set.seed(1)
fb <- bootcov(f, B=400, eps=.001)
k <- c('y>=6', 'x1', 'x2')
set.seed(1)
gb <- bootcov(g, B=400, eps=.001)
list(rownames(f$var), rownames(g$var), rownames(fb$var), rownames(gb$var))
attributes(gb$var)
fb$var[k, k]
gb$var[k, k]
vcov(fb)[k, k]
vcov(gb)
vcov(gb, intercepts='mid')
anova(fb)
anova(gb)
# Still need to understand how bootcov works differently for orm
r <- resid(f, 'score') - resid(g, 'score')
apply(r, 2, function(x) max(abs(x)))
fr <- robcov(f)
gr <- robcov(g)
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001, family='loglog')
gr <- robcov(g)
gb <- bootcov(g, B=200)
# Compare against Yuqi Tian's function
source('robcov_Yuqi.r')
vh <- func_robcov(g, cluster=1:100)
vg <- vcov(gr, intercepts='all')
cv <- function(v1, v2) {
  se1 <- sqrt(diag(v1))
  se2 <- sqrt(diag(v2))
  prn(round(se1 / se2, 3))
  prn(max(abs(v1 - v2)))
}
cv(vg, vh)
cs <- function(fit) {
  sc1 <- resid(fit, 'score')
  sc2 <- func_score(fit)
  max(abs(sc1 - sc2))
}
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001)
cs(g)
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001, family='probit')
cs(g)
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001, family='loglog')
cs(g)
g <- orm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=0.001, family='cauchit')
cs(g)
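## Added sketch (reuses y, x1, x2 from above; an assumption check, not an rms
## test): with independent observations, each forming its own cluster, the
## robcov sandwich standard errors should be close to the model-based ones.
fit  <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE, eps=.001)
fitr <- robcov(fit)   # default cluster: one per observation
round(sqrt(diag(vcov(fitr))) / sqrt(diag(vcov(fit))), 2)   # ratios near 1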
rms/inst/tests/robcov_Yuqi.r0000644000176200001440000000705613662026530015671 0ustar liggesusers### The idea is to get score residuals from the score vector ###
# a function to get score residuals from the fitted model
# (the code is taken directly from `orm.fit`)
func_score <- function(fit){
  # coefficients
  coef <- fit$coefficients
  # convert y to ordered category
  y <- match(fit$y, fit$yunique)
  # x
  x <- fit$x
  # some useful numbers
  kint <- length(fit$yunique) - 1L
  nx <- dim(x)[2L]
  n <- length(y)
  p <- as.integer(kint + nx)
  # store results - matrix
  u <- matrix(0, nrow = n, ncol=p)
  # functions
  f <- fit$trans$cumprob
  fp <- fit$trans$deriv
  xb <- fit$x %*% coef[-(1L : kint)]
  ints <- c(1e100, coef[1:kint], -1e100)
  xby <- xb + ints[y]; xby1 <- xb + ints[y + 1L]
  fa <- f(xby)
  fb <- f(xby1)
  P <- fa - fb
  fpa <- fp(xby, fa)
  fpb <- fp(xby1, fb)
  # score for alpha
  for(m in 1:kint){
    for(j in 1:n){
      u[j, m] <- (fpa[j]*(y[j]-1==m) - fpb[j]*(y[j]==m)) / P[j]
    }
  }
  # score for beta
  for(m in (kint+1):p){
    for(j in 1:n){
      u[j, m] <- (fpa[j] - fpb[j]) * x[j,m-kint] / P[j]
    }
  }
  return(u)
}
func_robcov <- function(fit, cluster){
  var <- vcov(fit, intercepts='all')
  vname <- dimnames(var)[[1]]
  # X <- as.matrix(residuals(fit, type="score"))
  X <- func_score(fit)   # get score residuals
  n <- nrow(X)
  cluster <- as.factor(cluster)
  p <- ncol(var)
  j <- is.na(X %*% rep(1, ncol(X)))
  if(any(j)) {
    X <- X[! j,, drop=FALSE]
    cluster <- cluster[! j, drop=TRUE]
    n <- length(cluster)
  }
  j <- order(cluster)
  X <- X[j, , drop=FALSE]
  clus.size <- table(cluster)
  # if(length(clusterInfo)) clusterInfo$n <- length(clus.size)
  clus.start <- c(1, 1 + cumsum(clus.size))
  nc <- length(levels(cluster))
  clus.start <- clus.start[- (nc + 1)]
  storage.mode(clus.start) <- "integer"
  # dyn.load("robcovf.so")
  W <- matrix(.Fortran("robcovf", n, p, nc, clus.start, clus.size, X,
                       double(p), double(p * p), w=double(p * p))$w, nrow=p)
  ## The following has a small bug but comes close to reproducing what robcovf does
  # W <- tapply(X,list(cluster[row(X)],col(X)),sum)
  # W <- t(W) %*% W
  ## The following logic will also do it, also at great cost in CPU time
  # W <- matrix(0, p, p)
  # for(j in levels(cluster)){
  #   s <- cluster==j
  #   if(sum(s)==1){
  #     sx <- X[s,,drop=F]
  #   } else {sx <- apply(X[s,,drop=F], 2, sum); dim(sx) <- c(1,p)}
  #   W <- W + t(sx) %*% sx
  # }
  adjvar <- var %*% W %*% var
  return(adjvar)
}
### test ###
if(FALSE) {
  # generate longitudinal data
  library(mvtnorm)
  library(rms)
  beta <- c(0, 1, 0.5)   # illustrative values; beta was left undefined in the original
  data_gen <- function(n=100, m=10, b=beta, a0=0.7){
    # correlation matrix - exchangeable structure
    G <- matrix(rep(a0, m*m), nrow=m)
    diag(G) <- 1
    stdevs <- rep(1,m)
    e <- rmvnorm(n, mean = rep(0,m),
                 sigma = G * matrix(outer(stdevs, stdevs), nrow=m, byrow=TRUE))
    # x1: gender (0: female, 1: male)
    x1 <- rep(c(rep(0,round(n/2)), rep(1,n-round(n/2))), m)
    # x2: time
    x2 <- rep(c(1:m), each=n)
    y <- b[1] + b[2] * x1 + b[3] * x2 + e
    dat <- data.frame(y=c(y), x1=c(x1), x2=c(x2), id=rep(1:n, m))
    return(dat)
  }
  # data
  dat <- data_gen(n=50, m=10, b=beta, a0=0.7)
  # model
  mod_orm <- orm(y ~ x1 + x2, data = dat, x=TRUE, y=TRUE)
  # robcov
  rob.cov <- robcov(fit = mod_orm, cluster = dat$id)
  rob.cov
}
rms/inst/tests/orm5.r0000644000176200001440000000113713271164423014244 0ustar liggesusers# From Tamas Ferenci
require(rms)
set.seed(1)
n <- 100
SimData <- data.frame( x1 = sample(c(-1,0,1), n, TRUE),
                       x2 = sample(c(-1,0,1), n, TRUE),
                       exposure = rlnorm(100) )
SimData$y <- round(runif(n), 2)
dd <- datadist(SimData)
options(datadist="dd")
f <- lrm(y ~ x1 + x2 + offset(log(exposure)), data=SimData, eps=.0001)
d <- orm(y ~ x1 + x2 + offset(log(exposure)), data=SimData, eps=.0001)
max(abs(coef(f) - coef(d)))
h <- orm(y ~ x1 + x2, family='cloglog', data=SimData)
k <- orm(y ~ x1 + x2 + offset( log( exposure ) ), family='cloglog', data=SimData)
coef(h) - coef(k)
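## Added sketch (reuses SimData and n from above; the property illustrated is
## that a *constant* offset is absorbed entirely by the intercepts, leaving
## the slopes unchanged up to convergence tolerance):
g1 <- orm(y ~ x1 + x2, data=SimData)
g2 <- orm(y ~ x1 + x2 + offset(rep(2, n)), data=SimData)
max(abs(coef(g2)[c('x1','x2')] - coef(g1)[c('x1','x2')]))   # ~ 0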