prodlim/0000755000175100001440000000000013057255665011754 5ustar hornikusersprodlim/tests/0000755000175100001440000000000013035633436013106 5ustar hornikusersprodlim/tests/testthat/0000755000175100001440000000000013035633436014746 5ustar hornikusersprodlim/tests/testthat/prodlim.R0000644000175100001440000002755113035633436016551 0ustar hornikuserscontext("Prodlim") library(testthat) test_that("strata",{ ## bug in version 1.5.1 library(prodlim) d <- data.frame(time=1:3,status=c(1,0,1),a=c(1,9,9),b=factor(c(0,1,0))) expect_output(prodlim(Hist(time,status)~b+factor(a),data=d)) } test_that("prodlim",{ library(lava) library(prodlim) library(riskRegression) library(etm) ## library(survival) m <- crModel() addvar(m) <- ~X1+X2+X3+X4+X5+X6 distribution(m,"X3") <- binomial.lvm() distribution(m,"X4") <- normal.lvm(mean=50,sd=10) distribution(m,"eventtime1") <- coxWeibull.lvm(scale=1/200) distribution(m,"censtime") <- coxWeibull.lvm(scale=1/1000) m <- categorical(m,K=4,eventtime1~X5,beta=c(1,0,0,0),p=c(0.1,0.2,0.3)) m <- categorical(m,K=3,eventtime1~X1,beta=c(2,1,0),p=c(0.3,0.2)) regression(m,to="eventtime1",from=c("X2","X4")) <- c(0.3,0) regression(m,to="eventtime2",from=c("X2","X4")) <- c(0.6,-0.07) set.seed(17) d <- sim(m,200) d$X1 <- factor(d$X1,levels=c(0,1,2),labels=c("low survival","medium survival","high survival")) ## d$X3 <- factor(d$X3,levels=c(0,1),labels=c("high survival","low survival")) d$X5 <- factor(d$X5,levels=c("0","1","2","3"),labels=c("one","two","three","four")) d$Event <- factor(d$event,levels=c("0","1","2"),labels=c("0","cause-1","cause-2")) d$status <- 1*(d$event!=0) head(d) s0 <- prodlim(Hist(time,status)~1,data=d) print(s0) summary(s0,intervals=TRUE) stats::predict(s0,times=1:10) ## plot(s0) su <- prodlim(Hist(time,status)~1,data=d,subset=d$X1=="medium survival") print(su) s1 <- prodlim(Hist(time,status)~X1,data=d) print(s1) summary(s1,intervals=TRUE,newdata=data.frame(X1=c("medium survival","high survival","low survival"))) stats::predict(s1,times=0:10,newdata=data.frame(X1=c("medium survival","low survival","high survival"))) ## plot(s1) s2 <- prodlim(Hist(time,status)~X2,data=d) print(s2) summary(s2,intervals=TRUE) stats::predict(s2,times=0:10,newdata=data.frame(X2=quantile(d$X2))) ## plot(s2) s1a <- prodlim(Hist(time,status)~X1+X3,data=d) print(s1a) summary(s1a,intervals=TRUE) stats::predict(s1a,times=0:10,newdata=expand.grid(X1=levels(d$X1),X3=unique(d$X3))) ## plot(s1a,confint=FALSE,atrisk=FALSE,legend.x="bottomleft",legend.cex=0.8) s3 <- prodlim(Hist(time,status)~X1+X2,data=d) print(s3) summary(s3,intervals=TRUE) stats::predict(s3,times=0:10,newdata=expand.grid(X1=levels(d$X1),X2=c(quantile(d$X2,0.05),median(d$X2)))) ## plot(s3,confint=FALSE,atrisk=FALSE,legend.x="bottomleft",legend.cex=0.8,newdata=expand.grid(X1=levels(d$X1),X2=c(quantile(d$X2,0.05),median(d$X2)))) f0 <- prodlim(Hist(time,event)~1,data=d) print(f0) summary(f0,intervals=TRUE) stats::predict(f0,times=1:10) ## plot(f0) f1 <- prodlim(Hist(time,event)~X1,data=d) print(f1) summary(f1,intervals=TRUE,newdata=data.frame(X1=c("medium survival","high survival","low survival"))) stats::predict(f1,times=0:10,newdata=data.frame(X1=c("medium survival","low survival","high survival"))) ## plot(f1) f2 <- prodlim(Hist(time,event)~X2,data=d) print(f2) summary(f2,intervals=TRUE) stats::predict(f2,times=0:10,newdata=data.frame(X2=quantile(d$X2))) ## plot(f2) f1a <- prodlim(Hist(time,event)~X1+X3,data=d) print(f1a) summary(f1a,intervals=TRUE) stats::predict(f1a,times=0:10,newdata=expand.grid(X1=levels(d$X1),X3=unique(d$X3))) ## plot(f1a,confint=FALSE,atrisk=FALSE,legend.x="bottomleft",legend.cex=0.8) f3 <- prodlim(Hist(time,event)~X1+X2,data=d) print(f3) summary(f3,intervals=TRUE) stats::predict(f3,times=0:10,newdata=expand.grid(X1=levels(d$X1),X2=c(quantile(d$X2,0.05),median(d$X2)))) ## plot(f3,confint=FALSE,atrisk=FALSE,legend.x="bottomleft",legend.cex=0.8,newdata=expand.grid(X1=levels(d$X1),X2=c(quantile(d$X2,0.05),median(d$X2)))) data(pbc) prodlim.0 <- prodlim(Hist(time,status!=0)~1,data=pbc) survfit.0 <- survfit(Surv(time,status!=0)~1,data=pbc) ## plot(survfit.0) ## plot(prodlim.0,add=TRUE,col=2,lwd=3) ttt <- sort(unique(d$time)[d$event==1]) ttt <- ttt[-length(ttt)] sum0.s <- summary(survfit.0,times=ttt) ## plot(survfit.0,lwd=6) ## plot(prodlim.0,add=TRUE,col=2) ## There is arounding issue: testdata <- data.frame(time=c(16.107812,3.657545,1.523978),event=c(0,1,1)) sum0 <- summary(survfit(Surv(time,event)~1,data=testdata),times=sort(testdata$time)) testdata$timeR <- round(testdata$time,1) sum1 <- summary(survfit(Surv(timeR,event)~1,data=testdata),times=sort(testdata$time)) sum0 sum1 ## sum0 != sum1 ## summary(survfit.0,times=c(0,0.1,0.2,0.3)) result.survfit <- data.frame(time=sum0.s$time,n.risk=sum0.s$n.risk,n.event=sum0.s$n.event,surv=sum0.s$surv,std.err=sum0.s$std.err,lower=sum0.s$lower,upper=sum0.s$upper) result.prodlim <- data.frame(summary(prodlim.0,times=ttt)$table[,c("time","n.risk","n.event","n.lost","surv","se.surv","lower","upper")]) cbind(result.survfit[,c("time","n.risk","n.event","surv")],result.prodlim[,c("time","n.risk","n.event","surv")]) a <- round(result.survfit$surv,8) b <- round(result.prodlim$surv[!is.na(result.prodlim$se.surv)],8) if (all(a==b)){cat("\nOK\n")}else{cat("\nERROR\n")} if (all(round(result.survfit$std.err,8)==round(result.prodlim$se.surv[!is.na(result.prodlim$se.surv)],8))){cat("\nOK\n")}else{cat("\nERROR\n")} pbc <- pbc[order(pbc$time,-pbc$status),] set.seed(17) boot <- sample(1:NROW(pbc),size=NROW(pbc),replace=TRUE) boot.weights <- table(factor(boot,levels=1:NROW(pbc))) s1 <- prodlim(Hist(time,status>0)~1,data=pbc,caseweights=boot.weights) ## plot(s1,col=1,confint=FALSE,lwd=8) s2 <- prodlim(Hist(time,status>0)~1,data=pbc[sort(boot),]) ## plot(s2,add=TRUE,col=2,confint=FALSE,lwd=3) }) test_that("weigths, subset and smoothing"){ library(prodlim) d <- SimSurv(100) f1 <- prodlim(Hist(time,status)~X2,data=d) f2 <- prodlim(Hist(time,status)~X2,data=d,caseweights=rep(1,100)) expect_equal(f1$surv,f2$surv) d <- SimSurv(100) d <- data.frame(d, group = c(rep(1, 70), rep(0,30))) f1a <- prodlim(Hist(time,status)~X2,data=d, caseweights = rep(1, 100), subset = d$group==1,bandwidth=0.1) f1b <- prodlim(Hist(time,status)~X2,data=d[d$group==1, ], caseweights = rep(1, 100)[d$group==1], bandwidth=0.1) f1a$call <- f1b$call expect_equal(f1a,f1b) f1 <- prodlim(Hist(time,status)~X1,data=d, subset = d$group==1) f2 <- prodlim(Hist(time,status)~X1,data=d,caseweights=d$group) expect_equal(unique(f1$surv),unique(f2$surv)) expect_equal(predict(f1,newdata = d[1, ], times = 5), predict(f2, newdata = d[1, ], times = 5)) } test_that("weights and delay",{ library(survival) library(survey) library(prodlim) library(SmoothHazard) library(etm) pbc <- pbc[order(pbc$time,-pbc$status),] ## pbc$randprob<-fitted(biasmodel) ## pbc$randprob <- as.numeric(pbc$sex=="m")+0.1 set.seed(17) pbc$randprob <- abs(rnorm(NROW(pbc))) dpbc <- svydesign(id=~id, weights=~randprob, strata=NULL, data=pbc) survey.1<-svykm(Surv(time,status>0)~1, design=dpbc) ## plot(survey.1,lwd=8) prodlim.1 <- prodlim(Hist(time,status>0)~1,data=pbc,caseweights=pbc$randprob) ## plot(prodlim.1,add=TRUE,col=2,confint=FALSE) pbc$entry <- round(pbc$time/5) survfit.delay <- survfit(Surv(entry,time,status!=0)~1,data=pbc) prodlim.delay <- prodlim(Hist(time,status!=0,entry=entry)~1,data=pbc) ## plot(survfit.delay,lwd=8) ## plot(prodlim.delay,lwd=4,col=2,add=TRUE,confint=FALSE) pbc0 <- pbc pbc0$entry <- round(pbc0$time/5) survfit.delay.edema <- survfit(Surv(entry,time,status!=0)~edema,data=pbc0) ## survfit.delay.edema.0.5 <- survfit(Surv(entry,time,status!=0)~1,data=pbc0[pbc0$edema==0.5,]) prodlim.delay.edema <- prodlim(Hist(time,status!=0,entry=entry)~edema,data=pbc0) ## prodlim.delay.edema.0.5 <- prodlim(Hist(time,status!=0,entry=entry)~1,data=pbc0[pbc0$edema==0.5,]) ## plot(survfit.delay.edema,conf.int=FALSE,col=1:3,lwd=8) ## plot(prodlim.delay.edema,add=TRUE,confint=FALSE,col=c("gray88","orange",5),lwd=4) data(abortion) cif.ab.etm <- etmCIF(Surv(entry, exit, cause != 0) ~ 1,abortion,etype = cause,failcode = 3) cif.ab.prodlim <- prodlim(Hist(time=exit, event=cause,entry=entry) ~ 1,data=abortion) plot(cif.ab.etm,lwd=8,col=3) plot(cif.ab.prodlim,add=TRUE,lwd=4,col=5,cause=3) data(abortion) x <- prodlim(Hist(time=exit, event=cause,entry=entry) ~ 1,data=abortion) x0 <- etmCIF(Surv(entry, exit, cause != 0) ~ 1,abortion,etype = cause) graphics::par(mfrow=c(2,2)) cif.ab.etm <- etmCIF(Surv(entry, exit, cause != 0) ~ 1,abortion,etype = cause,failcode = 3) cif.ab.prodlim <- prodlim(Hist(time=exit, event=cause,entry=entry) ~ 1,data=abortion) # cause 3 ## plot(cif.ab.etm, ci.type = "bars", pos.ci = 24, col = c(1, 2), lty = 1,which.cif=3,lwd=8) ## plot(cif.ab.prodlim,add=TRUE,cause=3,confint=TRUE,col=2) # cause 2 ## plot(cif.ab.etm, ci.type = "bars", pos.ci = 24, col = c(1, 2), lty = 1,which.cif=2,lwd=8) ## plot(cif.ab.prodlim,add=TRUE,cause=2,confint=TRUE,col=2) # cause 1 ## plot(cif.ab.etm, ci.type = "bars", pos.ci = 24, col = c(1, 2), lty = 1,which.cif=1,lwd=8) ## plot(cif.ab.prodlim,add=TRUE,cause=1,confint=TRUE,col=2) data(abortion) cif.ab.etm <- etmCIF(Surv(entry, exit, cause != 0) ~ group,abortion,etype = cause,failcode = 3) names(cif.ab.etm[[1]]) head(cbind(cif.ab.etm[[1]]$time,cif.ab.etm[[1]]$n.risk)) cif.ab.prodlim <- prodlim(Hist(time=exit, event=cause,entry=entry) ~ group,data=abortion) ## plot(cif.ab.etm, ci.type = "bars", pos.ci = 24, col = c(1, 2), lty = 1, curvlab = c("Control", "Exposed"),lwd=8) ## plot(cif.ab.prodlim,add=TRUE,cause=3,confint=FALSE,col="yellow") testdata <- data.frame(entry=c(1,5,2,8,5),exit=c(10,6,4,12,33),event=c(0,1,0,1,0)) cif.test.etm <- etmCIF(Surv(entry, exit, event) ~ 1,data=testdata,etype = event,failcode = 1) cif.test.survival <- survfit(Surv(entry, exit, event) ~ 1,data=testdata) cif.test.prodlim <- prodlim(Hist(exit,event,entry=entry)~1,data=testdata) ## plot(cif.test.etm, ci.type = "bars", pos.ci = 24, lwd=5) ## plot(cif.test.etm, ci.type = "bars", pos.ci = 24, lwd=5) ## plot(cif.test.prodlim,add=TRUE,cause=2,col=2,confint=TRUE,type="cuminc") ## simulate data from an illness-death model mod <- idmModel(K=10,schedule=0,punctuality=1) regression(mod,from="X",to="lifetime") <- log(2) regression(mod,from="X",to="waittime") <- log(2) regression(mod,from="X",to="illtime") <- log(2) set.seed(137) ## we round the event times to have some ties testdata <- round(sim(mod,250),1) ## the data enter with delay into the intermediate state (ill) ## thus, to estimate the cumulative incidence of ## the absorbing state (death) after illness we ## have left-truncated data illdata <- testdata[testdata$illstatus==1,] illdata <- illdata[order(illdata$lifetime,-illdata$seen.exit),] ## sindex(jump.times=illdata$illtime,eval.times=illdata$lifetime) ## F <- prodlim(Hist(lifetime,status,entry=illtime)~1,data=illdata[1:5,]) ## f <- survfit(Surv(illtime,lifetime,status)~1,data=illdata[1:5,],type="kaplan-meier") survfit.delayed.ill <- survfit(Surv(illtime,lifetime,seen.exit)~1,data=illdata) prodlim.delayed.ill <- prodlim(Hist(lifetime,seen.exit,entry=illtime)~1,data=illdata) ## plot(survfit.delayed.ill,lwd=5) ## plot(prodlim.delayed.ill,lwd=2,col=2,add=TRUE) }) test_that("interval censored",{ library(prodlim) library(SmoothHazard) m <- idmModel(scale.illtime=1/70, shape.illtime=1.8, scale.lifetime=1/50, shape.lifetime=0.7, scale.waittime=1/30, shape.waittime=0.7) d <- round(sim(m,6),1) icens <- prodlim(Hist(time=list(L,R),event=seen.ill)~1,data=d) ## plot(icens) }) prodlim/tests/testthat/pseudo.R0000644000175100001440000000311113035633436016364 0ustar hornikuserscontext("Construction of pseudovalues") test_that("pseudo",{ library(prodlim) library(pseudo) # comparison to pseudoci # make sure we get the same # results with both packages set.seed(17) N <- 200 ddd <- SimCompRisk(200) ttt <- c(3,5,10) # ttt <- ddd$time fff <- prodlim(Hist(time,event)~1,data=ddd) system.time(jack <- with(ddd,pseudoci(time,event,ttt))) system.time({jack2 <- jackknife.competing.risks(fff,times=ttt)}) ## check individual 2 expect_true(all(round(jack2[,2],9)==round(jack[[3]]$cause1[,2],9))) ## check all individuals expect_true(all(sapply(1:N,function(x){ a <- round(jack[[3]]$cause1[x,],8) b <- round(jack2[x,],8) # all(a[!is.na(a)]==b[!is.na(b)]) all(a[!is.na(a)]==b[!is.na(a)]) }))) ## the pseudoci function seems only slightly slower ## for small sample sizes (up to ca. 200) but ## much slower for large sample sizes: set.seed(17) N <- 200 ddd <- SimCompRisk(200) ttt <- c(3,5,10) # ttt <- ddd$time fff <- prodlim(Hist(time,event)~1,data=ddd) system.time(jack <- with(ddd,pseudoci(time,event,ttt))) system.time({jack2 <- jackknife.competing.risks(fff,times=ttt)}) expect_true(all(round(jack2[,1],9)==round(jack$pseudo$cause1[,1],9))) set.seed(17) N <- 2000 ddd <- SimCompRisk(2000) ttt <- c(3,5,10) fff <- prodlim(Hist(time,event)~1,data=ddd) a <- system.time(jack <- with(ddd,pseudoci(time,event,ttt))) b <- system.time({jack2 <- jackknife.competing.risks(fff,times=ttt)}) expect_less_than(a,b) } prodlim/tests/testthat/cluster.R0000644000175100001440000000161613035633436016556 0ustar hornikuserscontext("Clustered survival data") test_that("clustersruv",{ library(prodlim) ## if (!is.function("cluster")) cluster <- function(x)x clusterTestData <- data.frame(midtimeX=1:8,eventX=c(0,"pn","pn",0,0,0,0,0),patientid=c(1,1,2,2,3,3,4,4),AnyCrownFracture=c(1,1,1,1,2,2,2,2)) a <- prodlim(Hist(midtimeX,eventX=="pn")~cluster(patientid)+AnyCrownFracture,data=clusterTestData) b <- prodlim(Hist(midtimeX,eventX=="pn")~cluster(patientid),data=clusterTestData[clusterTestData$AnyCrownFracture==1,]) c <- prodlim(Hist(midtimeX,eventX=="pn")~cluster(patientid),data=clusterTestData,subset=clusterTestData$AnyCrownFracture==1) d <- prodlim(Hist(midtimeX,eventX=="pn")~1,data=clusterTestData[clusterTestData$AnyCrownFracture==2,]) expect_equal(round(as.numeric(summary(a)$table[[1]][,c("se.surv")]),5),c(0,0.20951,0.10476,0.10476,NA,NA,NA,NA)) expect_equal(summary(b), summary(c)) } prodlim/src/0000755000175100001440000000000013057250252012526 5ustar hornikusersprodlim/src/predict.c0000755000175100001440000000275413057250252014337 0ustar hornikusers#include void findex(int *findex, int *type, int *S, int *freq_strata, double *Z, double *NN, int *NR, int *NT){ int i,x,last; for (i=0;i<*NR;i++){ /* goto strata of subject i */ if (S[i]==1) x=0; else x = freq_strata[S[i]-2]; last = freq_strata[S[i]-1] -1; /* find the closest neighbor */ if (*type==0) findex[i]=x; else{ if (Z[i] <= NN[x]) /* <= first */ findex[i] = x; else{ if (Z[i] >= NN[last]){/* >= last */ findex[i] = last; } else { /* sitting between two neighbors*/ while (Z[i] >= NN[x]) x++; if ((NN[x] - Z[i]) < (Z[i] - NN[x-1])) findex[i] = x; else findex[i] = x-1; } } } findex[i]+=1; /* in `R' counting starts at 1 */ } } void pred_index(int *pindex, double *Y, double *time, int *first, int *size, int *NR, int *NT){ int i,t,f; for (i=0;i<*NR;i++){ f=0; for (t=0;t<(*NT);t++){ if (Y[t] < time[first[i]-1]){ /* < first */ pindex[t + i * (*NT)] = 0; } else{ if (Y[t] > time[first[i]-1 + size[i]-1]){ /* > last */ while(t<(*NT)){ pindex[t + i * (*NT)] = -1; t++; } } else{ /* sitting between to jump times */ while (f <= size[i]-1 && Y[t] >= time[first[i]-1 + f]) f++; pindex[t + i * (*NT)] = first[i] -1 + f; /* do NOT reset f because the next requested time is greater or equal to the current time */ } } } } } prodlim/src/loo.c0000755000175100001440000000373713057250252013500 0ustar hornikusers/* (2011) Thomas A. Gerds -------------------------------------------------------------------- distributed under the terms of the GNU public license */ #include #include void loo_surv(double *Y, double *D, double *time, double *obsT, double *status, double *S, int *N, int *NT){ int k, t; double na,pl; for (k=0; k<*N;k++){ /* Rprintf("\n"); */ /* compute the Nelson-Aalen estimate */ pl=1; for (t=0; t<*NT;t++){ if (obsT[k]>time[t]){ /* decrease the number at risk because individual k was at risk at time[t] */ na = D[t]/(Y[t]-1); } else{ if (obsT[k]==time[t]){ /* decrease the number of events if k was an event, and decrease the number at risk because k was in the risk set at time[t] */ na = (D[t]-status[k])/(Y[t]-1); } else{ /* do nothing */ na = D[t]/Y[t]; } } /* compute the product-limit estimate */ pl *= (1-na); S[k+(*N)*t]=pl; /* Rprintf("t=%d\tk=%d\tD[t]=%1.2f\tY[t]=%1.2f\tna=%1.2f\tS[k](t)=%1.2f\n",t,k,D[t],Y[t],na,S[k+(*N)*t]); */ } } } void loo_comprisk(double *Y, double *D, double *time, double *obsT, double *status, double *lagSurv, double *F, int *N, int *NT){ int k, t; double na,aj; for (k=0; k<*N;k++){ /* compute the Nelson-Aalen estimate */ aj=0; for (t=0; t<*NT;t++){ if (obsT[k]>time[t]){ /* decrease the number at risk because k was in the risk set at time[t] */ na = D[t]/(Y[t]-1); } else{ if (obsT[k]==time[t]){ /* decrease the number of events if k was an event, and decrease the number at risk because k was in the risk set at time[t] */ na = (D[t]-status[k])/(Y[t]-1); } else{ /* do nothing */ na = D[t]/Y[t]; } } /* compute the Aalen-Johansen estimate */ aj += lagSurv[t * (*N) + k] * na; F[k+(*N)*t]=aj; } } } prodlim/src/sindex.c0000755000175100001440000000075413057250252014175 0ustar hornikusers/* compute the values of a step function, ie how many of the jumps are smaller or equal to the eval points */ void sindexSRC(int *index, double *jump, double *eval, int *N, int *NT, int *strict){ int i,t; index[0] = 0; i = 0; if (*strict==0){ for (t=0;t<*NT;t++){ while(i<*N && jump[i]<=eval[t]) i++; index[t] = i; } } else{ for (t=0;t<*NT;t++){ while(i<*N && jump[i] < eval[t]) i++; index[t] = i; } } } prodlim/src/GMLE.c0000755000175100001440000000201113057250252013413 0ustar hornikusers#include #include #define max(A,B) ((A) > (B) ? (A):(B)) #define min(A,B) ((A) < (B) ? (A):(B)) void GMLE(int *Mstrata, int *Istrata, int *Mindex, int *Iindex, int *N, int *M, double *z, double *oldZ, double *tol, int *maxstep, int *niter){ int i,j,k,l,m,step,done; double newZ,nom, denom, diff; step=0; done=0; while (done==0 && step < *maxstep){ /* Rprintf("\n\nStep=%d\t\n",step); */ diff=0; for(k=0;k<*M;k++) oldZ[k]= z[k]; for(k=0;k<*M;k++){ nom=0; newZ=0; for(j=Mstrata[k]; j< Mstrata[k+1];j++){ i=Mindex[j]-1; denom=0; for(l=Istrata[i]; l < Istrata[i+1];l++){ m=Iindex[l]-1; denom += oldZ[m]; } nom = oldZ[k]; newZ += nom/denom; } z[k]=newZ/(*N); } for (k=0;k<*M;k++){ /* Rprintf("k=%d\toldZ[k]=%1.2f\tz[k]=%1.2f\tdiff=%1.2f\t\n",k,oldZ[k],z[k],diff); */ diff=max(max(z[k]-oldZ[k],oldZ[k]-z[k]),diff); } if (diff < *tol) done=1; step++; } niter[0]=step; } prodlim/src/icens_prodlim.c0000755000175100001440000001266413057250252015535 0ustar hornikusers/* The product limit method for interval censored data Copyright 2007-2009 Department of Biostatistics, University of Copenhagen Written by Thomas Alexander Gerds This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. The structure of the algorithm: looping until convergence or maxstep over all grid points starting with the interval [grid[0] ; grid[1]] the first time s=0 is a dummy time used to catch exact events at 0. to compute the hazard and the survival probability at the END of a grid interval [grid[s] ; grid[s+1]] first count events and censored between grid[s] and grid[s+1], then devide by the number at risk at grid[s]. Note: nevent[s+1] is the number of subjects at risk at time grid[s]. use only the observed intervals [L[i],R[i]] that overlap the current grid interval: [grid[s] ; grid[s+1]] whether or not an interval overlaps is determined by iindex, a vector of indices where the part from imax[x] to imax[x+1] identifies observations that overlap grid interval x Exact and right censored observations are handled as for the usual Kaplan-Meier method. Real interval censored observations contribute to the number of events by the relative to the overlap with the current grid-interval. To compute the relative event count at the very first step assume a uniform distribution, in subsequent steps use the survival probability of in the previous step */ #include #include #define max(A,B) ((A) > (B) ? (A):(B)) #define min(A,B) ((A) < (B) ? (A):(B)) void icens_prodlim(double *L, double *R, double *grid, int *indexL, int *indexR, int *iindex, int *imax, int *status, double *N, double *NS, double *nrisk, double *nevent, double *ncens, double *hazard, double *var_hazard, double *surv, double *oldsurv, double *tol, int *maxstep, int *niter) { int i, j, s, done=0, step=0, n, ns, start, stop; /* int verbose; */ double atrisk, pl, haz, varhaz, diff, survL, survR, lenOBS, nom; /* n = (int) *N; /\* number of interval censored observations *\/ */ ns = (int) *NS; /* number of grid points + 1 */ while (done==0 && step < *maxstep){ surv[0]=1; oldsurv[0]=1; diff=0; atrisk = *N; nrisk[0]= *N; varhaz=0; haz=0; pl=1; start=0; stop=max(0,imax[0]); /* LOOP OVER GRID INTERVALS */ for (s=0; s < (ns-2); s++){ nrisk[s+1]=atrisk; nevent[s+1] = 0; ncens[s+1] = 0; /* LOOP OVER OBSERVED INTERVALS */ for (j=start; j < stop; j++){ i=iindex[j]-1; /* R starts counting at 1 */ if (status[i]==0 && L[i] == grid[s+1]) ncens[s+1]++; /* right censored */ if (status[i]>0){ lenOBS = R[i] - L[i]; if (lenOBS==0 && L[i] == grid[s+1]) nevent[s+1] ++; /* exact observation */ if (lenOBS > 0){ if (L[i] < grid[s+1] && R[i]>grid[s]){ if (step==0){ nevent[s+1] += max(0,min(R[i],grid[s+1]) -max(grid[s],L[i]))/lenOBS; } else{ survL = surv[indexL[i]-1]; survR = surv[indexR[i]-1]; nom = (min(survL,surv[s]) - max(surv[s+1],survR)); /* overlap */ if (nom>=*tol) nevent[s+1] += nom/(survL-survR); } } } } } start=max(0,imax[s]); stop=max(imax[s+1],0); if (nevent[s+1]>0){ haz = nevent[s+1] / (double) atrisk; pl*=(1 - (nevent[s+1] / (double) atrisk)); varhaz += nevent[s+1] / (double) (atrisk * (atrisk - nevent[s+1])); } if (step>0) oldsurv[s+1]= surv[s+1]; /* move the current estimate to oldsurv */ surv[s+1]=pl; /* update the survival probability */ hazard[s+1] = haz; var_hazard[s+1] = varhaz; atrisk-=(nevent[s+1]+ncens[s+1]); /* update the number at risk */ } for (s=0;s<(ns-2);s++){ /* check if the algorithm converged */ diff=max(max(surv[s+1]-oldsurv[s+1],oldsurv[s+1]-surv[s+1]),diff); } if (diff < *tol) done=1; step++; } niter[0]=step; } /* verbose=-2; */ /* THE CURRENT SURVIVAL ESTIMATE if (verbose>=0){ Rprintf("\nStep %d\n",step); for (s=0; s < (ns-2); s++) Rprintf("s(%1.2f)=%1.2f\n",grid[s],surv[s]); Rprintf("\n\n",step); } */ /* THE GRID INTERVAL if (step<=verbose){ Rprintf("\n"); Rprintf("grid=[%1.3f,%1.3f]\n",grid[s],grid[s+1]);} */ /* THE OBSERVED INTERVAL if (step<=verbose){ Rprintf("\n"); Rprintf("Obs=[%1.3f,%1.3f]\n",L[i],R[i]); } */ /* THE EVENT COUNT IN STEPS >0 if (step<=verbose){ Rprintf("survGrid=[%1.2f,%1.2f]\tsurvObs=[%1.2f,%1.2f]\tzaehl=%1.2f\tnenn=%1.2f\tjump=%1.2f\n",surv[s],surv[s+1],survL,survR,nom,(survL-survR),nevent[s+1]); } */ /* EVENTS, ATRISK, SURVPROB if (step<=verbose){ Rprintf("nevent=%1.2f\tnrisk=%1.2f\tsurv=%1.2f\t\n",nevent[s+1],atrisk,pl); } */ prodlim/src/prodlim_surv.c0000755000175100001440000001135013057250252015422 0ustar hornikusers#include #include #include "prodlim.h" void prodlim_surv(double *y, double *status, double *time, double *nrisk, double *event, double *loss, double *surv, double *hazard, double *varhazard, int *reverse, int *t, int start, int stop ){ int i,s; double surv_temp,hazard_temp,varhazard_temp,atrisk; s=(*t); surv_temp=1; hazard_temp=0; varhazard_temp=0; atrisk=(double) stop-start; event[s] = status[start]; loss[s] = (1-status[start]); for (i=(1+start);i<=stop;i++){ if (ientrytime[e-1]){ nrisk[s]=atrisk+entered; if (entrytime[e]!=time[s-1]){ /* if entrytime[e]==y[i] then only increase the number at risk but not move the time counter or the values of event, etc. */ /* Rprintf("e=%d\ts=%d\tentrytime[e]=%1.2f\ty[i-1]=%1.2f\ttime[s]=%1.2f\ti=%d\t\n",e,s,entrytime[e],y[i-1],time[s],i); */ event[s+1]=event[s]; event[s]=0; loss[s+1]=loss[s]; loss[s]=0; surv[s]=surv_temp; hazard[s]=0; varhazard[s]=varhazard_temp; time[s]=entrytime[e]; s++; } } e++; /* increase cumulative counter */ } atrisk += (double) entered; } time[s]=y[i-1]; nrisk[s]=atrisk; if (*reverse==1) pl_step(&surv_temp, &hazard_temp, &varhazard_temp, atrisk, loss[s], event[s]); else pl_step(&surv_temp, &hazard_temp, &varhazard_temp, atrisk, event[s], 0); surv[s]=surv_temp; /* Rprintf("Before s=%d\thazard_temp=%1.2f\t\n",s,hazard[s]); */ hazard[s]=hazard_temp; /* Rprintf("After s=%d\thazard_temp=%1.2f\t\n",s,hazard[s]); */ varhazard[s] = varhazard_temp; if (i void predict_individual_survival(double *pred, double *surv, double *jumptime, double *Y, int *first, int *size, int *n, int *lag){ int j,i; /* start at index 0 */ /* predicted survival probabilities at or just before the individual event times Y[i] */ for (i=0;i<(*n);i++){ j=0; /* index j is in stratum i if j < size[i] */ while(j < size[i] - 1 && jumptime[first[i] - 1 + j] != Y[i]) j++; if (j - *lag < 0) pred[i]=1; else pred[i] = surv[first[i] - 1 + j - *lag]; } } prodlim/src/prodlim_multistates.c0000755000175100001440000001453613057250252017012 0ustar hornikusers#include /*********************************************************************/ /* declaration of some functions called by 'trans' */ /*********************************************************************/ void init_start_risk(int t, int nt, int ns, int u, int* nrisk, int* nstart); void init_next_risk(int t, int nt, int ns, int* nrisk); void init_aj(int ns, double* aj); void set_event(int i, int t, int nt, int ns, int* tra_from, int* tra_to, int* trow, int* cens_in, int* cpos, int* nevent, int* ncens, int* status, int* nrisk); void multi_state(int t, int ntr, int ns, int* tra_from, int* tra_to, int* nrisk, int* nevent, double* hazard, double* aj, double* prob); void compute_hazard(int t, int ntr, int ns, int* tra_from, int* tra_to, int* nrisk, int* nevent, double* hazard); void compute_diag(int t, int ns, double* hazard); void compute_aj(int t, int ns, double* hazard, double* aj); void store_aj(int t, int ns, double* aj, double* prob); /*********************************************************************/ /* function 'prodlim_multistates' called by C-function 'trans' */ /*********************************************************************/ void prodlim_multistates(int* n, int* nstates, int* nobserv, int* size, int* ntra, int* tra_from, int* tra_to, int* trow, int* nci, int* cens_in, int* cpos, double* y, int* status, int* nstart, double* time, double* hazard, double* prob, int* nevent, int* ncens, int* nrisk, int *first_strata, int *ntimes_strata) { int i=0; int k=0; int s=0; int u=0; int t=0; int nt = *n; /* N */ int ns = *nstates; /* number of states, if censoring -1 is included */ int no = *nobserv; /* number of observations */ int ntr = *ntra; /* number of (unique) possible transitions */ double aj[(ns*ns)]; /* matrix for the aalen-johansen */ for(i=0; i < no; ++i) { /* loop over the observations (jumps) */ if( s == 0 ) { /* initialize nrisk with the start distribution for the strata*/ init_start_risk(t, nt, ns, u, nrisk, nstart); /* initialize aj */ init_aj(ns, aj); } set_event(i, t, nt, ns, tra_from, tra_to, trow, cens_in, cpos, nevent, ncens, status, nrisk); if( (s < size[u]-1 && y[i] != y[i+1]) || s == size[u]-1 ) { /* compute the hazards and aalen */ multi_state(t, ntr, ns, tra_from, tra_to, nrisk, nevent, hazard, aj, prob); /* store the time-point */ time[t] = y[i]; ++t; ++k; if(s < size[u]-1 ){ /* initialize nrisk for the next time-point */ init_next_risk(t, nt, ns, nrisk); } } if(s == size[u]-1) { first_strata[u] = t-k+1; ntimes_strata[u] = k; s=0; k=0; ++u; } else { ++s; } } } /*********************************************************************/ /* implementation of the functions called by 'trans_multi' */ /*********************************************************************/ void init_start_risk(int t, int nt, int ns, int u, int* nrisk, int* nstart) { int j = 0; nrisk[t*ns + j] = nstart[u]; for(j=1; j < ns; ++j) { nrisk[t*ns + j] = 0; } init_next_risk(t, nt, ns, nrisk); } void init_next_risk(int t, int nt, int ns, int* nrisk) { int j; if(t < (nt - 1) ) { for(j=0; j < ns; ++j) { nrisk[(t+1)*ns + j] = nrisk[t*ns + j]; } } } void init_aj(int ns, double* aj) { int i,j; for(i=0; i < ns; ++i){ for(j=0; j < ns; ++j) { aj[i*ns+j] = 0; if( i == j ) { aj[i*ns+j] = 1; } } } } void set_event(int i, int t, int nt, int ns, int* tra_from, int* tra_to, int* trow, int* cens_in, int* cpos, int* nevent, int* ncens, int* status, int* nrisk) { if( status[i] == 1 ) { /* add the transition */ nevent[ (t*ns*ns) + (tra_from[trow[i]]*ns + tra_to[trow[i]]) ] += 1; /* risk */ if(t < (nt - 1) ) { nrisk[ (t+1)*ns + tra_from[trow[i]] ] = nrisk[ (t+1)*ns + tra_from[trow[i]] ] - 1; nrisk[ (t+1)*ns + tra_to[trow[i]] ] = nrisk[ (t+1)*ns + tra_to[trow[i]] ] + 1; } } else { /* add censoring */ ncens[ (t*ns) + cens_in[cpos[i]] ] += 1; /* risk */ if(t < (nt - 1) ) { nrisk[ (t+1)*ns + cens_in[cpos[i]] ] = nrisk[ (t+1)*ns + cens_in[cpos[i]] ] - 1; } } } void multi_state(int t, int ntr, int ns, int* tra_from, int* tra_to, int* nrisk, int* nevent, double* hazard, double* aj, double* prob) { /* compute the hazards */ compute_hazard(t, ntr, ns, tra_from, tra_to, nrisk, nevent, hazard); /* compute the aalen-johansen */ compute_aj(t, ns, hazard, aj); /* store the aalen-johansen for time-point t */ store_aj(t, ns, aj, prob); } void compute_hazard(int t, int ntr, int ns, int* tra_from, int* tra_to, int* nrisk, int* nevent, double* hazard) { int j; /* compute the hazards */ for(j=0; j < ntr; ++j) { if(nevent[(t*ns*ns) + (tra_from[j]*ns + tra_to[j])] > 0 ) { hazard[(t*ns*ns) + (tra_from[j]*ns + tra_to[j])] = (double) nevent[(t*ns*ns) + (tra_from[j]*ns + tra_to[j])] / nrisk[t*ns + tra_from[j]]; } } /* compute the diagonal of the matrix hazard[(t*ns*ns)] */ compute_diag(t, ns, hazard); } void compute_diag(int t, int ns, double* hazard) { int r,c; double sumrow; /* compute the diagonal elements: the sum over each row must be 1 */ for(r=0; r < ns; ++r ) { sumrow = 0.; for( c = 0; c < ns; ++c ) { if( c != r ) { sumrow += hazard[(t*ns*ns) + (r*ns+c)]; } } hazard[(t*ns*ns)+ (r*ns+r)] = (double)(1 - sumrow); } } void compute_aj(int t, int ns, double* hazard, double* aj) { int r,c,i; double m[ns*ns]; for(r=0; r < ns; ++r) { for(c=0; c < ns; ++c) { m[r*ns+c] = 0.0; for(i=0; i < ns; ++i) { m[r*ns+c] += aj[r*ns+i] * hazard[(t*ns*ns) + (i*ns+c)]; } } } for(i=0; i < (ns*ns); ++i) { aj[i] = m[i]; } } void store_aj(int t, int ns, double* aj, double* prob) { int i; for(i=0; i < (ns*ns); ++i) { prob[(t*ns*ns) + i] = aj[i]; } } prodlim/src/icens_prodlim_ml.c0000755000175100001440000000616613057250252016225 0ustar hornikusers#include #include #define max(A,B) ((A) > (B) ? (A):(B)) #define min(A,B) ((A) < (B) ? (A):(B)) void icens_prodlim_ml(double *L, double *R, double *petoL, double *petoR, int *indexL, int *indexR, int *status, double *N, double *NS, double *nrisk, double *nevent, double *ncens, double *hazard, double *var_hazard, double *surv, double *oldsurv, double *tol, int *maxstep, int *educate, int *niter) { int i, s, done=0, step=0; double atrisk, pl, haz, varhaz, diff, tmpR, tmpL ,survL, survR, lenOBS; while (done==0 && step < *maxstep){ /* Rprintf("Step %d\n",step); */ diff=0; atrisk = *N; pl=1; haz=0; varhaz=0; nevent[0] = 0; ncens[0] = 0; for (s=0; s < *NS; s++){ /* loop over peto intervals */ nrisk[s]=atrisk; for (i=0; i < *N; i++){ /* loop only over those intervals */ /* that touch the current peto interval */ if (L[i]<=petoR[s] && R[i]>=petoL[s]){ /* /\* educated first step *\/ */ if (step==0){ /* if (*educate==0){ */ /* } */ /* else */ if (status[i]==0 && L[i] <= petoL[s]) ncens[s]++; /* right censored at L[i] before JL*/ if (status[i]==1){ lenOBS = R[i] - L[i]; if (lenOBS==0 && L[i] == petoL[s]) { nevent[s] ++; /* exact observations */ } if (lenOBS > 0){ /* interval censored */ if (s==0 && L[i]petoL[s]) tmpL=L[i]; else tmpL=petoL[s]; if (s==(*NS-1) && R[i]>petoR[s]) tmpR=R[i]; else if (R[i]=(*NS-1)) survR=0; survR=surv[indexR[i]-1]; if (s==0) tmpL=1; else tmpL=surv[s-1]; if (s==(*NS-1)) tmpR=0; else tmpR=surv[s]; nevent[s] += (tmpL - tmpR)/(survL - survR); /* Rprintf("i=(%1.0f,%1.0f)\ts=[%1.0f,%1.0f]\tnevent[s]=%1.2f\tsurv[s-1]=%1.2f\tsurv[s]=%1.2f\tsurvL=%1.2f\tsurvR=%1.2f\n",L[i],R[i],petoL[s],petoR[s],nevent[s],tmpL,tmpR,survL,survR); */ } } } if (nevent[s]>0){ haz = nevent[s] / atrisk; pl*=(1 - (nevent[s] / atrisk)); varhaz += nevent[s] / (atrisk * (atrisk - nevent[s])); } if (step>0) oldsurv[s]= surv[s]; surv[s]=pl; /* Rprintf("\ns=%d\tatrisk=%1.8f\tnevent[s]=%1.8f\tsurv[s]=%1.2f\n\n",s,atrisk,nevent[s],surv[s]); */ hazard[s] = haz; var_hazard[s] = varhaz; atrisk-=(nevent[s]+ncens[s]); nevent[s+1] = 0; ncens[s+1] = 0; } for (s=0;s<*NS;s++){ diff=max(max(surv[s]-oldsurv[s],oldsurv[s]-surv[s]),diff); } if (diff < *tol) done=1; step++; } /* Rprintf("Step %d\n",step); */ niter[0]=step; } prodlim/src/iindex.c0000755000175100001440000000064313057250252014160 0ustar hornikusers#include void iindexSRC(int *iindex, int *strata, double *L, double *R, double *U, int *N, int *NS){ int s, i, k; k=0; for (s=0;s<(*NS-1);s++){ i=0; for (i=0; i<*N;i++){ if ((L[i]==R[i] && L[i]==U[s+1]) /* exact obs */ || (L[i]U[s])){ /* [U[s],U[s+1]] intersects [L[i],R[i]] */ iindex[k] = i+1; k++; } } strata[s]=k; } } prodlim/src/summary_prodlim.c0000755000175100001440000000446213057250252016126 0ustar hornikusers#include void summary_prodlim(int *pred_nrisk, int *pred_nevent, int *pred_nlost, int *nrisk, int *nevent, int *nlost, double *evalTime, double *eventTime, int *first, int *size, int *NR, int *NT){ int i,t,s,First,Last; double min_eventTime, max_eventTime; /* in a loop across covariate strata, count events, right censored (lost) and numbers at risk at the eval time points: we aim to find the a) number at risk just before evalTime[t] b) the number of uncensored events at evalTime[t] c) the number of censored at evalTime[t] i: covariate strata t: runs through evalTime s: runs through intervals between eventTimes the requested time points are in `evalTime' the censored event times are in `eventTime' There are three cases: (1) before the first event time (2) between event times (3) after the last event time the covariate stratum starts at First=first[i]-1 and stops at Last=first[i]-1 + size[i]-1 */ for (i=0;i<*NR;i++){ First=first[i]-1; Last=first[i]-1 + size[i]-1; min_eventTime = eventTime[First]; max_eventTime = eventTime[Last]; s=0; for (t=0;t<(*NT);t++){ if (evalTime[t] < min_eventTime){ pred_nrisk[t + i *(*NT)] = nrisk[First]; pred_nevent[t + i *(*NT)] = 0; pred_nlost[t + i *(*NT)] = 0; } else{ if (evalTime[t] > max_eventTime){ while(t<(*NT)){ pred_nrisk[t + i *(*NT)] = 0; pred_nevent[t + i *(*NT)] = 0; pred_nlost[t + i *(*NT)] = 0; t++; } } else{ /* move to the largest event time before the eval time */ while ((eventTime[First + s] < evalTime[t]) && (s <= size[i]-1)){ s++; } /* Rprintf("s=%d\tevalTime=%1.2f\teventTime[First+s]=%1.2f\tFirst=%d\tnrisk=%d\n",s,evalTime[t],eventTime[First+s],First,nrisk[First+s]); */ pred_nrisk[t + i *(*NT)] = nrisk[First+s]; if (eventTime[First + s] == evalTime[t]){ pred_nevent[t + i *(*NT)] = nevent[First+s]; pred_nlost[t + i *(*NT)] = nlost[First+s]; } else{ pred_nevent[t + i *(*NT)] = 0; pred_nlost[t + i *(*NT)] = 0; } } } /* do NOT reset s because the next evalTime is greater or equal to the current. */ } } } prodlim/src/declareRoutines.c0000644000175100001440000000675213057250252016034 0ustar hornikusers#include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. */ /* .C calls */ extern void findex(void *, void *, void *, void *, void *, void *, void *, void *); extern void GMLE(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void icens_prodlim(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void iindexSRC(void *, void *, void *, void *, void *, void *, void *); extern void IntIndexSRC(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void life_table(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void loo_comprisk(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void loo_surv(void *, void *, void *, void *, void *, void *, void *, void *); extern void neighborhoodSRC(void *, void *, void *, void *, void *, void *, void *, void *); extern void neighborsSRC(void *, void *, void *, void *, void *); extern void pred_index(void *, void *, void *, void *, void *, void *, void *); extern void predict_individual_survival(void *, void *, void *, void *, void *, void *, void *, void *); extern void prodlimSRC(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void prodlim_multistates(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void sindexSRC(void *, void *, void *, void *, void *, void *); extern void summary_prodlim(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); static const R_CMethodDef CEntries[] = { {"findex", (DL_FUNC) &findex, 8}, {"GMLE", (DL_FUNC) &GMLE, 11}, {"icens_prodlim", (DL_FUNC) &icens_prodlim, 20}, {"iindexSRC", (DL_FUNC) &iindexSRC, 7}, {"IntIndexSRC", (DL_FUNC) &IntIndexSRC, 10}, {"life_table", (DL_FUNC) &life_table, 13}, {"loo_comprisk", (DL_FUNC) &loo_comprisk, 9}, {"loo_surv", (DL_FUNC) &loo_surv, 8}, {"neighborhoodSRC", (DL_FUNC) &neighborhoodSRC, 8}, {"neighborsSRC", (DL_FUNC) &neighborsSRC, 5}, {"pred_index", (DL_FUNC) &pred_index, 7}, {"predict_individual_survival", (DL_FUNC) &predict_individual_survival, 8}, {"prodlimSRC", (DL_FUNC) &prodlimSRC, 29}, {"prodlim_multistates", (DL_FUNC) &prodlim_multistates, 22}, {"sindexSRC", (DL_FUNC) &sindexSRC, 6}, {"summary_prodlim", (DL_FUNC) &summary_prodlim, 12}, {NULL, NULL, 0} }; void R_init_prodlim(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } prodlim/src/life_table.c0000755000175100001440000000621613057250252014770 0ustar hornikusers#include void life_table(int *pred_nrisk, int *pred_nevent, int *pred_nlost, int *nrisk, int *nevent, int *nlost, double *lower, double *upper, double *eventTime, int *first, int *size, int *NR, int *NT){ int i,t,s,count_e,count_l,First,Last; double min_eventTime, max_eventTime; /* Aim: life table intervals are given by [lower[t] ; upper[t]) NOTE: the intervals are closed on the right and open on the left in a loop across covariate strata find the a) the number at risk just before lower[t] b) the number of uncensored events in interval c) the number of censored in interval Notation: i: runs through covariate strata t: runs through lower and upper s: runs through intervals between eventTimes the covariate stratum starts at First=first[i]-1 and stops at Last=first[i]-1 + size[i]-1 the censored event times are in `eventTime' There are three cases: (1) the interval lays before the first event time (2) the interval includes one event time (3) the interval lays behind the last event time */ for (i=0;i<*NR;i++){ First=first[i]-1; Last=first[i]-1 + size[i]-1; min_eventTime = eventTime[First]; max_eventTime = eventTime[Last]; s=0; for (t=0;t<(*NT);t++){ count_e =0; count_l =0; if (upper[t] < min_eventTime){ /* case (1) interval before the first event time: [).... */ pred_nrisk[t + i *(*NT)] = nrisk[First]; pred_nevent[t + i *(*NT)] = 0; pred_nlost[t + i *(*NT)] = 0; } else{ if (lower[t] > max_eventTime){ /* the left side of the interval is larger than max_eventTime.*/ /* case (3) after the last eventTime: ....[) */ while(t<(*NT)){ pred_nrisk[t + i *(*NT)] = 0; pred_nevent[t + i *(*NT)] = 0; pred_nlost[t + i *(*NT)] = 0; t++; } } else{ /* case (2) between .[..).. here upper[t] >= min_eventTime and lower[t] <= max_eventTime */ /* first find number at risk just before lower[t] ... */ /* Rprintf("s=%d\tFirst=%d\tnrisk=%d\n",s,First,nrisk[First+s]); */ if (s==0){ pred_nrisk[t + i *(*NT)] = nrisk[First]; } else{ pred_nrisk[t + i *(*NT)] = nrisk[First+s]; } /* ... then count events and lost in interval [lower[t],upper[t]) */ /* while ((s <= size[i]-1) && (eventTime[First + s] >= lower[t]) && (eventTime[First + s] < upper[t])){ */ while ((s <= size[i]-1) && (eventTime[First + s] < upper[t])){ count_e +=nevent[First+s]; count_l +=nlost[First+s]; /* Rprintf("s=%d\tsize=%d\tetime[First+s]=%1.2f\tlower[t]=%1.2f\tupper[t]=%1.2f\tnevent[First+s]=%d\tnlost[First+s]=%d\n",s,size[i]-1,eventTime[First+s],lower[t],upper[t],nevent[First+s],nlost[First+s]); */ s++; } pred_nevent[t + i *(*NT)] = count_e; pred_nlost[t + i *(*NT)] = count_l; /* now s is such that either eventTime[First + s] >= upper[t] =lower[t+1] or s==size[i] */ } } } /* do NOT reset s because the next event Time is greater or equal to the current. */ } } prodlim/src/prodlim_clustersurv.c0000755000175100001440000000677313057250252017041 0ustar hornikusers#include #include #include "prodlim.h" void prodlim_clustersurv(double *y, double *status, int *cluster, int *NC, double *time, double *nrisk, double *cluster_nrisk, double *nevent, double *lost, double *ncluster_with_event, double *ncluster_lost, double *sizeof_cluster, double *nevent_in_cluster, double *surv, double *hazard, double *varhazard, double *adj1, double *adj2, double *adjvarhazard, int *t, int start, int stop){ int s,i,l,k; double surv_step, hazard_step, V1, V2, atrisk, cluster_atrisk; /* Rprintf("Call clustersurv\n\n"); */ /* initialize the time counter */ s = (*t); /* cluster is an indicator of the cluster number. for example if the individual (tooth) 'i' belongs to patient 'k' then 'cluster[i]=k' First we need to re-initialize sizeof_cluster, nevent_in_cluster, etc are set to zero. */ for (k=0;k<*NC;k++) { sizeof_cluster[k]=0; nevent_in_cluster[k]=0; adj1[k]=0; adj2[k]=0; } /* Then, the vector "sizeof_cluster" is initialized with the current number of individuals in the cluster. */ for (i=start;i #include #include "prodlim.h" void prodlimSRC(double *y,double *status,int *cause,double *entrytime,double *caseweights,int *cluster,int *N,int *NS,int *NC,int *NU,int *size,double *time,double *nrisk,double *event,double *lost,double *surv,double *cuminc,double *hazard,double *varhazard,double *extra_double,int *max_nc,int *ntimes,int *size_strata,int *first_strata,int *reverse,int *model,int *independent,int *delayed,int *weighted) { int t, u, start, stop, size_temp; t=0; start=0; size_temp=0; for (u=0;u<*NU;u++){ stop=start+size[u]; if (*model==0){ if (*independent==1){ if (*weighted==1 || *delayed==1){ prodlimSurvPlus(y,status,entrytime,caseweights,time,nrisk,event,lost,surv,hazard,varhazard,reverse,&t,start,stop,delayed,weighted); } else{ prodlim_surv(y,status,time,nrisk,event,lost,surv,hazard,varhazard,reverse,&t,start,stop); } } else{ double *cluster_nrisk, *adj1, *adj2, *adjvarhazard; double *ncluster_lost, *ncluster_with_event, *sizeof_cluster, *nevent_in_cluster; /* tag: 12 Nov 2010 (18:41) the length of nrisk, nevent and lost is 2 * N the first half is used for the individual level the second for the cluster level. the function is thus still restricted to a single cluster variable */ cluster_nrisk = nrisk + *N; ncluster_with_event = event + *N; ncluster_lost = lost + *N; adjvarhazard = varhazard + *N; adj1 = extra_double; adj2 = extra_double + *max_nc; nevent_in_cluster = extra_double + *max_nc + *max_nc; sizeof_cluster = extra_double + *max_nc + *max_nc + *max_nc; prodlim_clustersurv(y,status,cluster,NC + u,time,nrisk,cluster_nrisk,event,lost,ncluster_with_event,ncluster_lost,sizeof_cluster,nevent_in_cluster,surv,hazard,varhazard,adj1,adj2,adjvarhazard,&t,start,stop); } } else{ if (*model==1){ double *cuminc_temp, *cuminc_lag, *v1, *v2; cuminc_temp = extra_double; cuminc_lag = extra_double + *NS; v1 = extra_double + *NS + *NS; v2 = extra_double + *NS + *NS + *NS; if (*weighted==1 || *delayed==1){ prodlimCompriskPlus(y,status,cause,entrytime,caseweights,NS,time,nrisk,event,lost,surv,cuminc,hazard,varhazard,cuminc_temp,cuminc_lag,v1,v2,&t,start,stop,delayed,weighted); } else{ prodlim_comprisk(y,status,cause,NS,time,nrisk,event,lost,surv,cuminc,hazard,varhazard,cuminc_temp,cuminc_lag,v1,v2,&t,start,stop); } } } start+=size[u]; size_strata[u] = t - size_temp; first_strata[u] = t + 1 - size_strata[u]; size_temp += size_strata[u]; } *ntimes=t; } void pl_step(double *pl,double *aj,double *v,double n,double d,int rev){ if (d > 0){ *aj = (d / (double) (n - rev)); /* nelson-aalen */ *v += (double) d / ((double) (n - rev) * (double) (n - rev - d)); /* greenwood variance */ *pl *= (1 - *aj); /* product limit */ } else{ *aj=0; } } prodlim/src/IntIndex.c0000755000175100001440000000263513057250252014425 0ustar hornikusers#include #include void IntIndexSRC(double *L, double *R, double *p, double *q, int *N, int *M, int *Iindex, int *Mindex, int *Istrata, int *Mstrata){ int i,m,k,l; k=0; for (i=0; i<*N;i++){ for (m=0; m<*M;m++){ if ((L[i]==R[i] && p[m]==q[m] && L[i]==q[m]) /* point */ || (L[i]=q[m] && R[i]>p[m])) /* interval */ { Iindex[k]=m+1; k++; } } Istrata[i]=k; } l=0; for (m=0; m<*M;m++){ for (i=0; i<*N;i++){ if ((L[i]==R[i] && p[m]==q[m] && L[i]==q[m]) /* point */ || (L[i]=q[m] && R[i]>p[m])) /* interval */ { Mindex[l]=i+1; l++; } } Mstrata[m]=l; } } void Turnb(int *Mstrata, int *Istrata, int *Mindex, int *Iindex, int *N, int *M, double *Z, double *nplme){ int i,l,u,j,Iind, Mind; double Ilast, ZI, ZM, Mlast, Zlast, ZMI; Mlast=0; for(i=0;i<*M;i++){ Zlast=0; ZMI=0; for(l=0;l<*N; l++){ Mlast=0; ZM=0; Mind=0; for(u=Mstrata[l];u #include #include "prodlim.h" /* Compute the Aalen-Johannsen estimate in a loop over "NS" causes. Important: the vector "cause" has code "-1" for censored obs */ /* {{{ Header */ void prodlim_comprisk(double* y, double* status, int* cause, int* NS, /* number of causes (states) */ double* time, double* nrisk, double* event, double* loss, double* surv, double* cuminc, double* cause_hazard, double* varcuminc, double* I, /* current cumulative incidence */ double*I_lag, /* time lagged cumulative incidence */ double* v1, double* v2, int *t, int start, int stop) { int i,j,s,d,d1; double S,S_lag,H,varH,n; /* }}} */ /* {{{ initialization */ s=(*t); S=1; H=0; for(j=0; j < (*NS); ++j) { I[j]=0; I_lag[j]=0; v1[j]=0; v2[j]=0; } varH=0; n=(double) stop-start; /* (sub-)sample size */ if (status[start]>0) event[s *(*NS) + cause[start]]=1; else loss[s]=1; /* }}} */ for (i=(1+start);i<=stop;i++){ /* {{{ if tie then wait */ if (i0) event[s * (*NS) + cause[i]] +=1; else loss[s]+=1; } /* }}} */ else { /* {{{ at s: set time, atrisk; reset d */ time[s]=y[i-1]; nrisk[s]=n; d = 0; /* }}} */ /* {{{ loop over causes: compute cuminc */ for(j=0; j < (*NS); ++j) { cause_hazard[s * (*NS) + j] = (event[s * (*NS) + j] / n); I_lag[j] = I[j]; I[j] += S * cause_hazard[s * (*NS) + j]; cuminc[s * (*NS) + j] = I[j]; d += event[s * (*NS) + j]; } /* }}} */ /* {{{ compute survival */ S_lag = S; pl_step(&S, &H, &varH, n, d, 0); surv[s] = S; /* }}} */ /* {{{ variance estimate Marubini & Valsecchi (1995), Wiley, chapter 10, page 341 */ for (j=0; j < (*NS); ++j){ d1 = event[s * (*NS) + j]; /* d2 = d - d1; */ v1[j] += I[j] * (d / (n * (n - d))) + (S_lag * d1) / (n * n); v2[j] += (I[j] * I[j]) * (d / (n * (n - d))) + ((S_lag * S_lag) * (n - d1) * d1) / (n * n * n) + (2 * I[j] * S_lag * d1) / (n * n); varcuminc[s * (*NS) + j] = (I[j] * I[j]) * varH - 2 * I[j] * v1[j] + v2[j]; /* varH is greenwood's formula */ /* variance estimate Korn & Dorey (1992), Stat in Med, Vol 11, page 815 */ /* I1 = (I[j] - I_lag[j]) / 2; */ } /* }}} */ /* {{{ update atrisk, set n.event, loss, for the next time point */ if (i0){ event[s *(*NS) + cause[i]]=1; } else loss[s]=1; } /* }}} */ } } *t=(s+1); /* for the next strata */ } void prodlimCompriskPlus(double* y, double* status, int* cause, double *entrytime, double *caseweights, int* NS, /* number of causes (states) */ double* time, double* nrisk, double* event, double* loss, double* surv, double* cuminc, double* cause_hazard, double* varcuminc, double* I, /* current cumulative incidence */ double* I_lag, /* time lagged cumulative incidence */ double* v1, double* v2, int *t, int start, int stop, int *delayed, int *weighted ) { int i,e,j,s,d,d1,entered; double S,S_lag,H,varH,atrisk; /* }}} */ /* {{{ initialization */ s=(*t); e=0; S=1; S_lag=1; H=0; for(j=0; j < (*NS); ++j) { I[j]=0; I_lag[j]=0; v1[j]=0; v2[j]=0; } varH=0; if (*weighted==1){ atrisk=0; for (i=start;i0){ event[s *(*NS) + cause[start]]=caseweights[start]; } else{ loss[s]=caseweights[start]; } } else{ if (status[start]>0){ event[s *(*NS) + cause[start]]=1; } else{ loss[s]=1; } } /* }}} */ for (i=(1+start);i<=stop;i++){ /* {{{ if tie then wait */ if (i0) event[s * (*NS) + cause[i]] +=caseweights[i]; else loss[s]+=caseweights[i]; } else{ if (status[i]>0) event[s * (*NS) + cause[i]] ++; else loss[s]++; } } /* }}} */ else{ /* {{{ at s: set time, atrisk; reset d */ if (*delayed==1){ /* delayed entry: find number of subjects that entered at time[s] */ entered=0; while(eentrytime[e-1]){ /* unless there is a tie between the current and the next entry-time, add time to list of times, increase s and move the values of event, loss etc. to the next event time */ nrisk[s]=atrisk+entered; if (entrytime[e]!=time[s-1]){ /* if entrytime[e]==y[i] then only increase the number at risk but not move the time counter or the values of event, etc. */ /* Rprintf("e=%d\ts=%d\tentrytime[e]=%1.2f\ty[i-1]=%1.2f\ttime[s-1]=%1.2f\ti=%d\t\n",e,s,entrytime[e],y[i-1],time[s-1],i); */ for(j=0; j < (*NS); ++j) { event[(s+1) * (*NS) + j]=event[s * (*NS) + j]; event[s * (*NS) + j]=0; } loss[s+1]=loss[s]; loss[s]=0; if (entrytime[e]0){ event[s *(*NS) + cause[i]]=caseweights[i]; } else loss[s]=caseweights[i]; } else{ if (status[i]>0){ event[s *(*NS) + cause[i]]=1; } else loss[s]=1; } } /* }}} */ } } *t=(s+1); /* for the next strata */ } prodlim/src/neighborhood.c0000755000175100001440000000335313057250252015350 0ustar hornikusers/* define symmetric neighborhoods for unique values u in x input ===== n: the sample size nu: number of unique x values cumtabu: n times the cumulative empirical df at u cumtabx: n times the cumulative empirical df at x tabx: frequency of x radius: n times the bandwidth output specific to neighborhood's ================================= first: the first neighbor size: the size the neighborhood neighbors sorted from the first to last neighborhood */ #include void neighborhoodSRC(int *first, int *size, int *cumtabu, int *cumtabx, int *tabx, int *radius, int *nu, int *n){ int u,last; for (u=0;u<*nu;u++){ /* make a first guess */ first[u]=cumtabu[u]-*radius; last=cumtabu[u]+*radius; /* if x[first[u]] is tied, move to the first[u] member in the bin */ if (first[u]<=0) first[u]=1; else first[u] = cumtabx[first[u]-1]-tabx[first[u]-1]+1; /* if x[last] is tied and not the last in its bin, move to the previous bin */ if (last>*n) last=*n; else if (cumtabx[last-1] > last) last=cumtabx[last-1]-tabx[last-1]; size[u]=last-first[u]+1; } } int neworder (int *a, int *b){ if (*a < *b) return -1; else return 1;} void neighborsSRC(int *first, int *size, int *orderx, int *neighbors, int *nu){ int u,i,new,start=0; /* fill the neighborhoods */ new=0; for (u=0;u<*nu;u++){ for (i=0;i0],function(sp){ ord <- attr(Terms, "order")[sp$terms] if (any(ord > 1)) stop(paste(sp," can not be used in an interaction"),call.=FALSE)}) } special.frame <- lapply(spec,function(sp){ if (length(sp)) { sp.frame <- data[,sp$vars,drop=FALSE] names(sp.frame) <- extract.name.from.special(names(sp.frame)) sp.frame } else NULL}) all.varnames <- all.vars(delete.response(Terms)) unspecified <- all.varnames[!(all.varnames %in% unlist(lapply(special.frame,names)))] special.frame$unspecified <- data[,unspecified,drop=FALSE] special.frame } prodlim/R/lines.prodlim.R0000755000175100001440000000010313035633434015042 0ustar hornikuserslines.prodlim <- function(x,...){ plot.prodlim(x,...,add=TRUE) } prodlim/R/lifeTab.competing.risks.R0000755000175100001440000002741713035633434016770 0ustar hornikuserslifeTab.competing.risks <- function(object,times,cause,newdata,stats,intervals=FALSE,percent=TRUE,showTime=TRUE){ # {{{---------get the indices-------------------------- IndeX <- predict(object,newdata=newdata,level.chaos=0,times=times,type="list") # }}} # {{{--------------times------------------------------- times <- IndeX$times Ntimes <- IndeX$dimensions$time pindex <- IndeX$indices$time # }}} # {{{---------covariate strata-------------------------- Nstrata <- IndeX$dimensions$strata findex <- IndeX$indices$strata # }}} # {{{---------competing causes-------------------------- if (missing(cause)) causes <- attributes(object$model.response)$states else{ causes <- checkCauses(cause,object) } # }}} # {{{--------------stats------------------------------- if (missing(stats) || (!missing(stats) && is.null(stats))) stats <- list(c("n.event",0),c("n.lost",0)) else stats <- c(list(c("n.event",0),c("n.lost",0)),stats) # # }}} # {{{---------loop over causes-------------------------- # outList <- lapply(causes,function(cc){ # ---no. at atrisk, events, and censored------------------ if (intervals==FALSE){ if (is.null(object$clustervar)){ ## only one column for n.risk xxx <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk),nevent=as.integer(object$n.event[[cc]]),nlost=as.integer(object$n.lost),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) ## out <- data.frame(n.risk=xxx$pred.nrisk) } else{ xxx <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1]),nevent=as.integer(object$n.event[[cc]][,1]),nlost=as.integer(object$n.lost),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) ## out <- data.frame(n.risk=xxx$pred.nrisk) for (cv in 1:length(object$clustervar)) yyy <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1+cv]),nevent=as.integer(object$n.event[[cc]][,1+cv]),nlost=as.integer(object$n.lost[,1+cv]),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") outCV <- data.frame(n.risk=yyy$pred.nrisk,n.event=yyy$pred.nevent,n.lost=yyy$pred.nlost) ## outCV <- data.frame(n.risk=yyy$pred.nrisk) names(outCV) <- paste(object$clustervar,names(outCV)) out <- cbind(out,outCV) } } # }}} # {{{-------Intervals--------------------------- else{ #,---- #| get the no. at risk at the left limit of the interval #| and count events and censored excluding the left limit #`---- start <- min(min(object$time),0)-.1 lower <- c(start,times[-length(times)]) upper <- times lagTimes <- c(min(min(object$time),0)-.1 , times[-length(times)]) if (is.null(object$clustervar)){ ## only one column in n.event and n.risk xxx <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk),nevent=as.integer(object$n.event[[cc]]),nlost=as.integer(object$n.lost),as.double(lower),as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) } else{ xxx <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1]),nevent=as.integer(object$n.event[[cc]][,1]),nlost=as.integer(object$n.lost[,1]),as.double(lower),as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) lagxxx <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1]),nevent=as.integer(object$n.event[[cc]][,1]),nlost=as.integer(object$n.lost[,1]),as.double(lagTimes),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),intervals=as.integer(TRUE),NAOK=FALSE,PACKAGE="prodlim") out$n.risk <- lagxxx$pred.nrisk for (cv in 1:length(object$clustervar)){ yyy <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1+cv]),nevent=as.integer(object$n.event[[cc]][,1+cv]),nlost=as.integer(object$n.lost[,1+cv]),as.double(lower),as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") outCV <- data.frame(n.risk=yyy$pred.nrisk,n.event=yyy$pred.nevent,n.lost=yyy$pred.nlost) names(outCV) <- paste(object$clustervar,names(outCV)) out <- cbind(out,outCV) } } } # }}} # {{{ percent if (!is.null(stats)){ statsList <- lapply(stats,function(x){ if (percent==TRUE && (match(x[1],c("n.event","n.lost","n.risk"),nomatch=0)==0)){ if (x[1]=="surv") { # only one for all causes 100*as.numeric(c(x[2],object[[x[1]]])[pindex+1]) } else{ 100*as.numeric(c(x[2],object[[x[1]]][[cc]])[pindex+1]) } } else{ if (x[1]%in%c("surv","n.lost")) {# only one for all causes as.numeric(c(x[2],object[[x[1]]])[pindex+1]) } else{ as.numeric(c(x[2],object[[x[1]]][[cc]])[pindex+1]) } } }) names(statsList) <- sapply(stats,function(x)x[[1]]) add <- do.call("cbind",statsList) add <- add[,match(colnames(add),colnames(out),nomatch=FALSE)==0,drop=FALSE] if (NROW(out)==1) out <- data.frame(cbind(out,add)) else out <- cbind(out,add) } # }}} # {{{ split according to covariate strata---------------- if (!is.null(newdata) || Nstrata > 1) { split.cova <- rep(1:Nstrata,rep(Ntimes,Nstrata)) out <- split(out,split.cova) names(out) <- IndeX$names.strata out <- lapply(out,function(x){ x <- as.matrix(x) if (showTime==TRUE){ if (intervals==TRUE) x <- cbind(time0=c(0,round(times[-length(times)],2)),time1=times,x) else x <- cbind(time=times,x) rownames(x) <- 1:NROW(x) } else{ # times are rownames if (intervals==TRUE) rownames(x) <- paste("(",paste(c(0,round(times[-length(times)],2)),round(times,2),sep="-"),"]",sep="") else rownames(x) <- round(times,2) } x }) } else{ out <- as.matrix(out) if (showTime==TRUE){ if (intervals==TRUE) out <- cbind(time0=c(0,round(times[-length(times)],2)),time1=times,out) else out <- cbind(time=times,out) rownames(out) <- 1:NROW(out) } else{ # times are rownames if (intervals==TRUE) rownames(out) <- paste("(",paste(c(0,round(times[-length(times)],2)),round(times,2),sep="-"),"]",sep="") else rownames(out) <- round(times,2) } out } }) # }}} names(outList) <- causes outList } prodlim/R/Hist.R0000755000175100001440000004604613035633434013212 0ustar hornikusers#' Create an event history response variable #' #' Functionality for managing censored event history response data. The #' function can be used as the left hand side of a formula: \code{Hist} serves #' \code{\link{prodlim}} in a similar way as \code{\link{Surv}} from the #' survival package serves `survfit'. \code{Hist} provides the suitable #' extensions for dealing with right censored and interval censored data from #' competing risks and other multi state models. Objects generated with #' \code{Hist} have a print and a plot method. #' #' #' *Specification of the event times* #' #' If \code{time} is a numeric vector then the values are interpreted as right #' censored event times, ie as the minimum of the event times and the censoring #' times. #' #' If \code{time} is a list with two elements or data frame with two numeric #' columns The first element (column) is used as the left endpoints of interval #' censored observations and the second as the corresponding right endpoints. #' When the two endpoints are equal, then this observation is treated as an #' exact uncensored observation of the event time. If the value of the right #' interval endpoint is either \code{NA} or \code{Inf}, then this observation #' is treated as a right censored observation. Right censored observations can #' also be specified by setting the value of \code{event} to \code{cens.code}. #' This latter specification of right censored event times overwrites the #' former: if \code{event} equals \code{cens.code} the observation is treated #' as right censored no matter what the value of the right interval endpoint #' is. #' #' *Specification of the events* #' #' If \code{event} is a numeric, character or logical vector then the order of #' the attribute "state" given to the \code{value} of \code{Hist} is determined #' by the order in which the values appear. If it is a factor then the order #' from the levels of the factor is used instead. #' #' **Normal form of a multi state model** #' #' If \code{event} is a list or a data.frame with exactly two elements, then #' these describe the transitions in a multi state model that occurred at the #' corresponding \code{time} as follows: The values of the first element are #' interpreted as the \code{from} states of the transition and values of the #' second as the corresponding \code{to} states. #' #' **Longitudinal form of a multi state model** #' #' If \code{id} is given then \code{event} must be a vector. In this case two #' subsequent values of \code{event} belonging to the same value of \code{id} #' are treated as the \code{from} and \code{to} states of the transitions. #' #' @param time for right censored data a numeric vector of event times -- for #' interval censored data a list or a data.frame providing two numeric vectors #' the left and right endpoints of the intervals. See \code{Details}. #' @param event A vector or a factor that specifies the events that occurred at #' the corresponding value of \code{time}. Numeric, character and logical #' values are recognized. It can also be a list or a data.frame for the #' longitudinal form of storing the data of a multi state model -- see #' \code{Details}. #' @param entry Vector of delayed entry times (left-truncation) or list of two #' times when the entry time is interval censored. #' @param id Identifies the subjects to which multiple events belong for the #' longitudinal form of storing the data of a multi state model -- see #' \code{Details}. #' @param cens.code A character or numeric vector to identify the right #' censored observations in the values of \code{event}. Defaults to "0" which #' is equivalent to 0. #' @param addInitialState If TRUE, an initial state is added to all ids for the #' longitudinal input form of a multi-state model. #' @return An object of class \code{Hist} for which there are print and plot #' methods. The object's internal is a matrix with some of the following #' columns: \item{time}{ the right censored times} \item{L}{the left endpoints #' of internal censored event times} \item{R}{the right endpoints of internal #' censored event times} \item{status}{\code{0} for right censored, \code{1} #' for exact, and \code{2} for interval censored event times.} \item{event}{an #' integer valued numeric vector that codes the events.} \item{from}{an integer #' valued numeric vector that codes the \code{from} states of a transition in a #' multi state model.} \item{to}{an integer valued numeric vector that codes #' the \code{to} states of a transition in a multi state model.} #' #' Further information is stored in \code{\link{attributes}}. The key to the #' official names given to the events and the from and to states is stored in #' an attribute "states". #' @author Thomas A. Gerds \email{tag@@biostat.ku.dk}, Arthur Allignol #' \email{arthur.allignol@@fdm.uni-freiburg.de} #' @seealso \code{\link{plot.Hist}}, \code{\link{summary.Hist}}, #' \code{\link{prodlim}} #' @keywords survival #' @examples #' #' #' ## Right censored responses of a two state model #' ## --------------------------------------------- #' #' Hist(time=1:10,event=c(0,1,0,0,0,1,0,1,0,0)) #' #' ## change the code for events and censored observations #' #' Hist(time=1:10,event=c(99,"event",99,99,99,"event",99,"event",99,99),cens.code=99) #' #' TwoStateFrame <- SimSurv(10) #' SurvHist <- with(TwoStateFrame,Hist(time,status)) #' summary(SurvHist) #' plot(SurvHist) #' #' ## Right censored data from a competing risk model #' ## -------------------------------------------------- #' #' CompRiskFrame <- data.frame(time=1:10,event=c(1,2,0,3,0,1,2,1,2,1)) #' CRHist <- with(CompRiskFrame,Hist(time,event)) #' summary(CRHist) #' plot(CRHist) #' #' ## Interval censored data from a survival model #' icensFrame <- data.frame(L=c(1,1,3,4,6),R=c(2,NA,3,6,9),event=c(1,1,1,2,2)) #' with(icensFrame,Hist(time=list(L,R))) #' #' ## Interval censored data from a competing risk model #' with(icensFrame,Hist(time=list(L,R),event)) #' #' ## Multi state model #' MultiStateFrame <- data.frame(time=1:10, #' from=c(1,1,3,1,2,4,1,1,2,1), #' to=c(2,3,1,2,4,2,3,2,4,4)) #' with(MultiStateFrame,Hist(time,event=list(from,to))) #' #' ## MultiState with right censored observations #' #' MultiStateFrame1 <- data.frame(time=1:10, #' from=c(1,1,3,2,1,4,1,1,3,1), #' to=c(2,3,1,0,2,2,3,2,0,4)) #' with(MultiStateFrame1,Hist(time,event=list(from,to))) #' #' ## Using the longitudinal input method #' MultiStateFrame2 <- data.frame(time=c(0,1,2,3,4,0,1,2,0,1), #' event=c(1,2,3,0,1,2,4,2,1,2), #' id=c(1,1,1,1,2,2,2,2,3,3)) #' with(MultiStateFrame2,Hist(time,event=event,id=id)) #' #' @export "Hist" <- function(time, event, entry=NULL, id=NULL, cens.code="0", addInitialState=FALSE) { ## package Cprob provides a vector, ## to avoid problems we pick the first element cens.code <- as.character(cens.code[[1]]) # {{{ resolving the `time' argument if (is.matrix(time)) time <- data.frame(time) if (class(time)=="list"){ if (length(time) !=2 || length(time[[1]])!=length(time[[2]])) stop("Argument time has a wrong format") time <- data.frame(time) } if (is.data.frame(time)){ cens.type <- "intervalCensored" L <- time[[1]] R <- time[[2]] N <- length(L) stopifnot(is.numeric(L)) stopifnot(is.numeric(R)) stopifnot(all(L<=R || is.na(R))) status <- rep(2,N) status[L==R] <- 1 status[is.infinite(R) | is.na(R) | (L!=R & as.character(R)==cens.code)] <- 0 ## the last part of the condition achieves to things: ## 1. for multi-state models allow transitions to a censored state ## 2. to ignore this, if an event occured exactly at time 0 and 0 is the cens.code R[status==0] <- Inf } else{ stopifnot(is.numeric(time)) cens.type <- "rightCensored" N <- length(time) status <- rep(1,N) ## temporary dummy } # }}} # {{{ resolving the `entry' argument if (is.null(entry)) entry.type <- "" else{ if (is.matrix(entry)) entry <- data.frame(entry) if (class(entry)=="list"){ if (length(entry) !=2 || length(entry[[1]])!=length(entry[[2]])) stop("Argument entry has a wrong format") entry <- data.frame(entry) } if (is.data.frame(entry)){ entry.type <-"intervalCensored" U <- entry[[1]] V <- entry[[2]] stopifnot(is.numeric(U)) stopifnot(is.numeric(V)) stopifnot(all(!is.na(U))|all(!is.na(V))) } else{ stopifnot(is.numeric(entry)) if (is.null(id)) entry.type <- "leftTruncated" else entry.type <- "exact" }} ## check if entry < exit if (cens.type=="intervalCensored"){ if (entry.type=="intervalCensored"){ stopifnot(all(V<=L)) } else{ stopifnot(is.null(entry) || all(entry<=L)) } } else{ if (entry.type=="intervalCensored"){ stopifnot(all(V<=time)) } else{ stopifnot(is.null(entry) || all(entry<=time)) } } # }}} # {{{ resolving the argument `event' if (missing(event)){ model <- "onejump" event <- rep(1,N) warning("Argument event is missing:\nassume observations of a survival model\nand only one event per subject") } else{ if (is.matrix(event)) event <- data.frame(event) ## event can be an ordered factor ## in which case class has two elements ## to avoid warnings we need [[1]] if ((is.vector(event) & class(event)[[1]]!="list")|| is.factor(event)) stopifnot(length(event)==N) if (class(event)[[1]]=="list"){ if (length(event) !=2 || length(event[[1]])!=length(event[[2]])) stop("Argument event has a wrong format") event <- data.frame(event) } if (!is.data.frame(event)){ if (is.null(id)){ model <- "onejump" if (is.logical(event)) event <- as.numeric(event) status[is.na(event) | is.infinite(event) | as.character(event)==cens.code] <- 0 } else{ ## inFormat <- "longitudinal" stopifnot(is.numeric(id) || is.factor(id)) model <- "multi.states" if (cens.type=="intervalCensored"){ stop("Dont know the order of transitions for interval censored observations.") } else{ if (addInitialState==TRUE){ time <- c(rep(0,length(unique(id))),time) if (is.factor(event)){ event <- factor(c(rep("initial",length(unique(id))),as.character(event)),levels=c("initial",levels(event))) } else{ stopifnot(match("initial",unique(event),nomatch=0)==0) event <- c(rep("initial",length(unique(id))),event) } id <- c(unique(id),id) ## status <- c(rep(cens.code,length(unique(id))),status) } # 1. sort the observations by id and time sorted <- order(id,time) time <- time[sorted] ## status <- status[sorted] consists only of 1's id <- id[sorted] event <- event[sorted] # time <- time[duplicated(id)] ## remove the resp. first time # status <- status[duplicated(id)] if (length(unique(id))!=sum(time==0)) stop("There are ",length(unique(id))," different individuals (id's), but the state at time 0 is available for ",sum(time==0)," id's.") initialState <- event[time==0] last.id <- c(diff(id) != 0, 1) first.id <- c(1, diff(id) != 0) from <- factor(event[last.id!=1]) to <- factor(event[first.id!=1]) id <- id[time!=0] time <- time[time!=0] # 2. get back to the original order ### cannot easily get back since ### length(time) < sorted ## time <- time[sorted] ## id <- id[sorted] ## event <- event[sorted] status <- rep(1,length(to)) status[is.na(to) | is.infinite(to) | as.character(to)==cens.code] <- 0 } } } else{ ## inFormat <- "from2to" model <- "multi.states" from <- event[[1]] to <- event[[2]] status[is.na(to) | is.infinite(to) | as.character(to)==cens.code] <- 0 if (length(unique(from))==1){ model <- "onejump" event <- to if (is.logical(to)) to <- as.numeric(to) status[is.na(to) | is.infinite(to) | as.character(event)==cens.code] <- 0 } } } ## if (all(status==0)) warning("All observations are censored") if (all(status==1)) cens.type <- "uncensored" if(model=="onejump"){ # }}} # {{{ 2-state and competing.risks models if (is.factor(event)){ event <- factor(event) # drop unused levels states <- levels(event) ## states <- states[match(state.order,states)] } else{ states <- sort(as.character(unique(event))) } states <- as.character(states[states!=cens.code]) if (length(states)>1) model <- "competing.risks" else model <- "survival" if (cens.type=="intervalCensored"){ if (model=="survival"){ if (entry.type=="intervalCensored") history <- cbind(U=U,V=V,L=L,R=R,status=status) else history <- cbind(entry = entry,L=L,R=R,status=status) } else{ if (entry.type=="intervalCensored") history <- cbind(U=U, V=V, L=L, R=R, status=status, event=as.integer(factor(event,levels=c(states,cens.code)))) else history <- cbind(entry = entry, L=L, R=R, status=status, event=as.integer(factor(event,levels=c(states,cens.code)))) } } else{ if (model=="survival"){ if (entry.type=="intervalCensored") history <- cbind(U=U,V=V,time=time,status=status) else history <- cbind(entry = entry,time=time,status=status) } else{ if (entry.type=="intervalCensored") history <- cbind(U=U, V=V, time=time, status=status, event=as.integer(factor(event,levels=c(states,cens.code)))) else{ history <- cbind(entry = entry, time=time, status=status, event=as.integer(factor(event,levels=c(states,cens.code)))) } } } } else{ # }}} # {{{ multi.state models if (any(as.character(from)==as.character(to))) stop("Data contain transitions from state x to state x") eventISfactor <- as.numeric(is.factor(from)) + as.numeric(is.factor(to)) if (eventISfactor==1) stop("Components of event have different classes") if (eventISfactor==2) states <- unique(c(levels(from),levels(to))) else states <- as.character(unique(c(from,to))) states <- as.character(states[states!=cens.code]) ## states <- states[match(state.order,states)] if (cens.code %in% levels(from)){ stop(paste("The Cens.code", cens.code, " identifies censored data, but is found amoung the `from' state of some transitions")) } if (cens.type=="intervalCensored"){ if (entry.type=="intervalCensored") history <- cbind(U=U, V=V, L=L, R=R, status=status, from=as.integer(factor(from,levels=c(states,cens.code))), to=as.integer(factor(to,levels=c(states,cens.code)))) else{ history <- cbind(entry = entry, L=L, R=R, status=status, from=as.integer(factor(from,levels=c(states,cens.code))), to=as.integer(factor(to,levels=c(states,cens.code)))) } } else{ if (entry.type=="intervalCensored") history <- cbind(U=U, V=V, time=time, status=status, from=as.integer(factor(from,levels=c(states,cens.code))), to=as.integer(factor(to,levels=c(states,cens.code)))) else{ history <- cbind(entry = entry, time=time, status=status, from=as.integer(factor(from,levels=c(states,cens.code))), to=as.integer(factor(to,levels=c(states,cens.code)))) } } } # }}} # {{{ add id if (!is.null(id)) history <- cbind(history,id) # }}} # {{{ class and attributes rownames(history) <- NULL class(history) <- c("Hist") attr(history,"states") <- states attr(history,"cens.type") <- cens.type attr(history,"cens.code") <- as.character(cens.code) attr(history,"model") <- model ## print(entry.type) attr(history,"entry.type") <- entry.type history # }}} } subset.Hist <- function(x,subset,select,drop){ if (missing(select)){ xx <- x class(xx) <- "matrix" xx <- subset(xx,subset=subset,drop=drop) attr(xx,"class") <- attr(x,"class") attr(xx,"states") <- attr(x,"states") attr(xx,"model") <- attr(x,"model") attr(xx,"cens.type") <- attr(x,"cens.type") attr(xx,"cens.code") <- attr(x,"cens.code") attr(xx,"entry.type") <- attr(x,"entry.type") xx } else{ class(x) <- "matrix" NextMethod("subset") } } "[.Hist" <- function(x,i,j,drop=FALSE){ if (missing(j)){ xx <- x class(xx) <- "matrix" xx <- xx[i,,drop=drop] class(xx) <- "Hist" attr(xx,"class") <- attr(x,"class") attr(xx,"states") <- attr(x,"states") attr(xx,"model") <- attr(x,"model") attr(xx,"cens.type") <- attr(x,"cens.type") attr(xx,"cens.code") <- attr(x,"cens.code") attr(xx,"entry.type") <- attr(x,"entry.type") xx } else{ class(x) <- "matrix" ## x[i,j,drop=drop] NextMethod("[") } } # does not work # as.data.frame.Hist <- function(x,...){ # class(x) <- "matrix" # as.data.frame(x) # } is.na.Hist <- function(x) { as.vector( (1* is.na(unclass(x)))%*% rep(1, ncol(x)) >0) } str.Hist <- function(x){ class(x) <- "matrix" utils::str(x) } head.Hist <- function(x){ class(x) <- "matrix" utils::head(x) } prodlim/R/PetoInt.R0000755000175100001440000000213513035633434013654 0ustar hornikusers## Notation ## subject specific intervals ## number: N ## running index: i ## support (Peto) intervals ## number: M ## running index: m PetoInt<-function(L,R,status){ #Status: 0 right censored, 1 exact time, 2 interval cencored. #R[status==0] <- max(R)+1 #to ensure a right endpoint. #it is outcomented because this is done in compGMLE...R instead. names(L)[status!=1] <- 'L' names(R)[status!=1] <- 'R' names(L)[status==1] <- 'EL' names(R)[status==1] <- 'ER' peto.intervals <- c(L,R) level.int <- factor(names(peto.intervals),levels=c('R','EL','ER','L')) right.order <- order(peto.intervals,level.int) peto.intervals <- peto.intervals[right.order] tmp1 <- as.numeric(factor(names(peto.intervals), levels=c('R','EL','ER','L'))) int <- grep('^-3$', diff(tmp1)) #finds the intervals tmp2 <- as.numeric(factor(names(peto.intervals), levels=c('EL','R','L','ER'))) exa <- grep('^3$', diff(tmp2)) #finds the exact observations obs.no <- c(int,exa) tmp <- peto.intervals[sort(c(obs.no,obs.no+1))] out <- matrix(tmp,nrow=2) out } prodlim/R/findArrow.R0000755000175100001440000001422113035633434014224 0ustar hornikusersfindArrow <- function(Box1, Box2, Box1Dim, Box2Dim, verbose=FALSE){ left1 <- Box1[1] bottom1 <- Box1[2] left2 <- Box2[1] bottom2 <- Box2[2] width1 <- Box1Dim[1] height1 <- Box1Dim[2] width2 <- Box2Dim[1] height2 <- Box2Dim[2] # ############################ # #p3 p4 p5# # # # # # # # #p2 p6# # # # # # # # #p1 p8 p7# # ############################ box1 <- list(left=as.numeric(left1), right=as.numeric(left1+width1), mid.horizontal=as.numeric(left1+width1/2), bottom=as.numeric(bottom1), top=as.numeric(bottom1+height1), mid.vertical=as.numeric(bottom1+height1/2)) box1$p1 <- c(x=box1$left,y=box1$bottom) box1$p2 <- c(x=box1$left,y=box1$mid.vertical) box1$p3 <- c(x=box1$left,y=box1$top) box1$p4 <- c(x=box1$mid.horizontal,y=box1$top) box1$p5 <- c(x=box1$right,y=box1$top) box1$p6 <- c(x=box1$right,y=box1$mid.vertical) box1$p7 <- c(x=box1$right,y=box1$bottom) box1$p8 <- c(x=box1$mid.horizontal,y=box1$bottom) box2 <- list(left=as.numeric(left2), right=as.numeric(left2+width2), mid.horizontal=as.numeric(left2+width2/2), bottom=as.numeric(bottom2), top=as.numeric(bottom2+height2), mid.vertical=as.numeric(bottom2+height2/2)) box2$p1 <- c(x=box2$left,y=box2$bottom) box2$p2 <- c(x=box2$left,y=box2$mid.vertical) box2$p3 <- c(x=box2$left,y=box2$top) box2$p4 <- c(x=box2$mid.horizontal,y=box2$top) box2$p5 <- c(x=box2$right,y=box2$top) box2$p6 <- c(x=box2$right,y=box2$mid.vertical) box2$p7 <- c(x=box2$right,y=box2$bottom) box2$p8 <- c(x=box2$mid.horizontal,y=box2$bottom) ## boxwidth <- abs(box1$left-box1$right) ## boxheight <- abs(box1$top-box1$bottom) direction <- 1 if (box2$mid.horizontal bottom") out <- list(from=box1$p4,to=box2$p8) } else{ ######################### #### 1 #### | #### 2 ######################### if (verbose==TRUE) print("case 0: bottom -> top") out <- list(from=box1$p8,to=box2$p4) } } else{ ## if (box1$right<=box2$left){ if (box1$bottom<=box2$bottom){ if (box1$top >= box2$bottom){ ######################### #### 2 #### 1 -> #### ######################### if (verbose==TRUE) { print("case 2: mid.left -> mid.right") print(c(from=box1$p6,to=box2$p2)) } out <- list(from=box1$p6,to=box2$p2) ######################### ## THIS IS A SPECIAL CASE #### #### 1 -> 2 #### ######################### } else{ # box1$top < box2$bottom if ((box2$bottom-box1$top) <= (box2$left-box1$right)){ if ((box2$bottom-box1$top) <= .5*(box2$left-box1$right)){ ######################### #### -> 2 #### / #### 1 ######################### if (verbose==TRUE) print("case 3a: corner.left.top -> mid.right") out <- list(from=box1$p5,to=box2$p2) } else{ ######################### #### 2 #### / #### 1 ######################### if (verbose==TRUE) print("case 3b: corner.left.top -> corner.right.bottom") out <- list(from=box1$p5,to=box2$p1) } } else{ ######################### #### 2 #### / #### | #### 1 ######################### if (verbose==TRUE) print("case 4: top.left -> bottom.right") out <- list(from=box1$p4,to=box2$p8) } } } ## } else{ ## box1$bottom>box2$bottom if (box2$top>=box1$bottom){ ######################### #### #### 1 -> #### 2 ######################### if (verbose==TRUE) { print("case 5: mid.left -> mid.right") print(c(from=box1$p6,to=box2$p2)) } out <- list(from=box1$p6,to=box2$p2) } else{ if ((box1$bottom-box2$top) <= (box2$left-box1$right)){ ## print((box1$bottom-box2$top) <= .5*(box2$left-box1$right)) if ((box1$bottom-box2$top) <= .5*(box2$left-box1$right)){ ######################### #### 1 #### \ #### 2 ######################### if (verbose==TRUE) print("case 6a: corner.left.bottom -> mid.right") out <- list(from=box1$p7,to=box2$p2) } else{ ######################### #### 1 #### \ #### 2 ######################### if (verbose==TRUE) print("case 6b: corner.left.bottom -> corner.right.top") out <- list(from=box1$p7,to=box2$p3) } } else{ if (box1$bottom>=box2$top){ ######################### #### 1 #### \-> 2 #### ######################### if (verbose==TRUE) print("case 7: top.left -> bottom.right") out <- list(from=box1$p8,to=box2$p4) } } } } } if (direction==2){ names(out) <- c("to","from") } out } prodlim/R/plot.Hist.R0000755000175100001440000006713513035633434014171 0ustar hornikusers#' Box-arrow diagrams for multi-state models. #' #' Automated plotting of the states and transitions that characterize a multi #' states model. #' #' #' @param x An object of class \code{Hist}. #' @param nrow the number of graphic rows #' @param ncol the number of graphic columns #' @param stateLabels Vector of names to appear in the boxes (states). #' Defaults to attr(x,"state.names"). The boxes can also be individually #' labeled by smart arguments of the form \code{box3.label="diseased"}, see #' examples. #' @param arrowLabels Vector of labels to appear in the boxes (states). One for #' each arrow. The arrows can also be individually labeled by smart arguments #' of the form \code{arrow1.label=paste(expression(eta(s,u)))}, see examples. #' @param arrowLabelStyle Either "symbolic" for automated symbolic arrow #' labels, or "count" for arrow labels that reflect the number of transitions #' in the data. #' @param arrowLabelSymbol Symbol for automated symbolic arrow labels. Defaults #' to "lambda". #' @param changeArrowLabelSide A vector of mode logical (TRUE,FALSE) one for #' each arrow to change the side of the arrow on which the label is placed. #' @param tagBoxes Logical. If TRUE the boxes are numbered in the upper left #' corner. The size can be controlled with smart argument boxtags.cex. The #' default is boxtags.cex=1.28. #' @param startCountZero Control states numbers for symbolic arrow labels and #' box tags. #' @param oneFitsAll If \code{FALSE} then boxes have individual size, depending #' on the size of the label, otherwise all boxes have the same size dependent #' on the largest label. #' @param margin Set the figure margin via \code{par(mar=margin)}. Less than 4 #' values are repeated. #' @param cex Initial cex value for the state and the arrow \code{labels}. #' @param verbose If TRUE echo various things. #' @param \dots Smart control of arguments for the subroutines text (box #' label), rect (box), arrows, text (arrow label). Thus the three dots can be #' used to draw individual boxes with individual labels, arrows and arrow #' labels. E.g. arrow2.label="any label" changes the label of the second arrow. #' See examples. #' @note Use the functionality of the unix program `dot' #' http://www.graphviz.org/About.php via R package Rgraphviz to obtain more #' complex graphs. #' @author Thomas A Gerds \email{tag@@biostat.ku.dk} #' @seealso \code{\link{Hist}}\code{\link{SmartControl}} #' @keywords survival ##' @examples ##' ##' ##' ## A simple survival model ##' ##' SurvFrame <- data.frame(time=1:10,status=c(0,1,1,0,0,1,0,0,1,0)) ##' SurvHist <- with(SurvFrame,Hist(time,status)) ##' plot(SurvHist) ##' plot(SurvHist,box2.col=2,box2.label="experienced\nR user") ##' plot(SurvHist, ##' box2.col=2, ##' box1.label="newby", ##' box2.label="experienced\nR user", ##' oneFitsAll=FALSE, ##' arrow1.length=.5, ##' arrow1.label="", ##' arrow1.lwd=4) ##' ##' ## change the cex of all box labels: ##' plot(SurvHist, ##' box2.col=2, ##' box1.label="newby", ##' box2.label="experienced\nR user", ##' oneFitsAll=FALSE, ##' arrow1.length=.5, ##' arrow1.label="", ##' arrow1.lwd=4, ##' label.cex=1) ##' ##' ## change the cex of single box labels: ##' plot(SurvHist, ##' box2.col=2, ##' box1.label="newby", ##' box2.label="experienced\nR user", ##' oneFitsAll=FALSE, ##' arrow1.length=.5, ##' arrow1.label="", ##' arrow1.lwd=4, ##' label1.cex=1, ##' label2.cex=2) ##' ##' ##' ## The pbc data set from the survival package ##' library(survival) ##' data(pbc) ##' plot(with(pbc,Hist(time,status)), ##' stateLabels=c("randomized","transplant","dead"), ##' arrowLabelStyle="count") ##' ##' ## two competing risks ##' comprisk.model <- data.frame(time=1:3,status=1:3) ##' CRHist <- with(comprisk.model,Hist(time,status,cens.code=2)) ##' plot(CRHist) ##' plot(CRHist,arrow1.label=paste(expression(eta(s,u)))) ##' ##' plot(CRHist,box2.label="This\nis\nstate 2",arrow1.label=paste(expression(gamma[1](t)))) ##' plot(CRHist,box3.label="Any\nLabel",arrow2.label="any\nlabel") ##' ##' ## change the layout ##' plot(CRHist, ##' box1.label="Alive", ##' box2.label="Dead\n cause 1", ##' box3.label="Dead\n cause 2", ##' arrow1.label=paste(expression(gamma[1](t))), ##' arrow2.label=paste(expression(eta[2](t))), ##' box1.col=2, ##' box2.col=3, ##' box3.col=4, ##' nrow=2, ##' ncol=3, ##' box1.row=1, ##' box1.column=2, ##' box2.row=2, ##' box2.column=1, ##' box3.row=2, ##' box3.column=3) ##' ##' ## more competing risks ##' comprisk.model2 <- data.frame(time=1:4,status=1:4) ##' CRHist2 <- with(comprisk.model2,Hist(time,status,cens.code=2)) ##' plot(CRHist2,box1.row=2) ##' ##' ## illness-death models ##' illness.death.frame <- data.frame(time=1:4, ##' from=c("Disease\nfree", ##' "Disease\nfree", ##' "Diseased", ##' "Disease\nfree"), ##' to=c("0","Diseased","Dead","Dead")) ##' IDHist <- with(illness.death.frame,Hist(time,event=list(from,to))) ##' plot(IDHist) ##' ##' ## illness-death with recovery ##' illness.death.frame2 <- data.frame(time=1:5, ##' from=c("Disease\nfree","Disease\nfree","Diseased","Diseased","Disease\nfree"), ##' to=c("0","Diseased","Disease\nfree","Dead","Dead")) ##' IDHist2 <- with(illness.death.frame2,Hist(time,event=list(from,to))) ##' plot(IDHist2) ##' ##' ## 4 state models ##' x=data.frame(from=c(1,2,1,3,4),to=c(2,1,3,4,1),time=1:5) ##' y=with(x,Hist(time=time,event=list(from=from,to=to))) ##' plot(y) ##' ##' ## moving the label of some arrows ##' ##' d <- data.frame(time=1:5,from=c(1,1,1,2,2),to=c(2,3,4,3,4)) ##' h <- with(d,Hist(time,event=list(from,to))) ##' plot(h, ##' tagBoxes=TRUE, ##' stateLabels=c("Remission\nwithout\nGvHD", ##' "Remission\nwith\nGvHD", ##' "Relapse", ##' "Death\nwithout\nrelapse"), ##' arrowLabelSymbol='alpha', ##' arrowlabel3.x=35, ##' arrowlabel3.y=53, ##' arrowlabel4.y=54, ##' arrowlabel4.x=68) ##' ##' ##' #' @export plot.Hist <- function(x, nrow, ncol, stateLabels, arrowLabels, arrowLabelStyle="symbolic", arrowLabelSymbol='lambda', changeArrowLabelSide, tagBoxes=FALSE, startCountZero=TRUE, oneFitsAll, margin, cex, verbose=FALSE, ...){ # {{{ margin oldmar <- par()$mar oldoma <- par()$oma par(oma=c(0,0,0,0)) oldxpd <- par()$xpd if (!missing(margin)){ par(mar=rep(margin,length.out=4),xpd=TRUE) } else par(mar=c(0,0,0,0),xpd=TRUE) # }}} # {{{ find states model.type <- attr(x,"model") states <- attr(x,"states") origStates <- states if (model.type!="multi.states"){ ## need an initial state states <- c("initial", states) } NS <- length(states) if (missing(stateLabels)){ if (all(as.character(as.numeric(as.factor(origStates)))==origStates)) ## make nice state boxlabels if states are integers stateLabs <- switch(model.type,"survival"=paste(c("","Event"),states),"competing.risks"=paste(c("",rep("Cause",NS-1)),states),paste("State",states)) else stateLabs <- states } else{ if(length(stateLabels)==NS-1){ stateLabs <- c("initial",stateLabels) } else{ if (length(stateLabels)==NS){ stateLabs <- stateLabels } else{ stop("Wrong number of state names.") } } } ## forcedLabels thecall <- match.call(expand.dots=TRUE) labelhits <- match(paste("box",1:NS,".label",sep=""),names(thecall),nomatch=0) for (i in 1:NS){ if (labelhits[i]!=0) ## may be language: thecall[[labelhits[i]]] ## if user specifies box2.label=c("Event 1") ## instead of box2.label="Event 1" stateLabs[i] <- eval(thecall[[labelhits[i]]])[1] } numstates <- as.numeric(as.character(factor(states,levels=states,labels=1:NS))) startCountZero <- TRUE if (startCountZero) numstateLabels <- numstates-1 else numstateLabels <- numstates # {{{ find transitions between the states ## first remove the censored lines from the transition matrix ## x <- x[x[,"status"]!=attr(x,"cens.code"),,drop=FALSE] x <- x[x[,"status"]!=0,,drop=FALSE] if (NROW(x)==0) stop("No uncensored transitions.") sumx <- summary(x,verbose=verbose) notCensored <- sumx$trans.frame$to!="unknown" sumx$trans.frame <- sumx$trans.frame[notCensored,] sumx$transitions <- sumx$transitions[notCensored] transitions <- sumx$trans.frame ordered.transitions <- unique(transitions) N <- NROW(ordered.transitions) # }}} # }}} # {{{ default layout: arranging the boxes state.types <- sumx$states state.types <- state.types[state.types>0] if (missing(nrow)) if (model.type=="multi.states") nrow <- NS else if (ceiling(NS/2)==floor(NS/2)) nrow <- NS-1 else nrow <- NS if (missing(ncol)) if (model.type=="multi.states") ncol <- NS else ncol <- 2 ## placing boxes in rows and columns if (model.type=="multi.states"){ adjustRowsInColumn <- rep(0,ncol) adjustColsInRow <- rep(0,nrow) box.col <- switch(as.character(NS), "2"=c(1,ncol), "3"=c(1,2,ncol), "4"=c(1,1,ncol,ncol), "5"=c(1,1,ceiling((ncol-1)/2),ncol,ncol), "6"=c(1,3,3,5,6,6)) box.row <- switch(as.character(NS), "2"=c(1,1), "3"=c(nrow,1,nrow), "4"=c(1,nrow,1,nrow), "5"=c(1,nrow,ceiling(nrow/2),1,nrow), "6"=c(3,1,6,4,1,6)) } else{ # survival or competing risks ## adjustRowsInColumn <- rep(1,ncol) ## adjustColsInRow <- rep(1,nrow) if (ceiling(NS/2)==floor(NS/2)){ ## equal number of states and unequal number of absorbing states box.col <- c(1,rep(ncol,NS-1)) box.row <- c(NS/2,1:(NS-1)) } else{ box.col <- c(1,rep(ncol,NS-1)) box.row <- c((NS+1)/2,(1:NS)[-(NS+1)/2]) } } if (is.null(box.row) || is.null(box.col)) stop("Please specify the layout for this ",NS," state model (") layoutDefaults <- data.frame(name=paste("box",1:NS,sep=""), row=box.row, column=box.col, stringsAsFactors=FALSE) layoutDefaultList <- lapply(1:NS,function(x)layoutDefaults[x,-1,drop=FALSE]) names(layoutDefaultList) <- layoutDefaults$name layout <- SmartControl(list(...), keys=c(layoutDefaults$name), defaults=c(layoutDefaultList), ignore.case=TRUE, replaceDefaults=FALSE, verbose=FALSE) # }}} # {{{ draw empty frame # plot Xlim <- 100 Ylim <- 100 plot(0,0,type="n",xlim=c(0,Xlim),ylim=c(0,Ylim),xlab="",ylab="",axes=FALSE) ## backGround(c(0,100),c(0,100),bg="yellow") # }}} # {{{ default values if (missing(cex)) theCex <- 2 else theCex <- cex if (found <- match("arrowLabel.cex",names(thecall),nomatch=0)) arrowLabel.cex <- thecall[[found]] else arrowLabel.cex <- rep(theCex,N) ## boxes boxDefaults <- data.frame(name=paste("box",1:NS,sep=""),xpd=TRUE,stringsAsFactors=FALSE) ## box labels boxLabelDefaults <- data.frame(name=paste("label",1:NS,sep=""),stringsAsFactors=FALSE,label=stateLabs) ## arrows arrowDefaults <- data.frame(name=paste("arrow",1:N,sep=""),code=2,lwd=1,headoffset=strwidth("ab",cex=arrowLabel.cex),length=.13,stringsAsFactors=FALSE) arrowDefaults <- cbind(arrowDefaults,ordered.transitions) ## arrowlabels if (missing(changeArrowLabelSide)) changeArrowLabelSide <- rep(FALSE,N) arrowlabelDefaults <- data.frame(name=paste("arrowlabel",1:N,sep=""), label=arrowLabelStyle, x=NA, y=NA, stringsAsFactors=FALSE, cex=arrowLabel.cex) arrowlabelDefaults <- cbind(arrowlabelDefaults,ordered.transitions) arrowlabelDefaults$numfrom <- factor(arrowlabelDefaults$from,levels=states,labels=numstateLabels) arrowlabelDefaults$numto <- factor(arrowlabelDefaults$to,levels=states,labels=numstateLabels) if (missing(arrowLabels)){ arrowLabels <- NULL } arrowLabels.p <- TRUE if (length(arrowLabels)>0 &&is.logical(arrowLabels) && arrowLabels==FALSE){ arrowLabels <- rep("",N) arrowLabels.p <- FALSE } else{ if (length(arrowLabels)==0){ arrowLabels <- lapply(1:N,function(i){ bquote(paste(expression(.(as.name(arrowLabelSymbol))[.(paste(as.character(arrowlabelDefaults$numfrom[i]), as.character(arrowlabelDefaults$numto[i]), sep=""))](t)))) }) } else{ stopifnot(length(arrowLabels)==N) } } arrowlabelhits <- match(paste("arrow",1:N,".label",sep=""),names(thecall),nomatch=0) for (i in 1:N){ if (arrowlabelhits[i]!=0){ arrowLabels[[i]] <- thecall[[arrowlabelhits[i]]] } } # }}} # {{{ compute box dimensions relative to cex of box labels ## to find the cex for the box labels, first initialize boxLabelCex <- rep(theCex,NS) ## then look for label.cex if (theLabelCex <- match("label.cex",names(thecall),nomatch=0)){ boxLabelCex <- rep(thecall[[theLabelCex]],NS) } # finally adjust for box individual values if (any(iLabelCex <- match(paste("label",1:NS,".cex",sep=""),names(thecall),nomatch=0))){ for (i in 1:NS){ if ((argi <- iLabelCex[i])!=0) boxLabelCex[i] <- thecall[[argi]] } } ## state.cex <- max(boxLabelCex) if (length(boxLabelCex) Xlim) warning("The horizontal dimensions of the boxes are too big -- change layout or tune parameters `label.cex' and/or `xbox.rule'.") ## if ((nrow * box.height) > Ylim) warning("The verticalf dimensions of the boxes are too big -- change layout or tune parameters `label.cex' and/or `ybox.rule'.") } else{ box.width <- state.width + strwidth("ab",cex=boxLabelCex) box.height <- state.height + strwidth("ab",cex=boxLabelCex) } if (length(box.height)==1) box.height <- rep(box.height,NS) if (length(box.width)==1) box.width <- rep(box.width,NS) # }}} # {{{ arrange the boxes in the layout boxCol <- sapply(layout,function(x){x$column}) if (any(boxCol>ncol)) ncol <- max(boxCol) boxRow <- sapply(layout,function(x){x$row}) if (any(boxRow>ncol)) nrow <- max(boxRow) ybox.position <- numeric(NS) names(ybox.position) <- paste("box",numstates,sep="") # {{{y box positions for (x in 1:ncol){ ## For each column find y positions for boxes boxesInColumn <- names(boxCol)[boxCol==x] boxesInColumnNumbers <- as.numeric(sapply(strsplit(boxesInColumn,"box"),function(x)x[[2]])) if (length(boxesInColumn)>0){ ## if (adjustRowsInColumn[x]==1 && all(match(paste(boxesInColumn,"row",sep="."),names(thecall),nomatch=0)==0)){ # adjust the y position of the boxes according to the number of boxes in column ## yPossible <- centerBoxes(Ylim,box.height[boxesInColumnNumbers],nrow,boxRow[boxesInColumn]) ## for (b in 1:length(boxesInColumn)) ## ybox.position[boxesInColumn[b]] <- yPossible[b] ## } ## else{ yPossible <- centerBoxes(Ylim,box.height[boxesInColumnNumbers],nrow,boxRow[boxesInColumn]) for (b in 1:length(boxesInColumn)){ ybox.position[boxesInColumn[b]] <- yPossible[b] ## } } } } ## row 1 is on top but the y-axis starts at the button ## therefore need to transform ybox.position <- 100-(ybox.position+box.height) # }}} # {{{x box positions xbox.position <- numeric(NS) names(xbox.position) <- paste("box",numstates,sep="") for (x in 1:nrow){ ## For each row find x positions for boxes boxesInRow <- names(boxRow)[boxRow==x] boxesInRowNumbers <- as.numeric(sapply(strsplit(boxesInRow,"box"),function(x)x[[2]])) if (length(boxesInRow)>0){ ## if (adjustColsInRow[x]==1 && all(match(paste(boxesInRow,"row",sep="."),names(thecall),nomatch=0)==0)){ # adjust the x position of the boxes according to the number of boxes in row ## xpossible <- centerBoxes(Ylim,box.height[boxesInRowNumbers],ncol,boxCol[boxesInRow]) ## for (b in 1:length(boxesInRow)) ## xbox.position[boxesInRow[b]] <- xpossible[b] ## } ## else{ if (sum(box.width[boxesInRowNumbers])>Xlim) stop(paste("Sum of box widths in row",x,"exceed limit",Xlim)) xpossible <- centerBoxes(Xlim,box.width[boxesInRowNumbers],ncol,boxCol[boxesInRow]) ## if (any(xpossible<0)) browser() for (b in 1:length(boxesInRow)){ xbox.position[boxesInRow[b]] <- xpossible[b] } ## } } } # }}} xtext.position <- xbox.position + (box.width - state.width)/2 ytext.position <- ybox.position + (box.height - state.height)/2 if (verbose){ cat("\n\nBoxlabel data:\n\n") print(data.frame(stateLabs, boxCol, boxRow, x.pos=round(xbox.position,2), y.pos=round(ybox.position,2), width=round(box.width,2), label.width=round(state.width,2), label.height=round(state.height,2), boxLabelCex)) } boxDefaults <- cbind(boxDefaults,xleft=xbox.position,ybottom=ybox.position,xright=xbox.position+box.width,ytop=ybox.position+box.height) boxLabelDefaults <- cbind(boxLabelDefaults, x=xtext.position, y=ytext.position, cex=boxLabelCex) # }}} # {{{ compute arrow positions doubleArrow <- match(paste(arrowDefaults[,"to"],arrowDefaults[,"from"]),paste(arrowDefaults[,"from"],arrowDefaults[,"to"]),nomatch=0) arrowDefaults <- cbind(arrowDefaults,doubleArrow) arrowList <- for (trans in 1:N){ from.state <- factor(ordered.transitions[trans,1],levels=states,labels=numstates) to.state <- factor(ordered.transitions[trans,2],levels=states,labels=numstates) ArrowPositions <- findArrow(Box1=c(round(xbox.position[from.state],4),round(ybox.position[from.state],4)), Box2=c(round(xbox.position[to.state],4),round(ybox.position[to.state],4)), Box1Dim=c(box.width[from.state],box.height[from.state]), Box2Dim=c(box.width[to.state],box.height[to.state]), verbose=FALSE) Len <- function(x){sqrt(sum(x^2))} from <- ArrowPositions$from to <- ArrowPositions$to ArrowDirection <- to-from ArrowDirection <- ArrowDirection/Len(ArrowDirection) ## perpendicular direction PerDir <- rev(ArrowDirection)*c(1,-1)/Len(ArrowDirection) ## shift double arrows dd <- arrowDefaults[trans,"doubleArrow"] if (dd!=0){ dist <- strwidth(".",cex=arrowLabel.cex) arrowDefaults[trans,"headoffset"]+dist if (dd>trans){ from <- from + sign(PerDir) * c(dist,dist) to <- to + sign(PerDir) * c(dist,dist) } else{ from <- from + sign(PerDir) * c(dist,dist) to <- to + sign(PerDir) * c(dist,dist) } } # shift the start and end points of arrows by ArrowHeadOffset ArrowHeadOffset <- arrowDefaults[trans,"headoffset"] from <- from+sign(ArrowDirection)*c(ArrowHeadOffset,ArrowHeadOffset)*abs(ArrowDirection) to <- to-sign(ArrowDirection)*c(ArrowHeadOffset,ArrowHeadOffset)*abs(ArrowDirection) arrowDefaults[trans,"x0"] <- from[1] arrowDefaults[trans,"x1"] <- to[1] arrowDefaults[trans,"y0"] <- from[2] arrowDefaults[trans,"y1"] <- to[2] ## shift arrow label perpendicular (left) to arrow direction offset <- strwidth(".",cex=arrowLabel.cex) ArrowMid <- (to+from)/2 ## points(x=ArrowMid[1],y=ArrowMid[2],col=3,pch=16) if (changeArrowLabelSide[trans]==TRUE) ArrowLabelPos <- ArrowMid - sign(PerDir) * c(offset,offset) else ArrowLabelPos <- ArrowMid + sign(PerDir) * c(offset,offset) try1 <- try(mode((arrowLabels[[trans]])[2])[[1]]=="call",silent=TRUE) ## try2 <- try(as.character(arrowLabels[[trans]])[[1]]=="paste",silent=TRUE) labIsCall <- (class(try1)!="try-error" && try1) ## labUsePaste <- (class(try2)!="try-error" && try2) if (labIsCall){ # symbolic label arrowLabels[[trans]] <- ((arrowLabels[[trans]])[2])[[1]][[2]] } ## relative label height lab <- arrowLabels[[trans]] labelHeight <- strheight(lab,cex=arrowlabelDefaults[trans,"cex"]) ## relative label width labelWidth <- strwidth(lab,cex=arrowlabelDefaults[trans,"cex"]) ## shift further according to label height and width in perpendicular direction if (changeArrowLabelSide[trans]==TRUE) ArrowLabelPos <- ArrowLabelPos-sign(PerDir)*c(labelWidth/2,labelHeight/2) else ArrowLabelPos <- ArrowLabelPos+sign(PerDir)*c(labelWidth/2,labelHeight/2) arrowlabelDefaults[trans,"x"] <- ArrowLabelPos[1] arrowlabelDefaults[trans,"y"] <- ArrowLabelPos[2] } # }}} # {{{ Smart argument control boxDefaultList <- lapply(1:NS,function(x)boxDefaults[x,-1,drop=FALSE]) names(boxDefaultList) <- boxDefaults$name boxLabelDefaultList <- lapply(1:NS,function(x)boxLabelDefaults[x,-1,drop=FALSE]) names(boxLabelDefaultList) <- boxLabelDefaults$name arrowDefaultList <- lapply(1:N,function(x)arrowDefaults[x,-1,drop=FALSE]) names(arrowDefaultList) <- as.character(arrowDefaults$name) arrowlabelDefaultList <- lapply(1:N,function(x)arrowlabelDefaults[x,-1,drop=FALSE]) names(arrowlabelDefaultList) <- as.character(arrowlabelDefaults$name) boxTagsDefaultList <- list(labels=numstateLabels,cex=1.28,adj=c(-.5,1.43)) smartArgs <- SmartControl(list(...), keys=c(boxDefaults$name, boxLabelDefaults$name, as.character(arrowDefaults$name), as.character(arrowlabelDefaults$name), "boxtags"), defaults=c(boxLabelDefaultList,arrowDefaultList,arrowlabelDefaultList,boxDefaultList,list("boxtags"=boxTagsDefaultList)), ignore.case=TRUE, replaceDefaults=FALSE, verbose=verbose) # }}} # {{{ draw the boxes for (i in 1:NS) { suppressWarnings(do.call("rect",smartArgs[[paste("box",i,sep="")]])) } # }}} # {{{ label the boxes for (i in 1:NS) { suppressWarnings(do.call("text",c(list(adj=c(0,0)),smartArgs[[paste("label",i,sep="")]]))) } # }}} # {{{ draw the arrows for (i in 1:N){ suppressWarnings(do.call("arrows",c(smartArgs[[paste("arrow",i,sep="")]]))) } # }}} # {{{ label the arrows if (verbose) arrowLabel.data <- NULL if (arrowLabels.p==TRUE){ for (i in 1:N){ labelList <- smartArgs[[paste("arrowlabel",i,sep="")]] if (verbose) arrowLabel.data <- rbind(arrowLabel.data,cbind("arrowLabel"=i,data.frame(labelList))) switch(labelList$label,"symbolic"={ ## lab <- (arrowLabels[[i]]) try1 <- try(mode((arrowLabels[[i]])[2])[[1]]=="call",silent=TRUE) ## try2 <- try(as.character(arrowLabels[[i]])[[1]]=="paste",silent=TRUE) labIsCall <- (class(try1)!="try-error" && try1) suppressWarnings(do.call("text",c(list(labels=bquote(arrowLabels[[i]])),labelList))) }, "count"={ tabTrans <- as.matrix(table(transitions)) lab <- paste("n=",tabTrans[as.character(labelList$from),as.character(labelList$to)]) suppressWarnings(do.call("text",c(list(labels=quote(lab)),labelList))) }) ## suppressWarnings(do.call("text",c(list(adj=c(labelWidth/2,labelHeight/2),labels="label"),smartArgs[[paste("arrowlabel",i,sep="")]]))) } } if (verbose) { cat("\n\nArrow label data:\n\n") print(arrowLabel.data) } # }}} # {{{ put numbers in the upper left corner of the boxes (if wanted) if (tagBoxes==TRUE){ tagList <- smartArgs$boxtags nix <- lapply(1:NS,function(b) { lab <- tagList[b] text(x=xbox.position[b], y=ybox.position[b]+box.height, labels=tagList$labels[b], cex=tagList$cex, adj=tagList$adj)}) } # }}} # {{{ reset margin par(mar=oldmar,xpd=oldxpd,oma=oldoma) # }}} if (verbose){ cat("\nRelevel the factor 'event' in the dataset which defines the Hist object,\nto change the order of the boxes.\n") } invisible(smartArgs) } position.finder <- function(border,len,n){ ## distribute the boxes of lenght len uniformly ## over [0,border] if (n==1) (border - len)/2 else{ seq(0,border-.5*len,len + (border-(n * len))/(n-1)) } } centerBoxes <- function(border,len,ncell,pos){ ## box i has length len[i] and is centered in cell pos[i] ## return the position in [0,border] of the lower ## border of the boxes cellwidth <- border/ncell nboxes <- length(len) if ((luft <- border-sum(len))<0) stop("sum of box dimensions exceeds limits") if (nboxes>ncell) stop("too many boxes in one row") ## case: all boxes fit into given cell width ## if (all(len1 && pos[b]==1) # at the left/lower border bp <- min(0,abs(box.pos[b])) if (ncell> 1 && pos[b]==ncell)# at the right/upper border bp <- max(border-len[b],box.pos[b]) bp }) ## }else{ ## ## case: at least one box exceeds the cellwidth ## between <- luft/(nboxes-1) ## boxPos <- c(0,len[-nboxes]+between) ## } boxPos } ## positionFinder <- function(border,len,n){ ## distribute the whitespace between the boxes ## instead of the boxes ## wspace <- border-sum(len) ## if (n==1) ## (border - len)/2 ## else{ ## seq(0,border-.5*len,len + (border-(n * len))/(n-1)) ## } ## } prodlim/R/leaveOneOut.R0000644000175100001440000001037413035633434014521 0ustar hornikusers#' Compute leave-one-out estimates #' #' This function is the work-horse for \code{jackknife} #' @title Compute jackknife pseudo values. #' @aliases leaveOneOut leaveOneOut.survival leaveOneOut.competing.risks #' @author Thomas Alexander Gerds #' @seealso \code{\link{jackknife}} #' #' @param object Object of class \code{"prodlim"}. #' @param times time points at which to compute leave-one-out #' event/survival probabilities. #' @param cause For competing risks the cause of interest. #' @param lag For survival models only. If \code{TRUE} lag the result, i.e. compute #' S(t-) instead of S(t). #' @param ... not used #' @export leaveOneOut <- function(object,times,cause,lag=FALSE,...){ if (object$model=="survival") leaveOneOut.survival(object=object,times=times,lag=lag,...) else if (object$model=="competing.risks") leaveOneOut.competing.risks(object=object,times=times,cause=cause,...) else stop("No method for jackknifing this object.") } #' @export leaveOneOut.survival <- function(object,times,lag=FALSE,...){ stopifnot(object$covariate.type==1) mr <- object$model.response time <- object$time Y <- object$n.risk D <- object$n.event Y <- Y[D>0] time <- time[D>0] D <- D[D>0] NU <- length(time) obstimes <- mr[,"time"] status <- mr[,"status"] N <- length(obstimes) ## S <- predict(object,times=time,newdata=mr) ## idea: find the at-risk set for pseudo-value k by ## substracting 1 in the period where subj k is ## at risk. need the position of obstime.k in time ... ## pos <- match(obstimes,time) ## if (useC==TRUE){ loo <- .C("loo_surv", Y = as.double(Y), D=as.double(D), time=as.double(time), obsT=as.double(obstimes), status=as.double(status), S=double(NU*N), N=as.integer(N), NT=as.integer(NU), PACKAGE="prodlim")$S out <- matrix(loo,nrow=N,ncol=NU,byrow=FALSE) ## } ## else{ pos <- sindex(jump.times=time,eval.times=obstimes) ## loo2 <- do.call("rbind",lapply(1:N,function(k){ ## Dk <- D ## if (status[k]==1) Dk[pos[k]] <- Dk[pos[k]]-1 ## Yk <- Y-c(rep(1,pos[k]),rep(0,NU-pos[k])) ## cumprod(1-Dk/Yk)})) ## } ## out <- loo if (!missing(times)){ found <- sindex(jump.times=time,eval.times=times)+1 if (lag==FALSE) out <- cbind(1,out)[,found,drop=TRUE] else out <- cbind(1,cbind(1,out))[,found,drop=TRUE] } out } #' @export leaveOneOut.competing.risks <- function(object,times,cause,...){ stopifnot(object$covariate.type==1) mr <- object$model.response states <- attr(mr,"states") if (missing(cause)) { C <- 1 cause <- states[1] } else{ C <- match(cause,states,nomatch=0) if (length(C)>1 || C==0) stop("Cause must match exactly one of the names of object$n.event.") } D <- object$n.event[[C]] # it is sufficient to consider time points where events occur time <- object$time[D>0] Y <- object$n.risk[D>0] sFit <- prodlim(Hist(time,status)~1,data=data.frame(unclass(mr))) S <- sFit$surv[D>0] D <- D[D>0] lagSk <- leaveOneOut.survival(sFit,times=time,lag=1) NU <- length(time) obstimes <- mr[,"time"] status <- mr[,"status"] E <- getEvent(mr) N <- length(obstimes) ## idea: see leaveOneOut.survival ## browser() ## if (useC==TRUE){ ## print(cbind(time=time,Y=Y,D=D)) loo <- .C("loo_comprisk", Y = as.double(Y), D=as.double(D), time=as.double(time), obsT=as.double(obstimes), status=as.double(status*(E==cause)), lagSurv=as.double(lagSk), F=double(NU*N), N=as.integer(N), NT=as.integer(NU), PACKAGE="prodlim")$F out <- matrix(loo,nrow=N,ncol=NU,byrow=FALSE) ## browser() ## } ## else{ ## pos <- sindex(jump.times=time,eval.times=obstimes) ## loo <- do.call("rbind",lapply(1:N,function(k){ ## Dk <- D ## if (status[k]==1 && E[k]==cause) Dk[pos[k]] <- Dk[pos[k]]-1 ## Yk <- Y-c(rep(1,pos[k]),rep(0,NU-pos[k])) ## Sk <- as.numeric(lagSk[k,,drop=TRUE]) ## Hk <- Dk/Yk ## Fk <- cumsum(Sk*Hk) ## Fk ## })) ## out <- loo ## } if (!missing(times)){ found <- sindex(jump.times=time,eval.times=times)+1 out <- cbind(0,out)[,found,drop=TRUE] } out } prodlim/R/getStates.R0000644000175100001440000000116313035633434014232 0ustar hornikusers##' Extract the states of a multi-state model ##' ##' Applying this function to the fit of prodlim means to apply ##' it to \code{fit$model.response}. ##' @title States of a multi-state model ##' @param object Object of class \code{prodlim} or \code{Hist} . ##' @param ... not used ##' @return A character vector with the states of the model. ##' @author Thomas A. Gerds #' @export getStates <- function(object,...){ UseMethod("getStates",object) } #' @export getStates.Hist <- function(object,...){ attr(object,"states") } #' @export getStates.prodlim <- function(object,...){ attr(object$model.response,"states") } prodlim/R/print.prodlim.R0000755000175100001440000000641413035633434015077 0ustar hornikusers#' Print objects in the prodlim library #' #' Pretty printing of objects created with the functionality of the `prodlim' #' library. #' #' #' @aliases print.prodlim print.neighborhood print.Hist #' @param x Object of class \code{prodlim}, \code{Hist} and #' \code{neighborhood}. #' @param \dots Not used. #' @author Thomas Gerds #' @seealso \code{\link{summary.prodlim}}, \code{\link{predict.prodlim}} #' @keywords survival #' @export "print.prodlim" <- function(x,...) { cat("\n") cat("Call: ") print(x$call) cat("\n") model <- x$model ## message("Estimation method:") if (!(model %in% c("survival","competing.risks"))) stop("Under construction") if (model=="survival") if (x$cens.type=="intervalCensored"){ message(switch(x$covariate.type,"NPMLE", "Stratified NPMLE estimator", "Stratified NPMLE estimator", "Stratified NPMLE estimator")," for the",ifelse(x$covariate.type==1," "," conditional "),ifelse(x$reverse==FALSE,"event time ","censoring time "),"survival function") message(paste("\nIteration steps:",x$n.iter,"\n")) ## summary(x) cat("\n") } else{ message(switch(x$covariate.type,"Kaplan-Meier estimator", "Stratified Kaplan-Meier estimator", "Stone-Beran estimator", "Stratified Stone-Beran estimator")," for the",ifelse(x$covariate.type==1," "," conditional "),ifelse(x$reverse==FALSE,"event time ","censoring time "),"survival function") } cat("\n") ## discrete.predictors <- extract.name.from.special(grep("strata.",names(x$X),value=TRUE),pattern="strata\\.") ## continuous.predictors <- extract.name.from.special(grep("NN.",names(x$X),value=TRUE),pattern="NN\\.") discrete.predictors <- x$discrete.predictors continuous.predictors <- x$continuous.predictors if (!is.null(x$cluster)) message("\nCluster-correlated data:\n\n cluster variable: ",x$cluster,"\n") format.disc <- function(name){ paste(name," (", paste(x$xlevels[[name]],collapse=", ",sep=""),")", collapse=", ",sep="") } message(#"Predictor space:\n\n", switch(x$covariate.type, "No covariates",{ if (length(discrete.predictors)==1){ c("Discrete predictor variable: ", format.disc(discrete.predictors)) }else{ c("Discrete predictor variables:\n", sapply(discrete.predictors,function(x)paste("\n - ",format.disc(x)))) }}, c("Continuous predictors: ",continuous.predictors), c(" Discrete predictor variables: ", paste(discrete.predictors,collapse=", "), "\nContinuous predictor variables: ", continuous.predictors))) summary(x$model.response,verbose=TRUE) if (!is.null(x$na.action)){ cat("\n", length(x$na.action), ifelse(length(x$na.action)==1, " observation", " observations")," deleted due to missing values.\n",sep="") } } prodlim/R/print.summary.prodlim.R0000755000175100001440000000175213035633434016573 0ustar hornikusers##' @export print.summary.prodlim <- function(x,digits=ifelse(x$percent,1,3),...){ model <- x$model cotype <- x$cotype sumtable <- x$table if (x$asMatrix==TRUE){ print(sumtable,digits=digits,quote=FALSE,...) } else{ if (model=="survival"){ if (cotype==1){ print(sumtable,digits=digits,quote=FALSE,...) } else{ print.listof(sumtable,digits=digits,quote=FALSE,...) } } else{ if (model=="competing.risks"){ for (cc in 1:length(sumtable)){ cat("\n\n----------> Cause: ",names(sumtable)[cc],"\n\n") if (cotype==1){ print(sumtable[[cc]],digits=digits,quote=FALSE,...) } else{ print.listof(sumtable[[cc]],digits=digits,quote=FALSE,...) } } } } } } prodlim/R/listNbh.R0000755000175100001440000000056713035633434013704 0ustar hornikuserslistNbh <- function(object,y,val){ stopifnot(class(object)=="neighborhood") if (missing(y)) y=object$neighbors else{ stopifnot(length(y)==object$n) y=y[object$neighbors] } if (missing(val)) val <- object$values posVal <- match(val,object$values,nomatch=FALSE) stopifnot(all(posVal!=0)) out <- split(y,rep(1:object$nu,object$size.nbh))[posVal] out } prodlim/R/lifeTab.survival.R0000755000175100001440000002015413035633434015513 0ustar hornikuserslifeTab.survival <- function(object, times, newdata, stats, intervals=FALSE, percent=TRUE, showTime=TRUE){ # {{{ get the indices IndeX <- predict(object, newdata=newdata, level.chaos=0, times=times, type="list") # }}} # {{{ times times <- IndeX$times Ntimes <- IndeX$dimensions$time pindex <- IndeX$indices$time # }}} # {{{ covariate strata Nstrata <- IndeX$dimensions$strata findex <- IndeX$indices$strata # }}} # {{{ stats if (missing(stats) || ((!missing(stats)) && is.null(stats))) stats <- list(c("n.event",0),c("n.lost",0)) else{ stats <- c(list(c("n.event",0),c("n.lost",0)),stats) } # }}} # {{{ summary at exact times if (intervals==FALSE){ if (is.null(object$clustervar)){ ## only one column for n.risk xxx <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk),nevent=as.integer(object$n.event),nlost=as.integer(object$n.lost),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) ## firstStrata <- object$first.strata[findex] ## sizeStrata <- object$size.strata[findex] ## indexAT <- unlist(lapply(1:Nstrata,function(s){ ## firstStrata[s] -1 + sindex(jump.times=object$time[firstStrata[s]:sizeStrata[s]],eval.times=times,strict=FALSE) ## })) ## indexJustBefore <- unlist(lapply(1:Nstrata,function(s){ ## firstStrata[s] -1 + sindex(jump.times=object$time[firstStrata[s]:sizeStrata[s]],eval.times=times,strict=TRUE) ## })) ## out <- data.frame(n.risk=c(object$n.risk[1],object$n.risk)[1+indexAT],n.event=c(0,object$n.event)[1+indexAT],n.lost=c(0,object$n.lost)[1+indexAT]) } else{ xxx <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1]),nevent=as.integer(object$n.event[,1]),nlost=as.integer(object$n.lost[,1]),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) for (cv in 1:length(object$clustervar)){ yyy <- .C("summary_prodlim",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1+cv]),nevent=as.integer(object$n.event[,1+cv]),nlost=as.integer(object$n.lost[,1+cv]),as.double(times),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") outCV <- data.frame(n.risk=yyy$pred.nrisk,n.event=yyy$pred.nevent,n.lost=yyy$pred.nlost) names(outCV) <- paste(object$clustervar,names(outCV)) out <- cbind(out,outCV) } } } # }}} # {{{ summary in Intervals else{ #,---- #| get no. at risk at the left limit of the interval #| and count events and censored including the left limit #| but excluding the right interval border #`---- start <- min(min(object$time),0)-.1 lower <- c(start,times[-length(times)]) upper <- times lagTimes <- c(min(min(object$time),0)-.1 , times[-length(times)]) if (is.null(object$clustervar)){ ## only one column in n.event and n.risk xxx <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk),nevent=as.integer(object$n.event),nlost=as.integer(object$n.lost),lower=as.double(lower),upper=as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) } else{ xxx <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1]),nevent=as.integer(object$n.event[,1]),nlost=as.integer(object$n.lost[,1]),lower=as.double(lower),upper=as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") out <- data.frame(n.risk=xxx$pred.nrisk,n.event=xxx$pred.nevent,n.lost=xxx$pred.nlost) for (cv in 1:length(object$clustervar)){ yyy <- .C("life_table",pred.nrisk=integer(Ntimes*Nstrata),pred.nevent=integer(Ntimes*Nstrata),pred.nlost=integer(Ntimes*Nstrata),nrisk=as.integer(object$n.risk[,1+cv]),nevent=as.integer(object$n.event[,1+cv]),nlost=as.integer(object$n.lost[,1+cv]),lower=as.double(lower),upper=as.double(upper),as.double(object$time),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(Nstrata),as.integer(Ntimes),NAOK=FALSE,PACKAGE="prodlim") outCV <- data.frame(n.risk=yyy$pred.nrisk,n.event=yyy$pred.nevent,n.lost=yyy$pred.nlost) names(outCV) <- paste(object$clustervar,names(outCV)) out <- cbind(out,outCV) } } } # }}} # {{{ percent if (!is.null(stats)){ statsList <- lapply(stats,function(x){ if (percent==TRUE && length(grep(x[1],c("n.event","n.lost","n.risk"),value=FALSE))==0){ 100*as.numeric(c(x[2],object[[x[1]]])[pindex+1]) } else{ as.numeric(c(x[2],object[[x[1]]])[pindex+1]) } }) names(statsList) <- sapply(stats,function(x)x[[1]]) add <- do.call("cbind",statsList) add <- add[,match(colnames(add),colnames(out),nomatch=FALSE)==0,drop=FALSE] if (NROW(out)==1) out <- data.frame(cbind(out,add)) else out <- cbind(out,add) } # }}} # {{{ split into list according to covariate strata if (Nstrata > 1) { split.cova <- rep(1:Nstrata,rep(Ntimes,Nstrata)) out <- split(out,split.cova) names(out) <- IndeX$names.strata out <- lapply(out,function(x){ x <- as.matrix(x) if (showTime==TRUE){ if (intervals==TRUE) x <- cbind(time0=c(0,round(times[-length(times)],2)),time1=times,x) else x <- cbind(time=times,x) rownames(x) <- 1:NROW(x) } else{ # times are rownames if (intervals==TRUE) rownames(x) <- paste("(",paste(c(0,round(times[-length(times)],2)),round(times,2),sep="-"),"]",sep="") else rownames(x) <- round(times,2) } x }) } # }}} # {{{ univariate case else{ out <- as.matrix(out) if (showTime==TRUE){ if (intervals==TRUE) out <- cbind(time0=c(0,round(times[-length(times)],2)),time1=times,out) else out <- cbind(time=times,out) rownames(out) <- 1:NROW(out) } else{ # times are rownames if (intervals==TRUE) rownames(out) <- paste("(",paste(c(0,round(times[-length(times)],2)),round(times,2),sep="-"),"]",sep="") else rownames(out) <- round(times,2) } } # }}} out } prodlim/R/print.IntIndex.R0000755000175100001440000000114213035633434015144 0ustar hornikusers#' @export print.IntIndex <- function(x,...){ mlist <- split(x$Mindex,rep(1:length(x$Mstrata),diff(c(0,x$Mstrata)))) p <- x$petoInt[1,] q <- x$petoInt[2,] pqnames <- paste("(p;q)=",paste("(",p,";",q,"]",sep="")) pqnames[p==q] <- paste("(p;q)=",paste("[",p[p==q],";",q[p==q],"]",sep="")) names(mlist) <- pqnames Mlist <- lapply(mlist,function(u){ L <- x$obsInt[1,u] R <- x$obsInt[2,u] out <- paste("(",L,";",R,"]",sep="") out[L==R] <- paste("[",L[L==R],";",R[L==R],"]",sep="") out }) print(Mlist) Ilist <- split(x$Iindex,rep(1:length(x$Istrata),diff(c(0,x$Istrata)))) } prodlim/R/markTime.R0000755000175100001440000000230113057105705014035 0ustar hornikusers#' Marking product-limit plots at the censored times. #' #' This function is invoked and controlled by \code{plot.prodlim}. #' #' This function should not be called directly. The arguments can be specified #' as \code{atRisk.arg} in the call to \code{plot.prodlim}. #' #' @param x The values of the curves at \code{times}. #' @param times The times where there curves are plotted. #' @param nlost The number of subjects lost to follow-up (censored) at #' \code{times}. #' @param pch The symbol used to mark the curves. #' @param col The color of the symbols. #' @param ... Arguments passed to \code{points}. #' @return Nil #' @author Thomas Alexander Gerds #' @seealso \code{\link{plot.prodlim}}, \code{\link{confInt}}, #' \code{\link{atRisk}} #' @keywords survival #' @export markTime <- function(x,times,nlost,pch,col,...){ mtimeList=lapply(1:length(x),function(i){ who=nlost[[i]]>0 & !is.na(nlost[[i]]) mark.x=times[who] mark.y=x[[i]][who] if (length(col) #' @seealso \code{\link{plot.prodlim}}, \code{\link{atRisk}}, #' \code{\link{markTime}} #' @keywords survival #' @export confInt <- function(x, times, newdata, type, citype, cause, col, lty, lwd, density=55, ...){ ## if (citype=="shadow" && length(times)>100 && exact==FALSE) ## times <- seq(min(times),max(times),diff(range(times)/100)) sumx <- summary(x,times=times,newdata=newdata,cause=cause,verbose=FALSE,surv=ifelse(type=="cuminc",FALSE,TRUE))$table if (x$model=="competing.risks" && x$covariate.type>1) sumx <- sumx[[1]] ## if (x$model=="survival" && x$covariate.type==1) sumx <- list(sumx) if (!is.list(sumx)) sumx <- list(sumx) nlines <- length(sumx) ci <- lapply(sumx,function(u){ uu <- data.frame(u[,c("time","lower","upper"),drop=FALSE]) uu=uu[!is.na(uu$lower),] # ----------remove confidence limits before the first event---------- est <- u[!is.na(u[,"lower"]),type] cond <- est <1 & est>0 uu=uu[((uu$upper-uu$lower)<1 | cond),] uu }) nix <- lapply(1:nlines,function(i){ if (NROW(ci[[i]])>0){ switch(citype, "bars"={ segments(x0=ci[[i]]$time, x1=ci[[i]]$time, y0=ci[[i]]$lower, y1=ci[[i]]$upper, lwd=lwd[i], col=col[i], lty=lty[i], ...) }, "shadow"={ cc <- dimColor(col[i],density=density) ## ccrgb=as.list(col2rgb(col[i],alpha=TRUE)) ## names(ccrgb) <- c("red","green","blue","alpha") ## ccrgb$alpha=density ## cc=do.call("rgb",c(ccrgb,list(max=255))) ttt <- ci[[i]]$time nt <- length(ttt) ttt <- c(ttt,ttt) uuu <- c(0,ci[[i]]$upper[-nt],ci[[i]]$upper) lll <- c(0,ci[[i]]$lower[-nt],ci[[i]]$lower) neworder <- order(ttt) uuu <- uuu[neworder] lll <- lll[neworder] ttt <- sort(ttt) polygon(x=c(ttt,rev(ttt)), y=c(lll,rev(uuu)),col=cc,border=NA) ## xx=ci[[i]]$time ## nix <- sapply(1:length(xx),function(b){ ## rect(xleft=xx[b],xright=xx[b+1],ybottom=ci[[i]]$lower[b],ytop=ci[[i]]$upper[b],col=cc,border=NA) ## }) },{ lines(x=ci[[i]]$time,ci[[i]]$lower,type="s",lwd=lwd[i],col=col[i],lty=lty[i],...) lines(x=ci[[i]]$time,ci[[i]]$upper,type="s",lwd=lwd[i],col=col[i],lty=lty[i],...) }) } }) } prodlim/R/prodlim.R0000755000175100001440000007752313057247774013771 0ustar hornikusers##' product limit method ##' ##' Nonparametric estimation in event history analysis. Featuring fast ##' algorithms and user friendly syntax adapted from the survival package. The ##' product limit algorithm is used for right censored data; the ##' self-consistency algorithm for interval censored data. ##' ##' ##' The response of \code{formula} (ie the left hand side of the `~' operator) ##' specifies the model. ##' ##' In two-state models -- the classical survival case -- the standard ##' Kaplan-Meier method is applied. For this the response can be specified as a ##' \code{\link{Surv}} or as a \code{\link{Hist}} object. The \code{\link{Hist}} ##' function allows you to change the code for censored observations, e.g. ##' \code{Hist(time,status,cens.code="4")}. ##' ##' Besides a slight gain of computing efficiency, there are some extensions ##' that are not included in the current version of the survival package: ##' ##' (0) The Kaplan-Meier estimator for the censoring times \code{reverse=TRUE} ##' is correctly estimated when there are ties between event and censoring ##' times. ##' ##' (1) A conditional version of the kernel smoothed Kaplan-Meier estimator for at most one ##' continuous predictors using nearest neighborhoods (Beran 1981, ##' Stute 1984, Akritas 1994). ##' ##' (2) For cluster-correlated data the right hand side of \code{formula} may ##' identify a \code{\link{cluster}} variable. In that case Greenwood's variance ##' formula is replaced by the formula of Ying \& Wei (1994). ##' ##' (3) Competing risk models can be specified via \code{\link{Hist}} response ##' objects in \code{formula}. ##' ##' The Aalen-Johansen estimator is applied for estimating the cumulative ##' incidence functions for all causes. The advantage over the function ##' \code{cuminc} of the cmprsk package are user-friendly model specification ##' via \code{\link{Hist}} and sophisticated print, summary, predict and plot ##' methods. ##' ##' Under construction: ##' ##' (U0) Interval censored event times specified via \code{\link{Hist}} are used ##' to find the nonparametric maximum likelihood estimate. Currently this works ##' only for two-state models and the results should match with those from the ##' package `Icens'. ##' ##' (U1) Extensions to more complex multi-states models ##' ##' (U2) The nonparametric maximum likelihood estimate for interval censored ##' observations of competing risks models. ##' ##' @param formula A formula whose left hand side is a \code{Hist} ##' object. In some special cases it can also be a \code{Surv} ##' response object, see the details section. The right hand side is ##' as usual a linear combination of covariates which may contain at ##' most one continuous factor. Whether or not a covariate is ##' recognized as continuous or discrete depends on its class and on ##' the argument \code{discrete.level}. The right hand side may also ##' be used to specify clusters, see the details section. ##' @param data A data.frame in which all the variables of ##' \code{formula} can be interpreted. ##' @param subset Passed as argument \code{subset} to function ##' \code{subset} which applied to \code{data} before the formula is ##' processed. ##' @param na.action All lines in data with any missing values in the ##' variables of formula are removed. ##' @param reverse For right censored data, if reverse=TRUE then the ##' censoring distribution is estimated. ##' @param conf.int The level (between 0 and 1) for two-sided ##' pointwise confidence intervals. Defaults to 0.95. Remark: only ##' plain Wald-type confidence limits are available. ##' @param bandwidth Smoothing parameter for nearest neighborhoods ##' based on the values of a continuous covariate. See function ##' \code{neighborhood} for details. ##' @param caseweights Weights applied to the contribution of each ##' subject to change the number of events and the number at ##' risk. This can be used for bootstrap and survey analysis. Should ##' be a vector of the same length and the same order as \code{data}. ##' @param discrete.level Numeric covariates are treated as factors ##' when their number of unique values exceeds not ##' \code{discrete.level}. Otherwise the product limit method is ##' applied, in overlapping neighborhoods according to the bandwidth. ##' @param x logical value: if \code{TRUE}, the full covariate matrix ##' with is returned in component \code{model.matrix}. The reduced ##' matrix contains unique rows of the full covariate matrix and is ##' always returned in component \code{X}. ##' @param method For interval censored data only. If equal to ##' \code{"npmle"} (the default) use the usual Turnbull algorithm, ##' else the product limit version of the self-consistent estimate. ##' @param exact If TRUE the grid of time points used for estimation ##' includes all the L and R endpoints of the observed intervals. ##' @param maxiter For interval censored data only. Maximal number of ##' iterations to obtain the nonparametric maximum likelihood ##' estimate. Defaults to 1000. ##' @param grid For interval censored data only. When method=one.step ##' grid for one-step product limit estimate. Defaults to sorted list ##' of unique left and right endpoints of the observed intervals. ##' @param tol For interval censored data only. Numeric value whose ##' negative exponential is used as convergence criterion for finding ##' the nonparametric maximum likelihood estimate. Defaults to 7 ##' meaning exp(-7). ##' @param type In two state models either \code{"surv"} for the Kaplan-Meier estimate of the survival ##' function or \code{"cuminc"} for 1-Kaplan-Meier. Default is \code{"surv"} when \code{reverse==FALSE} and \code{"cuminc"} when \code{reverse==TRUE}. ##' In competing risks models it has to be \code{"cuminc"} ##' Aalen-Johansen estimate of the cumulative incidence function. ##' @return Object of class "prodlim". See \code{\link{print.prodlim}}, \code{\link{predict.prodlim}}, predict, ##' \code{\link{summary.prodlim}}, \code{\link{plot.prodlim}}. ##' @author Thomas A. Gerds \email{tag@@biostat.ku.dk} ##' @seealso \code{\link{predictSurv}}, \code{\link{predictSurvIndividual}}, ##' \code{\link{predictCuminc}}, \code{\link{Hist}}, \code{\link{neighborhood}}, ##' \code{\link{Surv}}, \code{\link{survfit}}, \code{\link{strata}}, ##' @references Andersen, Borgan, Gill, Keiding (1993) Springer `Statistical ##' Models Based on Counting Processes' ##' ##' Akritas (1994) The Annals of Statistics 22, 1299-1327 Nearest neighbor ##' estimation of a bivariate distribution under random censoring. ##' ##' R Beran (1981) http://anson.ucdavis.edu/~beran/paper.html `Nonparametric ##' regression with randomly censored survival data' ##' ##' Stute (1984) The Annals of Statistics 12, 917--926 `Asymptotic Normality of ##' Nearest Neighbor Regression Function Estimates' ##' ##' Ying, Wei (1994) Journal of Multivariate Analysis 50, 17-29 The Kaplan-Meier ##' estimate for dependent failure time observations ##' @keywords survival nonparametric cluster ##' @examples ##' ##' ##---------------------two-state survival model------------ ##' dat <- SimSurv(30) ##' with(dat,plot(Hist(time,status))) ##' fit <- prodlim(Hist(time,status)~1,data=dat) ##' print(fit) ##' plot(fit) ##' summary(fit) ##' quantile(fit) ##' ##' ## Subset ##' fit1a <- prodlim(Hist(time,status)~1,data=dat,subset=dat$X1==1) ##' fit1b <- prodlim(Hist(time,status)~1,data=dat,subset=dat$X1==1 & dat$X2>0) ##' ##' ## --------------------clustered data--------------------- ##' library(survival) ##' cdat <- cbind(SimSurv(30),patnr=sample(1:5,size=30,replace=TRUE)) ##' fit <- prodlim(Hist(time,status)~cluster(patnr),data=cdat) ##' print(fit) ##' plot(fit) ##' summary(fit) ##' ##' ##' ##-----------compare Kaplan-Meier to survival package--------- ##' ##' dat <- SimSurv(30) ##' pfit <- prodlim(Surv(time,status)~1,data=dat) ##' pfit <- prodlim(Hist(time,status)~1,data=dat) ## same thing ##' sfit <- survfit(Surv(time,status)~1,data=dat,conf.type="plain") ##' ## same result for the survival distribution function ##' all(round(pfit$surv,12)==round(sfit$surv,12)) ##' summary(pfit,digits=3) ##' summary(sfit,times=quantile(unique(dat$time))) ##' ##' ##-----------estimating the censoring survival function---------------- ##' ##' rdat <- data.frame(time=c(1,2,3,3,3,4,5,5,6,7),status=c(1,0,0,1,0,1,0,1,1,0)) ##' rpfit <- prodlim(Hist(time,status)~1,data=rdat,reverse=TRUE) ##' rsfit <- survfit(Surv(time,1-status)~1,data=rdat,conf.type="plain") ##' ## When there are ties between times at which events are observed ##' ## times at which subjects are right censored, then the convention ##' ## is that events come first. This is not obeyed by the above call to survfit, ##' ## and hence only prodlim delivers the correct reverse Kaplan-Meier: ##' cbind("Wrong:"=rsfit$surv,"Correct:"=rpfit$surv) ##' ##' ##-------------------stratified Kaplan-Meier--------------------- ##' ##' pfit.X2 <- prodlim(Surv(time,status)~X2,data=dat) ##' summary(pfit.X2) ##' summary(pfit.X2,intervals=TRUE) ##' plot(pfit.X2) ##' ##' ##----------continuous covariate: Stone-Beran estimate------------ ##' ##' prodlim(Surv(time,status)~X1,data=dat) ##' ##' ##-------------both discrete and continuous covariates------------ ##' ##' prodlim(Surv(time,status)~X2+X1,data=dat) ##' ##' ##----------------------interval censored data---------------------- ##' ##' dat <- data.frame(L=1:10,R=c(2,3,12,8,9,10,7,12,12,12),status=c(1,1,0,1,1,1,1,0,0,0)) ##' with(dat,Hist(time=list(L,R),event=status)) ##' ##' dat$event=1 ##' npmle.fitml <- prodlim(Hist(time=list(L,R),event)~1,data=dat) ##' ##' ##-------------competing risks------------------- ##' ##' CompRiskFrame <- data.frame(time=1:100,event=rbinom(100,2,.5),X=rbinom(100,1,.5)) ##' crFit <- prodlim(Hist(time,event)~X,data=CompRiskFrame) ##' summary(crFit) ##' plot(crFit) ##' summary(crFit,cause=2) ##' plot(crFit,cause=2) ##' ##' ##' # Changing the cens.code: ##' dat <- data.frame(time=1:10,status=c(1,2,1,2,5,5,1,1,2,2)) ##' fit <- prodlim(Hist(time,status)~1,data=dat) ##' print(fit$model.response) ##' fit <- prodlim(Hist(time,status,cens.code="2")~1,data=dat) ##' print(fit$model.response) ##' plot(fit) ##' plot(fit,cause="5") ##' ##' ##' ##------------delayed entry---------------------- ##' ##' ## left-truncated event times with competing risk endpoint ##' ##' dat <- data.frame(entry=c(7,3,11,12,11,2,1,7,15,17,3),time=10:20,status=c(1,0,2,2,0,0,1,2,0,2,0)) ##' fitd <- prodlim(Hist(time=time,event=status,entry=entry)~1,data=dat) ##' summary(fitd) ##' plot(fitd) ##' #' @export ##' @author Thomas A. Gerds "prodlim" <- function(formula, data = parent.frame(), subset, na.action=NULL, reverse=FALSE, conf.int=.95, bandwidth=NULL, caseweights, discrete.level=3, x=TRUE, # force.multistate=FALSE, maxiter=1000, grid, tol=7, method=c("npmle","one.step","impute.midpoint","impute.right"), exact=TRUE, type){ # {{{ find the data call <- match.call() if (!missing(subset)){ data <- subset(data,subset=subset) if (!missing(caseweights)){ caseweights <- subset(caseweights,subset=subset) } } EHF <- EventHistory.frame(formula=formula, data=data, unspecialsDesign=FALSE, specials=c("Strata","strata","factor", "NN","cluster"), stripSpecials=c("strata","cluster","NN"), stripAlias=list("strata"=c("Strata","factor")), stripArguments=list("strata"=NULL,"NN"=NULL,"cluster"=NULL), specialsDesign=FALSE, check.formula=TRUE) event.history <- EHF$event.history response <- EHF$event.history if (reverse==TRUE){ ## estimation of censoring distribution model.type <- 1 }else{ model.type <- match(attr(event.history,"model"),c("survival","competing.risks","multi.states")) } if (missing(type)) type <- switch(model.type,"survival"=ifelse(reverse,"cuminc","surv"),"cuminc") else { type <- tolower(type) stopifnot(match(type,c("surv","cuminc"),nomatch=0)!=0) } cens.type <- attr(response,"cens.type") # if (force.multistate==TRUE) model.type <- 3 # {{{ order according to event times if (cens.type!="intervalCensored"){ event.time.order <- order(event.history[,"time"],-event.history[,"status"]) } else{ event.time.order <- order(event.history[,"L"],-event.history[,"status"]) } # }}} # {{{ covariates covariates <- EHF[-1] ## `factor' and 'Strata' are aliases for `strata' strata.pos <- match(c("strata","factor","Strata"),names(covariates),nomatch=0) if (sum(strata.pos)>0) strata <- do.call("cbind",covariates[strata.pos]) else strata <- NULL ## 'NN' NN <- covariates[["NN"]] xlevels <- attr(strata,"levels") ## unspecified rest <- covariates$design xlevels <- c(attr(strata,"levels"),attr(rest,"levels")) if ((is.null(NN)+is.null(strata)+is.null(rest))==3){ cotype <- 1 } else{ unspecified <- NULL if (!is.null(rest)){ discrete.p <- sapply(colnames(rest),function(u){ x <- rest[,u,drop=TRUE] !is.numeric(x) || !length(unique(x))>discrete.level }) if (any(!discrete.p)){ ## continuous covariates NN <- if (is.null(NN)) rest[,!discrete.p,drop=FALSE] else cbind(NN,rest[,!discrete.p,drop=FALSE]) } if (any(discrete.p)){ ## discrete covariates strata <- if (is.null(strata)){ rest[,discrete.p,drop=FALSE] } else{ cbind(strata,rest[,discrete.p,drop=FALSE]) } } } if (NCOL(NN)>1) stop(paste("Currently we can not compute neighborhoods in",length(colnames(NN)),"continuous dimensions.")) cotype <- 1 + (!is.null(strata))*1+(!is.null(NN))*2 } ## use unique values as levels ## for variables that are not factors ## but treated as such if (any(found <- (match(colnames(strata),names(xlevels),nomatch=0)==0))){ uniquelevels <- lapply(colnames(strata)[found],function(x){ unique(strata[,x]) }) names(uniquelevels) <- colnames(strata)[found] xlevels <- c(xlevels,uniquelevels) } ## cotype # 1 : no covariates # 2 : only strata # 3 : only continuous # 4 : strata AND continuous # }}} # {{{ disjunct strata (discrete covariates) if (cotype %in% c(2,4)){ ## changed 09 Dec 2014 (16:57)--> ## S <- do.call("paste", c(data.frame(strata), sep = "\r")) S <- interaction(data.frame(strata), sep = ":",drop=TRUE) ## <-- changed 09 Dec 2014 (16:57) NS <- length(unique(S)) ## changed 09 Dec 2014 (16:57) --> Sfactor <- factor(S,levels=levels(S),labels=1:NS) ## <-- changed 09 Dec 2014 (16:57) if (cens.type!="intervalCensored"){ sorted <- order(Sfactor, response[,"time"],-response[,"status"]) } else{ sorted <- order(Sfactor, response[,"L"],-response[,"status"]) } Sfactor <- Sfactor[sorted] } else{ sorted <- event.time.order } response <- response[sorted,] # sort each stratum # }}} # {{{ caseweights if (missing(caseweights)) { weighted <- 0 caseweights <- NULL } else { weighted <- 1 if(length(caseweights)!=NROW(response)) stop(paste("The length of caseweights is: ", length(caseweights), "\nthis is not the same as the number of subjects\nwith no missing values, which is ", NROW(response), sep="")) ## wrong to order by event.time.order when there are covariates ## caseweights <- caseweights[event.time.order] ## this fixes bug in versions < 1.5.7 caseweights <- caseweights[sorted] } # }}} # {{{ overlapping neighborhoods (continuous covariates) if (cotype %in% c(3,4)){ Z <- NN[sorted,,drop=TRUE] if (cotype==3){ nbh <- neighborhood(Z,bandwidth=bandwidth) nbh.list <- list(nbh) bandwidth <- nbh$bandwidth neighbors <- nbh$neighbors } else{ # nearest neighbors within each stratum nbh.list <- lapply(split(Z,Sfactor),neighborhood,bandwidth=bandwidth) bandwidth <- sapply(nbh.list,function(nbh)nbh$bandwidth) tabS <- c(0,cumsum(tabulate(Sfactor))[-NS]) neighbors <- unlist(lapply(1:NS,function(l){ ## incrementing the neighbors by nbh.list[[l]]$neighbors+tabS[l]}),use.names=FALSE) ## the size of the previous strata } response <- response[neighbors,,drop=FALSE] if (weighted==TRUE) caseweights <- caseweights[neighbors] } # }}} # {{{ delay (left truncation) delayed <- attr(event.history,"entry.type")=="leftTruncated" ## && !(attr(event.history,"entry.type")=="") if (!delayed) { ## either NULL or "" entrytime <- NULL } else { entrytime <- response[,"entry"] if(!(all(entrytime>=0))) stop(paste("Not all entry times in dataset are greater or equal to zero.")) } # }}} # {{{ bound on the number of unique time points over all strata switch(cotype, { # type=1 size.strata <- NROW(response) NU <- 1 if (cens.type!="intervalCensored") N <- length(unique(response[,"time"])) else N <- length(unique(response[,"L"])) ## if (delayed) N <- N + length(entrytime) if (delayed) N <- length(unique(c(entrytime,response[,"time"]))) }, { # type=2 size.strata <- tabulate(Sfactor) N <- NROW(response) NU <- length(size.strata) if (delayed) N <- 2*N }, { # type=3 size.strata <- nbh$size.nbh N <- sum(size.strata) NU <- nbh$nu if (delayed) N <- 2*N }, { # type=4 size.strata <- unlist(lapply(nbh.list,function(nbh)nbh$size.nbh),use.names=FALSE) N <- sum(size.strata) if (delayed) N <- 2*N n.unique.strata <- unlist(lapply(nbh.list,function(nbh)nbh$nu),use.names=FALSE) NU <- sum(n.unique.strata) }) # }}} # {{{ characterizing the covariate space continuous.predictors <- colnames(NN) discrete.predictors <- colnames(strata) X <- switch(cotype, {#type=1 NULL}, { #type=2 X <- data.frame(unique(strata[sorted,,drop=FALSE])) ## colnames(X) <- paste("strata",names(strata),sep=".") # colnames(X) <- names(strata) rownames(X) <- 1:NROW(X) X }, { #type=3 X <- unlist(lapply(nbh.list,function(x)x$values),use.names=FALSE) X <- data.frame(X) ## colnames(X) <- paste("NN",names(NN),sep=".") colnames(X) <- colnames(NN) rownames(X) <- 1:NROW(X) X }, { #type=4 D <- data.frame(unique(strata[sorted,,drop=FALSE])) ## colnames(D) <- paste("strata",names(strata),sep=".") D <- data.frame(D[rep(1:NS,n.unique.strata),,drop=FALSE]) C <- data.frame(unlist(lapply(nbh.list,function(x)x$values),use.names=FALSE)) X <- cbind(D,C) ## colnames(X) <- c(paste("strata",names(strata),sep="."),paste("NN",names(NN),sep=".")) colnames(X) <- c(colnames(strata),colnames(NN)) rownames(X) <- 1:NROW(X) X }, { #type=5 X=data.frame(pseudo="pseudo") rownames(X) <- 1:NROW(X) X }) if (x==TRUE) model.matrix <- switch(cotype,{NULL},strata,NN,cbind(strata,NN))[event.time.order,,drop=FALSE] else model.matrix <- NULL event.history <- event.history[event.time.order,,drop=FALSE] # }}} # {{{ cluster correlated data need an adjusted variance formula clustered <- (length(covariates$cluster)>0) if (clustered) clustervar <- colnames(covariates$cluster) else clustervar <- NULL if (clustered){ cluster <- covariates$cluster[sorted,,drop=TRUE] if (cotype==1){ NC <- length(unique(cluster)) cluster <- factor(cluster,labels=1:NC) } else{ if (cotype==2){ NC <- unlist(tapply(cluster,Sfactor,function(x){length(unique(x))})) cluster <- as.numeric(unlist(tapply(cluster,Sfactor,function(x){ factor(x,labels=1:length(unique(x)))}))) } } } # }}} # {{{ find the appropriate C routine # with respect to model.type, cens.type, cotype and clustered # the following cases are not yet available ## if (length(attr(event.history,"entry.type"))>1) stop("Prodlim: Estimation for left-truncated data not yet implemented.") if (delayed & weighted>0) stop("Prodlim: Estimation for left-truncated data with caseweights not implemented.") if (reverse && cens.type!="rightCensored") stop("Prodlim: Estimation of the censoring distribution works only for right censored data.") if (delayed && clustered) stop("Prodlim: Estimation with delayed entry and cluster-correlated observations not yet implemented.") if (reverse && clustered) stop("Prodlim: Estimation of censoring distribution with cluster-correlated observations not yet handled.") if (cens.type=="intervalCensored" && model.type>=2) stop("Prodlim: Interval censored observations only handled for two-state models") ## if (cens.type=="intervalCensored" && model.type>2) stop("Interval censored observations only handled for two-state and competing risks models") if (clustered && model.type>1) stop("Prodlim: Cluster-correlated observations only handled for two-state models") if (clustered && cotype %in% c(3,4)) stop("Prodlim: Cluster-correlated observations not yet handled in presence of continuous covariates") #cluster <- cluster[neighbors] if (cotype>1 && cens.type=="intervalCensored") stop("Prodlim: Interval censored data and covariate strata not yet handled.") if (model.type==1){ # }}} # {{{ two state model if (clustered){ ## right censored clustered fit <- .C("prodlimSRC",as.double(response[,"time"]),as.double(response[,"status"]),integer(0),as.double(entrytime),as.double(caseweights),as.integer(cluster),as.integer(N),integer(0),as.integer(NC),as.integer(NU),as.integer(size.strata),time=double(N),nrisk=double(2*N),nevent=double(2*N),ncens=double(2*N),surv=double(N),cuminc=double(0),hazard=double(N),var.hazard=double(N+N),extra.double=double(4 * max(NC)),max.nc=as.integer(max(NC)),ntimes=integer(1),ntimes.strata=integer(NU),first.strata=integer(NU),reverse=integer(0),model=as.integer(0),independent=as.integer(0),delayed=as.integer(delayed),weighted=as.integer(weighted),PACKAGE="prodlim") NT <- fit$ntimes Cout <- list("time"=fit$time[1:NT],"n.risk"=matrix(fit$nrisk,ncol=2,byrow=FALSE,dimnames=list(NULL,c("n.risk","cluster.n.risk")))[1:NT,],"n.event"=matrix(fit$nevent,ncol=2,byrow=FALSE,dimnames=list(NULL,c("n.event","cluster.n.event")))[1:NT,],"n.lost"=matrix(fit$ncens,ncol=2,byrow=FALSE,dimnames=list(NULL,c("n.lost","cluster.n.lost")))[1:NT,],"surv"=fit$surv[1:NT],"se.surv"=fit$surv[1:NT]*sqrt(pmax(0,fit$var.hazard[N+(1:NT)])),"naive.se.surv"=fit$surv[1:NT]*sqrt(pmax(0,fit$var.hazard[1:NT])),"hazard"=fit$hazard[1:NT],"first.strata"=fit$first.strata,"size.strata"=fit$ntimes.strata,"model"="survival") Cout$maxtime <- max(Cout$time) } else{ if (cens.type=="intervalCensored"){ if (length(method)>1) method <- method[1] if (length(grep("impute",method))>0){ naiiveMethod <- strsplit(method,"impute.")[[1]][[2]] if (naiiveMethod=="midpoint"){ naiveResponse <- data.frame(unclass(response)) naiveResponse$imputedTime <- (naiveResponse$L+naiveResponse$R)/2 naiveResponse[naiveResponse[,"status"]==0,"imputedTime"] <- naiveResponse[naiveResponse[,"status"]==0,"L"] Cout <- prodlim(Hist(imputedTime,status!=0)~1,data=naiveResponse) return(Cout) } } else{ Cout <- prodlimIcensSurv(response, grid, tol=tol, maxiter=maxiter, ml=ifelse(method=="one.step",FALSE,TRUE), exact=exact) } } else{ ## right censored not clustered fit <- .C("prodlimSRC",as.double(response[,"time"]),as.double(response[,"status"]),integer(0),as.double(entrytime),as.double(caseweights),integer(0),as.integer(N),integer(0),integer(0),as.integer(NU),as.integer(size.strata),time=double(N),nrisk=double(N),nevent=double(N),ncens=double(N),surv=double(N),double(0),hazard = double(N),var.hazard=double(N),extra.double=double(0),max.nc=integer(0),ntimes=integer(1),ntimes.strata=integer(NU),first.strata=integer(NU),as.integer(reverse),model=as.integer(0),independent=as.integer(1),delayed=as.integer(delayed),weighted=as.integer(weighted),PACKAGE="prodlim") NT <- fit$ntimes Cout <- list("time"=fit$time[1:NT], "n.risk"=fit$nrisk[1:NT], "n.event"=fit$nevent[1:NT], "n.lost"=fit$ncens[1:NT], "surv"=fit$surv[1:NT], "se.surv"=fit$surv[1:NT]*sqrt(pmax(0,fit$var.hazard[1:NT])), "hazard"=fit$hazard[1:NT], "first.strata"=fit$first.strata, "size.strata"=fit$ntimes.strata, "model"="survival") Cout$maxtime <- max(Cout$time) } } } else{ # }}} # {{{ competing.risks model if (model.type==2){ states <- attr(response,"states") E <- response[,"event"]-1 # for the c routine D <- response[,"status"] NS <- length(unique(E[D!=0])) # number of different causes fit <- .C("prodlimSRC", as.double(response[,"time"]), as.double(D), as.integer(E), as.double(entrytime), as.double(caseweights), integer(0), as.integer(N), as.integer(NS), integer(0), as.integer(NU), as.integer(size.strata), time=double(N), nrisk=double(N), nevent=double(N * NS), ncens=double(N), surv=double(N), cuminc=double(N * NS), cause.hazard = double(N * NS), var.hazard=double(N * NS), extra.double=double(4 * NS), max.nc=integer(0), ntimes=integer(1), ntimes.strata=integer(NU), first.strata=integer(NU), reverse=integer(0), model=as.integer(1), independent=as.integer(1), delayed=as.integer(delayed), weighted=as.integer(weighted), PACKAGE="prodlim") NT <- fit$ntimes # changed Tue Sep 30 12:51:58 CEST 2008 # its easier to work with a list than with a matrix # gatherC <- function(x,dimR=fit$ntimes,dimC=NS,names=states){ # matrix(x[1:(dimR*dimC)],ncol=dimC,byrow=TRUE,dimnames=list(rep("",dimR),names)) # } gatherC <- function(x,dimR=fit$ntimes,dimC=NS,names=states){ out <- split(x[1:(dimR*dimC)],rep(1:NS,dimR)) names(out) <- names out } Cout <- list("time"=fit$time[1:NT], "n.risk"=fit$nrisk[1:NT], "n.event"=gatherC(fit$nevent), "n.lost"=fit$ncens[1:NT], "cuminc"=gatherC(fit$cuminc), "var.cuminc"=gatherC(fit$var.hazard), "se.cuminc"=gatherC(sqrt(pmax(0,fit$var.hazard))), "surv"=fit$surv[1:NT], "cause.hazard"=gatherC(fit$cause.hazard), "first.strata"=fit$first.strata, "size.strata"=fit$ntimes.strata, "model"="competing.risks") Cout$maxtime <- max(Cout$time) } else { # multi.state model # -------------------------------------------------------------------- Cout <- prodlimMulti(response,size.strata,N,NU) Cout$maxtime <- max(Cout$time) } } if (conf.int==TRUE) conf.int <- 0.95 # }}} # {{{ confidence intervals if (is.numeric(conf.int) && cens.type!="intervalCensored"){ if (model.type==1){ if (!(is.null(Cout$se.surv))){ ## pointwise confidence intervals for survival probability zval <- qnorm(1- (1-conf.int)/2, 0,1) lower <- pmax(Cout$surv - zval * Cout$se.surv,0) lower[Cout$se.surv==0] <- 0 upper <- pmin(Cout$surv + zval * Cout$se.surv,1) upper[Cout$se.surv==0] <- 1 Cout <- c(Cout,list(lower=lower,upper=upper)) } } else{ if (is.numeric(conf.int)){ if (!(00) && all(is.numeric(coef))) if (NP != length(coef)){ if (length(coef)==1){ if (verbose) warning("The regression coefficient ",coef," is used for all covariates.") coef <- rep(coef,NP) } else{ stop(paste("Number of covariates ",NP," and number of regression coefficients ",length(coef)," differ.",sep="")) } } LP <- colSums(coef * t(X)) } LP } prodlim/R/backGround.R0000755000175100001440000000631313035633434014353 0ustar hornikusers#' Background and grid color control. #' #' Some users like background colors, and it may be helpful to have grid lines #' to read off e.g. probabilities from a Kaplan-Meier graph. Both things can be #' controlled with this function. However, it mainly serves #' \code{\link{plot.prodlim}}. #' #' #' @param xlim Limits for the xaxis, defaults to par("usr")[1:2]. #' @param ylim Limits for the yaxis, defaults to par("usr")[3:4]. #' @param bg Background color. Can be multiple colors which are then switched #' at each horizontal line. #' @param fg Grid line color. #' @param horizontal Numerical values at which horizontal grid lines are #' plotted. #' @param vertical Numerical values at which vertical grid lines are plotted. #' @param border The color of the border around the background. #' @author Thomas Alexander Gerds #' @keywords survival #' @examples #' #' #' plot(0,0) #' backGround(bg="beige",fg="red",vertical=0,horizontal=0) #' #' plot(0,0) #' backGround(bg=c("yellow","green"),fg="red",xlim=c(-1,1),ylim=c(-1,1),horizontal=seq(0,1,.1)) #' backGround(bg=c("yellow","green"),fg="red",horizontal=seq(0,1,.1)) #' #' @export backGround <- function(xlim, ylim, bg="white", fg="gray77", horizontal=NULL, vertical=NULL, border="black"){ U <- par("usr") if (missing(xlim)) xlim <- c(U[1],U[2]) if (missing(ylim)) ylim <- c(U[3],U[4]) # background if (!is.null(bg)){ if (length(bg)==1){ rect(U[1],U[3],U[2],U[4],col=bg[1], border=border) }else{ if (length(bg)>1){ if (is.null(horizontal)){ xleft <- sort(unique(c(xlim[1],vertical,xlim[2]))) NR <- length(xleft) bcol <- rep(bg,length.out=NR) nix <- sapply(1:(NR-1),function(r){ polygon(y=c(U[3],U[3],U[4],U[4],U[3]), x=c(xleft[r],xleft[r+1],xleft[r+1],xleft[r],xleft[r]), col=bcol[r], border=FALSE)}) } else{ ybot <- sort(unique(c(ylim[1],horizontal,ylim[2]))) NR <- length(ybot) bcol <- rep(bg,length.out=NR) nix <- sapply(1:(NR-1),function(r){ ## for (r in 1:(NR-1)){ ## rect(xleft=xlim[1],xright=xlim[2],ybottom=ybot[r],ytop=ybot[r+1],col=bcol[r],border=FALSE) ## polygon(x=c(xlim[1],xlim[1],xlim[2],xlim[2],xlim[1]), polygon(x=c(U[1],U[1],U[2],U[2],U[1]), y=c(ybot[r],ybot[r+1],ybot[r+1],ybot[r],ybot[r]), col=bcol[r], border=FALSE) ## do NOT specify: density=100 as this slows this down! }) } } } } # grid if (length(fg)>0){ if (length(vertical)>0) abline(v=vertical,col=fg) if (length(horizontal)>0) abline(h=horizontal,col=fg) } } prodlim/R/dimColor.R0000755000175100001440000000146613035633434014050 0ustar hornikusers##' This function calls first \code{\link{col2rgb}} on a color name and then ##' uses \code{\link{rgb}} to adjust the intensity of the result. ##' ##' @title Dim a given color to a specified density ##' @param col Color name or number passed to \code{\link{col2rgb}}. ##' @param density Integer value passed as alpha coefficient to ##' \code{\link{rgb}} between 0 and 255 ##' @return A character vector with the color code. See \code{rgb} for details. ##' @seealso rgb col2rgb ##' @examples ##' dimColor(2,33) ##' dimColor("green",133) ##' @export ##' @author Thomas A. Gerds dimColor <- function(col,density=55){ ccrgb=as.list(grDevices::col2rgb(col,alpha=TRUE)) names(ccrgb) <- c("red","green","blue","alpha") ccrgb$alpha=density do.call(grDevices::rgb,c(ccrgb,list(max=255))) } prodlim/R/print.Hist.R0000755000175100001440000000007313035633434014333 0ustar hornikusers##' @export print.Hist <- function(x,...){ summary(x) } prodlim/R/EventHistory.frame.R0000644000175100001440000002462213035633434016030 0ustar hornikusers##' Extract event history data and design matrix including specials from call ##' ##' Obtain a list with the data used for event history regression analysis. This ##' function cannot be used directly on the user level but inside a function ##' to prepare data for survival analysis. ##' @title Event history frame ##' @param formula Formula whose left hand side specifies the event ##' history, i.e., either via Surv() or Hist(). ##' @param data Data frame in which the formula is interpreted ##' @param unspecialsDesign Passed as is to ##' \code{\link{model.design}}. ##' @param specials Character vector of special function names. ##' Usually the body of the special functions is function(x)x but ##' e.g., \code{\link{strata}} from the survival package does treat ##' the values ##' @param specialsFactor Passed as is to \code{\link{model.design}}. ##' @param specialsDesign Passed as is to \code{\link{model.design}} ##' @param stripSpecials Passed as \code{specials} to ##' \code{\link{strip.terms}} ##' @param stripArguments Passed as \code{arguments} to ##' \code{\link{strip.terms}} ##' @param stripAlias Passed as \code{alias.names} to ##' \code{\link{strip.terms}} ##' @param stripUnspecials Passed as \code{unspecials} to ##' \code{\link{strip.terms}} ##' @param dropIntercept Passed as is to \code{\link{model.design}} ##' @param check.formula If TRUE check if formula is a Surv or Hist ##' thing. ##' @param response If FALSE do not get response data (event.history). ##' @return A list which contains ##' - the event.history (see \code{\link{Hist}}) ##' - the design matrix (see \code{\link{model.design}}) ##' - one entry for each special (see \code{\link{model.design}}) ##' @seealso model.frame model.design Hist ##' @examples ##' ##' ## Here are some data with an event time and no competing risks ##' ## and two covariates X1 and X2. ##' ## Suppose we want to declare that variable X1 is treated differently ##' ## than variable X2. For example, X1 could be a cluster variable, or ##' ## X1 should have a proportional effect on the outcome. ##' dsurv <- data.frame(time=1:7, ##' status=c(0,1,1,0,0,0,1), ##' X2=c(2.24,3.22,9.59,4.4,3.54,6.81,5.05), ##' X3=c(1,1,1,1,0,0,1), ##' X4=c(44.69,37.41,68.54,38.85,35.9,27.02,41.84), ##' X1=factor(c("a","b","a","c","c","a","b"), ##' levels=c("c","a","b"))) ##' ## We pass a formula and the data ##' e <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, ##' data=dsurv, ##' specials=c("prop","cluster"), ##' stripSpecials=c("prop","cluster")) ##' names(e) ##' ## The first element is the event.history which is result of the left hand ##' ## side of the formula: ##' e$event.history ##' ## same as ##' with(dsurv,Hist(time,status)) ##' ## to see the structure do ##' colnames(e$event.history) ##' unclass(e$event.history) ##' ## in case of competing risks there will be an additional column called event, ##' ## see help(Hist) for more details ##' ##' ## The other elements are the design, i.e., model.matrix for the non-special covariates ##' e$design ##' ## and a data.frame for the special covariates ##' e$prop ##' ## The special covariates can be returned as a model.matrix ##' e2 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, ##' data=dsurv, ##' specials=c("prop","cluster"), ##' stripSpecials=c("prop","cluster"), ##' specialsDesign=TRUE) ##' e2$prop ##' ## and the non-special covariates can be returned as a data.frame ##' e3 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, ##' data=dsurv, ##' specials=c("prop","cluster"), ##' stripSpecials=c("prop","cluster"), ##' specialsDesign=TRUE, ##' unspecialsDesign=FALSE) ##' e3$design ##' ##' ## the general idea is that the function is used to parse the combination of ##' ## formula and data inside another function. Here is an example with ##' ## competing risks ##' SampleRegression <- function(formula,data=parent.frame()){ ##' thecall <- match.call() ##' ehf <- EventHistory.frame(formula=formula, ##' data=data, ##' stripSpecials=c("prop","cluster","timevar"), ##' specials=c("prop","timevar","cluster")) ##' time <- ehf$event.history[,"time"] ##' status <- ehf$event.history[,"status"] ##' ## event as a factor ##' if (attr(ehf$event.history,"model")=="competing.risks"){ ##' event <- ehf$event.history[,"event"] ##' Event <- getEvent(ehf$event.history) ##' list(response=data.frame(time,status,event,Event),X=ehf[-1]) ##' } ##' else{ # no competing risks ##' list(response=data.frame(time,status),X=ehf[-1]) ##' } ##' } ##' dsurv$outcome <- c("cause1","0","cause2","cause1","cause2","cause2","0") ##' SampleRegression(Hist(time,outcome)~prop(X1)+X2+cluster(X3)+X4,dsurv) ##' ##' ## let's test if the parsing works ##' form1 <- Hist(time,outcome!="0")~prop(X1)+X2+cluster(X3)+X4 ##' form2 <- Hist(time,outcome)~prop(X1)+cluster(X3)+X4 ##' ff <- list(form1,form2) ##' lapply(ff,function(f){SampleRegression(f,dsurv)}) ##' ##' ##' ## here is what the riskRegression package uses to ##' ## distinguish between covariates with ##' ## time-proportional effects and covariates with ##' ## time-varying effects: ##' \dontrun{ ##' library(riskRegression) ##' data(Melanoma) ##' f <- Hist(time,status)~prop(thick)+strata(sex)+age+prop(ulcer,power=1)+timevar(invasion,test=1) ##' ## here the unspecial terms, i.e., the term age is treated as prop ##' ## also, strata is an alias for timvar ##' ##' EHF <- prodlim::EventHistory.frame(formula, ##' Melanoma[1:10], ##' specials=c("timevar","strata","prop","const","tp"), ##' stripSpecials=c("timevar","prop"), ##' stripArguments=list("prop"=list("power"=0), ##' "timevar"=list("test"=0)), ##' stripAlias=list("timevar"=c("strata"), ##' "prop"=c("tp","const")), ##' stripUnspecials="prop", ##' specialsDesign=TRUE, ##' dropIntercept=TRUE) ##' EHF$prop ##' EHF$timevar ##' } ##' @export ##' @author Thomas A. Gerds EventHistory.frame <- function(formula, data, unspecialsDesign=TRUE, specials, specialsFactor=TRUE, specialsDesign=FALSE, stripSpecials=NULL, stripArguments=NULL, stripAlias=NULL, stripUnspecials=NULL, dropIntercept=TRUE, check.formula=TRUE, response=TRUE){ # {{{ check if formula is a proper formula if (response && check.formula){ formula.names <- try(all.names(formula),silent=TRUE) if (!(formula.names[1]=="~") || (match("$",formula.names,nomatch=0)+match("[",formula.names,nomatch=0)>0)){ stop("Invalid specification of formula. Perhaps forgotten right hand side?\nNote that any subsetting, ie data$var or data[,\"var\"], is invalid for this function.")} else{ if (!(any(match(c("survival::Surv","Surv","prodlim::Hist","Hist"), formula.names,nomatch=0)))) stop("formula is NOT a proper survival formula,\nwhich must have a `Surv' or `Hist' object as response.") } } # }}} # {{{call model.frame ## data argument is used to resolve '.' see help(terms.formula) Terms <- terms(x=formula,specials=specials,data=data) if (!is.null(stripSpecials)){ ## Terms <- terms(x=formula, specials=specials) if (length(attr(Terms,"term.labels"))>0) Terms <- strip.terms(Terms, specials=stripSpecials, arguments=stripArguments, alias.names=stripAlias, unspecials=stripUnspecials) } # }}} # {{{ get all variables and remove missing values ## use the stripped formula because, otherwise ## it may be hard to know what variables are, e.g., ## FGR uses cov2(var,tf=qfun) where qfun is a function mm <- na.omit(get_all_vars(formula(Terms),data)) if (NROW(mm) == 0) stop("No (non-missing) observations") # }}} # {{{ extract response if (response==TRUE && attr(Terms,"response")!=0){ event.history <- model.response(model.frame(update(formula,".~1"), data=mm)) # }}} # {{{ Fix for those who use `Surv' instead of `Hist' if (match("Surv",class(event.history),nomatch=0)!=0){ attr(event.history,"model") <- "survival" attr(event.history,"cens.type") <- "rightCensored" attr(event.history,"entry.type") <- ifelse(ncol(event.history)==2,"","leftTruncated") if (attr(event.history,"entry.type")=="leftTruncated") colnames(event.history) <- c("entry","time","status") } # }}} }else event.history <- NULL # {{{ design design <- model.design(Terms, data=mm, maxOrder=1, dropIntercept=dropIntercept, unspecialsDesign=unspecialsDesign, specialsFactor=specialsFactor, specialsDesign=specialsDesign) # }}} out <- c(list(event.history=event.history), design[sapply(design,length)>0]) attr(out,"Terms") <- Terms attr(out,"na.action") <- attr(mm,"na.action") class(out) <- "EventHistory.frame" out } ##' @export as.data.frame.EventHistory.frame <- function(x,...){ Y <- data.frame(unclass(x$event.history)) X <- do.call("cbind",x[-1]) cbind(Y,X) } prodlim/R/survModel.R0000755000175100001440000000135213035633434014252 0ustar hornikusers#' Survival model for simulation #' #' Create a survival model to simulate a right censored event time data without #' covariates #' #' This function requires the \code{lava} package. #' #' @return A structural equation model initialized with three variables: the #' latent event time, the latent right censored time, and the observed #' right censored event time. #' @author Thomas A. Gerds #' @export survModel <- function(){ ## require(lava) sm <- lava::lvm(~eventtime+censtime) lava::distribution(sm,"eventtime") <- lava::coxWeibull.lvm(scale=1/100) lava::distribution(sm,"censtime") <- lava::coxWeibull.lvm(scale=1/100) sm <- lava::eventTime(sm,time~min(eventtime=1,censtime=0),"event") sm } prodlim/R/atRisk.R0000755000175100001440000001325113057105704013526 0ustar hornikusers#' Drawing numbers of subjects at-risk of experiencing an event below #' Kaplan-Meier and Aalen-Johansen plots. #' #' This function is invoked and controlled by \code{plot.prodlim}. #' #' This function should not be called directly. The arguments can be specified #' as \code{atRisk.arg} in the call to \code{plot.prodlim}. #' #' @param x an object of class `prodlim' as returned by the #' \code{prodlim} function. #' @param newdata see \code{plot.prodlim} #' @param times Where to compute the atrisk numbers. #' @param line Distance of the atrisk numbers from the inner plot. #' @param col The color of the text. #' @param labelcol The color for the labels. Defaults to col. #' @param interspace Distance between rows of atrisk numbers. #' @param cex Passed on to \code{mtext} for both atrisk numbers and #' labels. #' @param labels Labels for the at-risk rows. #' @param title Title for the at-risk labels #' @param titlecol The color for the title. Defaults to 1 (black). #' @param pos The value is passed on to the \code{mtext} argument #' \code{at} for the labels (not the atriks numbers). #' @param adj Passed on to \code{mtext} for the labels (not the atriks #' numbers). #' @param dist If \code{line} is missing, the distance of the upper #' most atrisk row from the inner plotting region: par()$mgp[2]. #' @param adjust.labels If \code{TRUE} the labels are left adjusted. #' @param ... Further arguments that are passed to the function #' \code{mtext}. #' @return Nil #' @author Thomas Alexander Gerds #' @seealso \code{\link{plot.prodlim}}, \code{\link{confInt}}, #' \code{\link{markTime}} #' @keywords survival #' @export atRisk <- function(x, newdata, times, line, col, labelcol=NULL, interspace, cex, labels, title="", titlecol=NULL, pos, adj, dist, adjust.labels=TRUE, ...){ if (missing(times)) times <- seq(0,x$maxtime,x$maxtime/10) if (x$model=="competing.risks"){ px <- lifeTab(object=x,times=times,cause=1,newdata=newdata,stats=NULL)[[1]] } else if (x$model=="survival"){ px <- lifeTab(object=x,times=times,newdata=newdata,stats=NULL) } if (is.matrix(px) || is.data.frame(px)) sumx <- lapply(data.frame(px)[,grep("n.risk",colnames(px)),drop=FALSE],function(x)x) else sumx <- lapply(px,function(v){ u <- v[,grep("n.risk",colnames(v)),drop=FALSE] if (NCOL(u)>1){ ulist <- lapply(1:NCOL(u),function(i)u[,i]) names(ulist) <- colnames(u) ulist } else u }) if (is.list(sumx[[1]])) sumx <- unlist(sumx,recursive=FALSE) if (all(sapply(sumx,NCOL))==1) nlines <- length(sumx) if (missing(line)){ line <- par()$mgp[2] + dist + (0:(2*nlines-1)) *interspace -(nlines-1) } if (missing(cex)) cex <- 1 ## if (missing(pos)) pos <- min(times) if (missing(pos)) pos <- par()$usr[1] if (missing(adj)) adj <- 1 if (missing(labels)) if (length(names(sumx)==nlines)) labels <- paste("",names(sumx),"",sep="") else labels <- rep("",nlines) ## c("No. \nsubjects",rep("",nlines-1)) # title for no. at-risk below plot # -------------------------------------------------------------------- if (is.null(titlecol)){ tcol <- 1 } else { if (is.na(titlecol[1])) tcol <- 1 else tcol <- titlecol[1] } ## if (!is.null(title)) mtext(title, side=1, at=pos, col=tcol, line=line[1]-1, adj=adj, cex=cex, outer=FALSE, xpd=NA, ...) # labeling the no. at-risk below plot # -------------------------------------------------------------------- ## if (is.null(adjust.labels) || adjust.labels==TRUE){ ## labels <- format(labels,justify="left")} if (length(col)==nlines/2) ## 1 cluster level col <- rep(col,rep(2,length(col))) lapply(1:nlines,function(y){ mtext(text=as.character(sumx[[y]]), side=1, at=times, line=rep(line[y],length(times)), col=rep(col[y],length(times)), cex=cex, outer=FALSE, xpd=NA, ...) if (is.null(labelcol)){ lcol <- col[y] } else { if (is.na(labelcol[y])) lcol <- labelcol[1] else lcol <- labelcol[y] } ## print(labels[y]) mtext(text=labels[y], side=1, at=pos, col=labelcol[y], ## col=1, line=line[y], adj=adj, cex=cex, outer=FALSE, xpd=NA, ...) }) } prodlim/R/mean.prodlim.R0000755000175100001440000000150213035633434014654 0ustar hornikusers"mean.prodlim" <- function(x, times, newdata, ...){ if (!(x$model %in% c("survival","competing.risks"))) stop("no mean(.prodlim) method available for this object.") if(x$covariate.type==1) stop("No covariates for computing mean survival.") jump.times <- x$time if (missing(times)) times <- x$time times <- sort(unique(times)) ntimes <- length(times) if (missing(newdata)) newdata <- eval(x$call$data) surv.frame <- predict(x,newdata=newdata,time=times,level.chaos=1,mode="matrix",type="surv") smean <- apply(surv.frame,2,mean,na.rm=TRUE) marginal.fit <- prodlim(update.formula(formula(x$formula),"~1"),data=x$data) out <- marginal.fit out$surv <- smean out$covariate.type <- 1 class(out) <- c("prodlim","mean") out } prodlim/R/predictSurvIndividual.R0000755000175100001440000000331113035633434016612 0ustar hornikusers#' Predict individual survival probabilities #' #' Function to extract the predicted probabilities at the individual event #' times that have been used for fitting a prodlim object. #' #' #' @param object A fitted object of class "prodlim". #' @param lag Integer. `0' means predictions at the individual times, 1 means #' just before the individual times, etc. #' @return A vector of survival probabilities. #' @author Thomas A. Gerds \email{tag@@biostat.ku.dk} #' @seealso \code{\link{predict.prodlim}},\code{\link{predictSurv}}, #' @keywords survival #' @examples #' #' SurvFrame <- data.frame(time=1:10,status=rbinom(10,1,.5)) #' x <- prodlim(formula=Hist(time=time,status!=0)~1,data=SurvFrame) #' predictSurvIndividual(x,lag=1) #' #' @export predictSurvIndividual <- function(object, lag=1){ obs.times <- as.numeric(object$model.response[,1]) if (object$covariate.type==1){ locOBS <- match(obs.times,object$time,nomatch=FALSE) if (any(locOBS==FALSE)) stop("Can't locate all individual observation times" ) psurv <- c(rep(1,lag),object$surv)[locOBS]} else{ N <- length(obs.times) if (is.null(object$model.matrix)) stop("Cannot find model.matrix, need to set x=TRUE in call of prodlim.") findex <- row.match(object$model.matrix,object$X) ## if (any(is.na(findex))) ## stop("Cannot identify all rows in object$model.matrix in ") psurv <- .C("predict_individual_survival",pred=double(N),as.double(object$surv),as.double(object$time),as.double(obs.times),as.integer(object$first.strata[findex]),as.integer(object$size.strata[findex]),as.integer(N),as.integer(lag),NAOK=FALSE,PACKAGE="prodlim")$pred} psurv } prodlim/R/crModel.R0000644000175100001440000000170713035633434013660 0ustar hornikusers#' Competing risks model for simulation #' #' Create a competing risks model with to causes to simulate a right censored event time data without #' covariates #' #' This function requires the \code{lava} package. #' @title Competing risks model for simulation #' @return A structural equation model initialized with four variables: the #' latent event times of two causes, the latent right censored time, and the observed #' right censored event time. #' @author Thomas A. Gerds #' @examples #' library(lava) #' m <- crModel() #' d <- sim(m,6) #' print(d) #' #' @export crModel <- function(){ # require(lava) crm <- lava::lvm() lava::distribution(crm,"eventtime1") <- lava::coxWeibull.lvm(scale=1/100) lava::distribution(crm,"eventtime2") <- lava::coxWeibull.lvm(scale=1/100) lava::distribution(crm,"censtime") <- lava::coxWeibull.lvm(scale=1/100) crm <- lava::eventTime(crm,time~min(eventtime1=1,eventtime2=2,censtime=0),"event") crm } prodlim/R/iindex.R0000755000175100001440000000104613057247774013566 0ustar hornikusersiindex <- function (L,R,grid) { stopifnot((length(grid)>0) & (length(L)>0) & (length(R)>0)) stopifnot(is.numeric(c(L,R,grid))) N <- length(L) NS <- length(grid) ind <- .C("iindexSRC", index = integer(N*NS), strata = integer(NS-1), as.double(L), as.double(R), as.double(grid), as.integer(N), as.integer(NS), PACKAGE="prodlim") strata <- ind$strata index <- ind$index[1:max(strata)] list(iindex=index,imax=strata) } prodlim/R/eventsMethods.R0000755000175100001440000000210213035633434015114 0ustar hornikusersevents <- function(object,...){ UseMethod("events",object) } events.prodlim <- function(object){ events.Hist(object$model.response) } events.Hist <- function(object,...){ model <- attr(object,"model") cens.code <- attr(object,"cens.code") states <- attr(object,"states") if (model=="survival"){ factor(object[,"status",drop=TRUE],levels=c(cens.code,states),labels=c("unknown",states)) } else{ if (model=="competing.risks"){ D <- object[,"status",drop=TRUE] cens.type <- attr(object,"cens.type") E <- object[,"event",drop=TRUE] names(E) <- NULL stupid.stupid.factor.levels <- as.integer(factor(c(states,cens.code),levels=c(states,cens.code))) ## sorted.stupid.stupid.factor.levels <- c(stupid.stupid.factor.levels[-1],stupid.stupid.factor.levels[1]) ## events <- factor(E,levels=sorted.stupid.stupid.factor.levels,labels=c(states,"unknown")) events <- factor(E,levels=stupid.stupid.factor.levels,labels=c(states,"unknown")) events } else stop("No event.Hist function for multi.state models") } } prodlim/R/IntIndex.R0000755000175100001440000000267613057247552014034 0ustar hornikusers## Notation ## subject specific intervals ## number: N ## running index: i ## support (Peto) intervals ## number: M ## running index: m IntIndex <- function(x,L,R){ N <- length(L) M <- NCOL(x) p <- x[1,] q <- x[2,] res <- .C('IntIndexSRC',as.double(L),as.double(R),as.double(p),as.double(q),as.integer(N),as.integer(M),Iindex=integer(N*M),Mindex=integer(N*M),Istrata=integer(N),Mstrata=integer(M)) Iindex <- res$Iindex[res$Iindex!=0] Istrata <- res$Istrata#[res$Istrata!=0] Mindex <- res$Mindex[res$Mindex!=0] Mstrata <- res$Mstrata#[res$Mstrata!=0] out <- list(Mindex,Mstrata,Iindex,Istrata,rbind(L,R),x) names(out) <- c("Mindex","Mstrata","Iindex","Istrata","obsInt","petoInt") #class(out) <- "IntIndex" out } ## old version ## IntIndex <- function(x,L,R){ ## N <- length(L) ## M <- NCOL(x) ## p <- x[1,] ## q <- x[2,] ## res <- .C('IntIndex',as.double(L),as.double(R),as.double(p),as.double(q),as.integer(N),as.integer(M),Iindex=integer(N*M),Mindex=integer(N*M),Istrata=integer(N),Mstrata=integer(M),PACKAGE="prodlim") ## Iindex <- res$Iindex[res$Iindex!=0] ## Istrata <- res$Istrata[res$Istrata!=0] ## Mindex <- res$Mindex[res$Mindex!=0] ## Mstrata <- res$Mstrata[res$Mstrata!=0] ## out <- list(Mindex,Mstrata,Iindex,Istrata,rbind(L,R),x) ## names(out) <- c("Mindex","Mstrata","Iindex","Istrata","obsInt","petoInt") ## class(out) <- "IntIndex" ## out ## } prodlim/R/plotIntervals.R0000755000175100001440000000142213035633434015136 0ustar hornikusersplotIntervals <- function(object,rightCensored=FALSE,xlim,ylim,ylab,xlab,...){ stopifnot(match("Hist",class(object)) && attr(object,"cens.type")=="intervalCensored") x <- object[order(object[,"L"]),] L <- x[,"L"] R <- x[,"R"] if (rightCensored==FALSE) x <- x[!is.na(R)&!is.infinite(R),] if (missing(ylim)) ylim <- c(0,NROW(x)+1) if (missing(xlim)) xlim <- c(0,max(R[!is.na(R)&!is.infinite(R)])) if (missing(xlab)) xlab <- "Time" if (missing(ylab)) ylab <- "Observed intervals" plot(0,0,type="n",xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,...) nix <- lapply(1:NROW(x),function(f){ x <- unlist(x[f,c("L","R"),drop=TRUE]) x[is.infinite(x)] <- max(R[!is.na(R)&!is.infinite(R)]) segments(x0=x[1],y0=f,x1=x[2],y1=f,lwd=2)}) invisible(x) } prodlim/R/List2Matrix.R0000644000175100001440000000443613035633434014457 0ustar hornikusers### List2Matrix.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Sep 21 2015 (07:01) ## Version: ## last-updated: Sep 29 2015 (06:32) ## By: Thomas Alexander Gerds ## Update #: 6 #---------------------------------------------------------------------- ## ### Commentary: Reduce a list to a matrix or data.frame and add list names as new columns ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: ##' This function is used by summary.prodlim to deal with results. ##' ##' Reduction is done with rbind. ##' @title Reduce list to a matrix or data.frame with names as new columns ##' @param list A named list which contains nested lists ##' @param depth The depth in the list hierarchy until an rbindable object ##' @param names Names for the list variables ##' @return Matrix or data.frame. ##' @examples ##' ##' x=list(a=data.frame(u=1,b=2,c=3),b=data.frame(u=3,b=4,c=6)) ##' List2Matrix(x,depth=1,"X") ##' @export ##' @author Thomas A. Gerds List2Matrix <- function(list,depth,names){ if (missing(names)) names <- paste0("D",1:depth) switch(as.character(depth), "1"={ dims <- lapply(list,dim) cols <- sapply(dims,function(x)x[[2]]) rows <- sapply(dims,function(x)x[[1]]) stopifnot(length(unique(cols))==1) nl <- names(list) M <- do.call("rbind",list) rownames(M) <- NULL M <- cbind(rep(nl,rows),M) colnames(M)[1] <- names[1] M}, "2"={ List2Matrix(lapply(list,List2Matrix,depth=1,names=names[2]), depth=1, names=names[1])}, "3"={ List2Matrix(lapply(list,function(l){ List2Matrix(lapply(l,List2Matrix,depth=1,names[3]), depth=1, names=names[2]) }), depth=1,names=names[1])}, stop("Cannot do this depth.")) } #---------------------------------------------------------------------- ### List2Matrix.R ends here prodlim/R/prodlimIcensSurv.R0000755000175100001440000001133713057105702015602 0ustar hornikusersprodlimIcensSurv <- function(response, grid, tol=7, maxiter, ml=FALSE, exact=TRUE){ # {{{ data ntol <- 10^{-tol} L <- response[,"L"] N <- length(L) R <- response[,"R"] status <- response[,"status"] # }}} # {{{ one-step idea if (ml==FALSE) { # right censored observations # are defined by status R[status==0] <- L[status==0] if (missing(grid)) grid <- sort(unique(c(L,R))) else if (exact) grid <- sort(unique(c(min(L,R),grid))) else grid <- sort(unique(grid)) ## need an extra grid point before the smallest ## `L' to catch right censored and exact ## event times that match this smallest `L' stopifnot(all(grid >= 0)) if (grid[1]==0) grid <- c(-1,grid) else grid <- c(0,grid) indexR <- sindex(jump.times=grid,eval.times=R) indexL <- sindex(jump.times=grid,eval.times=L) ## indexR <- match(R,grid) ## indexL <- match(L,grid) NS <- length(grid) Ind <- iindex(L,R,grid) ## fit <- list("icens_prodlim", ## as.double(L), ## as.double(R), ## as.double(grid), ## as.integer(indexL), ## as.integer(indexR), ## as.integer(Ind$iindex), ## as.integer(c(Ind$imax,0)), ## as.integer(status), ## as.double(N), ## as.double(NS), ## nrisk=double(NS), ## nevent=double(NS), ## ncens=double(NS), ## hazard=double(NS), ## varhazard=double(NS), ## surv=double(NS), ## oldsurv=double(NS), ## as.double(ntol), ## as.integer(maxiter), ## n.iter=integer(1), ## PACKAGE="prodlim") fit <- .C("icens_prodlim", as.double(L), as.double(R), as.double(grid), as.integer(indexL), as.integer(indexR), as.integer(Ind$iindex), as.integer(c(Ind$imax,0)), as.integer(status), as.double(N), as.double(NS), nrisk=double(NS), nevent=double(NS), ncens=double(NS), hazard=double(NS), varhazard=double(NS), surv=double(NS), oldsurv=double(NS), as.double(ntol), as.integer(maxiter), n.iter=integer(1), PACKAGE="prodlim") ## rename the extra grid point before the smallest `L' ## if it is negative if (grid[1]<0) grid[1] <- 0 res <- list("time"=rbind(c(0,grid[-length(grid)]),c(grid)), "n.risk"=round(pmax(0,fit$nrisk),tol), "n.event"=round(pmax(0,fit$nevent),tol), "n.lost"=round(fit$ncens,tol), "hazard"=round(fit$hazard,tol), "surv"=round(pmax(0,fit$surv),tol), "maxtime"=max(grid), "n.iter"=fit$n.iter, "tol"=ntol, "model"="survival") # res <- list("time"=rbind(c(0,0,grid[-length(grid)]),c(0,grid)),"n.risk"=c(N,round(pmax(0,fit$nrisk),tol)),"n.event"=c(0,round(pmax(0,fit$nevent),tol)),"n.lost"=c(0,round(fit$ncens,tol)),"hazard"=c(0,round(fit$hazard,tol)),"surv"=c(1,round(pmax(0,fit$surv),tol)),"maxtime"=max(grid),"n.iter"=fit$n.iter,"tol"=ntol,"model"="survival") } else{ # }}} # {{{ npmle ## artificial closure of right censored intervals ## R[Rna] <- max(c(L,R)) + 1 R[status==0] <- max(c(L,R[status!=0])) + 1 ## R[status==0] <- max(c(L,R)) + 1 ## print(R[status==0]) peto.intervals <- PetoInt(L,R,status) indices <- IntIndex(x=peto.intervals,L=L,R=R) Mindex <- indices$Mindex Mstrata <- indices$Mstrata Iindex <- indices$Iindex Istrata <- indices$Istrata M <- length(Mstrata) N <- length(Istrata) ## Zsurv <- predictSurv(prodlimIcensSurv(response=response,grid=grid,tol=tol,maxiter=1,ml=FALSE)) Z <- rep(1/M,M) fit <- .C('GMLE',as.integer(c(0,Mstrata)),as.integer(c(0,Istrata)),as.integer(Mindex),as.integer(Iindex),as.integer(N),as.integer(M),Z=as.double(Z),double(length(Z)),as.double(ntol),as.integer(maxiter),steps=integer(1),PACKAGE="prodlim") n.event <- c(0,fit$Z*M) surv <- c(1,1-cumsum(fit$Z)) hazard <- c(0,fit$Z)/surv res <- list("time"=cbind(c(0,0),peto.intervals),"n.risk"=N-n.event,"n.event"=n.event,"n.lost"= c(0,rep(0,M)),"hazard"=round(hazard,tol),"surv"=round(surv,tol),"maxtime"=max(c(peto.intervals)),"n.iter"=fit$steps,"tol"=ntol,"model"="survival") } # }}} class(res) <- "prodlim" res } prodlim/R/parseSpecialNames.R0000644000175100001440000001374613035633434015700 0ustar hornikusers##' Extract from a vector of character strings the names of special functions and auxiliary arguments ##' ##' Signals an error if an element has more arguments than specified by argument arguments. ##' @title Parse special terms ##' @param x Vector of character strings. ##' @param special A character string: the name of the special argument. ##' @param arguments A vector which contains the arguments of the special function ##' @return A named list of parsed arguments. The names of the list are the special variable names, the elements ##' are lists of arguments. ##' @seealso model.design ##' @examples ##' ##' ## ignore arguments ##' parseSpecialNames("treat(Z)",special="treat") ##' ## set default to 0 ##' parseSpecialNames(c("log(Z)","a","log(B)"),special="log",arguments=list("base"=0)) ##' ## set default to 0 ##' parseSpecialNames(c("log(Z,3)","a","log(B,base=1)"),special="log",arguments=list("base"=0)) ##' ## different combinations of order and names ##' parseSpecialNames(c("log(Z,3)","a","log(B,1)"), ##' special="log", ##' arguments=list("base"=0)) ##' parseSpecialNames(c("log(Z,1,3)","a","log(B,u=3)"), ##' special="log", ##' arguments=list("base"=0,"u"=1)) ##' parseSpecialNames(c("log(Z,u=1,base=3)","a","log(B,u=3)"), ##' special="log", ##' arguments=list("base"=0,"u"=1)) ##' parseSpecialNames(c("log(Z,u=1,base=3)","a","log(B,base=8,u=3)"), ##' special="log", ##' arguments=list("base"=0,"u"=1)) ##' parseSpecialNames("treat(Z,u=2)", ##' special="treat", ##' arguments=list("u"=1,"k"=1)) ##' parseSpecialNames(c("treat(Z,1,u=2)","treat(B,u=2,k=3)"), ##' special="treat", ##' arguments=list("u"=NA,"k"=NULL)) ##' ## does not work to set default to NULL: ##' parseSpecialNames(c("treat(Z,1,u=2)","treat(B,u=2)"), ##' special="treat", ##' arguments=list("u"=NA,"k"=NULL)) ##' @author Thomas A. Gerds ##' @export parseSpecialNames <- function(x,special,arguments){ if (missing(arguments)) { argnames <- NULL } else { argnames <- names(arguments) } ## it would be possible to vectorize the function with the regexp: ## paste("(",paste(special,collapse="|"),")\\(|)$",sep="") ## but this causes some ## confusion and extra work specialRegexp <- paste("^",special,"\\(|)$",sep="") posSpecial <- grep(specialRegexp,x,value=FALSE) if (length(posSpecial)>0){ specialTerms <- strsplit(x[posSpecial],specialRegexp) ## if length is 1 then term is unspecial ## isSpecial <- sapply(listTerms,length) # check for further arguments termsWithArguments <- unlist(lapply(specialTerms,function(x){ if (length(x)<2) NULL else strsplit(x[[2]],"[ ]*,[ ]*")}), recursive=FALSE) varnames <- lapply(termsWithArguments,function(x){x[[1]]}) ## attr(varnames,"special.position") <- posSpecial ## only fish arguments if this is desired if (is.null(argnames)){ out <- vector(mode="list",length(varnames)) names(out) <- varnames return(out) }else{ varnames <- unlist(varnames) if (length(problem <- grep("=",varnames,value=TRUE))>0) stop(paste("Problematic variable name '",problem,"'. Variable names used in special may not contain '='.",sep="")) givenArguments <- lapply(termsWithArguments,function(x){ if (length(x)==1) NULL else x[2:length(x)] }) names(givenArguments) <- varnames # {{{ parse arguments specialArgumentList <- lapply(givenArguments,function(args){ if (!is.null(args)){ fullvalue <- strsplit(args,"=") fullvalue <- lapply(fullvalue,function(x){ ## remove whitespace gsub(" ","",x) }) givennames <- sapply(fullvalue,function(x){ if (length(x)==1) "" else x[[1]] }) values <- lapply(fullvalue,function(x){ if (length(x)==1) x[[1]] else x[[2]] }) if(length(argnames)0) if (!all(thismatch)) stop("Argument(s) '", paste(realnames,collapse=", "), "' is not an argument of '", special, "'. Valid argument(s): '", paste(argnames,collapse=", "),"'.") names(values) <- givennames nadd <- length(argnames)-length(values) if (nadd>0){ values <- c(values,rep(NA,nadd)) } thatmatch <- match(argnames,names(values),nomatch=0) names(values)[names(values)==""] <- argnames[thatmatch==0] values <- values[argnames] ## set defaults values[is.na(values)] <- unlist(arguments)[is.na(values)] values } else { ## use defaults arguments } }) # }}} names(specialArgumentList) <- names(givenArguments) ## attr(specialArgumentList,"special.position") <- posSpecial specialArgumentList } } else{NULL} } prodlim/R/quantile.prodlim.R0000755000175100001440000000707513044421045015562 0ustar hornikusers#' Quantiles for Kaplan-Meier and Aalen-Johansen estimates. #' #' Quantiles for Kaplan-Meier and Aalen-Johansen estimates. #' #' #' @param x Object of class \code{"prodlim"}. #' @param q Quantiles. Vector of values between 0 and 1. #' @param cause For competing risks the cause of interest. #' @param ... not used #' @author Thomas Alexander Gerds #' @keywords survival #' @examples #' library(lava) #' set.seed(1) #' d=SimSurv(30) #' f=prodlim(Hist(time,status)~1,data=d) #' f1=prodlim(Hist(time,status)~X1,data=d) #' # default: median and IQR #' quantile(f) #' quantile(f1) #' # median alone #' quantile(f,.5) #' quantile(f1,.5) #' #' # competing risks #' set.seed(3) #' dd = SimCompRisk(30) #' ff=prodlim(Hist(time,event)~1,data=dd) #' ff1=prodlim(Hist(time,event)~X1,data=dd) #' ## default: median and IQR #' quantile(ff) #' quantile(ff1) #' #' print(quantile(ff1),na.val="NA") #' print(quantile(ff1),na.val="Not reached") #' #' @export "quantile.prodlim" <- function(x, q, cause=1, ...){ ## require(stats) ## stopifnot(x$model=="survival") etype <- attr(x$model.response,"entry.type") if (!is.null(etype) && etype=="leftTruncated") stop("Don't know how to compute quantiles with delayed entry (left-truncation).") if(x$model=="survival"){ if (missing(q)) q <- c(1,.75,0.5,.25,0) q <- 1-q ## since this is a survival function sumx <- summary(x,newdata=x$X,times=x$time,showTime=TRUE,verbose=FALSE) getQ <- function(sum){ out <- do.call("cbind",lapply(c("surv","lower","upper"),function(w){ sumw <- sum[,w,drop=TRUE] notna= is.na(sumw) | sumw==0 | sumw ==1 xxx=as.numeric(sumw[!notna]) ttt=as.numeric(sum[,"time"][!notna]) found <- 2+sindex(jump.times=xxx,eval.times=q,comp="greater",strict=FALSE) inner <- c(as.vector(c(0,ttt)[found])) inner })) out <- data.frame(out) out <- cbind(q,out) names(out) <- c("q","quantile","lower","upper") out} if (sumx$cotype==1) out <- list("quantiles.survival"=getQ(sumx$table)) else out <- lapply(sumx$table,getQ) attr(out,"model") <- x$model class(out) <- "quantile.prodlim" out } else{ ## cumulative incidence, competing risks if (missing(q)) q <- c(0,0.25,0.5,0.75,1) sumx <- summary(x,newdata=x$X,times=x$time,showTime=TRUE,verbose=FALSE,cause=cause) getQ <- function(sum){ out <- do.call("cbind",lapply(c("cuminc","lower","upper"),function(w){ sumw <- sum[,w,drop=TRUE] notna= is.na(sumw) | sumw==0 | sumw ==1 xxx=as.numeric(sumw[!notna]) ttt=as.numeric(sum[,"time"][!notna]) found <- 2+sindex(jump.times=xxx,eval.times=q,comp="smaller",strict=FALSE) inner <- c(as.vector(c(0,ttt)[found])) inner })) out <- data.frame(out) out <- cbind(q,out) ## upper is lower and lower is upper names(out) <- c("q","quantile","upper","lower") out <- out[,c("q","quantile","lower","upper")] out} if (sumx$cotype==1) out <- list("quantiles.cuminc"=getQ(sumx$table[[1]])) else { out <- lapply(sumx$table[[1]],getQ) } attr(out,"model") <- x$model class(out) <- "quantile.prodlim" out } } prodlim/R/followup.R0000644000175100001440000000124113035633434014133 0ustar hornikusers### followup.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Sep 22 2015 (10:29) ## Version: ## last-updated: Sep 25 2015 (06:19) ## By: Thomas Alexander Gerds ## Update #: 2 #---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: followup <- function(formula,data,...){ G <- prodlim(formula,data,reverse=TRUE) quantile(G,...) } #---------------------------------------------------------------------- ### followup.R ends here prodlim/R/strip.terms.R0000644000175100001440000001515213035633434014564 0ustar hornikusers##' Reformulate a terms object such that some specials are stripped off ##' ##' This function is used to remove special specials, i.e., those ##' which cannot or should not be evaluated. ##' IMPORTANT: the unstripped terms need to know about all specials including the aliases. ##' See examples. ##' @title Strip special functions from terms ##' @param terms Terms object ##' @param specials Character vector of specials which should be ##' stripped off ##' @param alias.names Optional. A named list with alias names for the specials. ##' @param unspecials Optional. A special name for treating all the unspecial terms. ##' @param arguments A named list of arguments, one for each element ##' of specials. Elements are passed to \code{parseSpecialNames}. ##' @param keep.response Keep the response in the resulting object? ##' @return Reformulated terms object with an additional attribute which contains the \code{stripped.specials}. ##' @seealso parseSpecialNames reformulate drop.terms ##' @examples ##' ##' ## parse a survival formula and identify terms which ##' ## should be treated as proportional or timevarying: ##' f <- Surv(time,status)~age+prop(factor(edema))+timevar(sex,test=0)+prop(bili,power=1) ##' tt <- terms(f,specials=c("prop","timevar")) ##' attr(tt,"specials") ##' st <- strip.terms(tt,specials=c("prop","timevar"),arguments=NULL) ##' formula(st) ##' attr(st,"specials") ##' attr(st,"stripped.specials") ##' ##' ## provide a default value for argument power of proportional treatment ##' ## and argument test of timevarying treatment: ##' st2 <- strip.terms(tt, ##' specials=c("prop","timevar"), ##' arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) ##' formula(st2) ##' attr(st2,"stripped.specials") ##' attr(st2,"stripped.arguments") ##' ##' ## treat all unspecial terms as proportional ##' st3 <- strip.terms(tt, ##' unspecials="prop", ##' specials=c("prop","timevar"), ##' arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) ##' formula(st3) ##' attr(st3,"stripped.specials") ##' attr(st3,"stripped.arguments") ##' ##' ## allow alias names: strata for timevar and tp, const for prop. ##' ## IMPORTANT: the unstripped terms need to know about ##' ## all specials including the aliases ##' f <- Surv(time,status)~age+const(factor(edema))+strata(sex,test=0)+prop(bili,power=1)+tp(albumin) ##' tt2 <- terms(f,specials=c("prop","timevar","strata","tp","const")) ##' st4 <- strip.terms(tt2, ##' specials=c("prop","timevar"), ##' unspecials="prop", ##' alias.names=list("timevar"="strata","prop"=c("const","tp")), ##' arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) ##' formula(st4) ##' attr(st4,"stripped.specials") ##' attr(st4,"stripped.arguments") ##' ##' ## test if alias works also without unspecial argument ##' st5 <- strip.terms(tt2, ##' specials=c("prop","timevar"), ##' alias.names=list("timevar"="strata","prop"=c("const","tp")), ##' arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) ##' formula(st5) ##' attr(st5,"stripped.specials") ##' attr(st5,"stripped.arguments") ##' ##' library(survival) ##' data(pbc) ##' model.design(st4,data=pbc[1:3,],specialsDesign=TRUE) ##' model.design(st5,data=pbc[1:3,],specialsDesign=TRUE) ##' ##' ##' @export ##' @author Thomas A. Gerds strip.terms <- function(terms, specials, alias.names=NULL, unspecials=NULL, arguments, keep.response=TRUE){ termLabels <- attr(terms,"term.labels") terms.specials <- attr(terms,"specials") intercept <- attr(terms, "intercept") if (attr(terms,"response") && keep.response) response <- terms[[2L]] else response <- NULL # resolve unspecials do.unspecials <- length(unspecials)>0 if (do.unspecials){ if (length(unlist(terms.specials))>0) any <- -(-attr(terms,"response")+unlist(terms.specials)) else any <- 1:length(termLabels) if (length(any)) termLabels[any] <- paste(unspecials,"(",termLabels[any],")",sep="") } # resolve aliases do.alias <- length(alias.names)>0 if (do.alias){ for (spc in specials){ ali <- alias.names[[spc]] termLabels <- sub(paste("^(",paste(ali,collapse="|"),")\\(",sep=""), paste(spc,"(",sep=""), termLabels) ## remove alias specials newspecials <- unique(c(specials,names(terms.specials))) catch <- match(unlist(alias.names),newspecials,nomatch=0) newspecials <- newspecials[-catch] } } if (do.unspecials||do.alias){ aform <- reformulate(termLabels,response,intercept) environment(aform) <- environment(terms) if (do.alias) terms <- terms(aform,specials=newspecials) else terms <- terms(aform,specials=specials) terms.specials <- attr(terms,"specials") } ## terms.specials <- specials ## remove unused specials ## terms.specials <- terms.specials[!sapply(terms.specials,is.null)] ## only strip the specials in specials found <- match(names(terms.specials),specials,nomatch=0) if (any(found>0)){ stripspecials <- names(terms.specials)[found>0] strippedTerms <- vector(mode="list") strippedArguments <- vector(mode="list") for (s in 1:length(stripspecials)){ ## outcome counts as 1 spc <- stripspecials[[s]] hit.s <- - attr(terms,"response") + terms.specials[[spc]] ps <- parseSpecialNames(termLabels[hit.s], special=spc, arguments=arguments[[spc]]) ## attr(ps,"special.position") <- terms.specials[[spc]] terms.s <- terms.specials[spc] aps <- list(ps) names(aps) <- spc strippedArguments <- c(strippedArguments,aps) strippedTerms <- c(strippedTerms,terms.s) termLabels[hit.s] <- names(ps) } strippedFormula <- reformulate(termLabels,response,intercept) environment(strippedFormula) <- environment(terms) out <- terms(strippedFormula, specials = names(terms.specials)) ## reset specials attr(out,"stripped.specials") <- strippedTerms attr(out,"stripped.arguments") <- strippedArguments out }else{ terms } } prodlim/R/summary.prodlim.R0000755000175100001440000002651413035633434015443 0ustar hornikusers# {{{ header #' Summary method for prodlim objects. #' #' Summarizing the result of the product limit method in life-table format. #' Calculates the number of subjects at risk and counts events and censored #' observations at specified times or in specified time intervals. #' #' For cluster-correlated data the number of clusters at-risk are are also #' given. Confidence intervals are displayed when they are part of the fitted #' object. #' #' @param object An object with class `prodlim' derived with #' \code{\link{prodlim}} #' @param times Vector of times at which to return the estimated #' probabilities. #' @param newdata A data frame with the same variable names as those #' that appear on the right hand side of the 'prodlim' formula. #' Defaults to \code{object$X}. #' @param max.tables Integer. If \code{newdata} is not given the value #' of \code{max.tables} decides about the maximal number of tables to #' be shown. Defaults to 20. #' @param surv Logical. If FALSE report event probabilities instead of #' survival probabilities. Only available for #' \code{object$model=="survival"}. #' @param cause The cause for predicting the cause-specific cumulative #' incidence function in competing risk models. #' @param intervals Logical. If TRUE count events and censored in #' intervals between the values of \code{times}. #' @param percent Logical. If TRUE all estimated values are multiplied #' by 100 and thus interpretable on a percent scale. #' @param showTime If \code{TRUE} evaluation times are put into a #' column of the output table, otherwise evaluation times are shown as #' rownames. #' @param asMatrix Control the output format when there are multiple #' life tables, either because of covariate strata or competing causes #' or both. If not missing and not FALSE, reduce multiple life tables #' into a matrix with new columns \code{X} for covariate strata and #' \code{Event} for competing risks. #' @param ... Further arguments that are passed to the print #' function. #' @return A data.frame with the relevant information. #' @author Thomas A. Gerds \email{tag@@biostat.ku.dk} #' @seealso \code{\link{prodlim}}, \code{\link{summary.Hist}} #' #' @keywords survival ##' @examples ##' ##' library(lava) ##' set.seed(17) ##' m <- survModel() ##' distribution(m,~age) <- uniform.lvm(30,80) ##' distribution(m,~sex) <- binomial.lvm() ##' m <- categorical(m,~z,K=3) ##' regression(m,eventtime~age) <- 0.01 ##' regression(m,eventtime~sex) <- -0.4 ##' d <- sim(m,50) ##' d$sex <- factor(d$sex,levels=c(0,1),labels=c("female","male")) ##' d$Z <- factor(d$z,levels=c(1,0,2),labels=c("B","A","C")) ##' ##' # Univariate Kaplan-Meier ##' # ----------------------------------------------------------------------------------------- ##' fit0 <- prodlim(Hist(time,event)~1,data=d) ##' summary(fit0) ##' ##' ## show survival probabilities as percentage and ##' ## count number of events within intervals of a ##' ## given time-grid: ##' summary(fit0,times=c(1,5,10,12),percent=TRUE,intervals=TRUE) ##' ##' ## the result of summary has a print function ##' ## which passes ... to print and print.listof ##' sx <- summary(fit0,times=c(1,5,10,12),percent=TRUE,intervals=TRUE) ##' print(sx,digits=3) ##' ##' ## show cumulative incidences (1-survival) ##' summary(fit0,times=c(1,5,10,12),surv=FALSE,percent=TRUE,intervals=TRUE) ##' ##' # Stratified Kaplan-Meier ##' # ----------------------------------------------------------------------------------------- ##' ##' fit1 <- prodlim(Hist(time,event)~sex,data=d) ##' print(summary(fit1,times=c(1,5,10),intervals=TRUE,percent=TRUE),digits=3) ##' ##' summary(fit1,times=c(1,5,10),asMatrix=TRUE,intervals=TRUE,percent=TRUE) ##' ##' fit2 <- prodlim(Hist(time,event)~Z,data=d) ##' print(summary(fit2,times=c(1,5,10),intervals=TRUE,percent=TRUE),digits=3) ##' ##' ## Continuous strata (Beran estimator) ##' # ----------------------------------------------------------------------------------------- ##' fit3 <- prodlim(Hist(time,event)~age,data=d) ##' print(summary(fit3, ##' times=c(1,5,10), ##' newdata=data.frame(age=c(20,50,70)), ##' intervals=TRUE, ##' percent=TRUE),digits=3) ##' ##' ## stratified Beran estimator ##' # ----------------------------------------------------------------------------------------- ##' fit4 <- prodlim(Hist(time,event)~age+sex,data=d) ##' print(summary(fit4, ##' times=c(1,5,10), ##' newdata=data.frame(age=c(20,50,70),sex=c("female","male","male")), ##' intervals=TRUE, ##' percent=TRUE),digits=3) ##' ##' print(summary(fit4, ##' times=c(1,5,10), ##' newdata=data.frame(age=c(20,50,70),sex=c("female","male","male")), ##' intervals=TRUE,collapse=TRUE, ##' percent=TRUE),digits=3) ##' ##' ## assess results from summary ##' x <- summary(fit4,times=10,newdata=expand.grid(age=c(60,40,50),sex=c("male","female"))) ##' cbind(names(x$table),do.call("rbind",lapply(x$table,round,2))) ##' ##' x <- summary(fit4,times=10,newdata=expand.grid(age=c(60,40,50),sex=c("male","female"))) ##' ##' ## Competing risks: Aalen-Johansen ##' # ----------------------------------------------------------------------------------------- ##' d <- SimCompRisk(30) ##' crfit <- prodlim(Hist(time,event)~X1,data=d) ##' summary(crfit,times=c(1,2,5)) ##' summary(crfit,times=c(1,2,5),cause=1,intervals=TRUE) ##' summary(crfit,times=c(1,2,5),cause=1,asMatrix=TRUE) ##' summary(crfit,times=c(1,2,5),cause=1:2,asMatrix=TRUE) ##' ##' ##' # extract the actual tables from the summary ##' sumfit <- summary(crfit,times=c(1,2,5),print=FALSE) ##' sumfit$table[[1]] # cause 1 ##' sumfit$table[[2]] # cause 2 ##' ##' ##' # ' #' @export summary.prodlim <- function(object, times, newdata, max.tables=20, surv=TRUE, cause, intervals=FALSE, percent=FALSE, showTime=TRUE, asMatrix=FALSE, ...) { # }}} # {{{ classify the situation cens.type <- object$cens.type # uncensored, right or interval censored model <- object$model # survival, competing risks or multi-state ## cluster <- object$clustervar # clustered data? cotype <- object$covariate.type # no, discrete, continuous or both # }}} # {{{ times jump.times <- object$time if (missing(times) && (length(times <- jump.times) > 50)) times <- quantile(sort(unique(jump.times))) times <- sort(unique(times)) if (any(times>max(jump.times))) warning(call.=TRUE, immediate.=TRUE, paste("\n","Time(s) ",paste(times[times>max(jump.times)],collapse=", "), " are beyond the maximal follow-up time ",max(jump.times),"\n")) ntimes <- length(times) # }}} # {{{ interval-censored if (cens.type=="intervalCensored"){ ltab <- data.frame(time=paste("(",paste(signif(object$time[1,],2), signif(object$time[2,],2), sep="-"),"]",sep=""), n.risk=signif(object$n.risk,2), n.event=signif(object$n.event,2), ## n.lost=object$n.lost, surv=object$surv) } else{ # }}} # {{{ with covariates if (cotype>1){ if (missing(newdata) || length(newdata)==0){ X <- object$X if (NROW(X)>max.tables){ warning(call.=TRUE,immediate.=TRUE,paste("\nLife tables are available for", NROW(X), "different covariate constellations.\n", "Shown are the table corresponding to the first row in object$X,", "corresponding to the middle row (median of the number of rows in object$X) ", "and corresponding to the last row in object$X ...\n", "to see more tables use arguments `newdata' and `max.tables'\n")) X <- X[c(1,round(median(1:NROW(X))),NROW(X)),,drop=FALSE] } } else{ X <- unique.data.frame(newdata) if (NROW(X) < NROW(newdata)) warning("Returned is only one summary for each unique value in newdata.") } } else { X <- NULL } if (model=="survival") { stats <- list(c("surv",1),c("se.surv",0)) if (!is.null(object$conf.int)) stats <- c(stats,list(c("lower",0),c("upper",1))) if (surv==FALSE){ object$cuminc <- 1-object$surv object$se.cuminc <- object$se.surv cuminc.upper <- 1-object$lower cuminc.lower <- 1-object$upper object$lower <- cuminc.lower object$upper <- cuminc.upper stats <- list(c("cuminc",0),c("se.cuminc",0)) if (!is.null(object$conf.int)) stats <- c(stats,list(c("lower",0),c("upper",1))) } } if (model=="competing.risks"){ stats <- list(c("cuminc",0),c("se.cuminc",0)) if (!is.null(object$conf.int)) stats <- c(stats,list(c("lower",0),c("upper",0))) if (!missing(cause)){ cause <- checkCauses(cause=cause,object=object) } else{ ## show all causes cause <- attr(object$model.response,"states") } ltab <- lifeTab(object=object, times=times, cause=cause, newdata=X, stats=stats, intervals=intervals, percent=percent, showTime=showTime) Found <- match(cause,names(ltab),nomatch=0) if (all(Found)>0) { ltab <- ltab[Found] } else stop(paste("\nCannot find cause: ",cause,".\nFitted were causes: ",paste(names(ltab),collapse=", "),sep="")) }else{ ltab <- lifeTab(object=object, times=times, newdata=X, stats=stats, intervals=intervals, percent=percent, showTime=showTime) } } # }}} # {{{ output if (asMatrix!=FALSE) asMatrix <- TRUE if (model=="competing.risks"){ ## out <- list(table=ltab,cause=cause) if (asMatrix) if (cotype>1) ltab <- List2Matrix(ltab,depth=2,names=c("Event","X")) else ltab <- List2Matrix(ltab,depth=1,names=c("Event")) }else{ if(cotype>1 && asMatrix) ltab <- List2Matrix(ltab,depth=1,names="X") } out <- list(table=ltab,model=model,cotype=cotype,asMatrix=asMatrix,percent=percent) if (model=="competing.risks"){ out <- c(out,list(cause=cause)) } class(out) <- "summary.prodlim" out # }}} } prodlim/R/PercentAxis.R0000755000175100001440000000111613035633434014515 0ustar hornikusers#' Percentage-labeled axis. #' #' Use percentages instead of decimals to label the an axis with a probability #' scale . #' #' #' @param x Side of the axis #' @param at Positions (decimals) at which to label the axis. #' @param \dots Given to \code{axis}. #' @author Thomas Alexander Gerds #' @seealso \code{\link{plot.prodlim}} #' @keywords survival #' @examples #' #' plot(0,0,xlim=c(0,1),ylim=c(0,1),axes=FALSE) #' PercentAxis(1,at=seq(0,1,.25)) #' PercentAxis(2,at=seq(0,1,.25)) #' #' @export PercentAxis <- function(x,at,...){ axis(x,at=at,labels=paste(100*at,"%"),...) } prodlim/R/sindex.R0000755000175100001440000000564413057247770013604 0ustar hornikusers#' Index for evaluation of step functions. #' #' Returns an index of positions. Intended for evaluating a step function at #' selected times. The function counts how many elements of a vector, e.g. the #' jump times of the step function, are smaller or equal to the elements in a #' second vector, e.g. the times where the step function should be evaluated. #' #' If all \code{jump.times} are greater than a particular \code{eval.time} the #' sindex returns \code{0}. This must be considered when sindex is used for #' subsetting, see the Examples below. #' #' @param jump.times Numeric vector: e.g. the unique jump times of a step #' function. #' @param eval.times Numeric vector: e.g. the times where the step function #' should be evaluated #' @param strict If TRUE make the comparison of jump times and eval times #' strict #' @param comp If "greater" count the number of jump times that are greater #' (greater or equal when strict==FALSE) than the eval times #' @return Index of the same length as \code{eval.times} containing the numbers #' of the \code{jump.times} that are smaller than or equal to #' \code{eval.times}. #' @author Thomas A. Gerds \email{tag@@biostat.ku.dk} #' @keywords misc #' @examples #' #' #' test <- list(time = c(1, 1,5,5,2,7,9), #' status = c(1,0,1,0,1,1,0)) #' fit <- prodlim(Hist(time,status)~1,data=test) #' jtimes <- fit$time #' etimes <- c(0,.5,2,8,10) #' fit$surv #' c(1,fit$surv)[1+sindex(jtimes,etimes)] #' #' @export "sindex" <- function(jump.times,eval.times,comp="smaller",strict=FALSE) { stopifnot(is.numeric(jump.times)) stopifnot(is.numeric(eval.times)) N <- length(jump.times) if (comp=="greater"){ N-sindex(jump.times=jump.times, eval.times=eval.times, comp="smaller", strict=!strict) } else{ neval <- length(eval.times) if (!(neval> 0 && N >0)) stop("missing data") new.order <- order(eval.times) ind <- .C("sindexSRC",index = integer(neval),as.double(sort(jump.times)),as.double(eval.times[new.order]),as.integer(N),as.integer(neval),as.integer(strict),PACKAGE="prodlim")$index ind[order(new.order)] } } ## sindexStrata <- function(jump.times, ## first, ## size, ## eval.times, ## strict=FALSE) { ## stopifnot(is.numeric(jump.times)) ## stopifnot(is.numeric(eval.times)) ## NK <- length(size) ## stopifnot(length(first)==NK) ## N <- length(jump.times) ## neval <- length(eval.times) ## if (!(neval> 0 && N >0)) stop("missing data") ## new.order <- order(eval.times) ## ind <- .C("sindexStrata", ## index = integer(neval), ## as.double(sort(jump.times)), ## as.double(eval.times[new.order]), ## as.integer(N), ## as.integer(neval), ## as.integer(strict), ## DUP=FALSE, ## PACKAGE="prodlim")$index ## ind[order(new.order)] ## } prodlim/R/neighbors.R0000755000175100001440000000024713035633434014254 0ustar hornikusersneighbors <- function(x,y,...){ nbh=neighborhood(x,...) levs=rep(1:nbh$nu,nbh$size.nbh) nbh.list <- split(y[nbh$neighbors],levs) list(nbh=nbh,list=nbh.list) } prodlim/R/redist.R0000644000175100001440000000673113035633434013567 0ustar hornikusers### redist.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Nov 28 2015 (10:30) ## Version: ## last-updated: Nov 28 2015 (10:35) ## By: Thomas Alexander Gerds ## Update #: 2 #---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: ##' Calculation of Efron's re-distribution to the right algorithm to obtain the ##' Kaplan-Meier estimate. ##' #' @param time A numeric vector of event times. #' @param status The event status vector takes the value \code{1} for observed events and #' the value \code{0} for right censored times. ##' @return Calculations needed to ##' @seealso prodlim ##' @examples ##' redist(time=c(.35,0.4,.51,.51,.7,.73),status=c(0,1,1,0,0,1)) ##' @export ##' @author Thomas A. Gerds redist <- function(time,status){ library(prodlim) cat("\nKaplan-Meier estimate via re-distribution to the right algorithm:\n") order <- order(time,-status) time <- time[order] status <- status[order] N <- length(time) mass <- as.list(rep(1/N,N)) fractions <- as.list(rep(paste("1/",N,sep=""),N)) names(mass) <- paste("subject",1:N) for (i in 1:N) names(mass[[i]]) <- "own" for (i in 1:N) names(fractions[[i]]) <- "own contribution" surv <- 1 for (i in 1:N) { cat("\nSubject ",i,":\n---------------------------\nSurvival before = ",round(surv*100,2),"%\n",sep="") if (status[i]==0){ if (i==N){ cat("Last subject lost to follow-up event free at time = ",time[i],"\n",sep="") } else{ cat("No event until time = ",time[i],"\nRe-distribute mass ",signif(sum(mass[[i]]),2)," to remaining ",N-i,ifelse(N-i==1," subject"," subjects"),"\n",sep="") for (j in ((i+1):N)){ mass[[j]] <- c(mass[[j]],mass[[i]]/(N-i)) fractions[[j]] <- c(fractions[[j]],paste(fractions[[i]],"*1/",(N-i),sep="")) names(fractions[[j]])[length(fractions[[j]])-length(mass[[i]])+1] <- paste("from subject ",i,sep="") names(mass[[j]])[length(mass[[j]])] <- paste("from subject ",i,sep="") } } cat("Survival after = ",round(surv*100,2),"%\n",sep="") } else{ cat("Event at time = ",time[i],"\nContribution to Kaplan-Meier estimate:\n\n",sep="") contr <- rbind(fractions[[i]],format(mass[[i]],digits=4,nsmall=4)) rownames(contr) <- c("fractions","decimal") contr <- rbind(t(contr),c("sum",format(sum(mass[[i]]),digits=4,nsmall=4))) print(contr,quote=FALSE) surv.before <- surv surv <- surv-sum(mass[[i]]) cat("\nSurvival after = ",round(100*surv.before,2),"% - (",paste(fractions[[i]],collapse=" + ") ,")", "\n = ",round(100*surv.before,2),"% - ",round(100*sum(mass[[i]]),2) ,"% = ",round(surv*100,2),"%\n",sep="") } } table <- summary(f <- prodlim(Hist(time,status)~1,data=data.frame(time,status)),times=c(0,time),percent=TRUE) cat("\nSummary table:\n\n") tab <- table$table[,c("time","n.risk","n.event","n.lost","surv")] print(tab) out <- list(fit=f,table=tab) invisible(out) } #---------------------------------------------------------------------- ### redist.R ends here prodlim/R/SmartControl.R0000755000175100001440000001355313035633434014727 0ustar hornikusers# {{{ SmartControl #' Function to facilitate the control of arguments passed to subroutines. #' #' Many R functions need to pass several arguments to several different #' subroutines. Such arguments can are given as part of the three magic dots #' "...". The function SmartControl reads the dots together with a list of #' default values and returns for each subroutine a list of arguments. #' #' #' @param call A list of named arguments, as for example can be obtained via #' \code{list(...)}. #' @param keys A vector of names of subroutines. #' @param ignore A list of names which are removed from the argument #' \code{call} before processing. #' @param defaults A named list of default argument lists for the subroutines. #' @param forced A named list of forced arguments for the subroutines. #' @param split Regular expression used for splitting keys from arguments. #' Default is \code{"\."}. #' @param ignore.case If \code{TRUE} then all matching and splitting is not #' case sensitive. #' @param replaceDefaults If \code{TRUE} default arguments are replaced by #' given arguments. Can also be a named list with entries for each subroutine. #' @param verbose If \code{TRUE} warning messages are given for arguments in #' \code{call} that are not ignored via argument \code{ignore} and that do not #' match any \code{key}. #' @author Thomas Alexander Gerds #' @seealso \code{\link{plot.prodlim}} #' @keywords Graphics #' @examples #' #' #' myPlot = function(...){ #' ## set defaults #' plot.DefaultArgs=list(x=0,y=0,type="n") #' lines.DefaultArgs=list(x=1:10,lwd=3) #' ## apply smartcontrol #' x=SmartControl(call=list(...), #' defaults=list("plot"=plot.DefaultArgs, "lines"=lines.DefaultArgs), #' ignore.case=TRUE,keys=c("plot","axis2","lines"), #' forced=list("plot"=list(axes=FALSE),"axis2"=list(side=2))) #' ## call subroutines #' do.call("plot",x$plot) #' do.call("lines",x$lines) #' do.call("axis",x$axis2) #' } #' myPlot(plot.ylim=c(0,5),plot.xlim=c(0,20),lines.lty=3,axis2.At=c(0,3,4)) #' #' @export SmartControl <- function(call, keys, ignore, defaults, forced, split, ignore.case=TRUE, replaceDefaults, verbose=TRUE) # }}} { if (missing(split)) split <- "\\." # {{{ set up argument list SmartArgs <- as.list(call) SmartArgs <- SmartArgs[names(SmartArgs)!=""] if (ignore.case==TRUE){ names(SmartArgs) <- tolower(names(SmartArgs)) } # }}} # {{{remove ignorable arguments if (!missing(ignore) && is.character(ignore)){ if (ignore.case==TRUE){ ignore <- tolower(ignore) } SmartArgs <- SmartArgs[match(names(SmartArgs), ignore, nomatch=0)==0] } if (verbose==TRUE){ allKeysRegexp <- paste("^",keys,split,sep="",collapse="|") notIgnored <- grep(allKeysRegexp,names(SmartArgs),value=FALSE,ignore.case=TRUE) Ignored <- names(SmartArgs)[-notIgnored] SmartArgs <- SmartArgs[notIgnored] if (length(Ignored)>0){ paste(Ignored,collapse=", ") warning(paste("The following argument(s) are not smart and therefore ignored: ",paste(Ignored,collapse=", "))) } } # }}} # {{{ default arguments DefaultArgs <- vector(mode="list",length=length(keys)) names(DefaultArgs) <- keys if (!missing(defaults)){ whereDefault <- match(names(defaults),names(DefaultArgs),nomatch=0) if (all(whereDefault)) DefaultArgs[whereDefault] <- defaults else stop("Could not find the following default arguments: ",paste(names(defaults[0==whereDefault]),",")) } if (!missing(replaceDefaults)){ if (length(replaceDefaults)==1){ replaceDefaults <- rep(replaceDefaults,length(keys)) names(replaceDefaults) <- keys } else { stopifnot(length(replaceDefaults)==length(keys)) stopifnot(all(match(names(replaceDefaults),keys))) replaceDefaults <- replaceDefaults[keys] } } else{ replaceDefaults <- rep(FALSE,length(keys)) names(replaceDefaults) <- keys } # }}} # {{{ forced arguments keyForced <- vector(mode="list",length=length(keys)) names(keyForced) <- keys if (!missing(forced)){ whereDefault <- match(names(forced),names(keyForced),nomatch=0) if (all(whereDefault)) keyForced[whereDefault] <- forced else stop("Not all forced arguments found.") } # }}} # {{{ loop over keys keyArgList <- lapply(keys,function(k){ keyRegexp <- paste("^",k,split,sep="") foundArgs <- grep(keyRegexp,names(SmartArgs),value=TRUE,ignore.case=TRUE) if (length(foundArgs)>0){ keyArgs <- SmartArgs[foundArgs] if (ignore.case) argNames <- sapply(strsplit(tolower(names(keyArgs)),tolower(keyRegexp)),function(x)x[[2]]) else argNames <- sapply(strsplit(names(keyArgs),keyRegexp),function(x)x[[2]]) keyArgs <- lapply(keyArgs,function(x){ ## expressions for arrow labels in plot.Hist ## cannot be evaluated at this point ## if the expression is communicated ## more than one level higher maybeFail <- try(e <- eval(x),silent=TRUE) if (class(maybeFail)=="try-error") x else eval(x) }) names(keyArgs) <- argNames } else{ keyArgs <- NULL } # }}} # {{{ prepending the forced arguments----------------- if (length(keyForced[[k]])>0){ keyArgs <- c(keyForced[[k]],keyArgs) } # }}} # {{{ appending default arguments if (length(DefaultArgs[[k]])>0 && replaceDefaults[k]==FALSE){ keyArgs <- c(keyArgs,DefaultArgs[[k]]) } # }}} # {{{ removing duplicates if (!is.null(names(keyArgs))){ keyArgs[!duplicated(names(keyArgs))] } }) names(keyArgList) <- keys keyArgList # }}} } prodlim/R/extract.name.from.special.R0000755000175100001440000000026513035633434017246 0ustar hornikusersextract.name.from.special <- function(x,pattern="[()]"){ if (length(x)==1) rev(unlist(strsplit(x,pattern)))[1] else as.character(sapply(x,extract.name.from.special)) } prodlim/R/prodlimMulti.R0000755000175100001440000000536013035633434014756 0ustar hornikusersprodlimMulti <- function(response,size.strata,N,NU,cotype,force.multistate){ ## original function by Matthias `Wang' Wangler is.event <- response[,"status"]!=0 if (force.multistate==TRUE){ to <- response[,"status"] from <- rep(0,length(to)) } else{ to <- response[,"event"] from <- response[,"from"] } state.names <- unique(c(from, to[response[,"status"]!=0])) ns <- length(state.names) cens <- FALSE if(length(to[is.event])>0) cens <- TRUE from <- as.integer(factor(from,levels=state.names)) - 1 from <- as.numeric(from) to[is.event] <- as.integer(factor(to[is.event], levels=state.names)) - 1 to[!is.event] <- ns to <- as.numeric(to) states <- sort(unique(c(from, to[is.event]))) ## possible transitions tra <- unique(cbind(from[is.event], to[is.event])) sorted <- order(tra[,1],tra[,2]) tra <- matrix(tra[sorted,], ncol=2) tra <- cbind(0:(length(tra[,1])-1),tra) colnames(tra) <- c("row","from", "to") ntra <- nrow(tra) trow <- match(paste(from,to), paste(tra[,"from"],tra[,"to"]), nomatch=0) - 1 cens.in <- sort(unique(from[!is.event])) nci <- length(cens.in) cpos <- match(paste(from,to), paste(cens.in, ns), nomatch = 0) - 1 ## start distribution (all are starting in state 0 !!!) if( cotype > 1 ) { # nr.start <- table(from,co$covariates$strata[,1])[1,] nr.start <- size.strata ## WANG??? } else{nr.start <- length(from[from==0])} fit <- .C("prodlim_multistates", as.integer(N), as.integer(ns), as.integer(length(is.event)), as.integer(size.strata), as.integer(ntra), as.integer(tra[,"from"]), as.integer(tra[,"to"]), as.integer(trow), as.integer(nci), as.integer(cens.in), as.integer(cpos), as.double(response[,"time"]), as.integer(response[,"status"]), as.integer(nr.start), time=double(N), hazard=double(N*ns*ns), prob=double(N*ns*ns), nevent=integer(N*ns*ns), ncens=integer(N*ns), nrisk=integer(N*ns), first.strata=integer(NU), ntimes.strata=integer(NU), PACKAGE="prodlim") tra[,"from"] <- state.names[tra[,"from"]+1] tra[,"to"] <- state.names[tra[,"to"]+1] cens.in <- state.names[cens.in+1] NT <- sum(fit$ntimes.strata) res <- list("time"=fit$time[1:NT],"hazard"=fit$hazard[1:(NT*ns*ns)],"prob"=fit$prob[1:(NT*ns*ns)],"nevent"=fit$nevent[1:(NT*ns*ns)],"ncens"=fit$ncens[1:(NT*ns)],"nrisk"=nrisk <- fit$nrisk[1:(NT*ns)],"first.strata"=fit$first.strata,"size.strata"=fit$ntimes.strata,"uniquetrans"=tra,"cens.in"=cens.in,"states"=states,"state.names"=state.names,"model"="multi.states") res } prodlim/R/meanNeighbors.R0000755000175100001440000000124013035633434015047 0ustar hornikusers#' Helper function to obtain running means for prodlim objects. #' #' Compute average values of a variable according to neighborhoods. #' #' #' @param x Object of class \code{"neighborhood"}. #' @param y Vector of numeric values. #' @param \dots Not used. #' @author Thomas Alexander Gerds #' @seealso \code{\link{neighborhood}} #' @keywords survival #' @examples #' #' meanNeighbors(x=1:10,y=c(1,10,100,1000,1001,1001,1001,1002,1002,1002)) #' #' @export meanNeighbors <- function(x,y,...){ nnn=neighbors(x,y,...) out <- data.frame(x=nnn$nbh$values, y=sapply(nnn$list,mean)) names(out) <- c("uniqueX","averageY") out } prodlim/R/print.neighborhood.R0000755000175100001440000000110613035633434016071 0ustar hornikusers##' @export "print.neighborhood" <- function(x,...){ n <- x$n size <- x$size.nbh bw <- lapply(x$bandwidth,function(bw)round(bw,3)) cat("Nearest neighborhoods for kernel smoothing\n\n") print(c(bandwidth=as.numeric(bw),kernel=x$kernel,n.obs=x$n,n.values=x$nu),quote=FALSE) cat("\n") print(c("Number of nbh's" = length(size), "Average size"=round(mean(size)), "Min size"=round(min(size)), "Max size"=round(max(size)))) # if (print.it) print(data.frame(Nbh=x$values,First=x$first.nbh,Size=size)) invisible(x) } prodlim/R/neighborhood.R0000755000175100001440000000612613057247641014752 0ustar hornikusers#' Nearest neighborhoods for kernel smoothing #' #' Nearest neighborhoods for the values of a continuous predictor. The result #' is used for the conditional Kaplan-Meier estimator and other conditional #' product limit estimators. #' #' #' @param x Numeric vector -- typically the observations of a continuous random #' variate. #' @param bandwidth Controls the distance between neighbors in a neighborhood. #' It can be a decimal, i.e.\ the bandwidth, or the string `"smooth"', in which #' case \code{N^{-1/4}} is used, \code{N} being the sample size, or \code{NULL} #' in which case the \code{\link{dpik}} function of the package KernSmooth is #' used to find the optimal bandwidth. #' @param kernel Only the rectangular kernel ("box") is implemented. #' @return An object of class 'neighborhood'. The value is a list that #' includes the unique values of `x' (\code{values}) for which a neighborhood, #' consisting of the nearest neighbors, is defined by the first neighbor #' (\code{first.nbh}) of the usually very long vector \code{neighbors} and the #' size of the neighborhood (\code{size.nbh}). #' #' Further values are the arguments \code{bandwidth}, \code{kernel}, the total #' sample size \code{n} and the number of unique values \code{nu}. #' @author Thomas Gerds #' @seealso \code{\link{dpik}}, \code{\link{prodlim}} #' @references Stute, W. "Asymptotic Normality of Nearest Neighbor Regression #' Function Estimates", \emph{The Annals of Statistics}, 1984,12,917--926. #' @keywords smooth #' @examples #' #' d <- SimSurv(20) #' neighborhood(d$X2) #' @export "neighborhood" <- function(x,bandwidth=NULL,kernel="box"){ if (any(is.na(x))) stop("Missing values in x") N <- length(x) if (N<2) stop("Not enough observations for kernel smoothing.") orderx <- order(x) values <- sort(unique(x)) NU <- length(values) workx <- factor(x,labels=1:NU) tabu <- tabulate(workx) cumtabu <- cumsum(tabu) cumtabx <- rep(cumtabu,tabu) tabx <- rep(tabu,tabu) if (!length(bandwidth)){ ## need a bandwidth (dpik is from KernSmooth) ## require(KernSmooth) bandwidth <- KernSmooth::dpik(cumtabx/N,kernel="box") } else if (bandwidth=="smooth") bandwidth <- N^{-1/4} radius <- floor(bandwidth*N) nbh <- .C("neighborhoodSRC", first=integer(NU), size=integer(NU), as.integer(cumtabu), as.integer(cumtabx), as.integer(tabx), as.integer(radius), as.integer(NU), as.integer(N), PACKAGE="prodlim") nall <- sum(nbh$size) nbors <- .C("neighborsSRC", first=nbh$first, size=nbh$size, as.integer(orderx), neighbors=integer(nall), as.integer(NU), PACKAGE="prodlim")$neighbors out <- list(values=values, first.nbh=nbh$first, size.nbh=nbh$size, neighbors=nbors, bandwidth=bandwidth, kernel=kernel, nu=NU, n=N) class(out) <- "neighborhood" out } prodlim/R/plotIllnessDeathModel.R0000755000175100001440000000356413035633434016540 0ustar hornikusers#' Plotting an illness-death-model. #' #' Plotting an illness-death-model using \code{plot.Hist}. #' #' #' @param stateLabels Labels for the three boxes. #' @param style Either \code{1} or anything else, switches the orientation of #' the graph. Hard to explain in words, see examples. #' @param recovery Logical. If \code{TRUE} there will be an arrow from the #' illness state to the initial state. #' @param \dots Arguments passed to plot.Hist. #' @author Thomas Alexander Gerds #' @seealso \code{\link{plotCompetingRiskModel}}, \code{\link{plot.Hist}} #' @keywords survival ##' @examples ##' ##' plotIllnessDeathModel() ##' plotIllnessDeathModel(style=2) ##' plotIllnessDeathModel(style=2, ##' stateLabels=c("a","b\nc","d"), ##' box1.col="yellow", ##' box2.col="green", ##' box3.col="red") #' @export plotIllnessDeathModel <- function(stateLabels, style=1, recovery=FALSE, ...){ if (missing(stateLabels)) labels <- c("Disease\nfree","Illness","Death") if (recovery==TRUE){ idHist <- Hist(time=1:4,event=list(from=c(1,1,2,2),to=c(2,3,1,3))) if (style==1) plot(idHist, stateLabels=stateLabels, box1.row=2, box1.column=1, box2.row=1, box2.column=3, ...) else{ plot(idHist, stateLabels=stateLabels, ...) } } else{ idHist <- Hist(time=1:3,event=list(from=c(1,1,2),to=c(2,3,3))) if (style==1){ plot(idHist, stateLabels=stateLabels, box1.row=2, box1.column=1, box2.row=1, box2.column=3, ...) } else{ plot(idHist, stateLabels=stateLabels, ...) } } } prodlim/R/plot.prodlim.R0000755000175100001440000007426713043403443014726 0ustar hornikusers# {{{ Header #' Plotting event probabilities over time #' #' Function to plot survival and cumulative incidence curves against time. #' #' From version 1.1.3 on the arguments legend.args, atrisk.args, confint.args #' are obsolete and only available for backward compatibility. Instead #' arguments for the invoked functions \code{atRisk}, \code{legend}, #' \code{confInt}, \code{markTime}, \code{axis} are simply specified as #' \code{atrisk.cex=2}. The specification is not case sensitive, thus #' \code{atRisk.cex=2} or \code{atRISK.cex=2} will have the same effect. The #' function \code{axis} is called twice, and arguments of the form #' \code{axis1.labels}, \code{axis1.at} are used for the time axis whereas #' \code{axis2.pos}, \code{axis1.labels}, etc. are used for the y-axis. #' #' These arguments are processed via \code{\dots{}} of \code{plot.prodlim} and #' inside by using the function \code{SmartControl}. Documentation of these #' arguments can be found in the help pages of the corresponding functions. #' #' @aliases plot.prodlim lines.prodlim #' @param x an object of class `prodlim' as returned by the #' \code{prodlim} function. #' @param type Either \code{"surv"} or \code{"cuminc"} controls what # part of the object is plotted. Defaults to \code{object$type}. #' @param cause determines the cause of the cumulative incidence #' function. Currently one cause is allowed at a time, but you may #' call the function again with add=TRUE to add the lines of the other #' causes. #' @param select Select which lines to plot. This can be used when #' there are many strata or many competing risks to select a #' subset of the lines. However, a more clean way to select #' covariate stratat is to use argument \code{newdata}. Another #' application is when there are many competing risks and it is #' desired (for the stacked plot) to stack and show only a subset #' of the cumulative incidence functions. #' @param newdata a data frame containing covariate strata for which #' to show curves. When omitted element \code{X} of object #' \code{x} is used. #' @param add if \code{TRUE} curves are added to an existing plot. #' @param col color for curves. Default is \code{1:number(curves)} #' @param lty line type for curves. Default is 1. #' @param lwd line width for all curves. Default is 3. #' @param ylim limits of the y-axis #' @param xlim limits of the x-axis #' @param ylab label for the y-axis #' @param xlab label for the x-axis #' @param timeconverter The strings are allowed: #' "days2years" (conversion factor: 1/365.25) #' "months2years" (conversion factor: 1/12) #' "days2months" (conversion factor 1/30.4368499) #' "years2days" (conversion factor 365.25) #' "years2months" (conversion factor 12) #' "months2days" (conversion factor 30.4368499) #' @param legend if TRUE a legend is plotted by calling the function #' legend. Optional arguments of the function \code{legend} can #' be given in the form \code{legend.x=val} where x is the name of #' the argument and val the desired value. See also Details. #' @param logrank If TRUE, the logrank p-value will be extracted from #' a call to \code{survdiff} and added to the legend. This works #' only for survival models, i.e. Kaplan-Meier with discrete #' predictors. #' @param marktime if TRUE the curves are tick-marked at right #' censoring times by invoking the function #' \code{markTime}. Optional arguments of the function #' \code{markTime} can be given in the form \code{confint.x=val} #' as with legend. See also Details. #' @param confint if TRUE pointwise confidence intervals are plotted #' by invoking the function \code{confInt}. Optional arguments of #' the function \code{confInt} can be given in the form #' \code{confint.x=val} as with legend. See also Details. #' @param automar If TRUE the function trys to find suitable values #' for the figure margins around the main plotting region. #' @param atrisk if TRUE display numbers of subjects at risk by #' invoking the function \code{atRisk}. Optional arguments of the #' function \code{atRisk} can be given in the form #' \code{atrisk.x=val} as with legend. See also Details. #' @param timeOrigin Start of the time axis #' @param axes If true axes are drawn. See details. #' @param background If \code{TRUE} the background color and grid #' color can be controlled using smart arguments SmartControl, #' such as background.bg="yellow" or #' background.bg=c("gray66","gray88"). The following defaults are #' passed to \code{background} by \code{plot.prodlim}: #' horizontal=seq(0,1,.25), vertical=NULL, bg="gray77", #' fg="white". See \code{background} for all arguments, and the #' examples below. #' @param percent If true the y-axis is labeled in percent. #' @param minAtrisk Integer. Show the curve only until the number #' at-risk is at least \code{minAtrisk} #' @param limit When newdata is not specified and the number of lines #' in element \code{X} of object \code{x} exceeds limits, only the #' results for covariate constellations of the first, the middle #' and the last row in \code{X} are shown. Otherwise all lines of #' \code{X} are shown. #' @param ... Parameters that are filtered by #' \code{\link{SmartControl}} and then passed to the functions #' \code{\link{plot}}, \code{\link{legend}}, \code{\link{axis}}, #' \code{\link{atRisk}}, \code{\link{confInt}}, #' \code{\link{markTime}}, \code{\link{backGround}} #' @return The (invisible) object. #' @note Similar functionality is provided by the function #' \code{\link{plot.survfit}} of the survival library #' @author Thomas Alexander Gerds #' @seealso \code{\link{plot}}, \code{\link{legend}}, #' \code{\link{axis}}, #' \code{\link{prodlim}},\code{\link{plot.Hist}},\code{\link{summary.prodlim}}, #' \code{\link{neighborhood}}, \code{\link{atRisk}}, #' \code{\link{confInt}}, \code{\link{markTime}}, #' \code{\link{backGround}} #' @keywords survival ##' @examples ##' ## simulate right censored data from a two state model ##' set.seed(100) ##' dat <- SimSurv(100) ##' # with(dat,plot(Hist(time,status))) ##' ##' ### marginal Kaplan-Meier estimator ##' kmfit <- prodlim(Hist(time, status) ~ 1, data = dat) ##' plot(kmfit) ##' ##' plot(kmfit,timeconverter="years2months") ##' ##' # change time range ##' plot(kmfit,xlim=c(0,4)) ##' ##' # change scale of y-axis ##' plot(kmfit,percent=FALSE) ##' ##' # mortality instead of survival ##' plot(kmfit,type="cuminc") ##' ##' # change axis label and position of ticks ##' plot(kmfit, ##' xlim=c(0,10), ##' axis1.at=seq(0,10,1), ##' axis1.labels=0:10, ##' xlab="Years", ##' axis2.las=2, ##' atrisk.at=seq(0,10,2.5), ##' atrisk.title="") ##' ##' # change background color ##' plot(kmfit, ##' xlim=c(0,10), ##' confint.citype="shadow", ##' col=1, ##' axis1.at=0:10, ##' axis1.labels=0:10, ##' xlab="Years", ##' axis2.las=2, ##' atrisk.at=seq(0,10,2.5), ##' atrisk.title="", ##' background=TRUE, ##' background.fg="white", ##' background.horizontal=seq(0,1,.25/2), ##' background.vertical=seq(0,10,2.5), ##' background.bg=c("gray88")) ##' ##' # change type of confidence limits ##' plot(kmfit, ##' xlim=c(0,10), ##' confint.citype="dots", ##' col=4, ##' background=TRUE, ##' background.bg=c("white","gray88"), ##' background.fg="gray77", ##' background.horizontal=seq(0,1,.25/2), ##' background.vertical=seq(0,10,2)) ##' ##' ##' ### Kaplan-Meier in discrete strata ##' kmfitX <- prodlim(Hist(time, status) ~ X1, data = dat) ##' plot(kmfitX) ##' # move legend ##' plot(kmfitX,legend.x="bottomleft",atRisk.cex=1.3, ##' atrisk.title="No. subjects") ##' ##' ## Control the order of strata ##' ## since version 1.5.1 prodlim does obey the order of ##' ## factor levels ##' dat$group <- factor(cut(dat$X2,c(-Inf,0,0.5,Inf)), ##' labels=c("High","Intermediate","Low")) ##' kmfitG <- prodlim(Hist(time, status) ~ group, data = dat) ##' plot(kmfitG) ##' ##' ## relevel ##' dat$group2 <- factor(cut(dat$X2,c(-Inf,0,0.5,Inf)), ##' levels=c("(0.5, Inf]","(0,0.5]","(-Inf,0]"), ##' labels=c("Low","Intermediate","High")) ##' kmfitG2 <- prodlim(Hist(time, status) ~ group2, data = dat) ##' plot(kmfitG2) ##' ##' # add log-rank test to legend ##' plot(kmfitX, ##' atRisk.cex=1.3, ##' logrank=TRUE, ##' legend.x="topright", ##' atrisk.title="at-risk") ##' ##' # change atrisk labels ##' plot(kmfitX, ##' legend.x="bottomleft", ##' atrisk.title="Patients", ##' atrisk.cex=0.9, ##' atrisk.labels=c("X1=0","X1=1")) ##' ##' # multiple categorical factors ##' ##' kmfitXG <- prodlim(Hist(time,status)~X1+group2,data=dat) ##' plot(kmfitXG,select=1:2) ##' ##' ### Kaplan-Meier in continuous strata ##' kmfitX2 <- prodlim(Hist(time, status) ~ X2, data = dat) ##' plot(kmfitX2,xlim=c(0,10)) ##' ##' # specify values of X2 for which to show the curves ##' plot(kmfitX2,xlim=c(0,10),newdata=data.frame(X2=c(-1.8,0,1.2))) ##' ##' ### Cluster-correlated data ##' library(survival) ##' cdat <- cbind(SimSurv(20),patnr=sample(1:5,size=20,replace=TRUE)) ##' kmfitC <- prodlim(Hist(time, status) ~ cluster(patnr), data = cdat) ##' plot(kmfitC) ##' plot(kmfitC,atrisk.labels=c("Units","Patients")) ##' ##' kmfitC2 <- prodlim(Hist(time, status) ~ X1+cluster(patnr), data = cdat) ##' plot(kmfitC2) ##' plot(kmfitC2,atrisk.labels=c("Teeth","Patients","Teeth","Patients"), ##' atrisk.col=c(1,1,2,2)) ##' ##' ##' ### Cluster-correlated data with strata ##' n = 50 ##' foo = runif(n) ##' bar = rexp(n) ##' baz = rexp(n,1/2) ##' d = stack(data.frame(foo,bar,baz)) ##' d$cl = sample(10, 3*n, replace=TRUE) ##' fit = prodlim(Surv(values) ~ ind + cluster(cl), data=d) ##' plot(fit) ##' ##' ##' ## simulate right censored data from a competing risk model ##' datCR <- SimCompRisk(100) ##' with(datCR,plot(Hist(time,event))) ##' ##' ### marginal Aalen-Johansen estimator ##' ajfit <- prodlim(Hist(time, event) ~ 1, data = datCR) ##' plot(ajfit) # same as plot(ajfit,cause=1) ##' ##' # cause 2 ##' plot(ajfit,cause=2) ##' ##' # both in one ##' plot(ajfit,cause=1) ##' plot(ajfit,cause=2,add=TRUE,col=2) ##' ##' ### stacked plot ##' ##' plot(ajfit,cause="stacked",select=2) ##' ##' ### stratified Aalen-Johansen estimator ##' ajfitX1 <- prodlim(Hist(time, event) ~ X1, data = datCR) ##' plot(ajfitX1) ##' ##' ## add total number at-risk to a stratified curve ##' ttt = 1:10 ##' plot(ajfitX1,atrisk.at=ttt,col=2:3) ##' plot(ajfit,add=TRUE,col=1) ##' atRisk(ajfit,newdata=datCR,col=1,times=ttt,line=3,labels="Total") ##' ##' ##' ## stratified Aalen-Johansen estimator in nearest neighborhoods ##' ## of a continuous variable ##' ajfitX <- prodlim(Hist(time, event) ~ X1+X2, data = datCR) ##' plot(ajfitX,newdata=data.frame(X1=c(1,1,0),X2=c(4,10,10))) ##' plot(ajfitX,newdata=data.frame(X1=c(1,1,0),X2=c(4,10,10)),cause=2) ##' ##' ## stacked plot ##' ##' plot(ajfitX, ##' newdata=data.frame(X1=0,X2=0.1), ##' cause="stacked", ##' legend.title="X1=0,X2=0.1", ##' legend.legend=paste("cause:",getStates(ajfitX$model.response)), ##' plot.main="Subject specific stacked plot") ##' #' @export plot.prodlim <- function(x, type, cause=1, select, newdata, add = FALSE, col, lty, lwd, ylim, xlim, ylab, xlab="Time", timeconverter, legend=TRUE, logrank=FALSE, marktime=FALSE, confint=TRUE, automar, atrisk=ifelse(add,FALSE,TRUE), timeOrigin=0, axes=TRUE, background=TRUE, percent=TRUE, minAtrisk=0, limit=10, ...){ # }}} # {{{ backward compatibility ## args=match.call(expand=TRUE) ## args[[1]]=list allArgs <- match.call() if (missing(type)){ type=allArgs[[match("what",names(allArgs))]] } # }}} # {{{ extracting a list of lines to draw cens.type <- x$cens.type # uncensored, right or interval censored if (cens.type=="intervalCensored") { confint <- FALSE atrisk <- FALSE } model <- x$model # survival, competing risks or multi-state clusterp <- !is.null(x$clustervar) if (missing(type)||is.null(type)){ type <- x$type ## type <- switch(model,"survival"="surv","competing.risks"="cuminc","multi.states"="hazard") ## if (!is.null(x$reverse) && x$reverse==TRUE && model=="survival") type <- "cuminc" } else type <- match.arg(type,c("surv","cuminc","hazard")) if (model=="competing.risks" && type=="surv") stop("To plot the event-free survival curve, please fit a suitable model: prodlim(Hist(time,status!=0)~....") if (cens.type=="intervalCensored") plot.times <- sort(unique(x$time[2,])) else{ plot.times <- sort(unique(x$time)) if (plot.times[1]>timeOrigin) plot.times <- c(timeOrigin,plot.times) else plot.times <- c(timeOrigin,plot.times[plot.times>timeOrigin]) } if (length(x$clustervar)>0) nRisk <- x$n.risk[,1] else nRisk <- x$n.risk if (minAtrisk>0 && any(nRisk<=minAtrisk)){ if (all(nRisk<=minAtrisk)){ return(plot(0,0,type="n",xlim=c(min(plot.times), max(plot.times)),ylim=c(0, 1),axes=FALSE)) } criticalTime <- min(x$time[nRisk<=minAtrisk]) plot.times <- plot.times[plot.timesminAtrisk])<=1) } if (missing(newdata)) { newdata <- x$X if (NROW(newdata)>limit) newdata <- newdata[c(1,round(median(1:NROW(newdata))),NROW(newdata)),,drop=FALSE] } ## restrict plot.times to xlim if (!missing(xlim)){ if (xlim[1]>plot.times[1]) plot.times <- plot.times[plot.times>=xlim[1]] if (xlim[2]limit) ## newdata <- newdata[c(1,round(median(1:NROW(newdata))),NROW(newdata)),,drop=FALSE] ## browser() stacked <- cause[1]=="stacked" if (stacked){ confint <- FALSE if (model!="competing.risks") stop("Stacked plot works only for competing risks models.") if (NROW(newdata)>1) stop("Stacked plot works only for one covariate stratum.") }else{ if (length(cause)!=1){ warning("Currently only the cumulative incidence of a single cause can be plotted in one go. Use argument add=TRUE to add the lines of the other causes. For now I use the first cause") cause <- cause[1] } } ## Y <- predict(x,times=plot.times,newdata=newdata,level.chaos=1,type=type,cause=cause,mode="list") startValue=ifelse(type=="surv",1,0) if (type=="hazard" && model!="survival") stats=list(c("cause.hazard",0)) else stats=list(c(type,startValue)) if (model=="survival" && type=="cuminc") { startValue=1 stats=list(c("surv",startValue)) } if (confint==TRUE) stats=c(stats,list(c("lower",startValue),c("upper",startValue))) if (x$cens.type=="intervalCensored"){ stop("FIXME: There is no plot method implemented for intervalCensored data.") } if (model=="competing.risks"){ if (stacked) ## all causes cause <- attributes(x$model.response)$states else cause <- checkCauses(cause,x) sumX <- lifeTab(x, times=plot.times, cause=cause, newdata=newdata, stats=stats, percent=FALSE) } else{ sumX <- lifeTab(x, times=plot.times, newdata=newdata, stats=stats, percent=FALSE) } if (model=="competing.risks"){ if (stacked == FALSE){ sumX <- sumX[[cause]] } else { ## there is at most one stratum for each cause if (!is.null(newdata)) sumX <- lapply(sumX,function(cc)cc[[1]]) } } ## cover both no covariate and single newdata: if (!is.null(dim(sumX))) sumX <- list(sumX) if (model=="survival" && type=="cuminc"){ Y <- lapply(sumX,function(x)1-x[,"surv"]) names(Y) <- names(sumX) nlines <- length(Y) } else{ Y <- lapply(sumX,function(x)x[,type]) names(Y) <- names(sumX) if (!missing(select)){ if (length(select)==1) Y <- Y[select] else Y <- Y[select] } nlines <- length(Y) } # }}} # {{{ getting default arguments for plot, atrisk, axes, legend, confint, marktime if (missing(xlim)) xlim <- c(min(plot.times), max(plot.times)) if (!missing(timeconverter)){ units <- strsplit(tolower(as.character(substitute(timeconverter))),"[ \t]?(2|to)[ \t]?")[[1]] conversion <- switch(paste0(units,collapse="-"), "days-years"=1/365.25, "months-years"=1/12, "days-months"=1/30.4368499, "years-days"=365.25, "years-months"=12, "months-days"=30.4368499) one <- switch(units[[1]],"years"=1,"months"=12,"days"=365.25) xlab <- paste0("Time (", units[[2]],")") axis1.DefaultArgs <- list(at=seq(xlim[1],xlim[2],one),labels=seq(xlim[1],xlim[2],one)*conversion) atriskDefaultPosition <- seq(xlim[1],xlim[2],one) } else { if (missing(xlab)) xlab <- "Time" axis1.DefaultArgs <- list() atriskDefaultPosition <- seq(min(plot.times),max(plot.times),(max(plot.times)-min(plot.times))/10) } if (missing(ylab)) ylab <- switch(type, "surv"=ifelse(x$reverse==TRUE,"Censoring probability","Survival probability"), "cuminc"="Cumulative incidence", "hazard"="Cumulative hazard") if (missing(ylim)) ylim <- c(0, 1) if (missing(lwd)) lwd <- rep(3,nlines) if (missing(col)) col <- 1:nlines if (missing(lty)) lty <- rep(1, nlines) if (length(lwd) < nlines) lwd <- rep(lwd, nlines) if (length(lty) < nlines) lty <- rep(lty, nlines) if (length(col) < nlines) col <- rep(col, nlines) background.DefaultArgs <- list(xlim=xlim, ylim=ylim, horizontal=seq(ylim[1],ylim[2],diff(ylim)/4), vertical=NULL, bg="white", fg="gray88") axis2.DefaultArgs <- list(at=seq(ylim[1],ylim[2],ylim[2]/4),side=2) lines.DefaultArgs <- list(type="s") plot.DefaultArgs <- list(x=0,y=0,type = "n",ylim = ylim,xlim = xlim,xlab = xlab,ylab = ylab) marktime.DefaultArgs <- list(x=Y,nlost=lapply(sumX,function(x)x[,"n.lost"]),times=plot.times,pch="I",col=col) if (length(Y)==1 && length(x$clustervar)==0){ atriskDefaultLabels <- "Subjects: " atriskDefaultTitle <- "" } else{ if (length(x$clustervar)>0){ atriskDefaultTitle <- "" atriskDefaultLabels <- rep(paste(c("Subjects","Clusters"),": ",sep=""), nlines) } else{ ## print(names(Y)) if (model=="competing.risks" && stacked==TRUE){ atriskDefaultTitle <- "" atriskDefaultLabels <- "Subjects: " } else{ if ((length(grep("=",names(Y)))==length(names(Y)))){ atriskDefaultLabels <- paste(gsub("[ \t]*$","",sapply(strsplit(names(Y),"="),function(x)x[[2]])), ": ", sep="") atriskDefaultTitle <- unique(sapply(strsplit(names(Y),"="),function(x)x[[1]])) }else{ atriskDefaultTitle <- "" atriskDefaultLabels <- paste(gsub("[ \t]*$","",names(Y)),": ",sep="") } } } ## atriskDefaultLabels <- format(atriskDefaultLabels,justify="left") ## atriskDefaultTitle <- "" } atrisk.DefaultArgs <- list(x=x, newdata=newdata, interspace=1, dist=.3, col=col, labelcol=1, titlecol=1, title=atriskDefaultTitle, labels=atriskDefaultLabels, times=atriskDefaultPosition) if (!missing(select) && (!(model=="competing.risks" && stacked))){ atrisk.DefaultArgs$newdata <- atrisk.DefaultArgs$newdata[select,,drop=FALSE] } legend.DefaultArgs <- list(legend=names(Y), lwd=lwd, col=col, lty=lty, cex=1.5, bty="n", y.intersp=1.3, trimnames=!match("legend.legend",names(allArgs),nomatch=0), x="topright") if (stacked) { legend.DefaultArgs$title <- "Competing risks" legend.DefaultArgs$x <- "topleft" } if (NCOL(newdata)>1) legend.DefaultArgs$trimnames <- FALSE confint.DefaultArgs <- list(x=x, newdata=newdata, type=type, citype="shadow", times=plot.times, cause=cause, density=55, col=col[1:nlines], lwd=rep(2,nlines), lty=rep(3,nlines)) # }}} # {{{ backward compatibility if (match("legend.args",names(allArgs),nomatch=FALSE)){ legend.DefaultArgs <- c(args[[match("legend.args",names(allArgs),nomatch=FALSE)]],legend.DefaultArgs) legend.DefaultArgs <- legend.DefaultArgs[!duplicated(names(legend.DefaultArgs))] } if (match("confint.args",names(allArgs),nomatch=FALSE)){ confint.DefaultArgs <- c(args[[match("confint.args",names(allArgs),nomatch=FALSE)]],confint.DefaultArgs) confint.DefaultArgs <- confint.DefaultArgs[!duplicated(names(confint.DefaultArgs))] } if (match("atrisk.args",names(allArgs),nomatch=FALSE)){ atrisk.DefaultArgs <- c(args[[match("atrisk.args",names(allArgs),nomatch=FALSE)]],atrisk.DefaultArgs) atrisk.DefaultArgs <- atrisk.DefaultArgs[!duplicated(names(atrisk.DefaultArgs))] } if (length(list(...)) && match("legend.legend",names(list(...)),nomatch=FALSE) && any(sapply(newdata,is.factor))){ message("Since version 1.5.1 prodlim obeys the order of factor levels.\nThis may break old code which explicitly defines the legend labels.") } smartA <- SmartControl(call= list(...), keys=c("plot","lines","atrisk","legend","confint","background","marktime","axis1","axis2"), ignore=c("x","type","cause","newdata","add","col","lty","lwd","ylim","xlim","xlab","ylab","legend","marktime","confint","automar","atrisk","timeOrigin","percent","axes","atrisk.args","confint.args","legend.args"), defaults=list("plot"=plot.DefaultArgs,"atrisk"=atrisk.DefaultArgs,"lines"=lines.DefaultArgs,"legend"=legend.DefaultArgs,"confint"=confint.DefaultArgs,"marktime"=marktime.DefaultArgs,"background"=background.DefaultArgs,"axis1"=axis1.DefaultArgs,"axis2"=axis2.DefaultArgs), forced=list("plot"=list(axes=FALSE),"axis1"=list(side=1)), ignore.case=TRUE, replaceDefaults=FALSE, verbose=TRUE) # }}} # {{{ setting margin parameters if (atrisk==TRUE){ oldmar <- par()$mar if (missing(automar) || automar==TRUE){ ## bottomMargin = margin line (in 'mex' units) for xlab ## + distance of xlab from xaxis ## + distance of atrisk numbers from xlab ## + number of atrisk lines ## + one extra line below the bottom number atrisk line ## leftSideMargin = margin line + atrisk.lab bottomMargin <- par()$mgp[2] + smartA$atrisk$dist+ ifelse(clusterp,2,1)*nlines + 1 ## smartA$atrisk$labels maxlabellen <- max(strwidth(c(smartA$atrisk$labels,smartA$atrisk$title), cex=smartA$atrisk$cex, units="inches")) maxlabellen <- pmax(maxlabellen * (par("mar")[2] / par("mai")[2]),par("mar")[2]) leftMargin <- maxlabellen+2-par("mar")[2] newmar <- par()$mar + c(bottomMargin,leftMargin,0,0) par(mar=newmar) } } # }}} # {{{ plot and backGround if (!add) { do.call("plot",smartA$plot) ## if (background==TRUE && match("bg",names(smartA$background),nomatch=FALSE)){ ## par(bg=smartA$background$bg) ## } if (background==TRUE){ do.call("backGround",smartA$background) } } # }}} # {{{ axes if (!add) { if (axes){ do.call("axis",smartA$axis1) if (percent & is.null(smartA$axis2$labels)) smartA$axis2$labels <- paste(100*smartA$axis2$at,"%") do.call("axis",smartA$axis2) } } if (atrisk==TRUE) par(mar=oldmar) ## reset # }}} # {{{ pointwise confidence intervals if (confint==TRUE) { ## if (verbose==TRUE){print(smartA$confint)} do.call("confInt",smartA$confint) } # }}} # {{{ adding the lines lines.type <- smartA$lines$type if (stacked==TRUE){ if (length(Y)>1){ nY <- names(Y) Y <- apply(do.call("rbind",Y),2,cumsum) Y <- lapply(1:nlines,function(i)Y[i,]) names(Y) <- nY } ## names(Y) <- attr(x$model.response,"states") nix <- lapply(1:nlines, function(s) { yyy <- Y[[s]] ppp <- plot.times pos.na <- is.na(yyy) ppp <- ppp[!pos.na] yyy <- yyy[!pos.na] lines(x = ppp,y = yyy,type = lines.type,col = col[s],lty = lty[s],lwd = lwd[s]) cc <- dimColor(col[s],density=55) ttt <- ppp nt <- length(ttt) ttt <- c(ttt,ttt) uuu <- c(0,yyy[-nt],yyy) if (s==1) lll <- rep(0,nt*2) else lll <- c(0,Y[[s-1]][!pos.na][-nt],Y[[s-1]][!pos.na]) neworder <- order(ttt) uuu <- uuu[neworder] lll <- lll[neworder] ttt <- sort(ttt) polygon(x=c(ttt,rev(ttt)),y=c(lll,rev(uuu)),col=cc,border=NA) }) }else{ nix <- lapply(1:nlines, function(s) { lines(x = plot.times, y = Y[[s]], type = lines.type, col = col[s], lty = lty[s], lwd = lwd[s]) }) } # }}} # {{{ marks at the censored times if (marktime==TRUE){ if (model %in% c("survival","competing.risks")){ do.call("markTime",smartA$marktime) } else{ message("Marking the curves at censored times is not yet available for multi-state models.") } } # }}} # {{{ adding the no. of individuals at risk if (atrisk==TRUE && !add){ if (hit <- match("at",names(smartA$atrisk),nomatch=FALSE)){ if (match("atrisk.times",names(list(...)),nomatch=FALSE)){ warning("Atrisk argument clash: remove either 'atrisk.at' or 'atrisk.times'.") } else{ names(smartA$atrisk)[hit] <- "times" smartA$atrisk <- smartA$atrisk[!duplicated(names(smartA$atrisk))] } } do.call("atRisk",smartA$atrisk) } # }}} # {{{ legend if(legend==TRUE && !add && !is.null(names(Y))){ if (smartA$legend$trimnames==TRUE && (length(grep("=",smartA$legend$legend))==length(smartA$legend$legend))){ smartA$legend$legend <- sapply(strsplit(smartA$legend$legend,"="),function(x)x[[2]]) if (is.null(smartA$legend$title)) smartA$legend$title <- unique(sapply(strsplit(names(Y),"="),function(x)x[[1]])) } smartA$legend <- smartA$legend[-match("trimnames",names(smartA$legend))] save.xpd <- par()$xpd if (logrank && model=="survival" && length(smartA$legend$legend)>1){ ## formula.names <- try(all.names(formula),silent=TRUE) lrform <- x$call$formula if (lrform[[2]][[1]]==as.name("Hist")) lrform[[2]][[1]] <- as.name("Surv") ## require(survival) lrtest <- survival::survdiff(eval(lrform),data=eval(x$call$data)) if (length(lrtest$n) == 1) { p <- 1 - pchisq(lrtest$chisq, 1) } else{ if (is.matrix(x$obs)) { etmp <- apply(lrtest$exp, 1, sum) } else { etmp <- lrtest$exp } df <- (sum(1 * (etmp > 0))) - 1 p <- 1 - pchisq(lrtest$chisq, df) } if (length(smartA$legend$title)) smartA$legend$title <- paste(smartA$legend$title," Log-rank: p=",format.pval(p,digits=logrank,eps=0.0001)) else smartA$legend$title <- paste(" Log-rank: ",format.pval(p,digits=logrank,eps=0.0001)) } par(xpd=TRUE) do.call("legend",smartA$legend) par(xpd=save.xpd) } # }}} invisible(x) } prodlim/R/stopTime.R0000644000175100001440000000610213035633434014071 0ustar hornikusers### stopTime.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Nov 28 2015 (10:07) ## Version: ## last-updated: Dec 4 2015 (06:57) ## By: Thomas Alexander Gerds ## Update #: 23 #---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: ##' All event times are stopped at a given time point and ##' corresponding events are censored ##' ##' @title Stop the time of an event history object ##' @param object Event history object as obtained with \code{Hist} ##' @param stop.time Time point at which to stop the event history object ##' @return Stopped event history object where all times are censored ##' at \code{stop.time}. All observations with times greater than \code{stop.time} ##' are set to \code{stop.time} and the event status is set to \code{attr(object,"cens.code")}. ##' A new column \code{"stop.time"} is equal to \code{1} for stopped observations ##' and equal to \code{0} for the other observations. ##' @seealso Hist ##' @examples ##' ##' set.seed(29) ##' d <- SimSurv(10) ##' h <- with(d,Hist(time,status)) ##' h ##' stopTime(h,8) ##' stopTime(h,5) ##' ##' ## works also with Surv objects ##' library(survival) ##' s <- with(d,Surv(time,status)) ##' stopTime(s,5) ##' ##' ## competing risks ##' set.seed(29) ##' dr <- SimCompRisk(10) ##' hr <- with(dr,Hist(time,event)) ##' hr ##' stopTime(hr,8) ##' stopTime(hr,5) ##' ##' @export ##' @author Thomas A. Gerds stopTime <- function(object,stop.time){ if (missing(stop.time)) stop("Argument stop.time missing. Need a time point at which to stop the event history.") if (length(stop.time)>1) { warning("Argument stop.time is a vector. Proceed with the first element.") stop.time <- stop.time[[1]] } cc <- class(object)[[1]] stopifnot(cc%in% c("Hist","Surv")) if (cc=="Surv"){ model <- "survival" }else{ model <- attr(object,"model") if(!(model %in% c("survival","competing.risks"))) stop(paste("Don't know (not yet) how to stop this type of model:",model)) } stopped <- object[,"time"] >= stop.time sobject <- cbind(object,"stopped"=1*stopped) sobject[,"status"][stopped] <- 0 if(model=="competing.risks") sobject[,"event"][stopped] <- length(attr(object,"states"))+1 sobject[,"time"][stopped] <- stop.time attr(sobject,"stop.time") <- stop.time attr(sobject,"class") <- attr(object,"class") if (cc=="Surv"){ attr(sobject,"type") <- attr(object,"type") } attr(sobject,"states") <- attr(object,"states") attr(sobject,"model") <- attr(object,"model") attr(sobject,"cens.type") <- attr(object,"cens.type") attr(sobject,"cens.code") <- attr(object,"cens.code") attr(sobject,"entry.type") <- attr(object,"entry.type") sobject } #---------------------------------------------------------------------- ### stopTime.R ends here prodlim/R/prodlim-package.R0000644000175100001440000000225713057244334015334 0ustar hornikusers### prodlim-package.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Apr 24 2015 (09:08) ## Version: ## last-updated: Mar 6 2017 (12:32) ## By: Thomas Alexander Gerds ## Update #: 8 #---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: #' Functions for estimating probabilities from right censored data #' #' @docType package #' @name prodlim #' @useDynLib prodlim, .registration=TRUE #' @importFrom survival survdiff Surv cluster #' @importFrom stats quantile #' @import lava #' @importFrom Rcpp sourceCpp ## --> importFrom KernSmooth dpik #' @importFrom graphics abline axis lines mtext par plot points polygon rect segments strheight strwidth text #' @importFrom stats .getXlevels delete.response drop.terms formula get_all_vars median model.frame model.matrix model.response na.omit pchisq predict qnorm reformulate terms update update.formula NULL #---------------------------------------------------------------------- ### prodlim-package.R ends here prodlim/R/model.design.R0000644000175100001440000003064713035633434014650 0ustar hornikusers##' Extract design matrix and data specials from a model.frame ##' ##' The function separates special terms from the unspecial terms and returns ##' a list of design matrices, one for unspecial terms and one for each special. ##' Some special specials cannot or should not be evaluated in ##' data. E.g., \code{y~a+dummy(x)+strata(v)} the function strata can and should be evaluated, ##' but in order to have \code{model.frame} also evaluate dummy(x) one would be to define ##' and export the function \code{dummy}. Still the term \code{dummy(x)} can be used ##' to identify a special treatment of the variable \code{x}. To deal with this case, ##' one can specify \code{stripSpecials="dummy"}. In addition, the data ##' should include variables \code{strata(z)} and \code{x}, not \code{dummy(x)}. ##' See examples. ##' The function \code{untangle.specials} of the survival function does a similar job. ##' @title Extract a design matrix and specials from a model.frame ##' @param terms terms object as obtained either with function \code{terms} or \code{strip.terms}. ##' @param data A data set in which terms are defined. ##' @param xlev a named list of character vectors giving the full set of levels to be assumed for the factors. ##' Can have less elements, in which case the other levels are learned from the \code{data}. ##' @param dropIntercept If TRUE drop intercept term from the design ##' matrix ##' @param maxOrder An error is produced if special variables are ##' involved in interaction terms of order higher than max.order. ##' @param unspecialsDesign A logical value: if \code{TRUE} apply ##' \code{\link{model.matrix}} to unspecial covariates. If ##' \code{FALSE} extract unspecial covariates from data. ##' @param specialsFactor A character vector containing special ##' variables which should be coerced into a single factor. If ##' \code{TRUE} all specials are treated in this way, if \code{FALSE} ##' none of the specials is treated in this way. ##' @param specialsDesign A character vector containing special ##' variables which should be transformed into a design matrix via ##' \code{\link{model.matrix}}. If \code{TRUE} all specials are ##' treated in this way. ##' @return A list which contains ##' - the design matrix with the levels of the variables stored in attribute 'levels' ##' - separate data.frames which contain the values of the special variables. ##' @seealso \code{\link{EventHistory.frame}} model.frame terms model.matrix .getXlevels ##' @examples ##' # specials that are evaluated. here ID needs to be defined ##' set.seed(8) ##' d <- data.frame(y=rnorm(5),x=factor(c("a","b","b","a","c")),z=c(2,2,7,7,7),v=sample(letters)[1:5]) ##' d$z <- factor(d$z,levels=c(1:8)) ##' ID <- function(x)x ##' f <- formula(y~x+ID(z)) ##' t <- terms(f,special="ID",data=d) ##' mda <- model.design(terms(t),data=d,specialsFactor=TRUE) ##' mda$ID ##' mda$design ##' ## ##' mdb <- model.design(terms(t),data=d,specialsFactor=TRUE,unspecialsDesign=FALSE) ##' mdb$ID ##' mdb$design ##' ##' # set x-levels ##' attr(mdb$ID,"levels") ##' attr(model.design(terms(t),data=d,xlev=list("ID(z)"=1:10), ##' specialsFactor=TRUE)$ID,"levels") ##' ##' # special specials (avoid define function SP) ##' f <- formula(y~x+SP(z)+factor(v)) ##' t <- terms(f,specials="SP",data=d) ##' st <- strip.terms(t,specials="SP",arguments=NULL) ##' md2a <- model.design(st,data=d,specialsFactor=TRUE,specialsDesign="SP") ##' md2a$SP ##' md2b <- model.design(st,data=d,specialsFactor=TRUE,specialsDesign=FALSE) ##' md2b$SP ##' ##' # special function with argument ##' f2 <- formula(y~x+treat(z,power=2)+treat(v,power=-1)) ##' t2 <- terms(f2,special="treat") ##' st2 <- strip.terms(t2,specials="treat",arguments=list("treat"=list("power"))) ##' model.design(st2,data=d,specialsFactor=FALSE) ##' model.design(st2,data=d,specialsFactor=TRUE) ##' model.design(st2,data=d,specialsDesign=TRUE) ##' ##' library(survival) ##' data(pbc) ##' t3 <- terms(Surv(time,status!=0)~factor(edema)*age+strata(I(log(bili)>1))+strata(sex), ##' specials=c("strata","cluster")) ##' st3 <- strip.terms(t3,specials=c("strata"),arguments=NULL) ##' md3 <- model.design(terms=st3,data=pbc[1:4,]) ##' md3$strata ##' md3$cluster ##' ##' f4 <- Surv(time,status)~age+const(factor(edema))+strata(sex,test=0)+prop(bili,power=1)+tp(albumin) ##' t4 <- terms(f4,specials=c("prop","timevar","strata","tp","const")) ##' st4 <- strip.terms(t4, ##' specials=c("prop","timevar"), ##' unspecials="prop", ##' alias.names=list("timevar"="strata","prop"=c("const","tp")), ##' arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) ##' formula(st4) ##' md4 <- model.design(st4,data=pbc[1:4,],specialsDesign=TRUE) ##' md4$prop ##' md4$timevar ##' ##' @author Thomas A. Gerds ##' @export model.design <- function(terms, data, xlev=NULL, dropIntercept=FALSE, maxOrder=1, unspecialsDesign=TRUE, specialsFactor=FALSE, specialsDesign=FALSE){ # {{{ analyse the terms if (missing(terms)) terms <- attr(data,"terms") if (!inherits(terms, "terms")) stop(gettextf("'terms' must be an object of class %s", dQuote("terms")), domain = NA) response <- attr(terms,"response") if (response==1) terms <- delete.response(terms) if (dropIntercept) attr(terms, "intercept") <- 1 design <- attr(terms,"factor") varnames <- rownames(design) termsOrder <- attr(terms,"order") stripped.position <- attr(terms,"stripped.specials") stripped.arguments <- attr(terms,"stripped.arguments") stripped.position <- stripped.position[sapply(stripped.position,length)>0] stripped <- names(stripped.position) specials.position <- attr(terms,"specials") specials.position <- specials.position[sapply(specials.position,length)>0] specials <- c(names(specials.position),stripped) names(specials) <- specials if (is.logical(specialsDesign) && (specialsDesign==TRUE)){ specialsDesign <- specials } if (is.logical(specialsFactor) && (specialsFactor==TRUE)){ specialsFactor <- specials } # }}} if (length(specials)>0){ # {{{ extract information about specials specialInfo <- lapply(specials,function(spc){ if (match(spc,stripped,nomatch=0)) ## delete.response does not know about stripped terms ## so, we need to adjust manually pos <- stripped.position[[spc]]-response else pos <- specials.position[[spc]] ## print(pos) ## print(class(design)) ## print(design) ## print(NCOL(design)) if (NCOL(design)>0 && NROW(design)>0){ ## class(design)=="matrix") ff <- apply(design[pos,,drop=FALSE],2,sum) } else{ ## stopifnot(pos==1) ## there is only one variable ff <- 1 } terms <- seq(ff)[ff>0] if (any(termsOrder[terms]>maxOrder)) stop(paste(spc, " can not be used in an interaction of order higher than ", maxOrder, sep=""),call.=FALSE) ## extract additional arguments from term.labels spc.vnames <- varnames[pos] list(vars=varnames[pos],terms=as.vector(terms)) }) specialTerms <- unlist(lapply(specialInfo,function(x)x$terms)) termLabels <- attr(terms,"term.labels") ## only specials if (length(termLabels) == length(specialTerms)){ unspecialTerms <- NULL }else{ unspecialTerms <- drop.terms(terms,specialTerms) } # }}} # {{{ loop over specials specialFrames <- lapply(specials,function(sp){ Info <- specialInfo[[sp]] sp.terms <- attr(terms, "term.labels")[Info$terms] spTerms <- terms[Info$terms] attr(spTerms,"specials") <- NULL if (length(xlev)>0){ spLevels <- xlev[match(sp.terms,names(xlev),nomatch=0)] if (length(spLevels)>0) spData <- model.frame(spTerms,data=data,xlev=spLevels) else spData <- model.frame(spTerms,data) } else{ spData <- model.frame(spTerms,data) } spLevels <- .getXlevels(spTerms,spData) if (match(sp,stripped,nomatch=0)){ ## stripped specials may have arguments ## in which case we need to know which ## columns are affected vars <- names(stripped.arguments[[sp]]) mterms <- lapply(vars,function(v){ if (match(v,names(spLevels),nomatch=0)) paste(v,spLevels[[v]],sep="") else v}) names(mterms) <- vars stripped.args <- stripped.arguments[[sp]] arg.names <- names(stripped.args[[1]]) arguments.terms <- lapply(arg.names,function(a){ unlist(lapply(names(stripped.args),function(var){ val <- stripped.args[[var]][[a]] if (length(val)==0) val <- NA tmp <- rep(val,length(mterms[[var]])) names(tmp) <- mterms[[var]] tmp }))}) names(arguments.terms) <- arg.names } if (sp %in% specialsDesign){ spMatrix <- model.matrix(spTerms,data=spData,xlev=spLevels)[,-1,drop=FALSE] attr(spMatrix,"levels") <- spLevels if (match(sp,stripped,nomatch=0)){ attr(spMatrix,"arguments") <- stripped.arguments[[sp]] attr(spMatrix,"arguments.terms") <- arguments.terms attr(spMatrix,"matrix.terms") <- mterms } spMatrix }else{ if (sp %in% specialsFactor){ ## force into a single factor ## in this case ignore any arguments if (NCOL(spData)>1) { cnames <- colnames(spData) spData <- data.frame(apply(spData,1,paste,collapse=", ")) names(spData) <- paste(cnames,collapse=", ") } } else{ if (match(sp,stripped,nomatch=0)){ ## stripped specials may have arguments attr(spData,"arguments") <- stripped.arguments[[sp]] attr(spData,"arguments.terms") <- arguments.terms } } attr(spData,"levels") <- spLevels spData } }) # }}} # {{{ unspecials if (length(unspecialTerms)>0){ if (length(xlev)>0){ uLevels <- xlev[match(attr(unspecialTerms,"term.labels"),names(xlev),nomatch=0)] if (length(uLevels)>0) X <- model.frame(unspecialTerms,data=data,xlev=uLevels) else X <- model.frame(unspecialTerms,data=data) } else{ X <- model.frame(unspecialTerms,data) } uLevels <- .getXlevels(unspecialTerms,X) if (unspecialsDesign==TRUE){ X <- model.matrix(unspecialTerms,data,xlev=uLevels) if (dropIntercept) X <- X[,-1,drop=FALSE] } } else { X <- NULL uLevels <- NULL } attr(X,"levels") <- uLevels c(list(design=X),specialFrames) # }}} }else{ # {{{ no specials if (length(xlev)>0){ levels <- xlev[match(attr(terms,"term.labels"),names(xlev),nomatch=0)] if (length(levels)>0) X <- model.frame(terms,data=data,xlev=uLevels) else X <- model.frame(terms,data) } else{ X <- model.frame(terms,data) } levels <- .getXlevels(terms,X) if (unspecialsDesign==TRUE){ X <- model.matrix(terms,data,xlev=levels) if (dropIntercept) X <- X[,-1,drop=FALSE] } attr(X,"levels") <- levels list(design=X) # }}} } } prodlim/R/SimSurv.R0000755000175100001440000000574213035633434013711 0ustar hornikusers##' Simulate right censored survival data with two covariates X1 and X2, both have effect exp(1) on the hazard of the unobserved event time. ##' ##' This function calls \code{survModel}, then adds covariates and finally calls \code{sim.lvm}. ##' @title Simulate survival data ##' @param N sample size ##' @param ... do nothing ##' @return data.frame with simulated data ##' @references Bender, Augustin & Blettner. Generating survival times to simulate Cox proportional hazards models. Statistics in Medicine, 24: 1713-1723, 2005. ##' @author Thomas Alexander Gerds ##' @examples ##' ##' SimSurv(10) ##' ##' @export SimSurv <- function(N, ...){ m <- survModel() regression(m,from="X1",to="eventtime") <- 1 regression(m,from="X2",to="eventtime") <- 1 distribution(m,"X1") <- binomial.lvm() m <- eventTime(m,time~min(eventtime=1,censtime=0),"status") sim(m,N) } ## SimSurvInternalIntervalCensored <- function(N, ## unit, ## lateness, ## compliance, ## withdraw.time, ## event.time){ ## Intervals <- do.call("rbind",lapply(1:N,function(i){ ## schedule <- seq(0,withdraw.time[i],unit) ## M <- length(schedule) ## g <- c(0,rep(unit,M)) ## # introduce normal variation of the visit times ## g <- g+c(abs(rnorm(1,0,lateness)),rnorm(M,0,lateness)) ## grid <- c(0,cumsum(g)) ## # remove visits after the end of follow-up time ## grid <- grid[grid0) ## missed <- rbinom(length(grid),1,compliance)==0 ## grid <- grid[missed==FALSE] ## } ## if (length(grid)==0){ ## L <- 0 ## R <- Inf ## } ## else{ ## posTime <- sindex(jump.times=grid, ## eval.times=event.time[i]) ## L <- grid[posTime] ## R <- grid[posTime+1] ## if (is.na(R)){ ## R <- Inf ## } ## } ## c(L=L,R=R) ## })) ## out <- data.frame(Intervals) ## out ## } # }}} # {{{ find.baseline ## find.baseline <- function(x=.5, ## setting, ## verbose=FALSE){ ## N <- setting$N ## f <- function(y){ ## setting$cens.baseline <- y ## ncens <- sum(do.call("SimSurv",replace(setting,"verbose",verbose))$status==0) ## x-ncens/N ## } ## base.cens <- uniroot(f,c(exp(-50),1000000),tol=.0000001,maxiter=100)$root ## new.setting <- setting ## new.setting$cens.baseline <- base.cens ## do.call("SimSurv",replace(new.setting,"verbose",TRUE)) ## new.setting ## } # }}} # {{{quantile.SimSurv ## quantile.SimSurv <- function(x,B=10,na.rm=FALSE,probs=.9){ ## callx <- attr(x,"call") ## nix <- do.call("rbind",lapply(1:B,function(b){ ## quantile(eval(callx)$time,probs) ## })) ## nix <- colMeans(nix) ## nix ## } # }}} prodlim/R/checkCauses.R0000644000175100001440000000233113035633434014506 0ustar hornikusers### checkCauses.R --- #---------------------------------------------------------------------- ## author: Thomas Alexander Gerds ## created: Sep 10 2015 (11:56) ## Version: ## last-updated: Sep 28 2015 (10:03) ## By: Thomas Alexander Gerds ## Update #: 3 #---------------------------------------------------------------------- ## ### Commentary: ## ### Change Log: #---------------------------------------------------------------------- ## ### Code: checkCauses <- function(cause,object){ cause <- unique(cause) fitted.causes <- attributes(object$model.response)$states ## stopifnot(length(fitted.causes)==length(object$n.event)) if (!is.numeric(cause)){ Found <- match(as.character(cause),fitted.causes,nomatch=0) if (any(Found==0)) stop("Cannot find competing cause(s) ", as.character(cause)[Found==0], "in fitted object.") return(cause) }else{ if (length(fitted.causes) #' @seealso \code{\link{plotIllnessDeathModel}}, \code{\link{plot.Hist}} #' @keywords survival #' @examples #' #' plotCompetingRiskModel() #' plotCompetingRiskModel(labels=c("a","b")) #' plotCompetingRiskModel(labels=c("a","b","c")) #' #' @export plotCompetingRiskModel <- function(stateLabels,horizontal=TRUE,...){ if (missing(stateLabels)) stateLabels <- c("Disease\nfree","Cause1","Cause2") nTrans <- length(stateLabels)-1 if (horizontal==TRUE){ comprisk.model <- data.frame(time=1:3,status=1:3) CRHist <- with(comprisk.model,Hist(time,status,cens.code=2)) plot(CRHist,stateLabels=stateLabels,...) } else{ crHist <- Hist(time=1:nTrans,event=list(from=rep("1",nTrans),to=stateLabels[-1])) nrow <- 3 if (nTrans/2==round(nTrans/2)){ ncol <- nTrans+1 midCol <- ceiling(ncol/2) columns <- c(midCol,(1:ncol)[-midCol]) names(columns) <- paste("box",1:length(stateLabels),".column",sep="") rows <- c(1,rep(3,nTrans)) names(rows) <- paste("box",1:length(stateLabels),".row",sep="") } else{ ncol <- nTrans columns <- c(nTrans+1/2,1:nTrans) names(columns) <- paste("box",1:length(stateLabels),".column",sep="") rows <- c(1,rep(3,nTrans)) names(rows) <- paste("box",2:length(stateLabels),".row",sep="") } do.call("plot.Hist",c(list(x=crHist,stateLabels=stateLabels,nrow=nrow,ncol=ncol,...),columns,rows)) } } prodlim/R/SimCompRisk.R0000755000175100001440000000147413035633434014477 0ustar hornikusers##' Simulate right censored competing risks data with two covariates X1 and X2. Both covariates have effect exp(1) on the hazards of event 1 and zero effect on the hazard of event 2. ##' ##' This function calls \code{crModel}, then adds covariates and finally calls \code{sim.lvm}. ##' @title Simulate competing risks data ##' @param N sample size ##' @param ... do nothing. ##' @return data.frame with simulated data ##' @author Thomas Alexander Gerds ##' @examples ##' ##' SimCompRisk(10) ##' ##' @export SimCompRisk <- function(N, ...){ ## require(lava) m <- crModel() regression(m,from="X1",to="eventtime1") <- 1 regression(m,from="X2",to="eventtime1") <- 1 distribution(m,"X1") <- binomial.lvm() out <- sim(m,N) ## for backward compatibility out$cause <- out$event out } prodlim/R/resolveX.R0000755000175100001440000000120613035633434014077 0ustar hornikusersresolveX <- function(object,N){ if (missing(object)) X <- NULL if (!missing(object) && (is.null(object)|| (is.logical(object) && object==FALSE))) X <- NULL else{ ## if the object is a matrix then do nothing if (is.matrix(object) && NROW(object)==N) X <- object else X <- data.frame(sapply(object, function(x) { ## each entry is either a distribution to draw from if (is.character(x[[1]]) || is.function(x[[1]])) do.call(x[[1]], c(n = N, x[-1])) else{ ## or a vector of numeric values stopifnot(is.numeric(x) && length(x)==N) x} })) } X } prodlim/R/summary.Hist.R0000755000175100001440000001170413035633434014677 0ustar hornikusers#' Summary of event histories #' #' Describe events and censoring patterns of an event history. #' #' #' @param object An object with class `Hist' derived with \code{\link{Hist}} #' @param verbose Logical. If FALSE any printing is supressed. #' @param \dots Not used #' @return \code{NULL} for survival and competing risk models. For other #' multi-state models, it is a list with the following entries: #' \item{states}{the states of the model} \item{transitions}{the transitions #' between the states} \item{trans.frame}{a data.frame with the from and to #' states of the transitions} #' @author Thomas A. Gerds \email{tag@@biostat.ku.dk} #' @seealso \code{\link{Hist}}, \code{\link{plot.Hist}} #' @keywords survival #' @examples #' #' icensFrame <- data.frame(L=c(1,1,3,4,6),R=c(2,NA,3,6,9),event=c(1,1,1,2,2)) #' with(icensFrame,summary(Hist(time=list(L,R)))) #' #' @export summary.Hist <- function(object, verbose=TRUE,...){ D <- object[,"status",drop=TRUE] states <- attr(object,"states") cens.code <- attr(object,"cens.code") # {{{ resolving events and model states model <- attr(object,"model") model.string <- paste("response of a", model,"model") if (model=="multi.states"){ from <- object[,"from"] to <- object[,"to"] code.from <- getEvent(object,mode="factor",column="from") code.to <- getEvent(object,mode="factor",column="to") state.types <- factor(as.numeric(match(states,unique(code.from),nomatch=0)!=0) + 2*as.numeric(match(states,unique(code.to),nomatch=0)!=0),levels=c(1,2,3)) names(state.types) <- states levels(state.types) <- c("initial","absorbing","transient") state.types <- table(state.types) } else{ from <- rep("initial",NROW(object)) code.to <- getEvent(object,mode="factor",column=ifelse(model=="survival","status","event")) code.from <- factor(from) state.types <- c(1,length(states)) names(state.types) <- c("initial","absorbing") } # }}} # {{{ transition frame ## trans.frame <- unique(data.frame(from=code.from,to=code.to),MARGIN=1) trans.frame <- data.frame(from=code.from,to=code.to) Transitions <- apply(cbind(as.character(code.from),as.character(code.to)),1,paste,collapse=" -> ") obnoxious.factor.levels <- unique(Transitions) Transitions <- factor(Transitions,obnoxious.factor.levels) transitions <- table(Transitions) summary.out <- list(states=state.types,transitions=transitions,trans.frame=trans.frame) if (verbose==TRUE){ state.table <- as.matrix(transitions) colnames(state.table) <- c("Freq") } # }}} # {{{ resolving the censoring mechanism if (verbose==TRUE){ ## event time cens.type <- attr(object,"cens.type") ## cens.string <- capitalize(cens.type) cens.string <- switch(cens.type, "intervalCensored"="Interval-censored", "rightCensored"="Right-censored", "uncensored"="Uncensored") Observations <- switch(cens.type, "intervalCensored"=factor(D,levels=c(1,2,0),labels=c("exact.time","interval-censored","right-censored")), "rightCensored"=factor(D,levels=c(1,0),labels=c("event","right.censored")), "uncensored"=factor(D,labels=c("event"))) Freq <- table(Observations) ## entry time entry.type <- attr(object,"entry.type") if (entry.type!="") entry.string <- paste(" with ",entry.type," entry time",sep="") else entry.string <- "" ## stop time stop.time <- attr(object,"stop.time") if (is.null(stop.time)) stop.string <- "" else stop.string <- paste(" stopped at time ",stop.time,sep="") cat("\n", cens.string, " ", model.string, entry.string, stop.string, "\n", sep="") cat("\nNo.Observations:",NROW(object),"\n\nPattern:\n") switch(model,"survival"={ prmatrix(cbind(names(Freq),Freq), quote=FALSE, rowlab=rep("",NROW(Freq)))}, "competing.risks"={ events <- getEvent(object) prout <- table("Cause"=events,as.character(Observations)) print(prout) }, "multi.states"={ x=table(Transitions,Observations) aaa=sapply(strsplit(rownames(x)," -> "),function(x)x[1]) bbb=sapply(strsplit(rownames(x)," -> "),function(x)x[1]) print(x[order(aaa,bbb),,drop=FALSE]) }) } # }}} invisible(summary.out) } ## capitalize <- function(x) { ## s <- strsplit(x, " ")[[1]] ## paste(toupper(substring(s, 1,1)), substring(s, 2), sep="", collapse=" ") ## } prodlim/R/predict.prodlim.R0000755000175100001440000003424213035633434015375 0ustar hornikusers#' Predicting event probabilities from product limit estimates #' #' Evaluation of estimated survival or event probabilities at given times and #' covariate constellations. #' #' Predicted (survival) probabilities are returned that can be plotted, #' summarized and used for inverse of probability of censoring weighting. #' #' @aliases predict.prodlim predictSurv predictCuminc #' @param object A fitted object of class "prodlim". #' @param times Vector of times at which to return the estimated probabilities. #' @param newdata A data frame with the same variable names as those that #' appear on the right hand side of the 'prodlim' formula. If there are #' covariates this argument is required. #' @param level.chaos Integer specifying the sorting of the output: `0' sort by #' time and newdata; `1' only by time; `2' no sorting at all #' @param type Choice between "surv","cuminc","list": #' #' "surv": predict survival probabilities only survival models #' #' "cuminc": predict cumulative incidences only competing risk models #' #' "list": find the indices corresponding to times and newdata. See value. #' #' Defaults to "surv" for two-state models and to "cuminc" for competing risk #' models. #' @param mode Only for \code{type=="surv"} and \code{type=="cuminc"}. Can #' either be "list" or "matrix". For "matrix" the predicted probabilities will #' be returned in matrix form. #' @param bytime Logical. If TRUE and \code{mode=="matrix"} the matrix with #' predicted probabilities will have a column for each time and a row for each #' newdata. Only when \code{object$covariate.type>1} and more than one time is #' given. #' @param cause The cause for predicting the cause-specific cumulative #' incidence function in competing risk models. #' @param \dots Only for compatibility reasons. #' @return \code{type=="surv"} A list or a matrix with survival probabilities #' for all times and all newdata. #' #' \code{type=="cuminc"} A list or a matrix with cumulative incidences for all #' times and all newdata. #' #' \code{type=="list"} A list with the following components: #' #' \item{times}{The argument \code{times} carried forward} #' #' \item{predictors}{The relevant part of the argument \code{newdata}.} #' \item{indices}{ A list with the following components #' #' \code{time}: Where to find values corresponding to the requested times #' \code{strata}: Where to find values corresponding to the values of the #' variables in newdata. Together time and strata show where to find the #' predicted probabilities. } \item{dimensions}{ a list with the following #' components: \code{time} : The length of \code{times} \code{strata} : The #' number of rows in \code{newdata} \code{names.strata} : Labels for the #' covariate values. } #' @author Thomas Alexander Gerds #' @seealso \code{\link{predictSurvIndividual}} #' @keywords survival #' @examples #' #' #' dat <- SimSurv(400) #' fit <- prodlim(Hist(time,status)~1,data=dat) #' #' ## predict the survival probs at selected times #' predict(fit,times=c(10,100,1000)) #' #' ## works also outside the usual range of the Kaplan-Meier #' predict(fit,times=c(-1,0,10,100,1000,10000)) #' #' ## newdata is required if there are strata #' ## or neighborhoods (i.e. overlapping strata) #' mfit <- prodlim(Hist(time,status)~X1+X2,data=dat) #' predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,]) #' #' ## this can be requested in matrix form #' predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,],mode="matrix") #' #' ## and even transposed #' predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,],mode="matrix",bytime=TRUE) #' #' @export "predict.prodlim" <- function(object, times, newdata, level.chaos=1, type=c("surv","cuminc","list"), mode="list", bytime=FALSE, cause=1, ...){ if (length(times)==0) stop("Argument 'times' has length 0") if (missing(type)) type <- switch(object$model,"survival"="surv","competing.risks"="cuminc","list") else type <- switch(type,"survival"="surv","surv"="surv","incidence"="cuminc","cuminc"="cuminc","list") if (type=="surv"){ predictSurv(object=object, times=times, newdata=newdata, level.chaos=level.chaos, mode=mode, bytime=bytime) } else{ if (type=="cuminc"){ predictCuminc(object=object, times=times, newdata=newdata, level.chaos=level.chaos, mode=mode, cause=cause) } else{ predictList(object=object, times=times, newdata=newdata, level.chaos=level.chaos) } } } "predictList" <- function(object,times,newdata,level.chaos=1){ if (missing(times)) stop("Argument times is missing.") NT <- length(times) order.times <- order(times) unsorted.times <- times times <- times[order.times] if (object$cens.type=="intervalCensored") jTimes <- object$time[2,] else jTimes <- object$time # no factors # -------------------------------------------------------------------- if (object$covariate.type==1){ tindex <- sindex(jump.times=jTimes,eval.times=times) tindex[times>object$maxtime] <- NA if (level.chaos==2) indices <- list(time=tindex[order(order.times)],strata=1) else indices <- list(time=tindex,strata=1) dimensions <- list(time=NT,strata=1) predictors <- NULL names.strata <- NULL } else { # conditional on factors # -------------------------------------------------------------------- if (missing(newdata)) stop("Argument newdata is missing.") NX <- NROW(object$X) fit.X <- object$X ## strata.vars <- sapply(strsplit(grep("strata",names(fit.X),val=TRUE),"strata."),function(x)x[2]) ## NN.vars <- sapply(strsplit(grep("NN",names(object$X),val=TRUE),"NN."),function(x)x[2]) strata.vars <- object$discrete.predictors NN.vars <- object$continuous.predictors X.formula <- update(formula(object$formula),NULL~.) ## delete.response(terms(formula(object$formula))) iid <- is.null(object$clustervar) if (!iid){ find.clu <- match(object$clustervar,all.vars(X.formula)) X.formula <- drop.terms(terms(X.formula),find.clu) } if (!all(match(all.vars(X.formula),names(newdata),nomatch=FALSE))) stop("Arg newdata does not contain all the covariates used for fitting. \n\nfitted variables: ", paste(all.vars(X.formula),collapse=", "),"\nnewdata contains:",ifelse(length(names(newdata))==0," nothing",names(newdata))) requested.X <- newdata[,all.vars(X.formula),drop=FALSE] NR <- NROW(requested.X) requested.names <- extract.name.from.special(names(requested.X)) names(requested.X) <- requested.names check.vars <- match(c(strata.vars,NN.vars),requested.names,nomatch=FALSE) if (length(strata.vars)==0){ requested.strata <- rep(1,NR) fit.strata <- rep(1,NX) freq.strata <- NX } else{ # strata # -------------------------------------------------------------------- ## changed 09 Dec 2014 (16:44) --> ## requested.strata <- do.call("paste",c(requested.X[,strata.vars,drop=FALSE],sep="\r")) fit.strata <- interaction(fit.X[,strata.vars,drop=FALSE],sep=":",drop=TRUE) requested.strata <- interaction(requested.X[,strata.vars,drop=FALSE],sep=":",drop=TRUE) fit.levels <- as.character(unique(fit.strata)) ## <-- changed 09 Dec 2014 (16:44) ## before version 1.5.1 ## fit.strata <- factor(do.call("paste",c(fit.X[,strata.vars,drop=FALSE],sep="\r"))) ## fit.levels <- unique(fit.strata) if (!all(unique(requested.strata) %in% (fit.levels))){ stop(paste("Not all values of newdata strata variables occur in fit:\nrequested:", paste(unique(requested.strata),collapse=","), "\nfitted:", paste(fit.levels,collapse=","))) } NS <- length(fit.levels) ## fit.strata <- factor(fit.strata,levels=unique(fit.strata),labels=1:NS) fit.strata <- factor(fit.strata,levels=levels(fit.strata),labels=1:NS) requested.strata <- factor(requested.strata,levels=fit.levels,labels=1:NS) freq.strata <- cumsum(tabulate(fit.strata)) } # neighborhoods # -------------------------------------------------------------------- switch(length(NN.vars)+1, {requested.NN <- NULL fit.NN <- NULL new.order <- order(requested.strata)}, {requested.NN <- requested.X[,NN.vars,drop=TRUE] fit.NN <- fit.X[,NN.vars,drop=TRUE] new.order <- order(requested.strata,requested.NN) }, stop("Currently only one continuous covariate allowed."), stop("Currently only one continuous covariate allowed.")) # findex identifies the individual strata neighborhood combination # -------------------------------------------------------------------- findex <- .C("findex", index=integer(NR), as.integer(as.integer(length(NN.vars)>0)), as.integer(requested.strata[new.order]), as.integer(freq.strata), as.double(requested.NN[new.order]), as.double(fit.NN), as.integer(NR), as.integer(NT), NAOK=FALSE, PACKAGE="prodlim")$index if (level.chaos==2) stop("Need to sort the times if there are strata.") if (level.chaos==1){# do NOT sort by factors predictors <- requested.X findex <- findex[order(new.order)] } else{ predictors <- requested.X[new.order,,drop=FALSE] } # pindex identifies the predicted probabilities # -------------------------------------------------------------------- pindex <- .C("pred_index", index=integer(NT*NR), as.double(times), as.double(jTimes), as.integer(object$first.strata[findex]), as.integer(object$size.strata[findex]), as.integer(NR), as.integer(NT), NAOK=FALSE, PACKAGE="prodlim")$index pindex[pindex==-1] <- NA indices <- list(time=pindex,strata=findex) dimensions <- list(time=NT,strata=NR) ## bug fix (10 Oct 2013 (10:08)): ## order of names needs to ## obey level.chaos names.strata <- apply(do.call("cbind",lapply(names(requested.X),function(n){ if(is.numeric(requested.X[,n])) paste(n,format(requested.X[,n],digits=2),sep="=") else paste(n,requested.X[,n],sep="=")})),1,paste,collapse=", ") if (level.chaos==0) {names.strata <- names.strata[new.order]} ## print(names.strata) predictors <- predictors } if (level.chaos==2) times <- unsorted.times else times <- times out <- list(times=times, predictors=predictors, indices=indices, dimensions=dimensions, names.strata=names.strata) out } predictSurv <- function(object, times, newdata, level.chaos=1, mode="list", bytime=FALSE){ p <- predict(object, newdata=newdata, level.chaos=level.chaos, times=times,type="list") NT <- p$dimensions$time NR <- p$dimensions$strata pindex <- p$indices$time if (object$covariate.type==1){ psurv <- c(1,object$surv)[pindex+1] } else{ if (bytime==FALSE){ psurv <- split(c(1,object$surv)[pindex+1], rep(1:NR,rep(NT,NR))) names(psurv) <- p$names.strata } else{ psurv <- split(c(1,object$surv)[pindex+1],rep(1:NT,NR)) names(psurv) <- paste("t",times,sep="=") } } if (mode=="matrix" && NR>1) { psurv <- do.call("rbind",psurv) } psurv } "predictCuminc" <- function(object, times, newdata, level.chaos=1, mode="list", cause, ...){ # if (object$model!="competing.risks") stop("This object is not a competing.risks model.") p <- predict(object,newdata=newdata,level.chaos=level.chaos,times=times,type="list") NT <- p$dimensions$time NR <- p$dimensions$strata pindex <- p$indices$time if (object$model=="survival"){ object$cuminc <- list("1"=1-object$surv) cause <- 1 } if (object$model=="competing.risks"){ if (missing(cause)) cause <- attributes(object$model.response)$states else causes <- checkCauses(cause,object) } out <- lapply(cause,function(thisCause){ if (NR == 1){ pcuminc <- c(0,object$cuminc[[thisCause]])[pindex+1] if (mode=="matrix") pcuminc <- matrix(pcuminc,nrow=1) } else{ pcuminc <- split(c(0,object$cuminc[[thisCause]])[pindex+1], rep(1:NR,rep(NT,NR))) names(pcuminc) <- p$names.strata if (mode=="matrix" && NR>1) { pcuminc <- do.call("rbind",pcuminc) } } pcuminc}) if (length(cause)==1){ out[[1]]} else{ names(out) <- names(object$cuminc)[cause] out} } prodlim/R/getEvent.R0000755000175100001440000000322313035633434014052 0ustar hornikusers#' Extract a column from an event history object. #' #' Extract a column from an event history object, as obtained with the function #' \code{\link{Hist}}. #' #' Since objects of class \code{"Hist"} are also matrices, all columns are #' numeric or integer valued. To extract a correctly labeled version, the #' attribute \code{states} of the object is used to generate factor levels. #' #' @aliases getEvent #' @param object Object of class \code{"Hist"}. #' @param mode Return mode. One of \code{"numeric"}, \code{"character"}, or #' \code{"factor"}. #' @param column Name of the column to extract from the object. #' @author Thomas Alexander Gerds #' @seealso \code{\link{Hist}} #' @keywords survival #' @examples #' #' dat= data.frame(time=1:5,event=letters[1:5]) #' x=with(dat,Hist(time,event)) #' ## inside integer #' unclass(x) #' ## extract event (the extra level "unknown" is for censored data) #' getEvent(x) #' #' @export getEvent <- function(object,mode="factor",column="event"){ model <- attr(object,"model") if (model=="multi.state") stop("Dont know how to extract events from a multi.state model") ## cens.code <- attr(object,"cens.code") states <- attr(object,"states") if (match(column,colnames(object),nomatch=0)==0){ warning("Object '", class(object),"' does not have this element: ",column,". Returning NULL.") return(NULL) } else{ E <- factor(as.vector(object[,column]), levels=1:(length(states)+1), labels=c(as.character(states),"unknown")) switch(mode,"character"=as.character(E),"numeric"=as.numeric(E),E) } } prodlim/R/jackknife.R0000755000175100001440000000600313035633434014215 0ustar hornikusers#' Compute jackknife pseudo values. #' #' Compute jackknife pseudo values based on marginal Kaplan-Meier estimate of #' survival, or based on marginal Aalen-Johansen estimate of cumulative #' incidence. #' #' @title Compute jackknife pseudo values. #' @aliases jackknife jackknife.survival jackknife.competing.risks #' @param object Object of class \code{"prodlim"}. #' @param times Time points at which to compute pseudo values. #' @param cause For competing risks the cause of failure. #' @param keepResponse If \code{TRUE} add the model response, #' i.e. event time, event status, etc. to the result. #' @param ... not used #' @note The R-package pseudo does a similar job, and appears to be a little faster in small samples, but much slower in large samples. See examples. #' @author Thomas Alexander Gerds #' @seealso \code{\link{prodlim}} #' @references Andersen PK & Perme MP (2010). Pseudo-observations in survival #' analysis Statistical Methods in Medical Research, 19(1), 71-99. #' @keywords survival ##' @examples ##' ##' ##' ## pseudo-values for survival models ##' ##' d=SimSurv(20) ##' f=prodlim(Hist(time,status)~1,data=d) ##' jackknife(f,times=c(3,5)) ##' ##' ## in some situations it may be useful to attach the ##' ## the event time history to the result ##' jackknife(f,times=c(3,5),keepResponse=TRUE) ##' ##' # pseudo-values for competing risk models ##' d=SimCompRisk(10) ##' f=prodlim(Hist(time,event)~1,data=d) ##' jackknife(f,times=c(3,10),cause=1) ##' jackknife(f,times=c(3,10,17),cause=2) ##' #' @export jackknife <- function(object,times,cause,keepResponse=FALSE,...){ if (object$model=="survival") jackknife.survival(object=object,times=times,keepResponse=keepResponse,...) else if (object$model=="competing.risks") jackknife.competing.risks(object=object, times=times, cause=cause, keepResponse=keepResponse, ...) else stop("No method for jackknifing this object.") } #' @export jackknife.survival <- function(object,times,keepResponse=FALSE,...){ S <- predict(object,times=times,newdata=object$model.response) Sk <- leaveOneOut.survival(object,times,...) N <- NROW(Sk) Jk <- t(N*S-t((N-1)*Sk)) colnames(Jk) <- paste("t",times,sep=".") if (keepResponse==TRUE){ Jk <- cbind(object$model.response,Jk) } ## re-order the pseudo-values Jk <- Jk[object$originalDataOrder,,drop=FALSE] Jk } #' @export jackknife.competing.risks <- function(object,times,cause,keepResponse=FALSE,...){ F <- predict(object,times=times,newdata=object$model.response,cause=cause) Fk <- leaveOneOut.competing.risks(object,times,cause=cause,...) N <- NROW(Fk) Jk <- t(N*F-t((N-1)*Fk)) colnames(Jk) <- paste("t",times,sep=".") if (keepResponse==TRUE){ Jk <- cbind(object$model.response,Jk) colnames(Jk)[(NCOL(Jk)-length(times)+1):NCOL(Jk)] <- paste("t",times,sep=".") } ## re-order the pseudo-values Jk <- Jk[object$originalDataOrder,,drop=FALSE] Jk } prodlim/MD50000644000175100001440000001470313057255665012271 0ustar hornikusers33d2bbd483fe919606d1da31709426f5 *DESCRIPTION 72a8624a099e4c877798216196b97911 *NAMESPACE 818717ebad6dcd56e41b79154dc89881 *R/EventHistory.frame.R 438193663fe3520f7a184e13796b2035 *R/Hist.R be8abadfc67bbe882d839436b1b7a6b0 *R/IntIndex.R 8c7c283f389119789f8043d290888440 *R/List2Matrix.R f1e85f220d6d60c0bdea43cf0d798ee1 *R/PercentAxis.R 5693e6e8eaf9bec11c2fff43e3af6421 *R/PetoInt.R 42eb0dab9ec9738f1b1b10b563323035 *R/SimCompRisk.R 6018efa705572a138bf314ca93e25b7c *R/SimSurv.R 74708e831db09eb143371799291eece4 *R/SmartControl.R 8048ada2ea704007fd33e32530037ed9 *R/atRisk.R 5f3ddb6a7e3e8fd08a8b8350880711bd *R/backGround.R f75b7108278767effae67816c6aa8624 *R/checkCauses.R 26dc3e49ba7d8ed35fcc2339bba6552e *R/confInt.R 0a6240db9cb5432524631a3f0ba727dd *R/crModel.R cf66dce602483984ad98363e46fe86ab *R/dimColor.R eab6b61b3c9ff343e013a5b5c26c6d2d *R/eventsMethods.R 79bd579e911d018d6047a58dcb41c117 *R/extract.name.from.special.R ed7d78d81504dc80a3e3d718a0c8a5c9 *R/findArrow.R 71290399f8336015c1ec3a013e49da5d *R/followup.R b2cb2d803b526f2fbdd920c0d655966f *R/getEvent.R 17da35dfc8653208d034161a84335b38 *R/getStates.R 5aae3b3fbf8d07140b8b2b27be7e7cd5 *R/iindex.R 36dfe0f18a179c65e57ae7e904bba057 *R/jackknife.R 92287c0f63a8c6ffe0fb5c0e9b86b0fb *R/leaveOneOut.R 69f1f539ce42ecba4e1b7f122a71845f *R/lifeTab.R 4170807138bbfd4ed5bb1e6fb004bc83 *R/lifeTab.competing.risks.R d0dec8bc01828b0f2224236ff7bdb8c6 *R/lifeTab.survival.R 68938f12148cbaa743b8c3a0e541232c *R/lines.prodlim.R 6a4e6eccebdcf48a68992fc04998bf17 *R/listNbh.R f05ddd4f73b00c1b6c3c9a6d0c62b6b2 *R/markTime.R 4564e45a574a53456832253877b21945 *R/mean.prodlim.R 0d0b0df3b7ffec81a77cffe4ad271c8c *R/meanNeighbors.R 368850d1262cd07d4bb1edec3b616e4b *R/model.design.R 26c5158bc91e0864bb208a7db2d5c4cd *R/model.specials.R 7935eeecdcf779e1a96fc8079d0f4826 *R/neighborhood.R cda74eca598553d3ac541dff17e6cdad *R/neighbors.R c4664ff808ea9ca502cf9af4519dd3c6 *R/parseSpecialNames.R 77793116ec70bc02b813fcb306507c69 *R/plot.Hist.R 005ceab1e93108b96373fd7b473f6eb4 *R/plot.prodlim.R 6181f2f79644989bd02cbf74be8b1fc8 *R/plotCompetingRiskModel.R 4c86b9265b6636ba079cdce60a9a1ea8 *R/plotIllnessDeathModel.R ef337413764239e58807f24bbafaa036 *R/plotIntervals.R 69cc53113ab5680bc91b1005a8dc75e2 *R/predict.prodlim.R a68be698b3786f6370c2887c693bad69 *R/predictSurvIndividual.R 01857901aeb05262d58726c253f92886 *R/print.Hist.R 3eafcc425bb0d7a69785974ba72cee31 *R/print.IntIndex.R 209b478c1ab8248ed53600040780294c *R/print.neighborhood.R 01af413c1a509cbf3dbb8cd60effc591 *R/print.prodlim.R d54f46f17175c5e9551d17c8599ba429 *R/print.quantile.prodlim.R 0a5cd312fc08c8c00d7fea83977cccf6 *R/print.summary.prodlim.R ba374395c30dd07dfaf16a28a47f9077 *R/prodlim-package.R 0134f576d306f5e3375b9f773960953b *R/prodlim.R 33bd15d77d337f6e6f2d77a95cb2f4e9 *R/prodlimIcensSurv.R 5fefc2bba43698b27c38b473bfdfbf3e *R/prodlimMulti.R dacc2545787afd9189b7a92bf9e09735 *R/quantile.prodlim.R 7d2565ccaa8362e9bda85ccd97d735ba *R/redist.R 9c128967716557ea1733ac9de52a1d00 *R/resolveLinPred.R 3eaf5885107f11063b721ab4da070b3b *R/resolveX.R d2cb8ba989f625cfcfff3f9efaa73483 *R/row.match.R 71456697adf41ab797c90701e52c3205 *R/sindex.R 42d556305d242e28a519ae886b89f4ec *R/stopTime.R e8b37a5dd8c9d24280b0b9b2cc51f197 *R/strip.terms.R 0b8038e0f90786b8c0b6d8a66af8cf83 *R/summary.Hist.R 2663cd07553065dc16fd8edea7c72941 *R/summary.prodlim.R 1f3eef15b8fd6c5134c5d00f2262f0d2 *R/survModel.R 2797f783b4a668fe34789920625939de *man/EventHistory.frame.Rd 965ae27459a907c9d646d8e99fafc5a5 *man/Hist.Rd 0b73729994d42240e1c96afe8716055e *man/List2Matrix.Rd 156d212927c8623ba41da431d0eb798c *man/PercentAxis.Rd 9bec6694ed8e7958b2cde4a0916f3971 *man/SimCompRisk.Rd f6d5bd7054f16ace5dd1331e4fa1441f *man/SimSurv.Rd 59995cdd219f2ad03d519a8e67aa8aee *man/SmartControl.Rd 15b53f3e74eddb25450e207cdd6b0661 *man/atRisk.Rd 0836daa3aa493ea4f4b8bee757f2ebcb *man/backGround.Rd 5819918a9a9039289aff131f987e921e *man/confInt.Rd 05f16f9a424a3dc886e3e2fe5d1542ac *man/crModel.Rd 5d59bf23e7384c9210d68d7dcc864676 *man/dimColor.Rd 2840b131987501ddd61e22a99fc909d7 *man/getEvent.Rd 74cf70d75020cda857c58fa91b6aee9d *man/getStates.Rd 28b70a3d520981fd4cbbed1e47221264 *man/jackknife.Rd dea7910d3c1c9858ccb1843c8916dc79 *man/leaveOneOut.Rd 5d33f8ddb17cd098bc860399f9fa98b5 *man/markTime.Rd 5055db0b353cd25ea2513fcb0c1f86a4 *man/meanNeighbors.Rd 5741434eb83b0b6c2ab2edf20c991265 *man/model.design.Rd ce3520bd55b10a4df84e49ecad883f2a *man/neighborhood.Rd 978d402b6e8af671a20b260e6af40cb6 *man/parseSpecialNames.Rd 24a8566398c20f7d2164a3d8720047a8 *man/plot.Hist.Rd ce601534f8e41e145b055c4fa34f4d69 *man/plot.prodlim.Rd 6a074fd3091085355b8150d43b49b52a *man/plotCompetingRiskModel.Rd 589a229166374873a39741e26b6eeb67 *man/plotIllnessDeathModel.Rd fcacb8d3a62f162ae16ca09d9b219db4 *man/predict.prodlim.Rd eb50f44ba16de4c5910d5264e89925a7 *man/predictSurvIndividual.Rd 32b0b043a0af1a86fe22276ff1f46e8b *man/print.prodlim.Rd 6fc216c31cede6f5035f83e42d803226 *man/prodlim.Rd 4168f60dd28effe36ab01d1a9ef0cc9f *man/quantile.prodlim.Rd 667c25c6b0cfa2a455d07ab5196a410f *man/redist.Rd 4904f6e61d5dc8ff65e13e8c62fe810a *man/row.match.Rd 3d069d4d52f67bc64ec4f7b73cce7ea2 *man/sindex.Rd 2741b436c890a4441d4a3b27f6447a43 *man/stopTime.Rd 0b2f16be9e6d9e074384530443a3fc81 *man/strip.terms.Rd 4835d0ae5d8d67b3feb3c9ff65350d7d *man/summary.Hist.Rd 8156a642975019141369dcad1e8c21bd *man/summary.prodlim.Rd 77ea53c9e6ee2d87e77473d976b5207a *man/survModel.Rd b939b1f1f1d248d46bcbdd9b702ead66 *src/GMLE.c 9a8f63a9c3d9e9cdaf1e1db2b4c0c845 *src/IntIndex.c 031d40cf2dea0273cb1247d2b62c83ab *src/declareRoutines.c fa3507435acf1a05e36e5879f1524a20 *src/icens_prodlim.c c9ac98e7c1b5fae26703f43558953b57 *src/icens_prodlim_ml.c acad3796769242199ea5e99153806ced *src/iindex.c ab7025ad739333c079f6586ed29a9a32 *src/life_table.c c8abb1997276a62400f2db58007d0f88 *src/loo.c 29bf1b65cfba84149c0221cf93bc12d5 *src/neighborhood.c a5581f4f3600e469cfaff7ce299945d7 *src/predict.c 5b1f38ba0d226f3eb88e528b79ce87d6 *src/predict_individual_survival.c e8c3b44b26409e3b986e05f9a3b3aa05 *src/prodlim.c 1e7f3fac1c960cc2667868995e258954 *src/prodlim.h 0e936788c700c2f034cebaa9053cf77a *src/prodlim_clustersurv.c 66c28bb1267fa38fbeec056ad1815bb7 *src/prodlim_comprisk.c d1c99c1c29b55aea6098e244467e8d3a *src/prodlim_multistates.c 14f760d9d56729470953e1e36c6140e2 *src/prodlim_surv.c 5c8f514a1b3f8c8894a04acff8a26744 *src/sindex.c 4a357739645a72a67efa4714aaa33711 *src/summary_prodlim.c f746c0ca4fbdfbd255139895ab850f0a *tests/testthat/cluster.R 1fc85aecf1f0b334274aad7fced90cca *tests/testthat/prodlim.R 546781ed09bef3ec76cde83eb20be5a3 *tests/testthat/pseudo.R prodlim/DESCRIPTION0000644000175100001440000000114313057255665013461 0ustar hornikusersPackage: prodlim Title: Product-Limit Estimation for Censored Event History Analysis Version: 1.6.1 Author: Thomas A. Gerds Description: Fast and user friendly implementation of nonparametric estimators for censored event history (survival) analysis. Kaplan-Meier and Aalen-Johansen method. Depends: R (>= 2.9.0) Imports: Rcpp (>= 0.11.5), stats, graphics, survival, KernSmooth, lava LinkingTo: Rcpp Maintainer: Thomas A. Gerds License: GPL (>= 2) Packaged: 2017-03-06 12:06:02 UTC; tag RoxygenNote: 5.0.1 NeedsCompilation: yes Repository: CRAN Date/Publication: 2017-03-06 13:53:09 prodlim/man/0000755000175100001440000000000013035633435012516 5ustar hornikusersprodlim/man/List2Matrix.Rd0000644000175100001440000000133413035633435015170 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/List2Matrix.R \name{List2Matrix} \alias{List2Matrix} \title{Reduce list to a matrix or data.frame with names as new columns} \usage{ List2Matrix(list, depth, names) } \arguments{ \item{list}{A named list which contains nested lists} \item{depth}{The depth in the list hierarchy until an rbindable object} \item{names}{Names for the list variables} } \value{ Matrix or data.frame. } \description{ This function is used by summary.prodlim to deal with results. } \details{ Reduction is done with rbind. } \examples{ x=list(a=data.frame(u=1,b=2,c=3),b=data.frame(u=3,b=4,c=6)) List2Matrix(x,depth=1,"X") } \author{ Thomas A. Gerds } prodlim/man/row.match.Rd0000755000175100001440000000205513035633435014714 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/row.match.R \name{row.match} \alias{row.match} \title{Identifying rows in a matrix or data.frame} \usage{ row.match(x, table, nomatch = NA) } \arguments{ \item{x}{Vector or matrix whose rows are to be matched} \item{table}{Matrix or data.frame that contain the rows to be matched against.} \item{nomatch}{the value to be returned in the case when no match is found. Note that it is coerced to 'integer'.} } \value{ A vector of the same length as 'x'. } \description{ Function for finding matching rows between two matrices or data.frames. First the matrices or data.frames are vectorized by row wise pasting together the elements. Then it uses the function match. Thus the function returns a vector with the row numbers of (first) matches of its first argument in its second. } \examples{ tab <- data.frame(num=1:26,abc=letters) x <- c(3,"c") row.match(x,tab) x <- data.frame(n=c(3,8),z=c("c","h")) row.match(x,tab) } \author{ Thomas A. Gerds } \seealso{ \code{match} } \keyword{misc} prodlim/man/crModel.Rd0000644000175100001440000000125713035633435014377 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/crModel.R \name{crModel} \alias{crModel} \title{Competing risks model for simulation} \usage{ crModel() } \value{ A structural equation model initialized with four variables: the latent event times of two causes, the latent right censored time, and the observed right censored event time. } \description{ Competing risks model for simulation } \details{ Create a competing risks model with to causes to simulate a right censored event time data without covariates This function requires the \code{lava} package. } \examples{ library(lava) m <- crModel() d <- sim(m,6) print(d) } \author{ Thomas A. Gerds } prodlim/man/dimColor.Rd0000644000175100001440000000134313035633435014556 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimColor.R \name{dimColor} \alias{dimColor} \title{Dim a given color to a specified density} \usage{ dimColor(col, density = 55) } \arguments{ \item{col}{Color name or number passed to \code{\link{col2rgb}}.} \item{density}{Integer value passed as alpha coefficient to \code{\link{rgb}} between 0 and 255} } \value{ A character vector with the color code. See \code{rgb} for details. } \description{ This function calls first \code{\link{col2rgb}} on a color name and then uses \code{\link{rgb}} to adjust the intensity of the result. } \examples{ dimColor(2,33) dimColor("green",133) } \author{ Thomas A. Gerds } \seealso{ rgb col2rgb } prodlim/man/prodlim.Rd0000755000175100001440000002427013035633435014463 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/prodlim-package.R, R/prodlim.R \docType{package} \name{prodlim} \alias{prodlim} \alias{prodlim-package} \title{Functions for estimating probabilities from right censored data} \usage{ prodlim(formula, data = parent.frame(), subset, na.action = NULL, reverse = FALSE, conf.int = 0.95, bandwidth = NULL, caseweights, discrete.level = 3, x = TRUE, maxiter = 1000, grid, tol = 7, method = c("npmle", "one.step", "impute.midpoint", "impute.right"), exact = TRUE, type) } \arguments{ \item{formula}{A formula whose left hand side is a \code{Hist} object. In some special cases it can also be a \code{Surv} response object, see the details section. The right hand side is as usual a linear combination of covariates which may contain at most one continuous factor. Whether or not a covariate is recognized as continuous or discrete depends on its class and on the argument \code{discrete.level}. The right hand side may also be used to specify clusters, see the details section.} \item{data}{A data.frame in which all the variables of \code{formula} can be interpreted.} \item{subset}{Passed as argument \code{subset} to function \code{subset} which applied to \code{data} before the formula is processed.} \item{na.action}{All lines in data with any missing values in the variables of formula are removed.} \item{reverse}{For right censored data, if reverse=TRUE then the censoring distribution is estimated.} \item{conf.int}{The level (between 0 and 1) for two-sided pointwise confidence intervals. Defaults to 0.95. Remark: only plain Wald-type confidence limits are available.} \item{bandwidth}{Smoothing parameter for nearest neighborhoods based on the values of a continuous covariate. See function \code{neighborhood} for details.} \item{caseweights}{Weights applied to the contribution of each subject to change the number of events and the number at risk. This can be used for bootstrap and survey analysis. Should be a vector of the same length and the same order as \code{data}.} \item{discrete.level}{Numeric covariates are treated as factors when their number of unique values exceeds not \code{discrete.level}. Otherwise the product limit method is applied, in overlapping neighborhoods according to the bandwidth.} \item{x}{logical value: if \code{TRUE}, the full covariate matrix with is returned in component \code{model.matrix}. The reduced matrix contains unique rows of the full covariate matrix and is always returned in component \code{X}.} \item{maxiter}{For interval censored data only. Maximal number of iterations to obtain the nonparametric maximum likelihood estimate. Defaults to 1000.} \item{grid}{For interval censored data only. When method=one.step grid for one-step product limit estimate. Defaults to sorted list of unique left and right endpoints of the observed intervals.} \item{tol}{For interval censored data only. Numeric value whose negative exponential is used as convergence criterion for finding the nonparametric maximum likelihood estimate. Defaults to 7 meaning exp(-7).} \item{method}{For interval censored data only. If equal to \code{"npmle"} (the default) use the usual Turnbull algorithm, else the product limit version of the self-consistent estimate.} \item{exact}{If TRUE the grid of time points used for estimation includes all the L and R endpoints of the observed intervals.} \item{type}{In two state models either \code{"surv"} for the Kaplan-Meier estimate of the survival function or \code{"cuminc"} for 1-Kaplan-Meier. Default is \code{"surv"} when \code{reverse==FALSE} and \code{"cuminc"} when \code{reverse==TRUE}. In competing risks models it has to be \code{"cuminc"} Aalen-Johansen estimate of the cumulative incidence function.} } \value{ Object of class "prodlim". See \code{\link{print.prodlim}}, \code{\link{predict.prodlim}}, predict, \code{\link{summary.prodlim}}, \code{\link{plot.prodlim}}. } \description{ Functions for estimating probabilities from right censored data Nonparametric estimation in event history analysis. Featuring fast algorithms and user friendly syntax adapted from the survival package. The product limit algorithm is used for right censored data; the self-consistency algorithm for interval censored data. } \details{ The response of \code{formula} (ie the left hand side of the `~' operator) specifies the model. In two-state models -- the classical survival case -- the standard Kaplan-Meier method is applied. For this the response can be specified as a \code{\link{Surv}} or as a \code{\link{Hist}} object. The \code{\link{Hist}} function allows you to change the code for censored observations, e.g. \code{Hist(time,status,cens.code="4")}. Besides a slight gain of computing efficiency, there are some extensions that are not included in the current version of the survival package: (0) The Kaplan-Meier estimator for the censoring times \code{reverse=TRUE} is correctly estimated when there are ties between event and censoring times. (1) A conditional version of the kernel smoothed Kaplan-Meier estimator for at most one continuous predictors using nearest neighborhoods (Beran 1981, Stute 1984, Akritas 1994). (2) For cluster-correlated data the right hand side of \code{formula} may identify a \code{\link{cluster}} variable. In that case Greenwood's variance formula is replaced by the formula of Ying \& Wei (1994). (3) Competing risk models can be specified via \code{\link{Hist}} response objects in \code{formula}. The Aalen-Johansen estimator is applied for estimating the cumulative incidence functions for all causes. The advantage over the function \code{cuminc} of the cmprsk package are user-friendly model specification via \code{\link{Hist}} and sophisticated print, summary, predict and plot methods. Under construction: (U0) Interval censored event times specified via \code{\link{Hist}} are used to find the nonparametric maximum likelihood estimate. Currently this works only for two-state models and the results should match with those from the package `Icens'. (U1) Extensions to more complex multi-states models (U2) The nonparametric maximum likelihood estimate for interval censored observations of competing risks models. } \examples{ ##---------------------two-state survival model------------ dat <- SimSurv(30) with(dat,plot(Hist(time,status))) fit <- prodlim(Hist(time,status)~1,data=dat) print(fit) plot(fit) summary(fit) quantile(fit) ## Subset fit1a <- prodlim(Hist(time,status)~1,data=dat,subset=dat$X1==1) fit1b <- prodlim(Hist(time,status)~1,data=dat,subset=dat$X1==1 & dat$X2>0) ## --------------------clustered data--------------------- library(survival) cdat <- cbind(SimSurv(30),patnr=sample(1:5,size=30,replace=TRUE)) fit <- prodlim(Hist(time,status)~cluster(patnr),data=cdat) print(fit) plot(fit) summary(fit) ##-----------compare Kaplan-Meier to survival package--------- dat <- SimSurv(30) pfit <- prodlim(Surv(time,status)~1,data=dat) pfit <- prodlim(Hist(time,status)~1,data=dat) ## same thing sfit <- survfit(Surv(time,status)~1,data=dat,conf.type="plain") ## same result for the survival distribution function all(round(pfit$surv,12)==round(sfit$surv,12)) summary(pfit,digits=3) summary(sfit,times=quantile(unique(dat$time))) ##-----------estimating the censoring survival function---------------- rdat <- data.frame(time=c(1,2,3,3,3,4,5,5,6,7),status=c(1,0,0,1,0,1,0,1,1,0)) rpfit <- prodlim(Hist(time,status)~1,data=rdat,reverse=TRUE) rsfit <- survfit(Surv(time,1-status)~1,data=rdat,conf.type="plain") ## When there are ties between times at which events are observed ## times at which subjects are right censored, then the convention ## is that events come first. This is not obeyed by the above call to survfit, ## and hence only prodlim delivers the correct reverse Kaplan-Meier: cbind("Wrong:"=rsfit$surv,"Correct:"=rpfit$surv) ##-------------------stratified Kaplan-Meier--------------------- pfit.X2 <- prodlim(Surv(time,status)~X2,data=dat) summary(pfit.X2) summary(pfit.X2,intervals=TRUE) plot(pfit.X2) ##----------continuous covariate: Stone-Beran estimate------------ prodlim(Surv(time,status)~X1,data=dat) ##-------------both discrete and continuous covariates------------ prodlim(Surv(time,status)~X2+X1,data=dat) ##----------------------interval censored data---------------------- dat <- data.frame(L=1:10,R=c(2,3,12,8,9,10,7,12,12,12),status=c(1,1,0,1,1,1,1,0,0,0)) with(dat,Hist(time=list(L,R),event=status)) dat$event=1 npmle.fitml <- prodlim(Hist(time=list(L,R),event)~1,data=dat) ##-------------competing risks------------------- CompRiskFrame <- data.frame(time=1:100,event=rbinom(100,2,.5),X=rbinom(100,1,.5)) crFit <- prodlim(Hist(time,event)~X,data=CompRiskFrame) summary(crFit) plot(crFit) summary(crFit,cause=2) plot(crFit,cause=2) # Changing the cens.code: dat <- data.frame(time=1:10,status=c(1,2,1,2,5,5,1,1,2,2)) fit <- prodlim(Hist(time,status)~1,data=dat) print(fit$model.response) fit <- prodlim(Hist(time,status,cens.code="2")~1,data=dat) print(fit$model.response) plot(fit) plot(fit,cause="5") ##------------delayed entry---------------------- ## left-truncated event times with competing risk endpoint dat <- data.frame(entry=c(7,3,11,12,11,2,1,7,15,17,3),time=10:20,status=c(1,0,2,2,0,0,1,2,0,2,0)) fitd <- prodlim(Hist(time=time,event=status,entry=entry)~1,data=dat) summary(fitd) plot(fitd) } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk} Thomas A. Gerds } \references{ Andersen, Borgan, Gill, Keiding (1993) Springer `Statistical Models Based on Counting Processes' Akritas (1994) The Annals of Statistics 22, 1299-1327 Nearest neighbor estimation of a bivariate distribution under random censoring. R Beran (1981) http://anson.ucdavis.edu/~beran/paper.html `Nonparametric regression with randomly censored survival data' Stute (1984) The Annals of Statistics 12, 917--926 `Asymptotic Normality of Nearest Neighbor Regression Function Estimates' Ying, Wei (1994) Journal of Multivariate Analysis 50, 17-29 The Kaplan-Meier estimate for dependent failure time observations } \seealso{ \code{\link{predictSurv}}, \code{\link{predictSurvIndividual}}, \code{\link{predictCuminc}}, \code{\link{Hist}}, \code{\link{neighborhood}}, \code{\link{Surv}}, \code{\link{survfit}}, \code{\link{strata}}, } \keyword{cluster} \keyword{nonparametric} \keyword{survival} prodlim/man/SmartControl.Rd0000755000175100001440000000427013035633435015442 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/SmartControl.R \name{SmartControl} \alias{SmartControl} \title{Function to facilitate the control of arguments passed to subroutines.} \usage{ SmartControl(call, keys, ignore, defaults, forced, split, ignore.case = TRUE, replaceDefaults, verbose = TRUE) } \arguments{ \item{call}{A list of named arguments, as for example can be obtained via \code{list(...)}.} \item{keys}{A vector of names of subroutines.} \item{ignore}{A list of names which are removed from the argument \code{call} before processing.} \item{defaults}{A named list of default argument lists for the subroutines.} \item{forced}{A named list of forced arguments for the subroutines.} \item{split}{Regular expression used for splitting keys from arguments. Default is \code{"\."}.} \item{ignore.case}{If \code{TRUE} then all matching and splitting is not case sensitive.} \item{replaceDefaults}{If \code{TRUE} default arguments are replaced by given arguments. Can also be a named list with entries for each subroutine.} \item{verbose}{If \code{TRUE} warning messages are given for arguments in \code{call} that are not ignored via argument \code{ignore} and that do not match any \code{key}.} } \description{ Many R functions need to pass several arguments to several different subroutines. Such arguments can are given as part of the three magic dots "...". The function SmartControl reads the dots together with a list of default values and returns for each subroutine a list of arguments. } \examples{ myPlot = function(...){ ## set defaults plot.DefaultArgs=list(x=0,y=0,type="n") lines.DefaultArgs=list(x=1:10,lwd=3) ## apply smartcontrol x=SmartControl(call=list(...), defaults=list("plot"=plot.DefaultArgs, "lines"=lines.DefaultArgs), ignore.case=TRUE,keys=c("plot","axis2","lines"), forced=list("plot"=list(axes=FALSE),"axis2"=list(side=2))) ## call subroutines do.call("plot",x$plot) do.call("lines",x$lines) do.call("axis",x$axis2) } myPlot(plot.ylim=c(0,5),plot.xlim=c(0,20),lines.lty=3,axis2.At=c(0,3,4)) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot.prodlim}} } \keyword{Graphics} prodlim/man/markTime.Rd0000755000175100001440000000171413057105723014562 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/markTime.R \name{markTime} \alias{markTime} \title{Marking product-limit plots at the censored times.} \usage{ markTime(x, times, nlost, pch, col, ...) } \arguments{ \item{x}{The values of the curves at \code{times}.} \item{times}{The times where there curves are plotted.} \item{nlost}{The number of subjects lost to follow-up (censored) at \code{times}.} \item{pch}{The symbol used to mark the curves.} \item{col}{The color of the symbols.} \item{...}{Arguments passed to \code{points}.} } \value{ Nil } \description{ This function is invoked and controlled by \code{plot.prodlim}. } \details{ This function should not be called directly. The arguments can be specified as \code{atRisk.arg} in the call to \code{plot.prodlim}. } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot.prodlim}}, \code{\link{confInt}}, \code{\link{atRisk}} } \keyword{survival} prodlim/man/plotIllnessDeathModel.Rd0000755000175100001440000000214013035633435017244 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotIllnessDeathModel.R \name{plotIllnessDeathModel} \alias{plotIllnessDeathModel} \title{Plotting an illness-death-model.} \usage{ plotIllnessDeathModel(stateLabels, style = 1, recovery = FALSE, ...) } \arguments{ \item{stateLabels}{Labels for the three boxes.} \item{style}{Either \code{1} or anything else, switches the orientation of the graph. Hard to explain in words, see examples.} \item{recovery}{Logical. If \code{TRUE} there will be an arrow from the illness state to the initial state.} \item{\dots}{Arguments passed to plot.Hist.} } \description{ Plotting an illness-death-model using \code{plot.Hist}. } \examples{ plotIllnessDeathModel() plotIllnessDeathModel(style=2) plotIllnessDeathModel(style=2, stateLabels=c("a","b\\nc","d"), box1.col="yellow", box2.col="green", box3.col="red") } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plotCompetingRiskModel}}, \code{\link{plot.Hist}} } \keyword{survival} prodlim/man/PercentAxis.Rd0000755000175100001440000000122213035633435015232 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/PercentAxis.R \name{PercentAxis} \alias{PercentAxis} \title{Percentage-labeled axis.} \usage{ PercentAxis(x, at, ...) } \arguments{ \item{x}{Side of the axis} \item{at}{Positions (decimals) at which to label the axis.} \item{\dots}{Given to \code{axis}.} } \description{ Use percentages instead of decimals to label the an axis with a probability scale . } \examples{ plot(0,0,xlim=c(0,1),ylim=c(0,1),axes=FALSE) PercentAxis(1,at=seq(0,1,.25)) PercentAxis(2,at=seq(0,1,.25)) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot.prodlim}} } \keyword{survival} prodlim/man/atRisk.Rd0000755000175100001440000000356213057105723014251 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/atRisk.R \name{atRisk} \alias{atRisk} \title{Drawing numbers of subjects at-risk of experiencing an event below Kaplan-Meier and Aalen-Johansen plots.} \usage{ atRisk(x, newdata, times, line, col, labelcol = NULL, interspace, cex, labels, title = "", titlecol = NULL, pos, adj, dist, adjust.labels = TRUE, ...) } \arguments{ \item{x}{an object of class `prodlim' as returned by the \code{prodlim} function.} \item{newdata}{see \code{plot.prodlim}} \item{times}{Where to compute the atrisk numbers.} \item{line}{Distance of the atrisk numbers from the inner plot.} \item{col}{The color of the text.} \item{labelcol}{The color for the labels. Defaults to col.} \item{interspace}{Distance between rows of atrisk numbers.} \item{cex}{Passed on to \code{mtext} for both atrisk numbers and labels.} \item{labels}{Labels for the at-risk rows.} \item{title}{Title for the at-risk labels} \item{titlecol}{The color for the title. Defaults to 1 (black).} \item{pos}{The value is passed on to the \code{mtext} argument \code{at} for the labels (not the atriks numbers).} \item{adj}{Passed on to \code{mtext} for the labels (not the atriks numbers).} \item{dist}{If \code{line} is missing, the distance of the upper most atrisk row from the inner plotting region: par()$mgp[2].} \item{adjust.labels}{If \code{TRUE} the labels are left adjusted.} \item{...}{Further arguments that are passed to the function \code{mtext}.} } \value{ Nil } \description{ This function is invoked and controlled by \code{plot.prodlim}. } \details{ This function should not be called directly. The arguments can be specified as \code{atRisk.arg} in the call to \code{plot.prodlim}. } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot.prodlim}}, \code{\link{confInt}}, \code{\link{markTime}} } \keyword{survival} prodlim/man/backGround.Rd0000755000175100001440000000252513035633435015073 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/backGround.R \name{backGround} \alias{backGround} \title{Background and grid color control.} \usage{ backGround(xlim, ylim, bg = "white", fg = "gray77", horizontal = NULL, vertical = NULL, border = "black") } \arguments{ \item{xlim}{Limits for the xaxis, defaults to par("usr")[1:2].} \item{ylim}{Limits for the yaxis, defaults to par("usr")[3:4].} \item{bg}{Background color. Can be multiple colors which are then switched at each horizontal line.} \item{fg}{Grid line color.} \item{horizontal}{Numerical values at which horizontal grid lines are plotted.} \item{vertical}{Numerical values at which vertical grid lines are plotted.} \item{border}{The color of the border around the background.} } \description{ Some users like background colors, and it may be helpful to have grid lines to read off e.g. probabilities from a Kaplan-Meier graph. Both things can be controlled with this function. However, it mainly serves \code{\link{plot.prodlim}}. } \examples{ plot(0,0) backGround(bg="beige",fg="red",vertical=0,horizontal=0) plot(0,0) backGround(bg=c("yellow","green"),fg="red",xlim=c(-1,1),ylim=c(-1,1),horizontal=seq(0,1,.1)) backGround(bg=c("yellow","green"),fg="red",horizontal=seq(0,1,.1)) } \author{ Thomas Alexander Gerds } \keyword{survival} prodlim/man/confInt.Rd0000755000175100001440000000315013057105723014405 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/confInt.R \name{confInt} \alias{confInt} \title{Add point-wise confidence limits to the graphs of Kaplan-Meier and Aalen-Johansen estimates of survival and cumulative incidence.} \usage{ confInt(x, times, newdata, type, citype, cause, col, lty, lwd, density = 55, ...) } \arguments{ \item{x}{an object of class `prodlim' as returned by the \code{prodlim} function.} \item{times}{where to compute point-wise confidence limits} \item{newdata}{see \code{plot.prodlim}} \item{type}{Either \code{"cuminc"} or \code{"survival"} passed to summary.prodlim as \code{surv=ifelse(type=="cuminc",FALSE,TRUE)}.} \item{citype}{If \code{"shadow"} then confidence limits are drawn as colored shadows. Otherwise, dotted lines are used to show the upper and lower confidence limits.} \item{cause}{see \code{plot.prodlim}} \item{col}{the colour of the lines.} \item{lty}{the line type of the lines.} \item{lwd}{the line thickness of the lines.} \item{density}{For \code{citype="shadow"}, the density of the shade. Default is 55 percent.} \item{\dots}{Further arguments that are passed to the function \code{segments} if \code{type=="bars"} and to \code{lines} else.} } \value{ Nil } \description{ This function is invoked and controlled by \code{plot.prodlim}. } \details{ This function should not be called directly. The arguments can be specified as \code{Confint.arg} in the call to \code{plot.prodlim}. } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot.prodlim}}, \code{\link{atRisk}}, \code{\link{markTime}} } \keyword{survival} prodlim/man/plot.prodlim.Rd0000755000175100001440000002500013036404353015424 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.prodlim.R \name{plot.prodlim} \alias{lines.prodlim} \alias{plot.prodlim} \title{Plotting event probabilities over time} \usage{ \method{plot}{prodlim}(x, type, cause = 1, select, newdata, add = FALSE, col, lty, lwd, ylim, xlim, ylab, xlab = "Time", timeconverter, legend = TRUE, logrank = FALSE, marktime = FALSE, confint = TRUE, automar, atrisk = ifelse(add, FALSE, TRUE), timeOrigin = 0, axes = TRUE, background = TRUE, percent = TRUE, minAtrisk = 0, limit = 10, ...) } \arguments{ \item{x}{an object of class `prodlim' as returned by the \code{prodlim} function.} \item{type}{Either \code{"surv"} or \code{"cuminc"} controls what} \item{cause}{determines the cause of the cumulative incidence function. Currently one cause is allowed at a time, but you may call the function again with add=TRUE to add the lines of the other causes.} \item{select}{Select which lines to plot. This can be used when there are many strata or many competing risks to select a subset of the lines. However, a more clean way to select covariate stratat is to use argument \code{newdata}. Another application is when there are many competing risks and it is desired (for the stacked plot) to stack and show only a subset of the cumulative incidence functions.} \item{newdata}{a data frame containing covariate strata for which to show curves. When omitted element \code{X} of object \code{x} is used.} \item{add}{if \code{TRUE} curves are added to an existing plot.} \item{col}{color for curves. Default is \code{1:number(curves)}} \item{lty}{line type for curves. Default is 1.} \item{lwd}{line width for all curves. Default is 3.} \item{ylim}{limits of the y-axis} \item{xlim}{limits of the x-axis} \item{ylab}{label for the y-axis} \item{xlab}{label for the x-axis} \item{timeconverter}{The strings are allowed: "days2years" (conversion factor: 1/365.25) "months2years" (conversion factor: 1/12) "days2months" (conversion factor 1/30.4368499) "years2days" (conversion factor 365.25) "years2months" (conversion factor 12) "months2days" (conversion factor 30.4368499)} \item{legend}{if TRUE a legend is plotted by calling the function legend. Optional arguments of the function \code{legend} can be given in the form \code{legend.x=val} where x is the name of the argument and val the desired value. See also Details.} \item{logrank}{If TRUE, the logrank p-value will be extracted from a call to \code{survdiff} and added to the legend. This works only for survival models, i.e. Kaplan-Meier with discrete predictors.} \item{marktime}{if TRUE the curves are tick-marked at right censoring times by invoking the function \code{markTime}. Optional arguments of the function \code{markTime} can be given in the form \code{confint.x=val} as with legend. See also Details.} \item{confint}{if TRUE pointwise confidence intervals are plotted by invoking the function \code{confInt}. Optional arguments of the function \code{confInt} can be given in the form \code{confint.x=val} as with legend. See also Details.} \item{automar}{If TRUE the function trys to find suitable values for the figure margins around the main plotting region.} \item{atrisk}{if TRUE display numbers of subjects at risk by invoking the function \code{atRisk}. Optional arguments of the function \code{atRisk} can be given in the form \code{atrisk.x=val} as with legend. See also Details.} \item{timeOrigin}{Start of the time axis} \item{axes}{If true axes are drawn. See details.} \item{background}{If \code{TRUE} the background color and grid color can be controlled using smart arguments SmartControl, such as background.bg="yellow" or background.bg=c("gray66","gray88"). The following defaults are passed to \code{background} by \code{plot.prodlim}: horizontal=seq(0,1,.25), vertical=NULL, bg="gray77", fg="white". See \code{background} for all arguments, and the examples below.} \item{percent}{If true the y-axis is labeled in percent.} \item{minAtrisk}{Integer. Show the curve only until the number at-risk is at least \code{minAtrisk}} \item{limit}{When newdata is not specified and the number of lines in element \code{X} of object \code{x} exceeds limits, only the results for covariate constellations of the first, the middle and the last row in \code{X} are shown. Otherwise all lines of \code{X} are shown.} \item{...}{Parameters that are filtered by \code{\link{SmartControl}} and then passed to the functions \code{\link{plot}}, \code{\link{legend}}, \code{\link{axis}}, \code{\link{atRisk}}, \code{\link{confInt}}, \code{\link{markTime}}, \code{\link{backGround}}} } \value{ The (invisible) object. } \description{ Function to plot survival and cumulative incidence curves against time. } \details{ From version 1.1.3 on the arguments legend.args, atrisk.args, confint.args are obsolete and only available for backward compatibility. Instead arguments for the invoked functions \code{atRisk}, \code{legend}, \code{confInt}, \code{markTime}, \code{axis} are simply specified as \code{atrisk.cex=2}. The specification is not case sensitive, thus \code{atRisk.cex=2} or \code{atRISK.cex=2} will have the same effect. The function \code{axis} is called twice, and arguments of the form \code{axis1.labels}, \code{axis1.at} are used for the time axis whereas \code{axis2.pos}, \code{axis1.labels}, etc. are used for the y-axis. These arguments are processed via \code{\dots{}} of \code{plot.prodlim} and inside by using the function \code{SmartControl}. Documentation of these arguments can be found in the help pages of the corresponding functions. } \note{ Similar functionality is provided by the function \code{\link{plot.survfit}} of the survival library } \examples{ ## simulate right censored data from a two state model set.seed(100) dat <- SimSurv(100) # with(dat,plot(Hist(time,status))) ### marginal Kaplan-Meier estimator kmfit <- prodlim(Hist(time, status) ~ 1, data = dat) plot(kmfit) plot(kmfit,timeconverter="years2months") # change time range plot(kmfit,xlim=c(0,4)) # change scale of y-axis plot(kmfit,percent=FALSE) # mortality instead of survival plot(kmfit,type="cuminc") # change axis label and position of ticks plot(kmfit, xlim=c(0,10), axis1.at=seq(0,10,1), axis1.labels=0:10, xlab="Years", axis2.las=2, atrisk.at=seq(0,10,2.5), atrisk.title="") # change background color plot(kmfit, xlim=c(0,10), confint.citype="shadow", col=1, axis1.at=0:10, axis1.labels=0:10, xlab="Years", axis2.las=2, atrisk.at=seq(0,10,2.5), atrisk.title="", background=TRUE, background.fg="white", background.horizontal=seq(0,1,.25/2), background.vertical=seq(0,10,2.5), background.bg=c("gray88")) # change type of confidence limits plot(kmfit, xlim=c(0,10), confint.citype="dots", col=4, background=TRUE, background.bg=c("white","gray88"), background.fg="gray77", background.horizontal=seq(0,1,.25/2), background.vertical=seq(0,10,2)) ### Kaplan-Meier in discrete strata kmfitX <- prodlim(Hist(time, status) ~ X1, data = dat) plot(kmfitX) # move legend plot(kmfitX,legend.x="bottomleft",atRisk.cex=1.3, atrisk.title="No. subjects") ## Control the order of strata ## since version 1.5.1 prodlim does obey the order of ## factor levels dat$group <- factor(cut(dat$X2,c(-Inf,0,0.5,Inf)), labels=c("High","Intermediate","Low")) kmfitG <- prodlim(Hist(time, status) ~ group, data = dat) plot(kmfitG) ## relevel dat$group2 <- factor(cut(dat$X2,c(-Inf,0,0.5,Inf)), levels=c("(0.5, Inf]","(0,0.5]","(-Inf,0]"), labels=c("Low","Intermediate","High")) kmfitG2 <- prodlim(Hist(time, status) ~ group2, data = dat) plot(kmfitG2) # add log-rank test to legend plot(kmfitX, atRisk.cex=1.3, logrank=TRUE, legend.x="topright", atrisk.title="at-risk") # change atrisk labels plot(kmfitX, legend.x="bottomleft", atrisk.title="Patients", atrisk.cex=0.9, atrisk.labels=c("X1=0","X1=1")) # multiple categorical factors kmfitXG <- prodlim(Hist(time,status)~X1+group2,data=dat) plot(kmfitXG,select=1:2) ### Kaplan-Meier in continuous strata kmfitX2 <- prodlim(Hist(time, status) ~ X2, data = dat) plot(kmfitX2,xlim=c(0,10)) # specify values of X2 for which to show the curves plot(kmfitX2,xlim=c(0,10),newdata=data.frame(X2=c(-1.8,0,1.2))) ### Cluster-correlated data library(survival) cdat <- cbind(SimSurv(20),patnr=sample(1:5,size=20,replace=TRUE)) kmfitC <- prodlim(Hist(time, status) ~ cluster(patnr), data = cdat) plot(kmfitC) plot(kmfitC,atrisk.labels=c("Units","Patients")) kmfitC2 <- prodlim(Hist(time, status) ~ X1+cluster(patnr), data = cdat) plot(kmfitC2) plot(kmfitC2,atrisk.labels=c("Teeth","Patients","Teeth","Patients"), atrisk.col=c(1,1,2,2)) ### Cluster-correlated data with strata n = 50 foo = runif(n) bar = rexp(n) baz = rexp(n,1/2) d = stack(data.frame(foo,bar,baz)) d$cl = sample(10, 3*n, replace=TRUE) fit = prodlim(Surv(values) ~ ind + cluster(cl), data=d) plot(fit) ## simulate right censored data from a competing risk model datCR <- SimCompRisk(100) with(datCR,plot(Hist(time,event))) ### marginal Aalen-Johansen estimator ajfit <- prodlim(Hist(time, event) ~ 1, data = datCR) plot(ajfit) # same as plot(ajfit,cause=1) # cause 2 plot(ajfit,cause=2) # both in one plot(ajfit,cause=1) plot(ajfit,cause=2,add=TRUE,col=2) ### stacked plot plot(ajfit,cause="stacked",select=2) ### stratified Aalen-Johansen estimator ajfitX1 <- prodlim(Hist(time, event) ~ X1, data = datCR) plot(ajfitX1) ## add total number at-risk to a stratified curve ttt = 1:10 plot(ajfitX1,atrisk.at=ttt,col=2:3) plot(ajfit,add=TRUE,col=1) atRisk(ajfit,newdata=datCR,col=1,times=ttt,line=3,labels="Total") ## stratified Aalen-Johansen estimator in nearest neighborhoods ## of a continuous variable ajfitX <- prodlim(Hist(time, event) ~ X1+X2, data = datCR) plot(ajfitX,newdata=data.frame(X1=c(1,1,0),X2=c(4,10,10))) plot(ajfitX,newdata=data.frame(X1=c(1,1,0),X2=c(4,10,10)),cause=2) ## stacked plot plot(ajfitX, newdata=data.frame(X1=0,X2=0.1), cause="stacked", legend.title="X1=0,X2=0.1", legend.legend=paste("cause:",getStates(ajfitX$model.response)), plot.main="Subject specific stacked plot") } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plot}}, \code{\link{legend}}, \code{\link{axis}}, \code{\link{prodlim}},\code{\link{plot.Hist}},\code{\link{summary.prodlim}}, \code{\link{neighborhood}}, \code{\link{atRisk}}, \code{\link{confInt}}, \code{\link{markTime}}, \code{\link{backGround}} } \keyword{survival} prodlim/man/quantile.prodlim.Rd0000755000175100001440000000211613035633435016277 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quantile.prodlim.R \name{quantile.prodlim} \alias{quantile.prodlim} \title{Quantiles for Kaplan-Meier and Aalen-Johansen estimates.} \usage{ \method{quantile}{prodlim}(x, q, cause = 1, ...) } \arguments{ \item{x}{Object of class \code{"prodlim"}.} \item{q}{Quantiles. Vector of values between 0 and 1.} \item{cause}{For competing risks the cause of interest.} \item{...}{not used} } \description{ Quantiles for Kaplan-Meier and Aalen-Johansen estimates. } \examples{ library(lava) set.seed(1) d=SimSurv(30) f=prodlim(Hist(time,status)~1,data=d) f1=prodlim(Hist(time,status)~X1,data=d) # default: median and IQR quantile(f) quantile(f1) # median alone quantile(f,.5) quantile(f1,.5) # competing risks set.seed(3) dd = SimCompRisk(30) ff=prodlim(Hist(time,event)~1,data=dd) ff1=prodlim(Hist(time,event)~X1,data=dd) ## default: median and IQR quantile(ff) quantile(ff1) print(quantile(ff1),na.val="NA") print(quantile(ff1),na.val="Not reached") } \author{ Thomas Alexander Gerds } \keyword{survival} prodlim/man/plot.Hist.Rd0000755000175100001440000001360713035633435014703 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.Hist.R \name{plot.Hist} \alias{plot.Hist} \title{Box-arrow diagrams for multi-state models.} \usage{ \method{plot}{Hist}(x, nrow, ncol, stateLabels, arrowLabels, arrowLabelStyle = "symbolic", arrowLabelSymbol = "lambda", changeArrowLabelSide, tagBoxes = FALSE, startCountZero = TRUE, oneFitsAll, margin, cex, verbose = FALSE, ...) } \arguments{ \item{x}{An object of class \code{Hist}.} \item{nrow}{the number of graphic rows} \item{ncol}{the number of graphic columns} \item{stateLabels}{Vector of names to appear in the boxes (states). Defaults to attr(x,"state.names"). The boxes can also be individually labeled by smart arguments of the form \code{box3.label="diseased"}, see examples.} \item{arrowLabels}{Vector of labels to appear in the boxes (states). One for each arrow. The arrows can also be individually labeled by smart arguments of the form \code{arrow1.label=paste(expression(eta(s,u)))}, see examples.} \item{arrowLabelStyle}{Either "symbolic" for automated symbolic arrow labels, or "count" for arrow labels that reflect the number of transitions in the data.} \item{arrowLabelSymbol}{Symbol for automated symbolic arrow labels. Defaults to "lambda".} \item{changeArrowLabelSide}{A vector of mode logical (TRUE,FALSE) one for each arrow to change the side of the arrow on which the label is placed.} \item{tagBoxes}{Logical. If TRUE the boxes are numbered in the upper left corner. The size can be controlled with smart argument boxtags.cex. The default is boxtags.cex=1.28.} \item{startCountZero}{Control states numbers for symbolic arrow labels and box tags.} \item{oneFitsAll}{If \code{FALSE} then boxes have individual size, depending on the size of the label, otherwise all boxes have the same size dependent on the largest label.} \item{margin}{Set the figure margin via \code{par(mar=margin)}. Less than 4 values are repeated.} \item{cex}{Initial cex value for the state and the arrow \code{labels}.} \item{verbose}{If TRUE echo various things.} \item{\dots}{Smart control of arguments for the subroutines text (box label), rect (box), arrows, text (arrow label). Thus the three dots can be used to draw individual boxes with individual labels, arrows and arrow labels. E.g. arrow2.label="any label" changes the label of the second arrow. See examples.} } \description{ Automated plotting of the states and transitions that characterize a multi states model. } \note{ Use the functionality of the unix program `dot' http://www.graphviz.org/About.php via R package Rgraphviz to obtain more complex graphs. } \examples{ ## A simple survival model SurvFrame <- data.frame(time=1:10,status=c(0,1,1,0,0,1,0,0,1,0)) SurvHist <- with(SurvFrame,Hist(time,status)) plot(SurvHist) plot(SurvHist,box2.col=2,box2.label="experienced\\nR user") plot(SurvHist, box2.col=2, box1.label="newby", box2.label="experienced\\nR user", oneFitsAll=FALSE, arrow1.length=.5, arrow1.label="", arrow1.lwd=4) ## change the cex of all box labels: plot(SurvHist, box2.col=2, box1.label="newby", box2.label="experienced\\nR user", oneFitsAll=FALSE, arrow1.length=.5, arrow1.label="", arrow1.lwd=4, label.cex=1) ## change the cex of single box labels: plot(SurvHist, box2.col=2, box1.label="newby", box2.label="experienced\\nR user", oneFitsAll=FALSE, arrow1.length=.5, arrow1.label="", arrow1.lwd=4, label1.cex=1, label2.cex=2) ## The pbc data set from the survival package library(survival) data(pbc) plot(with(pbc,Hist(time,status)), stateLabels=c("randomized","transplant","dead"), arrowLabelStyle="count") ## two competing risks comprisk.model <- data.frame(time=1:3,status=1:3) CRHist <- with(comprisk.model,Hist(time,status,cens.code=2)) plot(CRHist) plot(CRHist,arrow1.label=paste(expression(eta(s,u)))) plot(CRHist,box2.label="This\\nis\\nstate 2",arrow1.label=paste(expression(gamma[1](t)))) plot(CRHist,box3.label="Any\\nLabel",arrow2.label="any\\nlabel") ## change the layout plot(CRHist, box1.label="Alive", box2.label="Dead\\n cause 1", box3.label="Dead\\n cause 2", arrow1.label=paste(expression(gamma[1](t))), arrow2.label=paste(expression(eta[2](t))), box1.col=2, box2.col=3, box3.col=4, nrow=2, ncol=3, box1.row=1, box1.column=2, box2.row=2, box2.column=1, box3.row=2, box3.column=3) ## more competing risks comprisk.model2 <- data.frame(time=1:4,status=1:4) CRHist2 <- with(comprisk.model2,Hist(time,status,cens.code=2)) plot(CRHist2,box1.row=2) ## illness-death models illness.death.frame <- data.frame(time=1:4, from=c("Disease\\nfree", "Disease\\nfree", "Diseased", "Disease\\nfree"), to=c("0","Diseased","Dead","Dead")) IDHist <- with(illness.death.frame,Hist(time,event=list(from,to))) plot(IDHist) ## illness-death with recovery illness.death.frame2 <- data.frame(time=1:5, from=c("Disease\\nfree","Disease\\nfree","Diseased","Diseased","Disease\\nfree"), to=c("0","Diseased","Disease\\nfree","Dead","Dead")) IDHist2 <- with(illness.death.frame2,Hist(time,event=list(from,to))) plot(IDHist2) ## 4 state models x=data.frame(from=c(1,2,1,3,4),to=c(2,1,3,4,1),time=1:5) y=with(x,Hist(time=time,event=list(from=from,to=to))) plot(y) ## moving the label of some arrows d <- data.frame(time=1:5,from=c(1,1,1,2,2),to=c(2,3,4,3,4)) h <- with(d,Hist(time,event=list(from,to))) plot(h, tagBoxes=TRUE, stateLabels=c("Remission\\nwithout\\nGvHD", "Remission\\nwith\\nGvHD", "Relapse", "Death\\nwithout\\nrelapse"), arrowLabelSymbol='alpha', arrowlabel3.x=35, arrowlabel3.y=53, arrowlabel4.y=54, arrowlabel4.x=68) ##' } \author{ Thomas A Gerds \email{tag@biostat.ku.dk} } \seealso{ \code{\link{Hist}}\code{\link{SmartControl}} } \keyword{survival} prodlim/man/stopTime.Rd0000644000175100001440000000234313035633435014613 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stopTime.R \name{stopTime} \alias{stopTime} \title{Stop the time of an event history object} \usage{ stopTime(object, stop.time) } \arguments{ \item{object}{Event history object as obtained with \code{Hist}} \item{stop.time}{Time point at which to stop the event history object} } \value{ Stopped event history object where all times are censored at \code{stop.time}. All observations with times greater than \code{stop.time} are set to \code{stop.time} and the event status is set to \code{attr(object,"cens.code")}. A new column \code{"stop.time"} is equal to \code{1} for stopped observations and equal to \code{0} for the other observations. } \description{ All event times are stopped at a given time point and corresponding events are censored } \examples{ set.seed(29) d <- SimSurv(10) h <- with(d,Hist(time,status)) h stopTime(h,8) stopTime(h,5) ## works also with Surv objects library(survival) s <- with(d,Surv(time,status)) stopTime(s,5) ## competing risks set.seed(29) dr <- SimCompRisk(10) hr <- with(dr,Hist(time,event)) hr stopTime(hr,8) stopTime(hr,5) } \author{ Thomas A. Gerds } \seealso{ Hist } prodlim/man/leaveOneOut.Rd0000644000175100001440000000151313035633435015233 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/leaveOneOut.R \name{leaveOneOut} \alias{leaveOneOut} \alias{leaveOneOut.competing.risks} \alias{leaveOneOut.survival} \title{Compute jackknife pseudo values.} \usage{ leaveOneOut(object, times, cause, lag = FALSE, ...) } \arguments{ \item{object}{Object of class \code{"prodlim"}.} \item{times}{time points at which to compute leave-one-out event/survival probabilities.} \item{cause}{For competing risks the cause of interest.} \item{lag}{For survival models only. If \code{TRUE} lag the result, i.e. compute S(t-) instead of S(t).} \item{...}{not used} } \description{ Compute leave-one-out estimates } \details{ This function is the work-horse for \code{jackknife} } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{jackknife}} } prodlim/man/redist.Rd0000644000175100001440000000134313035633435014300 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/redist.R \name{redist} \alias{redist} \title{Calculation of Efron's re-distribution to the right algorithm to obtain the Kaplan-Meier estimate.} \usage{ redist(time, status) } \arguments{ \item{time}{A numeric vector of event times.} \item{status}{The event status vector takes the value \code{1} for observed events and the value \code{0} for right censored times.} } \value{ Calculations needed to } \description{ Calculation of Efron's re-distribution to the right algorithm to obtain the Kaplan-Meier estimate. } \examples{ redist(time=c(.35,0.4,.51,.51,.7,.73),status=c(0,1,1,0,0,1)) } \author{ Thomas A. Gerds } \seealso{ prodlim } prodlim/man/survModel.Rd0000644000175100001440000000107013035633435014763 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/survModel.R \name{survModel} \alias{survModel} \title{Survival model for simulation} \usage{ survModel() } \value{ A structural equation model initialized with three variables: the latent event time, the latent right censored time, and the observed right censored event time. } \description{ Create a survival model to simulate a right censored event time data without covariates } \details{ This function requires the \code{lava} package. } \author{ Thomas A. Gerds } prodlim/man/predict.prodlim.Rd0000755000175100001440000000732113035633435016112 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict.prodlim.R \name{predict.prodlim} \alias{predict.prodlim} \alias{predictCuminc} \alias{predictSurv} \title{Predicting event probabilities from product limit estimates} \usage{ \method{predict}{prodlim}(object, times, newdata, level.chaos = 1, type = c("surv", "cuminc", "list"), mode = "list", bytime = FALSE, cause = 1, ...) } \arguments{ \item{object}{A fitted object of class "prodlim".} \item{times}{Vector of times at which to return the estimated probabilities.} \item{newdata}{A data frame with the same variable names as those that appear on the right hand side of the 'prodlim' formula. If there are covariates this argument is required.} \item{level.chaos}{Integer specifying the sorting of the output: `0' sort by time and newdata; `1' only by time; `2' no sorting at all} \item{type}{Choice between "surv","cuminc","list": "surv": predict survival probabilities only survival models "cuminc": predict cumulative incidences only competing risk models "list": find the indices corresponding to times and newdata. See value. Defaults to "surv" for two-state models and to "cuminc" for competing risk models.} \item{mode}{Only for \code{type=="surv"} and \code{type=="cuminc"}. Can either be "list" or "matrix". For "matrix" the predicted probabilities will be returned in matrix form.} \item{bytime}{Logical. If TRUE and \code{mode=="matrix"} the matrix with predicted probabilities will have a column for each time and a row for each newdata. Only when \code{object$covariate.type>1} and more than one time is given.} \item{cause}{The cause for predicting the cause-specific cumulative incidence function in competing risk models.} \item{\dots}{Only for compatibility reasons.} } \value{ \code{type=="surv"} A list or a matrix with survival probabilities for all times and all newdata. \code{type=="cuminc"} A list or a matrix with cumulative incidences for all times and all newdata. \code{type=="list"} A list with the following components: \item{times}{The argument \code{times} carried forward} \item{predictors}{The relevant part of the argument \code{newdata}.} \item{indices}{ A list with the following components \code{time}: Where to find values corresponding to the requested times \code{strata}: Where to find values corresponding to the values of the variables in newdata. Together time and strata show where to find the predicted probabilities. } \item{dimensions}{ a list with the following components: \code{time} : The length of \code{times} \code{strata} : The number of rows in \code{newdata} \code{names.strata} : Labels for the covariate values. } } \description{ Evaluation of estimated survival or event probabilities at given times and covariate constellations. } \details{ Predicted (survival) probabilities are returned that can be plotted, summarized and used for inverse of probability of censoring weighting. } \examples{ dat <- SimSurv(400) fit <- prodlim(Hist(time,status)~1,data=dat) ## predict the survival probs at selected times predict(fit,times=c(10,100,1000)) ## works also outside the usual range of the Kaplan-Meier predict(fit,times=c(-1,0,10,100,1000,10000)) ## newdata is required if there are strata ## or neighborhoods (i.e. overlapping strata) mfit <- prodlim(Hist(time,status)~X1+X2,data=dat) predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,]) ## this can be requested in matrix form predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,],mode="matrix") ## and even transposed predict(mfit,times=c(-1,0,10,100,1000,10000),newdata=dat[18:21,],mode="matrix",bytime=TRUE) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{predictSurvIndividual}} } \keyword{survival} prodlim/man/print.prodlim.Rd0000755000175100001440000000116113035633435015610 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.prodlim.R \name{print.prodlim} \alias{print.Hist} \alias{print.neighborhood} \alias{print.prodlim} \title{Print objects in the prodlim library} \usage{ \method{print}{prodlim}(x, ...) } \arguments{ \item{x}{Object of class \code{prodlim}, \code{Hist} and \code{neighborhood}.} \item{\dots}{Not used.} } \description{ Pretty printing of objects created with the functionality of the `prodlim' library. } \author{ Thomas Gerds } \seealso{ \code{\link{summary.prodlim}}, \code{\link{predict.prodlim}} } \keyword{survival} prodlim/man/plotCompetingRiskModel.Rd0000755000175100001440000000137413035633435017453 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotCompetingRiskModel.R \name{plotCompetingRiskModel} \alias{plotCompetingRiskModel} \title{Plotting a competing-risk-model.} \usage{ plotCompetingRiskModel(stateLabels, horizontal = TRUE, ...) } \arguments{ \item{stateLabels}{Labels for the boxes.} \item{horizontal}{The orientation of the plot.} \item{\dots}{Arguments passed to \code{\link{plot.Hist}}.} } \description{ Plotting a competing-risk-model. } \examples{ plotCompetingRiskModel() plotCompetingRiskModel(labels=c("a","b")) plotCompetingRiskModel(labels=c("a","b","c")) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{plotIllnessDeathModel}}, \code{\link{plot.Hist}} } \keyword{survival} prodlim/man/SimSurv.Rd0000755000175100001440000000136613035633435014426 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/SimSurv.R \name{SimSurv} \alias{SimSurv} \title{Simulate survival data} \usage{ SimSurv(N, ...) } \arguments{ \item{N}{sample size} \item{...}{do nothing} } \value{ data.frame with simulated data } \description{ Simulate right censored survival data with two covariates X1 and X2, both have effect exp(1) on the hazard of the unobserved event time. } \details{ This function calls \code{survModel}, then adds covariates and finally calls \code{sim.lvm}. } \examples{ SimSurv(10) } \author{ Thomas Alexander Gerds } \references{ Bender, Augustin & Blettner. Generating survival times to simulate Cox proportional hazards models. Statistics in Medicine, 24: 1713-1723, 2005. } prodlim/man/SimCompRisk.Rd0000644000175100001440000000124513035633435015207 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/SimCompRisk.R \name{SimCompRisk} \alias{SimCompRisk} \title{Simulate competing risks data} \usage{ SimCompRisk(N, ...) } \arguments{ \item{N}{sample size} \item{...}{do nothing.} } \value{ data.frame with simulated data } \description{ Simulate right censored competing risks data with two covariates X1 and X2. Both covariates have effect exp(1) on the hazards of event 1 and zero effect on the hazard of event 2. } \details{ This function calls \code{crModel}, then adds covariates and finally calls \code{sim.lvm}. } \examples{ SimCompRisk(10) } \author{ Thomas Alexander Gerds } prodlim/man/predictSurvIndividual.Rd0000755000175100001440000000166113035633435017337 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predictSurvIndividual.R \name{predictSurvIndividual} \alias{predictSurvIndividual} \title{Predict individual survival probabilities} \usage{ predictSurvIndividual(object, lag = 1) } \arguments{ \item{object}{A fitted object of class "prodlim".} \item{lag}{Integer. `0' means predictions at the individual times, 1 means just before the individual times, etc.} } \value{ A vector of survival probabilities. } \description{ Function to extract the predicted probabilities at the individual event times that have been used for fitting a prodlim object. } \examples{ SurvFrame <- data.frame(time=1:10,status=rbinom(10,1,.5)) x <- prodlim(formula=Hist(time=time,status!=0)~1,data=SurvFrame) predictSurvIndividual(x,lag=1) } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk} } \seealso{ \code{\link{predict.prodlim}},\code{\link{predictSurv}}, } \keyword{survival} prodlim/man/EventHistory.frame.Rd0000644000175100001440000001512013035633435016540 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/EventHistory.frame.R \name{EventHistory.frame} \alias{EventHistory.frame} \title{Event history frame} \usage{ EventHistory.frame(formula, data, unspecialsDesign = TRUE, specials, specialsFactor = TRUE, specialsDesign = FALSE, stripSpecials = NULL, stripArguments = NULL, stripAlias = NULL, stripUnspecials = NULL, dropIntercept = TRUE, check.formula = TRUE, response = TRUE) } \arguments{ \item{formula}{Formula whose left hand side specifies the event history, i.e., either via Surv() or Hist().} \item{data}{Data frame in which the formula is interpreted} \item{unspecialsDesign}{Passed as is to \code{\link{model.design}}.} \item{specials}{Character vector of special function names. Usually the body of the special functions is function(x)x but e.g., \code{\link{strata}} from the survival package does treat the values} \item{specialsFactor}{Passed as is to \code{\link{model.design}}.} \item{specialsDesign}{Passed as is to \code{\link{model.design}}} \item{stripSpecials}{Passed as \code{specials} to \code{\link{strip.terms}}} \item{stripArguments}{Passed as \code{arguments} to \code{\link{strip.terms}}} \item{stripAlias}{Passed as \code{alias.names} to \code{\link{strip.terms}}} \item{stripUnspecials}{Passed as \code{unspecials} to \code{\link{strip.terms}}} \item{dropIntercept}{Passed as is to \code{\link{model.design}}} \item{check.formula}{If TRUE check if formula is a Surv or Hist thing.} \item{response}{If FALSE do not get response data (event.history).} } \value{ A list which contains - the event.history (see \code{\link{Hist}}) - the design matrix (see \code{\link{model.design}}) - one entry for each special (see \code{\link{model.design}}) } \description{ Extract event history data and design matrix including specials from call } \details{ Obtain a list with the data used for event history regression analysis. This function cannot be used directly on the user level but inside a function to prepare data for survival analysis. } \examples{ ## Here are some data with an event time and no competing risks ## and two covariates X1 and X2. ## Suppose we want to declare that variable X1 is treated differently ## than variable X2. For example, X1 could be a cluster variable, or ## X1 should have a proportional effect on the outcome. dsurv <- data.frame(time=1:7, status=c(0,1,1,0,0,0,1), X2=c(2.24,3.22,9.59,4.4,3.54,6.81,5.05), X3=c(1,1,1,1,0,0,1), X4=c(44.69,37.41,68.54,38.85,35.9,27.02,41.84), X1=factor(c("a","b","a","c","c","a","b"), levels=c("c","a","b"))) ## We pass a formula and the data e <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, data=dsurv, specials=c("prop","cluster"), stripSpecials=c("prop","cluster")) names(e) ## The first element is the event.history which is result of the left hand ## side of the formula: e$event.history ## same as with(dsurv,Hist(time,status)) ## to see the structure do colnames(e$event.history) unclass(e$event.history) ## in case of competing risks there will be an additional column called event, ## see help(Hist) for more details ## The other elements are the design, i.e., model.matrix for the non-special covariates e$design ## and a data.frame for the special covariates e$prop ## The special covariates can be returned as a model.matrix e2 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, data=dsurv, specials=c("prop","cluster"), stripSpecials=c("prop","cluster"), specialsDesign=TRUE) e2$prop ## and the non-special covariates can be returned as a data.frame e3 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4, data=dsurv, specials=c("prop","cluster"), stripSpecials=c("prop","cluster"), specialsDesign=TRUE, unspecialsDesign=FALSE) e3$design ## the general idea is that the function is used to parse the combination of ## formula and data inside another function. Here is an example with ## competing risks SampleRegression <- function(formula,data=parent.frame()){ thecall <- match.call() ehf <- EventHistory.frame(formula=formula, data=data, stripSpecials=c("prop","cluster","timevar"), specials=c("prop","timevar","cluster")) time <- ehf$event.history[,"time"] status <- ehf$event.history[,"status"] ## event as a factor if (attr(ehf$event.history,"model")=="competing.risks"){ event <- ehf$event.history[,"event"] Event <- getEvent(ehf$event.history) list(response=data.frame(time,status,event,Event),X=ehf[-1]) } else{ # no competing risks list(response=data.frame(time,status),X=ehf[-1]) } } dsurv$outcome <- c("cause1","0","cause2","cause1","cause2","cause2","0") SampleRegression(Hist(time,outcome)~prop(X1)+X2+cluster(X3)+X4,dsurv) ## let's test if the parsing works form1 <- Hist(time,outcome!="0")~prop(X1)+X2+cluster(X3)+X4 form2 <- Hist(time,outcome)~prop(X1)+cluster(X3)+X4 ff <- list(form1,form2) lapply(ff,function(f){SampleRegression(f,dsurv)}) ## here is what the riskRegression package uses to ## distinguish between covariates with ## time-proportional effects and covariates with ## time-varying effects: \dontrun{ library(riskRegression) data(Melanoma) f <- Hist(time,status)~prop(thick)+strata(sex)+age+prop(ulcer,power=1)+timevar(invasion,test=1) ## here the unspecial terms, i.e., the term age is treated as prop ## also, strata is an alias for timvar EHF <- prodlim::EventHistory.frame(formula, Melanoma[1:10], specials=c("timevar","strata","prop","const","tp"), stripSpecials=c("timevar","prop"), stripArguments=list("prop"=list("power"=0), "timevar"=list("test"=0)), stripAlias=list("timevar"=c("strata"), "prop"=c("tp","const")), stripUnspecials="prop", specialsDesign=TRUE, dropIntercept=TRUE) EHF$prop EHF$timevar } } \author{ Thomas A. Gerds } \seealso{ model.frame model.design Hist } prodlim/man/jackknife.Rd0000755000175100001440000000321113035633435014732 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/jackknife.R \name{jackknife} \alias{jackknife} \alias{jackknife.competing.risks} \alias{jackknife.survival} \title{Compute jackknife pseudo values.} \usage{ jackknife(object, times, cause, keepResponse = FALSE, ...) } \arguments{ \item{object}{Object of class \code{"prodlim"}.} \item{times}{Time points at which to compute pseudo values.} \item{cause}{For competing risks the cause of failure.} \item{keepResponse}{If \code{TRUE} add the model response, i.e. event time, event status, etc. to the result.} \item{...}{not used} } \description{ Compute jackknife pseudo values. } \details{ Compute jackknife pseudo values based on marginal Kaplan-Meier estimate of survival, or based on marginal Aalen-Johansen estimate of cumulative incidence. } \note{ The R-package pseudo does a similar job, and appears to be a little faster in small samples, but much slower in large samples. See examples. } \examples{ ## pseudo-values for survival models d=SimSurv(20) f=prodlim(Hist(time,status)~1,data=d) jackknife(f,times=c(3,5)) ## in some situations it may be useful to attach the ## the event time history to the result jackknife(f,times=c(3,5),keepResponse=TRUE) # pseudo-values for competing risk models d=SimCompRisk(10) f=prodlim(Hist(time,event)~1,data=d) jackknife(f,times=c(3,10),cause=1) jackknife(f,times=c(3,10,17),cause=2) } \author{ Thomas Alexander Gerds } \references{ Andersen PK & Perme MP (2010). Pseudo-observations in survival analysis Statistical Methods in Medical Research, 19(1), 71-99. } \seealso{ \code{\link{prodlim}} } \keyword{survival} prodlim/man/model.design.Rd0000644000175100001440000001134213035633435015356 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/model.design.R \name{model.design} \alias{model.design} \title{Extract a design matrix and specials from a model.frame} \usage{ model.design(terms, data, xlev = NULL, dropIntercept = FALSE, maxOrder = 1, unspecialsDesign = TRUE, specialsFactor = FALSE, specialsDesign = FALSE) } \arguments{ \item{terms}{terms object as obtained either with function \code{terms} or \code{strip.terms}.} \item{data}{A data set in which terms are defined.} \item{xlev}{a named list of character vectors giving the full set of levels to be assumed for the factors. Can have less elements, in which case the other levels are learned from the \code{data}.} \item{dropIntercept}{If TRUE drop intercept term from the design matrix} \item{maxOrder}{An error is produced if special variables are involved in interaction terms of order higher than max.order.} \item{unspecialsDesign}{A logical value: if \code{TRUE} apply \code{\link{model.matrix}} to unspecial covariates. If \code{FALSE} extract unspecial covariates from data.} \item{specialsFactor}{A character vector containing special variables which should be coerced into a single factor. If \code{TRUE} all specials are treated in this way, if \code{FALSE} none of the specials is treated in this way.} \item{specialsDesign}{A character vector containing special variables which should be transformed into a design matrix via \code{\link{model.matrix}}. If \code{TRUE} all specials are treated in this way.} } \value{ A list which contains - the design matrix with the levels of the variables stored in attribute 'levels' - separate data.frames which contain the values of the special variables. } \description{ Extract design matrix and data specials from a model.frame } \details{ The function separates special terms from the unspecial terms and returns a list of design matrices, one for unspecial terms and one for each special. Some special specials cannot or should not be evaluated in data. E.g., \code{y~a+dummy(x)+strata(v)} the function strata can and should be evaluated, but in order to have \code{model.frame} also evaluate dummy(x) one would be to define and export the function \code{dummy}. Still the term \code{dummy(x)} can be used to identify a special treatment of the variable \code{x}. To deal with this case, one can specify \code{stripSpecials="dummy"}. In addition, the data should include variables \code{strata(z)} and \code{x}, not \code{dummy(x)}. See examples. The function \code{untangle.specials} of the survival function does a similar job. } \examples{ # specials that are evaluated. here ID needs to be defined set.seed(8) d <- data.frame(y=rnorm(5),x=factor(c("a","b","b","a","c")),z=c(2,2,7,7,7),v=sample(letters)[1:5]) d$z <- factor(d$z,levels=c(1:8)) ID <- function(x)x f <- formula(y~x+ID(z)) t <- terms(f,special="ID",data=d) mda <- model.design(terms(t),data=d,specialsFactor=TRUE) mda$ID mda$design ## mdb <- model.design(terms(t),data=d,specialsFactor=TRUE,unspecialsDesign=FALSE) mdb$ID mdb$design # set x-levels attr(mdb$ID,"levels") attr(model.design(terms(t),data=d,xlev=list("ID(z)"=1:10), specialsFactor=TRUE)$ID,"levels") # special specials (avoid define function SP) f <- formula(y~x+SP(z)+factor(v)) t <- terms(f,specials="SP",data=d) st <- strip.terms(t,specials="SP",arguments=NULL) md2a <- model.design(st,data=d,specialsFactor=TRUE,specialsDesign="SP") md2a$SP md2b <- model.design(st,data=d,specialsFactor=TRUE,specialsDesign=FALSE) md2b$SP # special function with argument f2 <- formula(y~x+treat(z,power=2)+treat(v,power=-1)) t2 <- terms(f2,special="treat") st2 <- strip.terms(t2,specials="treat",arguments=list("treat"=list("power"))) model.design(st2,data=d,specialsFactor=FALSE) model.design(st2,data=d,specialsFactor=TRUE) model.design(st2,data=d,specialsDesign=TRUE) library(survival) data(pbc) t3 <- terms(Surv(time,status!=0)~factor(edema)*age+strata(I(log(bili)>1))+strata(sex), specials=c("strata","cluster")) st3 <- strip.terms(t3,specials=c("strata"),arguments=NULL) md3 <- model.design(terms=st3,data=pbc[1:4,]) md3$strata md3$cluster f4 <- Surv(time,status)~age+const(factor(edema))+strata(sex,test=0)+prop(bili,power=1)+tp(albumin) t4 <- terms(f4,specials=c("prop","timevar","strata","tp","const")) st4 <- strip.terms(t4, specials=c("prop","timevar"), unspecials="prop", alias.names=list("timevar"="strata","prop"=c("const","tp")), arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) formula(st4) md4 <- model.design(st4,data=pbc[1:4,],specialsDesign=TRUE) md4$prop md4$timevar } \author{ Thomas A. Gerds } \seealso{ \code{\link{EventHistory.frame}} model.frame terms model.matrix .getXlevels } prodlim/man/Hist.Rd0000755000175100001440000001463513035633435013730 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Hist.R \name{Hist} \alias{Hist} \title{Create an event history response variable} \usage{ Hist(time, event, entry = NULL, id = NULL, cens.code = "0", addInitialState = FALSE) } \arguments{ \item{time}{for right censored data a numeric vector of event times -- for interval censored data a list or a data.frame providing two numeric vectors the left and right endpoints of the intervals. See \code{Details}.} \item{event}{A vector or a factor that specifies the events that occurred at the corresponding value of \code{time}. Numeric, character and logical values are recognized. It can also be a list or a data.frame for the longitudinal form of storing the data of a multi state model -- see \code{Details}.} \item{entry}{Vector of delayed entry times (left-truncation) or list of two times when the entry time is interval censored.} \item{id}{Identifies the subjects to which multiple events belong for the longitudinal form of storing the data of a multi state model -- see \code{Details}.} \item{cens.code}{A character or numeric vector to identify the right censored observations in the values of \code{event}. Defaults to "0" which is equivalent to 0.} \item{addInitialState}{If TRUE, an initial state is added to all ids for the longitudinal input form of a multi-state model.} } \value{ An object of class \code{Hist} for which there are print and plot methods. The object's internal is a matrix with some of the following columns: \item{time}{ the right censored times} \item{L}{the left endpoints of internal censored event times} \item{R}{the right endpoints of internal censored event times} \item{status}{\code{0} for right censored, \code{1} for exact, and \code{2} for interval censored event times.} \item{event}{an integer valued numeric vector that codes the events.} \item{from}{an integer valued numeric vector that codes the \code{from} states of a transition in a multi state model.} \item{to}{an integer valued numeric vector that codes the \code{to} states of a transition in a multi state model.} Further information is stored in \code{\link{attributes}}. The key to the official names given to the events and the from and to states is stored in an attribute "states". } \description{ Functionality for managing censored event history response data. The function can be used as the left hand side of a formula: \code{Hist} serves \code{\link{prodlim}} in a similar way as \code{\link{Surv}} from the survival package serves `survfit'. \code{Hist} provides the suitable extensions for dealing with right censored and interval censored data from competing risks and other multi state models. Objects generated with \code{Hist} have a print and a plot method. } \details{ *Specification of the event times* If \code{time} is a numeric vector then the values are interpreted as right censored event times, ie as the minimum of the event times and the censoring times. If \code{time} is a list with two elements or data frame with two numeric columns The first element (column) is used as the left endpoints of interval censored observations and the second as the corresponding right endpoints. When the two endpoints are equal, then this observation is treated as an exact uncensored observation of the event time. If the value of the right interval endpoint is either \code{NA} or \code{Inf}, then this observation is treated as a right censored observation. Right censored observations can also be specified by setting the value of \code{event} to \code{cens.code}. This latter specification of right censored event times overwrites the former: if \code{event} equals \code{cens.code} the observation is treated as right censored no matter what the value of the right interval endpoint is. *Specification of the events* If \code{event} is a numeric, character or logical vector then the order of the attribute "state" given to the \code{value} of \code{Hist} is determined by the order in which the values appear. If it is a factor then the order from the levels of the factor is used instead. **Normal form of a multi state model** If \code{event} is a list or a data.frame with exactly two elements, then these describe the transitions in a multi state model that occurred at the corresponding \code{time} as follows: The values of the first element are interpreted as the \code{from} states of the transition and values of the second as the corresponding \code{to} states. **Longitudinal form of a multi state model** If \code{id} is given then \code{event} must be a vector. In this case two subsequent values of \code{event} belonging to the same value of \code{id} are treated as the \code{from} and \code{to} states of the transitions. } \examples{ ## Right censored responses of a two state model ## --------------------------------------------- Hist(time=1:10,event=c(0,1,0,0,0,1,0,1,0,0)) ## change the code for events and censored observations Hist(time=1:10,event=c(99,"event",99,99,99,"event",99,"event",99,99),cens.code=99) TwoStateFrame <- SimSurv(10) SurvHist <- with(TwoStateFrame,Hist(time,status)) summary(SurvHist) plot(SurvHist) ## Right censored data from a competing risk model ## -------------------------------------------------- CompRiskFrame <- data.frame(time=1:10,event=c(1,2,0,3,0,1,2,1,2,1)) CRHist <- with(CompRiskFrame,Hist(time,event)) summary(CRHist) plot(CRHist) ## Interval censored data from a survival model icensFrame <- data.frame(L=c(1,1,3,4,6),R=c(2,NA,3,6,9),event=c(1,1,1,2,2)) with(icensFrame,Hist(time=list(L,R))) ## Interval censored data from a competing risk model with(icensFrame,Hist(time=list(L,R),event)) ## Multi state model MultiStateFrame <- data.frame(time=1:10, from=c(1,1,3,1,2,4,1,1,2,1), to=c(2,3,1,2,4,2,3,2,4,4)) with(MultiStateFrame,Hist(time,event=list(from,to))) ## MultiState with right censored observations MultiStateFrame1 <- data.frame(time=1:10, from=c(1,1,3,2,1,4,1,1,3,1), to=c(2,3,1,0,2,2,3,2,0,4)) with(MultiStateFrame1,Hist(time,event=list(from,to))) ## Using the longitudinal input method MultiStateFrame2 <- data.frame(time=c(0,1,2,3,4,0,1,2,0,1), event=c(1,2,3,0,1,2,4,2,1,2), id=c(1,1,1,1,2,2,2,2,3,3)) with(MultiStateFrame2,Hist(time,event=event,id=id)) } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk}, Arthur Allignol \email{arthur.allignol@fdm.uni-freiburg.de} } \seealso{ \code{\link{plot.Hist}}, \code{\link{summary.Hist}}, \code{\link{prodlim}} } \keyword{survival} prodlim/man/neighborhood.Rd0000755000175100001440000000327113035633435015462 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/neighborhood.R \name{neighborhood} \alias{neighborhood} \title{Nearest neighborhoods for kernel smoothing} \usage{ neighborhood(x, bandwidth = NULL, kernel = "box") } \arguments{ \item{x}{Numeric vector -- typically the observations of a continuous random variate.} \item{bandwidth}{Controls the distance between neighbors in a neighborhood. It can be a decimal, i.e.\ the bandwidth, or the string `"smooth"', in which case \code{N^{-1/4}} is used, \code{N} being the sample size, or \code{NULL} in which case the \code{\link{dpik}} function of the package KernSmooth is used to find the optimal bandwidth.} \item{kernel}{Only the rectangular kernel ("box") is implemented.} } \value{ An object of class 'neighborhood'. The value is a list that includes the unique values of `x' (\code{values}) for which a neighborhood, consisting of the nearest neighbors, is defined by the first neighbor (\code{first.nbh}) of the usually very long vector \code{neighbors} and the size of the neighborhood (\code{size.nbh}). Further values are the arguments \code{bandwidth}, \code{kernel}, the total sample size \code{n} and the number of unique values \code{nu}. } \description{ Nearest neighborhoods for the values of a continuous predictor. The result is used for the conditional Kaplan-Meier estimator and other conditional product limit estimators. } \examples{ d <- SimSurv(20) neighborhood(d$X2) } \author{ Thomas Gerds } \references{ Stute, W. "Asymptotic Normality of Nearest Neighbor Regression Function Estimates", \emph{The Annals of Statistics}, 1984,12,917--926. } \seealso{ \code{\link{dpik}}, \code{\link{prodlim}} } \keyword{smooth} prodlim/man/sindex.Rd0000755000175100001440000000310413035633435014300 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sindex.R \name{sindex} \alias{sindex} \title{Index for evaluation of step functions.} \usage{ sindex(jump.times, eval.times, comp = "smaller", strict = FALSE) } \arguments{ \item{jump.times}{Numeric vector: e.g. the unique jump times of a step function.} \item{eval.times}{Numeric vector: e.g. the times where the step function should be evaluated} \item{comp}{If "greater" count the number of jump times that are greater (greater or equal when strict==FALSE) than the eval times} \item{strict}{If TRUE make the comparison of jump times and eval times strict} } \value{ Index of the same length as \code{eval.times} containing the numbers of the \code{jump.times} that are smaller than or equal to \code{eval.times}. } \description{ Returns an index of positions. Intended for evaluating a step function at selected times. The function counts how many elements of a vector, e.g. the jump times of the step function, are smaller or equal to the elements in a second vector, e.g. the times where the step function should be evaluated. } \details{ If all \code{jump.times} are greater than a particular \code{eval.time} the sindex returns \code{0}. This must be considered when sindex is used for subsetting, see the Examples below. } \examples{ test <- list(time = c(1, 1,5,5,2,7,9), status = c(1,0,1,0,1,1,0)) fit <- prodlim(Hist(time,status)~1,data=test) jtimes <- fit$time etimes <- c(0,.5,2,8,10) fit$surv c(1,fit$surv)[1+sindex(jtimes,etimes)] } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk} } \keyword{misc} prodlim/man/summary.Hist.Rd0000755000175100001440000000204113035633435015410 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summary.Hist.R \name{summary.Hist} \alias{summary.Hist} \title{Summary of event histories} \usage{ \method{summary}{Hist}(object, verbose = TRUE, ...) } \arguments{ \item{object}{An object with class `Hist' derived with \code{\link{Hist}}} \item{verbose}{Logical. If FALSE any printing is supressed.} \item{\dots}{Not used} } \value{ \code{NULL} for survival and competing risk models. For other multi-state models, it is a list with the following entries: \item{states}{the states of the model} \item{transitions}{the transitions between the states} \item{trans.frame}{a data.frame with the from and to states of the transitions} } \description{ Describe events and censoring patterns of an event history. } \examples{ icensFrame <- data.frame(L=c(1,1,3,4,6),R=c(2,NA,3,6,9),event=c(1,1,1,2,2)) with(icensFrame,summary(Hist(time=list(L,R)))) } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk} } \seealso{ \code{\link{Hist}}, \code{\link{plot.Hist}} } \keyword{survival} prodlim/man/getStates.Rd0000644000175100001440000000104113035633435014744 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getStates.R \name{getStates} \alias{getStates} \title{States of a multi-state model} \usage{ getStates(object, ...) } \arguments{ \item{object}{Object of class \code{prodlim} or \code{Hist} .} \item{...}{not used} } \value{ A character vector with the states of the model. } \description{ Extract the states of a multi-state model } \details{ Applying this function to the fit of prodlim means to apply it to \code{fit$model.response}. } \author{ Thomas A. Gerds } prodlim/man/summary.prodlim.Rd0000755000175100001440000001302713035633435016155 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summary.prodlim.R \name{summary.prodlim} \alias{summary.prodlim} \title{Summary method for prodlim objects.} \usage{ \method{summary}{prodlim}(object, times, newdata, max.tables = 20, surv = TRUE, cause, intervals = FALSE, percent = FALSE, showTime = TRUE, asMatrix = FALSE, ...) } \arguments{ \item{object}{An object with class `prodlim' derived with \code{\link{prodlim}}} \item{times}{Vector of times at which to return the estimated probabilities.} \item{newdata}{A data frame with the same variable names as those that appear on the right hand side of the 'prodlim' formula. Defaults to \code{object$X}.} \item{max.tables}{Integer. If \code{newdata} is not given the value of \code{max.tables} decides about the maximal number of tables to be shown. Defaults to 20.} \item{surv}{Logical. If FALSE report event probabilities instead of survival probabilities. Only available for \code{object$model=="survival"}.} \item{cause}{The cause for predicting the cause-specific cumulative incidence function in competing risk models.} \item{intervals}{Logical. If TRUE count events and censored in intervals between the values of \code{times}.} \item{percent}{Logical. If TRUE all estimated values are multiplied by 100 and thus interpretable on a percent scale.} \item{showTime}{If \code{TRUE} evaluation times are put into a column of the output table, otherwise evaluation times are shown as rownames.} \item{asMatrix}{Control the output format when there are multiple life tables, either because of covariate strata or competing causes or both. If not missing and not FALSE, reduce multiple life tables into a matrix with new columns \code{X} for covariate strata and \code{Event} for competing risks.} \item{...}{Further arguments that are passed to the print function.} } \value{ A data.frame with the relevant information. } \description{ Summarizing the result of the product limit method in life-table format. Calculates the number of subjects at risk and counts events and censored observations at specified times or in specified time intervals. } \details{ For cluster-correlated data the number of clusters at-risk are are also given. Confidence intervals are displayed when they are part of the fitted object. } \examples{ library(lava) set.seed(17) m <- survModel() distribution(m,~age) <- uniform.lvm(30,80) distribution(m,~sex) <- binomial.lvm() m <- categorical(m,~z,K=3) regression(m,eventtime~age) <- 0.01 regression(m,eventtime~sex) <- -0.4 d <- sim(m,50) d$sex <- factor(d$sex,levels=c(0,1),labels=c("female","male")) d$Z <- factor(d$z,levels=c(1,0,2),labels=c("B","A","C")) # Univariate Kaplan-Meier # ----------------------------------------------------------------------------------------- fit0 <- prodlim(Hist(time,event)~1,data=d) summary(fit0) ## show survival probabilities as percentage and ## count number of events within intervals of a ## given time-grid: summary(fit0,times=c(1,5,10,12),percent=TRUE,intervals=TRUE) ## the result of summary has a print function ## which passes ... to print and print.listof sx <- summary(fit0,times=c(1,5,10,12),percent=TRUE,intervals=TRUE) print(sx,digits=3) ## show cumulative incidences (1-survival) summary(fit0,times=c(1,5,10,12),surv=FALSE,percent=TRUE,intervals=TRUE) # Stratified Kaplan-Meier # ----------------------------------------------------------------------------------------- fit1 <- prodlim(Hist(time,event)~sex,data=d) print(summary(fit1,times=c(1,5,10),intervals=TRUE,percent=TRUE),digits=3) summary(fit1,times=c(1,5,10),asMatrix=TRUE,intervals=TRUE,percent=TRUE) fit2 <- prodlim(Hist(time,event)~Z,data=d) print(summary(fit2,times=c(1,5,10),intervals=TRUE,percent=TRUE),digits=3) ## Continuous strata (Beran estimator) # ----------------------------------------------------------------------------------------- fit3 <- prodlim(Hist(time,event)~age,data=d) print(summary(fit3, times=c(1,5,10), newdata=data.frame(age=c(20,50,70)), intervals=TRUE, percent=TRUE),digits=3) ## stratified Beran estimator # ----------------------------------------------------------------------------------------- fit4 <- prodlim(Hist(time,event)~age+sex,data=d) print(summary(fit4, times=c(1,5,10), newdata=data.frame(age=c(20,50,70),sex=c("female","male","male")), intervals=TRUE, percent=TRUE),digits=3) print(summary(fit4, times=c(1,5,10), newdata=data.frame(age=c(20,50,70),sex=c("female","male","male")), intervals=TRUE,collapse=TRUE, percent=TRUE),digits=3) ## assess results from summary x <- summary(fit4,times=10,newdata=expand.grid(age=c(60,40,50),sex=c("male","female"))) cbind(names(x$table),do.call("rbind",lapply(x$table,round,2))) x <- summary(fit4,times=10,newdata=expand.grid(age=c(60,40,50),sex=c("male","female"))) ## Competing risks: Aalen-Johansen # ----------------------------------------------------------------------------------------- d <- SimCompRisk(30) crfit <- prodlim(Hist(time,event)~X1,data=d) summary(crfit,times=c(1,2,5)) summary(crfit,times=c(1,2,5),cause=1,intervals=TRUE) summary(crfit,times=c(1,2,5),cause=1,asMatrix=TRUE) summary(crfit,times=c(1,2,5),cause=1:2,asMatrix=TRUE) # extract the actual tables from the summary sumfit <- summary(crfit,times=c(1,2,5),print=FALSE) sumfit$table[[1]] # cause 1 sumfit$table[[2]] # cause 2 # ' } \author{ Thomas A. Gerds \email{tag@biostat.ku.dk} } \seealso{ \code{\link{prodlim}}, \code{\link{summary.Hist}} } \keyword{survival} prodlim/man/getEvent.Rd0000755000175100001440000000212413035633435014570 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getEvent.R \name{getEvent} \alias{getEvent} \title{Extract a column from an event history object.} \usage{ getEvent(object, mode = "factor", column = "event") } \arguments{ \item{object}{Object of class \code{"Hist"}.} \item{mode}{Return mode. One of \code{"numeric"}, \code{"character"}, or \code{"factor"}.} \item{column}{Name of the column to extract from the object.} } \description{ Extract a column from an event history object, as obtained with the function \code{\link{Hist}}. } \details{ Since objects of class \code{"Hist"} are also matrices, all columns are numeric or integer valued. To extract a correctly labeled version, the attribute \code{states} of the object is used to generate factor levels. } \examples{ dat= data.frame(time=1:5,event=letters[1:5]) x=with(dat,Hist(time,event)) ## inside integer unclass(x) ## extract event (the extra level "unknown" is for censored data) getEvent(x) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{Hist}} } \keyword{survival} prodlim/man/meanNeighbors.Rd0000755000175100001440000000120613035633435015570 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/meanNeighbors.R \name{meanNeighbors} \alias{meanNeighbors} \title{Helper function to obtain running means for prodlim objects.} \usage{ meanNeighbors(x, y, ...) } \arguments{ \item{x}{Object of class \code{"neighborhood"}.} \item{y}{Vector of numeric values.} \item{\dots}{Not used.} } \description{ Compute average values of a variable according to neighborhoods. } \examples{ meanNeighbors(x=1:10,y=c(1,10,100,1000,1001,1001,1001,1002,1002,1002)) } \author{ Thomas Alexander Gerds } \seealso{ \code{\link{neighborhood}} } \keyword{survival} prodlim/man/strip.terms.Rd0000644000175100001440000000666713035633435015316 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/strip.terms.R \name{strip.terms} \alias{strip.terms} \title{Strip special functions from terms} \usage{ strip.terms(terms, specials, alias.names = NULL, unspecials = NULL, arguments, keep.response = TRUE) } \arguments{ \item{terms}{Terms object} \item{specials}{Character vector of specials which should be stripped off} \item{alias.names}{Optional. A named list with alias names for the specials.} \item{unspecials}{Optional. A special name for treating all the unspecial terms.} \item{arguments}{A named list of arguments, one for each element of specials. Elements are passed to \code{parseSpecialNames}.} \item{keep.response}{Keep the response in the resulting object?} } \value{ Reformulated terms object with an additional attribute which contains the \code{stripped.specials}. } \description{ Reformulate a terms object such that some specials are stripped off } \details{ This function is used to remove special specials, i.e., those which cannot or should not be evaluated. IMPORTANT: the unstripped terms need to know about all specials including the aliases. See examples. } \examples{ ## parse a survival formula and identify terms which ## should be treated as proportional or timevarying: f <- Surv(time,status)~age+prop(factor(edema))+timevar(sex,test=0)+prop(bili,power=1) tt <- terms(f,specials=c("prop","timevar")) attr(tt,"specials") st <- strip.terms(tt,specials=c("prop","timevar"),arguments=NULL) formula(st) attr(st,"specials") attr(st,"stripped.specials") ## provide a default value for argument power of proportional treatment ## and argument test of timevarying treatment: st2 <- strip.terms(tt, specials=c("prop","timevar"), arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) formula(st2) attr(st2,"stripped.specials") attr(st2,"stripped.arguments") ## treat all unspecial terms as proportional st3 <- strip.terms(tt, unspecials="prop", specials=c("prop","timevar"), arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) formula(st3) attr(st3,"stripped.specials") attr(st3,"stripped.arguments") ## allow alias names: strata for timevar and tp, const for prop. ## IMPORTANT: the unstripped terms need to know about ## all specials including the aliases f <- Surv(time,status)~age+const(factor(edema))+strata(sex,test=0)+prop(bili,power=1)+tp(albumin) tt2 <- terms(f,specials=c("prop","timevar","strata","tp","const")) st4 <- strip.terms(tt2, specials=c("prop","timevar"), unspecials="prop", alias.names=list("timevar"="strata","prop"=c("const","tp")), arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) formula(st4) attr(st4,"stripped.specials") attr(st4,"stripped.arguments") ## test if alias works also without unspecial argument st5 <- strip.terms(tt2, specials=c("prop","timevar"), alias.names=list("timevar"="strata","prop"=c("const","tp")), arguments=list("prop"=list("power"=0),"timevar"=list("test"=0))) formula(st5) attr(st5,"stripped.specials") attr(st5,"stripped.arguments") library(survival) data(pbc) model.design(st4,data=pbc[1:3,],specialsDesign=TRUE) model.design(st5,data=pbc[1:3,],specialsDesign=TRUE) } \author{ Thomas A. Gerds } \seealso{ parseSpecialNames reformulate drop.terms } prodlim/man/parseSpecialNames.Rd0000644000175100001440000000421313035633435016404 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parseSpecialNames.R \name{parseSpecialNames} \alias{parseSpecialNames} \title{Parse special terms} \usage{ parseSpecialNames(x, special, arguments) } \arguments{ \item{x}{Vector of character strings.} \item{special}{A character string: the name of the special argument.} \item{arguments}{A vector which contains the arguments of the special function} } \value{ A named list of parsed arguments. The names of the list are the special variable names, the elements are lists of arguments. } \description{ Extract from a vector of character strings the names of special functions and auxiliary arguments } \details{ Signals an error if an element has more arguments than specified by argument arguments. } \examples{ ## ignore arguments parseSpecialNames("treat(Z)",special="treat") ## set default to 0 parseSpecialNames(c("log(Z)","a","log(B)"),special="log",arguments=list("base"=0)) ## set default to 0 parseSpecialNames(c("log(Z,3)","a","log(B,base=1)"),special="log",arguments=list("base"=0)) ## different combinations of order and names parseSpecialNames(c("log(Z,3)","a","log(B,1)"), special="log", arguments=list("base"=0)) parseSpecialNames(c("log(Z,1,3)","a","log(B,u=3)"), special="log", arguments=list("base"=0,"u"=1)) parseSpecialNames(c("log(Z,u=1,base=3)","a","log(B,u=3)"), special="log", arguments=list("base"=0,"u"=1)) parseSpecialNames(c("log(Z,u=1,base=3)","a","log(B,base=8,u=3)"), special="log", arguments=list("base"=0,"u"=1)) parseSpecialNames("treat(Z,u=2)", special="treat", arguments=list("u"=1,"k"=1)) parseSpecialNames(c("treat(Z,1,u=2)","treat(B,u=2,k=3)"), special="treat", arguments=list("u"=NA,"k"=NULL)) ## does not work to set default to NULL: parseSpecialNames(c("treat(Z,1,u=2)","treat(B,u=2)"), special="treat", arguments=list("u"=NA,"k"=NULL)) } \author{ Thomas A. Gerds } \seealso{ model.design }