npsurv/0000755000176200001440000000000013170333554011613 5ustar liggesusersnpsurv/NAMESPACE0000644000176200001440000000115112620714430013023 0ustar liggesusers# exportPattern("^[^\\.]") export("icendata","is.icendata","npsurv","Deltamatrix","idf","km","plotsurvidf","plotgradidf","Uhaz","uh","hazuh","chazuh","survuh","denuh","plothazuh","plotchazuh","plotsurvuh","plotdenuh","plotgraduh","logLikuh") import(lsei) importFrom("grDevices", "col2rgb", "hsv", "rgb2hsv") importFrom("graphics", "abline", "hist", "lines", "plot", "points", "segments", "legend", "rect") importFrom("methods", "getFunction") importFrom("stats", "aggregate", "weighted.mean") S3method(print, idf) S3method(plot, idf) S3method(plot, npsurv) S3method(print, uh) S3method(plot, uh) S3method(plot, Uhaz) npsurv/data/0000755000176200001440000000000012617511361012523 5ustar liggesusersnpsurv/data/ap.rda0000644000176200001440000000044112617434066013620 0ustar liggesusersŒN@ۢP@14@0p ȅ '#+߸z`otfhWOD\H=!R"ʂv$n9 2{cqPۚ 8'>_qI|-߉eqh~7_` VgA4ȌTA\Jm##=|!Dt6hٲm4vzf{m{Sl+OMNj>gX(QFCnpsurv/data/leukemia.rda0000644000176200001440000000045712617434066015023 0ustar liggesusers r0b```b`fbb`b2Y# 'I-NLd``A XA p2PZ JjPZ J@iC(m M6ҖPv ` U9-F+h 4Zha 0ˠUPpqpECs.u Wf^  F`&cǖZS d e!,fP6{@NbrjR>Fb>ii%E@?ty0PAF#,(M/gQ~L?(A2GsWJbI^ZP ) FAnpsurv/data/marijuana.rda0000644000176200001440000000033012617434066015164 0ustar liggesusers r0b```b`fbb`b2Y# 'M,*MK 3TրZhFцh6a.9p9poGa4v>\;a@g^`` / TsJf.eQb [ B0APkr~i^ T?EI3npsurv/data/gastric.rda0000644000176200001440000000072212617434066014656 0ustar liggesusersһkSa''mAI{k5jժV*^E6v*Nv;88yE;::8o~d2sN='Oؕ-f%EJG4M_)5)CL^_LZh6tяӸO__Xa]">o&l/KJk fJ+{]{~q< I,?Yx^k>~Ǿ[-'V7<,'}~}|׫vzfpϟ5W 󭞾oO[>+Z>ad6 Ujǫ]Gcj 9(4\L%SCַ/vh7`/a?^H?:cG mM8))Y`gqp!%AE8+`t*& 1;Ĺ Ϫz$unpsurv/data/acfail.rda0000644000176200001440000000062112617434066014437 0ustar liggesusers*Dqep!.]H$Iv>f 4))y#Q.='j ٵ7dWMFD<)< /) L3\> ߷Oy U%_Iٌ-%l-ahGarC/hlġ% $N17E=-ŝYs?O%_))2]>v5 lcqΏ''۠a䛘m|nC5{b;d?H?~9C2gr_G#rcX?=徳/V˗+5\S\Drk=gsO 
"MNXnpsurv/data/nzmort.rda0000644000176200001440000000245012617434066014553 0ustar liggesuserswSUIQSQQ"T(  i!m6HƂpggy8ϊ_']~}y,{>2y{e9 'k4L@ PNNSӨ'PSOJ33gPϤE=:z\lyssaռJJ**kk/_H~"/_J>B Sxi#FGGGGǘǘǘǘ71obļy3f̛0oa¼y+V[Ǚۜcw:VԂM}03ߣ^1gۙ3ogμy=p'O3uI{cs|<=֥^8͵~mN7^Œ!3O ]tuB{Q=^itQbS^~q[z3Ok=EBx]?>>t]uKsߋgUۿ~N3N5U}~= :>}NԺ OYuTycf4^U3N>U~~7yWq6] w2b.$}nw+9.󬤦7H]"Nus|PMԑNGpp:ph/f渡n8o5Nx)^gyi'i_K]^TSZ_Vʴޗƍ7o'RdJj<;uG%24ҙSw:xe\&vD_RyПXԙ=Dw@]2כ^m11YUW+er!{I@y=r 83ҬÁ,RG#Q@0@6iy<Ȧ$ SlJO(B@6,eMle(H#&./( 4s4fix)KÖf+ Yz&Ml1Kp@py+//"A^4eBBp.=@K@?@> U@e` X ց@eF06aQlUjp \7Mfp nw;]npAxH?}<O'Si<ρ Ex^M(;]x|>ҷ?Ogs_y6߂# ~\J˹'KT,ˢ%npsurv/data/cancer.rda0000644000176200001440000000066312617434066014461 0ustar liggesusersՕJ@Ƿ*C)jlMmC'!Km%z4ff! $3mv*N^!%ZfzdDVW}ֆ&j>p MqVGS9J/3JX'zl]"q839E.m>DX6ДyF|_g̷ffǴu~DS82D"@suXmᾦ}QǛ^$>!r=q>K?6|N#.~'s=^ar}o_ڮ9_=1寉%q/x zC*Loh-Ѧt!{t=4Pu1FE/\6{ / 6OFFgbhp7n\7Z<'npsurv/R/0000755000176200001440000000000013170234234012007 5ustar liggesusersnpsurv/R/icendata.R0000644000176200001440000000356212620006324013704 0ustar liggesusers######################## # # Interval-censored data # ######################## # # Also allows for exact observations included. 
icendata = function(x, w=1) { if(is.null(x)) return(NULL) if(is.icendata(x)) { if(all(w == 1)) return(x) w = rep(w, length = length(x$t) + nrow(x$o)) if(length(x$t) > 0) x$wt = x$wt * w[1:length(x$wt)] if(nrow(x$o) > 0) x$wo = x$wo * w[length(x$wt)+1:nrow(x$o)] return(x) } z = vector("list", 7) names(z) = c("t", "wt", "o", "wo", "i1", "upper", "u") if(is.vector(x)) x = cbind(x, x) if(!is.matrix(x)) x = as.matrix(x) if(ncol(x) == 3) {w = w * x[,3]; x = x[,1:2]} if(length(w) != nrow(x)) w = rep(w, len=nrow(x)) iw = w > 0 w = w[iw] x = x[iw,,drop=FALSE] o = order(x[,1], x[,2]) x = x[o,] w = w[o] id = c(TRUE, diff(x[,1]) > 0 | diff(x[,2]) > 0) id[is.na(id)] = FALSE # for Inf's w = aggregate(w, by=list(group=cumsum(id)), sum)[,2] x = x[id,] i = x[,1] == x[,2] z$t = x[i,1] names(z$t) = NULL z$wt = w[i] z$o = x[!i,1:2,drop=FALSE] dimnames(z$o) = list(NULL, c("L","R")) z$wo = w[!i] z$upper = max(x[,1]) z$i1 = z$t != z$upper z$u = sort(unique(c(0, pmin(c(x[,1], x[,2]), z$upper)))) class(z) = "icendata" z } is.icendata = function(x) "icendata" %in% class(x) # is.rightcensored.icendata = function(x) all(x$o[,2] == Inf) expand.icendata = function(x) { if(!is.icendata(x)) x = icendata(x) z = vector("list", 7) names(z) = c("t", "wt", "o", "wo", "i1", "upper", "u") z$upper = x$upper if(length(x$t) > 0) { z$t = rep(x$t, x$wt) z$wt = rep(1, length(z$t)) z$i1 = z$t != z$upper } else z$t = z$wt = numeric(0) if(nrow(x$o) > 0) { z$o = cbind(rep(x$o[,1], x$wo), rep(x$o[,2], x$wo)) z$wo = rep(1, nrow(z$o)) colnames(z$o) = c("L","R") } else {z$o = matrix(nrow=0, ncol=2); z$wo = numeric(0)} z$u = x$u class(z) = "icendata" z } npsurv/R/npsurv.R0000644000176200001440000004061413170234212013470 0ustar liggesusers# ----------------------------------------------------------------------- # # Nonparametric maximum likelihood estimation from interval-censored data # # ----------------------------------------------------------------------- # npsurv = function(data, w=1, maxit=100, tol=1e-6, verb=0) 
{ x2 = icendata(data, w) if(nrow(x2$o) == 0 || all(x2$o[,2] == Inf)) { # exact or right-censored only r0 = km(x2) r = list(f=r0$f, upper=max(x2$t, x2$o[,1]), convergence=TRUE, ll=r0$ll, maxgrad=0, numiter=1) return(structure(r, class="npsurv")) } x = rbind(cbind(x2$t, x2$t), x2$o) nx = nrow(x) w = c(x2$wt, x2$wo) wr = sqrt(w) n = sum(w) upper = x2$upper dmat = Deltamatrix(x) left = dmat$left right = dmat$right D = dmat$Delta m = length(left) p = double(m) i = rowSums(D) != 1 j = colSums(D[!i,,drop=FALSE]) > 0 j[c(1,m)] = TRUE repeat { # Initial p must ensure P > 0 jm = which.max(colSums(D[i,,drop=FALSE])) j[jm] = TRUE i[D[,jm]] = FALSE if( sum(i) == 0 ) break } p = colSums(w * D) * j p = p / sum(p) if(m >= 30) { ## Turn to HCNM r = hcnm(w=w, D=D, p0=p, maxit=maxit, tol=tol, verb=verb) j = r$pf > 0 f = idf(left[j], right[j], r$pf[j]) r = list(f=f, upper=upper, convergence=r$convergence, method="hcnm", ll=r$ll, maxgrad=r$maxgrad, numiter=r$numiter) return(structure(r, class="npsurv")) } P = drop(D %*% p) ll = sum( w * log(P) ) converge = FALSE for(i in 1:maxit) { p.old = p ll.old = ll S = D / P ## d = crossprod(w, S)[1,] d = colSums(w * S) dmax = max(d) - n if(verb > 0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(ll, 6), "\n") } if(verb > 1) cat("Maximum gradient: ", signif(dmax, 6), "\n") if(verb > 2) {cat("Probability vector:\n"); print(p)} j[which(j)-1 + aggregate(d, by=list(group=cumsum(j)), which.max)[,2]] = TRUE pj = pnnls(wr * S[,j,drop=FALSE], wr * 2, sum=1)$x p[j] = pj / sum(pj) alpha = 1 # line search pd = p - p.old lld = sum(d * pd) p.alpha = p repeat { P.alpha = drop(D %*% p.alpha) ll.alpha = sum(w * log(P.alpha)) if(ll.alpha >= ll + alpha * lld * .33) { p = p.alpha; P = P.alpha; ll = ll.alpha; break } if((alpha <- alpha * .5) < 1e-10) break p.alpha = p.old + alpha * pd } j = p > 0 if( ll <= ll.old + tol ) {converge=TRUE; break} } f = idf(left[j], right[j], p[j]) r = list(f=f, upper=upper, convergence=converge, method="cnm", 
ll=ll, maxgrad=max(crossprod(w/P, D))-n, numiter=i) structure(r, class="npsurv") } # LR matrix of intervals # An interval is either (Li, Ri] if Li < Ri, or [Li, Ri] if Li = Ri. Deltamatrix = function(LR) { L = LR[,1] R = LR[,2] ic = L != R # inverval-censored nc = sum(ic) # tol = max(R[R!=Inf]) * 1e-8 if(nc > 0) { L1 = L[ic] + max(R[R!=Inf]) * 1e-8 # open left endpoints LRc = cbind(c(L1, R[ic]), c(rep(0,nc), rep(1,nc)), rep(1:nc, 2)) LRc.o = LRc[order(LRc[,1]),] j = which(diff(LRc.o[,2]) == 1) left = L[ic][LRc.o[j,3]] right = R[ic][LRc.o[j+1,3]] } else left = right = numeric(0) if(nrow(LR) - nc > 0) { ut = unique(L[!ic]) jin = colSums(outer(ut, left, ">") & outer(ut, right, "<=")) > 0 left = c(ut, left[!jin]) # remove those that contain exact obs. right = c(ut, right[!jin]) o = order(left, right) left = left[o] right = right[o] } ## D = outer(L, left, "<=") & outer(R, right, ">=") D = outer(L, left, "<=") & outer(R, right, ">=") & (outer(L, right, "<") | outer(R, left, "==")) dimnames(D) = names(left) = names(right) = NULL list(left=left, right=right, Delta=D) } # interval distribution function, i.e., a distribution function defined on # a set of intervals. # left Left endpoints of the intervals # right Right endpoints of the intervals # p Probability masses allocated to the intervals idf = function(left, right, p) { if(length(left) != length(right)) stop("length(left) != length(right)") names(left) = names(right) = names(p) = NULL p = rep(p, length=length(left)) f = list(left=left, right=right, p=p/sum(p)) structure(f, class="idf") } print.idf = function(x, ...) { print(cbind(left=x$left, right=x$right, p=x$p), ...) 
} # Kaplan-Meier estimate of the survival function for right-censored data km = function(data, w=1) { x = icendata(data, w) if(any(x$o[,2] != Inf)) stop("Not all observations are exact or right-censored") if(nrow(x$o) == 0) { # no right-censored observations f = idf(x$t, x$t, x$wt) ll = sum(x$wt * log(f$p)) return(list(f=f, ll=ll)) } c = colSums(x$wo * outer(x$o[,1], x$t, "<")) n = sum(x$wt, x$wo) # number of observations r = n - c - c(0,cumsum(x$wt))[1:length(x$t)] # no. at risk S = cumprod(1 - x$wt/r) # survival prob. # tab = cbind(x$t, x$wt, c, r, S) p = rev(diff(rev(c(1,S,0)))) dc = x$wt + c if(max(x$t) > max(x$o[,1])) { f = idf(x$t, x$t, p[-length(p)]) ll = sum( x$wt * log(f$p) ) } else { f = idf(c(x$t,max(x$o[,1])), c(x$t,Inf), p) ll = sum(c(x$wt, n - sum(x$wt)) * log(f$p)) } list(f=f, ll=ll) } #### Plot functions plot.npsurv = function(x, ...) plot(x$f, ...) plot.idf = function(x, data, fn=c("surv","grad"), ...) { fn = match.arg(fn) fnR = getFunction(paste("plot",fn,"idf",sep="")) switch(fn, "surv" = fnR(x, ...), "grad" = fnR(x, data, ...) ) } plotgradidf = function(f, data, w=1, col1="red3", col2="blue3", xlab="Survival Time", ylab="Gradient", xlim, ...) { x2 = icendata(data, w) x = rbind(cbind(x2$t, x2$t), x2$o) w = c(x2$wt, x2$wo) dmat = Deltamatrix(x) D = dmat$Delta if(missing(xlim)) { upper = max(dmat$left, dmat$right[f$right 0 ms = sum(j) points(dmat$left[!j], rep(0,m-ms), pch=1, col=col2, cex=1) points(dmat$right[!j], rep(0, m-ms), pch=20, col=col2, cex=.8) segments(dmat$left[!j], rep(0, m-ms), pmin(dmat$right[!j], xlim[2]), rep(0, m-ms), col=col2, lwd=3) points(dmat$left[j], rep(0,ms), pch=1, col=col1, cex=1) points(dmat$right[j], rep(0, ms), pch=20, col=col1, cex=.8) segments(dmat$left[j], rep(0, ms), pmin(dmat$right[j], xlim[2]), rep(0, ms), col=col1, lwd=3) } plotsurvidf = function(f, style=c("box","uniform","left","right","midpoint"), xlab="Time", ylab="Survival Probability", col="blue3", fill=0, add=FALSE, lty=1, lty.inf=2, xlim, ...) 
{ style = match.arg(style) k = length(f$left) S = 1 - cumsum(f$p) upper = max(f$left, f$right[f$right != Inf]) if(max(f$right) == Inf) point.inf = upper * 1.2 else point.inf = upper if( missing(xlim) ) xlim = c(0, point.inf) m = length(f$p) if(!is.na(fill) && fill==0) { fill.hsv = drop(rgb2hsv(col2rgb(col))) * c(1, .3, 1) fill = hsv(fill.hsv[1], fill.hsv[2], fill.hsv[3], .3) } switch(style, box = { d = c(f$left[1], rep(f$right, rep(2,k)), f$right[k]) # right s = rep(c(1,S), rep(2,k+1)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper if( !add ) plot(d, s, type="n", col=col, xlim=xlim, xlab=xlab, ylab=ylab, lty=lty, ...) if(style == "box") { Sc = c(1, S) j = which(f$right > f$left) rect(f$left[j], Sc[j+1], f$right[j], Sc[j], border=col, col=fill) } lines(d, s, col=col, lty=lty, ...) lines(c(upper, point.inf), c(S[k-1],S[k-1]), col=col, lty=lty.inf) if(f$right[k] != Inf) { # left d = rep(c(f$left,f$right[k]), rep(2,k+1)) s = c(1,rep(S, rep(2,k)),0) } else { d = rep(f$left, c(rep(2,k-1), 1)) s = c(1,rep(S[-k], rep(2,k-1))) } add = TRUE }, left = { d = rep(c(f$left,f$right[k]), rep(2,k+1)) s = c(1,rep(S, rep(2,k)),0) d[2*k+2] = upper }, right = { d = c(f$left[1], rep(f$right, rep(2,k)), f$right[k]) s = rep(c(1,S), rep(2,k+1)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper }, midpoint = { d1 = (f$left + f$right) / 2 d = c(f$left[1], rep(d1, rep(2,k)), f$right[k]) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper s = rep(c(1,S), rep(2,k+1)) }, uniform = { d = c(rbind(f$left,f$right), rep(f$right[k],2)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper s = c(1,rep(S, rep(2,k)),S[k]) } ) if( add ) lines(d, s, col=col, lty=lty, ...) else plot(d, s, type="l", col=col, xlim=xlim, xlab=xlab, ylab=ylab, lty=lty, ...) 
abline(h=0, col="black") lines(c(0,f$left[1]), c(1,1), col=col) if(f$right[k] < Inf) lines(c(upper, point.inf), rep(0,2), col=col, lty=lty) else points(upper, S[k-1], col=col, pch=20) } ## ========================================================================== ## Hierarchical CNM: a variant of the Constrained Newton Method for finding ## the NPMLE survival function of a data set containing interval censoring. ## This is a new method to build on those in the Icens and MLEcens ## packages. It uses the idea of block subsets of the S matrix to move ## probability mass among blocks of candidate support intervals. ## ## Usage (parameters and return value) is similar to the methods in package ## Icens, although note the transposed clique matrix. ## ## Arguments: ## data: Data ## w: Weights ## D: Clique matrix, n*m (note, transposed c.f. Icens::EMICM, ## MLEcens::reduc). The clique matrix may contain conditional ## probabilities rather than just membership flags, for use in HCNM ## recursively calling itself. ## p0: Vector (length m) of initial estimates for the probabilities of ## the support intervals. ## maxit: Maximum number of iterations to perform ## tol: Tolerance for the stopping condition (in log-likelihood value) ## blockpar: ## NA or NULL means choose a value based on the data (using n and r) ## ==0 means same as cnm (don't do blocks) ## <1 means nblocks is this power of sj, e.g. 0.5 for sqrt ## >1 means exactly this block size (e.g. 40) ## recurs.maxit: For internal use only: maximum number of iterations in ## recursive calls ## depth: For internal use only: depth of recursion ## verb: For internal use only: depth of recursion ## Author: Stephen S. Taylor and Yong Wang ## Reference: Wang, Y. and Taylor, S. M. (2013). Efficient computation of ## nonparametric survival functions via a hierarchical mixture ## formulation. Statistics and Computing, 23, 713-725. 
## ========================================================================== hcnm = function(data, w=1, D=NULL, p0=NULL, maxit=100, tol=1e-6, blockpar=NULL, recurs.maxit=2, depth=1, verb=0) { if(missing(D)) { x2 = icendata(data, w) if(nrow(x2$o) == 0 || all(x2$o[,2] == Inf)) { # exact or right-censored only r0 = km(x2) r = list(f=r0$f, convergence=TRUE, ll=r0$ll, maxgrad=0, numiter=1) class(r) = "npsurv" return(r) } x = rbind(cbind(x2$t, x2$t), x2$o) nx = nrow(x) w = c(x2$wt, x2$wo) dmat = Deltamatrix(x) left = dmat$left right = dmat$right intervals = cbind(left, right) D = dmat$Delta } else { if (missing(p0)) stop("Must provide 'p0' with D.") if (!missing(data)) warning("D and data both provided. LR ignored!") nx = nrow(D) w = rep(w, length=nx) intervals = NULL } n = sum(w) wr = sqrt(w) converge = FALSE m = ncol(D) m1 = 1:m nblocks = 1 maxdepth = depth i = rowSums(D) == 1 r = mean(i) # Proportion of exact observations if(is.null(p0)) { ## Derive an initial p vector. j = colSums(D[i,,drop=FALSE]) > 0 while(any(c(FALSE,(i <- rowSums(D[,j,drop=FALSE])==0)))) { j[which.max(colSums(D[i,,drop=FALSE]))] = TRUE } p = colSums(w * D) * j } else { if(length(p <- p0) != m) stop("Argument 'p0' is the wrong length.") } p = p / sum(p) P = drop(D %*% p) ll = sum(w * log(P)) evenstep = FALSE for(iter in 1:maxit) { p.old = p ll.old = ll S = D / P g = colSums(w * S) dmax = max(g) - n if(verb > 0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(ll, 6), "\n") } if(verb > 1) cat("Maximum gradient: ", signif(dmax, 6), "\n") if(verb > 2) {cat("Probability vector:\n"); print(p)} j = p > 0 if(depth==1) { s = unique(c(1,m1[j],m)) if (length(s) > 1) for (l in 2:length(s)) { j[s[l-1] + which.max(g[s[l-1]:s[l]]) - 1] = TRUE } } sj = sum(j) ## BW: matrix of block weights: sj rows, nblocks columns if(is.null(blockpar) || is.na(blockpar)) ## Default blockpar based on log(sj) iter.blockpar = ifelse(sj < 30, 0, 1 - log(max(20,10*log(sj/100)))/log(sj)) else iter.blockpar = 
blockpar if(iter.blockpar==0 | sj < 30) { nblocks = 1 BW = matrix(1, nrow=sj, ncol=1) } else { nblocks = max(1, if(iter.blockpar>1) round(sj/iter.blockpar) else floor(min(sj/2, sj^iter.blockpar))) i = seq(0, nblocks, length=sj+1)[-1] if(evenstep) { nblocks = nblocks + 1 BW = outer(round(i)+1, 1:nblocks, "==") } else BW = outer(ceiling(i), 1:nblocks, "==") storage.mode(BW) = "numeric" } for(block in 1:nblocks) { jj = logical(m) jj[j] = BW[,block] > 0 sjj = sum(jj) if (sjj > 1 && (delta <- sum(p.old[jj])) > 0) { Sj = S[,jj] res = pnnls(wr * Sj, wr * drop(Sj %*% p.old[jj]) + wr, sum=delta) if (res$mode > 1) warning("Problem in pnnls(a,b)") p[jj] = p[jj] + BW[jj[j],block] * (res$x * (delta / sum(res$x)) - p.old[jj]) } } ## Maximise likelihood along the line between p and p.old p.gap = p - p.old # vector from old to new estimate ## extrapolated rise in ll, based on gradient at old estimate ll.rise.gap = sum(g * p.gap) alpha = 1 p.alpha = p ll.rise.alpha = ll.rise.gap repeat { P = drop(D %*% p.alpha) ll = sum(w * log(P)) if(ll >= ll.old && ll + ll.rise.alpha <= ll.old) { p = p.alpha # flat land reached converge = TRUE break } if(ll > ll.old && ll >= ll.old + ll.rise.alpha * .33) { p = p.alpha # Normal situation: new ll is higher break } if((alpha <- alpha * 0.5) < 1e-10) { p = p.old P = drop(D %*% p) ll = ll.old converge = TRUE break } p.alpha = p.old + alpha * p.gap ll.rise.alpha = alpha * ll.rise.gap } if(converge) break if (nblocks > 1) { ## Now jiggle p around among the blocks Q = sweep(BW,1,p[j],"*") # Matrix of weighted probabilities: [sj,nblocks] q = colSums(Q) # its column sums (total in each block) ## Now Q is n*nblocks Matrix of probabilities for mixture components Q = sweep(D[,j] %*% Q, 2, q, "/") if (any(q == 0)) { warning("A block has zero probability!") } else { ## Recursively call HCNM to allocate probability among the blocks res = hcnm(w=w, D=Q, p0=q, blockpar=iter.blockpar, maxit=recurs.maxit, recurs.maxit=recurs.maxit, depth=depth+1) maxdepth = 
max(maxdepth, res$maxdepth) if (res$ll > ll) { p[j] = p[j] * (BW %*% (res$pf / q)) P = drop(D %*% p) ll = sum(w * log(P)) # should match res$lval } } } if(iter > 2) if( ll <= ll.old + tol ) {converge=TRUE; break} evenstep = !evenstep } list(pf=p, intervals=intervals, convergence=converge, method="hcnm", ll=ll, maxgrad=max(crossprod(w/P, D))-n, numiter=iter) } npsurv/R/Uhaz.R0000644000176200001440000007142412764617531013066 0ustar liggesusers############################################ # Estimation of a U-shaped Hazard Function # ############################################ Uhaz = function(data, w=1, deg=1, maxit=100, tol=1e-6, verb=0) { x = icendata(data, w) h = uh.initial(x, deg) attr(h, "ll") = logLikuh(h, x) expdH = NULL bc = TRUE # boundary change convergence = 1 for(i in 1:maxit){ h.old = h if(nrow(x$o) > 0) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) maxima = maxgrad(h, x, expdH, bc=bc) np1 = maxima$np1 np2 = maxima$np2 h = uh(h$alpha, c(h$tau, np1), c(h$nu, double(length(np1))), c(h$eta, np2), c(h$mu, double(length(np2))), h$upper, h$deg, collapse=TRUE) r = updatemass(h, x, expdH, tol=tol) h = r$h if(h$deg == 0) {h = simplify(h); attr(h, "ll") = logLikuh(h, x)} if(verb>0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(attr(h,"ll"), 6), "\n") if(verb>1) cat("Gradient values: ", signif(dlogLik(h, x), 6), "\n") if(verb>2) {cat("hazard function:\n"); print(h)} } if(r$convergence == 1) bc = FALSE # backtracking failed. 
else if(attr(h, "ll") <= attr(h.old, "ll") + tol) {convergence = 0; break} } r = list(convergence=convergence, grad=dlogLik(h, x), numiter=i, ll=attr(h, "ll"), h=h) class(r) = "Uhaz" r } # Update masses updatemass = function(h, x, expdH=NULL, tol=1e-10) { tau = h$tau k = length(tau) j2 = h$eta != h$upper eta = h$eta = h$eta[j2] h$mu = h$mu[j2] m = length(eta) p = h$deg D1 = D2 = NULL t1 = x$t[x$i1] n1 = length(t1) if(n1 > 0) { tau.r = rep(tau, rep.int(n1,k)) dim(tau.r) = c(n1, k) if(p > 0) tau.t = pmax(tau.r - t1, 0) if(m > 0) { eta.r = rep(eta, rep.int(n1,m)) dim(eta.r) = c(n1, m) if(p > 0) t.eta = pmax(t1 - eta.r, 0) } D1 = switch(as.character(p), "0" = cbind(1, tau.r >= t1, if(m>0) t1 >= eta.r else NULL) / hazuh(t1, h), "1" = cbind(1, tau.t, if(m>0) t.eta else NULL) / hazuh(t1, h), "2" = cbind(1, tau.t * tau.t, if(m>0) t.eta * t.eta else NULL) / hazuh(t1, h), cbind(1, tau.t^p, if(m>0) t.eta^p else NULL) / hazuh(t1, h) ) } n2 = nrow(x$o) if(n2 > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) delta = sqrt(expdH) / (1 - expdH) tau.r1 = rep(tau, rep.int(n2,k)) dim(tau.r1) = c(n2, k) tau.x1 = pmax(tau.r1 - x$o[,1], 0) tau.x2 = pmax(tau.r1 - x$o[,2], 0) xd0 = x$o[,1] - x$o[,2] xd1 = switch(as.character(p), "0" = tau.x2 - tau.x1, "1" = .5 * (tau.x2 * tau.x2 - tau.x1 * tau.x1), "2" = (tau.x2 * tau.x2 * tau.x2 - tau.x1 * tau.x1 * tau.x1) / 3, (tau.x2^(p+1) - tau.x1^(p+1)) / (p+1) ) if(m > 0) { eta.r2 = rep(eta, rep.int(n2,m)) dim(eta.r2) = c(n2, m) x1.eta = pmax(x$o[,1] - eta.r2, 0) x2.eta = pmax(x$o[,2] - eta.r2, 0) xd2 = switch(as.character(p), "0" = x1.eta - x2.eta, "1" = .5 * (x1.eta * x1.eta - x2.eta * x2.eta), "2" = (x1.eta * x1.eta * x1.eta - x2.eta * x2.eta * x2.eta) / 3, (x1.eta^(p+1) - x2.eta^(p+1)) / (p+1) ) } else xd2 = NULL D2 = cbind(xd0, xd1, xd2) * delta D2[delta == 0] = 0 } D = rbind(D1, D2) * sqrt(c(x$wt[x$i1], x$wo)) H = crossprod(D) # Choleski decomposition v = sqrt(diag(H)) jv = v != 0 Hv = H[jv,jv,drop=FALSE] / 
tcrossprod(v[jv]) diag(Hv) = diag(Hv) + 1e-10 Rv = chol(Hv) gv = dlogLik(h, x, expdH, interior=TRUE)[jv] / v[jv] plus = forwardsolve(Rv, gv, upper.tri=TRUE, transpose=TRUE) par = c(h$alpha, h$nu, h$mu[j2])[jv] * v[jv] w.new = double(length(v)) w.new[jv] = nnls(Rv, Rv %*% par + plus)$x / v[jv] alpha = w.new[1] nu = if(k > 0) w.new[2:(k+1)] else numeric() mu = if(m > 0) w.new[(k+2):(k+m+1)] else numeric() newh = uh(alpha=alpha, tau=h$tau, nu=nu, eta=h$eta, mu=mu, upper=x$upper, h$deg, collapse=FALSE) if(h$deg == 0) b = backtrack(h, newh, x, expdH, alpha=0) else b = backtrack(h, newh, x, expdH) newh = b$h2 j1 = newh$nu != 0 j2 = newh$mu != 0 h2 = uh(newh$alpha, newh$tau[j1], newh$nu[j1], newh$eta[j2], newh$mu[j2], upper=h$upper, h$deg, collapse=FALSE) h = collapse(h2, x, tol=pmax(tol,1e-10)) list(h=h, convergence=b$convergence) } # Backtracking line search. h and h2 must have the same knots backtrack = function(h, h2, x, expdH, tol=1e-10, alpha=0.33){ j = h$eta != h$upper h2$eta = h$eta = h$eta[j] h$mu = h$mu[j] h2$mu = h2$mu[j] ll.h = logLikuh(h, x) d = c(h2$alpha - h$alpha, h2$nu - h$nu, h2$mu - h$mu) g = alpha * sum(dlogLik(h, x, expdH) * d) convergence = 0 r = 1 repeat { hr = uh((1-r) * h$alpha + r * h2$alpha, h$tau, (1-r) * h$nu + r * h2$nu, h$eta, (1-r)*h$mu + r * h2$mu, upper=h$upper, deg=h$deg) ll.hr = logLikuh(hr, x) if (ll.hr >= ll.h + r * g) {convergence =0; break} r = 0.5 * r if(r < tol) {r = 0; hr = h; ll.h2 = ll.h; convergence = 1; break} } attr(h2, "ll") = ll.hr list(h2=hr, r=r, convergence=convergence) } # Zeroth gradient function grad0 = function(h, x, expdH=NULL) { if(length(x$t) > 0) d0 = sum(x$wt[x$i1] / hazuh(x$t[x$i1], h)) - sum(x$wt * x$t) else d0 = 0 if(nrow(x$o) > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) xd = x$o[,1] - x$o[,2] xd[Delta == 0] = 0 d0 = d0 - sum(x$wo * (x$o[,1] + xd * Delta)) } d0 } # First gradient function grad1 = function(tau, h, x, expdH=NULL, order=0) { g = 
vector("list", length(order)) names(g) = paste("d", order, sep="") g[1:length(g)] = 0 if(length(tau) == 0) return(NULL) m = length(tau) n1 = length(x$t) p = h$deg if(n1 > 0) { # for exact observations tau.r1 = rep(tau, rep.int(n1,m)) dim(tau.r1) = c(n1,m) ind = tau.r1 >= x$t tau.t = pmax(tau.r1 - x$t, 0) if(0 %in% order) { g$d0 = switch(as.character(p), "0" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]) - 0.5 * crossprod(x$wt, tau.r1 * tau.r1 - tau.t * tau.t))[1,], "2" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]^2) - crossprod(x$wt, tau.r1*tau.r1*tau.r1 - tau.t*tau.t*tau.t) / 3)[1,], (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]^p) - crossprod(x$wt, tau.r1^(p+1) - tau.t^(p+1))[1,] / (p+1))[1,] ) } if(1 %in% order) { g$d1 = switch(as.character(p), "0" = double(m), "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], "2" = (2 * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 * tau.r1 - tau.t * tau.t))[1,], { tau.t.pm1 = tau.t[x$i1,,drop=FALSE]^(p-1) if(p < 1) tau.t.pm1[tau.t.pm1 == Inf] = 0 (p * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t.pm1) - crossprod(x$wt, tau.r1^p - tau.t^p))[1,] } ) } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = - sum(x$wt) + crossprod(x$wt, ind)[1,], "2" = (2 * crossprod(x$wt[x$i1], ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], { tau.t1.pm2 = tau.t[x$i1,,drop=FALSE]^(p-2) if(p < 2) tau.t1.pm2[tau.t1.pm2 == Inf] = 0 tau.t.pm1 = tau.t^(p-1) if(p < 1) tau.t.pm1[tau.t.pm1 == Inf] = 0 (p * (p-1) * crossprod(x$wt[x$i1], tau.t1.pm2) - p * crossprod(x$wt, tau.r1^(p-1) - tau.t.pm1))[1,] } ) } } n2 = nrow(x$o) if(n2 > 0) { # for interval-censored observations if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - 
chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) tau.r2 = rep(tau, rep.int(n2,m)) dim(tau.r2) = c(n2,m) tau.x1 = pmax(tau.r2 - x$o[,1], 0) tau.x2 = pmax(tau.r2 - x$o[,2], 0) ind1 = tau.r2 >= x$o[,1] ind2 = tau.r2 >= x$o[,2] if(0 %in% order) { xd0 = switch(as.character(p), "0" = {tau.r2.p1 = tau.r2; tau.x2 - (tau.x1.p1 <- tau.x1)}, "1" = {tau.r2.p1 = tau.r2 * tau.r2 tau.x2 * tau.x2 - (tau.x1.p1 <- tau.x1 * tau.x1)}, "2" = {tau.r2.p1 = tau.r2 * tau.r2 * tau.r2 tau.x2 * tau.x2 * tau.x2 - (tau.x1.p1 <- tau.x1 * tau.x1 * tau.x1)}, { tau.r2.p1 = tau.r2^(p+1) tau.x2^(p+1) - (tau.x1.p1 <- tau.x1^(p+1))} ) xd0[Delta == 0] = 0 g$d0 = g$d0 - crossprod(x$wo, (tau.r2.p1 - tau.x1.p1 + xd0 * Delta))[1,] / (p+1) } if(1 %in% order) { xd1 = switch(as.character(p), "0" = {tau.r2.p = 1; ind2 - (tau.x1.p <- ind1)}, "1" = {tau.r2.p = tau.r2; tau.x2 - (tau.x1.p <- tau.x1)}, "2" = {tau.r2.p = tau.r2 * tau.r2; tau.x2 * tau.x2 - (tau.x1.p <- tau.x1 * tau.x1)}, {tau.r2.p = tau.r2^p; tau.x2^p - (tau.x1.p <- tau.x1^p)} ) xd1[Delta == 0] = 0 g$d1 = g$d1 - crossprod(x$wo, tau.r2.p - tau.x1.p + xd1 * Delta)[1,] } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = g$d2 - sum(x$wo) + crossprod(x$wo, ind1 - (ind2 - ind1) * Delta)[1,], "2" = { xd2 = tau.x2 - tau.x1 xd2[Delta == 0] = 0 g$d2 - 2 * crossprod(x$wo, tau.r2 - tau.x1 + xd2 * Delta)[1,] }, { tau.x1.pm1 = tau.x1^(p-1) if(p < 1) tau.x1.pm1[tau.x1.pm1 == Inf] = 0 xdp = tau.x2^(p-1) - tau.x1^(p-1) xdp[Delta == 0] = 0 g$d2 - p * crossprod(x$wo, tau.r2^(p-1) - tau.x1^(p-1) + xdp * Delta)[1,] } ) } } g } # Second gradient function grad2 = function(eta, h, x, expdH=NULL, order=0) { g = vector("list", length(order)) names(g) = paste("d", order, sep="") g[1:length(g)] = 0 if(length(eta) == 0) return(NULL) m = length(eta) n1 = length(x$t) p = h$deg if(n1 > 0) { eta.r1 = rep(eta, rep.int(n1,m)) dim(eta.r1) = c(n1,m) t.eta = pmax(x$t - eta.r1, 0) ind = x$t >= eta.r1 if(0 %in% order) { g$d0 = switch(as.character(p), "0" = 
(crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, t.eta))[1,], "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]) - 0.5 * crossprod(x$wt, t.eta^2))[1,], "2" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]^2) - crossprod(x$wt, t.eta * t.eta * t.eta) / 3)[1,], (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]^p) - crossprod(x$wt, t.eta^(p+1)) / (p+1))[1,] ) } if(1 %in% order) { g$d1 = switch(as.character(p), "0" = double(m), "1" = (-crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) + crossprod(x$wt, t.eta))[1,], "2" = (-2 * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]) + crossprod(x$wt, t.eta^2))[1,], { t.eta.pm1 = t.eta[x$i1,,drop=FALSE]^(p-1) if(p < 1) t.eta.pm1[t.eta.pm1 == Inf] = 0 (-p * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta.pm1) + crossprod(x$wt, t.eta^p))[1,] } ) } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = - crossprod(x$wt, ind)[1,], "2" = - 2 * crossprod(x$wt, t.eta)[1,], { t.eta.pm1 = t.eta^(p-1) if(p < 1) t.eta.pm1[t.eta.pm1 == Inf] = 0 - p * crossprod(x$wt, t.eta.pm1)[1,] } ) } } n2 = nrow(x$o) if(n2 > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) eta.r2 = rep(eta, rep.int(n2,m)) dim(eta.r2) = c(n2,m) x1.eta = pmax(x$o[,1] - eta.r2, 0) x2.eta = pmax(x$o[,2] - eta.r2, 0) ind1 = x$o[,1] >= eta.r2 ind2 = x$o[,2] >= eta.r2 if(0 %in% order) { xd0 = switch(as.character(p), "0" = (x1.eta.p1 <- x1.eta) - x2.eta, "1" = (x1.eta.p1 <- x1.eta * x1.eta) - x2.eta * x2.eta, "2" = (x1.eta.p1 <- x1.eta * x1.eta * x1.eta) - x2.eta * x2.eta * x2.eta, (x1.eta.p1 <- x1.eta^(p+1)) - x2.eta^(p+1) ) xd0[Delta == 0] = 0 g$d0 = g$d0 - crossprod(x$wo, x1.eta.p1 + xd0 * Delta)[1,] / (p+1) } if(1 %in% order) { xd1 = switch(as.character(p), "0" = (x1.eta.p <- ind1) - ind2, "1" = (x1.eta.p <- x1.eta) - x2.eta, "2" = (x1.eta.p <- x1.eta * x1.eta) - 
x2.eta * x2.eta, (x1.eta.p <- x1.eta^p) - x2.eta^p ) xd1[Delta == 0] = 0 g$d1 = g$d1 + crossprod(x$wo, x1.eta.p + xd1 * Delta)[1,] } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = g$d2 - crossprod(x$wo, ind1 + (ind1 - ind2) * Delta)[1,], "2" = { xd2 = x1.eta - x2.eta xd2[Delta == 0] = 0 g$d2 - 2 * crossprod(x$wo, x1.eta + xd2 * Delta)[1,] }, { x1.eta.pm1 = x1.eta^(p-1) x2.eta.pm1 = x2.eta^(p-1) if(p < 1) { x1.eta.pm1[x1.eta.pm1 == Inf] = 0 x2.eta.pm1[x2.eta.pm1 == Inf] = 0 } xdp = x1.eta.pm1 - x2.eta.pm1 xdp[Delta == 0] = 0 g$d2 - p * crossprod(x$wo, x1.eta.pm1 + xdp * Delta)[1,] } ) } } g } logLikuh = function(h, data) { x = icendata(data) if(length(x$t) > 0) ll = sum(x$wt[x$i1] * log(hazuh(x$t[x$i1], h))) - sum(x$wt * chazuh(x$t, h)) else ll = 0 if(nrow(x$o) > 0) { ll = ll + sum(x$wo * (log(exp(-chazuh(x$o[,1], h)) - exp(-chazuh(x$o[,2], h))))) } ll } dlogLik = function(h, x, expdH=NULL, interior=FALSE) { if(interior) eta = h$eta[h$eta != h$upper] else eta = h$eta m = length(eta) d = c(grad0(h, x, expdH), grad1(h$tau, h, x, expdH)$d0, if(m>0) grad2(eta, h, x, expdH)$d0 else NULL) names(d) = c("alpha", paste("nu",1:length(h$tau),sep=""), if(m>0) paste("mu",1:m,sep="") else NULL) d } ### Finds all local maxima of the gradient functions # bc boundary change maxgrad = function(h, x, expdH=NULL, maxit=100, grid=100, bc=TRUE) { if(!is.icendata(x)) x = icendata(x, w=1) u = sort(unique(c(x$u, h$tau, h$eta))) p = h$deg if(p > 0 && p < 0.1) u = rep(u, rep.int(21, length(u))) + seq(-h$upper*1e-2, h$upper*1e-2, len=21) if(p == 1) maxit = 1 tau1 = c(0, h$tau[h$tau > 0]) k = length(tau1) - 1 eta1 = c(h$eta[h$eta 0 & g$d1[-1] < 0 } if(any(jd)) { left = u1[-m1][jd] right = u1[-1][jd] pt1 = (left + right) * .5 for(i in 1:maxit) { g = grad1(pt1, h, x, expdH, order=1:2) left[g$d1>0] = pt1[g$d1>0] right[g$d1<0] = pt1[g$d1<0] pt1.old = pt1 pt1 = pt1 - g$d1 / g$d2 j = is.na(pt1) | pt1 < left | pt1 > right pt1[j] = (left[j] + right[j]) * .5 if( max(abs(pt1 - 
pt1.old)) <= 1e-14 * h$upper) break } if(p == 1) pt1 = pt1[!j] gpt1 = grad1(pt1, h, x, expdH, order=0)$d0 } } i = pt1 > 0 & pt1 <= tau1[k+1] pt1i = pt1[i] gpt1i = gpt1[i] if(k > 0 && length(pt1i) > 0) { grp = apply(outer(pt1i, tau1[-(k+1)], ">") & outer(pt1i, tau1[-1], "<="), 1, which.max) r1 = aggregate(gpt1i, by=list(group=grp), which.max) j = integer(k) j[r1[,1]] = r1[,2] j = j + c(0,cumsum(tabulate(grp, nbins=k))[-k]) np1 = pt1i[j] gnp1 = gpt1i[j] j0 = gnp1 > 0 np1 = np1[j0] gnp1 = gnp1[j0] } else np1 = gnp1 = numeric() ## grad2 if(p < 0.1) { if(bc) pt2 = u[u>=tau1[k+1] & u<=h$upper] else pt2 = u[u>tau1[k+1] & u= tau1[k+1]] else u2 = seq(tau1[k+1], h$upper, len=grid) if(!bc) u2 = u2[u2 > tau1[k+1]] m2 = length(u2) pt2 = gpt2 = numeric() if(p == 1) jd = rep(TRUE, m2-1) else { g = grad2(u2, h, x, expdH, order=1) jd = g$d1[-m2] > 0 & g$d1[-1] < 0 } if(any(jd)) { left = u2[-m2][jd] right = u2[-1][jd] pt2 = (left + right) * .5 for(i in 1:maxit) { g = grad2(pt2, h, x, expdH, order=1:2) left[g$d1>0] = pt2[g$d1>0] right[g$d1<0] = pt2[g$d1<0] pt2.old = pt2 pt2 = pt2 - g$d1 / g$d2 j = is.na(pt2) | pt2 < left | pt2 > right pt2[j] = (left[j] + right[j]) * .5 if( max(abs(pt2 - pt2.old)) <= 1e-14 * h$upper) break } if(p == 1) pt2 = pt2[!j] gpt2 = grad2(pt2, h, x, expdH, order=0)$d0 } } i = pt2 >= eta1[1] & pt2 < eta1[m+1] pt2i = pt2[i] gpt2i = gpt2[i] if(m > 0 && length(pt2i) > 0) { grp = apply(outer(pt2i, eta1[-(m+1)], ">=") & outer(pt2i, eta1[-1], "<"), 1, which.max) r2 = aggregate(gpt2i, by=list(group=grp), which.max) j = integer(m) j[r2[,1]] = r2[,2] j = j + c(0, cumsum(tabulate(grp, nbins=m))[-m]) np2 = pt2i[j] gnp2 = gpt2i[j] j0 = gnp2 > 0 np2 = np2[j0] gnp2 = gnp2[j0] } else np2 = gnp2 = numeric() ## grad1 and grad2 if(max(h$tau) != h$eta[1]) { jj1 = pt1 >= tau1[k+1] & pt1 <= eta1[1] if(p == 0) { uj1 = pt1[jj1]; gj1 = gpt1[jj1] } else { uj1 = c(tau1[k+1], if(bc) eta1[1] else NULL, pt1[jj1]) gj1 = c(grad1(c(tau1[k+1], if(bc) eta1[1] else NULL), h, x, expdH)$d0, 
gpt1[jj1]) } jmax = which.max(gj1) np31 = uj1[jmax] gnp31 = gj1[jmax] jj2 = pt2 >= tau1[k+1] & pt2 <= eta1[1] if(p == 0) { uj2 = pt2[jj2]; gj2 = gpt2[jj2] } else { uj2 = c(if(bc) tau1[k+1] else NULL, eta1[1], pt2[jj2]) gj2 = c(grad2(c(if(bc) tau1[k+1] else NULL, eta1[1]), h, x, expdH)$d0, gpt2[jj2]) } jmax = which.max(gj2) np32 = uj2[jmax] gnp32 = gj2[jmax] if(gnp31 > gnp32) {np1 = c(np1, np31); gnp1 = c(gnp1, gnp31)} else {np2 = c(np2, np32); gnp2 = c(gnp2, gnp32)} } list(np1=np1, gnp1=gnp1, np2=np2, gnp2=gnp2) } simplify = function(h) { i1 = order(h$tau) # remove identical knots tau = h$tau[i1] nu = h$nu[i1] i2 = order(h$eta) eta = h$eta[i2] mu = h$mu[i2] if(h$deg != 0 || length(tau) == 0 || length(eta) == 0) return(h) if(tau[length(tau)] != eta[1]) return(h) if(nu[length(tau)] < mu[1]) { if(eta[1] == 0) { h$alpha = h$alpha + mu[1] eta = eta[-1] mu = mu[-1] } else { h$alpha = h$alpha + nu[length(nu)] mu[1] = mu[1] - nu[length(nu)] tau = tau[-length(tau)] nu = nu[-length(nu)] } } else { h$alpha = h$alpha + mu[1] nu[length(nu)] = nu[length(nu)] - mu[1] eta = eta[-1] mu = mu[-1] } uh(h$alpha, tau, nu, eta, mu, h$upper, h$deg, collapse=FALSE) } # Collapse similar knots collapse = function(h, x, tol=0){ ll = attr(h, "ll") i1 = order(h$tau) # remove identical knots tau = h$tau[i1] ind1 = cumsum(!duplicated(c(tau))) tau = unique(tau) nu = aggregate(h$nu[i1], by=list(group=ind1), sum)[,2] nu[tau == 0] = 0 i2 = order(h$eta) eta = h$eta[i2] ind2 = cumsum(!duplicated(eta)) eta = unique(eta) mu = aggregate(h$mu[i2], by=list(group=ind2), sum)[,2] mu[eta == h$upper] = 0 h = uh(h$alpha, tau, nu, eta, mu, h$upper, h$deg, collapse=FALSE) if(tol > 0) { if(is.null(ll)) ll = logLikuh(h, x) # if(h$deg < 1) {attr(h, "ll") = ll; return(h)} # why? 
h2 = h break1 = break2 = FALSE repeat { if(!break1 && length(h2$nu) > 1) { j = which.min(diff(h$tau)) h2$nu[j] = h2$nu[j] + h2$nu[j+1] h2$nu = h2$nu[-(j+1)] h2$tau[j] = (h2$tau[j] + h2$tau[j+1]) * .5 h2$tau = h2$tau[-(j+1)] ll2 = logLikuh(h2, x) if(ll2 + tol >= ll) {h = h2; ll = ll2; break1 = FALSE} else {h2 = h; break1 = TRUE} } else break1 = TRUE if(!break2 && length(h2$mu) > 1) { j = which.min(diff(h2$eta)) h2$mu[j] = h2$mu[j] + h2$mu[j+1] h2$mu = h2$mu[-(j+1)] h2$eta[j] = (h2$eta[j] + h2$eta[j+1]) * .5 h2$eta = h2$eta[-(j+1)] ll2 = logLikuh(h2, x) if(ll2 + tol >= ll) {h = h2; ll = ll2; break2 = FALSE} else {h2 = h; break2 = TRUE} } else break2 = TRUE if(break1 && break2) break } attr(h, "ll") = ll } h } # deg - polynomial degree uh = function(alpha, tau, nu, eta, mu, upper=Inf, deg=1, collapse=TRUE) { if(length(tau) == 0) {tau=0; nu=0} if(length(eta) == 0) {eta=upper; mu=0} i1 = order(tau) tau = tau[i1] nu = nu[i1] i2 = order(eta) eta = eta[i2] mu = mu[i2] h = list(alpha=alpha, tau=tau, nu=nu, eta=eta, mu=mu, upper=upper, deg=deg) if(collapse) h = collapse(h) class(h) = "uh" h } print.uh = function(x, ...) { cat("$alpha\n") print(x$alpha, ...) print(cbind(tau=x$tau, nu=x$nu), ...) print(cbind(eta=x$eta, mu=x$mu), ...) cat("$upper\n") print(x$upper, ...) cat("$deg\n") print(x$deg, ...) 
} # Hazard function hazuh = function(t, h) { p = h$deg b = c = 0 if(length(h$tau) > 0) { if(p == 0) d = outer(h$tau, t, ">=") else { d = pmax(outer(h$tau, t, "-"), 0) if(p != 1) d = d^p } b = (h$nu %*% d)[1,] } if(length(h$eta) > 0) { if(p == 0) d = outer(t, h$eta, ">=") else { d = pmax(outer(t, h$eta, "-"), 0) if(p != 1) d = d^p } c = (d %*% h$mu)[,1] } h$alpha + pmax(b, c) } chazuh = function(t, h) { deg = pmax(h$deg, 1) p1 = h$deg + 1 a = b = c = 0 if(h$alpha > 0) a = h$alpha * t if(length(h$tau) > 0) { tau.t = pmax(outer(h$tau, t, "-"), 0) b = (h$nu %*% (h$tau^p1 - tau.t^p1))[1,] / p1 } if(length(h$eta) > 0) { t.eta = pmax(outer(t, h$eta, "-"), 0) c = (t.eta^p1 %*% h$mu)[,1] / p1 } H = a + b + c H[t > h$upper] = 1e100 H } # survival function survuh = function(t, h) exp(-chazuh(t, h)) # density function denuh = function(t, h) hazuh(t, h) * survuh(t, h) ## plotting functions plot.Uhaz = function(x, ...) plot(x$h, ...) plot.uh = function(x, data, fn=c("haz","grad","surv","den","chaz"), ...) { fn = match.arg(fn) fnR = getFunction(paste("plot",fn,"uh",sep="")) switch(fn, "haz" =, "surv" =, "den" =, "chaz" = fnR(x, ...), "grad" = fnR(x, data, ...) ) } plothazuh = function(h, add=FALSE, col="darkblue", lty=1, xlim, ylim, lwd=2, pch=19, len=500, vert=FALSE, add.knots=TRUE, xlab="Time", ylab="Hazard", ...) { p = h$deg pt = switch(as.character(p), "0" = unique(sort(c(0, h$tau, h$eta, h$upper))), "1" = unique(sort(c(0, h$tau, h$eta, h$upper))), unique(sort(c(h$tau, h$eta, seq(0, h$upper, len=len))))) m = length(pt) knots = unique(c(h$tau, h$eta)) haz = hazuh(pt, h) max.haz = max(haz) if(missing(xlim)) xlim = range(pt) if(missing(ylim)) ylim = c(0, max.haz) if(!add) plot(pt, haz, type="n", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, ...) 
if(vert) { lines(rep(max(h$tau),2), ylim, col="grey", lty=2) lines(rep(min(h$eta),2), ylim, col="grey", lty=2) } abline(h=0, col ="black") if(p == 0) { lines(c(h$tau[length(h$tau)], h$eta[1]), rep(h$alpha,2), lwd=lwd, col=col, lty=lty) lines(c(rep(rev(h$tau),each=2), 0), c(h$alpha, rep(hazuh(rev(h$tau), h), each=2)), lwd=lwd, col=col, lty=lty) lines(c(rep(h$eta,each=2), h$upper), c(h$alpha, rep(hazuh(h$eta, h), each=2)), lwd=lwd, col=col, lty=lty) } else lines(pt, haz, lwd=lwd, col=col, lty=lty) if(add.knots && length(knots) > 0) points(knots, hazuh(knots, h), col=col, pch=pch) } plotchazuh = function(h, add=FALSE, lwd=2, len=500, col="darkblue", pch=19, add.knots=TRUE, vert=FALSE, xlim, ylim, ...) { pt = unique(sort(c(seq(0, h$upper, len=len), h$tau, h$eta))) m = length(pt) H = chazuh(pt, h) max.H = max(H) if(missing(xlim)) xlim = range(pt) if(missing(ylim)) ylim = c(0, max.H) plot(rep(max(h$tau),2), c(0,max.H), type="n", xlim=xlim, ylim=ylim, xlab="Time", ylab="Cumulative Hazard", ...) 
if(vert) lines(rep(max(h$tau),2), c(0,max.H), type="l", col ="grey", lty=2) if(vert) lines(rep(min(h$eta),2), c(0,max.H), col ="grey", lty=2) abline(h=0, col ="black") lines(pt, H, type="l", lwd=lwd, col=col) if(add.knots) { knots = c(h$tau, h$eta) knots = knots[knots>0 & knots0 & knots0 & knots 0) x$t else numeric() y2 = if(n2 > 0) { x$o[x$o[,2] == Inf,2] = 1.6 * x$upper rowMeans(x$o) } else numeric() beta = weighted.mean(c(y1,y2), c(x$wt, x$wo)) uh(alpha=1/beta, tau=NULL, nu=NULL, eta=NULL, mu=NULL, upper=x$upper, deg=deg) } npsurv/MD50000644000176200001440000000264613170333554012133 0ustar liggesusers64b6ec057d264f608dec3e7d42e03155 *DESCRIPTION 76a927e32f56c8ae1e359db61003f82c *NAMESPACE 89c3d00fe6e41e19405918d0647d5ac1 *R/Uhaz.R f3832048ab28b68485e87657845cf7c9 *R/icendata.R b374874c3b0d98918b6dabefa7975741 *R/npsurv.R 237be856104d680263fd4d3c68f0c604 *data/acfail.rda f899ce39f558a3c0bd44957146f76ff8 *data/ap.rda f41030bc4511396645ca241a0da91385 *data/cancer.rda 216932fc31c1a6944f2dd48e8cd9b2e0 *data/gastric.rda c735afa2ef11951387ecc1cc2e5838d4 *data/leukemia.rda 028a58d586aa71bf2b7d07df289ea3d8 *data/marijuana.rda 3cb11231442a3a3dc205a188f805594b *data/nzmort.rda d9795b65c54c26d9d339ae2ff292733d *man/Deltamatrix.Rd 78b1517b3caa46a7c42f126518f42608 *man/Uhaz.Rd e4724f174b6f1671979414ca7e806c1c *man/acfail.Rd be0854f4100508c8f75b4131dafd96d1 *man/ap.Rd a62b3b8dff2b298d03876448af084ea0 *man/cancer.Rd bdc41cbd1899086d00775872b3cf4492 *man/gastric.Rd 7e68329c4cb38f52d530958343a8bd24 *man/hazuh.Rd 0cbfc05e213f2f441023fc10343a902d *man/icendata.Rd f9d4ed507778a1f0df0e531df5814613 *man/idf.Rd b1b5c5da43a5a47210e527418a421b56 *man/km.Rd 4c45985a0af8d1a7fe716b661bfb335b *man/leukemia.Rd b764ec118b4c9b32f924a622f38e3167 *man/logLikuh.Rd 8cb6265a34ce8a23870dcd2b7c5bf65d *man/marijuana.Rd 2e13f0dfc76c26746bc1b278252d75d7 *man/npsurv.Rd 04e334e50350d8366da44d5e6a9765aa *man/nzmort.Rd e8f0d02314f671596acfd0d5888851d9 *man/plot.Uhaz.Rd 67a3013ace27560f31640f294409ec1c 
*man/plot.npsurv.Rd f53494224f9527f13eb82e6bbfe4fcdc *man/uh.Rd npsurv/DESCRIPTION0000644000176200001440000000075413170333554013327 0ustar liggesusersPackage: npsurv Title: Nonparametric Survival Analysis Version: 0.4-0 Date: 2017-10-13 Author: Yong Wang Maintainer: Yong Wang Depends: lsei Imports: methods Description: Contains functions for non-parametric survival analysis of exact and interval-censored observations. License: GPL (>= 2) URL: https://www.stat.auckland.ac.nz/~yongwang NeedsCompilation: no Packaged: 2017-10-13 22:05:26 UTC; yong Repository: CRAN Date/Publication: 2017-10-14 07:06:20 UTC npsurv/man/0000755000176200001440000000000013170017117012360 5ustar liggesusersnpsurv/man/plot.npsurv.Rd0000644000176200001440000001051113170001236015152 0ustar liggesusers\name{plot.npsurv} \alias{plot.npsurv} \alias{plot.idf} \alias{plotsurvidf} \alias{plotgradidf} \title{Plot Functions for Nonparametric Survival Estimation} \description{ \code{plot.npsurv} and \code{plot.idf} are wrapper functions that call either \code{plotsurvidf} or \code{plotgradidf}. \code{plotsurvidf} plots the survival function of the nonparametric maximum likelihood estimate (NPMLE). \code{plotgradidf} plots the gradient function of the NPMLE. } \usage{ \method{plot}{npsurv}(x, ...) \method{plot}{idf}(x, data, fn=c("surv","grad"), ...) plotsurvidf(f, style=c("box","uniform","left","right","midpoint"), xlab="Time", ylab="Survival Probability", col="blue3", fill=0, add=FALSE, lty=1, lty.inf=2, xlim, ...) plotgradidf(f, data, w=1, col1="red3", col2="blue3", xlab="Survival Time", ylab="Gradient", xlim, ...) 
} \arguments{ \item{x}{an object of class \code{npsurv} (i.e., an output of function \code{npsurv}) or an object of class \code{idf}.} \item{fn}{either "surv" or "grad", to indicate plotting either the survival or the gradient function.} \item{f}{an object of class \code{idf}.} \item{style}{for how to plot the survival function on a "maximal intersection interval": = \code{box}, plot a rectangle, which shows the uncertainty of probability allocation within the interval; = \code{uniform}, treat it as a uniform distribution and hence the diagonal line of the rectangle is plotted; = \code{left}, plot only the left side of the rectangle; = \code{right}, plot only the right side of the rectangle; = \code{midpoint}, plot a vertical line at the midpoint of the interval. } \item{xlab, ylab}{x- or y-axis label. } \item{add}{= \code{TRUE}, adds the curve to the existing plot; = \code{FALSE}, plots the curve in a new one.} \item{col}{color for all line segments, including box/rectangle borders.} \item{fill}{color for filling a box/rectangle. By default, a lighter semi-transparent color is used.} \item{lty}{line type} \item{lty.inf}{line type for the rectangle that may extend to infinity.} \item{data}{vector or matrix that stores observations, or an object of class \code{icendata}.} \item{w}{additional weights/multiplicities of the observations stored in \code{x}. } \item{col1}{color for drawing maximal intersection intervals allocated with positive probabilities.} \item{col2}{color for drawing all gradients and the maximal intersection intervals allocated with zero probabilities.} \item{xlim}{x-coordinate limit points.} \item{...}{arguments for other graphical parameters (see \code{par}).} } \details{ \code{plotsurvidf} by default chooses a less saturated color for \code{fill} than \code{col}. \code{plotgradidf} plots gradient values as vertical lines located as the left endpoints of the maximal intersection intervals. 
Each maximal intersection interval is plotted as a wider line on the horizontal zero-gradient line, with a circle to represent the open left endpoint of the interval and a solid point the closed right endpoint of the interval. The maximal intersection intervals allocated with positive probabilities have zero gradients, and hence no vertical lines are drawn for them. } \author{ Yong Wang } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. \emph{Computational Statistics & Data Analysis}, \bold{52}, 2388-2402. } \seealso{ \code{\link{icendata}}, \code{\link{idf}}, \code{\link{npsurv}}. } \examples{ data(ap) plot(r<-npsurv(ap)) # survival function plot(r$f, ap, fn="g") # all gradients virtually zeros. data(cancer) cancerRT = with(cancer, cancer[group=="RT",1:2]) plot(rt<-npsurv(cancerRT), xlim=c(0,60)) # survival of RT cancerRCT = with(cancer, cancer[group=="RCT",1:2]) plot(rct<-npsurv(cancerRCT), add=TRUE, col="green3") # survival of RCT ## as uniform dististrbutions. plot(rt, add=TRUE, style="uniform", col="blue3") plot(rct, add=TRUE, style="uniform", col="green3") ## plot gradients; must supply data plot(rt, cancerRT, fn="g") # for group RT plotgradidf(rct$f, cancerRCT) # or, for group RCT } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/gastric.Rd0000644000176200001440000000241712621225542014312 0ustar liggesusers\name{gastric} \alias{gastric} \docType{data} \title{ Gastric Cancer Survival Data } \description{ Contains the survival times of 45 gastrointestinal tumor patients who were treated with both chemotherapy and radiotherapy. It has both exact and right-censored observations. } \usage{gastric} \format{ A data frame with 30 observations and 3 variables: L: left-end points of the interval-censored survival times; R: right-end points of the interval-censored survival times.} \source{ Klein and Moeschberger (2003), page 224. } \references{ Klein, J. P. and Moeschberger, M. 
L. (2003). \emph{Survival Analysis: Techniques for Censored and Truncated Data (2nd ed.)}. Springer. } \seealso{ \code{\link{npsurv}}, \code{\link{Uhaz}}. } \examples{ data(gastric) plot(npsurv(gastric), col="grey") # survival function plot(h0<-Uhaz(gastric, deg=0)$h, fn="s", add=TRUE, col="green3") plot(h1<-Uhaz(gastric, deg=1)$h, fn="s", add=TRUE) plot(h2<-Uhaz(gastric, deg=2)$h, fn="s", add=TRUE, col="red3") plot(h0, fn="h", col="green3") # hazard function plot(h1, fn="h", add=TRUE) plot(h2, fn="h", add=TRUE, col="red3") plot(h0, fn="d", col="green3") # density function plot(h1, fn="d", add=TRUE) plot(h2, fn="d", add=TRUE, col="red3") } \keyword{datasets} npsurv/man/leukemia.Rd0000644000176200001440000000245612617434066014465 0ustar liggesusers\name{leukemia} \alias{leukemia} \docType{data} \title{ Remission Times for Acute Leukemia Patients } \description{ Contains remission times in weeks of 42 acute leukemia patients, who received either the treatment of drug 6-mercaptopurine or the placebo treatment. Each remission time is either exactly observed or right-censored. } \usage{leukemia} \format{ A data frame with 42 observations and 3 variables: L: left-end points of the interval-censored remission times in weeks; R: right-end points of the interval-censored remission times; group: either 6-MP (6-mercaptopurine) or Placebo.} \source{ Freireich et al. (1963). } \references{ Freireich, E. O. et al. (1963). The effect of 6-mercaptopmine on the duration of steroid induced remission in acute leukemia. \emph{Blood}, \bold{21}, 699-716. } \seealso{ \code{\link{npsurv}}. 
} \examples{ data(leukemia) i = leukemia[,"group"] == "Placebo" plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="green3") # placebo plot(npsurv(leukemia[!i,1:2]), add=TRUE) # 6-MP ## Treat each remission time as interval-censored: x = leukemia ii = x[,1] == x[,2] x[ii,2] = x[ii,1] + 1 plot(npsurv(x[i,1:2]), xlim=c(0,40), col="green3") # placebo plot(npsurv(x[!i,1:2]), add=TRUE) # 6-MP } \keyword{datasets} npsurv/man/uh.Rd0000644000176200001440000000716713036473043013303 0ustar liggesusers\name{uh} \alias{uh} \alias{uh.object} \alias{print.uh} \title{U-shaped Hazard Function} \description{ \code{uh} creates an object of class \code{uh}, which stores a U-shaped hazard function. \code{print.uh} prints an object of class \code{uh}. } \usage{ uh(alpha, tau, nu, eta, mu, upper=Inf, deg=1, collapse=TRUE) \method{print}{uh}(x, ...) } \arguments{ \item{alpha}{a nonnegative value, for the constant coefficient.} \item{tau}{vector of nonnegative real values, for left knots. } \item{nu}{vector of nonnegative values, for masses associated with the left knots. } \item{eta}{vector of nonnegative real values, for right knots. } \item{mu}{vector of nonnegative real values, for masses associated with the right knots. } \item{upper}{a positive value, at which point the hazard starts to become infinite. } \item{deg}{nonnegative real number for spline degree (i.e., p in the formula below).} \item{collapse}{logical, indicating if identical knots should be collapsed.} \item{x}{an object of class \code{uh}.} \item{...}{other auguments for printing.} } \details{ A U-shape hazard function, as generalized by Wang and Fani (2017), is given by \deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,} where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... 
< eta_m,} and \eqn{p \ge 0}{p >= 0} is the the spline degree which determines the smoothness of the U-shaped hazard. As p increases, the family of hazard functions becomes increasingly smoother, but at the same time, smaller. When \eqn{p = 0}{p = 0}, the hazard function is U-shaped, as studied by Bray et al. (1967). When \eqn{p = 1}{p = 1}, the hazard function is convex, as studied by Jankowski and Wellner (2009a,b). \code{print.uh} prints an object of class \code{uh}. While \code{alpha}, \code{upper} and \code{deg} are printed as they are, \code{tau} and \code{nu} are printed as a two-column matrix, and so are \code{eta} and \code{mu}. } \value{ \code{uh} returns an object of class \code{uh}. It is a list with components \code{alpha}, \code{tau}, \code{nu}, \code{eta}, \code{mu}, \code{upper} and \code{deg}, which store their corresponding values as described above. } \author{ Yong Wang } \references{ Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum Likelihood Estimation of a U-shaped Failure Rate Function}. Defense Technical Information Center. Jankowski, H. K. and Wellner, J. A. (2009a). Computation of nonparametric convex hazard estimators via profile methods. \emph{Journal of Nonparametric Statistics}, \bold{21}, 505-518. Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, 1010-1035. Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). 
} \seealso{ \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} } \examples{ (h0 = uh(3, 2, 3, 4, 5, 7, deg=0)) # deg = 0 plot(h0, ylim=c(0,20)) (h1 = uh(4, 2, 3, 5, 6, 7, deg=1)) # deg = 1 plot(h1, add=TRUE, col="green3") (h2 = uh(1, 1:2, 3:4, 5:6, 7:8, 9, deg=2)) # deg = 2 plot(h2, add=TRUE, col="red3") } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/cancer.Rd0000644000176200001440000000204112617436640014112 0ustar liggesusers\name{cancer} \alias{cancer} \docType{data} \title{ Breast Retraction Times after Beast Cancer Treatments. } \description{ Contains the breast retraction times in months for 94 breast cancer patients who received either radiation therapy or radiation therapy plus adjuvant chemotherapy. } \usage{cancer} \format{ A data frame with 94 observations and 3 variables: L: left-end points of the interval-censored retraction times; R: right-end points of the interval-censored retraction times; group: either \code{RT} (radiation therapy) or \code{RCT} (radiation therapy plus adjuvant chemotherapy).} \source{ Finkelstein and Wolfe (1985). } \references{ Finkelstein, D. M. and R. A. Wolfe (1985). A semiparametric model for regression analysis of interval-censored failure time data. \emph{Biometrics}, \bold{41}, pp.933-945. } \seealso{ \code{\link{npsurv}}. } \examples{ data(cancer) i = cancer$group == "RT" plot(npsurv(cancer[i,1:2]), xlim=c(0,60)) plot(npsurv(cancer[!i,1:2]), add=TRUE, col="green3") } \keyword{datasets} npsurv/man/acfail.Rd0000644000176200001440000000114512617436351014101 0ustar liggesusers\name{acfail} \alias{acfail} \docType{data} \title{ Air Conditioner Failure Data } \description{ Contains the number of operating hours between successive failure times of the air conditioning systems in Boeing airplanes } \usage{acfail} \format{ A numeric vector storing the failure times.} \source{ Proschan (1963) } \references{ Proschan, F. (1963). Theoretical explanation of observed decreasing failure rate. 
\emph{Technometrics}, \bold{5}, 375-383. } \seealso{ \code{\link{Uhaz}}. } \examples{ data(acfail) r = Uhaz(acfail, deg=2) plot(r$h, fn="h") plot(r$h, fn="d") } \keyword{datasets} npsurv/man/km.Rd0000644000176200001440000000230613036472774013276 0ustar liggesusers\name{km} \alias{km} \title{Kaplan-Meier Estimation} \description{ \code{km} computes the nonparametric maximum likelihood esimate (NPMLE) of a survival function for right-censored data. } \usage{ km(data, w=1) } \arguments{ \item{data}{vector or matrix, or an object of class \code{icendata}.} \item{w}{weights/multiplicities of observations.} } \details{ For details about the arguments, see \code{icendata}. } \value{ A list with components: \item{f}{NPMLE, an object of class \code{idf}.} \item{ll}{log-likelihood value of the NPMLE \code{f}.} } \author{ Yong Wang } \references{ Kaplan, E. L. and Meier, P. (1958). Nonparametric estimation from incomplete observations. \emph{Journal of the American Statistical Association}, \bold{53}, 457-481. } \seealso{ \code{\link{icendata}}, \code{\link{npsurv}}, \code{\link{idf}}. } \examples{ x = cbind(1:5, c(1,Inf,3,4,Inf)) (f = km(x)$f) plot(f) data(leukemia) i = leukemia[,"group"] == "Placebo" plot(km(leukemia[i,1:2])$f, xlim=c(0,40), col="green3") # placebo plot(km(leukemia[!i,1:2])$f, add=TRUE) # 6-MP } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/plot.Uhaz.Rd0000644000176200001440000001237413035263456014553 0ustar liggesusers\name{plot.Uhaz} \alias{plot.Uhaz} \alias{plot.uh} \alias{plothazuh} \alias{plotchazuh} \alias{plotsurvuh} \alias{plotdenuh} \alias{plotgraduh} \title{Plot Functions for U-shaped Hazard Estimation} \description{ \code{plot.Uhaz} and \code{plot.uh} are wrapper functions that can be used to invoke \code{plot.hazuh}, \code{plot.chazuh}, \code{plot.survuh}, \code{plot.denuh} or \code{plot.graduh}. \code{plothazuh} plots a U-shaped hazard function. 
\code{plotchazuh} plots a cumulative hazard function that has a U-shaped hazard function. \code{plotsurvuh} plots the survival function that has a U-shaped hazard function. \code{plotdenuh} plots the density function that has a U-shaped hazard function. \code{plotgraduh} plots the gradient function that has a U-shaped hazard function. } \usage{ \method{plot}{Uhaz}(x, ...) \method{plot}{uh}(x, data, fn=c("haz","grad","surv","den","chaz"), ...) plothazuh(h, add=FALSE, col="darkblue", lty=1, xlim, ylim, lwd=2, pch=19, len=500, vert=FALSE, add.knots=TRUE, xlab="Time", ylab="Hazard", ...) plotchazuh(h, add=FALSE, lwd=2, len=500, col="darkblue", pch=19, add.knots=TRUE, vert=FALSE, xlim, ylim, ...) plotdenuh(h, add=FALSE, lty=1, lwd=2, col="darkblue", add.knots=TRUE, pch=19, ylim, len=500, vert=FALSE, ...) plotsurvuh(h, add=FALSE, lty=1, lwd=2, len=500, vert=FALSE, col="darkblue", pch=19, add.knots=TRUE, xlim, ylim, ...) plotgraduh(h, data, w=1, len=500, xlim, ylim, vert=TRUE, add=FALSE, xlab="Time", ylab="Gradient", col0="red3", col1="blue3", col2="green3", order=0, ...) } \arguments{ \item{x}{an object of class \code{Uhaz}, i.e., an output of function \code{Uhaz}, or an object of class \code{uh}..} \item{h}{an object of class \code{uh}.} \item{data}{vector or matrix that stores observations, or an object of class \code{icendata}.} \item{w}{additional weights/multiplicities for the observations stored in \code{data}.} \item{fn}{function to be plotted. It can be = \code{haz}, for hazard function; = \code{chaz}, for cumulative hazard function; = \code{den}, for density function; = \code{surv}, for survival function; = \code{gradient}, for gradient functions. 
} \item{xlim, ylim}{numeric vectors of length 2, giving the x and y coordinates ranges.} \item{xlab, ylab}{x- or y-axis labels.} \item{add}{= \code{TRUE}, adds the curve to the existing plot; = \code{FALSE}, plots the curve in a new one.} \item{col}{color used for plotting the curve.} \item{lty}{line type for plotting the curve.} \item{lwd}{line width for plotting the curve.} \item{len}{number of points used to plot a curve.} \item{add.knots}{logical, indicating if knots are also plotted. } \item{pch}{point character/type for plotting knots.} \item{vert}{logical, indicating if grey vertical lines are plotted to show the interval that separates the two discrete measures. } \item{col0}{color for gradient function 0, i.e., for the hazard-constant part, or alpha.} \item{col1}{color for gradient function 1, i.e., for the hazard-decreasing part.} \item{col2}{color for gradient function 1, i.e., for the hazard-increasing part.} \item{order}{= 0, the gradient functions are plotted; = 1, their first derivatives are plotted; = 2, their second derivatives are plotted. } \item{...}{arguments for other graphical parameters (see \code{par}).} } \details{ A U-shaped hazard function is given by \deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,} where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0}. } \author{ Yong Wang } \references{ Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). } \seealso{ \code{\link{icendata}}, \code{\link{uh}}, \code{\link{npsurv}}. 
} \examples{ ## Angina Pectoris Survival Data data(ap) plot(r<-Uhaz(ap)) # hazard function for a convex hazard plot(r, fn="c") # cumulative hazard function plot(r, fn="s") # survival function plot(r, fn="d") # density function plot(r, ap, fn="g") # gradient functions plot(r, ap, fn="g", order=1) # first derivatives of gradient functions plot(r, ap, fn="g", order=2) # second derivatives of gradient functions ## New Zealand Mortality in 2000 data(nzmort) i = nzmort$ethnic == "maori" x = nzmort[i,1:2] # Maori mortality h = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard plot(h) # hazard function plot(h, fn="d") # density function plot(h, fn="s") # survival function x2 = nzmort[!i,1:2] # Non-Maori mortality h2 = Uhaz(x2[,1]+0.5, x2[,2], deg=2)$h plot(h2, fn="s", add=TRUE, col="green3") } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/nzmort.Rd0000644000176200001440000000372612620002740014203 0ustar liggesusers\name{nzmort} \alias{nzmort} \docType{data} \title{ New Zealand Mortality in 2000 } \description{ Contains the number of deaths of Maori and Non-Maori people at each age in New Zealand in 2000. } \usage{nzmort} \format{ A data frame with 210 observations and 3 variables: age: at which age the deaths occurred; deaths: number of people died at the age; ethnic: either Maori or Non-Maori.} \details{Data contains no age with zero death.} \source{ \url{http://www.mortality.org/} } \seealso{ \code{\link{Uhaz}}. 
} \examples{ data(nzmort) x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality # x = with(nzmort, nzmort[ethnic!="maori",])[,1:2] # Non-Maori mortality ## As exact observations # Plot hazard functions h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h # U-shaped hazard plot(h0, fn="h", col="green3", pch=2) h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h # convex hazard plot(h1, fn="h", add=TRUE, pch=1) h2 = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard plot(h2, fn="h", add=TRUE, col="red3") # Plot densities age = 0:max(x[,1]) count = integer(length(age)) count[x[,"age"]+1] = x[,"deaths"] barplot(count/sum(count), space=0, col="lightgrey", ylab="Density") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, col="green3", pch=2) plot(h1, fn="d", add=TRUE, col="blue3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) ## As interval-censored observations # Plot hazard functions x2 = cbind(x[,1], x[,1]+1, x[,2]) h0 = Uhaz(x2, deg=0)$h # U-shaped hazard plot(h0, fn="h", col="green3", pch=2) h1 = Uhaz(x2, deg=1)$h # convex hazard plot(h1, fn="h", add=TRUE, pch=1) h2 = Uhaz(x2, deg=2)$h # smooth U-shaped hazard plot(h2, fn="h", add=TRUE, col="red3", pch=1) # Plot densities barplot(count/sum(count), space=0, col="lightgrey") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, col="green3", pch=2) plot(h1, fn="d", add=TRUE, col="blue3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) } \keyword{datasets} npsurv/man/Deltamatrix.Rd0000644000176200001440000000323213036472756015144 0ustar liggesusers\name{Deltamatrix} \alias{Deltamatrix} \title{Delta matrix} \description{ \code{Deltamatrix} computes the Delta matrix, along with maximal intersection intervals, for a set of intervals. } \usage{ Deltamatrix(LR) } \arguments{ \item{LR}{two-column matrix, each row of which stores an censoring interval of the form \eqn{(L_i, R_i]}{(Li, Ri]}. If \eqn{L_i = R_i}{Li = Ri}, it is an exact observation. 
} } \details{ An intersection interval is a nonempty intersection of any combination of the given intervals, and a maximal intersection interval is an intersection interval that contains no other intersection interval. The Delta matrix is a matrix of indicators (\code{TRUE} or \code{FALSE}). The rows correspond to the given interval-censored observations, and the columns the maximal intersection intervals. A \code{TRUE} value of the (i,j)-th element means that the i-th observation covers the j-th maximal intersection interval, and a \code{FALSE} value means the opposite. } \value{ A list with components: \item{left}{left endpoints of the maximal intersection intervals.} \item{right}{right endpoints of the maximal intersection intervals.} \item{Delta}{logical matrix, for the Delta matrix.} } \author{ Yong Wang } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. \emph{Computational Statistics & Data Analysis}, \bold{52}, 2388-2402. } \seealso{ \code{\link{icendata}}, \code{\link{idf}}. } \examples{ (x = cbind(1:5,1:5*3-2)) Deltamatrix(x) } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/ap.Rd0000644000176200001440000000226712617441262013265 0ustar liggesusers\name{ap} \alias{ap} \docType{data} \title{ Angina Pectoris Survival Data } \description{ Contains the survival times in years from the time of diagnosis for 2418 male patients with angina pectoris. Some patients are lost to follow-up, hence giving right-censored observations. Each integer-valued survival time is treated as being censored within a one-year interval. } \usage{ap} \format{ A data frame with 30 observations and 3 variables: \code{L}: left-end point of an interval-censored retraction time; \code{R}: right-end point of an interval-censored retraction time; \code{count}: number of patients in the interval. } \source{ Lee and Wang (2003), page 92. } \references{ Lee, E. T. and Wang, J. W. (2003). 
\emph{Statistical Methods for Survival Data Analysis}. Wiley. } \seealso{ \code{\link{npsurv}}. } \examples{ data(ap) r = Uhaz(ap, deg=2) # smooth U-shaped hazard plot(r$h, fn="h") # hazard plot(r$h, fn="d") # density # NPMLE and shape-restricted estimation plot(npsurv(ap), fn="s") # survival under no shape restriction plot(r$h, fn="s", add=TRUE) # survival with smooth U-shaped hazard } \keyword{datasets} npsurv/man/icendata.Rd0000644000176200001440000000607013170015143014417 0ustar liggesusers\name{icendata} \alias{icendata} \alias{is.icendata} \alias{icendata.object} \title{Class of Interval-censored Data} \description{ Function \code{icendata} creates an object of class 'icendata', which can be used to save both interval-censored and exact observations. Function \code{is.icendata} simply checks if an object is of class 'icendata'. } \usage{ icendata(x, w=1) is.icendata(x) } \arguments{ \item{x}{vector or matrix.} \item{w}{weights or multiplicities of the observations.} } \details{ If \code{x} is a vector, it contains only exact observations, with weights given in \code{w}. If \code{x} is a two-column matrix, it contains interval-censored observations and stores their left and right endpoints in the first and second column, respectively. If the left and right endpoints are equal, then the observation is exact. Weights are provided by \code{w}. If \code{x} is a three-column matrix, it contains interval-censored observations and stores their left and right endpoints in the first and second column, respectively. The weight of each observation is the third-column value multiplied by the corresponding weight value in \code{w}. It is useful to turn interval-censored (and exact) observations into the format imposed by \code{icendata} so that they can be processed in a standardized format by other functions. Also, exact and interval-censored observations are stored separately in this format and can hence be dealt with more easily. 
Most functions in the package \code{npsurv} first ensure that the data has this format before processing. Observations of zero weights are removed. Identical observations are aggregated. An interval-valued observation is either \eqn{(L_i, R_i]}{(Li, Ri]} if \eqn{L_i < R_i}{Li < Ri}, or \eqn{[L_i, R_i]}{[Li, Ri]} if \eqn{L_i = R_i}{Li = Ri}. } \value{ \item{t}{numeric vector, storing exact observations.} \item{wt}{numeric vector, storing the weights of exact observations.} \item{o}{two-column numeric matrix, storing interval-censored observations.} \item{wo}{numeric vector, storing the weights of interval-censored observations.} \item{i1}{logical vector, indicating whether exact observations are less than \code{upper}.} \item{upper}{the largest finite value of \code{t} and \code{o}.} \item{u}{numeric vector, containing 0 and all unique finite values in \code{t} and \code{o}.} } \author{ Yong Wang } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. Computational Statistics & Data Analysis, 52, 2388-2402. Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). } \seealso{ \code{\link{npsurv}}, \code{\link{Uhaz}}. } \examples{ data(ap) (x = icendata(ap)) is.icendata(x) data(gastric) icendata(gastric) data(leukemia) i = leukemia[,"group"] == "6-MP" icendata(leukemia[i,1:2]) } \keyword{ function } npsurv/man/logLikuh.Rd0000644000176200001440000000211313034044777014435 0ustar liggesusers\name{logLikuh} \alias{logLikuh} \title{Computes the Log-likelihood Value of a U-shaped Hazard Function} \description{ \code{logLikuh} returns the log-likelihood value of a U-shaped hazard function, given a data set. } \usage{ logLikuh(h, data) } \arguments{ \item{h}{an object of class \code{uh}. 
} \item{data}{numeric vector or matrix for exact or interval-censored observations, or an object of class \code{icendata}.} } \value{ Log-likelihood value evaluated at \code{h}, given \code{data}. } \author{ Yong Wang } \references{ Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). } \seealso{ \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} } \examples{ data(ap) (h0 = uh(.2, NULL, NULL, NULL, NULL, 15, 1)) # Uniform hazard plot(h0, ylim=c(0,.3)) logLikuh(h0, ap) r = Uhaz(ap, deg=2) r$ll logLikuh(r$h, ap) plot(r$h, add=TRUE, col="red3") } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/hazuh.Rd0000644000176200001440000000231113034044714013765 0ustar liggesusers\name{hazuh} \alias{hazuh} \alias{chazuh} \alias{survuh} \alias{denuh} \title{Distributional Functions given a U-shaped Hazard Function} \description{ Given an object of class \code{uh}: \code{hazuh} computes the hazard values; \code{chazuh} computes the cumulative hazard values; \code{survuh} computes the survival function values; \code{denuh} computes the density function values. } \usage{ hazuh(t, h) chazuh(t, h) survuh(t, h) denuh(t, h) } \arguments{ \item{t}{time points at which the function is to be evaluated. } \item{h}{an object of class \code{uh}.} } \value{ A numeric vector of the function values. } \author{ Yong Wang } \references{ Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). 
} \seealso{ \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} } \examples{ data(ap) h = Uhaz(icendata(ap), deg=2)$h hazuh(0:15, h) # hazard chazuh(0:15, h) # cumulative hazard survuh(0:15, h) # survival probability denuh(0:15, h) # density } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/npsurv.Rd0000644000176200001440000000736613170016535014223 0ustar liggesusers\name{npsurv} \alias{npsurv} \alias{npsurv.object} \title{Nonparametric Survival Function Estimation} \description{ \code{npsurv} computes the nonparametric maximum likelihood esimate (NPMLE) of a survival function for general interval-censored data. } \usage{ npsurv(data, w=1, maxit=100, tol=1e-6, verb=0) } \arguments{ \item{data}{vector or matrix, or an object of class \code{icendata}.} \item{w}{weights or multiplicities of the observations.} \item{maxit}{maximum number of iterations.} \item{tol}{ tolerance level for stopping the algorithm. It is used as the threshold on the increase of the log-likelihood after each iteration. } \item{verb}{ verbosity level for printing intermediate results at each iteration. } } \details{ If \code{data} is a vector, it contains only exact observations, with weights given in \code{w}. If \code{data} is a matrix with two columns, it contains interval-censored observations, with the two columns storing their left and right end-points, respectively. If the left and right end-points are equal, then the observation is exact. Weights are provided by \code{w}. If \code{data} is a matrix with three columns, it contains interval-censored observations, with the first two columns storing their left and right end-points, respectively. The weight of each observation is the third-column value multiplied by the corresponding weight value in \code{w}. 
The algorithm used for computing the NPMLE is either the constrained
Newton method (CNM) (Wang, 2008), or the hierarchical constrained
Newton method (HCNM) (Wang and Taylor, 2013) when there are a large
number of maximal intersection intervals.
} \examples{ ## all exact observations data(acfail) plot(npsurv(acfail)) ## right-censored (and exact) observations data(gastric) plot(npsurv(gastric)) data(leukemia) i = leukemia[,"group"] == "Placebo" plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="blue") # placebo plot(npsurv(leukemia[!i,1:2]), add=TRUE, col="red") # 6-MP ## purely interval-censored data data(ap) plot(npsurv(ap)) data(cancer) cancerRT = with(cancer, cancer[group=="RT",1:2]) plot(npsurv(cancerRT), xlim=c(0,60)) # survival of RT cancerRCT = with(cancer, cancer[group=="RCT",1:2]) plot(npsurv(cancerRCT), add=TRUE, col="green") # survival of RCT } \keyword{ function } npsurv/man/Uhaz.Rd0000644000176200001440000001272313036473025013570 0ustar liggesusers\name{Uhaz} \alias{Uhaz} \alias{Uhaz.object} \title{U-shaped Hazard Function Estimation} \description{ \code{Uhaz} computes the nonparametric maximum likelihood esimate (NPMLE) of a U-shaped hazard function from exact or interval-censored data, or a mix of the two types of data. } \usage{ Uhaz(data, w=1, deg=1, maxit=100, tol=1e-6, verb=0) } \arguments{ \item{data}{vector or matrix, or an object of class \code{icendata}.} \item{w}{weights or multiplicities of the observations.} \item{deg}{nonnegative real number for spline degree (i.e., p in the formula below).} \item{maxit}{maximum number of iterations.} \item{tol}{ tolerance level for stopping the algorithm. It is used as the threshold on the increase of the log-likelihood after each iteration. } \item{verb}{ verbosity level for printing intermediate results in each iteration. } } \details{ If \code{data} is a vector, it contains only exact observations, with weights given in \code{w}. If \code{data} is a matrix with two columns, it contains interval-censored observations, with the two columns storing their left and right end-points, respectively. If the left and right end-points are equal, then the observation is exact. Weights are provided by \code{w}. 
If \code{data} is a matrix with three columns, it contains interval-censored observations, with the first two columns storing their left and right end-points, respectively. The weight of each observation is the third-column value multiplied by the corresponding weight value in \code{w}. The algorithm used for the computing the NPMLE of a hazard function under the U-shape restriction is is proposed by Wang and Fani (2015). Such a hazard function is given by \deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,} where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0} is the the spline degree which determines the smoothness of the U-shaped hazard. As p increases, the family of hazard functions becomes increasingly smoother, but at the time, smaller. When p = 0, the hazard function is U-shaped, as studied by Bray et al. (1967). When p = 1, the hazard function is convex, as studied by Jankowski and Wellner (2009a,b). Note that \code{deg} (i.e., p in the above mathematical display) can take on any nonnegative real value. } \value{ An object of class \code{Uhaz}, which is a list with components: \item{convergence}{= \code{TRUE}, converged successfully; = \code{FALSE}, maximum number of iterations reached.} \item{grad}{gradient values at the knots.} \item{numiter}{number of iterations used.} \item{ll}{log-likelihood value of the NPMLE \code{h}.} \item{h}{NPMLE of the U-shaped hazard function, an object of class \code{uh}.} } \author{ Yong Wang } \references{ Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum Likelihood Estimation of a U-shaped Failure Rate Function}. Defense Technical Information Center. Jankowski, H. K. and Wellner, J. A. (2009a). 
Computation of nonparametric convex hazard estimators via profile methods. \emph{Journal of Nonparametric Statistics}, \bold{21}, 505-518. Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, 1010-1035. Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). } \seealso{ \code{\link{icendata}}, \code{\link{nzmort}}. } \examples{ ## Interval-censored observations data(ap) (r = Uhaz(ap, deg=0)) plot(r, ylim=c(0,.3), col=1) for(i in 1:6) plot(Uhaz(ap, deg=i/2), add=TRUE, col=i+1) legend(15, 0.01, paste0("deg = ", 0:6/2), lwd=2, col=1:7, xjust=1, yjust=0) ## Exact observations data(nzmort) x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality (h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h) # U-shaped hazard (h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h) # convex hazard (h2 <- Uhaz(x[,1]+0.5, x[,2], deg=2)$h) # smooth U-shaped hazard plot(h0, pch=2) # plot hazard functions plot(h1, add=TRUE, col="green3", pch=1) plot(h2, add=TRUE, col="red3", pch=19) age = 0:max(x[,1]) # plot densities count = integer(length(age)) count[x[,"age"]+1] = x[,"deaths"] barplot(count/sum(count), space=0, col="lightgrey") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, pch=2) plot(h1, fn="d", add=TRUE, col="green3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) plot(h0, fn="s", pch=2) # plot survival functions plot(h1, fn="s", add=TRUE, col="green3", pch=1) plot(h2, fn="s", add=TRUE, col="red3", pch=19) ## Exact and right-censored observations data(gastric) plot(h0<-Uhaz(gastric, deg=0)$h) # plot hazard functions plot(h1<-Uhaz(gastric, deg=1)$h, add=TRUE, col="green3") plot(h2<-Uhaz(gastric, deg=2)$h, add=TRUE, col="red3") plot(npsurv(gastric), fn="s", col="grey") # plot survival functions plot(h0, fn="s", add=TRUE) plot(h1, fn="s", add=TRUE, col="green3") plot(h2, fn="s", add=TRUE, col="red3") } 
\keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/idf.Rd0000644000176200001440000000253213170017065013415 0ustar liggesusers\name{idf} \alias{idf} \alias{idf.object} \alias{print.idf} %- Also NEED an '\alias' for EACH other topic documented here. \title{Interval Distribution Function} \description{ \code{idf} creates an object of class \code{idf}. An \code{idf} object stores a distribution function defined on a set of intervals. } \usage{ idf(left, right, p) \method{print}{idf}(x, ...) } \arguments{ \item{left, right}{left and right endpoints of intervals on which the distribution function is defined.} \item{p}{probabilities allocated to the intervals. Probability values will be normalized inside the function. } \item{x}{an object of class \code{idf}. } \item{...}{other arguments for printing. } } \details{ When left and right endpoints are equal, the intervals are in fact points. \code{print.idf} prints an object of class \code{idf} as a three-coumn matrix. } \value{ \item{left, right}{left and right endpoints of intervals on which the distribution function is defined.} \item{p}{probabilities allocated to the intervals.} } \author{ Yong Wang } \seealso{ \code{\link{icendata}}, \code{\link{Deltamatrix}}, \code{\link{npsurv}}. } \examples{ idf(1:5, 1:5*3-2, c(1,1,2,2,4)) npsurv(cbind(1:5, 1:5*3-2))$f # NPMLE } \keyword{ function } % at least one, from doc/KEYWORDS npsurv/man/marijuana.Rd0000644000176200001440000000231712617442123014625 0ustar liggesusers\name{marijuana} \alias{marijuana} \docType{data} \title{ Angina Pectoris Survival Data } \description{ Contains the answers of 191 California high school students to the question: "When did you first use marijuana?". An answer can be an exact age, or "I have never used it", which gives rise to a right-censored observation, or "I have used it but cannot recall just when the first time was", which gives rise to a left-censored observation. 
} \usage{marijuana} \format{ A data frame with 21 observations and 3 variables: L: left-end point of an interval-censored time; R: right-end point of an interval-censored time; count: number of students in the interval.} \source{ Turnbull and Weiss (1978). See also Klein and Moeschberger (1997), page 17. } \references{ Turnbull and Weiss (1978). A likelihood ratio statistic fortesting goodness of fit with randomly censored data. \emph{Biometrics}, \bold{34}, 367-375. Klein and Moeschberger (2003). \emph{Survival Analysis: Techniques for Censored and Truncated Data} (2nd ed.). Springer } \seealso{ \code{\link{npsurv}}. } \examples{ data(marijuana) r = Uhaz(marijuana, deg=2) plot(r$h, fn="h") plot(r$h, fn="s") } \keyword{datasets}