npsurv/0000755000176200001440000000000013731664713011622 5ustar liggesusers
npsurv/NAMESPACE0000644000176200001440000000115112620714430013023 0ustar liggesusers
# exportPattern("^[^\\.]")
export("icendata","is.icendata","npsurv","Deltamatrix","idf","km","plotsurvidf","plotgradidf","Uhaz","uh","hazuh","chazuh","survuh","denuh","plothazuh","plotchazuh","plotsurvuh","plotdenuh","plotgraduh","logLikuh")
import(lsei)
importFrom("grDevices", "col2rgb", "hsv", "rgb2hsv")
importFrom("graphics", "abline", "hist", "lines", "plot", "points",
           "segments", "legend", "rect")
importFrom("methods", "getFunction")
importFrom("stats", "aggregate", "weighted.mean")
S3method(print, idf)
S3method(plot, idf)
S3method(plot, npsurv)
S3method(print, uh)
S3method(plot, uh)
S3method(plot, Uhaz)
npsurv/data/0000755000176200001440000000000012617511315012522 5ustar liggesusers
npsurv/data/marijuana.rda0000644000176200001440000000033012617434066015164 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/nzmort.rda0000644000176200001440000000245012617434066014553 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/ap.rda0000644000176200001440000000044112617434066013620 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/leukemia.rda0000644000176200001440000000045712617434066015023 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/gastric.rda0000644000176200001440000000072212617434066014656 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/acfail.rda0000644000176200001440000000062112617434066014437 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/data/cancer.rda0000644000176200001440000000066312617434066014461 0ustar liggesusers
[binary R data file; contents omitted]
npsurv/man/0000755000176200001440000000000013725061634012371 5ustar liggesusers
npsurv/man/logLikuh.Rd0000644000176200001440000000212013725066600014431 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Uhaz.R
\name{logLikuh}
\alias{logLikuh}
\title{Computes the Log-likelihood Value of a U-shaped Hazard Function}
\usage{
logLikuh(h, data)
}
\arguments{
\item{h}{an object of class \code{uh}.}

\item{data}{numeric vector or matrix for exact or interval-censored
observations, or an object of class \code{icendata}.}
}
\value{
Log-likelihood value evaluated at \code{h}, given \code{data}.
}
\description{
\code{logLikuh} returns the log-likelihood value of a U-shaped hazard
function, given a data set.
}
\examples{
data(ap)
(h0 = uh(.2, NULL, NULL, NULL, NULL, 15, 1))   # Uniform hazard
plot(h0, ylim=c(0,.3))
logLikuh(h0, ap)

r = Uhaz(ap, deg=2)
r$ll
logLikuh(r$h, ap)
plot(r$h, add=TRUE, col="red3")
}
\references{
Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation
of a U-shaped hazard function. \emph{Statistics and Computing}, \bold{28},
187-200.
}
\seealso{
\code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}}
}
\author{
Yong Wang
}
\keyword{function}
npsurv/man/Uhaz.Rd0000644000176200001440000001257613725066600013574 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Uhaz.R
\name{Uhaz}
\alias{Uhaz}
\alias{Uhaz.object}
\title{U-shaped Hazard Function Estimation}
\usage{
Uhaz(data, w = 1, deg = 1, maxit = 100, tol = 1e-06, verb = 0)
}
\arguments{
\item{data}{vector or matrix, or an object of class \code{icendata}.}

\item{w}{weights or multiplicities of the observations.}

\item{deg}{nonnegative real number for spline degree (i.e., p in the
formula below).}

\item{maxit}{maximum number of iterations.}

\item{tol}{tolerance level for stopping the algorithm. It is used as the
threshold on the increase of the log-likelihood after each iteration.}

\item{verb}{verbosity level for printing intermediate results in each
iteration.}
}
\value{
An object of class \code{Uhaz}, which is a list with components:

\item{convergence}{= \code{TRUE}, converged successfully;

= \code{FALSE}, maximum number of iterations reached.}

\item{grad}{gradient values at the knots.}

\item{numiter}{number of iterations used.}

\item{ll}{log-likelihood value of the NPMLE \code{h}.}

\item{h}{NPMLE of the U-shaped hazard function, an object of class \code{uh}.}
}
\description{
\code{Uhaz} computes the nonparametric maximum likelihood estimate (NPMLE)
of a U-shaped hazard function from exact or interval-censored data, or a
mix of the two types of data.
}
\details{
If \code{data} is a vector, it contains only exact observations, with
weights given in \code{w}.

If \code{data} is a matrix with two columns, it contains interval-censored
observations, with the two columns storing their left and right end-points,
respectively. If the left and right end-points are equal, then the
observation is exact. Weights are provided by \code{w}.

If \code{data} is a matrix with three columns, it contains interval-censored
observations, with the first two columns storing their left and right
end-points, respectively. The weight of each observation is the
third-column value multiplied by the corresponding weight value in \code{w}.

The algorithm used for computing the NPMLE of a hazard function under the
U-shape restriction is proposed by Wang and Fani (2018). Such a U-shaped
hazard function is given by

\deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j =
1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j -
t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,}

where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 <
\cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1
< ... < eta_m,} and \eqn{p \ge 0}{p >= 0} is the spline degree which
determines the smoothness of the U-shaped hazard. As p increases, the
family of hazard functions becomes increasingly smoother, but at the same
time, smaller.
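
For instance, with \code{deg} = 0, \eqn{\alpha = 0.1}{alpha = 0.1}, one left
knot \eqn{\tau_1 = 2}{tau_1 = 2} with mass \eqn{\nu_1 = 0.05}{nu_1 = 0.05}
and one right knot \eqn{\eta_1 = 8}{eta_1 = 8} with mass
\eqn{\mu_1 = 0.02}{mu_1 = 0.02} (all values chosen purely for illustration),
the hazard equals 0.15 before the left knot, 0.10 between the two knots, and
0.12 beyond the right knot. This can be verified numerically with \code{uh}
and \code{hazuh}:

\preformatted{
h = uh(0.1, 2, 0.05, 8, 0.02, upper=10, deg=0)
hazuh(c(1, 5, 9), h)     # 0.15 0.10 0.12
}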
When p = 0, the hazard function is U-shaped, as studied by Bray et al. (1967). When p = 1, the hazard function is convex, as studied by Jankowski and Wellner (2009a,b). Note that \code{deg} (i.e., p in the above mathematical display) can take on any nonnegative real value. } \examples{ ## Interval-censored observations data(ap) (r = Uhaz(ap, deg=0)) plot(r, ylim=c(0,.3), col=1) for(i in 1:6) plot(Uhaz(ap, deg=i/2), add=TRUE, col=i+1) legend(15, 0.01, paste0("deg = ", 0:6/2), lwd=2, col=1:7, xjust=1, yjust=0) ## Exact observations data(nzmort) x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality (h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h) # U-shaped hazard (h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h) # convex hazard (h2 <- Uhaz(x[,1]+0.5, x[,2], deg=2)$h) # smooth U-shaped hazard plot(h0, pch=2) # plot hazard functions plot(h1, add=TRUE, col="green3", pch=1) plot(h2, add=TRUE, col="red3", pch=19) age = 0:max(x[,1]) # plot densities count = integer(length(age)) count[x[,"age"]+1] = x[,"deaths"] barplot(count/sum(count), space=0, col="lightgrey") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, pch=2) plot(h1, fn="d", add=TRUE, col="green3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) plot(h0, fn="s", pch=2) # plot survival functions plot(h1, fn="s", add=TRUE, col="green3", pch=1) plot(h2, fn="s", add=TRUE, col="red3", pch=19) ## Exact and right-censored observations data(gastric) plot(h0<-Uhaz(gastric, deg=0)$h) # plot hazard functions plot(h1<-Uhaz(gastric, deg=1)$h, add=TRUE, col="green3") plot(h2<-Uhaz(gastric, deg=2)$h, add=TRUE, col="red3") plot(npsurv(gastric), fn="s", col="grey") # plot survival functions plot(h0, fn="s", add=TRUE) plot(h1, fn="s", add=TRUE, col="green3") plot(h2, fn="s", add=TRUE, col="red3") } \references{ Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum Likelihood Estimation of a U-shaped Failure Rate Function}. Defense Technical Information Center. Jankowski, H. K. and Wellner, J. A. (2009a). Computation of nonparametric convex hazard estimators via profile methods. \emph{Journal of Nonparametric Statistics}, \bold{21}, 505-518. Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, 1010-1035. Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, \bold{28}, 187-200. } \seealso{ \code{\link{icendata}}, \code{\link{nzmort}}. } \author{ Yong Wang } \keyword{function} npsurv/man/plot.Uhaz.Rd0000644000176200001440000001225213725066600014544 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Uhaz.R \name{plot.Uhaz} \alias{plot.Uhaz} \alias{plot.uh} \alias{plothazuh} \alias{plotchazuh} \alias{plotsurvuh} \alias{plotdenuh} \alias{plotgraduh} \title{Plot Functions for U-shaped Hazard Estimation} \usage{ \method{plot}{Uhaz}(x, ...) \method{plot}{uh}(x, data, fn=c("haz","grad","surv","den","chaz"), ...) plothazuh(h, add=FALSE, col="darkblue", lty=1, xlim, ylim, lwd=2, pch=19, len=500, vert=FALSE, add.knots=TRUE, xlab="Time", ylab="Hazard", ...) plotchazuh(h, add=FALSE, lwd=2, len=500, col="darkblue", pch=19, add.knots=TRUE, vert=FALSE, xlim, ylim, ...) plotdenuh(h, add=FALSE, lty=1, lwd=2, col="darkblue", add.knots=TRUE, pch=19, ylim, len=500, vert=FALSE, ...) plotsurvuh(h, add=FALSE, lty=1, lwd=2, len=500, vert=FALSE, col="darkblue", pch=19, add.knots=TRUE, xlim, ylim, ...) 
plotgraduh(h, data, w=1, len=500, xlim, ylim, vert=TRUE, add=FALSE,
           xlab="Time", ylab="Gradient", col0="red3", col1="blue3",
           col2="green3", order=0, ...)
}
\arguments{
\item{x}{an object of class \code{Uhaz}, i.e., an output of function
\code{Uhaz}, or an object of class \code{uh}.}

\item{...}{arguments for other graphical parameters (see \code{par}).}

\item{h}{an object of class \code{uh}.}

\item{data}{vector or matrix that stores observations, or an object of
class \code{icendata}.}

\item{w}{additional weights/multiplicities for the observations stored in
\code{data}.}

\item{fn}{function to be plotted. It can be = \code{haz}, for hazard
function; = \code{chaz}, for cumulative hazard function; = \code{den}, for
density function; = \code{surv}, for survival function; = \code{grad}, for
gradient functions.}

\item{xlim, ylim}{numeric vectors of length 2, giving the x and y
coordinates ranges.}

\item{xlab, ylab}{x- or y-axis labels.}

\item{add}{= \code{TRUE}, adds the curve to the existing plot; =
\code{FALSE}, plots the curve in a new one.}

\item{col}{color used for plotting the curve.}

\item{lty}{line type for plotting the curve.}

\item{lwd}{line width for plotting the curve.}

\item{len}{number of points used to plot a curve.}

\item{add.knots}{logical, indicating if knots are also plotted.}

\item{pch}{point character/type for plotting knots.}

\item{vert}{logical, indicating if grey vertical lines are plotted to show
the interval that separates the two discrete measures.}

\item{col0}{color for gradient function 0, i.e., for the hazard-constant
part, or alpha.}

\item{col1}{color for gradient function 1, i.e., for the hazard-decreasing
part.}

\item{col2}{color for gradient function 2, i.e., for the hazard-increasing
part.}

\item{order}{= 0, the gradient functions are plotted; = 1, their first
derivatives are plotted; = 2, their second derivatives are plotted.}
}
\description{
Functions for plotting various functions in U-shaped hazard estimation
}
\details{
\code{plot.Uhaz} and \code{plot.uh} are wrapper functions that can be used
to invoke \code{plothazuh}, \code{plotchazuh}, \code{plotsurvuh},
\code{plotdenuh} or \code{plotgraduh}.

\code{plothazuh} plots a U-shaped hazard function.

\code{plotchazuh} plots a cumulative hazard function that has a U-shaped
hazard function.

\code{plotsurvuh} plots the survival function that has a U-shaped hazard
function.

\code{plotdenuh} plots the density function that has a U-shaped hazard
function.

\code{plotgraduh} plots the gradient function that has a U-shaped hazard
function.

A U-shaped hazard function is given by

\deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j =
1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j -
t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,}

where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 <
\cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1
< ... < eta_m,} and \eqn{p \ge 0}{p >= 0}.
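
For example, the wrapper interface and the underlying plotting functions can
be used interchangeably. A small sketch, with arbitrarily chosen knots and
masses:

\preformatted{
h = uh(0.1, 2, 0.05, 8, 0.02, upper=10, deg=0)
plot(h, fn="haz")      # equivalent to plothazuh(h)
plot(h, fn="surv")     # equivalent to plotsurvuh(h)
}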
} \examples{ ## Angina Pectoris Survival Data data(ap) plot(r<-Uhaz(ap)) # hazard function for a convex hazard plot(r, fn="c") # cumulative hazard function plot(r, fn="s") # survival function plot(r, fn="d") # density function plot(r, ap, fn="g") # gradient functions plot(r, ap, fn="g", order=1) # first derivatives of gradient functions plot(r, ap, fn="g", order=2) # second derivatives of gradient functions ## New Zealand Mortality in 2000 data(nzmort) i = nzmort$ethnic == "maori" x = nzmort[i,1:2] # Maori mortality h = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard plot(h) # hazard function plot(h, fn="d") # density function plot(h, fn="s") # survival function x2 = nzmort[!i,1:2] # Non-Maori mortality h2 = Uhaz(x2[,1]+0.5, x2[,2], deg=2)$h plot(h2, fn="s", add=TRUE, col="green3") } \references{ Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, \bold{28}, 187-200. } \seealso{ \code{\link{icendata}}, \code{\link{uh}}, \code{\link{npsurv}}. } \author{ Yong Wang } \keyword{function} npsurv/man/km.Rd0000644000176200001440000000230213725061634013264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv.R \name{km} \alias{km} \title{Kaplan-Meier Estimation} \usage{ km(data, w = 1) } \arguments{ \item{data}{vector or matrix, or an object of class \code{icendata}.} \item{w}{weights/multiplicities of observations.} } \value{ A list with components: \item{f}{NPMLE, an object of class \code{idf}.} \item{ll}{log-likelihood value of the NPMLE \code{f}.} } \description{ \code{km} computes the nonparametric maximum likelihood esimate (NPMLE) of a survival function for right-censored data. } \details{ For details about the arguments, see \code{icendata}. } \examples{ x = cbind(1:5, c(1,Inf,3,4,Inf)) (f = km(x)$f) plot(f) data(leukemia) i = leukemia[,"group"] == "Placebo" plot(km(leukemia[i,1:2])$f, xlim=c(0,40), col="green3") # placebo plot(km(leukemia[!i,1:2])$f, add=TRUE) # 6-MP } \references{ Kaplan, E. L. and Meier, P. (1958). Nonparametric estimation from incomplete observations. \emph{Journal of the American Statistical Association}, \bold{53}, 457-481. } \seealso{ \code{\link{icendata}}, \code{\link{npsurv}}, \code{\link{idf}}. } \author{ Yong Wang } \keyword{function} npsurv/man/gastric.Rd0000644000176200001440000000250513725061634014316 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv-package.R \docType{data} \name{gastric} \alias{gastric} \title{Gastric Cancer Survival Data} \format{ A data frame with 30 observations and 3 variables: L: left-end points of the interval-censored survival times; R: right-end points of the interval-censored survival times. } \source{ Klein and Moeschberger (2003), page 224. } \description{ Contains the survival times of 45 gastrointestinal tumor patients who were treated with both chemotherapy and radiotherapy. It has both exact and right-censored observations. } \examples{ data(gastric) plot(npsurv(gastric), col="grey") # survival function plot(h0<-Uhaz(gastric, deg=0)$h, fn="s", add=TRUE, col="green3") plot(h1<-Uhaz(gastric, deg=1)$h, fn="s", add=TRUE) plot(h2<-Uhaz(gastric, deg=2)$h, fn="s", add=TRUE, col="red3") plot(h0, fn="h", col="green3") # hazard function plot(h1, fn="h", add=TRUE) plot(h2, fn="h", add=TRUE, col="red3") plot(h0, fn="d", col="green3") # density function plot(h1, fn="d", add=TRUE) plot(h2, fn="d", add=TRUE, col="red3") } \references{ Klein, J. 
P. and Moeschberger, M. L. (2003). \emph{Survival Analysis: Techniques for Censored and Truncated Data (2nd ed.)}. Springer. } \seealso{ \code{\link{npsurv}}, \code{\link{Uhaz}}. } \keyword{datasets} npsurv/man/nzmort.Rd0000644000176200001440000000402313730756106014211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv-package.R \docType{data} \name{nzmort} \alias{nzmort} \title{New Zealand Mortality in 2000} \format{ A data frame with 210 observations and 3 variables: age: at which age the deaths occurred; deaths: number of people died at the age; ethnic: either Maori or Non-Maori. } \source{ \url{https://www.mortality.org/} } \description{ Contains the number of deaths of Maori and Non-Maori people at each age in New Zealand in 2000. } \details{ Data contains no age with zero death. } \examples{ data(nzmort) x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality # x = with(nzmort, nzmort[ethnic!="maori",])[,1:2] # Non-Maori mortality ## As exact observations # Plot hazard functions h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h # U-shaped hazard plot(h0, fn="h", col="green3", pch=2) h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h # convex hazard plot(h1, fn="h", add=TRUE, pch=1) h2 = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard plot(h2, fn="h", add=TRUE, col="red3") # Plot densities age = 0:max(x[,1]) count = integer(length(age)) count[x[,"age"]+1] = x[,"deaths"] barplot(count/sum(count), space=0, col="lightgrey", ylab="Density") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, col="green3", pch=2) plot(h1, fn="d", add=TRUE, col="blue3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) ## As interval-censored observations # Plot hazard functions x2 = cbind(x[,1], x[,1]+1, x[,2]) h0 = Uhaz(x2, deg=0)$h # U-shaped hazard plot(h0, fn="h", col="green3", pch=2) h1 = Uhaz(x2, deg=1)$h # convex hazard plot(h1, fn="h", add=TRUE, pch=1) h2 = Uhaz(x2, deg=2)$h # smooth U-shaped hazard plot(h2, fn="h", add=TRUE, col="red3", pch=1) # Plot densities barplot(count/sum(count), space=0, col="lightgrey") axis(1, pos=NA, at=0:10*10) plot(h0, fn="d", add=TRUE, col="green3", pch=2) plot(h1, fn="d", add=TRUE, col="blue3", pch=1) plot(h2, fn="d", add=TRUE, col="red3", pch=19) } \seealso{ \code{\link{Uhaz}}. } \keyword{datasets} npsurv/man/icendata.Rd0000644000176200001440000000630013725061634014427 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/icendata.R \name{icendata} \alias{icendata} \alias{is.icendata} \alias{icendata.object} \title{Class of Interval-censored Data} \usage{ icendata(x, w=1) is.icendata(x) } \arguments{ \item{x}{vector or matrix.} \item{w}{weights or multiplicities of the observations.} } \value{ \item{t}{numeric vector, storing exact observations.} \item{wt}{numeric vector, storing the weights of exact observations.} \item{o}{two-column numeric matrix, storing interval-censored observations.} \item{wo}{numeric vector, storing the weights of interval-censored observations.} \item{i1}{logical vector, indicating whether exact observations are less than \code{upper}.} \item{upper}{the largest finite value of \code{t} and \code{o}.} \item{u}{numeric vector, containing 0 and all unique finite values in \code{t} and \code{o}.} } \description{ Class \code{icendata} can be used to store general interval-censored data, which may possibly contain exact observations.There are several functions associated with the class. 
}
\details{
Function \code{icendata} creates an object of class 'icendata', which can
be used to save both interval-censored and exact observations.

Function \code{is.icendata} simply checks if an object is of class
'icendata'.

If \code{x} is a vector, it contains only exact observations, with weights
given in \code{w}.

If \code{x} is a two-column matrix, it contains interval-censored
observations and stores their left and right endpoints in the first and
second column, respectively. If the left and right endpoints are equal,
then the observation is exact. Weights are provided by \code{w}.

If \code{x} is a three-column matrix, it contains interval-censored
observations and stores their left and right endpoints in the first and
second column, respectively. The weight of each observation is the
third-column value multiplied by the corresponding weight value in \code{w}.

It is useful to turn interval-censored (and exact) observations into the
format imposed by \code{icendata} so that they can be processed in a
standardized format by other functions. Also, exact and interval-censored
observations are stored separately in this format and can hence be dealt
with more easily. Most functions in the package \code{npsurv} first ensure
that the data has this format before processing.

Observations of zero weights are removed. Identical observations are
aggregated.

An interval-valued observation is either \eqn{(L_i, R_i]}{(Li, Ri]} if
\eqn{L_i < R_i}{Li < Ri}, or \eqn{[L_i, R_i]}{[Li, Ri]} if \eqn{L_i =
R_i}{Li = Ri}.
}
\examples{
data(ap)
(x = icendata(ap))
is.icendata(x)

data(gastric)
icendata(gastric)

data(leukemia)
i = leukemia[,"group"] == "6-MP"
icendata(leukemia[i,1:2])
}
\references{
Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood
computation for interval-censored data. \emph{Computational Statistics &
Data Analysis}, \bold{52}, 2388-2402.

Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation
of a U-shaped hazard function. \emph{Statistics and Computing}, \bold{28},
187-200.
}
\seealso{
\code{\link{npsurv}}, \code{\link{Uhaz}}.
}
\author{
Yong Wang
}
\keyword{class}
\keyword{function}
npsurv/man/cancer.Rd0000644000176200001440000000212313725061634014111 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/npsurv-package.R
\docType{data}
\name{cancer}
\alias{cancer}
\title{Breast Retraction Times after Breast Cancer Treatments}
\format{
A data frame with 94 observations and 3 variables:

L: left-end points of the interval-censored retraction times;

R: right-end points of the interval-censored retraction times;

group: either \code{RT} (radiation therapy) or \code{RCT} (radiation
therapy plus adjuvant chemotherapy).
}
\source{
Finkelstein and Wolfe (1985).
}
\description{
Contains the breast retraction times in months for 94 breast cancer
patients who received either radiation therapy or radiation therapy plus
adjuvant chemotherapy.
}
\examples{
data(cancer)
i = cancer$group == "RT"
plot(npsurv(cancer[i,1:2]), xlim=c(0,60))
plot(npsurv(cancer[!i,1:2]), add=TRUE, col="green3")
}
\references{
Finkelstein, D. M. and Wolfe, R. A. (1985). A semiparametric model for
regression analysis of interval-censored failure time data.
\emph{Biometrics}, \bold{41}, 933-945.
}
\seealso{
\code{\link{npsurv}}.
} \keyword{datasets} npsurv/man/uh.Rd0000644000176200001440000000720213725067072013277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Uhaz.R \name{uh} \alias{uh} \alias{uh.object} \alias{print.uh} \title{U-shaped Hazard Function} \usage{ uh(alpha, tau, nu, eta, mu, upper=Inf, deg=1, collapse=TRUE) \method{print}{uh}(x, ...) } \arguments{ \item{alpha}{a nonnegative value, for the constant coefficient.} \item{tau}{vector of nonnegative real values, for left knots.} \item{nu}{vector of nonnegative values, for masses associated with the left knots.} \item{eta}{vector of nonnegative real values, for right knots.} \item{mu}{vector of nonnegative real values, for masses associated with the right knots.} \item{upper}{a positive value, at which point the hazard starts to become infinite.} \item{deg}{nonnegative real number for spline degree (i.e., p in the formula below).} \item{collapse}{logical, indicating if identical knots should be collapsed.} \item{x}{an object of class \code{uh}.} \item{...}{other auguments for printing.} } \value{ \code{uh} returns an object of class \code{uh}. It is a list with components \code{alpha}, \code{tau}, \code{nu}, \code{eta}, \code{mu}, \code{upper} and \code{deg}, which store their corresponding values as described above. } \description{ Class \code{uh} can be used to store U-shaped hazard functions. There are a couple of functions associated with the class. } \details{ \code{uh} creates an object of class \code{uh}, which stores a U-shaped hazard function. \code{print.uh} prints an object of class \code{uh}. A U-shape hazard function, as generalized by Wang and Fani (2018), is given by \deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p + sum_{j=1}^m mu_j (t - eta_j)_+^p,} where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, \eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0} is the the spline degree which determines the smoothness of the U-shaped hazard. As p increases, the family of hazard functions becomes increasingly smoother, but at the same time, smaller. When \eqn{p = 0}{p = 0}, the hazard function is U-shaped, as studied by Bray et al. (1967). When \eqn{p = 1}{p = 1}, the hazard function is convex, as studied by Jankowski and Wellner (2009a,b). \code{print.uh} prints an object of class \code{uh}. While \code{alpha}, \code{upper} and \code{deg} are printed as they are, \code{tau} and \code{nu} are printed as a two-column matrix, and so are \code{eta} and \code{mu}. } \examples{ (h0 = uh(3, 2, 3, 4, 5, 7, deg=0)) # deg = 0 plot(h0, ylim=c(0,20)) (h1 = uh(4, 2, 3, 5, 6, 7, deg=1)) # deg = 1 plot(h1, add=TRUE, col="green3") (h2 = uh(1, 1:2, 3:4, 5:6, 7:8, 9, deg=2)) # deg = 2 plot(h2, add=TRUE, col="red3") } \references{ Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum Likelihood Estimation of a U-shaped Failure Rate Function}. Defense Technical Information Center. Jankowski, H. K. and Wellner, J. A. (2009a). Computation of nonparametric convex hazard estimators via profile methods. \emph{Journal of Nonparametric Statistics}, \bold{21}, 505-518. Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, 1010-1035. Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation of a U-shaped hazard function. 
\emph{Statistics and Computing}, \bold{28}, 187-200.
}
\seealso{
\code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}}
}
\author{
Yong Wang
}
\keyword{function}
npsurv/man/marijuana.Rd0000644000176200001440000000236013725061634014630 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/npsurv-package.R
\docType{data}
\name{marijuana}
\alias{marijuana}
\title{Marijuana Use Data}
\format{
A data frame with 21 observations and 3 variables:

L: left-end point of an interval-censored time;

R: right-end point of an interval-censored time;

count: number of students in the interval.
}
\source{
Turnbull and Weiss (1978). See also Klein and Moeschberger (1997), page 17.
}
\description{
Contains the answers of 191 California high school students to the
question: "When did you first use marijuana?". An answer can be an exact
age, or "I have never used it", which gives rise to a right-censored
observation, or "I have used it but cannot recall just when the first time
was", which gives rise to a left-censored observation.
}
\examples{
data(marijuana)
r = Uhaz(marijuana, deg=2)
plot(r$h, fn="h")
plot(r$h, fn="s")
}
\references{
Turnbull and Weiss (1978). A likelihood ratio statistic for testing
goodness of fit with randomly censored data. \emph{Biometrics}, \bold{34},
367-375.

Klein and Moeschberger (2003). \emph{Survival Analysis: Techniques for
Censored and Truncated Data} (2nd ed.). Springer.
}
\seealso{
\code{\link{npsurv}}.
}
\keyword{datasets}
npsurv/man/acfail.Rd0000644000176200001440000000124313725061634014077 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/npsurv-package.R
\docType{data}
\name{acfail}
\alias{acfail}
\title{Air Conditioner Failure Data}
\format{
A numeric vector storing the failure times.
}
\source{
Proschan (1963).
}
\description{
Contains the number of operating hours between successive failure times of
the air conditioning systems in Boeing airplanes.
}
\examples{
data(acfail)
r = Uhaz(acfail, deg=2)
plot(r$h, fn="h")
plot(r$h, fn="d")
}
\references{
Proschan, F. (1963). Theoretical explanation of observed decreasing failure
rate. \emph{Technometrics}, \bold{5}, 375-383.
}
\seealso{
\code{\link{Uhaz}}.
}
\keyword{datasets}
npsurv/man/Deltamatrix.Rd0000644000176200001440000000322013725061634015133 0ustar liggesusers
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/npsurv.R
\name{Deltamatrix}
\alias{Deltamatrix}
\title{Delta matrix}
\usage{
Deltamatrix(LR)
}
\arguments{
\item{LR}{two-column matrix, each row of which stores a censoring interval
of the form \eqn{(L_i, R_i]}{(Li, Ri]}. If \eqn{L_i = R_i}{Li = Ri}, it is
an exact observation.}
}
\value{
A list with components:

\item{left}{left endpoints of the maximal intersection intervals.}

\item{right}{right endpoints of the maximal intersection intervals.}

\item{Delta}{logical matrix, for the Delta matrix.}
}
\description{
\code{Deltamatrix} computes the Delta matrix, along with maximal
intersection intervals, for a set of intervals.
}
\details{
An intersection interval is a nonempty intersection of any combination of
the given intervals, and a maximal intersection interval is an intersection
interval that contains no other intersection interval.

The Delta matrix is a matrix of indicators (\code{TRUE} or \code{FALSE}).
The rows correspond to the given interval-censored observations, and the
columns the maximal intersection intervals.
A \code{TRUE} value of the (i,j)-th element means that the i-th observation covers the j-th maximal intersection interval, and a \code{FALSE} value means the opposite. } \examples{ (x = cbind(1:5,1:5*3-2)) Deltamatrix(x) } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. \emph{Computational Statistics & Data Analysis}, \bold{52}, 2388-2402. } \seealso{ \code{\link{icendata}}, \code{\link{idf}}. } \author{ Yong Wang } \keyword{function} npsurv/man/leukemia.Rd0000644000176200001440000000254113725061634014456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv-package.R \docType{data} \name{leukemia} \alias{leukemia} \title{Remission Times for Acute Leukemia Patients} \format{ A data frame with 42 observations and 3 variables: L: left-end points of the interval-censored remission times in weeks; R: right-end points of the interval-censored remission times; group: either 6-MP (6-mercaptopurine) or Placebo. } \source{ Freireich et al. (1963). } \description{ Contains remission times in weeks of 42 acute leukemia patients, who received either the treatment of drug 6-mercaptopurine or the placebo treatment. Each remission time is either exactly observed or right-censored. } \examples{ data(leukemia) i = leukemia[,"group"] == "Placebo" plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="green3") # placebo plot(npsurv(leukemia[!i,1:2]), add=TRUE) # 6-MP ## Treat each remission time as interval-censored: x = leukemia ii = x[,1] == x[,2] x[ii,2] = x[ii,1] + 1 plot(npsurv(x[i,1:2]), xlim=c(0,40), col="green3") # placebo plot(npsurv(x[!i,1:2]), add=TRUE) # 6-MP } \references{ Freireich, E. O. et al. (1963). The effect of 6-mercaptopmine on the duration of steroid induced remission in acute leukemia. \emph{Blood}, \bold{21}, 699-716. } \seealso{ \code{\link{npsurv}}. } \keyword{datasets} npsurv/man/idf.Rd0000644000176200001440000000264713725061634013433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv.R \name{idf} \alias{idf} \alias{idf.object} \alias{print.idf} \title{Interval Distribution Function} \usage{ idf(left, right, p) \method{print}{idf}(x, ...) } \arguments{ \item{left, right}{left and right endpoints of intervals on which the distribution function is defined.} \item{p}{probabilities allocated to the intervals. Probability values will be normalized inside the function.} \item{x}{an object of class \code{idf}.} \item{...}{other arguments for printing.} } \value{ \item{left, right}{left and right endpoints of intervals on which the distribution function is defined.} \item{p}{probabilities allocated to the intervals.} } \description{ Class \code{idf} can be used to store a distribution function defined on a set of intervals. There are several functions associated with the class. } \details{ \code{idf} creates an object of class \code{idf}. An \code{idf} object stores a distribution function defined on a set of intervals. When left and right endpoints are identical, the intervals just represent exact points. \code{print.idf} prints an object of class \code{idf} as a three-coumn matrix. } \examples{ idf(1:5, 1:5*3-2, c(1,1,2,2,4)) npsurv(cbind(1:5, 1:5*3-2))$f # NPMLE } \seealso{ \code{\link{icendata}}, \code{\link{Deltamatrix}}, \code{\link{npsurv}}. 
} \author{ Yong Wang } \keyword{function} npsurv/man/npsurv.Rd0000644000176200001440000000727613725061634014231 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv.R \name{npsurv} \alias{npsurv} \alias{npsurv.object} \title{Nonparametric Survival Function Estimation} \usage{ npsurv(data, w = 1, maxit = 100, tol = 1e-06, verb = 0) } \arguments{ \item{data}{vector or matrix, or an object of class \code{icendata}.} \item{w}{weights or multiplicities of the observations.} \item{maxit}{maximum number of iterations.} \item{tol}{tolerance level for stopping the algorithm. It is used as the threshold on the increase of the log-likelihood after each iteration.} \item{verb}{verbosity level for printing intermediate results at each iteration.} } \value{ An object of class \code{npsurv}, which is a list with components: \item{f}{NPMLE, an object of class \code{idf}.} \item{upper}{largest finite value in the data.} \item{convergence}{= \code{TRUE}, converged successfully; = \code{FALSE}, maximum number of iterations reached.} \item{method}{method used internally, either \code{cnm} or \code{hcnm}.} \item{ll}{log-likelihood value of the NPMLE \code{f}.} \item{maxgrad}{maximum gradient value of the NPMLE \code{f}.} \item{numiter}{number of iterations used.} } \description{ \code{npsurv} computes the nonparametric maximum likelihood esimate (NPMLE) of a survival function for general interval-censored data. } \details{ If \code{data} is a vector, it contains only exact observations, with weights given in \code{w}. If \code{data} is a matrix with two columns, it contains interval-censored observations, with the two columns storing their left and right end-points, respectively. If the left and right end-points are equal, then the observation is exact. Weights are provided by \code{w}. If \code{data} is a matrix with three columns, it contains interval-censored observations, with the first two columns storing their left and right end-points, respectively. The weight of each observation is the third-column value multiplied by the corresponding weight value in \code{w}. The algorithm used for computing the NPMLE is either the constrained Newton method (CNM) (Wang, 2008), or the hierachical constrained Newton method (HCNM) (Wang and Taylor, 2013) when there are a large number of maximal intersection intervals. Inside the function, it examines if data has only right censoring, and if so, the Kaplan-Meier estimate is computed directly by function \code{km}. An interval-valued observation is either \eqn{(L_i, R_i]}{(Li, Ri]} if \eqn{L_i < R_i}{Li < Ri}, or \eqn{[L_i, R_i]}{[Li, Ri]} if \eqn{L_i = R_i}{Li = Ri}. } \examples{ ## all exact observations data(acfail) plot(npsurv(acfail)) ## right-censored (and exact) observations data(gastric) plot(npsurv(gastric)) data(leukemia) i = leukemia[,"group"] == "Placebo" plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="blue") # placebo plot(npsurv(leukemia[!i,1:2]), add=TRUE, col="red") # 6-MP ## purely interval-censored data data(ap) plot(npsurv(ap)) data(cancer) cancerRT = with(cancer, cancer[group=="RT",1:2]) plot(npsurv(cancerRT), xlim=c(0,60)) # survival of RT cancerRCT = with(cancer, cancer[group=="RCT",1:2]) plot(npsurv(cancerRCT), add=TRUE, col="green") # survival of RCT } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. \emph{Computational Statistics & Data Analysis}, \bold{52}, 2388-2402. Wang, Y. and Taylor, S. M. (2013). 
Efficient computation of nonparametric survival functions via a hierarchical mixture formulation. \emph{Statistics and Computing}, \bold{23}, 713-725. } \seealso{ \code{\link{icendata}}, \code{\link{Deltamatrix}}, \code{\link{idf}}, \code{\link{km}}. } \author{ Yong Wang } \keyword{function} npsurv/man/plot.npsurv.Rd0000644000176200001440000001042013725061634015167 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv.R \name{plot.npsurv} \alias{plot.npsurv} \alias{plot.idf} \alias{plotsurvidf} \alias{plotgradidf} \title{Plot Functions for Nonparametric Survival Estimation} \usage{ \method{plot}{npsurv}(x, ...) \method{plot}{idf}(x, data, fn=c("surv","grad"), ...) plotsurvidf(f, style=c("box","uniform","left","right","midpoint"), xlab="Time", ylab="Survival Probability", col="blue3", fill=0, add=FALSE, lty=1, lty.inf=2, xlim, ...) plotgradidf(f, data, w=1, col1="red3", col2="blue3", xlab="Survival Time", ylab="Gradient", xlim, ...) } \arguments{ \item{x}{an object of class \code{npsurv} (i.e., an output of function \code{npsurv}) or an object of class \code{idf}.} \item{...}{arguments for other graphical parameters (see \code{par}).} \item{fn}{either "surv" or "grad", to indicate plotting either the survival or the gradient function.} \item{f}{an object of class \code{idf}.} \item{style}{for how to plot the survival function on a "maximal intersection interval": = \code{box}, plot a rectangle, which shows the uncertainty of probability allocation within the interval; = \code{uniform}, treat it as a uniform distribution and hence the diagonal line of the rectangle is plotted; = \code{left}, plot only the left side of the rectangle; = \code{right}, plot only the right side of the rectangle; = \code{midpoint}, plot a vertical line at the midpoint of the interval.} \item{xlab, ylab}{x- or y-axis label.} \item{add}{= \code{TRUE}, adds the curve to the existing plot; = \code{FALSE}, plots the curve in a new one.} \item{col}{color for all line segments, including box/rectangle borders.} \item{fill}{color for filling a box/rectangle. By default, a lighter semi-transparent color is used.} \item{lty}{line type} \item{lty.inf}{line type for the rectangle that may extend to infinity.} \item{data}{vector or matrix that stores observations, or an object of class \code{icendata}.} \item{w}{additional weights/multiplicities of the observations stored in \code{x}.} \item{col1}{color for drawing maximal intersection intervals allocated with positive probabilities.} \item{col2}{color for drawing all gradients and the maximal intersection intervals allocated with zero probabilities.} \item{xlim}{x-coordinate limit points.} } \description{ Functions for plotting nonparametric survival functions and related ones. } \details{ \code{plot.npsurv} and \code{plot.idf} are wrapper functions that call either \code{plotsurvidf} or \code{plotgradidf}. \code{plotsurvidf} plots the survival function of the nonparametric maximum likelihood estimate (NPMLE). \code{plotgradidf} plots the gradient function of the NPMLE. \code{plotsurvidf} by default chooses a less saturated color for \code{fill} than \code{col}. \code{plotgradidf} plots gradient values as vertical lines located as the left endpoints of the maximal intersection intervals. Each maximal intersection interval is plotted as a wider line on the horizontal zero-gradient line, with a circle to represent the open left endpoint of the interval and a solid point the closed right endpoint of the interval. 
The maximal intersection intervals allocated with positive probabilities have zero gradients, and hence no vertical lines are drawn for them. } \examples{ data(ap) plot(r<-npsurv(ap)) # survival function plot(r$f, ap, fn="g") # all gradients virtually zeros. data(cancer) cancerRT = with(cancer, cancer[group=="RT",1:2]) plot(rt<-npsurv(cancerRT), xlim=c(0,60)) # survival of RT cancerRCT = with(cancer, cancer[group=="RCT",1:2]) plot(rct<-npsurv(cancerRCT), add=TRUE, col="green3") # survival of RCT ## as uniform dististrbutions. plot(rt, add=TRUE, style="uniform", col="blue3") plot(rct, add=TRUE, style="uniform", col="green3") ## plot gradients; must supply data plot(rt, cancerRT, fn="g") # for group RT plotgradidf(rct$f, cancerRCT) # or, for group RCT } \references{ Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood computation for interval-censored data. \emph{Computational Statistics & Data Analysis}, \bold{52}, 2388-2402. } \seealso{ \code{\link{icendata}}, \code{\link{idf}}, \code{\link{npsurv}}. } \author{ Yong Wang } \keyword{function} npsurv/man/hazuh.Rd0000644000176200001440000000231513725066600013776 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Uhaz.R \name{hazuh} \alias{hazuh} \alias{chazuh} \alias{survuh} \alias{denuh} \title{Distributional Functions given a U-shaped Hazard Function} \usage{ hazuh(t, h) chazuh(t, h) survuh(t, h) denuh(t, h) } \arguments{ \item{t}{time points at which the function is to be evaluated.} \item{h}{an object of class \code{uh}.} } \value{ A numeric vector of the function values. } \description{ Given an object of class \code{uh}: } \details{ \code{hazuh} computes the hazard values; \code{chazuh} computes the cumulative hazard values; \code{survuh} computes the survival function values; \code{denuh} computes the density function values. } \examples{ data(ap) h = Uhaz(icendata(ap), deg=2)$h hazuh(0:15, h) # hazard chazuh(0:15, h) # cumulative hazard survuh(0:15, h) # survival probability denuh(0:15, h) # density } \references{ Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood computation of a U-shaped hazard function. \emph{Statistics and Computing}, \bold{28}, 187-200. } \seealso{ \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} } \author{ Yong Wang } \keyword{function} npsurv/man/ap.Rd0000644000176200001440000000234613725061634013265 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/npsurv-package.R \docType{data} \name{ap} \alias{ap} \title{Angina Pectoris Survival Data} \format{ A data frame with 30 observations and 3 variables: \code{L}: left-end point of an interval-censored retraction time; \code{R}: right-end point of an interval-censored retraction time; \code{count}: number of patients in the interval. } \source{ Lee and Wang (2003), page 92. } \description{ Contains the survival times in years from the time of diagnosis for 2418 male patients with angina pectoris. Some patients are lost to follow-up, hence giving right-censored observations. Each integer-valued survival time is treated as being censored within a one-year interval. } \examples{ data(ap) r = Uhaz(ap, deg=2) # smooth U-shaped hazard plot(r$h, fn="h") # hazard plot(r$h, fn="d") # density # NPMLE and shape-restricted estimation plot(npsurv(ap), fn="s") # survival under no shape restriction plot(r$h, fn="s", add=TRUE) # survival with smooth U-shaped hazard } \references{ Lee, E. T. and Wang, J. W. (2003). 
\emph{Statistical Methods for Survival Data Analysis}. Wiley. } \seealso{ \code{\link{npsurv}}. } \keyword{datasets} npsurv/DESCRIPTION0000644000176200001440000000142213731664713013327 0ustar liggesusersPackage: npsurv Title: Nonparametric Survival Analysis Version: 0.5-0 Date: 2020-09-19 Author: Yong Wang Maintainer: Yong Wang Depends: lsei Imports: methods Description: Non-parametric survival analysis of exact and interval-censored observations. The methods implemented are developed by Wang (2007) , Wang (2008) , Wang and Taylor (2013) and Wang and Fani (2018) . Encoding: UTF-8 License: GPL (>= 2) URL: https://www.stat.auckland.ac.nz/~yongwang/ RoxygenNote: 7.1.1 NeedsCompilation: no Packaged: 2020-09-19 02:02:58 UTC; yong Repository: CRAN Date/Publication: 2020-09-20 14:40:11 UTC npsurv/R/0000755000176200001440000000000013730756117012022 5ustar liggesusersnpsurv/R/icendata.R0000644000176200001440000001226513725057762013727 0ustar liggesusers######################## # # Interval-censored data # ######################## # # Also allows for exact observations included. ##'Class of Interval-censored Data ##' ##' ##' Class \code{icendata} can be used to store general ##' interval-censored data, which may possibly contain exact ##' observations.There are several functions associated with the ##' class. ##' ##'Function \code{icendata} creates an object of class 'icendata', which can be ##'used to save both interval-censored and exact observations. ##' ##'Function \code{is.icendata} simply checks if an object is of class ##''icendata'. ##' ##' ##'If \code{x} is a vector, it contains only exact observations, with weights ##'given in \code{w}. ##' ##'If \code{x} is a two-column matrix, it contains interval-censored ##'observations and stores their left and right endpoints in the first and ##'second column, respectively. If the left and right endpoints are equal, then ##'the observation is exact. Weights are provided by \code{w}. ##' ##'If \code{x} is a three-column matrix, it contains interval-censored ##'observations and stores their left and right endpoints in the first and ##'second column, respectively. The weight of each observation is the ##'third-column value multiplied by the corresponding weight value in \code{w}. ##' ##'It is useful to turn interval-censored (and exact) observations into the ##'format imposed by \code{icendata} so that they can be processed in a ##'standardized format by other functions. Also, exact and interval-censored ##'observations are stored separately in this format and can hence be dealt ##'with more easily. Most functions in the package \code{npsurv} first ensure ##'that the data has this format before processing. ##' ##'Observations of zero weights are removed. Identical observations are ##'aggregated. ##' ##'An interval-valued observation is either \eqn{(L_i, R_i]}{(Li, Ri]} if ##'\eqn{L_i < R_i}{Li < Ri}, or \eqn{[L_i, R_i]}{[Li, Ri]} if \eqn{L_i = ##'R_i}{Li = Ri}. ##' ##'@aliases icendata is.icendata icendata.object ##'@param x vector or matrix. ##'@param w weights or multiplicities of the observations. 
##'@return ##' ##'\item{t}{numeric vector, storing exact observations.} ##' ##'\item{wt}{numeric vector, storing the weights of exact observations.} ##' ##'\item{o}{two-column numeric matrix, storing interval-censored observations.} ##' ##'\item{wo}{numeric vector, storing the weights of interval-censored ##'observations.} ##' ##'\item{i1}{logical vector, indicating whether exact observations are less ##'than \code{upper}.} ##' ##'\item{upper}{the largest finite value of \code{t} and \code{o}.} ##' ##'\item{u}{numeric vector, containing 0 and all unique finite values in ##'\code{t} and \code{o}.} ##'@author Yong Wang ##'@seealso \code{\link{npsurv}}, \code{\link{Uhaz}}. ##'@references ##' ##'Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood ##'computation for interval-censored data. Computational Statistics & Data ##'Analysis, 52, 2388-2402. ##' ##'Wang, Y. and Fani, S. (2017). Nonparametric maximum likelihood computation ##'of a U-shaped hazard function. \emph{Statistics and Computing}, (in print). ##'@keywords class function ##'@examples ##' ##'data(ap) ##'(x = icendata(ap)) ##'is.icendata(x) ##' ##'data(gastric) ##'icendata(gastric) ##' ##'data(leukemia) ##'i = leukemia[,"group"] == "6-MP" ##'icendata(leukemia[i,1:2]) ##' ##'@usage ##'icendata(x, w=1) ##'is.icendata(x) ##' ##'@export icendata ##'@export is.icendata icendata = function(x, w=1) { if(is.null(x)) return(NULL) if(is.icendata(x)) { if(all(w == 1)) return(x) w = rep(w, length = length(x$t) + nrow(x$o)) if(length(x$t) > 0) x$wt = x$wt * w[1:length(x$wt)] if(nrow(x$o) > 0) x$wo = x$wo * w[length(x$wt)+1:nrow(x$o)] return(x) } z = vector("list", 7) names(z) = c("t", "wt", "o", "wo", "i1", "upper", "u") if(is.vector(x)) x = cbind(x, x) if(!is.matrix(x)) x = as.matrix(x) if(ncol(x) == 3) {w = w * x[,3]; x = x[,1:2]} if(length(w) != nrow(x)) w = rep(w, len=nrow(x)) iw = w > 0 w = w[iw] x = x[iw,,drop=FALSE] o = order(x[,1], x[,2]) x = x[o,] w = w[o] id = c(TRUE, diff(x[,1]) > 0 | diff(x[,2]) > 0) id[is.na(id)] = FALSE # for Inf's w = aggregate(w, by=list(group=cumsum(id)), sum)[,2] x = x[id,] i = x[,1] == x[,2] z$t = x[i,1] names(z$t) = NULL z$wt = w[i] z$o = x[!i,1:2,drop=FALSE] dimnames(z$o) = list(NULL, c("L","R")) z$wo = w[!i] z$upper = max(x[,1]) z$i1 = z$t != z$upper z$u = sort(unique(c(0, pmin(c(x[,1], x[,2]), z$upper)))) class(z) = "icendata" z } is.icendata = function(x) "icendata" %in% class(x) # is.rightcensored.icendata = function(x) all(x$o[,2] == Inf) expand.icendata = function(x) { if(!is.icendata(x)) x = icendata(x) z = vector("list", 7) names(z) = c("t", "wt", "o", "wo", "i1", "upper", "u") z$upper = x$upper if(length(x$t) > 0) { z$t = rep(x$t, x$wt) z$wt = rep(1, length(z$t)) z$i1 = z$t != z$upper } else z$t = z$wt = numeric(0) if(nrow(x$o) > 0) { z$o = cbind(rep(x$o[,1], x$wo), rep(x$o[,2], x$wo)) z$wo = rep(1, nrow(z$o)) colnames(z$o) = c("L","R") } else {z$o = matrix(nrow=0, ncol=2); z$wo = numeric(0)} z$u = x$u class(z) = "icendata" z } npsurv/R/npsurv.R0000644000176200001440000007131113725060570013500 0ustar liggesusers# ----------------------------------------------------------------------- # # Nonparametric maximum likelihood estimation from interval-censored data # # ----------------------------------------------------------------------- # ##'Nonparametric Survival Function Estimation ##' ##' ##'\code{npsurv} computes the nonparametric maximum likelihood esimate (NPMLE) ##'of a survival function for general interval-censored data. 
##' ##' ##'If \code{data} is a vector, it contains only exact observations, with ##'weights given in \code{w}. ##' ##'If \code{data} is a matrix with two columns, it contains interval-censored ##'observations, with the two columns storing their left and right end-points, ##'respectively. If the left and right end-points are equal, then the ##'observation is exact. Weights are provided by \code{w}. ##' ##'If \code{data} is a matrix with three columns, it contains interval-censored ##'observations, with the first two columns storing their left and right ##'end-points, respectively. The weight of each observation is the third-column ##'value multiplied by the corresponding weight value in \code{w}. ##' ##'The algorithm used for computing the NPMLE is either the constrained Newton ##'method (CNM) (Wang, 2008), or the hierachical constrained Newton method ##'(HCNM) (Wang and Taylor, 2013) when there are a large number of maximal ##'intersection intervals. ##' ##'Inside the function, it examines if data has only right censoring, and if ##'so, the Kaplan-Meier estimate is computed directly by function \code{km}. ##' ##'An interval-valued observation is either \eqn{(L_i, R_i]}{(Li, Ri]} if ##'\eqn{L_i < R_i}{Li < Ri}, or \eqn{[L_i, R_i]}{[Li, Ri]} if \eqn{L_i = ##'R_i}{Li = Ri}. ##' ##'@aliases npsurv npsurv.object ##'@param data vector or matrix, or an object of class \code{icendata}. ##'@param w weights or multiplicities of the observations. ##'@param maxit maximum number of iterations. ##'@param tol tolerance level for stopping the algorithm. It is used as the ##'threshold on the increase of the log-likelihood after each iteration. ##'@param verb verbosity level for printing intermediate results at each ##'iteration. ##'@return ##' ##'An object of class \code{npsurv}, which is a list with components: ##' ##'\item{f}{NPMLE, an object of class \code{idf}.} ##' ##'\item{upper}{largest finite value in the data.} ##' ##'\item{convergence}{= \code{TRUE}, converged successfully; ##' ##'= \code{FALSE}, maximum number of iterations reached.} ##' ##'\item{method}{method used internally, either \code{cnm} or \code{hcnm}.} ##' ##'\item{ll}{log-likelihood value of the NPMLE \code{f}.} ##' ##'\item{maxgrad}{maximum gradient value of the NPMLE \code{f}.} ##' ##'\item{numiter}{number of iterations used.} ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{Deltamatrix}}, ##'\code{\link{idf}}, \code{\link{km}}. ##'@references ##' ##'Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood ##'computation for interval-censored data. \emph{Computational Statistics & ##'Data Analysis}, \bold{52}, 2388-2402. ##' ##'Wang, Y. and Taylor, S. M. (2013). Efficient computation of nonparametric ##'survival functions via a hierarchical mixture formulation. \emph{Statistics ##'and Computing}, \bold{23}, 713-725. 
##'@keywords function ##'@examples ##' ##'## all exact observations ##'data(acfail) ##'plot(npsurv(acfail)) ##' ##'## right-censored (and exact) observations ##'data(gastric) ##'plot(npsurv(gastric)) ##' ##'data(leukemia) ##'i = leukemia[,"group"] == "Placebo" ##'plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="blue") # placebo ##'plot(npsurv(leukemia[!i,1:2]), add=TRUE, col="red") # 6-MP ##' ##'## purely interval-censored data ##'data(ap) ##'plot(npsurv(ap)) ##' ##'data(cancer) ##'cancerRT = with(cancer, cancer[group=="RT",1:2]) ##'plot(npsurv(cancerRT), xlim=c(0,60)) # survival of RT ##'cancerRCT = with(cancer, cancer[group=="RCT",1:2]) ##'plot(npsurv(cancerRCT), add=TRUE, col="green") # survival of RCT ##' ##'@export npsurv npsurv = function(data, w=1, maxit=100, tol=1e-6, verb=0) { x2 = icendata(data, w) if(nrow(x2$o) == 0 || all(x2$o[,2] == Inf)) { # exact or right-censored only r0 = km(x2) r = list(f=r0$f, upper=max(x2$t, x2$o[,1]), convergence=TRUE, ll=r0$ll, maxgrad=0, numiter=1) return(structure(r, class="npsurv")) } x = rbind(cbind(x2$t, x2$t), x2$o) nx = nrow(x) w = c(x2$wt, x2$wo) wr = sqrt(w) n = sum(w) upper = x2$upper dmat = Deltamatrix(x) left = dmat$left right = dmat$right D = dmat$Delta m = length(left) p = double(m) i = rowSums(D) != 1 j = colSums(D[!i,,drop=FALSE]) > 0 j[c(1,m)] = TRUE repeat { # Initial p must ensure P > 0 jm = which.max(colSums(D[i,,drop=FALSE])) j[jm] = TRUE i[D[,jm]] = FALSE if( sum(i) == 0 ) break } p = colSums(w * D) * j p = p / sum(p) if(m >= 200) { ## Turn to HCNM r = hcnm(w=w, D=D, p0=p, maxit=maxit, tol=tol, verb=verb) j = r$pf > 0 f = idf(left[j], right[j], r$pf[j]) r = list(f=f, upper=upper, convergence=r$convergence, method="hcnm", ll=r$ll, maxgrad=r$maxgrad, numiter=r$numiter) return(structure(r, class="npsurv")) } P = drop(D %*% p) ll = sum( w * log(P) ) converge = FALSE for(i in 1:maxit) { p.old = p ll.old = ll S = D / pmax(P, 1e-100) d = colSums(w * S) dmax = max(d) - n if(verb > 0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(ll, 6), "\n") } if(verb > 1) cat("Maximum gradient: ", signif(dmax, 6), "\n") if(verb > 2) {cat("Probability vector:\n"); print(p)} j[which(j)-1 + aggregate(d, by=list(group=cumsum(j)), which.max)[,2]] = TRUE pj = pnnls(wr * S[,j,drop=FALSE], wr * 2, sum=1)$x p[j] = pj / sum(pj) alpha = 1 # line search pd = p - p.old lld = sum(d * pd) p.alpha = p repeat { P.alpha = drop(D %*% p.alpha) ll.alpha = sum(w * log(P.alpha)) if(ll.alpha >= ll + alpha * lld * .33) { p = p.alpha; P = P.alpha; ll = ll.alpha; break } if((alpha <- alpha * .5) < 1e-10) break p.alpha = p.old + alpha * pd } j = p > 0 if( ll <= ll.old + tol ) {converge=TRUE; break} } f = idf(left[j], right[j], p[j]) r = list(f=f, upper=upper, convergence=converge, method="cnm", ll=ll, maxgrad=max(crossprod(w/P, D))-n, numiter=i) structure(r, class="npsurv") } # LR matrix of intervals # An interval is either (Li, Ri] if Li < Ri, or [Li, Ri] if Li = Ri. ##'Delta matrix ##' ##' ##'\code{Deltamatrix} computes the Delta matrix, along with maximal ##'intersection intervals, for a set of intervals. ##' ##' ##'An intersection interval is a nonempty intersection of any combination of ##'the given intervals, and a maximal intersection interval is an intersection ##'interval that contains no other intersection interval. ##' ##'The Delta matrix is a matrix of indicators (\code{TRUE} or \code{FALSE}). ##'The rows correspond to the given interval-censored observations, and the ##'columns the maximal intersection intervals. 
A \code{TRUE} value of the ##'(i,j)-th element means that the i-th observation covers the j-th maximal ##'intersection interval, and a \code{FALSE} value means the opposite. ##' ##'@param LR two-column matrix, each row of which stores a censoring interval ##'of the form \eqn{(L_i, R_i]}{(Li, Ri]}. If \eqn{L_i = R_i}{Li = Ri}, it is an exact observation. ##'@return ##' ##'A list with components: ##' ##'\item{left}{left endpoints of the maximal intersection intervals.} ##' ##'\item{right}{right endpoints of the maximal intersection intervals.} ##' ##'\item{Delta}{logical matrix, for the Delta matrix.} ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{idf}}. ##'@references ##' ##'Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood ##'computation for interval-censored data. \emph{Computational Statistics & ##'Data Analysis}, \bold{52}, 2388-2402. ##'@keywords function ##'@examples ##' ##'(x = cbind(1:5,1:5*3-2)) ##'Deltamatrix(x) ##' ##'@export Deltamatrix Deltamatrix = function(LR) { L = LR[,1] R = LR[,2] ic = L != R # interval-censored nc = sum(ic) # tol = max(R[R!=Inf]) * 1e-8 if(nc > 0) { L1 = L[ic] + max(R[R!=Inf]) * 1e-8 # open left endpoints LRc = cbind(c(L1, R[ic]), c(rep(0,nc), rep(1,nc)), rep(1:nc, 2)) LRc.o = LRc[order(LRc[,1]),] j = which(diff(LRc.o[,2]) == 1) left = L[ic][LRc.o[j,3]] right = R[ic][LRc.o[j+1,3]] } else left = right = numeric(0) if(nrow(LR) - nc > 0) { ut = unique(L[!ic]) jin = colSums(outer(ut, left, ">") & outer(ut, right, "<=")) > 0 left = c(ut, left[!jin]) # remove those that contain exact obs. right = c(ut, right[!jin]) o = order(left, right) left = left[o] right = right[o] } ## D = outer(L, left, "<=") & outer(R, right, ">=") D = outer(L, left, "<=") & outer(R, right, ">=") & (outer(L, right, "<") | outer(R, left, "==")) dimnames(D) = names(left) = names(right) = NULL list(left=left, right=right, Delta=D) } # interval distribution function, i.e., a distribution function defined on # a set of intervals. # left Left endpoints of the intervals # right Right endpoints of the intervals # p Probability masses allocated to the intervals ##'Interval Distribution Function ##' ##' ##' Class \code{idf} can be used to store a distribution function ##' defined on a set of intervals. There are several functions ##' associated with the class. ##' ##' \code{idf} creates an object of class \code{idf}. An \code{idf} object ##'stores a distribution function defined on a set of intervals. ##' ##' ##'When left and right endpoints are identical, the intervals just ##' represent exact points. ##' ##'\code{print.idf} prints an object of class \code{idf} as a three-column ##'matrix. ##' ##'@aliases idf idf.object print.idf ##'@param left,right left and right endpoints of intervals on which the ##'distribution function is defined. ##'@param p probabilities allocated to the intervals. Probability values will ##'be normalized inside the function. ##'@param x an object of class \code{idf}. ##'@param ... other arguments for printing. ##'@return ##' ##'\item{left, right}{left and right endpoints of intervals on which the ##'distribution function is defined.} ##' ##'\item{p}{probabilities allocated to the intervals.} ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{Deltamatrix}}, ##'\code{\link{npsurv}}. ##'@keywords function ##'@examples ##' ##'idf(1:5, 1:5*3-2, c(1,1,2,2,4)) ##'npsurv(cbind(1:5, 1:5*3-2))$f # NPMLE ##' ##'@usage ##'idf(left, right, p) ##'\method{print}{idf}(x, ...)
##' ##'@export idf ##'@export print.idf idf = function(left, right, p) { if(length(left) != length(right)) stop("length(left) != length(right)") names(left) = names(right) = names(p) = NULL p = rep(p, length=length(left)) f = list(left=left, right=right, p=p/sum(p)) structure(f, class="idf") } print.idf = function(x, ...) { print(cbind(left=x$left, right=x$right, p=x$p), ...) } # Kaplan-Meier estimate of the survival function for right-censored data ##'Kaplan-Meier Estimation ##' ##' ##'\code{km} computes the nonparametric maximum likelihood esimate (NPMLE) of a ##'survival function for right-censored data. ##' ##' ##'For details about the arguments, see \code{icendata}. ##' ##'@param data vector or matrix, or an object of class \code{icendata}. ##'@param w weights/multiplicities of observations. ##'@return ##' ##'A list with components: ##' ##'\item{f}{NPMLE, an object of class \code{idf}.} ##' ##'\item{ll}{log-likelihood value of the NPMLE \code{f}.} ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{npsurv}}, \code{\link{idf}}. ##'@references ##' ##'Kaplan, E. L. and Meier, P. (1958). Nonparametric estimation from incomplete ##'observations. \emph{Journal of the American Statistical Association}, ##'\bold{53}, 457-481. ##'@keywords function ##'@examples ##' ##'x = cbind(1:5, c(1,Inf,3,4,Inf)) ##'(f = km(x)$f) ##'plot(f) ##' ##'data(leukemia) ##'i = leukemia[,"group"] == "Placebo" ##'plot(km(leukemia[i,1:2])$f, xlim=c(0,40), col="green3") # placebo ##'plot(km(leukemia[!i,1:2])$f, add=TRUE) # 6-MP ##' ##'@export km km = function(data, w=1) { x = icendata(data, w) if(any(x$o[,2] != Inf)) stop("Not all observations are exact or right-censored") if(nrow(x$o) == 0) { # no right-censored observations f = idf(x$t, x$t, x$wt) ll = sum(x$wt * log(f$p)) return(list(f=f, ll=ll)) } c = colSums(x$wo * outer(x$o[,1], x$t, "<")) n = sum(x$wt, x$wo) # number of observations r = n - c - c(0,cumsum(x$wt))[1:length(x$t)] # no. at risk S = cumprod(1 - x$wt/r) # survival prob. # tab = cbind(x$t, x$wt, c, r, S) p = rev(diff(rev(c(1,S,0)))) dc = x$wt + c if(max(x$t) > max(x$o[,1])) { f = idf(x$t, x$t, p[-length(p)]) ll = sum( x$wt * log(f$p) ) } else { f = idf(c(x$t,max(x$o[,1])), c(x$t,Inf), p) ll = sum(c(x$wt, n - sum(x$wt)) * log(f$p)) } list(f=f, ll=ll) } #### Plot functions ##'Plot Functions for Nonparametric Survival Estimation ##' ##'Functions for plotting nonparametric survival functions and related ones. ##' ##'\code{plot.npsurv} and \code{plot.idf} are wrapper functions that call ##'either \code{plotsurvidf} or \code{plotgradidf}. ##' ##'\code{plotsurvidf} plots the survival function of the nonparametric maximum ##'likelihood estimate (NPMLE). ##' ##'\code{plotgradidf} plots the gradient function of the NPMLE. ##' ##' ##'\code{plotsurvidf} by default chooses a less saturated color for \code{fill} ##'than \code{col}. ##' ##'\code{plotgradidf} plots gradient values as vertical lines located as the ##'left endpoints of the maximal intersection intervals. Each maximal ##'intersection interval is plotted as a wider line on the horizontal ##'zero-gradient line, with a circle to represent the open left endpoint of the ##'interval and a solid point the closed right endpoint of the interval. The ##'maximal intersection intervals allocated with positive probabilities have ##'zero gradients, and hence no vertical lines are drawn for them. 
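##'
##'As a quick optimality check of a fit (a sketch using the \code{ap} data;
##'see also the examples below), the plotted gradients of an NPMLE should be
##'nonpositive everywhere and virtually zero on the intervals that carry
##'positive probability:
##'
##'\preformatted{
##'data(ap)
##'r = npsurv(ap)
##'plotgradidf(r$f, ap)
##'}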
##' ##'@aliases plot.npsurv plot.idf plotsurvidf plotgradidf ##'@param x an object of class \code{npsurv} (i.e., an output of function ##'\code{npsurv}) or an object of class \code{idf}. ##'@param fn either "surv" or "grad", to indicate plotting either the survival ##'or the gradient function. ##'@param f an object of class \code{idf}. ##'@param style for how to plot the survival function on a "maximal ##'intersection interval": ##' ##'= \code{box}, plot a rectangle, which shows the uncertainty of probability ##'allocation within the interval; ##' ##'= \code{uniform}, treat it as a uniform distribution and hence the diagonal ##'line of the rectangle is plotted; ##' ##'= \code{left}, plot only the left side of the rectangle; ##' ##'= \code{right}, plot only the right side of the rectangle; ##' ##'= \code{midpoint}, plot a vertical line at the midpoint of the interval. ##' ##'@param xlab,ylab x- or y-axis label. ##'@param add = \code{TRUE}, adds the curve to the existing plot; ##' ##'= \code{FALSE}, plots the curve in a new one. ##'@param col color for all line segments, including box/rectangle borders. ##'@param fill color for filling a box/rectangle. By default, a lighter ##'semi-transparent color is used. ##'@param lty line type ##'@param lty.inf line type for the rectangle that may extend to infinity. ##'@param data vector or matrix that stores observations, or an object of class ##'\code{icendata}. ##'@param w additional weights/multiplicities of the observations stored in ##'\code{x}. ##'@param col1 color for drawing maximal intersection intervals allocated with ##'positive probabilities. ##'@param col2 color for drawing all gradients and the maximal intersection ##'intervals allocated with zero probabilities. ##'@param xlim x-coordinate limit points. ##'@param ... arguments for other graphical parameters (see \code{par}). ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{idf}}, \code{\link{npsurv}}. ##'@references ##' ##'Wang, Y. (2008). Dimension-reduced nonparametric maximum likelihood ##'computation for interval-censored data. \emph{Computational Statistics & ##'Data Analysis}, \bold{52}, 2388-2402. ##'@keywords function ##'@examples ##' ##'data(ap) ##'plot(r<-npsurv(ap)) # survival function ##'plot(r$f, ap, fn="g") # all gradients virtually zeros. ##' ##'data(cancer) ##'cancerRT = with(cancer, cancer[group=="RT",1:2]) ##'plot(rt<-npsurv(cancerRT), xlim=c(0,60)) # survival of RT ##'cancerRCT = with(cancer, cancer[group=="RCT",1:2]) ##'plot(rct<-npsurv(cancerRCT), add=TRUE, col="green3") # survival of RCT ##'## as uniform dististrbutions. ##'plot(rt, add=TRUE, style="uniform", col="blue3") ##'plot(rct, add=TRUE, style="uniform", col="green3") ##' ##'## plot gradients; must supply data ##'plot(rt, cancerRT, fn="g") # for group RT ##'plotgradidf(rct$f, cancerRCT) # or, for group RCT ##' ##'@usage ##'\method{plot}{npsurv}(x, ...) ##'\method{plot}{idf}(x, data, fn=c("surv","grad"), ...) ##'plotsurvidf(f, style=c("box","uniform","left","right","midpoint"), ##' xlab="Time", ylab="Survival Probability", col="blue3", fill=0, ##' add=FALSE, lty=1, lty.inf=2, xlim, ...) ##'plotgradidf(f, data, w=1, col1="red3", col2="blue3", ##' xlab="Survival Time", ylab="Gradient", xlim, ...) ##' ##'@export plot.npsurv ##'@export plot.idf ##'@export plotsurvidf ##'@export plotgradidf plot.npsurv = function(x, ...) plot(x$f, ...) plot.idf = function(x, data, fn=c("surv","grad"), ...) 
{ fn = match.arg(fn) fnR = getFunction(paste("plot",fn,"idf",sep="")) switch(fn, "surv" = fnR(x, ...), "grad" = fnR(x, data, ...) ) } plotgradidf = function(f, data, w=1, col1="red3", col2="blue3", xlab="Survival Time", ylab="Gradient", xlim, ...) { x2 = icendata(data, w) x = rbind(cbind(x2$t, x2$t), x2$o) w = c(x2$wt, x2$wo) dmat = Deltamatrix(x) D = dmat$Delta if(missing(xlim)) { upper = max(dmat$left, dmat$right[f$right 0 ms = sum(j) points(dmat$left[!j], rep(0,m-ms), pch=1, col=col2, cex=1) points(dmat$right[!j], rep(0, m-ms), pch=20, col=col2, cex=.8) segments(dmat$left[!j], rep(0, m-ms), pmin(dmat$right[!j], xlim[2]), rep(0, m-ms), col=col2, lwd=3) points(dmat$left[j], rep(0,ms), pch=1, col=col1, cex=1) points(dmat$right[j], rep(0, ms), pch=20, col=col1, cex=.8) segments(dmat$left[j], rep(0, ms), pmin(dmat$right[j], xlim[2]), rep(0, ms), col=col1, lwd=3) } plotsurvidf = function(f, style=c("box","uniform","left","right","midpoint"), xlab="Time", ylab="Survival Probability", col="blue3", fill=0, add=FALSE, lty=1, lty.inf=2, xlim, ...) { style = match.arg(style) k = length(f$left) S = 1 - cumsum(f$p) upper = max(f$left, f$right[f$right != Inf]) if(max(f$right) == Inf) point.inf = upper * 1.2 else point.inf = upper if( missing(xlim) ) xlim = c(0, point.inf) m = length(f$p) if(!is.na(fill) && fill==0) { fill.hsv = drop(rgb2hsv(col2rgb(col))) * c(1, .3, 1) fill = hsv(fill.hsv[1], fill.hsv[2], fill.hsv[3], .3) } switch(style, box = { d = c(f$left[1], rep(f$right, rep(2,k)), f$right[k]) # right s = rep(c(1,S), rep(2,k+1)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper if( !add ) plot(d, s, type="n", col=col, xlim=xlim, xlab=xlab, ylab=ylab, lty=lty, ...) if(style == "box") { Sc = c(1, S) j = which(f$right > f$left) rect(f$left[j], Sc[j+1], f$right[j], Sc[j], border=col, col=fill) } lines(d, s, col=col, lty=lty, ...) lines(c(upper, point.inf), c(S[k-1],S[k-1]), col=col, lty=lty.inf) if(f$right[k] != Inf) { # left d = rep(c(f$left,f$right[k]), rep(2,k+1)) s = c(1,rep(S, rep(2,k)),0) } else { d = rep(f$left, c(rep(2,k-1), 1)) s = c(1,rep(S[-k], rep(2,k-1))) } add = TRUE }, left = { d = rep(c(f$left,f$right[k]), rep(2,k+1)) s = c(1,rep(S, rep(2,k)),0) d[2*k+2] = upper }, right = { d = c(f$left[1], rep(f$right, rep(2,k)), f$right[k]) s = rep(c(1,S), rep(2,k+1)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper }, midpoint = { d1 = (f$left + f$right) / 2 d = c(f$left[1], rep(d1, rep(2,k)), f$right[k]) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper s = rep(c(1,S), rep(2,k+1)) }, uniform = { d = c(rbind(f$left,f$right), rep(f$right[k],2)) if(f$right[k] == Inf) d[2*k] = upper else d[2*k+2] = upper s = c(1,rep(S, rep(2,k)),S[k]) } ) if( add ) lines(d, s, col=col, lty=lty, ...) else plot(d, s, type="l", col=col, xlim=xlim, xlab=xlab, ylab=ylab, lty=lty, ...) abline(h=0, col="black") lines(c(0,f$left[1]), c(1,1), col=col) if(f$right[k] < Inf) lines(c(upper, point.inf), rep(0,2), col=col, lty=lty) else points(upper, S[k-1], col=col, pch=20) } ## ========================================================================== ## ## Hierarchical CNM: a variant of the Constrained Newton Method for finding ## the NPMLE survival function of a data set containing interval censoring. ## This is a new method to build on those in the Icens and MLEcens ## packages. It uses the idea of block subsets of the S matrix to move ## probability mass among blocks of candidate support intervals. 
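##
## A minimal internal-use sketch (for illustration only; hcnm is not
## exported, and npsurv() switches to it automatically when the number of
## maximal intersection intervals is large, currently m >= 200). Here L, R,
## w and p0 are placeholders for the user's data and starting values:
##
##   dmat = Deltamatrix(cbind(L, R))        # L, R: censoring interval endpoints
##   r = hcnm(w=w, D=dmat$Delta, p0=p0)     # p0: initial probability vector
##   r$pf                                   # fitted probability masses
##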
## ## Usage (parameters and return value) is similar to the methods in package ## Icens, although note the transposed clique matrix. ## ## Arguments: ## data: Data ## w: Weights ## D: Clique matrix, n*m (note, transposed c.f. Icens::EMICM, ## MLEcens::reduc). The clique matrix may contain conditional ## probabilities rather than just membership flags, for use in HCNM ## recursively calling itself. ## p0: Vector (length m) of initial estimates for the probabilities of ## the support intervals. ## maxit: Maximum number of iterations to perform ## tol: Tolerance for the stopping condition (in log-likelihood value) ## blockpar: ## NA or NULL means choose a value based on the data (using n and r) ## ==0 means same as cnm (don't do blocks) ## <1 means nblocks is this power of sj, e.g. 0.5 for sqrt ## >1 means exactly this block size (e.g. 40) ## recurs.maxit: For internal use only: maximum number of iterations in ## recursive calls ## depth: For internal use only: depth of recursion ## verb: For internal use only: depth of recursion ## ## Author: Yong Wang and Stephen S. Taylor ## ## Reference: Wang, Y. and Taylor, S. M. (2013). Efficient computation of ## nonparametric survival functions via a hierarchical mixture ## formulation. Statistics and Computing, 23, 713-725. ## ## ========================================================================== hcnm = function(data, w=1, D=NULL, p0=NULL, maxit=100, tol=1e-6, blockpar=NULL, recurs.maxit=2, depth=1, verb=0) { if(missing(D)) { x2 = icendata(data, w) if(nrow(x2$o) == 0 || all(x2$o[,2] == Inf)) { # exact or right-censored only r0 = km(x2) r = list(f=r0$f, convergence=TRUE, ll=r0$ll, maxgrad=0, numiter=1) class(r) = "npsurv" return(r) } x = rbind(cbind(x2$t, x2$t), x2$o) nx = nrow(x) w = c(x2$wt, x2$wo) dmat = Deltamatrix(x) left = dmat$left right = dmat$right intervals = cbind(left, right) D = dmat$Delta } else { if (missing(p0)) stop("Must provide 'p0' with D.") if (!missing(data)) warning("D and data both provided. LR ignored!") nx = nrow(D) w = rep(w, length=nx) intervals = NULL } n = sum(w) wr = sqrt(w) converge = FALSE m = ncol(D) m1 = 1:m nblocks = 1 # maxdepth = depth i = rowSums(D) == 1 r = mean(i) # Proportion of exact observations if(is.null(p0)) { ## Derive an initial p vector. 
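    ## Greedy initialization (descriptive note): start from the support
    ## intervals hit by the exact observations, then repeatedly add the
    ## interval covering the most still-uncovered observations until every
    ## row of D covers at least one selected column; the initial mass is
    ## proportional to the weighted column sums of D over the selection.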
j = colSums(D[i,,drop=FALSE]) > 0 while(any(c(FALSE,(i <- rowSums(D[,j,drop=FALSE])==0)))) { j[which.max(colSums(D[i,,drop=FALSE]))] = TRUE } p = colSums(w * D) * j } else { if(length(p <- p0) != m) stop("Argument 'p0' is the wrong length.") } p = p / sum(p) P = drop(D %*% p) ll = sum(w * log(P)) evenstep = FALSE for(iter in 1:maxit) { p.old = p ll.old = ll S = D / P g = colSums(w * S) dmax = max(g) - n if(verb > 0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(ll, 6), "\n") } if(verb > 1) cat("Maximum gradient: ", signif(dmax, 6), "\n") if(verb > 2) {cat("Probability vector:\n"); print(p)} j = p > 0 if(depth==1) { s = unique(c(1,m1[j],m)) if (length(s) > 1) for (l in 2:length(s)) { j[s[l-1] + which.max(g[s[l-1]:s[l]]) - 1] = TRUE } } sj = sum(j) ## BW: matrix of block weights: sj rows, nblocks columns if(is.null(blockpar) || is.na(blockpar)) ## Default blockpar based on log(sj) iter.blockpar = ifelse(sj < 30, 0, 1 - log(max(20,10*log(sj/100)))/log(sj)) else iter.blockpar = blockpar if(iter.blockpar==0 | sj < 30) { nblocks = 1 BW = matrix(1, nrow=sj, ncol=1) } else { nblocks = max(1, if(iter.blockpar>1) round(sj/iter.blockpar) else floor(min(sj/2, sj^iter.blockpar))) i = seq(0, nblocks, length=sj+1)[-1] if(evenstep) { nblocks = nblocks + 1 BW = outer(round(i)+1, 1:nblocks, "==") } else BW = outer(ceiling(i), 1:nblocks, "==") storage.mode(BW) = "numeric" } for(block in 1:nblocks) { jj = logical(m) jj[j] = BW[,block] > 0 sjj = sum(jj) if (sjj > 1 && (delta <- sum(p.old[jj])) > 0) { Sj = S[,jj] res = pnnls(wr * Sj, wr * drop(Sj %*% p.old[jj]) + wr, sum=delta) if (res$mode > 1) warning("Problem in pnnls(a,b)") p[jj] = p[jj] + BW[jj[j],block] * (res$x * (delta / sum(res$x)) - p.old[jj]) } } ## Maximise likelihood along the line between p and p.old p.gap = p - p.old # vector from old to new estimate ## extrapolated rise in ll, based on gradient at old estimate ll.rise.gap = sum(g * p.gap) alpha = 1 p.alpha = p ll.rise.alpha = ll.rise.gap repeat { P = drop(D %*% p.alpha) ll = sum(w * log(P)) if(ll >= ll.old && ll + ll.rise.alpha <= ll.old) { p = p.alpha # flat land reached converge = TRUE break } if(ll > ll.old && ll >= ll.old + ll.rise.alpha * .33) { p = p.alpha # Normal situation: new ll is higher break } if((alpha <- alpha * 0.5) < 1e-10) { p = p.old P = drop(D %*% p) ll = ll.old converge = TRUE break } p.alpha = p.old + alpha * p.gap ll.rise.alpha = alpha * ll.rise.gap } if(converge) break if (nblocks > 1) { ## Now jiggle p around among the blocks Q = sweep(BW,1,p[j],"*") # Matrix of weighted probabilities: [sj,nblocks] q = colSums(Q) # its column sums (total in each block) ## Now Q is n*nblocks Matrix of probabilities for mixture components Q = sweep(D[,j] %*% Q, 2, q, "/") if (any(q == 0)) { warning("A block has zero probability!") } else { ## Recursively call HCNM to allocate probability among the blocks res = hcnm(w=w, D=Q, p0=q, blockpar=iter.blockpar, maxit=recurs.maxit, recurs.maxit=recurs.maxit, depth=depth+1) # maxdepth = max(maxdepth, res$maxdepth) if (res$ll > ll) { p[j] = p[j] * (BW %*% (res$pf / q)) P = drop(D %*% p) ll = sum(w * log(P)) # should match res$lval } } } if(iter > 2) if( ll <= ll.old + tol ) {converge=TRUE; break} evenstep = !evenstep } list(pf=p, intervals=intervals, convergence=converge, method="hcnm", ll=ll, maxgrad=max(crossprod(w/P, D))-n, numiter=iter) } npsurv/R/Uhaz.R0000644000176200001440000013340413725067050013054 0ustar liggesusers############################################ # Estimation of a U-shaped Hazard Function # 
############################################ ##'U-shaped Hazard Function Estimation ##' ##' ##'\code{Uhaz} computes the nonparametric maximum likelihood estimate (NPMLE) of ##'a U-shaped hazard function from exact or interval-censored data, or a mix of ##'the two types of data. ##' ##' ##'If \code{data} is a vector, it contains only exact observations, with ##'weights given in \code{w}. ##' ##'If \code{data} is a matrix with two columns, it contains interval-censored ##'observations, with the two columns storing their left and right end-points, ##'respectively. If the left and right end-points are equal, then the ##'observation is exact. Weights are provided by \code{w}. ##' ##'If \code{data} is a matrix with three columns, it contains interval-censored ##'observations, with the first two columns storing their left and right ##'end-points, respectively. The weight of each observation is the third-column ##'value multiplied by the corresponding weight value in \code{w}. ##' ##'The algorithm used for computing the NPMLE of a hazard function under ##'the U-shape restriction was proposed by Wang and Fani (2018). ##' ##'A U-shaped hazard function is given by ##' ##'\deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p ##' + sum_{j=1}^m mu_j (t - eta_j)_+^p,} ##' ##'where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, ##'\eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0} is the spline degree which ##'determines the smoothness of the U-shaped hazard. As p increases, the family ##'of hazard functions becomes increasingly smoother, but at the same time, smaller. ##'When p = 0, the hazard function is U-shaped, as studied by Bray et al. ##'(1967). When p = 1, the hazard function is convex, as studied by Jankowski ##'and Wellner (2009a,b). ##' ##'Note that \code{deg} (i.e., p in the above mathematical display) can take on ##'any nonnegative real value. ##' ##'@aliases Uhaz Uhaz.object ##'@param data vector or matrix, or an object of class \code{icendata}. ##'@param w weights or multiplicities of the observations. ##'@param deg nonnegative real number for spline degree (i.e., p in the formula ##'below). ##'@param maxit maximum number of iterations. ##'@param tol tolerance level for stopping the algorithm. It is used as the ##'threshold on the increase of the log-likelihood after each iteration. ##'@param verb verbosity level for printing intermediate results in each ##'iteration. ##'@return ##' ##'An object of class \code{Uhaz}, which is a list with components: ##' ##'\item{convergence}{= \code{TRUE}, converged successfully; ##' ##'= \code{FALSE}, maximum number of iterations reached.} ##' ##'\item{grad}{gradient values at the knots.} ##' ##'\item{numiter}{number of iterations used.} ##' ##'\item{ll}{log-likelihood value of the NPMLE \code{h}.} ##' ##'\item{h}{NPMLE of the U-shaped hazard function, an object of class ##'\code{uh}.} ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{nzmort}}. ##'@references ##' ##'Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum ##'Likelihood Estimation of a U-shaped Failure Rate Function}. Defense ##'Technical Information Center. ##' ##'Jankowski, H. K. and Wellner, J. A. (2009a). Computation of nonparametric ##'convex hazard estimators via profile methods.
\emph{Journal of Nonparametric ##'Statistics}, \bold{21}, 505-518. ##' ##'Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a ##'convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, ##'1010-1035. ##' ##'Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood ##' computation of a U-shaped hazard function. \emph{Statistics and ##' Computing}, \bold{28}, 187-200. ##' ##'@keywords function ##'@examples ##' ##'## Interval-censored observations ##'data(ap) ##'(r = Uhaz(ap, deg=0)) ##'plot(r, ylim=c(0,.3), col=1) ##'for(i in 1:6) plot(Uhaz(ap, deg=i/2), add=TRUE, col=i+1) ##'legend(15, 0.01, paste0("deg = ", 0:6/2), lwd=2, col=1:7, xjust=1, yjust=0) ##' ##'## Exact observations ##'data(nzmort) ##'x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality ##'(h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h) # U-shaped hazard ##'(h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h) # convex hazard ##'(h2 <- Uhaz(x[,1]+0.5, x[,2], deg=2)$h) # smooth U-shaped hazard ##' ##'plot(h0, pch=2) # plot hazard functions ##'plot(h1, add=TRUE, col="green3", pch=1) ##'plot(h2, add=TRUE, col="red3", pch=19) ##' ##'age = 0:max(x[,1]) # plot densities ##'count = integer(length(age)) ##'count[x[,"age"]+1] = x[,"deaths"] ##'barplot(count/sum(count), space=0, col="lightgrey") ##'axis(1, pos=NA, at=0:10*10) ##'plot(h0, fn="d", add=TRUE, pch=2) ##'plot(h1, fn="d", add=TRUE, col="green3", pch=1) ##'plot(h2, fn="d", add=TRUE, col="red3", pch=19) ##' ##'plot(h0, fn="s", pch=2) # plot survival functions ##'plot(h1, fn="s", add=TRUE, col="green3", pch=1) ##'plot(h2, fn="s", add=TRUE, col="red3", pch=19) ##' ##'## Exact and right-censored observations ##'data(gastric) ##'plot(h0<-Uhaz(gastric, deg=0)$h) # plot hazard functions ##'plot(h1<-Uhaz(gastric, deg=1)$h, add=TRUE, col="green3") ##'plot(h2<-Uhaz(gastric, deg=2)$h, add=TRUE, col="red3") ##' ##'plot(npsurv(gastric), fn="s", col="grey") # plot survival functions ##'plot(h0, fn="s", add=TRUE) ##'plot(h1, fn="s", add=TRUE, col="green3") ##'plot(h2, fn="s", add=TRUE, col="red3") ##' ##'@export Uhaz Uhaz = function(data, w=1, deg=1, maxit=100, tol=1e-6, verb=0) { x = icendata(data, w) h = uh.initial(x, deg) attr(h, "ll") = logLikuh(h, x) expdH = NULL bc = TRUE # boundary change convergence = 1 for(i in 1:maxit){ h.old = h if(nrow(x$o) > 0) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) maxima = maxgrad(h, x, expdH, bc=bc) np1 = maxima$np1 np2 = maxima$np2 h = uh(h$alpha, c(h$tau, np1), c(h$nu, double(length(np1))), c(h$eta, np2), c(h$mu, double(length(np2))), h$upper, h$deg, collapse=TRUE) r = updatemass(h, x, expdH, tol=tol) h = r$h if(h$deg == 0) {h = simplify(h); attr(h, "ll") = logLikuh(h, x)} if(verb>0) { cat("##### Iteration", i, "#####\n") cat("Log-likelihood: ", signif(attr(h,"ll"), 6), "\n") if(verb>1) cat("Gradient values: ", signif(dlogLik(h, x), 6), "\n") if(verb>2) {cat("hazard function:\n"); print(h)} } if(r$convergence == 1) bc = FALSE # backtracking failed. 
else if(attr(h, "ll") <= attr(h.old, "ll") + tol) {convergence = 0; break} } r = list(convergence=convergence, grad=dlogLik(h, x), numiter=i, ll=attr(h, "ll"), h=h) class(r) = "Uhaz" r } # Update masses updatemass = function(h, x, expdH=NULL, tol=1e-10) { tau = h$tau k = length(tau) j2 = h$eta != h$upper eta = h$eta = h$eta[j2] h$mu = h$mu[j2] m = length(eta) p = h$deg D1 = D2 = NULL t1 = x$t[x$i1] n1 = length(t1) if(n1 > 0) { tau.r = rep(tau, rep.int(n1,k)) dim(tau.r) = c(n1, k) if(p > 0) tau.t = pmax(tau.r - t1, 0) if(m > 0) { eta.r = rep(eta, rep.int(n1,m)) dim(eta.r) = c(n1, m) if(p > 0) t.eta = pmax(t1 - eta.r, 0) } D1 = switch(as.character(p), "0" = cbind(1, tau.r >= t1, if(m>0) t1 >= eta.r else NULL) / hazuh(t1, h), "1" = cbind(1, tau.t, if(m>0) t.eta else NULL) / hazuh(t1, h), "2" = cbind(1, tau.t * tau.t, if(m>0) t.eta * t.eta else NULL) / hazuh(t1, h), cbind(1, tau.t^p, if(m>0) t.eta^p else NULL) / hazuh(t1, h) ) } n2 = nrow(x$o) if(n2 > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) delta = sqrt(expdH) / (1 - expdH) tau.r1 = rep(tau, rep.int(n2,k)) dim(tau.r1) = c(n2, k) tau.x1 = pmax(tau.r1 - x$o[,1], 0) tau.x2 = pmax(tau.r1 - x$o[,2], 0) xd0 = x$o[,1] - x$o[,2] xd1 = switch(as.character(p), "0" = tau.x2 - tau.x1, "1" = .5 * (tau.x2 * tau.x2 - tau.x1 * tau.x1), "2" = (tau.x2 * tau.x2 * tau.x2 - tau.x1 * tau.x1 * tau.x1) / 3, (tau.x2^(p+1) - tau.x1^(p+1)) / (p+1) ) if(m > 0) { eta.r2 = rep(eta, rep.int(n2,m)) dim(eta.r2) = c(n2, m) x1.eta = pmax(x$o[,1] - eta.r2, 0) x2.eta = pmax(x$o[,2] - eta.r2, 0) xd2 = switch(as.character(p), "0" = x1.eta - x2.eta, "1" = .5 * (x1.eta * x1.eta - x2.eta * x2.eta), "2" = (x1.eta * x1.eta * x1.eta - x2.eta * x2.eta * x2.eta) / 3, (x1.eta^(p+1) - x2.eta^(p+1)) / (p+1) ) } else xd2 = NULL D2 = cbind(xd0, xd1, xd2) * delta D2[delta == 0] = 0 } D = rbind(D1, D2) * sqrt(c(x$wt[x$i1], x$wo)) H = crossprod(D) # Choleski decomposition v = sqrt(diag(H)) jv = v != 0 Hv = H[jv,jv,drop=FALSE] / tcrossprod(v[jv]) diag(Hv) = diag(Hv) + 1e-10 Rv = chol(Hv) gv = dlogLik(h, x, expdH, interior=TRUE)[jv] / v[jv] plus = forwardsolve(Rv, gv, upper.tri=TRUE, transpose=TRUE) par = c(h$alpha, h$nu, h$mu[j2])[jv] * v[jv] w.new = double(length(v)) w.new[jv] = nnls(Rv, Rv %*% par + plus)$x / v[jv] alpha = w.new[1] nu = if(k > 0) w.new[2:(k+1)] else numeric() mu = if(m > 0) w.new[(k+2):(k+m+1)] else numeric() newh = uh(alpha=alpha, tau=h$tau, nu=nu, eta=h$eta, mu=mu, upper=x$upper, h$deg, collapse=FALSE) if(h$deg == 0) b = backtrack(h, newh, x, expdH, alpha=0) else b = backtrack(h, newh, x, expdH) newh = b$h2 j1 = newh$nu != 0 j2 = newh$mu != 0 h2 = uh(newh$alpha, newh$tau[j1], newh$nu[j1], newh$eta[j2], newh$mu[j2], upper=h$upper, h$deg, collapse=FALSE) h = collapse(h2, x, tol=pmax(tol,1e-10)) list(h=h, convergence=b$convergence) } # Backtracking line search. 
h and h2 must have the same knots backtrack = function(h, h2, x, expdH, tol=1e-10, alpha=0.33){ j = h$eta != h$upper h2$eta = h$eta = h$eta[j] h$mu = h$mu[j] h2$mu = h2$mu[j] ll.h = logLikuh(h, x) d = c(h2$alpha - h$alpha, h2$nu - h$nu, h2$mu - h$mu) g = alpha * sum(dlogLik(h, x, expdH) * d) convergence = 0 r = 1 repeat { hr = uh((1-r) * h$alpha + r * h2$alpha, h$tau, (1-r) * h$nu + r * h2$nu, h$eta, (1-r)*h$mu + r * h2$mu, upper=h$upper, deg=h$deg) ll.hr = logLikuh(hr, x) if (ll.hr >= ll.h + r * g) {convergence =0; break} r = 0.5 * r if(r < tol) {r = 0; hr = h; ll.h2 = ll.h; convergence = 1; break} } attr(h2, "ll") = ll.hr list(h2=hr, r=r, convergence=convergence) } # Zeroth gradient function grad0 = function(h, x, expdH=NULL) { if(length(x$t) > 0) d0 = sum(x$wt[x$i1] / hazuh(x$t[x$i1], h)) - sum(x$wt * x$t) else d0 = 0 if(nrow(x$o) > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) xd = x$o[,1] - x$o[,2] xd[Delta == 0] = 0 d0 = d0 - sum(x$wo * (x$o[,1] + xd * Delta)) } d0 } # First gradient function grad1 = function(tau, h, x, expdH=NULL, order=0) { g = vector("list", length(order)) names(g) = paste("d", order, sep="") g[1:length(g)] = 0 if(length(tau) == 0) return(NULL) m = length(tau) n1 = length(x$t) p = h$deg if(n1 > 0) { # for exact observations tau.r1 = rep(tau, rep.int(n1,m)) dim(tau.r1) = c(n1,m) ind = tau.r1 >= x$t tau.t = pmax(tau.r1 - x$t, 0) if(0 %in% order) { g$d0 = switch(as.character(p), "0" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]) - 0.5 * crossprod(x$wt, tau.r1 * tau.r1 - tau.t * tau.t))[1,], "2" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]^2) - crossprod(x$wt, tau.r1*tau.r1*tau.r1 - tau.t*tau.t*tau.t) / 3)[1,], (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]^p) - crossprod(x$wt, tau.r1^(p+1) - tau.t^(p+1))[1,] / (p+1))[1,] ) } if(1 %in% order) { g$d1 = switch(as.character(p), "0" = double(m), "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], "2" = (2 * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 * tau.r1 - tau.t * tau.t))[1,], { tau.t.pm1 = tau.t[x$i1,,drop=FALSE]^(p-1) if(p < 1) tau.t.pm1[tau.t.pm1 == Inf] = 0 (p * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), tau.t.pm1) - crossprod(x$wt, tau.r1^p - tau.t^p))[1,] } ) } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = - sum(x$wt) + crossprod(x$wt, ind)[1,], "2" = (2 * crossprod(x$wt[x$i1], ind[x$i1,,drop=FALSE]) - crossprod(x$wt, tau.r1 - tau.t))[1,], { tau.t1.pm2 = tau.t[x$i1,,drop=FALSE]^(p-2) if(p < 2) tau.t1.pm2[tau.t1.pm2 == Inf] = 0 tau.t.pm1 = tau.t^(p-1) if(p < 1) tau.t.pm1[tau.t.pm1 == Inf] = 0 (p * (p-1) * crossprod(x$wt[x$i1], tau.t1.pm2) - p * crossprod(x$wt, tau.r1^(p-1) - tau.t.pm1))[1,] } ) } } n2 = nrow(x$o) if(n2 > 0) { # for interval-censored observations if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) tau.r2 = rep(tau, rep.int(n2,m)) dim(tau.r2) = c(n2,m) tau.x1 = pmax(tau.r2 - x$o[,1], 0) tau.x2 = pmax(tau.r2 - x$o[,2], 0) ind1 = tau.r2 >= x$o[,1] ind2 = tau.r2 >= x$o[,2] if(0 %in% order) { xd0 = switch(as.character(p), "0" = {tau.r2.p1 = tau.r2; tau.x2 - (tau.x1.p1 <- tau.x1)}, "1" = {tau.r2.p1 = tau.r2 * tau.r2 tau.x2 * tau.x2 - (tau.x1.p1 <- tau.x1 * tau.x1)}, "2" = 
{tau.r2.p1 = tau.r2 * tau.r2 * tau.r2 tau.x2 * tau.x2 * tau.x2 - (tau.x1.p1 <- tau.x1 * tau.x1 * tau.x1)}, { tau.r2.p1 = tau.r2^(p+1) tau.x2^(p+1) - (tau.x1.p1 <- tau.x1^(p+1))} ) xd0[Delta == 0] = 0 g$d0 = g$d0 - crossprod(x$wo, (tau.r2.p1 - tau.x1.p1 + xd0 * Delta))[1,] / (p+1) } if(1 %in% order) { xd1 = switch(as.character(p), "0" = {tau.r2.p = 1; ind2 - (tau.x1.p <- ind1)}, "1" = {tau.r2.p = tau.r2; tau.x2 - (tau.x1.p <- tau.x1)}, "2" = {tau.r2.p = tau.r2 * tau.r2; tau.x2 * tau.x2 - (tau.x1.p <- tau.x1 * tau.x1)}, {tau.r2.p = tau.r2^p; tau.x2^p - (tau.x1.p <- tau.x1^p)} ) xd1[Delta == 0] = 0 g$d1 = g$d1 - crossprod(x$wo, tau.r2.p - tau.x1.p + xd1 * Delta)[1,] } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = g$d2 - sum(x$wo) + crossprod(x$wo, ind1 - (ind2 - ind1) * Delta)[1,], "2" = { xd2 = tau.x2 - tau.x1 xd2[Delta == 0] = 0 g$d2 - 2 * crossprod(x$wo, tau.r2 - tau.x1 + xd2 * Delta)[1,] }, { tau.x1.pm1 = tau.x1^(p-1) if(p < 1) tau.x1.pm1[tau.x1.pm1 == Inf] = 0 xdp = tau.x2^(p-1) - tau.x1^(p-1) xdp[Delta == 0] = 0 g$d2 - p * crossprod(x$wo, tau.r2^(p-1) - tau.x1^(p-1) + xdp * Delta)[1,] } ) } } g } # Second gradient function grad2 = function(eta, h, x, expdH=NULL, order=0) { g = vector("list", length(order)) names(g) = paste("d", order, sep="") g[1:length(g)] = 0 if(length(eta) == 0) return(NULL) m = length(eta) n1 = length(x$t) p = h$deg if(n1 > 0) { eta.r1 = rep(eta, rep.int(n1,m)) dim(eta.r1) = c(n1,m) t.eta = pmax(x$t - eta.r1, 0) ind = x$t >= eta.r1 if(0 %in% order) { g$d0 = switch(as.character(p), "0" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) - crossprod(x$wt, t.eta))[1,], "1" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]) - 0.5 * crossprod(x$wt, t.eta^2))[1,], "2" = (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]^2) - crossprod(x$wt, t.eta * t.eta * t.eta) / 3)[1,], (crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]^p) - crossprod(x$wt, t.eta^(p+1)) / (p+1))[1,] ) } if(1 %in% order) { g$d1 = switch(as.character(p), "0" = double(m), "1" = (-crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), ind[x$i1,,drop=FALSE]) + crossprod(x$wt, t.eta))[1,], "2" = (-2 * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta[x$i1,,drop=FALSE]) + crossprod(x$wt, t.eta^2))[1,], { t.eta.pm1 = t.eta[x$i1,,drop=FALSE]^(p-1) if(p < 1) t.eta.pm1[t.eta.pm1 == Inf] = 0 (-p * crossprod(x$wt[x$i1] / hazuh(x$t[x$i1], h), t.eta.pm1) + crossprod(x$wt, t.eta^p))[1,] } ) } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = - crossprod(x$wt, ind)[1,], "2" = - 2 * crossprod(x$wt, t.eta)[1,], { t.eta.pm1 = t.eta^(p-1) if(p < 1) t.eta.pm1[t.eta.pm1 == Inf] = 0 - p * crossprod(x$wt, t.eta.pm1)[1,] } ) } } n2 = nrow(x$o) if(n2 > 0) { if(is.null(expdH)) expdH = exp(chazuh(x$o[,1],h) - chazuh(x$o[,2],h)) Delta = expdH / (1 - expdH) eta.r2 = rep(eta, rep.int(n2,m)) dim(eta.r2) = c(n2,m) x1.eta = pmax(x$o[,1] - eta.r2, 0) x2.eta = pmax(x$o[,2] - eta.r2, 0) ind1 = x$o[,1] >= eta.r2 ind2 = x$o[,2] >= eta.r2 if(0 %in% order) { xd0 = switch(as.character(p), "0" = (x1.eta.p1 <- x1.eta) - x2.eta, "1" = (x1.eta.p1 <- x1.eta * x1.eta) - x2.eta * x2.eta, "2" = (x1.eta.p1 <- x1.eta * x1.eta * x1.eta) - x2.eta * x2.eta * x2.eta, (x1.eta.p1 <- x1.eta^(p+1)) - x2.eta^(p+1) ) xd0[Delta == 0] = 0 g$d0 = g$d0 - crossprod(x$wo, x1.eta.p1 + xd0 * Delta)[1,] / (p+1) } if(1 %in% order) { xd1 = switch(as.character(p), "0" = (x1.eta.p <- ind1) - ind2, "1" = (x1.eta.p <- x1.eta) - x2.eta, "2" = (x1.eta.p <- x1.eta * 
x1.eta) - x2.eta * x2.eta, (x1.eta.p <- x1.eta^p) - x2.eta^p ) xd1[Delta == 0] = 0 g$d1 = g$d1 + crossprod(x$wo, x1.eta.p + xd1 * Delta)[1,] } if(2 %in% order) { g$d2 = switch(as.character(p), "0" = double(m), "1" = g$d2 - crossprod(x$wo, ind1 + (ind1 - ind2) * Delta)[1,], "2" = { xd2 = x1.eta - x2.eta xd2[Delta == 0] = 0 g$d2 - 2 * crossprod(x$wo, x1.eta + xd2 * Delta)[1,] }, { x1.eta.pm1 = x1.eta^(p-1) x2.eta.pm1 = x2.eta^(p-1) if(p < 1) { x1.eta.pm1[x1.eta.pm1 == Inf] = 0 x2.eta.pm1[x2.eta.pm1 == Inf] = 0 } xdp = x1.eta.pm1 - x2.eta.pm1 xdp[Delta == 0] = 0 g$d2 - p * crossprod(x$wo, x1.eta.pm1 + xdp * Delta)[1,] } ) } } g } ##'Computes the Log-likelihood Value of a U-shaped Hazard Function ##' ##' ##'\code{logLikuh} returns the log-likelihood value of a U-shaped hazard ##'function, given a data set. ##' ##' ##'@param h an object of class \code{uh}. ##'@param data numeric vector or matrix for exact or interval-censored ##'observations, or an object of class \code{icendata}. ##'@return ##' ##'Log-likelihood value evaluated at \code{h}, given \code{data}. ##'@author Yong Wang ##'@seealso \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} ##'@references ##' ##'Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood ##' computation of a U-shaped hazard function. \emph{Statistics and ##' Computing}, \bold{28}, 187-200. ##' ##'@keywords function ##'@examples ##' ##'data(ap) ##'(h0 = uh(.2, NULL, NULL, NULL, NULL, 15, 1)) # Uniform hazard ##'plot(h0, ylim=c(0,.3)) ##'logLikuh(h0, ap) ##' ##'r = Uhaz(ap, deg=2) ##'r$ll ##'logLikuh(r$h, ap) ##'plot(r$h, add=TRUE, col="red3") ##' ##'@export logLikuh logLikuh = function(h, data) { x = icendata(data) if(length(x$t) > 0) ll = sum(x$wt[x$i1] * log(hazuh(x$t[x$i1], h))) - sum(x$wt * chazuh(x$t, h)) else ll = 0 if(nrow(x$o) > 0) { ll = ll + sum(x$wo * (log(exp(-chazuh(x$o[,1], h)) - exp(-chazuh(x$o[,2], h))))) } ll } dlogLik = function(h, x, expdH=NULL, interior=FALSE) { if(interior) eta = h$eta[h$eta != h$upper] else eta = h$eta m = length(eta) d = c(grad0(h, x, expdH), grad1(h$tau, h, x, expdH)$d0, if(m>0) grad2(eta, h, x, expdH)$d0 else NULL) names(d) = c("alpha", paste("nu",1:length(h$tau),sep=""), if(m>0) paste("mu",1:m,sep="") else NULL) d } ### Finds all local maxima of the gradient functions # bc boundary change maxgrad = function(h, x, expdH=NULL, maxit=100, grid=100, bc=TRUE) { if(!is.icendata(x)) x = icendata(x, w=1) u = sort(unique(c(x$u, h$tau, h$eta))) p = h$deg if(p > 0 && p < 0.1) u = rep(u, rep.int(21, length(u))) + seq(-h$upper*1e-2, h$upper*1e-2, len=21) if(p == 1) maxit = 1 tau1 = c(0, h$tau[h$tau > 0]) k = length(tau1) - 1 eta1 = c(h$eta[h$eta 0 & g$d1[-1] < 0 } if(any(jd)) { left = u1[-m1][jd] right = u1[-1][jd] pt1 = (left + right) * .5 for(i in 1:maxit) { g = grad1(pt1, h, x, expdH, order=1:2) left[g$d1>0] = pt1[g$d1>0] right[g$d1<0] = pt1[g$d1<0] pt1.old = pt1 pt1 = pt1 - g$d1 / g$d2 j = is.na(pt1) | pt1 < left | pt1 > right pt1[j] = (left[j] + right[j]) * .5 if( max(abs(pt1 - pt1.old)) <= 1e-14 * h$upper) break } if(p == 1) pt1 = pt1[!j] gpt1 = grad1(pt1, h, x, expdH, order=0)$d0 } } i = pt1 > 0 & pt1 <= tau1[k+1] pt1i = pt1[i] gpt1i = gpt1[i] if(k > 0 && length(pt1i) > 0) { grp = apply(outer(pt1i, tau1[-(k+1)], ">") & outer(pt1i, tau1[-1], "<="), 1, which.max) r1 = aggregate(gpt1i, by=list(group=grp), which.max) j = integer(k) j[r1[,1]] = r1[,2] j = j + c(0,cumsum(tabulate(grp, nbins=k))[-k]) np1 = pt1i[j] gnp1 = gpt1i[j] j0 = gnp1 > 0 np1 = np1[j0] gnp1 = gnp1[j0] } else np1 = gnp1 = numeric() ## grad2 
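  ## Locate local maxima of the second gradient function (candidate eta
  ## knots on the increasing part of the hazard, over [max(tau), upper]):
  ## bracket sign changes of its derivative on a grid, then refine each
  ## bracket by safeguarded Newton steps, as done for grad1 above.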
if(p < 0.1) { if(bc) pt2 = u[u>=tau1[k+1] & u<=h$upper] else pt2 = u[u>tau1[k+1] & u= tau1[k+1]] else u2 = seq(tau1[k+1], h$upper, len=grid) if(!bc) u2 = u2[u2 > tau1[k+1]] m2 = length(u2) pt2 = gpt2 = numeric() if(p == 1) jd = rep(TRUE, m2-1) else { g = grad2(u2, h, x, expdH, order=1) jd = g$d1[-m2] > 0 & g$d1[-1] < 0 } if(any(jd)) { left = u2[-m2][jd] right = u2[-1][jd] pt2 = (left + right) * .5 for(i in 1:maxit) { g = grad2(pt2, h, x, expdH, order=1:2) left[g$d1>0] = pt2[g$d1>0] right[g$d1<0] = pt2[g$d1<0] pt2.old = pt2 pt2 = pt2 - g$d1 / g$d2 j = is.na(pt2) | pt2 < left | pt2 > right pt2[j] = (left[j] + right[j]) * .5 if( max(abs(pt2 - pt2.old)) <= 1e-14 * h$upper) break } if(p == 1) pt2 = pt2[!j] gpt2 = grad2(pt2, h, x, expdH, order=0)$d0 } } i = pt2 >= eta1[1] & pt2 < eta1[m+1] pt2i = pt2[i] gpt2i = gpt2[i] if(m > 0 && length(pt2i) > 0) { grp = apply(outer(pt2i, eta1[-(m+1)], ">=") & outer(pt2i, eta1[-1], "<"), 1, which.max) r2 = aggregate(gpt2i, by=list(group=grp), which.max) j = integer(m) j[r2[,1]] = r2[,2] j = j + c(0, cumsum(tabulate(grp, nbins=m))[-m]) np2 = pt2i[j] gnp2 = gpt2i[j] j0 = gnp2 > 0 np2 = np2[j0] gnp2 = gnp2[j0] } else np2 = gnp2 = numeric() ## grad1 and grad2 if(max(h$tau) != h$eta[1]) { jj1 = pt1 >= tau1[k+1] & pt1 <= eta1[1] if(p == 0) { uj1 = pt1[jj1]; gj1 = gpt1[jj1] } else { uj1 = c(tau1[k+1], if(bc) eta1[1] else NULL, pt1[jj1]) gj1 = c(grad1(c(tau1[k+1], if(bc) eta1[1] else NULL), h, x, expdH)$d0, gpt1[jj1]) } jmax = which.max(gj1) np31 = uj1[jmax] gnp31 = gj1[jmax] jj2 = pt2 >= tau1[k+1] & pt2 <= eta1[1] if(p == 0) { uj2 = pt2[jj2]; gj2 = gpt2[jj2] } else { uj2 = c(if(bc) tau1[k+1] else NULL, eta1[1], pt2[jj2]) gj2 = c(grad2(c(if(bc) tau1[k+1] else NULL, eta1[1]), h, x, expdH)$d0, gpt2[jj2]) } jmax = which.max(gj2) np32 = uj2[jmax] gnp32 = gj2[jmax] if(gnp31 > gnp32) {np1 = c(np1, np31); gnp1 = c(gnp1, gnp31)} else {np2 = c(np2, np32); gnp2 = c(gnp2, gnp32)} } list(np1=np1, gnp1=gnp1, np2=np2, gnp2=gnp2) } simplify = function(h) { i1 = order(h$tau) # remove identical knots tau = h$tau[i1] nu = h$nu[i1] i2 = order(h$eta) eta = h$eta[i2] mu = h$mu[i2] if(h$deg != 0 || length(tau) == 0 || length(eta) == 0) return(h) if(tau[length(tau)] != eta[1]) return(h) if(nu[length(tau)] < mu[1]) { if(eta[1] == 0) { h$alpha = h$alpha + mu[1] eta = eta[-1] mu = mu[-1] } else { h$alpha = h$alpha + nu[length(nu)] mu[1] = mu[1] - nu[length(nu)] tau = tau[-length(tau)] nu = nu[-length(nu)] } } else { h$alpha = h$alpha + mu[1] nu[length(nu)] = nu[length(nu)] - mu[1] eta = eta[-1] mu = mu[-1] } uh(h$alpha, tau, nu, eta, mu, h$upper, h$deg, collapse=FALSE) } # Collapse similar knots collapse = function(h, x, tol=0){ ll = attr(h, "ll") i1 = order(h$tau) # remove identical knots tau = h$tau[i1] ind1 = cumsum(!duplicated(c(tau))) tau = unique(tau) nu = aggregate(h$nu[i1], by=list(group=ind1), sum)[,2] nu[tau == 0] = 0 i2 = order(h$eta) eta = h$eta[i2] ind2 = cumsum(!duplicated(eta)) eta = unique(eta) mu = aggregate(h$mu[i2], by=list(group=ind2), sum)[,2] mu[eta == h$upper] = 0 h = uh(h$alpha, tau, nu, eta, mu, h$upper, h$deg, collapse=FALSE) if(tol > 0) { if(is.null(ll)) ll = logLikuh(h, x) # if(h$deg < 1) {attr(h, "ll") = ll; return(h)} # why? 
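    ## Knot merging: repeatedly try to merge the two closest tau knots (and,
    ## separately, the two closest eta knots) into their midpoint with the
    ## combined mass; a merge is kept only if the log-likelihood drops by no
    ## more than tol, and the loop stops once neither side can be merged.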
h2 = h break1 = break2 = FALSE repeat { if(!break1 && length(h2$nu) > 1) { j = which.min(diff(h$tau)) h2$nu[j] = h2$nu[j] + h2$nu[j+1] h2$nu = h2$nu[-(j+1)] h2$tau[j] = (h2$tau[j] + h2$tau[j+1]) * .5 h2$tau = h2$tau[-(j+1)] ll2 = logLikuh(h2, x) if(ll2 + tol >= ll) {h = h2; ll = ll2; break1 = FALSE} else {h2 = h; break1 = TRUE} } else break1 = TRUE if(!break2 && length(h2$mu) > 1) { j = which.min(diff(h2$eta)) h2$mu[j] = h2$mu[j] + h2$mu[j+1] h2$mu = h2$mu[-(j+1)] h2$eta[j] = (h2$eta[j] + h2$eta[j+1]) * .5 h2$eta = h2$eta[-(j+1)] ll2 = logLikuh(h2, x) if(ll2 + tol >= ll) {h = h2; ll = ll2; break2 = FALSE} else {h2 = h; break2 = TRUE} } else break2 = TRUE if(break1 && break2) break } attr(h, "ll") = ll } h } # deg - polynomial degree ##'U-shaped Hazard Function ##' ##' ##' Class \code{uh} can be used to store U-shaped hazard functions. ##' There are a couple of functions associated with the class. ##' ##'\code{uh} creates an object of class \code{uh}, which stores a U-shaped ##'hazard function. ##' ##'\code{print.uh} prints an object of class \code{uh}. ##' ##' ##'A U-shape hazard function, as generalized by Wang and Fani (2018), is given ##'by ##' ##'\deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p ##' + sum_{j=1}^m mu_j (t - eta_j)_+^p,} ##' ##'where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, ##'\eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0} is the the spline degree which ##'determines the smoothness of the U-shaped hazard. As p increases, the family ##'of hazard functions becomes increasingly smoother, but at the same time, ##'smaller. When \eqn{p = 0}{p = 0}, the hazard function is U-shaped, as ##'studied by Bray et al. (1967). When \eqn{p = 1}{p = 1}, the hazard function ##'is convex, as studied by Jankowski and Wellner (2009a,b). ##' ##'\code{print.uh} prints an object of class \code{uh}. While \code{alpha}, ##'\code{upper} and \code{deg} are printed as they are, \code{tau} and ##'\code{nu} are printed as a two-column matrix, and so are \code{eta} and ##'\code{mu}. ##' ##'@aliases uh uh.object print.uh ##'@param alpha a nonnegative value, for the constant coefficient. ##'@param tau vector of nonnegative real values, for left knots. ##'@param nu vector of nonnegative values, for masses associated with the left ##'knots. ##'@param eta vector of nonnegative real values, for right knots. ##'@param mu vector of nonnegative real values, for masses associated with the ##'right knots. ##'@param upper a positive value, at which point the hazard starts to become ##'infinite. ##'@param deg nonnegative real number for spline degree (i.e., p in the formula ##'below). ##'@param collapse logical, indicating if identical knots should be collapsed. ##'@param x an object of class \code{uh}. ##'@param ... other auguments for printing. ##'@return ##' ##'\code{uh} returns an object of class \code{uh}. It is a list with components ##'\code{alpha}, \code{tau}, \code{nu}, \code{eta}, \code{mu}, \code{upper} and ##'\code{deg}, which store their corresponding values as described above. ##'@author Yong Wang ##'@seealso \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} ##'@references ##' ##'Bray, T. A., Crawford, G. B., and Proschan, F. (1967). \emph{Maximum ##'Likelihood Estimation of a U-shaped Failure Rate Function}. Defense ##'Technical Information Center. ##' ##'Jankowski, H. K. and Wellner, J. 
A. (2009a). Computation of nonparametric ##'convex hazard estimators via profile methods. \emph{Journal of Nonparametric ##'Statistics}, \bold{21}, 505-518. ##' ##'Jankowski, H. K. and Wellner, J. A. (2009b). Nonparametric estimation of a ##'convex bathtub-shaped hazard function. \emph{Bernoulli}, \bold{15}, ##'1010-1035. ##' ##'Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood ##' computation of a U-shaped hazard function. \emph{Statistics and ##' Computing}, \bold{28}, 187-200. ##' ##'@keywords function ##'@examples ##' ##'(h0 = uh(3, 2, 3, 4, 5, 7, deg=0)) # deg = 0 ##'plot(h0, ylim=c(0,20)) ##'(h1 = uh(4, 2, 3, 5, 6, 7, deg=1)) # deg = 1 ##'plot(h1, add=TRUE, col="green3") ##'(h2 = uh(1, 1:2, 3:4, 5:6, 7:8, 9, deg=2)) # deg = 2 ##'plot(h2, add=TRUE, col="red3") ##' ##'@usage ##'uh(alpha, tau, nu, eta, mu, upper=Inf, deg=1, collapse=TRUE) ##'\method{print}{uh}(x, ...) ##' ##'@export uh ##'@export print.uh uh = function(alpha, tau, nu, eta, mu, upper=Inf, deg=1, collapse=TRUE) { if(length(tau) == 0) {tau=0; nu=0} if(length(eta) == 0) {eta=upper; mu=0} i1 = order(tau) tau = tau[i1] nu = nu[i1] i2 = order(eta) eta = eta[i2] mu = mu[i2] h = list(alpha=alpha, tau=tau, nu=nu, eta=eta, mu=mu, upper=upper, deg=deg) if(collapse) h = collapse(h) class(h) = "uh" h } print.uh = function(x, ...) { cat("$alpha\n") print(x$alpha, ...) print(cbind(tau=x$tau, nu=x$nu), ...) print(cbind(eta=x$eta, mu=x$mu), ...) cat("$upper\n") print(x$upper, ...) cat("$deg\n") print(x$deg, ...) } # Hazard function ##'Distributional Functions given a U-shaped Hazard Function ##' ##' ##'Given an object of class \code{uh}: ##' ##'\code{hazuh} computes the hazard values; ##' ##'\code{chazuh} computes the cumulative hazard values; ##' ##'\code{survuh} computes the survival function values; ##' ##'\code{denuh} computes the density function values. ##' ##' ##'@aliases hazuh chazuh survuh denuh ##'@param t time points at which the function is to be evaluated. ##'@param h an object of class \code{uh}. ##'@return ##' ##'A numeric vector of the function values. ##'@author Yong Wang ##'@seealso \code{\link{Uhaz}}, \code{\link{icendata}}, \code{\link{plot.uh}} ##'@references ##' ##'Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood ##' computation of a U-shaped hazard function. \emph{Statistics and ##' Computing}, \bold{28}, 187-200. 
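##'
##'The four functions satisfy the usual identities \eqn{S(t) = \exp(-H(t))}{S(t) = exp(-H(t))}
##'and \eqn{f(t) = h(t) S(t)}{f(t) = h(t) S(t)}, which can be verified
##'numerically (a sketch, with \code{h} a fitted \code{uh} object as in the
##'examples below):
##'
##'\preformatted{
##'t = 0:15
##'all.equal(survuh(t, h), exp(-chazuh(t, h)))
##'all.equal(denuh(t, h), hazuh(t, h) * survuh(t, h))
##'}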
##' ##'@keywords function ##'@examples ##' ##'data(ap) ##'h = Uhaz(icendata(ap), deg=2)$h ##'hazuh(0:15, h) # hazard ##'chazuh(0:15, h) # cumulative hazard ##'survuh(0:15, h) # survival probability ##'denuh(0:15, h) # density ##' ##'@usage ##'hazuh(t, h) ##'chazuh(t, h) ##'survuh(t, h) ##'denuh(t, h) ##' ##'@export hazuh ##'@export chazuh ##'@export survuh ##'@export denuh hazuh = function(t, h) { p = h$deg b = c = 0 if(length(h$tau) > 0) { if(p == 0) d = outer(h$tau, t, ">=") else { d = pmax(outer(h$tau, t, "-"), 0) if(p != 1) d = d^p } b = (h$nu %*% d)[1,] } if(length(h$eta) > 0) { if(p == 0) d = outer(t, h$eta, ">=") else { d = pmax(outer(t, h$eta, "-"), 0) if(p != 1) d = d^p } c = (d %*% h$mu)[,1] } h$alpha + pmax(b, c) } chazuh = function(t, h) { deg = pmax(h$deg, 1) p1 = h$deg + 1 a = b = c = 0 if(h$alpha > 0) a = h$alpha * t if(length(h$tau) > 0) { tau.t = pmax(outer(h$tau, t, "-"), 0) b = (h$nu %*% (h$tau^p1 - tau.t^p1))[1,] / p1 } if(length(h$eta) > 0) { t.eta = pmax(outer(t, h$eta, "-"), 0) c = (t.eta^p1 %*% h$mu)[,1] / p1 } H = a + b + c H[t > h$upper] = 1e100 H } # survival function survuh = function(t, h) exp(-chazuh(t, h)) # density function denuh = function(t, h) hazuh(t, h) * survuh(t, h) ## plotting functions ##'Plot Functions for U-shaped Hazard Estimation ##' ##' ##' Functions for plotting various functions in U-shaped hazard estimation ##' ##'\code{plot.Uhaz} and \code{plot.uh} are wrapper functions that can be used ##'to invoke \code{plot.hazuh}, \code{plot.chazuh}, \code{plot.survuh}, ##'\code{plot.denuh} or \code{plot.graduh}. ##' ##'\code{plothazuh} plots a U-shaped hazard function. ##' ##'\code{plotchazuh} plots a cumulative hazard function that has a U-shaped ##'hazard function. ##' ##'\code{plotsurvuh} plots the survival function that has a U-shaped hazard ##'function. ##' ##'\code{plotdenuh} plots the density function that has a U-shaped hazard ##'function. ##' ##'\code{plotgraduh} plots the gradient function that has a U-shaped hazard ##'function. ##' ##' ##'A U-shaped hazard function is given by ##' ##'\deqn{h(t) = \alpha + \sum_{j = 1}^k \nu_j(\tau_j - t)_+^p + \sum_{j = 1}^{m} \mu_j (t-\eta_j)_+^p,}{ h(t) = alpha + sum_{j=1}^k nu_j (tau_j - t)_+^p ##' + sum_{j=1}^m mu_j (t - eta_j)_+^p,} ##' ##'where \eqn{\alpha,\nu_j,\mu_j \ge 0}{alpha, nu_j, mu_j \ge 0}, ##'\eqn{\tau_1 < \cdots < \tau_k \le \eta_1 < \cdots < \eta_m,}{tau_1 < ... < tau_k <= eta_1 < ... < eta_m,} and \eqn{p \ge 0}{p >= 0}. ##' ##'@aliases plot.Uhaz plot.uh plothazuh plotchazuh plotsurvuh plotdenuh ##'plotgraduh ##'@param x an object of class \code{Uhaz}, i.e., an output of function ##'\code{Uhaz}, or an object of class \code{uh}.. ##'@param h an object of class \code{uh}. ##'@param data vector or matrix that stores observations, or an object of class ##'\code{icendata}. ##'@param w additional weights/multiplicities for the observations stored in ##'\code{data}. ##'@param fn function to be plotted. It can be ##' ##'= \code{haz}, for hazard function; ##' ##'= \code{chaz}, for cumulative hazard function; ##' ##'= \code{den}, for density function; ##' ##'= \code{surv}, for survival function; ##' ##'= \code{gradient}, for gradient functions. ##' ##'@param xlim,ylim numeric vectors of length 2, giving the x and y coordinates ##'ranges. ##'@param xlab,ylab x- or y-axis labels. ##'@param add = \code{TRUE}, adds the curve to the existing plot; ##' ##'= \code{FALSE}, plots the curve in a new one. ##'@param col color used for plotting the curve. ##'@param lty line type for plotting the curve. 
##'@param lwd line width for plotting the curve. ##'@param len number of points used to plot a curve. ##'@param add.knots logical, indicating if knots are also plotted. ##'@param pch point character/type for plotting knots. ##'@param vert logical, indicating if grey vertical lines are plotted to show ##'the interval that separates the two discrete measures. ##'@param col0 color for gradient function 0, i.e., for the hazard-constant ##'part, or alpha. ##'@param col1 color for gradient function 1, i.e., for the hazard-decreasing ##'part. ##'@param col2 color for gradient function 1, i.e., for the hazard-increasing ##'part. ##'@param order = 0, the gradient functions are plotted; ##' ##'= 1, their first derivatives are plotted; ##' ##'= 2, their second derivatives are plotted. ##'@param ... arguments for other graphical parameters (see \code{par}). ##'@author Yong Wang ##'@seealso \code{\link{icendata}}, \code{\link{uh}}, \code{\link{npsurv}}. ##'@references ##' ##'Wang, Y. and Fani, S. (2018). Nonparametric maximum likelihood ##' computation of a U-shaped hazard function. \emph{Statistics and ##' Computing}, \bold{28}, 187-200. ##' ##'@keywords function ##'@examples ##' ##'## Angina Pectoris Survival Data ##'data(ap) ##'plot(r<-Uhaz(ap)) # hazard function for a convex hazard ##'plot(r, fn="c") # cumulative hazard function ##'plot(r, fn="s") # survival function ##'plot(r, fn="d") # density function ##'plot(r, ap, fn="g") # gradient functions ##'plot(r, ap, fn="g", order=1) # first derivatives of gradient functions ##'plot(r, ap, fn="g", order=2) # second derivatives of gradient functions ##' ##'## New Zealand Mortality in 2000 ##'data(nzmort) ##'i = nzmort$ethnic == "maori" ##'x = nzmort[i,1:2] # Maori mortality ##'h = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard ##'plot(h) # hazard function ##'plot(h, fn="d") # density function ##'plot(h, fn="s") # survival function ##' ##'x2 = nzmort[!i,1:2] # Non-Maori mortality ##'h2 = Uhaz(x2[,1]+0.5, x2[,2], deg=2)$h ##'plot(h2, fn="s", add=TRUE, col="green3") ##' ##'@usage ##'\method{plot}{Uhaz}(x, ...) ##'\method{plot}{uh}(x, data, fn=c("haz","grad","surv","den","chaz"), ...) ##'plothazuh(h, add=FALSE, col="darkblue", lty=1, xlim, ylim, ##' lwd=2, pch=19, len=500, vert=FALSE, add.knots=TRUE, ##' xlab="Time", ylab="Hazard", ...) ##'plotchazuh(h, add=FALSE, lwd=2, len=500, col="darkblue", ##' pch=19, add.knots=TRUE, vert=FALSE, xlim, ylim, ...) ##'plotdenuh(h, add=FALSE, lty=1, lwd=2, col="darkblue", ##' add.knots=TRUE, pch=19, ylim, len=500, vert=FALSE, ...) ##'plotsurvuh(h, add=FALSE, lty=1, lwd=2, len=500, vert=FALSE, ##' col="darkblue", pch=19, add.knots=TRUE, xlim, ylim, ...) ##'plotgraduh(h, data, w=1, len=500, xlim, ylim, vert=TRUE, ##' add=FALSE, xlab="Time", ylab="Gradient", ##' col0="red3", col1="blue3", col2="green3", order=0, ...) ##' ##'@export plot.Uhaz ##'@export plot.uh ##'@export plothazuh ##'@export plotchazuh ##'@export plotsurvuh ##'@export plotdenuh ##'@export plotgraduh plot.Uhaz = function(x, ...) plot(x$h, ...) plot.uh = function(x, data, fn=c("haz","grad","surv","den","chaz"), ...) { fn = match.arg(fn) fnR = getFunction(paste("plot",fn,"uh",sep="")) switch(fn, "haz" =, "surv" =, "den" =, "chaz" = fnR(x, ...), "grad" = fnR(x, data, ...) ) } plothazuh = function(h, add=FALSE, col="darkblue", lty=1, xlim, ylim, lwd=2, pch=19, len=500, vert=FALSE, add.knots=TRUE, xlab="Time", ylab="Hazard", ...) 
{ p = h$deg pt = switch(as.character(p), "0" = unique(sort(c(0, h$tau, h$eta, h$upper))), "1" = unique(sort(c(0, h$tau, h$eta, h$upper))), unique(sort(c(h$tau, h$eta, seq(0, h$upper, len=len))))) m = length(pt) knots = unique(c(h$tau, h$eta)) haz = hazuh(pt, h) max.haz = max(haz) if(missing(xlim)) xlim = range(pt) if(missing(ylim)) ylim = c(0, max.haz) if(!add) plot(pt, haz, type="n", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, ...) if(vert) { lines(rep(max(h$tau),2), ylim, col="grey", lty=2) lines(rep(min(h$eta),2), ylim, col="grey", lty=2) } abline(h=0, col ="black") if(p == 0) { lines(c(h$tau[length(h$tau)], h$eta[1]), rep(h$alpha,2), lwd=lwd, col=col, lty=lty) lines(c(rep(rev(h$tau),each=2), 0), c(h$alpha, rep(hazuh(rev(h$tau), h), each=2)), lwd=lwd, col=col, lty=lty) lines(c(rep(h$eta,each=2), h$upper), c(h$alpha, rep(hazuh(h$eta, h), each=2)), lwd=lwd, col=col, lty=lty) } else lines(pt, haz, lwd=lwd, col=col, lty=lty) if(add.knots && length(knots) > 0) points(knots, hazuh(knots, h), col=col, pch=pch) } plotchazuh = function(h, add=FALSE, lwd=2, len=500, col="darkblue", pch=19, add.knots=TRUE, vert=FALSE, xlim, ylim, ...) { pt = unique(sort(c(seq(0, h$upper, len=len), h$tau, h$eta))) m = length(pt) H = chazuh(pt, h) max.H = max(H) if(missing(xlim)) xlim = range(pt) if(missing(ylim)) ylim = c(0, max.H) plot(rep(max(h$tau),2), c(0,max.H), type="n", xlim=xlim, ylim=ylim, xlab="Time", ylab="Cumulative Hazard", ...) if(vert) lines(rep(max(h$tau),2), c(0,max.H), type="l", col ="grey", lty=2) if(vert) lines(rep(min(h$eta),2), c(0,max.H), col ="grey", lty=2) abline(h=0, col ="black") lines(pt, H, type="l", lwd=lwd, col=col) if(add.knots) { knots = c(h$tau, h$eta) knots = knots[knots>0 & knots0 & knots0 & knots 0) x$t else numeric() y2 = if(n2 > 0) { x$o[x$o[,2] == Inf,2] = 1.6 * x$upper rowMeans(x$o) } else numeric() beta = weighted.mean(c(y1,y2), c(x$wt, x$wo)) uh(alpha=1/beta, tau=NULL, nu=NULL, eta=NULL, mu=NULL, upper=x$upper, deg=deg) } npsurv/R/npsurv-package.R0000644000176200001440000002125713730756040015075 0ustar liggesusers ##'Air Conditioner Failure Data ##' ##'Contains the number of operating hours between successive failure times of ##'the air conditioning systems in Boeing airplanes ##' ##' ##'@name acfail ##'@docType data ##'@format A numeric vector storing the failure times. ##'@seealso \code{\link{Uhaz}}. ##'@references Proschan, F. (1963). Theoretical explanation of observed ##'decreasing failure rate. \emph{Technometrics}, \bold{5}, 375-383. ##'@source Proschan (1963) ##'@keywords datasets ##'@examples ##' ##'data(acfail) ##'r = Uhaz(acfail, deg=2) ##'plot(r$h, fn="h") ##'plot(r$h, fn="d") ##' NULL ##'Angina Pectoris Survival Data ##' ##'Contains the survival times in years from the time of diagnosis for 2418 ##'male patients with angina pectoris. Some patients are lost to follow-up, ##'hence giving right-censored observations. Each integer-valued survival time ##'is treated as being censored within a one-year interval. ##' ##' ##'@name ap ##'@docType data ##'@format ##' ##'A data frame with 30 observations and 3 variables: ##' ##'\code{L}: left-end point of an interval-censored retraction time; ##' ##'\code{R}: right-end point of an interval-censored retraction time; ##' ##'\code{count}: number of patients in the interval. ##'@seealso \code{\link{npsurv}}. ##'@references Lee, E. T. and Wang, J. W. (2003). \emph{Statistical Methods for ##'Survival Data Analysis}. Wiley. ##'@source Lee and Wang (2003), page 92. 
##'@keywords datasets ##'@examples ##' ##'data(ap) ##'r = Uhaz(ap, deg=2) # smooth U-shaped hazard ##'plot(r$h, fn="h") # hazard ##'plot(r$h, fn="d") # density ##' ##'# NPMLE and shape-restricted estimation ##'plot(npsurv(ap), fn="s") # survival under no shape restriction ##'plot(r$h, fn="s", add=TRUE) # survival with smooth U-shaped hazard ##' NULL ##'Breast Retraction Times after Breast Cancer Treatments ##' ##'Contains the breast retraction times in months for 94 breast cancer patients ##'who received either radiation therapy or radiation therapy plus adjuvant ##'chemotherapy. ##' ##' ##'@name cancer ##'@docType data ##'@format A data frame with 94 observations and 3 variables: ##' ##'L: left-end points of the interval-censored retraction times; ##' ##'R: right-end points of the interval-censored retraction times; ##' ##'group: either \code{RT} (radiation therapy) or \code{RCT} (radiation therapy ##'plus adjuvant chemotherapy). ##'@seealso \code{\link{npsurv}}. ##'@references Finkelstein, D. M. and R. A. Wolfe (1985). A semiparametric ##'model for regression analysis of interval-censored failure time data. ##'\emph{Biometrics}, \bold{41}, 933-945. ##'@source Finkelstein and Wolfe (1985). ##'@keywords datasets ##'@examples ##' ##'data(cancer) ##'i = cancer$group == "RT" ##'plot(npsurv(cancer[i,1:2]), xlim=c(0,60)) ##'plot(npsurv(cancer[!i,1:2]), add=TRUE, col="green3") ##' NULL ##'Gastric Cancer Survival Data ##' ##'Contains the survival times of 45 gastrointestinal tumor patients who were ##'treated with both chemotherapy and radiotherapy. It has both exact and ##'right-censored observations. ##' ##' ##'@name gastric ##'@docType data ##'@format A data frame with 45 observations and 2 variables: ##' ##'L: left-end points of the interval-censored survival times; ##' ##'R: right-end points of the interval-censored survival times. ##'@seealso \code{\link{npsurv}}, \code{\link{Uhaz}}. ##'@references Klein, J. P. and Moeschberger, M. L. (2003). \emph{Survival ##'Analysis: Techniques for Censored and Truncated Data (2nd ed.)}. Springer. ##'@source Klein and Moeschberger (2003), page 224. ##'@keywords datasets ##'@examples ##' ##'data(gastric) ##'plot(npsurv(gastric), col="grey") # survival function ##'plot(h0<-Uhaz(gastric, deg=0)$h, fn="s", add=TRUE, col="green3") ##'plot(h1<-Uhaz(gastric, deg=1)$h, fn="s", add=TRUE) ##'plot(h2<-Uhaz(gastric, deg=2)$h, fn="s", add=TRUE, col="red3") ##' ##'plot(h0, fn="h", col="green3") # hazard function ##'plot(h1, fn="h", add=TRUE) ##'plot(h2, fn="h", add=TRUE, col="red3") ##' ##'plot(h0, fn="d", col="green3") # density function ##'plot(h1, fn="d", add=TRUE) ##'plot(h2, fn="d", add=TRUE, col="red3") ##' ##' NULL ##'Remission Times for Acute Leukemia Patients ##' ##'Contains remission times in weeks of 42 acute leukemia patients, who ##'received either the drug 6-mercaptopurine or a placebo. Each remission time ##'is either exactly observed or right-censored. ##' ##' ##'@name leukemia ##'@docType data ##'@format A data frame with 42 observations and 3 variables: ##' ##'L: left-end points of the interval-censored remission times in weeks; ##' ##'R: right-end points of the interval-censored remission times; ##' ##'group: either 6-MP (6-mercaptopurine) or Placebo. ##'@seealso \code{\link{npsurv}}. ##'@references Freireich, E. O. et al. (1963). The effect of 6-mercaptopurine on ##'the duration of steroid-induced remission in acute leukemia. \emph{Blood}, ##'\bold{21}, 699-716. ##'@source Freireich et al. (1963).
##'@keywords datasets ##'@examples ##' ##'data(leukemia) ##'i = leukemia[,"group"] == "Placebo" ##'plot(npsurv(leukemia[i,1:2]), xlim=c(0,40), col="green3") # placebo ##'plot(npsurv(leukemia[!i,1:2]), add=TRUE) # 6-MP ##' ##'## Treat each remission time as interval-censored: ##'x = leukemia ##'ii = x[,1] == x[,2] ##'x[ii,2] = x[ii,1] + 1 ##'plot(npsurv(x[i,1:2]), xlim=c(0,40), col="green3") # placebo ##'plot(npsurv(x[!i,1:2]), add=TRUE) # 6-MP ##' NULL ##'Marijuana Use Data ##' ##'Contains the answers of 191 California high school students to the question: ##'"When did you first use marijuana?". An answer can be an exact age, or "I ##'have never used it", which gives rise to a right-censored observation, or "I ##'have used it but cannot recall just when the first time was", which gives ##'rise to a left-censored observation. ##' ##' ##'@name marijuana ##'@docType data ##'@format A data frame with 21 observations and 3 variables: ##' ##'L: left-end point of an interval-censored time; ##' ##'R: right-end point of an interval-censored time; ##' ##'count: number of students in the interval. ##'@seealso \code{\link{npsurv}}. ##'@references Turnbull and Weiss (1978). A likelihood ratio statistic for ##'testing goodness of fit with randomly censored data. \emph{Biometrics}, ##'\bold{34}, 367-375. ##' ##'Klein and Moeschberger (2003). \emph{Survival Analysis: Techniques for ##'Censored and Truncated Data} (2nd ed.). Springer. ##'@source Turnbull and Weiss (1978). See also Klein and Moeschberger (1997), ##'page 17. ##'@keywords datasets ##'@examples ##' ##'data(marijuana) ##'r = Uhaz(marijuana, deg=2) ##'plot(r$h, fn="h") ##'plot(r$h, fn="s") ##' NULL ##'New Zealand Mortality in 2000 ##' ##'Contains the number of deaths of Maori and Non-Maori people at each age in ##'New Zealand in 2000. ##' ##'Ages with zero deaths are not included in the data. ##' ##'@name nzmort ##'@docType data ##'@format A data frame with 210 observations and 3 variables: ##' ##'age: age at which the deaths occurred; ##' ##'deaths: number of people who died at that age; ##' ##'ethnic: either Maori or Non-Maori. ##'@seealso \code{\link{Uhaz}}.
##'@source \url{https://www.mortality.org/} ##'@keywords datasets ##'@examples ##' ##'data(nzmort) ##'x = with(nzmort, nzmort[ethnic=="maori",])[,1:2] # Maori mortality ##'# x = with(nzmort, nzmort[ethnic!="maori",])[,1:2] # Non-Maori mortality ##' ##'## As exact observations ##'# Plot hazard functions ##'h0 = Uhaz(x[,1]+0.5, x[,2], deg=0)$h # U-shaped hazard ##'plot(h0, fn="h", col="green3", pch=2) ##'h1 = Uhaz(x[,1]+0.5, x[,2], deg=1)$h # convex hazard ##'plot(h1, fn="h", add=TRUE, pch=1) ##'h2 = Uhaz(x[,1]+0.5, x[,2], deg=2)$h # smooth U-shaped hazard ##'plot(h2, fn="h", add=TRUE, col="red3") ##' ##'# Plot densities ##'age = 0:max(x[,1]) ##'count = integer(length(age)) ##'count[x[,"age"]+1] = x[,"deaths"] ##'barplot(count/sum(count), space=0, col="lightgrey", ylab="Density") ##'axis(1, pos=NA, at=0:10*10) ##'plot(h0, fn="d", add=TRUE, col="green3", pch=2) ##'plot(h1, fn="d", add=TRUE, col="blue3", pch=1) ##'plot(h2, fn="d", add=TRUE, col="red3", pch=19) ##' ##'## As interval-censored observations ##'# Plot hazard functions ##'x2 = cbind(x[,1], x[,1]+1, x[,2]) ##'h0 = Uhaz(x2, deg=0)$h # U-shaped hazard ##'plot(h0, fn="h", col="green3", pch=2) ##'h1 = Uhaz(x2, deg=1)$h # convex hazard ##'plot(h1, fn="h", add=TRUE, pch=1) ##'h2 = Uhaz(x2, deg=2)$h # smooth U-shaped hazard ##'plot(h2, fn="h", add=TRUE, col="red3", pch=1) ##' ##'# Plot densities ##'barplot(count/sum(count), space=0, col="lightgrey") ##'axis(1, pos=NA, at=0:10*10) ##'plot(h0, fn="d", add=TRUE, col="green3", pch=2) ##'plot(h1, fn="d", add=TRUE, col="blue3", pch=1) ##'plot(h2, fn="d", add=TRUE, col="red3", pch=19) ##' NULL npsurv/MD50000644000176200001440000000273313731664713012137 0ustar liggesusersd426c6dee33a52bb12d93c5c7790b54b *DESCRIPTION 76a927e32f56c8ae1e359db61003f82c *NAMESPACE cf508328aab63449399d903addc26b54 *R/Uhaz.R 12465dce234312f31c693aed0936b22f *R/icendata.R 2d62279b4d2e6468628288dce1aac22b *R/npsurv-package.R 054ab55eb362812c1b87c14d6bd69851 *R/npsurv.R 237be856104d680263fd4d3c68f0c604 *data/acfail.rda f899ce39f558a3c0bd44957146f76ff8 *data/ap.rda f41030bc4511396645ca241a0da91385 *data/cancer.rda 216932fc31c1a6944f2dd48e8cd9b2e0 *data/gastric.rda c735afa2ef11951387ecc1cc2e5838d4 *data/leukemia.rda 028a58d586aa71bf2b7d07df289ea3d8 *data/marijuana.rda 3cb11231442a3a3dc205a188f805594b *data/nzmort.rda d033d610e8c7dfa722d8fb15f35b57c3 *man/Deltamatrix.Rd 27ad5d652edb5d063e808f63ee49e298 *man/Uhaz.Rd 3dc6db7971c7ffd985645adc6dd10758 *man/acfail.Rd 5aa196a8cde4c08269de30e9fe547e88 *man/ap.Rd 872fd06f9346754882f3e5c301ac9065 *man/cancer.Rd f5adfd397f03a58325ce3627f39df2f4 *man/gastric.Rd c76db44be7644bb14ecefd152436817c *man/hazuh.Rd d3e5fd4cd429b05f299477e91ef993d2 *man/icendata.Rd b9f8ad4c88a22917c0782036ef9fdb97 *man/idf.Rd 054afa922f9332301fa1ea56f7b9dfda *man/km.Rd 2d9c50a3558a290596d0ffdfe4ca54ef *man/leukemia.Rd 9ded29b29c91dfa09caceed4b52fdd5a *man/logLikuh.Rd b6057d23ff965568e95c9a33f0c5231e *man/marijuana.Rd cc8de420c0c1b641e6028fb6664066b0 *man/npsurv.Rd 7c20f5531a6b3764c562389cfcbc9f98 *man/nzmort.Rd 0cdc393a8df11fb97b9ca99525145aad *man/plot.Uhaz.Rd d5a260d797343d6ec4e72678bf0b9868 *man/plot.npsurv.Rd a142dc84b30f113a335520cc6bed4c01 *man/uh.Rd
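The help pages above each ship their own @examples; the sketch below is an additional, minimal end-to-end illustration and is not part of the package sources. It assumes only that the npsurv package built from these files is installed, and it uses nothing beyond the functions and data sets documented above (ap, nzmort, Uhaz, logLikuh, and the plot methods for uh objects).

## Not part of the package sources: a minimal usage sketch, assuming the
## npsurv package built from the files above has been installed.
library(npsurv)

## Interval-censored data: angina pectoris survival times (data set 'ap').
data(ap)
fit <- Uhaz(ap, deg=2)        # NPMLE of a smooth U-shaped hazard
fit$ll                        # log-likelihood at the estimate
logLikuh(fit$h, ap)           # the same value, computed directly

## plot.uh() dispatches on 'fn' to plothazuh, plotsurvuh, plotdenuh,
## plotchazuh or plotgraduh; the gradient plot also needs the data.
plot(fit$h, fn="haz")         # hazard function
plot(fit$h, fn="surv")        # survival function
plot(fit$h, ap, fn="grad")    # gradient functions at the NPMLE

## Exact observations with multiplicities: Maori mortality (data set 'nzmort').
data(nzmort)
x <- nzmort[nzmort$ethnic == "maori", 1:2]
h <- Uhaz(x[,1]+0.5, x[,2], deg=2)$h
plot(h, fn="den")             # fitted density over age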