princurve/0000755000175100001440000000000012136161327012307 5ustar hornikusersprincurve/MD50000644000175100001440000000065512136161327012625 0ustar hornikuserscac6a865a84a2c0cd38f88d56395d94e *ChangeLog df13c32f7d1c15b9507dc5f052705cdd *DESCRIPTION 53d3f6dbb3f726586e28689bc3a2c3da *NAMESPACE 28e63dee56ef295adc78f15c77da14f3 *R/principal.curve.R 1458b262900fb75f730ab2cdb5895b4c *README c33636a059567d43db0adb48dfc5af21 *man/get.lam.Rd e2aedd9f38306e853165615c0217fc14 *man/principal.curve.Rd ecf7b1ed377275a2b270ba3d641f45b7 *src/getlam.f 33498dcd0282f1c20cbefca4ad723f07 *src/sortdi.f princurve/src/0000755000175100001440000000000012136155716013103 5ustar hornikusersprincurve/src/sortdi.f0000644000175100001440000000330012136155716014552 0ustar hornikusers subroutine sortdi (v,a,ii,jj) c c puts into a the permutation vector which sorts v into c increasing order. only elements from ii to jj are considered. c arrays iu(k) and il(k) permit sorting up to 2**(k+1)-1 elements c v is returned sorted c this is a modification of cacm algorithm #347 by r. c. singleton, c which is a modified hoare quicksort. c integer t,tt,ii,jj,iu(20),il(20) integer a(jj) double precision v(*), vt, vtt m=1 i=ii j=jj 10 if (i.ge.j) go to 80 20 k=i ij=(j+i)/2 t=a(ij) vt=v(ij) if (v(i).le.vt) go to 30 a(ij)=a(i) a(i)=t t=a(ij) v(ij)=v(i) v(i)=vt vt=v(ij) 30 l=j if (v(j).ge.vt) go to 50 a(ij)=a(j) a(j)=t t=a(ij) v(ij)=v(j) v(j)=vt vt=v(ij) if (v(i).le.vt) go to 50 a(ij)=a(i) a(i)=t t=a(ij) v(ij)=v(i) v(i)=vt vt=v(ij) go to 50 40 a(l)=a(k) a(k)=tt v(l)=v(k) v(k)=vtt 50 l=l-1 if (v(l).gt.vt) go to 50 tt=a(l) vtt=v(l) 60 k=k+1 if (v(k).lt.vt) go to 60 if (k.le.l) go to 40 if (l-i.le.j-k) go to 70 il(m)=i iu(m)=l i=k m=m+1 go to 90 70 il(m)=k iu(m)=j j=l m=m+1 go to 90 80 m=m-1 if (m.eq.0) return i=il(m) j=iu(m) 90 if (j-i.gt.10) go to 20 if (i.eq.ii) go to 10 i=i-1 100 i=i+1 if (i.eq.j) go to 80 t=a(i+1) vt=v(i+1) if (v(i).le.vt) go to 100 k=i 110 a(k+1)=a(k) v(k+1)=v(k) k=k-1 if (vt.lt.v(k)) go to 110 a(k+1)=t v(k+1)=vt go to 100 end princurve/src/getlam.f0000644000175100001440000000515212136155716014526 0ustar hornikusersC Output from Public domain Ratfor, version 1.0 subroutine getlam(n,p,x,sx,lambda,order,dist,ns,s,strech,vecx,temp *sx) integer n,p,ns,order(n) double precision x(n,p),sx(n,p),s(ns,p),lambda(n),dist(n),tempsx(p *), vecx(p),strech if(strech .lt. 0d0)then strech=0d0 endif if(strech .gt. 2d0)then strech=2d0 endif do23004 j=1,p s(1,j)=s(1,j)+strech*(s(1,j)-s(2,j)) s(ns,j)=s(ns,j)+strech*(s(ns,j)-s(ns-1,j)) 23004 continue 23005 continue do23006 i=1,n do23008 j=1,p vecx(j)=x(i,j) 23008 continue 23009 continue call lamix(ns,p,vecx,s,lambda(i),dist(i),tempsx) do23010 j=1,p sx(i,j)=tempsx(j) 23010 continue 23011 continue 23006 continue 23007 continue do23012 i=1,n order(i)=i 23012 continue 23013 continue call sortdi(lambda,order,1,n) call newlam(n,p,sx,lambda,order) return end subroutine newlam(n,p,sx,lambda,tag) integer n,p,tag(n) double precision sx(n,p),lambda(n),lami lambda(tag(1))=0d0 i=1 23014 if(.not.(i.lt.n))goto 23016 lami=0d0 do23017 j=1,p lami=lami+(sx(tag(i+1),j)-sx(tag(i),j))**2 23017 continue 23018 continue lambda(tag(i+1))=lambda(tag(i))+dsqrt(lami) 23015 i=i+1 goto 23014 23016 continue return end subroutine lamix(ns,p,x,s,lambda,dismin,temps) integer ns,p double precision lambda,x(p),s(ns,p),dismin,temps(p) double precision v(2,p),d1sqr,d2sqr,d12,dsqr, d1,w real lam,lammin integer left,right dismin=1d60 lammin=1 ik = 1 23019 if(.not.(ik.lt.ns))goto 23021 d1sqr=0.0 d2sqr=0.0 do23022 j=1,p v(2,j)=x(j)-s(ik,j) v(1,j)=s(ik+1,j)-s(ik,j) d1sqr=d1sqr+v(1,j)**2 d2sqr=d2sqr+v(2,j)**2 23022 continue 23023 continue if(d1sqr.lt.1d-8*d2sqr)then lam=ik dsqr=d2sqr else d12=0d0 do23026 j=1,p d12 = d12+v(1,j)*v(2,j) 23026 continue 23027 continue d1=d12/d1sqr if(d1.gt.1d0)then lam=ik+1 dsqr=d1sqr+d2sqr-2d0*d12 else if(d1.lt.0.0)then lam=ik dsqr=d2sqr else lam=ik+(d1) dsqr=d2sqr-(d12**2)/d1sqr endif endif endif if(dsqr.lt.dismin)then dismin=dsqr lammin=lam endif 23020 ik=ik+1 goto 23019 23021 continue left=lammin if(lammin.ge.ns)then left=ns-1 endif right=left+1 w=dble(lammin-left) do23036 j=1,p temps(j)=(1d0-w)*s(left,j)+w*s(right,j) 23036 continue 23037 continue lambda=dble(lammin) return end princurve/man/0000755000175100001440000000000012136155716013067 5ustar hornikusersprincurve/man/principal.curve.Rd0000644000175100001440000000627611635315524016473 0ustar hornikusers\name{principal.curve} \alias{principal.curve} \alias{lines.principal.curve} \alias{plot.principal.curve} \alias{points.principal.curve} \title{Fit a Principal Curve} \usage{ principal.curve(x, start=NULL, thresh=0.001, plot.true=FALSE, maxit=10, stretch=2, smoother="smooth.spline", trace=FALSE, \dots) } \description{Fits a principal curve which describes a smooth curve that passes through the \code{middle} of the data \code{x} in an orthogonal sense. This curve is a nonparametric generalization of a linear principal component. If a closed curve is fit (using \code{smoother = "periodic.lowess"}) then the starting curve defaults to a circle, and each fit is followed by a bias correction suggested by J. Banfield.} \arguments{ \item{x}{a matrix of points in arbitrary dimension} \item{start}{either a previously fit principal curve, or else a matrix of points that in row order define a starting curve. If missing or NULL, then the first principal component is used. If the smoother is \code{"periodic.lowess"}, then a circle is used as the start.} \item{thresh}{convergence threshold on shortest distances to the curve.} \item{plot.true}{If \code{TRUE} the iterations are plotted.} \item{maxit}{maximum number of iterations.} \item{stretch}{a factor by which the curve can be extrapolated when points are projected. Default is 2 (times the last segment length). The default is 0 for \code{smoother} equal to \code{"periodic.lowess"}.} \item{smoother}{choice of smoother. The default is \code{"smooth.spline"}, and other choices are \code{"lowess"} and \code{"periodic.lowess"}. The latter allows one to fit closed curves. Beware, you may want to use \code{iter = 0} with \code{lowess()}.} \item{trace}{If \code{TRUE}, the iteration information is printed} \item{\dots}{additional arguments to the smoothers} } \value{ An object of class \code{"principal.curve"} is returned. For this object the following generic methods a currently available: \code{plot, points, lines}. It has components: \item{s}{a matrix corresponding to \code{x}, giving their projections onto the curve.} \item{tag}{an index, such that \code{s[tag, ]} is smooth.} \item{lambda}{for each point, its arc-length from the beginning of the curve. The curve is parametrized approximately by arc-length, and hence is \code{unit-speed}.} \item{dist}{the sum-of-squared distances from the points to their projections.} \item{converged}{A logical indicating whether the algorithm converged or not.} \item{nbrOfIterations}{Number of iterations completed before returning.} \item{call}{the call that created this object; allows it to be \code{updated()}.} } \seealso{ \code{\link{get.lam}} } \keyword{regression} \keyword{smooth} \keyword{nonparametric} \references{ \dQuote{Principal Curves} by Hastie, T. and Stuetzle, W. 1989, JASA. See also Banfield and Raftery (JASA, 1992). } \examples{ x <- runif(100,-1,1) x <- cbind(x, x ^ 2 + rnorm(100, sd = 0.1)) fit1 <- principal.curve(x, plot = TRUE) fit2 <- principal.curve(x, plot = TRUE, smoother = "lowess") lines(fit1) points(fit1) plot(fit1) whiskers <- function(from, to) segments(from[, 1], from[, 2], to[, 1], to[, 2]) whiskers(x, fit1$s) } princurve/man/get.lam.Rd0000644000175100001440000000217511304021307014672 0ustar hornikusers\name{get.lam} \alias{get.lam} \title{Projection Index} \usage{ get.lam(x, s, tag, stretch = 2) } \arguments{ \item{x}{a matrix of data points} \item{s}{a parametrized curve, represented by a polygon.} \item{tag}{the order of the point in \code{s}. Default is the given order.} \item{stretch}{A stretch factor for the endpoints of the curve; a maximum of 2. it allows the curve to grow, if required, and helps avoid bunching at the end.} } \description{ Finds the projection index for a matrix of points \code{x}, when projected onto a curve \code{s}. The curve need not be of the same length as the number of points. If the points on the curve are not in order, this order needs to be given as well, in \code{tag}. } \value{ A structure is returned which represents a fitted curve. It has components \item{s}{The fitted points on the curve corresponding to each point \code{x}.} \item{tag}{the order of the fitted points} \item{lambda}{The projection index for each point} \item{dist}{The total squared distance from the curve} } \seealso{ \code{\link{principal.curve}} } \keyword{regression} \keyword{smooth} \keyword{nonparametric} princurve/README0000644000175100001440000000253411304021307013160 0ustar hornikusersThis is an R port of Trevor Hastie's principal.curve package which can be found in the statlib archive. Changes from original: replaced missing ylim.scale by own function changes in "periodic.lowess" and changes in the call of "periodic.lowess" in the subroutine "bias.correct.curve" All changes are marked by a comment starting with AW. The latest version of this package can always be downloaded from any CRAN mirror. The CRAN master site can be found at http://www.ci.tuwien.ac.at/R ftp://ftp.ci.tuwien.ac.at/pub/R ************************************************************************ * Andreas Weingessel * ************************************************************************ * Institut für Statistik * Tel: (+43 1) 58801 10716 * * Technische Universität Wien * Fax: (+43 1) 58801 10798 * * Wiedner Hauptstr. 8-10/1071 * Andreas.Weingessel@ci.tuwien.ac.at * * A-1040 Wien, Austria * http://www.ci.tuwien.ac.at/~weingessel * ************************************************************************ The address of Trevor Hastie is (according to the information provided in the statlib package): Trevor Hastie trevor@research.att.com 1-908-582-5647 (FAX) 1-908-582-3340 rm 2C-261 AT&T Bell Labs, Murray Hill, NJ 07974 princurve/R/0000755000175100001440000000000012136155716012515 5ustar hornikusersprincurve/R/principal.curve.R0000644000175100001440000002055111635315550015744 0ustar hornikusers"bias.correct.curve" <- function(x, pcurve, ...) { # bias correction, as suggested by #Jeff Banfield p <- ncol(x) ones <- rep(1, p) sbar <- apply(pcurve$s, 2, "mean") ray <- drop(sqrt(((x - pcurve$s)^2) %*% ones)) dist1 <- (scale(x, sbar, FALSE)^2) %*% ones dist2 <- (scale(pcurve$s, sbar, FALSE)^2) %*% ones sign <- 2 * as.double(dist1 > dist2) - 1 ray <- sign * ray sray <- approx(periodic.lowess(pcurve$lambda, ray, ...)$x, periodic.lowess(pcurve$lambda, ray, ...)$y, pcurve$lambda)$y ## AW: changed periodic.lowess() to periodic.lowess()$x and $y pcurve$s <- pcurve$s + (abs(sray)/ray) * ((x - pcurve$s)) get.lam(x, pcurve$s, pcurve$tag, stretch = 0) } "get.lam" <- function(x, s, tag, stretch = 2) { storage.mode(x) <- "double" storage.mode(s) <- "double" storage.mode(stretch) <- "double" if(!missing(tag)) s <- s[tag, ] np <- dim(x) if(length(np) != 2) stop("get.lam needs a matrix input") n <- np[1] p <- np[2] tt <- .Fortran("getlam", n, p, x, s = x, lambda = double(n), tag = integer(n), dist = double(n), as.integer(nrow(s)), s, stretch, double(p), double(p), PACKAGE = "princurve")[c("s", "tag", "lambda", "dist")] tt$dist <- sum(tt$dist) class(tt) <- "principal.curve" tt } "lines.principal.curve" <- function(x, ...) lines(x$s[x$tag, ], ...) "periodic.lowess"<- function(x, y, f = 0.59999999999999998, ...) { n <- length(x) o <- order(x) r <- range(x) d <- diff(r)/(length(unique(x)) - 1) xr <- x[o[1:(n/2)]] - r[1] + d + r[2] xl <- x[o[(n/2):n]] - r[2] - d + r[1] yr <- y[o[1:(n/2)]] yl <- y[o[(n/2):n]] xnew <- c(xl, x, xr) ynew <- c(yl, y, yr) f <- f/2 fit <- lowess(xnew, ynew, f = f, ...) approx(fit$x, fit$y, x[o], rule = 2) # AW: changed fit to fit$x, fit$y } "plot.principal.curve" <- function(x, ...) plot(x$s[x$tag, ], ..., type = "l") "points.principal.curve" <- function(x, ...) points(x$s, ...) principal.curve <- function(x, start=NULL, thresh=0.001, plot.true=FALSE, maxit=10, stretch=2, smoother="smooth.spline", trace=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'smoother': if (is.function(smoother)) { smootherFcn <- smoother; } else { smooth.list <- c("smooth.spline", "lowess", "periodic.lowess"); smoother <- match.arg(smoother, smooth.list); smootherFcn <- NULL; } # Argument 'stretch': stretches <- c(2, 2, 0); if (is.function(smoother)) { if (is.null(stretch)) stop("Argument 'stretch' must be given if 'smoother' is a function."); } else { if(missing(stretch) || is.null(stretch)) { stretch <- stretches[match(smoother, smooth.list)]; } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.null(smootherFcn)) { # Setup the smoother function smootherFcn(lambda, xj, ...) which must # return fitted {y}:s. smootherFcn <- switch(smoother, lowess = function(lambda, xj, ...) { lowess(lambda, xj, ...)$y; }, smooth.spline = function(lambda, xj, ..., df=5) { o <- order(lambda); lambda <- lambda[o]; xj <- xj[o]; fit <- smooth.spline(lambda, xj, ..., df=df, keep.data=FALSE); predict(fit, x=lambda)$y; }, periodic.lowess = function(lambda, xj, ...) { periodic.lowess(lambda, xj, ...)$y; } ) # smootherFcn() # Should the fitted curve be bias corrected (in each iteration)? biasCorrectCurve <- (smoother == "periodic.lowess"); } else { biasCorrectCurve <- FALSE; } this.call <- match.call() dist.old <- sum(diag(var(x))) d <- dim(x) n <- d[1] p <- d[2] # You can give starting values for the curve if (missing(start) || is.null(start)) { # use largest principal component if (is.character(smoother) && smoother == "periodic.lowess") { start <- startCircle(x) } else { xbar <- colMeans(x) xstar <- scale(x, center=xbar, scale=FALSE) svd.xstar <- svd(xstar) dd <- svd.xstar$d lambda <- svd.xstar$u[,1] * dd[1] tag <- order(lambda) s <- scale(outer(lambda, svd.xstar$v[,1]), center=-xbar, scale=FALSE) dist <- sum((dd^2)[-1]) * n start <- list(s=s, tag=tag, lambda=lambda, dist=dist) } } else if (!inherits(start, "principal.curve")) { # use given starting curve if (is.matrix(start)) { start <- get.lam(x, s=start, stretch=stretch) } else { stop("Invalid starting curve: should be a matrix or principal.curve") } } pcurve <- start if (plot.true) { plot(x[,1:2], xlim=adjust.range(x[,1], 1.3999999999999999), ylim=adjust.range(x[,2], 1.3999999999999999)) lines(pcurve$s[pcurve$tag, 1:2]) } it <- 0 if (trace) { cat("Starting curve---distance^2: ", pcurve$dist, "\n", sep=""); } # Pre-allocate nxp matrix 's' naValue <- as.double(NA); s <- matrix(naValue, nrow=n, ncol=p); hasConverged <- (abs((dist.old - pcurve$dist)/dist.old) <= thresh); while (!hasConverged && it < maxit) { it <- it + 1; for(jj in 1:p) { s[,jj] <- smootherFcn(pcurve$lambda, x[,jj], ...); } dist.old <- pcurve$dist; # Finds the "projection index" for a matrix of points 'x', # when projected onto a curve 's'. The projection index, # \lambda_f(x) [Eqn (3) in Hastie & Stuetzle (1989), is # the value of \lambda for which f(\lambda) is closest # to x. pcurve <- get.lam(x, s=s, stretch=stretch); # Bias correct? if (biasCorrectCurve) pcurve <- bias.correct.curve(x, pcurve=pcurve, ...) # Converged? hasConverged <- (abs((dist.old - pcurve$dist)/dist.old) <= thresh); if (plot.true) { plot(x[,1:2], xlim=adjust.range(x[,1], 1.3999999999999999), ylim=adjust.range(x[,2], 1.3999999999999999)) lines(pcurve$s[pcurve$tag, 1:2]) } if (trace) { cat("Iteration ", it, "---distance^2: ", pcurve$dist, "\n", sep=""); } } # while() # Return fit structure(list( s = pcurve$s, tag = pcurve$tag, lambda = pcurve$lambda, dist = pcurve$dist, converged = hasConverged, # Added by HB nbrOfIterations = as.integer(it), # Added by HB call = this.call ), class="principal.curve"); } # principal.curve.hb() ########################################################################### # HISTORY: # 2009-07-15 # o MEMORY OPTIMIZATION: Now the result matrix allocated as doubles, not # logicals (as NA is), in order to prevent a coersion. # 2009-02-08 # o BUG FIX: An error was thrown if 'smoother' was a function. # o Cleaned up source code (removed comments). # 2008-05-27 # o Benchmarking: For larger data sets, most of the time is spent in # get.lam(). # o BUG FIX: smooth.spline(x,y) will only use *and* return values for # "unique" {x}:s. This means that the fitted {y}:s maybe be fewer than # the input vector. In order to control for this, we use predict(). # o Now 'smoother' can also be a function taking arguments 'lambda', 'xj' # and '...' and return 'y' of the same length as 'lambda' and 'xj'. # o Now arguments 'start' and 'stretch' can be NULL, which behaves the # same as if they are "missing" [which is hard to emulate with for # instance do.call()]. # o Added 'converged' and 'nbrOfIterations' to return structure. # o SPEED UP/MEMORY OPTIMIZATION: Now the nxp matrix 's' is allocated only # once. Before it was built up using cbind() once per iteration. # o SPEED UP: Now the smoother function is identified/created before # starting the algorithm, and not once per dimension and iteration. ########################################################################### adjust.range <- function (x, fact) { # AW: written by AW, replaces ylim.scale r <- range (x); d <- diff(r)*(fact-1)/2; c(r[1]-d, r[2]+d) } "startCircle" <- function(x) { # the starting circle uses the first two co-ordinates, # and assumes the others are zero d <- dim(x) n <- d[1] p <- d[2] # use best circle centered at xbar xbar <- apply(x, 2, "mean") ray <- sqrt((scale(x, xbar, FALSE)^2) %*% rep(1, p)) radius <- mean(ray) s <- cbind(radius * sin((pi * (1:101))/50), radius * cos((pi * (1:101))/50)) if(p > 2) s <- cbind(s, matrix(0, n, p - 2)) get.lam(x, s) } princurve/NAMESPACE0000644000175100001440000000025511635310660013527 0ustar hornikusersuseDynLib("princurve") export("principal.curve", "get.lam") S3method("lines", "principal.curve") S3method("plot", "principal.curve") S3method("points", "principal.curve") princurve/DESCRIPTION0000644000175100001440000000103012136161327014007 0ustar hornikusersPackage: princurve Version: 1.1-12 Title: Fits a Principal Curve in Arbitrary Dimension Author: S original by Trevor Hastie R port by Andreas Weingessel Maintainer: Andreas Weingessel Description: fits a principal curve to a data matrix in arbitrary dimensions License: GPL-2 Imports: stats, graphics Packaged: 2013-04-25 07:31:26 UTC; hornik NeedsCompilation: yes Repository: CRAN Date/Publication: 2013-04-25 10:01:27 princurve/ChangeLog0000644000175100001440000000427712136155702014073 0ustar hornikusers2013-04-25 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-12. * src/sortdi.f (sortdi): Fix Fortran array bounds problem. 2011-09-18 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-11. * R/principal.curve.R: * man/principal.curve.Rd: Move whiskers() from code to example. * NAMESPACE: Added. 2009-10-04 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-10. * R/principal.curve.R (principal.curve): * man/principal.curve.Rd: Enhancements by Henrik Bengtsson . 2007-07-12 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-9. (License): Clarify. 2006-10-04 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-8. (License): Standardize. 2004-11-04 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-7. (Depends): Depend on R >= 1.9.0. (License): Changed to GPL 2 or better as granted by Trevor Hastie. 2004-11-03 Kurt Hornik * R/principal.curve.R: Stop requiring the now defunct 'modreg'. 2004-01-31 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-6. * INDEX: Removed. 2002-07-03 Kurt Hornik * DESCRIPTION: New version is 1.1-5. * R/principal.curve.R: Add 'PACKAGE' argument to FF calls. 2002-07-02 Kurt Hornik * DESCRIPTION: New version is 1.1-4. * man/principal.curve.Rd: T/F fixes. 2001-06-10 Andreas Weingessel * Version 1.1-2: Changed keyword entries to fit R standard. 2001-01-31 Andreas Weingessel * Changed definition of v in line 58 of getlam.f from double precision v(2,10) to double precision v(2,p) 2000-12-27 Andreas Weingessel * Version 1.1-0: Changes in the DESCRIPTION file to fit R-1.2.0. Various improvments in the documentation: Added alias entries, a description entry for principal.curve, default values and entries for the undocumented objects. Changed F to FALSE in the R code.