nitpick/0000755000723300071640000000000011113550736012443 5ustar brenardMultBildnitpick/R/0000755000723300071640000000000011113550736012644 5ustar brenardMultBildnitpick/R/ams.pp.bins2breaks.R0000644000723300071640000000034510771167745016410 0ustar brenardMultBild"ams.pp.bins2breaks" <- function(bins) { # returns the n+1 breaks enclosing n bins n <- length(bins); return( c( bins[1]-0.5*diff(bins[1:2]), bins[1:(n-1)]+0.5*diff(bins), bins[n]+0.5*diff(bins[(n-1):n]) ) ); } nitpick/R/ams.pp.buildModelMatrix.R0000644000723300071640000001174410771167746017456 0ustar brenardMultBildams.pp.buildModelMatrix<-function(begin,end,offset,data, EFA=TRUE,maxRangeSize=1,thresholdInt=0,limit=1e-26, charge=c(2,3,4,5), width=width) { if (begin<=offset) { offsetLower<-begin-1 #set lower offset zero if begin length(data[,1])) { offsetUpper<-length(data[,1])-end #set upper offset zero if begin data[i-1,2]) &(data[i,2]>data[i+1,2])&(data[i,2]>thresholdInt)) { if((i-lastModelBuilt>=maxRangeSize)||lastModelBuilt==0) { lastModelBuilt<-i if(EFA) { modelsPerBin<-length(charge)*2 xChargeMatrix<-matrix(nrow=leng,ncol=modelsPerBin) maxXPeak<-vector(length=modelsPerBin) monoisotopic<-vector(length=modelsPerBin) for(m in 1:modelsPerBin) { if((floor(m/2)-m/2)==0) { SInd<-"Y" } else { SInd<-"N" } xChargefftGeneral<-buildAveragineModelShort(data[i,1], data[(begin-offsetLower):(end+offsetUpper),1],charge[floor((m+1)/2)],S=SInd,width=width) xChargefft<-xChargefftGeneral[[1]] #can be reused!! x2<-Re(fft(xChargefft,inv=T))[1:leng] maxXPeak[m]<-findMaxPeakAndPosition(cbind(c(1:(length(x2))),x2))[2] monoisotopic[m]<-xChargefftGeneral[[2]] xChargeMatrix[,m]<-x2 } } else { modelsPerBin<-length(charge) xChargeMatrix<-matrix(nrow=leng,ncol=modelsPerBin) maxXPeak<-vector(length=modelsPerBin) monoisotopic<-vector(length=modelsPerBin) for(m in 1:modelsPerBin) { SInd<-"Ave" xChargefftGeneral<-buildAveragineModelShort(data[i,1], data[(begin-offsetLower):(end+offsetUpper),1],charge[m],S=SInd,width=width) xChargefft<-xChargefftGeneral[[1]] #can be reused!! x2<-Re(fft(xChargefft,inv=T))[1:leng] maxXPeak[m]<-findMaxPeakAndPosition(cbind(c(1:(length(x2))),x2))[2] monoisotopic[m]<-xChargefftGeneral[[2]] xChargeMatrix[,m]<-x2 } } } if(((i-1)>max(iCollection[,1],0))) { xAdd<-NULL for(m in 1:modelsPerBin) { xAdd<-cbind(xAdd,xMoved(i-begin,xChargeMatrix[,m],maxXPeak[m],offsetLower,offsetUpper)) iCollection<-rbind(iCollection,c(i-1,charge[(floor((m+1)/2))*EFA+m*(1-EFA)],2-2*(m/2-floor(m/2)),monoisotopic[m],maxXPeak[m])) } xCombined<-cbind(xCombined,xAdd) } xAdd<-NULL for(m in 1:modelsPerBin) { xAdd<-cbind(xAdd,xMoved(i-begin+1,xChargeMatrix[,m],maxXPeak[m],offsetLower,offsetUpper)) iCollection<-rbind(iCollection,c(i,charge[(floor((m+1)/2))*EFA+m*(1-EFA)],2-2*(m/2-floor(m/2)),monoisotopic[m],maxXPeak[m])) } xCombined<-cbind(xCombined,xAdd) if(!EFA) preselection<-c(preselection,(dim(xCombined)[2]-(modelsPerBin-1)):dim(xCombined)[2]) if(EFA) preselection<-c(preselection,((dim(xCombined)[2]-(modelsPerBin-1)):dim(xCombined)[2])[seq(2,modelsPerBin,2)]) xAdd<-NULL for(m in 1:modelsPerBin) { xAdd<-cbind(xAdd,xMoved(i-begin+2,xChargeMatrix[,m],maxXPeak[m],offsetLower,offsetUpper)) iCollection<-rbind(iCollection,c(i+1,charge[(floor((m+1)/2))*EFA+m*(1-EFA)],2-2*(m/2-floor(m/2)),monoisotopic[m],maxXPeak[m])) } xCombined<-cbind(xCombined,xAdd) } } beginWithOffset<-(begin-offsetLower) endWithOffset<-(end+offsetUpper) ret <- list( xCombined=xCombined, preselection=preselection, iCollection=iCollection, beginWithOffset=beginWithOffset, endWithOffset=endWithOffset ) class(ret) <- "ams.pp.strap.model" return(ret) } xMoved<-function(i,x,maxXPeak,offsetLower,offsetUpper) {distance<-i-maxXPeak+offsetLower #Rotating the xVector such that its peak corresponds to the peak of the filteredSignal lengthX<-length(x) xMoved<-c( rep(0,max(distance,0)), x[max(-distance+1,0):min(lengthX,lengthX-distance)], rep(0,max(-distance,0)) ) return(xMoved) } nitpick/R/ams.pp.getRegions.MeanFilter.R0000744000723300071640000000146611113543344020324 0ustar brenardMultBildams.pp.getRegions.MeanFilter<-function (spectrum, mz.nIntervals, mz.neighborhood, noise.threshold.factor) { embedding <- embed(c(rep(0, (mz.neighborhood - 1)/2), spectrum[, 2], rep(0, (mz.neighborhood - 1)/2)), mz.neighborhood) if(dim(embedding)[1] (noise.mean * noise.threshold.factor))) } return(relevantRegions) } nitpick/R/ams.pp.listPeaks.R0000644000723300071640000000076210771167746016146 0ustar brenardMultBild ams.pp.listPeaks<-function(beta,iCollection) { lengthSolution<-sum(beta>0) active<-which(beta>0) position<-vector(length=lengthSolution) charge<-vector(length=lengthSolution) sulfInd<-vector(length=lengthSolution) value<-beta[beta>0] charge<-iCollection[active,2] sulfInd<-iCollection[active,3] position<-iCollection[active,1]-iCollection[active,5]+iCollection[active,4] return(cbind(as.vector(position),as.vector(charge),as.vector(value),as.vector(sulfInd))) } nitpick/R/ams.pp.pl.slidingMax.R0000644000723300071640000000173510771167745016720 0ustar brenardMultBild"ams.pp.pl.slidingMax" <- function(peaks, charge.state.wise=F, n=3) { discardPeaks <- function(p, n) { keep <- NULL; current.row <- 1; while(current.row <= nrow(p)) { # p is sorted; so it suffices to look # (n-1)/2 bins ahead neighborhood <- which( p[current.row:nrow(p),2] <= p[current.row,2]+(n-1)/2 ) + current.row -1; # find max is.max <- which.max(p[neighborhood, 6]); keep <- c(keep, neighborhood[is.max]); current.row <- max(neighborhood)+1; } return(p[keep,,drop=F]); } # make sure peak list is sorted by mass if (is.unsorted(peaks[,1])) { peaks <- peaks[sort(peaks[,1], index.return=T)$ix,,drop=F]; } if (charge.state.wise) { # split data charge state wise ret <- NULL; for (i in sort(unique(peaks[,5]))) { ret <- rbind(ret, discardPeaks(peaks[peaks[,5]==i,,drop=F], n)); } # re-sort by mass ret <- ret[sort(ret[,1], index.return=T)$ix,,drop=F]; } else { ret <- discardPeaks(peaks, n) } return(ret); } nitpick/R/ams.pp.runHierarchical.R0000744000723300071640000000441610771167746017313 0ustar brenardMultBildams.pp.runHierarchical<-function(data,bins,charges,thresholdBetaHat=0,thresholdR2, limit,width,EFA=EFA,gdf=FALSE,bic.steps=0,maxRangeSize=10) {print("started running Simple Peak Detection") resultSingle<-ams.pp.runSingleRegression(data=data, bins=bins, charges=charges, thresholdBetaHat=thresholdBetaHat,thresholdR2=thresholdR2,width=width) nextN<-1 flaggedAreas<-as.matrix(resultSingle[[3]]) order<-order(flaggedAreas[,2]) flaggedAreas<-cbind(rep(0,length(order)),flaggedAreas[order,]) lengthFlaggedAreasOld<-0 while(length(flaggedAreas)!=lengthFlaggedAreasOld) {lengthFlaggedAreasOld<-length(flaggedAreas) flaggedAreas<-joinAdjacent(flaggedAreas) } flaggedAreas<-flaggedAreas[,2:4] #filter out those flaggedAreas which have been already explained by simple regression solvedAreas<-as.matrix(resultSingle[[2]][,4:5]) if(length(solvedAreas)==2) {solvedAreas<-matrix(solvedAreas,nrow=1,ncol=2) } if(length(solvedAreas)>0) { for (i in 1:nrow(solvedAreas)) { for (j in 1: nrow(flaggedAreas)) { if((solvedAreas[i,1]>=flaggedAreas[j,1])&(solvedAreas[i,1]<=flaggedAreas[j,2])) { oldEnd<-flaggedAreas[j,2] flaggedAreas[j,2]<-solvedAreas[i,1]-1 if(oldEnd>solvedAreas[i,2]) {rbind(flaggedAreas,c(solvedAreas[i,2]+1,oldEnd,flaggedAreas[j,3])) } } if((solvedAreas[i,2]>=flaggedAreas[j,1])&(solvedAreas[i,2]<=flaggedAreas[j,2])) { flaggedAreas[j,1]<-solvedAreas[i,2]+1 } } } } #remove only one elemental flagged areas flaggedAreas<-flaggedAreas[flaggedAreas[,1]!=flaggedAreas[,2],] print("started running Multiple Peak Detection") resultMult<-matrix(nrow=0,ncol=4) for (k in 1:(length(flaggedAreas[,1]))) { kong<-ams.pp.buildModelMatrix(flaggedAreas[k,1],flaggedAreas[k,2],100,data=data,EFA=EFA,maxRangeSize=maxRangeSize,charge=charges,width=width,limit=limit) fooResult<-predict.ams.pp.strap.model(kong,EFA=EFA,data=data,includingZero=TRUE,gdf=gdf,bic.steps=bic.steps) fight<-ams.pp.listPeaks(fooResult,kong$iCollection) resultMult=rbind(resultMult,fight) } #join result with Result from Single final<-rbind(resultMult,resultSingle[[2]][,c(1:3,7)]) return(final) } nitpick/R/ams.pp.runSingleRegression.R0000744000723300071640000001572610771167746020225 0ustar brenardMultBildams.pp.runSingleRegression<-function(data,bins,charges=c(2,3,4,5),thresholdR2,width=0.0001,sulLevels=3,thresholdBetaHat=NULL) { flaggedLower<-list() flaggedUpper<-list() flaggedOff<-list() resultSinglePos<-list() resultSingleInt<-list() resultSingleCha<-list() resultSingleLower<-list() resultSingleUpper<-list() resultSingleMax<-list() resultSingleS<-list() for (i in 1:(length(bins[,1]))) {if (bins[i,1]==0) { if(is.null(thresholdBetaHat)) { thresholdBetaHat<-median(data[bins[i,2]:bins[i,3],2]) } distance<-bins[i,3]-bins[i,2]+1 exactness<-1/((width)/distance)#1/((width/charge)/distance) dataBin<-data[bins[i,2]:bins[i,3],2] comb<-2*length(charges) fftModel<-matrix(ncol=comb,nrow=length(data[bins[i,2]:bins[i,3],1])) fftModelPart2<-matrix(ncol=comb,nrow=1) filterRes<-matrix(ncol=comb,nrow=length(data[bins[i,2]:bins[i,3],1])) resThreshold<-matrix(ncol=comb,nrow=length(data[bins[i,2]:bins[i,3],1])) for(j in 1:length(charges)) {fftModel[,2*j]<-buildAveragineModelShort((data[bins[i,2],1]+data[bins[i,3],1])/2,data[bins[i,2]:bins[i,3],1],charges[j],width=width,S="Y")[[1]][1:length(data[bins[i,2]:bins[i,3],1])] fftModel[,2*j-1]<-buildAveragineModelShort((data[bins[i,2],1]+data[bins[i,3],1])/2,data[bins[i,2]:bins[i,3],1],charges[j],width=width,S="N")[[1]][1:length(data[bins[i,2]:bins[i,3],1])] } dataBin<-t(cbind(t(dataBin),t(rep(0,max(length(fftModel[,1])-length(dataBin),0))))) for(j in 1:comb) {filterRes[,j]<-fft(dataBin)*fftModel[,j] resThreshold[,j]<-Re(fft(filterRes[,j],inverse=T)) } unsolved<-c(1:(length(resThreshold[,1]))) resultsr<-"none" while (resultsr!="small"&!is.null(unsolved)&(length(unsolved)>1)) { maxVec<-apply(resThreshold[unsolved,],2,max) maxValue<-max(resThreshold[unsolved,]) combVec<-c(1:comb) SInd<-2-min(combVec[maxVec==maxValue])%%2 S<-"stop" if(SInd==1) {S<-"N" } if(SInd==2) {S<-"Y" } maxCharge<-ceiling(min(combVec[maxVec==maxValue])/2)+min(charges)-1 maxChargePos<-(min(combVec[maxVec==maxValue])) posCharge<-which(combVec==maxChargePos) filteredSignal<-resThreshold[,posCharge] maxXPeak<-findMaxPeakAndPositionUns(cbind(c(1:(length(resThreshold[,1]))),data[bins[i,2]:bins[i,3],2]),unsolved) xMovedGeneral<-buildAveragineModelShort(data[bins[i,2]+maxXPeak[2]-1,1],data[bins[i,2]:bins[i,3],1],maxCharge,width=width,S=S) xMoved<-Re(fft(xMovedGeneral[[1]],inverse=T)) maxPeak<-findMaxPeakAndPosition(cbind(c(1:(length(xMoved))),xMoved)) xRot<-t(cbind(t(xMoved[min((maxPeak[2]+1),(length(xMoved))):(length(xMoved))]),t(xMoved[0:(maxPeak[2])]))) offset<-max(2*max(c(1:(length(xRot)/2))[xRot[1:(length(xRot)/2)]>.1*maxPeak[1]])+2,2) if((maxPeak[2]+offset>length(xMoved))||(maxPeak[2]-offset<1)) { if ((maxPeak[2]+offset>length(xMoved))&&(maxPeak[2]-offset<1)) {x<-c(rep(0,offset-maxPeak[2]+1),xMoved,rep(0,offset+maxPeak[2]-length(xMoved))) } else{if (maxPeak[2]-offset<1) {x<-c(rep(0,offset-maxPeak[2]+1),xMoved[1:(maxPeak[2]+offset)]) } if (maxPeak[2]+offset>length(xMoved)) {x<-c(xMoved[(maxPeak[2]-offset):length(xMoved)],rep(0,offset+maxPeak[2]-length(xMoved))) } } } else { x<-xMoved[(maxPeak[2]-offset):(maxPeak[2]+offset)] } if((bins[i,2]+maxXPeak[2]-offset-1<1)||(bins[i,2]+maxXPeak[2]+offset-1>length(data[,2]))) { if (bins[i,2]+maxXPeak[2]-offset-1<1) { y<-c(rep(0,-bins[i,2]-maxXPeak[2]+offset+2),data[max(bins[i,2]+maxXPeak[2]-offset-1,1):min(bins[i,2]+maxXPeak[2]+offset-1,length(data[,2])),2]) } if (bins[i,2]+maxXPeak[2]+offset>length(data[,2])) {y<-c(data[max(bins[i,2]+maxXPeak[2]-offset,1):min(bins[i,2]+maxXPeak[2]+offset-1,length(data[,2])),2],rep(0,bins[i,2]+maxXPeak[2]+offset-length(data[,2]))) } } else { y<-data[max(bins[i,2]+maxXPeak[2]-offset-1,1):min(bins[i,2]+maxXPeak[2]+offset-1,length(data[,2])),2] } regress<-lm(y~0+x) if(regress$coefficients[1]>thresholdBetaHat) {expMax<-maxXPeak[2] lower<-expMax-(offset/2) while(!(lower %in% unsolved) & (lower< expMax)) {lower<-lower+1 } upper<-expMax+(offset/2) while(!(upper %in% unsolved) & upper> expMax) {upper<-upper-1 } unsolved<-unsolved[!unsolved %in% (max(expMax-offset/2,1)):min(expMax+offset/2,length(resThreshold[,1]))] if(lower==upper) {lower<-min(unsolved) upper<-max(unsolved) unsolved<-NULL } Rsquared<-summary(regress)$r.squared#1-mse/var(y) if(is.na(Rsquared)) { resultsr<-"pos" } if(!is.na(Rsquared)&&(Rsquared0) {for (i in 1:length(resultSinglePos)) {singleResult[i,1]<-resultSinglePos[[i]] singleResult[i,3]<-resultSingleInt[[i]] singleResult[i,2]<-resultSingleCha[[i]] singleResult[i,4]<-resultSingleLower[[i]] singleResult[i,5]<-resultSingleUpper[[i]] singleResult[i,6]<-resultSingleMax[[i]] singleResult[i,7]<-resultSingleS[[i]] } } if(nrow(flagged)>0) {for (i in 1:length(flaggedLower)) {flagged[i,1]<-flaggedLower[[i]] flagged[i,2]<-flaggedUpper[[i]] flagged[i,3]<-flaggedOff[[i]] } } return(list(0,singleResult,flagged)) } nitpick/R/ams.pp.slidingMaxAdjusted.R0000644000723300071640000000300010771167745017755 0ustar brenardMultBildams.pp.pl.slidingMaxAdjusted <- function(result, charge.state.wise=F, n=3) {peaks<-matrix(NA,nrow=length(result[,1]),ncol=7) peaks[,1]<-result[,1] peaks[,2]<-result[,1] peaks[,5]<-result[,2] peaks[,6]<-result[,3] peaks[,7]<-result[,4] print(result) print(dim(peaks)) discardPeaks <- function(p, n) { keep <- NULL; current.row <- 1; while(current.row <= nrow(p)) { # p is sorted; so it suffices to look # (n-1)/2 bins ahead neighborhood <- which( p[current.row:nrow(p),2] <= p[current.row,2]+(n-1)/2 ) + current.row -1; # find max sul<-sum(p[(p[neighborhood, 7]==1),6]) nonsul<-sum(p[(p[neighborhood, 7]==2),6]) midsul<-sum(p[(p[neighborhood, 7]==0),6]) if (nonsul+sul+midsul==0) {sulCont<-0.0417 #from Averagine } else {sulCont<-sul/(nonsul+sul+midsul) } #print(cbind(sulCont,nonsul,sul)) is.max <- which.max(p[neighborhood, 6]); keep <- c(keep, neighborhood[is.max]); current.row <- max(neighborhood)+1; p[neighborhood[is.max],7]<-sulCont } return(p[keep,]); } # make sure peak list is sorted by mass if (is.unsorted(peaks[,1])) { peaks <- peaks[sort(peaks[,1], index.return=T)$ix,]; } if (charge.state.wise) { # split data charge state wise ret <- NULL; for (i in sort(unique(peaks[,5]))) { ret <- rbind(ret, discardPeaks(peaks[peaks[,5]==i,], n)); } # re-sort by mass ret <- ret[sort(ret[,1], index.return=T)$ix,]; } else { ret <- discardPeaks(peaks, n) } ret<-ret[,c(1,5,6,7)] print(ret) return(ret); } nitpick/R/ams.pp.strap.lars.R0000744000723300071640000003064610771167745016304 0ustar brenardMultBild"ams.pp.strap.lars" <- function(x, y, type = c("plasso"), trace = FALSE, Gram, eps = .Machine$double.eps, max.steps, use.Gram = TRUE, EFA=FALSE, preselection, bic.steps=5, gdf=FALSE,ignores=NULL,includingZero=FALSE) { ###### gdf=TRUE uses generalized degrees of freedom, gdf=FALSE uses number of active components for df estimation ###### bic.steps=0 uses comparison with minimum possible BIC stopifnot(require(iwrlars)) ### program automatically centers and standardizes predictors. ### ### Original program by Brad Efron September 2001 ### Recoded by Trevor Hastie November 2001 ### Computational efficiency December 22, 2001 ### Bug fixes and singularities February 2003 ### Conversion to R April 2003 ### Copyright Brad Efron and Trevor Hastie ### ### Extension for "plasso" by Bernhard Renard, Marc Kirchner, January 2007 call <- match.call() type <- match.arg(type) TYPE <- switch(type, plasso = "PLASSO") if(trace) cat(paste(TYPE, "sequence\n")) nm <- dim(x) n <- nm[1] m <- nm[2] im <- inactive <- seq(m) one <- rep(1, n) vn <- dimnames(x)[[2]] ### Center x and y, and scale x, and save the means and sds meanx <- drop(one %*% x)/n x <- scale(x, meanx, FALSE) # centers x normx <- sqrt(drop(one %*% (x^2))) nosignal<-normx/sqrt(n) < eps ignores<-NULL if(any(nosignal)) { # ignore variables with too small a variance ignores<-c(ignores,im[nosignal]) inactive<-im[-ignores] normx[nosignal]<-eps*sqrt(n) if(trace) cat("LARS Step 0 :\t", sum(nosignal), "Variables with Variance < \eps; dropped for good\n") # } #else ignores <- NULL #singularities; augmented later as well names(normx) <- NULL x <- scale(x, FALSE, normx) # scales x if(use.Gram & missing(Gram)) { if(m > 500 && n < m) cat("There are more than 500 variables and n var(y))) { message(noisefactor) sigmasq <- 0 for (frzl in 1:10) { message(" -- ", frzl) noise <- 1e8*noisefactor*.Machine$double.eps * matrix(rnorm(nrow(x)*length(preselection), 0, 1), nrow(x), length(preselection)) ssq.beta <- nnls.fit(x[,preselection,drop=F]+noise, y) sigmasq <- sigmasq + mean((y-x[,preselection,drop=F] %*% ssq.beta)^2) } sigmasq <- sigmasq / 10 noisefactor <- noisefactor +1 } if(sigmasq > var(y)) { sigmasq<-.5*var(y) } message("adjusted sigmasq: ", sigmasq, " with noise factor ", noisefactor) ##### Added necessary precalculations: sigmasq.Old<-sigmasq if(max(ssq.beta)<=.Machine$double.eps) {return() } # ssq.beta <-as.matrix( nnls.fit(as.matrix(x[,preselection2]), y)) # sigmasq <- min(mean((y-as.matrix(x[,preselection2]) %*% (ssq.beta))^2),sigmasq) if(gdf) { ######(**) ##### Added necessary precalculations: # length of NNLS solution (full model) and the estimated df for that solutions in order to standardize solutions lengthNNLS<-sum(ssq.beta>.Machine$double.eps) #number of positive coefficients in full models covEnd<-(x[,preselection,drop=F] %*% ssq.beta)*y dfEnd<-sum(covEnd)/sigmasq } predErrorFull<-sigmasq bic <- NULL; if(includingZero) { bic[1]<-n/predErrorFull*var(y) } ###### Added nnls.err <-NULL; ###### Added df <-NULL; minbic.idx <- 0; biccount <- 0; ### MAIN LOOP ################################### while((k < max.steps) & (length(active) < maxDf)) { action <- NULL k <- k + 1 C <- Cvec[inactive] ### identify the largest nonactive gradient if (type=="plasso") { Cmax <- max(C) if (Cmax <= 0) break; # nowhere left where we can move without violating the constraint } else { Cmax <- max(abs(C)) } ### Check if we are in a DROP situation if(!any(drops)) { if (type=="plasso") { new <- C >= Cmax - eps # see Efron et. al (2003), section 3. } else { new <- abs(C) >= Cmax - eps } C <- C[!new] # for later new <- inactive[new] # Get index numbers ### We keep the choleski R of X[,active] (in the order they enter) for(inew in new) { if(use.Gram) { R <- updateR( Gram[inew, inew], R, drop(Gram[inew, active]), Gram=TRUE, eps=eps ) } else { R <- updateR( x[, inew], R, x[, active], Gram=FALSE, eps=eps ) } if(attr(R, "rank") == length(active)) { ##singularity; back out nR <- seq(length(active)) R <- R[nR, nR, drop = FALSE] attr(R, "rank") <- length(active) ignores <- c(ignores, inew) action <- c(action, - inew) if(trace) cat("LARS Step", k, ":\t Variable", inew, "\tcollinear; dropped for good\n") } else { if(first.in[inew] == 0) first.in[inew] <- k active <- c(active, inew) #NN# if added if (type=="plasso") { Sign <- c(Sign, 1) } else { Sign <- c(Sign, sign(Cvec[inew])) } action <- c(action, inew) if(trace) cat("LARS Step", k, ":\t Variable", inew, "\tadded\n") } } } else { action <- - dropid } Gi1 <- backsolve(R, backsolvet(R, Sign)) ### Now we have to do the forward.stagewise dance ### This is equivalent to NNLS dropouts<-NULL A <- 1/sqrt(sum(Gi1 * Sign)) w <- A * Gi1 # note that w has the right signs if(!use.Gram) u <- drop(x[, active, drop = FALSE] %*% w) ### ### Now we see how far we go along this direction before the ### next competitor arrives. There are several cases ### ### If the active set is all of x, go all the way if(length(active) >= min(n-1, m - length(ignores) ) ) { gamhat <- Cmax/A } else { if(use.Gram) { a <- drop(w %*% Gram[active, - c(active,ignores), drop = FALSE]) } else { a <- drop(u %*% x[, - c(active, ignores), drop=FALSE]) } #NN# if plasso if (type=="plasso"){ gam <- c((Cmax - C)/(A - a)) } else { gam <- c((Cmax - C)/(A - a), (Cmax + C)/(A + a)) } ### Any dropouts will have gam=0, which are ignored here #NN# if plasso if (type=="plasso"){ gamhat <- min(gam[gam > eps]) } else { gamhat <- min(gam[gam > eps], Cmax/A) } } if(type == "lasso"|type=="plasso") { dropid <- NULL b1 <- beta[k, active] # beta starts at 0 z1 <- - b1/w zmin <- min(z1[z1 > eps], gamhat) if(zmin < gamhat) { gamhat <- zmin drops <- z1 == zmin } else drops <- FALSE } beta[k + 1, ] <- beta[k, ] beta[k + 1, active] <- beta[k + 1, active] + gamhat * w if(use.Gram) { Cvec <- Cvec - gamhat * Gram[, active, drop = FALSE] %*% w } else { residuals <- residuals - gamhat * u Cvec <- drop(t(residuals) %*% x) } Gamrat <- c(Gamrat, gamhat/(Cmax/A)) arc.length <- c(arc.length, gamhat) ### Check if we have to drop any guys if((type == "lasso"|type=="plasso") && any(drops)) { dropid <- seq(drops)[drops] # convert logical -> numbers for(id in rev(dropid)) { if(trace) cat("Lasso Step", k+1, ":\t Variable", active[id], "\tdropped\n") R <- downdateR(R, id) } dropid <- active[drops] # indices from 1:m beta[k+1,dropid]<-0 # added to make sure dropped coef is zero active <- active[!drops] Sign <- Sign[!drops] } if(!is.null(vn)) names(action) <- vn[abs(action)] actions[[k]] <- action ####Added for Extended Fractional Averagine if(EFA) { if(is.finite(active[k])) { if(((active[k]%%2)==0)) ignores<-c(ignores,(active[k]-1)) if(((active[k]%%2)==1)) ignores<-c(ignores,(active[k]+1)) } } #### End Change for Extended Fractional Averagine inactive <- im[ - c(active, ignores)] ##### Change in df estimation (now using Ye) and in stopping criterion ##### Added: if and gdf estimation if(gdf){ if(is.finite((max(beta)))) { xbetahat<-x%*%beta[k+1,] #calculate X Betahat for given k } else { print("non-finite max(beta)") nnls.beta<-nnls.fit(x[,which(beta[k+1,]>0),drop=F], y); xbetahat<-x[,which(beta[k+1,]>0),drop=F]%*%nnls.beta } cov<-xbetahat*y #calculated cov(X Betahat(i), y(i)) df[k]<-sum(cov)/sigmasq df[k]<-df[k]*(lengthNNLS/dfEnd) #standardize such that full model has df of full model # to account for innaccuracy in cov estimation } else { df[k]<-sum(beta[k+1,]!=0) } ##### Addition end # get beta from nnls.fit and calcualte rss beta.nnls <- nnls.fit(x[,which(beta[k+1,]>0),drop=F], y); nnls.estimate <- x[,which(beta[k+1,]>0),drop=F] %*% beta.nnls; nnls.residual <- y - nnls.estimate; nnls.err[k] <- mean(nnls.residual^2); ######try if this helps: if (nnls.err[k] < sigmasq) {nnls.err[k] <- sigmasq } if(includingZero) { ###### change bic to include df estimate from further up bic[k+1] <- (n/sigmasq)*(nnls.err[k]+((log(n)*df[k]*sigmasq)/n)) } else { bic[k] <- (n/sigmasq)*(nnls.err[k]+((log(n)*df[k]*sigmasq)/n)) } ###### Added if and Exact Criterion if (bic.steps==0){ #calculate lowest possible bic at that point (so current df, but min possible pred. error) #so pred Error of full model possibleBicMin<-(n/sigmasq)*(predErrorFull+((log(n)*df[k]*sigmasq)/n)) if ( ( k>1 ) && ( bic[minbic.idx]1)) {print("fractional binomial is only defined for amount in (0,1)") return() } if (molecule=="H") {S1<-matrix(c(1.0078246, 2.0141021,0.99985, 0.00015),nrow=2,ncol=2)} # H if (molecule=="C") {S1<-matrix(c(12.0000000, 13.0033554,0.988930, 0.011070 ),nrow=2,ncol=2)} # C if (molecule=="N") {S1<-matrix(c(14.0030732, 15.0001088,0.996337, 0.003663 ),nrow=2,ncol=2)} # N if (molecule=="O") {S1<-matrix(c(15.9949141, 16.9991322, 17.9991616,0.997590, 0.000374, 0.002036 ),nrow=3,ncol=2)} # O if (molecule=="S") {S1<-matrix(c(31.972070, 32.971456, 33.967866, 34, 35.967080,0.9502, 0.0075, 0.0421, 0, 0.0002 ),nrow=5,ncol=2)} # S S1[,1]<-S1[,1]/charge S1new<-S1 S1new[1,1]<-S1[1,1]*amount #correcting the mass S1new[,1]<-S1[,1]-S1[1,1]+S1new[1,1] S0<-S1new #distribution for no atom S0[,2]<-0 S0[1,2]<-1 #all mass at zero Snew<-S1new #return object Snew[,2]<-amount*S1new[,2]+(1-amount)*S0[,2] if (molecule=="S") #exclude p()=0 {Snew<-Snew[c(1,2,3,5),] } return(Snew) } #adapts scale of a data set to match requirements of exactntess #Funktion funktioniert noch nicht fuer den Fall hdistr, also wenn #nicht 0er eingefuegt werden muessen, sondern vielmehr Daten ausgesucht fftDiscrete<-function(distr1, distr2,limit=1e-26) {kmax<-length(distr1[,1])+length(distr2[,1]) distrNew<-matrix(0,nrow=kmax,ncol=2) for (k in 2:kmax) {for (i in 1:min((k-1),length(distr1[,1]))) {if ( ((k-i)>=0) && ((k-i)<=length(distr2[,1]))) {distrNew[k,2]=distrNew[k,2]+distr1[i,2]*distr2[(k-i),2] distrNew[k,1]=distrNew[k,1]+distr1[i,2]*distr2[(k-i),2]*(distr1[i,1]+distr2[(k-i),1]) } } } distrNew<-distrNew[distrNew[,2]>0,] distrNew[,1]<-distrNew[,1]/distrNew[,2] distrNew<-distrNew[distrNew[,2]>limit,] return(distrNew) } #function that builds an exact averagine model by convoluting #mercury result for next lowest integer model and the continuous binomials #for each element buildAveragineModelShort<-function(mass, masses, charge, limit=1e-26,width=.00005,S="Ave") #width needs to be set visually {stopifnot(require(amsmercury)) if (S=="Ave") { stoich<-exactAveragine(mass*1.000641*charge) } if (S=="Y") { stoich<-sulveragine(mass*1.000641*charge) } if (S=="N") { stoich<-lightAveragine(mass*1.000641*charge) } basic<-ams.mercury.mercury(stoich, charge, limit) hdistr<-getDistributionHBin("H",(stoich[1]-floor(stoich[1])),charge) cdistr<-getDistributionHBin("C",(stoich[2]-floor(stoich[2])),charge) ndistr<-getDistributionHBin("N",(stoich[3]-floor(stoich[3])),charge) odistr<-getDistributionHBin("O",(stoich[4]-floor(stoich[4])),charge) sdistr<-getDistributionHBin("S",(stoich[5]-floor(stoich[5])),charge) result<-fftDiscrete(hdistr,cdistr,limit) result<-fftDiscrete(result,ndistr,limit) result<-fftDiscrete(result,odistr,limit) result<-fftDiscrete(result,sdistr,limit) result<-fftDiscrete(basic,result,limit) masses<-ams.pp.bins2breaks(masses) sig <- rep(0, length(masses)) position<-vector(length=length(result[,1])) position<-findInterval(result[,1],masses) for (i in 1:length(result[,1])) {sig[position[i]]<-sig[position[i]]+result[i,2] } minPosition<-min(position) lpf<-vector(length=length(masses)) lastResult<-1 i<-floor(length(masses)/2) lastResult<-1 lpf[1]<-lastResult change<-1 lpfi<-dnorm(masses[i],masses[i],width*masses[i]) while((lastResult>limit)&(change=(bins[i+1,2])-1)) #if 2 adjacent are both 0 { binsNew[nextN,3]<-bins[i+1,3] if(binsNew[nextN,2]>bins[i+1,2]) {binsNew[nextN,2]<-bins[i+1,2] } } else{ #otherwise no change taking place nextN<-nextN+1 binsNew[nextN,]<-bins[i+1,] } } #return new Bins return(binsNew[1:nextN,]) } nitpick/R/nnls.fit.R0000644000723300071640000000215710771167746014545 0ustar brenardMultBild"nnls.fit" <- function(x,y,wsqrt=1,eps=0,rank.tol=1e-07) { ## Purpose: Nonnegative Least Squares (similar to the S-Plus function ## with the same name) with the help of the R-library quadprog ## ------------------------------------------------------------------------ ## Attention: ## - weights are square roots of usual weights ## - the constraint is coefficient>=eps ## ------------------------------------------------------------------------ ## Author: Marcel Wolbers, July 99 ## #======================================================================== require ("quadprog") m <- NCOL(x) if (length(eps)==1) eps <- rep(eps,m) x <- x * wsqrt y <- y * wsqrt #sometimes a rescaling of x and y helps (if solve.QP.compact fails otherwise) xscale <- apply(abs(x),2,mean) yscale <- mean(abs(y)) x <- t(t(x)/xscale) y <- y/yscale Rinv <- backsolve(qr.R(qr(x)),diag(m)) cf <- solve.QP.compact(Dmat=Rinv,dvec=t(x)%*%y,Amat=rbind(rep(1,m)), Aind=rbind(rep(1,m),1:m),bvec=eps*xscale/yscale, factorized=TRUE)$sol cf <- cf*yscale/xscale #scale back cf } nitpick/R/predict.ams.pp.strap.model.R0000644000723300071640000000225210771167746020064 0ustar brenardMultBildpredict.ams.pp.strap.model<-function(model,EFA=TRUE,data,bic.steps=0,gdf=TRUE,includingZero=FALSE) { xCombined<-model$xCombined if(!is.null(xCombined)) { beginWithOffset<-model$beginWithOffset endWithOffset<-model$endWithOffset preselection<-model$preselection larsResult<-NULL larsResult$beta<-NULL if(length(preselection)==0) { larsResult$beta<-0 } else { larsResult<-ams.pp.strap.lars(xCombined,data[beginWithOffset:endWithOffset,2], EFA=EFA,preselection=preselection,bic.steps=bic.steps,gdf=gdf,ignores=ignores,includingZero=includingZero) } if(is.null(larsResult)||is.null(larsResult$beta)|| !is.finite(larsResult$beta) ||(sum(larsResult$beta)==0)) { xlassoTotal<-NULL nnls<-NULL beta<-NULL } else { xlassoTotal<-c(1:dim(xCombined)[2])[larsResult$beta>0] nnls<-nnls.fit(as.matrix(xCombined[,xlassoTotal]),data[beginWithOffset:endWithOffset,2]) beta<-rep(0,dim(xCombined)[2]) beta[xlassoTotal]<-nnls } } else { beta<-vector(length=0) } return(beta) }nitpick/R/runPostProc.R0000744000723300071640000000752711113525232015272 0ustar brenardMultBildrunPostProc<-function(width,quant,g,pathIn,pathOut,mz.bin=mz.bins, chargestates=c(1,2,3,4,5,6),database=FALSE,recalc=FALSE,databasePath=NULL,threshCounts=0,linear=TRUE){ load(pathIn) pp_resultList<-matrix(nrow=0,ncol=4) charges<-chargestates #source("~/projects/sulveragine/ams.pp.slidingMaxAdjustedSul.R") ##recalc the breakpoints result<-ams.pp.pl.slidingMaxAdjusted(resultList,FALSE,g) ##if database comparison wanted if(database) { ##recalc the breakpoints if(recalc) {breakCountMatrix<-matrix(ncol=max(chargestates),nrow=length(mz.bin)+1) for (i in charges) {breakCountMatrix[,i]<-compareSwissprot(i,mz.bin,databasePath) } save(breakCountMatrix,file=paste(path,"/breakCountMatrix.RDA",sep="")) } else {load(paste(path,"/breakCountMatrix.RDA",sep="")) } resultListSample<-NULL for (k in chargestates) {resultListSampleCharge<-result[result[,2]==k,,drop=F] resultListSample<- rbind(resultListSampleCharge[breakCountMatrix[resultListSampleCharge[,1],k]>threshCounts, ],resultListSample) } result<-resultListSample } #print(result) result<-result[result[,1]>0,] #print(i) xCombined<-NULL for(j in 1:length(result[,1])) { if(result[j,4]>.5) {model<-buildAveragineModelShort(mz.bin[result[j,1]],mz.bin, result[j,2],S="Y",width=width)#,linear=linear) xNew<-Re(fft(model[[1]],inverse=TRUE)) xNew<-c(xNew[max((model[[2]]-result[j,1]),0):length(xNew)],rep(0,max(0,(model[[2]]-result[j,1])))) } else {model<-buildAveragineModelShort(mz.bin[result[j,1]],mz.bin, result[j,2],S="N",width=width) xNew<-Re(fft(model[[1]],inverse=TRUE)) xNew<-c(xNew[max((model[[2]]-result[j,1]),0):length(xNew)],rep(0,max(0,(model[[2]]-result[j,1]))))#,linear=linear) } xCombined<-cbind(xCombined,xNew) } print(dim(xCombined)) result[,3]<-try(nnls.fit(xCombined[1:length(mz.bin),],N$S[i,]),silent=TRUE) xCombined<-xCombined[,result[,3]>.Machine$double.eps] result<-result[result[,3]>.Machine$double.eps,] pp_resultList<-result save(xCombined,file=paste(pathOut,"xCombined_","_",g,"_",width*1000,".RDA",sep="")) save(pp_resultList,file=paste(pathOut,"pp_resultList_","_",g,"_",width*1000,".RDA",sep="")) } postProc<-function(result){ chargeOrd<-order(result[,2],result[,1]) result<-result[chargeOrd,] #get rid of similar charge states next to each other, only using max ppResult<-matrix(0,nrow=length(result)/3,ncol=3) nextN<-1 for (i in 2:(length(result)/3)){ if (( (result[i,1]-1==result[i-1,1]) && (result[i,2]==result[i-1,2])) |( (i>=3) && (result[i,1]-1==result[i-2,1]) && (result[i,2]==result[i-2,2]))){ ppResult[nextN,]<-(result[i,3]>=ppResult[nextN,3])*result[i,]+(result[i,3]=ppResult[nextN,3])*ppResult[j,]+(ppResult[j,3]threshCounts, ],resultListSample) } result<-resultListSample } #print(result) result<-result[result[,1]>0,] #print(i) xCombined<-NULL for(j in 1:length(result[,1])) { if(result[j,4]>.5) {model<-buildAveragineModelShort(mz.bin[result[j,1]],mz.bin, result[j,2],S="Y",width=width) xNew<-Re(fft(model[[1]],inverse=TRUE)) xNew<-c(xNew[max((model[[2]]-result[j,1]),0):length(xNew)],rep(0,max(0,(model[[2]]-result[j,1])))) } else {model<-buildAveragineModelShort(mz.bin[result[j,1]],mz.bin, result[j,2],S="N",width=width) xNew<-Re(fft(model[[1]],inverse=TRUE)) xNew<-c(xNew[max((model[[2]]-result[j,1]),0):length(xNew)],rep(0,max(0,(model[[2]]-result[j,1])))) } xCombined<-cbind(xCombined,xNew) } print(dim(xCombined)) result[,3]<-nnls.fit(xCombined[1:length(mz.bin),],spectrum[,2]) xCombined<-xCombined[,result[,3]>.Machine$double.eps] result<-result[result[,3]>.Machine$double.eps,] pp_resultList<-result save(xCombined,file=paste(pathOut,"xCombined_","_",g,"_",width*1000,".RDA",sep="")) save(pp_resultList,file=paste(pathOut,"pp_resultList_","_",g,"_",width*1000,".RDA",sep="")) } postProc<-function(result){ chargeOrd<-order(result[,2],result[,1]) result<-result[chargeOrd,] #get rid of similar charge states next to each other, only using max ppResult<-matrix(0,nrow=length(result)/3,ncol=3) nextN<-1 for (i in 2:(length(result)/3)){ if (( (result[i,1]-1==result[i-1,1]) && (result[i,2]==result[i-1,2])) |( (i>=3) && (result[i,1]-1==result[i-2,1]) && (result[i,2]==result[i-2,2]))){ ppResult[nextN,]<-(result[i,3]>=ppResult[nextN,3])*result[i,]+(result[i,3]=ppResult[nextN,3])*ppResult[j,]+(ppResult[j,3] Description: STRAP implementation for peak picking in MS spectra License: LGPL v2.0 or any later version Packaged: Thu Nov 27 17:59:33 2008; brenard